[Tolk] Rewrite lexer, spaces are not mandatory anymore

The new lexer is noticeably faster and more memory-efficient (although splitting a file into tokens takes a negligible share of the whole pipeline). However, the purpose of rewriting the lexer was not just to speed it up, but to allow writing code without spaces: `2+2` is now 4, rather than a valid identifier as it was before. The variety of symbols allowed in identifiers has been greatly reduced and is now similar to other languages. SrcLocation is now 8 bytes on the stack everywhere. Command-line flags were also reworked: - the input for the Tolk compiler is now only a single file; it is parsed, and parsing continues as new #include directives are resolved - flags like -A, -P and so on are no longer needed
2025-03-09 15:40:10 +00:00 · 2024-10-31 10:59:23 +04:00 · 2024-10-31 10:59:23 +04:00 · f0e6470d0b
commit f0e6470d0b
parent 0bcc0b3c12
26 changed files with 2042 additions and 2129 deletions
--- a/crypto/fift/utils.cpp
+++ b/crypto/fift/utils.cpp
@ -211,20 +211,39 @@ td::Result<fift::SourceLookup> create_mem_source_lookup(std::string main, std::s
                              fift_dir);
 }

-td::Result<td::Ref<vm::Cell>> compile_asm(td::Slice asm_code, std::string fift_dir, bool is_raw) {
+td::Result<td::Ref<vm::Cell>> compile_asm(td::Slice asm_code) {
  std::stringstream ss;
  std::string sb;
  sb.reserve(asm_code.size() + 100);
-  sb.append("\"Asm.fif\" include\n ");
-  sb.append(is_raw ? "<{" : "");
+  sb.append("\"Asm.fif\" include\n <{\n");
  sb.append(asm_code.data(), asm_code.size());
-  sb.append(is_raw ? "}>c" : "");
-  sb.append(" boc>B \"res\" B>file");
+  sb.append("\n}>c boc>B \"res\" B>file");

-  TRY_RESULT(source_lookup, create_source_lookup(std::move(sb), true, true, true, false, false, false, false, fift_dir));
+  TRY_RESULT(source_lookup, create_source_lookup(std::move(sb), true, true, true, false, false, false, false));
  TRY_RESULT(res, run_fift(std::move(source_lookup), &ss));
  TRY_RESULT(boc, res.read_file("res"));
  return vm::std_boc_deserialize(std::move(boc.data));
 }

+td::Result<CompiledProgramOutput> compile_asm_program(std::string&& program_code, const std::string& fift_dir) {
+  std::string main_fif;
+  main_fif.reserve(program_code.size() + 100);
+  main_fif.append(program_code.data(), program_code.size());
+  main_fif.append(R"( dup hashB B>X      $>B "hex" B>file)");   // write codeHashHex to a file
+  main_fif.append(R"(     boc>B B>base64 $>B "boc" B>file)");   // write codeBoc64 to a file
+
+  std::stringstream fift_output_stream;
+  TRY_RESULT(source_lookup, create_source_lookup(std::move(main_fif), true, true, false, false, false, false, false, fift_dir));
+  TRY_RESULT(res, run_fift(std::move(source_lookup), &fift_output_stream));
+
+  TRY_RESULT(boc, res.read_file("boc"));
+  TRY_RESULT(hex, res.read_file("hex"));
+
+  return CompiledProgramOutput{
+    std::move(program_code),
+    std::move(boc.data),
+    std::move(hex.data),
+  };
+}
+
 }  // namespace fift
--- a/crypto/fift/utils.h
+++ b/crypto/fift/utils.h
@ -26,11 +26,21 @@ struct FiftOutput {
  SourceLookup source_lookup;
  std::string output;
 };
+
+// given a valid Fift code PROGRAM{ ... }END>c, compile_asm_program() returns this output
+// now it's used primarily for wasm output (see tolk-js, for example)
+struct CompiledProgramOutput {
+  std::string fiftCode;
+  std::string codeBoc64;
+  std::string codeHashHex;
+};
+
 td::Result<fift::SourceLookup> create_mem_source_lookup(std::string main, std::string fift_dir = "",
                                                        bool need_preamble = true, bool need_asm = true,
                                                        bool need_ton_util = true, bool need_lisp = true,
                                                        bool need_w3_code = true);
 td::Result<FiftOutput> mem_run_fift(std::string source, std::vector<std::string> args = {}, std::string fift_dir = "");
 td::Result<FiftOutput> mem_run_fift(SourceLookup source_lookup, std::vector<std::string> args);
-td::Result<td::Ref<vm::Cell>> compile_asm(td::Slice asm_code, std::string fift_dir = "", bool is_raw = true);
+td::Result<td::Ref<vm::Cell>> compile_asm(td::Slice asm_code);
+td::Result<CompiledProgramOutput> compile_asm_program(std::string&& program_code, const std::string& fift_dir);
 }  // namespace fift