mirror of
				https://github.com/ton-blockchain/ton
				synced 2025-03-09 15:40:10 +00:00 
			
		
		
		
	[Tolk] Rewrite lexer, spaces are not mandatory anymore
A new lexer is noticeably faster and more memory-efficient (although splitting a file into tokens is negligible in the whole pipeline). But the purpose of rewriting the lexer was not just to speed it up, but to allow writing code without spaces: `2+2` is now 4, not a valid identifier as it was earlier. The variety of symbols allowed in an identifier has been greatly reduced and is now similar to other languages. SrcLocation became 8 bytes on stack everywhere. Command-line flags were also reworked: - the input for the Tolk compiler is only a single file now, it's parsed, and parsing continues while new #include directives are resolved - flags like -A -P and so on are no longer needed, actually
This commit is contained in:
		
							parent
							
								
									0bcc0b3c12
								
							
						
					
					
						commit
						f0e6470d0b
					
				
					 26 changed files with 2042 additions and 2129 deletions
				
			
		| 
						 | 
				
			
			@ -211,20 +211,39 @@ td::Result<fift::SourceLookup> create_mem_source_lookup(std::string main, std::s
 | 
			
		|||
                              fift_dir);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
td::Result<td::Ref<vm::Cell>> compile_asm(td::Slice asm_code, std::string fift_dir, bool is_raw) {
 | 
			
		||||
td::Result<td::Ref<vm::Cell>> compile_asm(td::Slice asm_code) {
 | 
			
		||||
  std::stringstream ss;
 | 
			
		||||
  std::string sb;
 | 
			
		||||
  sb.reserve(asm_code.size() + 100);
 | 
			
		||||
  sb.append("\"Asm.fif\" include\n ");
 | 
			
		||||
  sb.append(is_raw ? "<{" : "");
 | 
			
		||||
  sb.append("\"Asm.fif\" include\n <{\n");
 | 
			
		||||
  sb.append(asm_code.data(), asm_code.size());
 | 
			
		||||
  sb.append(is_raw ? "}>c" : "");
 | 
			
		||||
  sb.append(" boc>B \"res\" B>file");
 | 
			
		||||
  sb.append("\n}>c boc>B \"res\" B>file");
 | 
			
		||||
 | 
			
		||||
  TRY_RESULT(source_lookup, create_source_lookup(std::move(sb), true, true, true, false, false, false, false, fift_dir));
 | 
			
		||||
  TRY_RESULT(source_lookup, create_source_lookup(std::move(sb), true, true, true, false, false, false, false));
 | 
			
		||||
  TRY_RESULT(res, run_fift(std::move(source_lookup), &ss));
 | 
			
		||||
  TRY_RESULT(boc, res.read_file("res"));
 | 
			
		||||
  return vm::std_boc_deserialize(std::move(boc.data));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
td::Result<CompiledProgramOutput> compile_asm_program(std::string&& program_code, const std::string& fift_dir) {
 | 
			
		||||
  std::string main_fif;
 | 
			
		||||
  main_fif.reserve(program_code.size() + 100);
 | 
			
		||||
  main_fif.append(program_code.data(), program_code.size());
 | 
			
		||||
  main_fif.append(R"( dup hashB B>X      $>B "hex" B>file)");   // write codeHashHex to a file
 | 
			
		||||
  main_fif.append(R"(     boc>B B>base64 $>B "boc" B>file)");   // write codeBoc64 to a file
 | 
			
		||||
 | 
			
		||||
  std::stringstream fift_output_stream;
 | 
			
		||||
  TRY_RESULT(source_lookup, create_source_lookup(std::move(main_fif), true, true, false, false, false, false, false, fift_dir));
 | 
			
		||||
  TRY_RESULT(res, run_fift(std::move(source_lookup), &fift_output_stream));
 | 
			
		||||
 | 
			
		||||
  TRY_RESULT(boc, res.read_file("boc"));
 | 
			
		||||
  TRY_RESULT(hex, res.read_file("hex"));
 | 
			
		||||
 | 
			
		||||
  return CompiledProgramOutput{
 | 
			
		||||
    std::move(program_code),
 | 
			
		||||
    std::move(boc.data),
 | 
			
		||||
    std::move(hex.data),
 | 
			
		||||
  };
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
}  // namespace fift
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -26,11 +26,21 @@ struct FiftOutput {
 | 
			
		|||
  SourceLookup source_lookup;
 | 
			
		||||
  std::string output;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// given a valid Fift code PROGRAM{ ... }END>c, compile_asm_program() returns this output
 | 
			
		||||
// now it's used primarily for wasm output (see tolk-js, for example)
 | 
			
		||||
struct CompiledProgramOutput {
 | 
			
		||||
  std::string fiftCode;
 | 
			
		||||
  std::string codeBoc64;
 | 
			
		||||
  std::string codeHashHex;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
td::Result<fift::SourceLookup> create_mem_source_lookup(std::string main, std::string fift_dir = "",
 | 
			
		||||
                                                        bool need_preamble = true, bool need_asm = true,
 | 
			
		||||
                                                        bool need_ton_util = true, bool need_lisp = true,
 | 
			
		||||
                                                        bool need_w3_code = true);
 | 
			
		||||
td::Result<FiftOutput> mem_run_fift(std::string source, std::vector<std::string> args = {}, std::string fift_dir = "");
 | 
			
		||||
td::Result<FiftOutput> mem_run_fift(SourceLookup source_lookup, std::vector<std::string> args);
 | 
			
		||||
td::Result<td::Ref<vm::Cell>> compile_asm(td::Slice asm_code, std::string fift_dir = "", bool is_raw = true);
 | 
			
		||||
td::Result<td::Ref<vm::Cell>> compile_asm(td::Slice asm_code);
 | 
			
		||||
td::Result<CompiledProgramOutput> compile_asm_program(std::string&& program_code, const std::string& fift_dir);
 | 
			
		||||
}  // namespace fift
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -37,10 +37,10 @@
 | 
			
		|||
 | 
			
		||||
td::Result<std::string> compile_internal(char *config_json) {
 | 
			
		||||
  TRY_RESULT(input_json, td::json_decode(td::MutableSlice(config_json)))
 | 
			
		||||
  auto &obj = input_json.get_object();
 | 
			
		||||
  td::JsonObject& config = input_json.get_object();
 | 
			
		||||
 | 
			
		||||
  TRY_RESULT(opt_level, td::get_json_object_int_field(obj, "optLevel", false));
 | 
			
		||||
  TRY_RESULT(sources_obj, td::get_json_object_field(obj, "sources", td::JsonValue::Type::Array, false));
 | 
			
		||||
  TRY_RESULT(opt_level, td::get_json_object_int_field(config, "optLevel", false));
 | 
			
		||||
  TRY_RESULT(sources_obj, td::get_json_object_field(config, "sources", td::JsonValue::Type::Array, false));
 | 
			
		||||
 | 
			
		||||
  auto &sources_arr = sources_obj.get_array();
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -52,29 +52,25 @@ td::Result<std::string> compile_internal(char *config_json) {
 | 
			
		|||
 | 
			
		||||
  funC::opt_level = std::max(0, opt_level);
 | 
			
		||||
  funC::program_envelope = true;
 | 
			
		||||
  funC::asm_preamble = true;
 | 
			
		||||
  funC::verbosity = 0;
 | 
			
		||||
  funC::indent = 1;
 | 
			
		||||
 | 
			
		||||
  std::ostringstream outs, errs;
 | 
			
		||||
  auto compile_res = funC::func_proceed(sources, outs, errs);
 | 
			
		||||
 | 
			
		||||
  if (compile_res != 0) {
 | 
			
		||||
    return td::Status::Error(std::string("Func compilation error: ") + errs.str());
 | 
			
		||||
  int funC_res = funC::func_proceed(sources, outs, errs);
 | 
			
		||||
  if (funC_res != 0) {
 | 
			
		||||
    return td::Status::Error("FunC compilation error: " + errs.str());
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  TRY_RESULT(code_cell, fift::compile_asm(outs.str(), "/fiftlib/", false));
 | 
			
		||||
  TRY_RESULT(boc, vm::std_boc_serialize(code_cell));
 | 
			
		||||
  TRY_RESULT(fift_res, fift::compile_asm_program(outs.str(), "/fiftlib/"));
 | 
			
		||||
 | 
			
		||||
  td::JsonBuilder result_json;
 | 
			
		||||
  auto result_obj = result_json.enter_object();
 | 
			
		||||
  result_obj("status", "ok");
 | 
			
		||||
  result_obj("codeBoc", td::base64_encode(boc));
 | 
			
		||||
  result_obj("fiftCode", outs.str());
 | 
			
		||||
  result_obj("codeHashHex", code_cell->get_hash().to_hex());
 | 
			
		||||
  result_obj.leave();
 | 
			
		||||
 | 
			
		||||
  outs.clear();
 | 
			
		||||
  errs.clear();
 | 
			
		||||
  auto obj = result_json.enter_object();
 | 
			
		||||
  obj("status", "ok");
 | 
			
		||||
  obj("fiftCode", std::move(fift_res.fiftCode));
 | 
			
		||||
  obj("codeBoc", std::move(fift_res.codeBoc64));
 | 
			
		||||
  obj("codeHashHex", std::move(fift_res.codeHashHex));
 | 
			
		||||
  obj.leave();
 | 
			
		||||
 | 
			
		||||
  return result_json.string_builder().as_cslice().str();
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -572,9 +572,9 @@ int atanh_f261(int x, int n) inline_ref {
 | 
			
		|||
    s -= 1;
 | 
			
		||||
  }
 | 
			
		||||
  x += t;
 | 
			
		||||
  int 2x = 2 * x;
 | 
			
		||||
  int y = lshift256divr(2x, (x >> 1) - t);
 | 
			
		||||
  ;; y = 2x - (mulrshiftr256(2x, y) ~>> 2);  ;; this line could improve precision on very rare occasions
 | 
			
		||||
  int `2x` = 2 * x;
 | 
			
		||||
  int y = lshift256divr(`2x`, (x >> 1) - t);
 | 
			
		||||
  ;; y = `2x` - (mulrshiftr256(`2x`, y) ~>> 2);  ;; this line could improve precision on very rare occasions
 | 
			
		||||
  return (atanh_f258(y, 36), s);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,10 +1,9 @@
 | 
			
		|||
cmake_minimum_required(VERSION 3.5 FATAL_ERROR)
 | 
			
		||||
 | 
			
		||||
set(TOLK_SOURCE
 | 
			
		||||
        srcread.cpp
 | 
			
		||||
        src-file.cpp
 | 
			
		||||
        lexer.cpp
 | 
			
		||||
        symtable.cpp
 | 
			
		||||
        keywords.cpp
 | 
			
		||||
        unify-types.cpp
 | 
			
		||||
        parse-tolk.cpp
 | 
			
		||||
        abscode.cpp
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -24,29 +24,19 @@ namespace tolk {
 | 
			
		|||
 * 
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
TmpVar::TmpVar(var_idx_t _idx, int _cls, TypeExpr* _type, SymDef* sym, const SrcLocation* loc)
 | 
			
		||||
    : v_type(_type), idx(_idx), cls(_cls), coord(0) {
 | 
			
		||||
TmpVar::TmpVar(var_idx_t _idx, int _cls, TypeExpr* _type, SymDef* sym, SrcLocation loc)
 | 
			
		||||
    : v_type(_type), idx(_idx), cls(_cls), coord(0), where(loc) {
 | 
			
		||||
  if (sym) {
 | 
			
		||||
    name = sym->sym_idx;
 | 
			
		||||
    sym->value->idx = _idx;
 | 
			
		||||
  }
 | 
			
		||||
  if (loc) {
 | 
			
		||||
    where = std::make_unique<SrcLocation>(*loc);
 | 
			
		||||
  }
 | 
			
		||||
  if (!_type) {
 | 
			
		||||
    v_type = TypeExpr::new_hole();
 | 
			
		||||
  }
 | 
			
		||||
  if (cls == _Named) {
 | 
			
		||||
    undefined = true;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void TmpVar::set_location(const SrcLocation& loc) {
 | 
			
		||||
  if (where) {
 | 
			
		||||
    *where = loc;
 | 
			
		||||
  } else {
 | 
			
		||||
    where = std::make_unique<SrcLocation>(loc);
 | 
			
		||||
  }
 | 
			
		||||
void TmpVar::set_location(SrcLocation loc) {
 | 
			
		||||
  where = loc;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void TmpVar::dump(std::ostream& os) const {
 | 
			
		||||
| 
						 | 
				
			
			@ -469,10 +459,10 @@ void CodeBlob::print(std::ostream& os, int flags) const {
 | 
			
		|||
  if ((flags & 8) != 0) {
 | 
			
		||||
    for (const auto& var : vars) {
 | 
			
		||||
      var.dump(os);
 | 
			
		||||
      if (var.where && (flags & 1) != 0) {
 | 
			
		||||
        var.where->show(os);
 | 
			
		||||
      if (var.where.is_defined() && (flags & 1) != 0) {
 | 
			
		||||
        var.where.show(os);
 | 
			
		||||
        os << " defined here:\n";
 | 
			
		||||
        var.where->show_context(os);
 | 
			
		||||
        var.where.show_context(os);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
| 
						 | 
				
			
			@ -483,7 +473,7 @@ void CodeBlob::print(std::ostream& os, int flags) const {
 | 
			
		|||
  os << "-------- END ---------\n\n";
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
var_idx_t CodeBlob::create_var(int cls, TypeExpr* var_type, SymDef* sym, const SrcLocation* location) {
 | 
			
		||||
var_idx_t CodeBlob::create_var(int cls, TypeExpr* var_type, SymDef* sym, SrcLocation location) {
 | 
			
		||||
  vars.emplace_back(var_cnt, cls, var_type, sym, location);
 | 
			
		||||
  if (sym) {
 | 
			
		||||
    sym->value->idx = var_cnt;
 | 
			
		||||
| 
						 | 
				
			
			@ -501,7 +491,7 @@ bool CodeBlob::import_params(FormalArgList arg_list) {
 | 
			
		|||
    SymDef* arg_sym;
 | 
			
		||||
    SrcLocation arg_loc;
 | 
			
		||||
    std::tie(arg_type, arg_sym, arg_loc) = par;
 | 
			
		||||
    list.push_back(create_var(arg_sym ? (TmpVar::_In | TmpVar::_Named) : TmpVar::_In, arg_type, arg_sym, &arg_loc));
 | 
			
		||||
    list.push_back(create_var(arg_sym ? (TmpVar::_In | TmpVar::_Named) : TmpVar::_In, arg_type, arg_sym, arg_loc));
 | 
			
		||||
  }
 | 
			
		||||
  emplace_back(loc, Op::_Import, list);
 | 
			
		||||
  in_var_cnt = var_cnt;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -36,7 +36,7 @@ int CodeBlob::split_vars(bool strict) {
 | 
			
		|||
  for (int j = 0; j < var_cnt; j++) {
 | 
			
		||||
    TmpVar& var = vars[j];
 | 
			
		||||
    if (strict && var.v_type->minw != var.v_type->maxw) {
 | 
			
		||||
      throw ParseError{var.where.get(), "variable does not have fixed width, cannot manipulate it"};
 | 
			
		||||
      throw ParseError{var.where, "variable does not have fixed width, cannot manipulate it"};
 | 
			
		||||
    }
 | 
			
		||||
    std::vector<TypeExpr*> comp_types;
 | 
			
		||||
    int k = var.v_type->extract_components(comp_types);
 | 
			
		||||
| 
						 | 
				
			
			@ -45,7 +45,7 @@ int CodeBlob::split_vars(bool strict) {
 | 
			
		|||
    if (k != 1) {
 | 
			
		||||
      var.coord = ~((n << 8) + k);
 | 
			
		||||
      for (int i = 0; i < k; i++) {
 | 
			
		||||
        auto v = create_var(vars[j].cls, comp_types[i], 0, vars[j].where.get());
 | 
			
		||||
        auto v = create_var(vars[j].cls, comp_types[i], 0, vars[j].where);
 | 
			
		||||
        tolk_assert(v == n + i);
 | 
			
		||||
        tolk_assert(vars[v].idx == v);
 | 
			
		||||
        vars[v].name = vars[j].name;
 | 
			
		||||
| 
						 | 
				
			
			@ -54,7 +54,7 @@ int CodeBlob::split_vars(bool strict) {
 | 
			
		|||
      n += k;
 | 
			
		||||
      ++changes;
 | 
			
		||||
    } else if (strict && var.v_type->minw != 1) {
 | 
			
		||||
      throw ParseError{var.where.get(),
 | 
			
		||||
      throw ParseError{var.where,
 | 
			
		||||
                            "cannot work with variable or variable component of width greater than one"};
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -95,7 +95,7 @@ SymDef* define_builtin_const(std::string name, TypeExpr* const_type, Args&&... a
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
bool SymValAsmFunc::compile(AsmOpList& dest, std::vector<VarDescr>& out, std::vector<VarDescr>& in,
 | 
			
		||||
                            const SrcLocation& where) const {
 | 
			
		||||
                            SrcLocation where) const {
 | 
			
		||||
  if (simple_compile) {
 | 
			
		||||
    return dest.append(simple_compile(out, in, where));
 | 
			
		||||
  } else if (ext_compile) {
 | 
			
		||||
| 
						 | 
				
			
			@ -186,7 +186,7 @@ int emulate_mul(int a, int b) {
 | 
			
		|||
  return r;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int emulate_and(int a, int b) {
 | 
			
		||||
int emulate_bitwise_and(int a, int b) {
 | 
			
		||||
  int both = a & b, any = a | b;
 | 
			
		||||
  int r = VarDescr::_Int;
 | 
			
		||||
  if (any & VarDescr::_Nan) {
 | 
			
		||||
| 
						 | 
				
			
			@ -204,7 +204,7 @@ int emulate_and(int a, int b) {
 | 
			
		|||
  return r;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int emulate_or(int a, int b) {
 | 
			
		||||
int emulate_bitwise_or(int a, int b) {
 | 
			
		||||
  if (b & VarDescr::_Zero) {
 | 
			
		||||
    return a;
 | 
			
		||||
  } else if (a & VarDescr::_Zero) {
 | 
			
		||||
| 
						 | 
				
			
			@ -222,7 +222,7 @@ int emulate_or(int a, int b) {
 | 
			
		|||
  return r;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int emulate_xor(int a, int b) {
 | 
			
		||||
int emulate_bitwise_xor(int a, int b) {
 | 
			
		||||
  if (b & VarDescr::_Zero) {
 | 
			
		||||
    return a;
 | 
			
		||||
  } else if (a & VarDescr::_Zero) {
 | 
			
		||||
| 
						 | 
				
			
			@ -241,7 +241,7 @@ int emulate_xor(int a, int b) {
 | 
			
		|||
  return r;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int emulate_not(int a) {
 | 
			
		||||
int emulate_bitwise_not(int a) {
 | 
			
		||||
  if ((a & VarDescr::ConstZero) == VarDescr::ConstZero) {
 | 
			
		||||
    return VarDescr::ConstTrue;
 | 
			
		||||
  }
 | 
			
		||||
| 
						 | 
				
			
			@ -436,7 +436,7 @@ AsmOp push_const(td::RefInt256 x) {
 | 
			
		|||
  return AsmOp::IntConst(std::move(x));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
AsmOp compile_add(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where) {
 | 
			
		||||
AsmOp compile_add(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where) {
 | 
			
		||||
  tolk_assert(res.size() == 1 && args.size() == 2);
 | 
			
		||||
  VarDescr &r = res[0], &x = args[0], &y = args[1];
 | 
			
		||||
  if (x.is_int_const() && y.is_int_const()) {
 | 
			
		||||
| 
						 | 
				
			
			@ -478,7 +478,7 @@ AsmOp compile_add(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const
 | 
			
		|||
  return exec_op("ADD", 2);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
AsmOp compile_sub(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where) {
 | 
			
		||||
AsmOp compile_sub(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where) {
 | 
			
		||||
  tolk_assert(res.size() == 1 && args.size() == 2);
 | 
			
		||||
  VarDescr &r = res[0], &x = args[0], &y = args[1];
 | 
			
		||||
  if (x.is_int_const() && y.is_int_const()) {
 | 
			
		||||
| 
						 | 
				
			
			@ -511,7 +511,7 @@ AsmOp compile_sub(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const
 | 
			
		|||
  return exec_op("SUB", 2);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
AsmOp compile_negate(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where) {
 | 
			
		||||
AsmOp compile_unary_minus(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where) {
 | 
			
		||||
  tolk_assert(res.size() == 1 && args.size() == 1);
 | 
			
		||||
  VarDescr &r = res[0], &x = args[0];
 | 
			
		||||
  if (x.is_int_const()) {
 | 
			
		||||
| 
						 | 
				
			
			@ -526,7 +526,19 @@ AsmOp compile_negate(std::vector<VarDescr>& res, std::vector<VarDescr>& args, co
 | 
			
		|||
  return exec_op("NEGATE", 1);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
AsmOp compile_and(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where) {
 | 
			
		||||
AsmOp compile_unary_plus(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where) {
 | 
			
		||||
  tolk_assert(res.size() == 1 && args.size() == 1);
 | 
			
		||||
  VarDescr &r = res[0], &x = args[0];
 | 
			
		||||
  if (x.is_int_const()) {
 | 
			
		||||
    r.set_const(x.int_const);
 | 
			
		||||
    x.unused();
 | 
			
		||||
    return push_const(r.int_const);
 | 
			
		||||
  }
 | 
			
		||||
  r.val = x.val;
 | 
			
		||||
  return AsmOp::Nop();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
AsmOp compile_bitwise_and(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where) {
 | 
			
		||||
  tolk_assert(res.size() == 1 && args.size() == 2);
 | 
			
		||||
  VarDescr &r = res[0], &x = args[0], &y = args[1];
 | 
			
		||||
  if (x.is_int_const() && y.is_int_const()) {
 | 
			
		||||
| 
						 | 
				
			
			@ -535,11 +547,11 @@ AsmOp compile_and(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const
 | 
			
		|||
    y.unused();
 | 
			
		||||
    return push_const(r.int_const);
 | 
			
		||||
  }
 | 
			
		||||
  r.val = emulate_and(x.val, y.val);
 | 
			
		||||
  r.val = emulate_bitwise_and(x.val, y.val);
 | 
			
		||||
  return exec_op("AND", 2);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
AsmOp compile_or(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where) {
 | 
			
		||||
AsmOp compile_bitwise_or(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where) {
 | 
			
		||||
  tolk_assert(res.size() == 1 && args.size() == 2);
 | 
			
		||||
  VarDescr &r = res[0], &x = args[0], &y = args[1];
 | 
			
		||||
  if (x.is_int_const() && y.is_int_const()) {
 | 
			
		||||
| 
						 | 
				
			
			@ -548,11 +560,11 @@ AsmOp compile_or(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const
 | 
			
		|||
    y.unused();
 | 
			
		||||
    return push_const(r.int_const);
 | 
			
		||||
  }
 | 
			
		||||
  r.val = emulate_or(x.val, y.val);
 | 
			
		||||
  r.val = emulate_bitwise_or(x.val, y.val);
 | 
			
		||||
  return exec_op("OR", 2);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
AsmOp compile_xor(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where) {
 | 
			
		||||
AsmOp compile_bitwise_xor(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where) {
 | 
			
		||||
  tolk_assert(res.size() == 1 && args.size() == 2);
 | 
			
		||||
  VarDescr &r = res[0], &x = args[0], &y = args[1];
 | 
			
		||||
  if (x.is_int_const() && y.is_int_const()) {
 | 
			
		||||
| 
						 | 
				
			
			@ -561,11 +573,11 @@ AsmOp compile_xor(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const
 | 
			
		|||
    y.unused();
 | 
			
		||||
    return push_const(r.int_const);
 | 
			
		||||
  }
 | 
			
		||||
  r.val = emulate_xor(x.val, y.val);
 | 
			
		||||
  r.val = emulate_bitwise_xor(x.val, y.val);
 | 
			
		||||
  return exec_op("XOR", 2);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
AsmOp compile_not(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where) {
 | 
			
		||||
AsmOp compile_bitwise_not(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where) {
 | 
			
		||||
  tolk_assert(res.size() == 1 && args.size() == 1);
 | 
			
		||||
  VarDescr &r = res[0], &x = args[0];
 | 
			
		||||
  if (x.is_int_const()) {
 | 
			
		||||
| 
						 | 
				
			
			@ -573,11 +585,11 @@ AsmOp compile_not(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const
 | 
			
		|||
    x.unused();
 | 
			
		||||
    return push_const(r.int_const);
 | 
			
		||||
  }
 | 
			
		||||
  r.val = emulate_not(x.val);
 | 
			
		||||
  r.val = emulate_bitwise_not(x.val);
 | 
			
		||||
  return exec_op("NOT", 1);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
AsmOp compile_mul_internal(VarDescr& r, VarDescr& x, VarDescr& y, const SrcLocation& where) {
 | 
			
		||||
AsmOp compile_mul_internal(VarDescr& r, VarDescr& x, VarDescr& y, SrcLocation where) {
 | 
			
		||||
  if (x.is_int_const() && y.is_int_const()) {
 | 
			
		||||
    r.set_const(x.int_const * y.int_const);
 | 
			
		||||
    if (!r.int_const->is_valid()) {
 | 
			
		||||
| 
						 | 
				
			
			@ -645,12 +657,12 @@ AsmOp compile_mul_internal(VarDescr& r, VarDescr& x, VarDescr& y, const SrcLocat
 | 
			
		|||
  return exec_op("MUL", 2);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
AsmOp compile_mul(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where) {
 | 
			
		||||
AsmOp compile_mul(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where) {
 | 
			
		||||
  tolk_assert(res.size() == 1 && args.size() == 2);
 | 
			
		||||
  return compile_mul_internal(res[0], args[0], args[1], where);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
AsmOp compile_lshift(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where) {
 | 
			
		||||
AsmOp compile_lshift(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where) {
 | 
			
		||||
  tolk_assert(res.size() == 1 && args.size() == 2);
 | 
			
		||||
  VarDescr &r = res[0], &x = args[0], &y = args[1];
 | 
			
		||||
  if (y.is_int_const()) {
 | 
			
		||||
| 
						 | 
				
			
			@ -692,7 +704,7 @@ AsmOp compile_lshift(std::vector<VarDescr>& res, std::vector<VarDescr>& args, co
 | 
			
		|||
  return exec_op("LSHIFT", 2);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
AsmOp compile_rshift(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where,
 | 
			
		||||
AsmOp compile_rshift(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where,
 | 
			
		||||
                     int round_mode) {
 | 
			
		||||
  tolk_assert(res.size() == 1 && args.size() == 2);
 | 
			
		||||
  VarDescr &r = res[0], &x = args[0], &y = args[1];
 | 
			
		||||
| 
						 | 
				
			
			@ -722,7 +734,7 @@ AsmOp compile_rshift(std::vector<VarDescr>& res, std::vector<VarDescr>& args, co
 | 
			
		|||
  return exec_op(rshift, 2);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
AsmOp compile_div_internal(VarDescr& r, VarDescr& x, VarDescr& y, const SrcLocation& where, int round_mode) {
 | 
			
		||||
AsmOp compile_div_internal(VarDescr& r, VarDescr& x, VarDescr& y, SrcLocation where, int round_mode) {
 | 
			
		||||
  if (x.is_int_const() && y.is_int_const()) {
 | 
			
		||||
    r.set_const(div(x.int_const, y.int_const, round_mode));
 | 
			
		||||
    if (!r.int_const->is_valid()) {
 | 
			
		||||
| 
						 | 
				
			
			@ -762,12 +774,12 @@ AsmOp compile_div_internal(VarDescr& r, VarDescr& x, VarDescr& y, const SrcLocat
 | 
			
		|||
  return exec_op(op, 2);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
AsmOp compile_div(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where, int round_mode) {
 | 
			
		||||
AsmOp compile_div(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where, int round_mode) {
 | 
			
		||||
  tolk_assert(res.size() == 1 && args.size() == 2);
 | 
			
		||||
  return compile_div_internal(res[0], args[0], args[1], where, round_mode);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
AsmOp compile_mod(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where,
 | 
			
		||||
AsmOp compile_mod(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where,
 | 
			
		||||
                  int round_mode) {
 | 
			
		||||
  tolk_assert(res.size() == 1 && args.size() == 2);
 | 
			
		||||
  VarDescr &r = res[0], &x = args[0], &y = args[1];
 | 
			
		||||
| 
						 | 
				
			
			@ -808,7 +820,7 @@ AsmOp compile_mod(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const
 | 
			
		|||
  return exec_op(op, 2);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
AsmOp compile_muldiv(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where,
 | 
			
		||||
AsmOp compile_muldiv(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where,
 | 
			
		||||
                     int round_mode) {
 | 
			
		||||
  tolk_assert(res.size() == 1 && args.size() == 3);
 | 
			
		||||
  VarDescr &r = res[0], &x = args[0], &y = args[1], &z = args[2];
 | 
			
		||||
| 
						 | 
				
			
			@ -978,7 +990,7 @@ AsmOp compile_cmp_int(std::vector<VarDescr>& res, std::vector<VarDescr>& args, i
 | 
			
		|||
  return exec_op(cmp_names[mode], 2);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
AsmOp compile_throw(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation&) {
 | 
			
		||||
AsmOp compile_throw(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation) {
 | 
			
		||||
  tolk_assert(res.empty() && args.size() == 1);
 | 
			
		||||
  VarDescr& x = args[0];
 | 
			
		||||
  if (x.is_int_const() && x.int_const->unsigned_fits_bits(11)) {
 | 
			
		||||
| 
						 | 
				
			
			@ -1010,7 +1022,7 @@ AsmOp compile_cond_throw(std::vector<VarDescr>& res, std::vector<VarDescr>& args
 | 
			
		|||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
AsmOp compile_throw_arg(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation&) {
 | 
			
		||||
AsmOp compile_throw_arg(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation) {
 | 
			
		||||
  tolk_assert(res.empty() && args.size() == 2);
 | 
			
		||||
  VarDescr &x = args[1];
 | 
			
		||||
  if (x.is_int_const() && x.int_const->unsigned_fits_bits(11)) {
 | 
			
		||||
| 
						 | 
				
			
			@ -1101,7 +1113,7 @@ AsmOp compile_fetch_slice(std::vector<VarDescr>& res, std::vector<VarDescr>& arg
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
// <type> <type>_at(tuple t, int index) asm "INDEXVAR";
 | 
			
		||||
AsmOp compile_tuple_at(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation&) {
 | 
			
		||||
AsmOp compile_tuple_at(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation) {
 | 
			
		||||
  tolk_assert(args.size() == 2 && res.size() == 1);
 | 
			
		||||
  auto& y = args[1];
 | 
			
		||||
  if (y.is_int_const() && y.int_const >= 0 && y.int_const < 16) {
 | 
			
		||||
| 
						 | 
				
			
			@ -1112,7 +1124,7 @@ AsmOp compile_tuple_at(std::vector<VarDescr>& res, std::vector<VarDescr>& args,
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
// int null?(X arg)
 | 
			
		||||
AsmOp compile_is_null(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation&) {
 | 
			
		||||
AsmOp compile_is_null(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation) {
 | 
			
		||||
  tolk_assert(args.size() == 1 && res.size() == 1);
 | 
			
		||||
  auto &x = args[0], &r = res[0];
 | 
			
		||||
  if (x.always_null() || x.always_not_null()) {
 | 
			
		||||
| 
						 | 
				
			
			@ -1128,12 +1140,12 @@ AsmOp compile_is_null(std::vector<VarDescr>& res, std::vector<VarDescr>& args, c
 | 
			
		|||
void define_builtins() {
 | 
			
		||||
  using namespace std::placeholders;
 | 
			
		||||
  auto Unit = TypeExpr::new_unit();
 | 
			
		||||
  auto Int = TypeExpr::new_atomic(_Int);
 | 
			
		||||
  auto Cell = TypeExpr::new_atomic(_Cell);
 | 
			
		||||
  auto Slice = TypeExpr::new_atomic(_Slice);
 | 
			
		||||
  auto Builder = TypeExpr::new_atomic(_Builder);
 | 
			
		||||
  // auto Null = TypeExpr::new_atomic(_Null);
 | 
			
		||||
  auto Tuple = TypeExpr::new_atomic(_Tuple);
 | 
			
		||||
  auto Int = TypeExpr::new_atomic(TypeExpr::_Int);
 | 
			
		||||
  auto Cell = TypeExpr::new_atomic(TypeExpr::_Cell);
 | 
			
		||||
  auto Slice = TypeExpr::new_atomic(TypeExpr::_Slice);
 | 
			
		||||
  auto Builder = TypeExpr::new_atomic(TypeExpr::_Builder);
 | 
			
		||||
  // auto Null = TypeExpr::new_atomic(TypeExpr::_Null);
 | 
			
		||||
  auto Tuple = TypeExpr::new_atomic(TypeExpr::_Tuple);
 | 
			
		||||
  auto Int2 = TypeExpr::new_tensor({Int, Int});
 | 
			
		||||
  auto Int3 = TypeExpr::new_tensor({Int, Int, Int});
 | 
			
		||||
  auto TupleInt = TypeExpr::new_tensor({Tuple, Int});
 | 
			
		||||
| 
						 | 
				
			
			@ -1156,9 +1168,16 @@ void define_builtins() {
 | 
			
		|||
  //auto arith_null_op = TypeExpr::new_map(TypeExpr::new_unit(), Int);
 | 
			
		||||
  auto throw_arg_op = TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_tensor({X, Int}), Unit));
 | 
			
		||||
  auto cond_throw_arg_op = TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_tensor({X, Int, Int}), Unit));
 | 
			
		||||
 | 
			
		||||
  // prevent unused vars warnings (these vars are created to acquire initial id of TypeExpr::value)
 | 
			
		||||
  static_cast<void>(Z);
 | 
			
		||||
  static_cast<void>(XY);
 | 
			
		||||
  static_cast<void>(Cell);
 | 
			
		||||
 | 
			
		||||
  define_builtin_func("_+_", arith_bin_op, compile_add);
 | 
			
		||||
  define_builtin_func("_-_", arith_bin_op, compile_sub);
 | 
			
		||||
  define_builtin_func("-_", arith_un_op, compile_negate);
 | 
			
		||||
  define_builtin_func("-_", arith_un_op, compile_unary_minus);
 | 
			
		||||
  define_builtin_func("+_", arith_un_op, compile_unary_plus);
 | 
			
		||||
  define_builtin_func("_*_", arith_bin_op, compile_mul);
 | 
			
		||||
  define_builtin_func("_/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, -1));
 | 
			
		||||
  define_builtin_func("_~/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 0));
 | 
			
		||||
| 
						 | 
				
			
			@ -1175,10 +1194,10 @@ void define_builtins() {
 | 
			
		|||
  define_builtin_func("_>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1));
 | 
			
		||||
  define_builtin_func("_~>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 0));
 | 
			
		||||
  define_builtin_func("_^>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 1));
 | 
			
		||||
  define_builtin_func("_&_", arith_bin_op, compile_and);
 | 
			
		||||
  define_builtin_func("_|_", arith_bin_op, compile_or);
 | 
			
		||||
  define_builtin_func("_^_", arith_bin_op, compile_xor);
 | 
			
		||||
  define_builtin_func("~_", arith_un_op, compile_not);
 | 
			
		||||
  define_builtin_func("_&_", arith_bin_op, compile_bitwise_and);
 | 
			
		||||
  define_builtin_func("_|_", arith_bin_op, compile_bitwise_or);
 | 
			
		||||
  define_builtin_func("_^_", arith_bin_op, compile_bitwise_xor);
 | 
			
		||||
  define_builtin_func("~_", arith_un_op, compile_bitwise_not);
 | 
			
		||||
  define_builtin_func("^_+=_", arith_bin_op, compile_add);
 | 
			
		||||
  define_builtin_func("^_-=_", arith_bin_op, compile_sub);
 | 
			
		||||
  define_builtin_func("^_*=_", arith_bin_op, compile_mul);
 | 
			
		||||
| 
						 | 
				
			
			@ -1192,9 +1211,9 @@ void define_builtins() {
 | 
			
		|||
  define_builtin_func("^_>>=_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1));
 | 
			
		||||
  define_builtin_func("^_~>>=_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 0));
 | 
			
		||||
  define_builtin_func("^_^>>=_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 1));
 | 
			
		||||
  define_builtin_func("^_&=_", arith_bin_op, compile_and);
 | 
			
		||||
  define_builtin_func("^_|=_", arith_bin_op, compile_or);
 | 
			
		||||
  define_builtin_func("^_^=_", arith_bin_op, compile_xor);
 | 
			
		||||
  define_builtin_func("^_&=_", arith_bin_op, compile_bitwise_and);
 | 
			
		||||
  define_builtin_func("^_|=_", arith_bin_op, compile_bitwise_or);
 | 
			
		||||
  define_builtin_func("^_^=_", arith_bin_op, compile_bitwise_xor);
 | 
			
		||||
  define_builtin_func("muldiv", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, -1));
 | 
			
		||||
  define_builtin_func("muldivr", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 0));
 | 
			
		||||
  define_builtin_func("muldivc", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 1));
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -41,25 +41,19 @@ Expr::Expr(ExprCls c, sym_idx_t name_idx, std::initializer_list<Expr*> _arglist)
 | 
			
		|||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Expr::chk_rvalue(const Lexem& lem) const {
 | 
			
		||||
void Expr::chk_rvalue(const Lexer& lex) const {
 | 
			
		||||
  if (!is_rvalue()) {
 | 
			
		||||
    lem.error_at("rvalue expected before `", "`");
 | 
			
		||||
    lex.error_at("rvalue expected before `", "`");
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Expr::chk_lvalue(const Lexem& lem) const {
 | 
			
		||||
void Expr::chk_lvalue(const Lexer& lex) const {
 | 
			
		||||
  if (!is_lvalue()) {
 | 
			
		||||
    lem.error_at("lvalue expected before `", "`");
 | 
			
		||||
    lex.error_at("lvalue expected before `", "`");
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Expr::chk_type(const Lexem& lem) const {
 | 
			
		||||
  if (!is_type()) {
 | 
			
		||||
    lem.error_at("type expression expected before `", "`");
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool Expr::deduce_type(const Lexem& lem) {
 | 
			
		||||
bool Expr::deduce_type(const Lexer& lex) {
 | 
			
		||||
  if (e_type) {
 | 
			
		||||
    return true;
 | 
			
		||||
  }
 | 
			
		||||
| 
						 | 
				
			
			@ -83,7 +77,7 @@ bool Expr::deduce_type(const Lexem& lem) {
 | 
			
		|||
        std::ostringstream os;
 | 
			
		||||
        os << "cannot apply function " << sym->name() << " : " << sym_val->get_type() << " to arguments of type "
 | 
			
		||||
           << fun_type->args[0] << ": " << ue;
 | 
			
		||||
        lem.error(os.str());
 | 
			
		||||
        lex.error(os.str());
 | 
			
		||||
      }
 | 
			
		||||
      e_type = fun_type->args[1];
 | 
			
		||||
      TypeExpr::remove_indirect(e_type);
 | 
			
		||||
| 
						 | 
				
			
			@ -98,7 +92,7 @@ bool Expr::deduce_type(const Lexem& lem) {
 | 
			
		|||
        std::ostringstream os;
 | 
			
		||||
        os << "cannot apply expression of type " << args[0]->e_type << " to an expression of type " << args[1]->e_type
 | 
			
		||||
           << ": " << ue;
 | 
			
		||||
        lem.error(os.str());
 | 
			
		||||
        lex.error(os.str());
 | 
			
		||||
      }
 | 
			
		||||
      e_type = fun_type->args[1];
 | 
			
		||||
      TypeExpr::remove_indirect(e_type);
 | 
			
		||||
| 
						 | 
				
			
			@ -113,7 +107,7 @@ bool Expr::deduce_type(const Lexem& lem) {
 | 
			
		|||
        std::ostringstream os;
 | 
			
		||||
        os << "cannot assign an expression of type " << args[1]->e_type << " to a variable or pattern of type "
 | 
			
		||||
           << args[0]->e_type << ": " << ue;
 | 
			
		||||
        lem.error(os.str());
 | 
			
		||||
        lex.error(os.str());
 | 
			
		||||
      }
 | 
			
		||||
      e_type = args[0]->e_type;
 | 
			
		||||
      TypeExpr::remove_indirect(e_type);
 | 
			
		||||
| 
						 | 
				
			
			@ -130,7 +124,7 @@ bool Expr::deduce_type(const Lexem& lem) {
 | 
			
		|||
        os << "cannot implicitly assign an expression of type " << args[1]->e_type
 | 
			
		||||
           << " to a variable or pattern of type " << rhs_type << " in modifying method `" << symbols.get_name(val)
 | 
			
		||||
           << "` : " << ue;
 | 
			
		||||
        lem.error(os.str());
 | 
			
		||||
        lex.error(os.str());
 | 
			
		||||
      }
 | 
			
		||||
      e_type = rhs_type->args[1];
 | 
			
		||||
      TypeExpr::remove_indirect(e_type);
 | 
			
		||||
| 
						 | 
				
			
			@ -139,13 +133,13 @@ bool Expr::deduce_type(const Lexem& lem) {
 | 
			
		|||
    }
 | 
			
		||||
    case _CondExpr: {
 | 
			
		||||
      tolk_assert(args.size() == 3);
 | 
			
		||||
      auto flag_type = TypeExpr::new_atomic(_Int);
 | 
			
		||||
      auto flag_type = TypeExpr::new_atomic(TypeExpr::_Int);
 | 
			
		||||
      try {
 | 
			
		||||
        unify(args[0]->e_type, flag_type);
 | 
			
		||||
      } catch (UnifyError& ue) {
 | 
			
		||||
        std::ostringstream os;
 | 
			
		||||
        os << "condition in a conditional expression has non-integer type " << args[0]->e_type << ": " << ue;
 | 
			
		||||
        lem.error(os.str());
 | 
			
		||||
        lex.error(os.str());
 | 
			
		||||
      }
 | 
			
		||||
      try {
 | 
			
		||||
        unify(args[1]->e_type, args[2]->e_type);
 | 
			
		||||
| 
						 | 
				
			
			@ -153,7 +147,7 @@ bool Expr::deduce_type(const Lexem& lem) {
 | 
			
		|||
        std::ostringstream os;
 | 
			
		||||
        os << "the two variants in a conditional expression have different types " << args[1]->e_type << " and "
 | 
			
		||||
           << args[2]->e_type << " : " << ue;
 | 
			
		||||
        lem.error(os.str());
 | 
			
		||||
        lex.error(os.str());
 | 
			
		||||
      }
 | 
			
		||||
      e_type = args[1]->e_type;
 | 
			
		||||
      TypeExpr::remove_indirect(e_type);
 | 
			
		||||
| 
						 | 
				
			
			@ -176,13 +170,13 @@ int Expr::define_new_vars(CodeBlob& code) {
 | 
			
		|||
    }
 | 
			
		||||
    case _Var:
 | 
			
		||||
      if (val < 0) {
 | 
			
		||||
        val = code.create_var(TmpVar::_Named, e_type, sym, &here);
 | 
			
		||||
        val = code.create_var(TmpVar::_Named, e_type, sym, here);
 | 
			
		||||
        return 1;
 | 
			
		||||
      }
 | 
			
		||||
      break;
 | 
			
		||||
    case _Hole:
 | 
			
		||||
      if (val < 0) {
 | 
			
		||||
        val = code.create_var(TmpVar::_Tmp, e_type, nullptr, &here);
 | 
			
		||||
        val = code.create_var(TmpVar::_Tmp, e_type, nullptr, here);
 | 
			
		||||
      }
 | 
			
		||||
      break;
 | 
			
		||||
  }
 | 
			
		||||
| 
						 | 
				
			
			@ -202,7 +196,7 @@ int Expr::predefine_vars() {
 | 
			
		|||
    }
 | 
			
		||||
    case _Var:
 | 
			
		||||
      if (!sym) {
 | 
			
		||||
        tolk_assert(val < 0 && here.defined());
 | 
			
		||||
        tolk_assert(val < 0 && here.is_defined());
 | 
			
		||||
        if (prohibited_var_names.count(symbols.get_name(~val))) {
 | 
			
		||||
          throw ParseError{
 | 
			
		||||
              here, PSTRING() << "symbol `" << symbols.get_name(~val) << "` cannot be redefined as a variable"};
 | 
			
		||||
| 
						 | 
				
			
			@ -212,7 +206,7 @@ int Expr::predefine_vars() {
 | 
			
		|||
        if (!sym) {
 | 
			
		||||
          throw ParseError{here, std::string{"redefined variable `"} + symbols.get_name(~val) + "`"};
 | 
			
		||||
        }
 | 
			
		||||
        sym->value = new SymVal{SymVal::_Var, -1, e_type};
 | 
			
		||||
        sym->value = new SymVal{SymValKind::_Var, -1, e_type};
 | 
			
		||||
        return 1;
 | 
			
		||||
      }
 | 
			
		||||
      break;
 | 
			
		||||
| 
						 | 
				
			
			@ -221,17 +215,17 @@ int Expr::predefine_vars() {
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
var_idx_t Expr::new_tmp(CodeBlob& code) const {
 | 
			
		||||
  return code.create_tmp_var(e_type, &here);
 | 
			
		||||
  return code.create_tmp_var(e_type, here);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void add_set_globs(CodeBlob& code, std::vector<std::pair<SymDef*, var_idx_t>>& globs, const SrcLocation& here) {
 | 
			
		||||
void add_set_globs(CodeBlob& code, std::vector<std::pair<SymDef*, var_idx_t>>& globs, SrcLocation here) {
 | 
			
		||||
  for (const auto& p : globs) {
 | 
			
		||||
    auto& op = code.emplace_back(here, Op::_SetGlob, std::vector<var_idx_t>{}, std::vector<var_idx_t>{ p.second }, p.first);
 | 
			
		||||
    op.set_impure(code);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
std::vector<var_idx_t> pre_compile_let(CodeBlob& code, Expr* lhs, Expr* rhs, const SrcLocation& here) {
 | 
			
		||||
std::vector<var_idx_t> pre_compile_let(CodeBlob& code, Expr* lhs, Expr* rhs, SrcLocation here) {
 | 
			
		||||
  while (lhs->is_type_apply()) {
 | 
			
		||||
    lhs = lhs->args.at(0);
 | 
			
		||||
  }
 | 
			
		||||
| 
						 | 
				
			
			@ -245,7 +239,7 @@ std::vector<var_idx_t> pre_compile_let(CodeBlob& code, Expr* lhs, Expr* rhs, con
 | 
			
		|||
    auto right = rhs->pre_compile(code);
 | 
			
		||||
    TypeExpr::remove_indirect(rhs->e_type);
 | 
			
		||||
    auto unpacked_type = rhs->e_type->args.at(0);
 | 
			
		||||
    std::vector<var_idx_t> tmp{code.create_tmp_var(unpacked_type, &rhs->here)};
 | 
			
		||||
    std::vector<var_idx_t> tmp{code.create_tmp_var(unpacked_type, rhs->here)};
 | 
			
		||||
    code.emplace_back(lhs->here, Op::_UnTuple, tmp, std::move(right));
 | 
			
		||||
    auto tvar = new Expr{Expr::_Var};
 | 
			
		||||
    tvar->set_val(tmp[0]);
 | 
			
		||||
| 
						 | 
				
			
			@ -286,14 +280,14 @@ std::vector<var_idx_t> pre_compile_tensor(const std::vector<Expr *>& args, CodeB
 | 
			
		|||
    for (size_t j = 0; j < res_lists[i].size(); ++j) {
 | 
			
		||||
      TmpVar& var = code.vars.at(res_lists[i][j]);
 | 
			
		||||
      if (!lval_globs && (var.cls & TmpVar::_Named)) {
 | 
			
		||||
        var.on_modification.push_back([&modified_vars, i, j, cur_ops = code.cur_ops, done = false](const SrcLocation &here) mutable {
 | 
			
		||||
        var.on_modification.push_back([&modified_vars, i, j, cur_ops = code.cur_ops, done = false](SrcLocation here) mutable {
 | 
			
		||||
          if (!done) {
 | 
			
		||||
            done = true;
 | 
			
		||||
            modified_vars.push_back({i, j, cur_ops});
 | 
			
		||||
          }
 | 
			
		||||
        });
 | 
			
		||||
      } else {
 | 
			
		||||
        var.on_modification.push_back([](const SrcLocation &) {
 | 
			
		||||
        var.on_modification.push_back([](SrcLocation) {
 | 
			
		||||
        });
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
| 
						 | 
				
			
			@ -307,8 +301,8 @@ std::vector<var_idx_t> pre_compile_tensor(const std::vector<Expr *>& args, CodeB
 | 
			
		|||
  for (size_t idx = modified_vars.size(); idx--; ) {
 | 
			
		||||
    const ModifiedVar &m = modified_vars[idx];
 | 
			
		||||
    var_idx_t orig_v = res_lists[m.i][m.j];
 | 
			
		||||
    var_idx_t tmp_v = code.create_tmp_var(code.vars[orig_v].v_type, code.vars[orig_v].where.get());
 | 
			
		||||
    std::unique_ptr<Op> op = std::make_unique<Op>(*code.vars[orig_v].where, Op::_Let);
 | 
			
		||||
    var_idx_t tmp_v = code.create_tmp_var(code.vars[orig_v].v_type, code.vars[orig_v].where);
 | 
			
		||||
    std::unique_ptr<Op> op = std::make_unique<Op>(code.vars[orig_v].where, Op::_Let);
 | 
			
		||||
    op->left = {tmp_v};
 | 
			
		||||
    op->right = {orig_v};
 | 
			
		||||
    op->next = std::move((*m.cur_ops));
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,129 +0,0 @@
 | 
			
		|||
/*
 | 
			
		||||
    This file is part of TON Blockchain Library.
 | 
			
		||||
 | 
			
		||||
    TON Blockchain Library is free software: you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU Lesser General Public License as published by
 | 
			
		||||
    the Free Software Foundation, either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    TON Blockchain Library is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU Lesser General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU Lesser General Public License
 | 
			
		||||
    along with TON Blockchain Library.  If not, see <http://www.gnu.org/licenses/>.
 | 
			
		||||
*/
 | 
			
		||||
#include "tolk.h"
 | 
			
		||||
 | 
			
		||||
namespace tolk {
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * 
 | 
			
		||||
 *   KEYWORD DEFINITION
 | 
			
		||||
 * 
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
void define_keywords() {
 | 
			
		||||
  symbols.add_kw_char('+')
 | 
			
		||||
      .add_kw_char('-')
 | 
			
		||||
      .add_kw_char('*')
 | 
			
		||||
      .add_kw_char('/')
 | 
			
		||||
      .add_kw_char('%')
 | 
			
		||||
      .add_kw_char('?')
 | 
			
		||||
      .add_kw_char(':')
 | 
			
		||||
      .add_kw_char(',')
 | 
			
		||||
      .add_kw_char(';')
 | 
			
		||||
      .add_kw_char('(')
 | 
			
		||||
      .add_kw_char(')')
 | 
			
		||||
      .add_kw_char('[')
 | 
			
		||||
      .add_kw_char(']')
 | 
			
		||||
      .add_kw_char('{')
 | 
			
		||||
      .add_kw_char('}')
 | 
			
		||||
      .add_kw_char('=')
 | 
			
		||||
      .add_kw_char('_')
 | 
			
		||||
      .add_kw_char('<')
 | 
			
		||||
      .add_kw_char('>')
 | 
			
		||||
      .add_kw_char('&')
 | 
			
		||||
      .add_kw_char('|')
 | 
			
		||||
      .add_kw_char('^')
 | 
			
		||||
      .add_kw_char('~');
 | 
			
		||||
 | 
			
		||||
  symbols.add_keyword("==", Keyword::_Eq)
 | 
			
		||||
      .add_keyword("!=", Keyword::_Neq)
 | 
			
		||||
      .add_keyword("<=", Keyword::_Leq)
 | 
			
		||||
      .add_keyword(">=", Keyword::_Geq)
 | 
			
		||||
      .add_keyword("<=>", Keyword::_Spaceship)
 | 
			
		||||
      .add_keyword("<<", Keyword::_Lshift)
 | 
			
		||||
      .add_keyword(">>", Keyword::_Rshift)
 | 
			
		||||
      .add_keyword("~>>", Keyword::_RshiftR)
 | 
			
		||||
      .add_keyword("^>>", Keyword::_RshiftC)
 | 
			
		||||
      .add_keyword("~/", Keyword::_DivR)
 | 
			
		||||
      .add_keyword("^/", Keyword::_DivC)
 | 
			
		||||
      .add_keyword("~%", Keyword::_ModR)
 | 
			
		||||
      .add_keyword("^%", Keyword::_ModC)
 | 
			
		||||
      .add_keyword("/%", Keyword::_DivMod)
 | 
			
		||||
      .add_keyword("+=", Keyword::_PlusLet)
 | 
			
		||||
      .add_keyword("-=", Keyword::_MinusLet)
 | 
			
		||||
      .add_keyword("*=", Keyword::_TimesLet)
 | 
			
		||||
      .add_keyword("/=", Keyword::_DivLet)
 | 
			
		||||
      .add_keyword("~/=", Keyword::_DivRLet)
 | 
			
		||||
      .add_keyword("^/=", Keyword::_DivCLet)
 | 
			
		||||
      .add_keyword("%=", Keyword::_ModLet)
 | 
			
		||||
      .add_keyword("~%=", Keyword::_ModRLet)
 | 
			
		||||
      .add_keyword("^%=", Keyword::_ModCLet)
 | 
			
		||||
      .add_keyword("<<=", Keyword::_LshiftLet)
 | 
			
		||||
      .add_keyword(">>=", Keyword::_RshiftLet)
 | 
			
		||||
      .add_keyword("~>>=", Keyword::_RshiftRLet)
 | 
			
		||||
      .add_keyword("^>>=", Keyword::_RshiftCLet)
 | 
			
		||||
      .add_keyword("&=", Keyword::_AndLet)
 | 
			
		||||
      .add_keyword("|=", Keyword::_OrLet)
 | 
			
		||||
      .add_keyword("^=", Keyword::_XorLet);
 | 
			
		||||
 | 
			
		||||
  symbols.add_keyword("return", Keyword::_Return)
 | 
			
		||||
      .add_keyword("var", Keyword::_Var)
 | 
			
		||||
      .add_keyword("repeat", Keyword::_Repeat)
 | 
			
		||||
      .add_keyword("do", Keyword::_Do)
 | 
			
		||||
      .add_keyword("while", Keyword::_While)
 | 
			
		||||
      .add_keyword("until", Keyword::_Until)
 | 
			
		||||
      .add_keyword("try", Keyword::_Try)
 | 
			
		||||
      .add_keyword("catch", Keyword::_Catch)
 | 
			
		||||
      .add_keyword("if", Keyword::_If)
 | 
			
		||||
      .add_keyword("ifnot", Keyword::_Ifnot)
 | 
			
		||||
      .add_keyword("then", Keyword::_Then)
 | 
			
		||||
      .add_keyword("else", Keyword::_Else)
 | 
			
		||||
      .add_keyword("elseif", Keyword::_Elseif)
 | 
			
		||||
      .add_keyword("elseifnot", Keyword::_Elseifnot);
 | 
			
		||||
 | 
			
		||||
  symbols.add_keyword("int", Keyword::_Int)
 | 
			
		||||
      .add_keyword("cell", Keyword::_Cell)
 | 
			
		||||
      .add_keyword("slice", Keyword::_Slice)
 | 
			
		||||
      .add_keyword("builder", Keyword::_Builder)
 | 
			
		||||
      .add_keyword("cont", Keyword::_Cont)
 | 
			
		||||
      .add_keyword("tuple", Keyword::_Tuple)
 | 
			
		||||
      .add_keyword("type", Keyword::_Type)
 | 
			
		||||
      .add_keyword("->", Keyword::_Mapsto)
 | 
			
		||||
      .add_keyword("forall", Keyword::_Forall);
 | 
			
		||||
 | 
			
		||||
  symbols.add_keyword("extern", Keyword::_Extern)
 | 
			
		||||
      .add_keyword("global", Keyword::_Global)
 | 
			
		||||
      .add_keyword("asm", Keyword::_Asm)
 | 
			
		||||
      .add_keyword("impure", Keyword::_Impure)
 | 
			
		||||
      .add_keyword("pure", Keyword::_Pure)
 | 
			
		||||
      .add_keyword("inline", Keyword::_Inline)
 | 
			
		||||
      .add_keyword("inline_ref", Keyword::_InlineRef)
 | 
			
		||||
      .add_keyword("builtin", Keyword::_Builtin)
 | 
			
		||||
      .add_keyword("auto_apply", Keyword::_AutoApply)
 | 
			
		||||
      .add_keyword("method_id", Keyword::_MethodId)
 | 
			
		||||
      .add_keyword("get", Keyword::_Get)
 | 
			
		||||
      .add_keyword("operator", Keyword::_Operator)
 | 
			
		||||
      .add_keyword("infix", Keyword::_Infix)
 | 
			
		||||
      .add_keyword("infixl", Keyword::_Infixl)
 | 
			
		||||
      .add_keyword("infixr", Keyword::_Infixr)
 | 
			
		||||
      .add_keyword("const", Keyword::_Const);
 | 
			
		||||
 | 
			
		||||
  symbols.add_keyword("#pragma", Keyword::_PragmaHashtag)
 | 
			
		||||
      .add_keyword("#include", Keyword::_IncludeHashtag);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
}  // namespace tolk
 | 
			
		||||
							
								
								
									
										869
									
								
								tolk/lexer.cpp
									
										
									
									
									
								
							
							
						
						
									
										869
									
								
								tolk/lexer.cpp
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -16,335 +16,632 @@
 | 
			
		|||
*/
 | 
			
		||||
#include "lexer.h"
 | 
			
		||||
#include "symtable.h"
 | 
			
		||||
#include <sstream>
 | 
			
		||||
#include <cassert>
 | 
			
		||||
 | 
			
		||||
namespace tolk {
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 *
 | 
			
		||||
 *   LEXER
 | 
			
		||||
 *
 | 
			
		||||
 */
 | 
			
		||||
// By 'chunk' in lexer I mean a token or a list of tokens parsed simultaneously.
 | 
			
		||||
// E.g., when we meet "str", ChunkString is called, it emits tok_string.
 | 
			
		||||
// E.g., when we meet "str"x, ChunkString emits not only tok_string, but tok_string_modifier.
 | 
			
		||||
// E.g., when we meet //, ChunkInlineComment is called, it emits nothing (just skips a line).
 | 
			
		||||
// We store all valid chunk lexers in a prefix tree (LexingTrie), see below.
 | 
			
		||||
struct ChunkLexerBase {
 | 
			
		||||
  ChunkLexerBase(const ChunkLexerBase&) = delete;
 | 
			
		||||
  ChunkLexerBase &operator=(const ChunkLexerBase&) = delete;
 | 
			
		||||
  ChunkLexerBase() = default;
 | 
			
		||||
 | 
			
		||||
std::string Lexem::lexem_name_str(int idx) {
 | 
			
		||||
  if (idx == Eof) {
 | 
			
		||||
    return "end of file";
 | 
			
		||||
  } else if (idx == Ident) {
 | 
			
		||||
    return "identifier";
 | 
			
		||||
  } else if (idx == Number) {
 | 
			
		||||
    return "number";
 | 
			
		||||
  } else if (idx == String) {
 | 
			
		||||
    return "string";
 | 
			
		||||
  } else if (idx == Special) {
 | 
			
		||||
    return "special";
 | 
			
		||||
  } else if (symbols.get_keyword(idx)) {
 | 
			
		||||
    return "`" + symbols.get_keyword(idx)->str + "`";
 | 
			
		||||
  } else {
 | 
			
		||||
    std::ostringstream os{"<unknown lexem of type "};
 | 
			
		||||
    os << idx << ">";
 | 
			
		||||
    return os.str();
 | 
			
		||||
  }
 | 
			
		||||
  virtual bool parse(Lexer* lex) const = 0;
 | 
			
		||||
  virtual ~ChunkLexerBase() = default;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// Returns a pointer to the one function-local static instance of T.
// The object is default-constructed on first use; every later call with the
// same T yields the same address.
template <class T>
static T* singleton() {
  static T instance;
  T* const instance_ptr = &instance;
  return instance_ptr;
}
 | 
			
		||||
 | 
			
		||||
std::string Lexem::name_str() const {
 | 
			
		||||
  if (tp == Ident) {
 | 
			
		||||
    return std::string{"identifier `"} + symbols.get_name(val) + "`";
 | 
			
		||||
  } else if (tp == String) {
 | 
			
		||||
    return std::string{"string \""} + str + '"';
 | 
			
		||||
  } else {
 | 
			
		||||
    return lexem_name_str(tp);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
// LexingTrie is a prefix tree storing all available Tolk language constructs.
 | 
			
		||||
// It's effectively a map of a prefix to ChunkLexerBase.
 | 
			
		||||
class LexingTrie {
 | 
			
		||||
  LexingTrie** next{nullptr};   // either nullptr or [256]
 | 
			
		||||
  ChunkLexerBase* val{nullptr}; // non-null for leafs
 | 
			
		||||
 | 
			
		||||
bool is_number(std::string str) {
 | 
			
		||||
  auto st = str.begin(), en = str.end();
 | 
			
		||||
  if (st == en) {
 | 
			
		||||
    return false;
 | 
			
		||||
  GNU_ATTRIBUTE_ALWAYS_INLINE void ensure_next_allocated() {
 | 
			
		||||
    if (next == nullptr) {
 | 
			
		||||
      next = new LexingTrie*[256];
 | 
			
		||||
      std::memset(next, 0, 256 * sizeof(LexingTrie*));
 | 
			
		||||
    }
 | 
			
		||||
  if (*st == '-') {
 | 
			
		||||
    st++;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  GNU_ATTRIBUTE_ALWAYS_INLINE void ensure_symbol_allocated(uint8_t symbol) const {
 | 
			
		||||
    if (next[symbol] == nullptr) {
 | 
			
		||||
      next[symbol] = new LexingTrie;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
public:
 | 
			
		||||
  // Maps a prefix onto a chunk lexer.
 | 
			
		||||
  // E.g. "    -> ChunkString
 | 
			
		||||
  // E.g. """  -> ChunkMultilineString
 | 
			
		||||
  void add_prefix(const char* s, ChunkLexerBase* val) {
 | 
			
		||||
    LexingTrie* cur = this;
 | 
			
		||||
 | 
			
		||||
    for (; *s; ++s) {
 | 
			
		||||
      uint8_t symbol = static_cast<uint8_t>(*s);
 | 
			
		||||
      cur->ensure_next_allocated();
 | 
			
		||||
      cur->ensure_symbol_allocated(symbol);
 | 
			
		||||
      cur = cur->next[symbol];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
#ifdef TOLK_DEBUG
 | 
			
		||||
    assert(!cur->val);
 | 
			
		||||
#endif
 | 
			
		||||
    cur->val = val;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Maps a pattern onto a chunk lexer.
 | 
			
		||||
  // E.g. -[0-9] -> ChunkNegativeNumber
 | 
			
		||||
  // Internally, it expands the pattern to all possible prefixes: -0, -1, etc.
 | 
			
		||||
  // (for example, [0-9][a-z_$] gives 10*28=280 prefixes)
 | 
			
		||||
  void add_pattern(const char* pattern, ChunkLexerBase* val) {
 | 
			
		||||
    std::vector<LexingTrie*> all_possible_trie{this};
 | 
			
		||||
 | 
			
		||||
    for (const char* c = pattern; *c; ++c) {
 | 
			
		||||
      std::string to_append;
 | 
			
		||||
      if (*c == '[') {
 | 
			
		||||
        c++;
 | 
			
		||||
        while (*c != ']') { // assume that input is correct, no out-of-string checks
 | 
			
		||||
          if (*(c + 1) == '-') {
 | 
			
		||||
            char l = *c, r = *(c + 2);
 | 
			
		||||
            for (char symbol = l; symbol <= r; ++symbol) {
 | 
			
		||||
              to_append += symbol;
 | 
			
		||||
            }
 | 
			
		||||
            c += 3;
 | 
			
		||||
          } else {
 | 
			
		||||
            to_append += *c;
 | 
			
		||||
            c++;
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
      } else {
 | 
			
		||||
        to_append += *c;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      std::vector<LexingTrie*> next_all_possible_trie;
 | 
			
		||||
      next_all_possible_trie.reserve(all_possible_trie.size() * to_append.size());
 | 
			
		||||
      for (LexingTrie* cur : all_possible_trie) {
 | 
			
		||||
        cur->ensure_next_allocated();
 | 
			
		||||
        for (uint8_t symbol : to_append) {
 | 
			
		||||
          cur->ensure_symbol_allocated(symbol);
 | 
			
		||||
          next_all_possible_trie.emplace_back(cur->next[symbol]);
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
      all_possible_trie = std::move(next_all_possible_trie);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    for (LexingTrie* trie : all_possible_trie) {
 | 
			
		||||
      trie->val = val;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Looks up a chunk lexer given a string (in practice, s points to cur position in the middle of the file).
 | 
			
		||||
  // It returns the deepest case: pointing to ", it will return ChunkMultilineString if """, or ChunkString otherwise.
 | 
			
		||||
  ChunkLexerBase* get_deepest(const char* s) const {
 | 
			
		||||
    const LexingTrie* best = this;
 | 
			
		||||
 | 
			
		||||
    for (const LexingTrie* cur = this; cur && cur->next; ++s) {
 | 
			
		||||
      cur = cur->next[static_cast<uint8_t>(*s)];  // if s reaches \0, cur will just become nullptr, and loop will end
 | 
			
		||||
      if (cur && cur->val) {
 | 
			
		||||
        best = cur;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return best->val;
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
//
 | 
			
		||||
// ----------------------------------------------------------------------
 | 
			
		||||
// A list of valid parsed chunks.
 | 
			
		||||
//
 | 
			
		||||
 | 
			
		||||
// An inline comment, starting from '//'
 | 
			
		||||
struct ChunkInlineComment final : ChunkLexerBase {
 | 
			
		||||
  bool parse(Lexer* lex) const override {
 | 
			
		||||
    lex->skip_line();
 | 
			
		||||
    return true;
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// A multiline comment, starting from '/*'
 | 
			
		||||
// Note, that nested comments are not supported.
 | 
			
		||||
struct ChunkMultilineComment final : ChunkLexerBase {
 | 
			
		||||
  bool parse(Lexer* lex) const override {
 | 
			
		||||
    while (!lex->is_eof()) {
 | 
			
		||||
      // todo drop -} later
 | 
			
		||||
      if ((lex->char_at() == '-' && lex->char_at(1) == '}') || (lex->char_at() == '*' && lex->char_at(1) == '/')) {
 | 
			
		||||
        lex->skip_chars(2);
 | 
			
		||||
        return true;
 | 
			
		||||
      }
 | 
			
		||||
      lex->skip_chars(1);
 | 
			
		||||
    }
 | 
			
		||||
    return true; // it's okay if comment extends past end of file
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// A string, starting from "
 | 
			
		||||
// Note, that there are no escape symbols inside: the purpose of strings in Tolk just doesn't need it.
 | 
			
		||||
// After a closing quote, a string modifier may be present, like "Ef8zMzMzMzMzMzMzMzMzMzM0vF"a.
 | 
			
		||||
// If present, it emits a separate tok_string_modifier.
 | 
			
		||||
struct ChunkString final : ChunkLexerBase {
 | 
			
		||||
  bool parse(Lexer* lex) const override {
 | 
			
		||||
    const char* str_begin = lex->c_str();
 | 
			
		||||
    lex->skip_chars(1);
 | 
			
		||||
    while (!lex->is_eof() && lex->char_at() != '"' && lex->char_at() != '\n') {
 | 
			
		||||
      lex->skip_chars(1);
 | 
			
		||||
    }
 | 
			
		||||
    if (lex->char_at() != '"') {
 | 
			
		||||
      lex->error("string extends past end of line");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::string_view str_val(str_begin + 1, lex->c_str() - str_begin - 1);
 | 
			
		||||
    lex->skip_chars(1);
 | 
			
		||||
    lex->add_token(tok_string_const, str_val);
 | 
			
		||||
 | 
			
		||||
    if (std::isalpha(lex->char_at())) {
 | 
			
		||||
      std::string_view modifier_val(lex->c_str(), 1);
 | 
			
		||||
      lex->skip_chars(1);
 | 
			
		||||
      lex->add_token(tok_string_modifier, modifier_val);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return true;
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// A string starting from """
 | 
			
		||||
// Used for multiline asm constructions. Can not have a postfix modifier.
 | 
			
		||||
struct ChunkMultilineString final : ChunkLexerBase {
 | 
			
		||||
  bool parse(Lexer* lex) const override {
 | 
			
		||||
    const char* str_begin = lex->c_str();
 | 
			
		||||
    lex->skip_chars(3);
 | 
			
		||||
    while (!lex->is_eof()) {
 | 
			
		||||
      if (lex->char_at() == '"' && lex->char_at(1) == '"' && lex->char_at(2) == '"') {
 | 
			
		||||
        break;
 | 
			
		||||
      }
 | 
			
		||||
      lex->skip_chars(1);
 | 
			
		||||
    }
 | 
			
		||||
    if (lex->is_eof()) {
 | 
			
		||||
      lex->error("string extends past end of file");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::string_view str_val(str_begin + 3, lex->c_str() - str_begin - 3);
 | 
			
		||||
    lex->skip_chars(3);
 | 
			
		||||
    lex->add_token(tok_string_const, str_val);
 | 
			
		||||
    return true;
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// A number, may be a hex one.
 | 
			
		||||
struct ChunkNumber final : ChunkLexerBase {
 | 
			
		||||
  bool parse(Lexer* lex) const override {
 | 
			
		||||
    const char* str_begin = lex->c_str();
 | 
			
		||||
    bool hex = false;
 | 
			
		||||
  if (st + 1 < en && *st == '0' && st[1] == 'x') {
 | 
			
		||||
    st += 2;
 | 
			
		||||
    if (lex->char_at() == '0' && lex->char_at(1) == 'x') {
 | 
			
		||||
      lex->skip_chars(2);
 | 
			
		||||
      hex = true;
 | 
			
		||||
    }
 | 
			
		||||
  if (st == en) {
 | 
			
		||||
    if (lex->is_eof()) {
 | 
			
		||||
      return false;
 | 
			
		||||
    }
 | 
			
		||||
  while (st < en) {
 | 
			
		||||
    int c = *st;
 | 
			
		||||
    while (!lex->is_eof()) {
 | 
			
		||||
      char c = lex->char_at();
 | 
			
		||||
      if (c >= '0' && c <= '9') {
 | 
			
		||||
      ++st;
 | 
			
		||||
        lex->skip_chars(1);
 | 
			
		||||
        continue;
 | 
			
		||||
      }
 | 
			
		||||
      if (!hex) {
 | 
			
		||||
      return false;
 | 
			
		||||
        break;
 | 
			
		||||
      }
 | 
			
		||||
      c |= 0x20;
 | 
			
		||||
      if (c < 'a' || c > 'f') {
 | 
			
		||||
        break;
 | 
			
		||||
      }
 | 
			
		||||
      lex->skip_chars(1);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::string_view str_val(str_begin, lex->c_str() - str_begin);
 | 
			
		||||
    lex->add_token(tok_int_const, str_val);
 | 
			
		||||
    return true;
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// Anything starting from # is a compiler directive.
 | 
			
		||||
// Technically, #include and #pragma can be mapped as separate chunks,
 | 
			
		||||
// but storing such long strings in a trie increases its memory usage.
 | 
			
		||||
struct ChunkCompilerDirective final : ChunkLexerBase {
 | 
			
		||||
  bool parse(Lexer* lex) const override {
 | 
			
		||||
    const char* str_begin = lex->c_str();
 | 
			
		||||
 | 
			
		||||
    lex->skip_chars(1);
 | 
			
		||||
    while (std::isalnum(lex->char_at())) {
 | 
			
		||||
      lex->skip_chars(1);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::string_view str_val(str_begin, lex->c_str() - str_begin);
 | 
			
		||||
    if (str_val == "#include") {
 | 
			
		||||
      lex->add_token(tok_include, str_val);
 | 
			
		||||
      return true;
 | 
			
		||||
    }
 | 
			
		||||
    if (str_val == "#pragma") {
 | 
			
		||||
      lex->add_token(tok_pragma, str_val);
 | 
			
		||||
      return true;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    lex->error("unknown compiler directive");
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// Tokens like !=, &, etc. emit just a simple TokenType.
 | 
			
		||||
// Since they are stored in trie, "parsing" them is just skipping len chars.
 | 
			
		||||
struct ChunkSimpleToken final : ChunkLexerBase {
 | 
			
		||||
  TokenType tp;
 | 
			
		||||
  int len;
 | 
			
		||||
 | 
			
		||||
  ChunkSimpleToken(TokenType tp, int len) : tp(tp), len(len) {}
 | 
			
		||||
 | 
			
		||||
  bool parse(Lexer* lex) const override {
 | 
			
		||||
    std::string_view str_val(lex->c_str(), len);
 | 
			
		||||
    lex->add_token(tp, str_val);
 | 
			
		||||
    lex->skip_chars(len);
 | 
			
		||||
    return true;
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// Spaces and other space-like symbols are just skipped.
 | 
			
		||||
struct ChunkSkipWhitespace final : ChunkLexerBase {
 | 
			
		||||
  bool parse(Lexer* lex) const override {
 | 
			
		||||
    lex->skip_chars(1);
 | 
			
		||||
    lex->skip_spaces();
 | 
			
		||||
    return true;
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// Here we handle corner cases of grammar that are requested on demand.
 | 
			
		||||
// E.g., for 'pragma version >0.5.0', '0.5.0' should be parsed specially to emit tok_semver.
 | 
			
		||||
// See TolkLanguageGrammar::parse_next_chunk_special().
 | 
			
		||||
struct ChunkSpecialParsing {
 | 
			
		||||
  static bool parse_pragma_name(Lexer* lex) {
 | 
			
		||||
    const char* str_begin = lex->c_str();
 | 
			
		||||
    while (std::isalnum(lex->char_at()) || lex->char_at() == '-') {
 | 
			
		||||
      lex->skip_chars(1);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::string_view str_val(str_begin, lex->c_str() - str_begin);
 | 
			
		||||
    if (str_val.empty()) {
 | 
			
		||||
      return false;
 | 
			
		||||
    }
 | 
			
		||||
    ++st;
 | 
			
		||||
    lex->add_token(tok_pragma_name, str_val);
 | 
			
		||||
    return true;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static bool parse_semver(Lexer* lex) {
 | 
			
		||||
    const char* str_begin = lex->c_str();
 | 
			
		||||
    while (std::isdigit(lex->char_at()) || lex->char_at() == '.') {
 | 
			
		||||
      lex->skip_chars(1);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::string_view str_val(str_begin, lex->c_str() - str_begin);
 | 
			
		||||
    if (str_val.empty()) {
 | 
			
		||||
      return false;
 | 
			
		||||
    }
 | 
			
		||||
    lex->add_token(tok_semver, str_val);
 | 
			
		||||
    return true;
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// Anything starting from a valid identifier beginning symbol is parsed as an identifier.
 | 
			
		||||
// But if a resulting string is a keyword, a corresponding token is emitted instead of tok_identifier.
 | 
			
		||||
struct ChunkIdentifierOrKeyword final : ChunkLexerBase {
 | 
			
		||||
  // having parsed str up to the valid end, look up whether it's a valid keyword
 | 
			
		||||
  // in the future, this could be a bit more effective than just comparing strings (e.g. gperf),
 | 
			
		||||
  // but nevertheless, performance of the naive code below is reasonably good
 | 
			
		||||
  static TokenType maybe_keyword(std::string_view str) {
 | 
			
		||||
    switch (str.size()) {
 | 
			
		||||
      case 1:
 | 
			
		||||
        if (str == "~") return tok_bitwise_not;  // todo attention
 | 
			
		||||
        if (str == "_") return tok_underscore;  // todo attention
 | 
			
		||||
        break;
 | 
			
		||||
      case 2:
 | 
			
		||||
        if (str == "do") return tok_do;
 | 
			
		||||
        if (str == "if") return tok_if;
 | 
			
		||||
        break;
 | 
			
		||||
      case 3:
 | 
			
		||||
        if (str == "int") return tok_int;
 | 
			
		||||
        if (str == "var") return tok_var;
 | 
			
		||||
        if (str == "asm") return tok_asm;
 | 
			
		||||
        if (str == "get") return tok_get;
 | 
			
		||||
        if (str == "try") return tok_try;
 | 
			
		||||
        break;
 | 
			
		||||
      case 4:
 | 
			
		||||
        if (str == "else") return tok_else;
 | 
			
		||||
        if (str == "pure") return tok_pure;
 | 
			
		||||
        if (str == "then") return tok_then;
 | 
			
		||||
        if (str == "cell") return tok_cell;
 | 
			
		||||
        if (str == "cont") return tok_cont;
 | 
			
		||||
        if (str == "type") return tok_type; // todo unused token?
 | 
			
		||||
        break;
 | 
			
		||||
      case 5:
 | 
			
		||||
        if (str == "slice") return tok_slice;
 | 
			
		||||
        if (str == "tuple") return tok_tuple;
 | 
			
		||||
        if (str == "const") return tok_const;
 | 
			
		||||
        if (str == "while") return tok_while;
 | 
			
		||||
        if (str == "until") return tok_until;
 | 
			
		||||
        if (str == "catch") return tok_catch;
 | 
			
		||||
        if (str == "ifnot") return tok_ifnot;
 | 
			
		||||
        break;
 | 
			
		||||
      case 6:
 | 
			
		||||
        if (str == "return") return tok_return;
 | 
			
		||||
        if (str == "repeat") return tok_repeat;
 | 
			
		||||
        if (str == "elseif") return tok_elseif;
 | 
			
		||||
        if (str == "forall") return tok_forall;
 | 
			
		||||
        if (str == "extern") return tok_extern;
 | 
			
		||||
        if (str == "global") return tok_global;
 | 
			
		||||
        if (str == "impure") return tok_impure;
 | 
			
		||||
        if (str == "inline") return tok_inline;
 | 
			
		||||
        break;
 | 
			
		||||
      case 7:
 | 
			
		||||
        if (str == "builder") return tok_builder;
 | 
			
		||||
        if (str == "builtin") return tok_builtin;
 | 
			
		||||
        break;
 | 
			
		||||
      case 8:
 | 
			
		||||
        if (str == "operator") return tok_operator;
 | 
			
		||||
        break;
 | 
			
		||||
      case 9:
 | 
			
		||||
        if (str == "elseifnot") return tok_elseifnot;
 | 
			
		||||
        if (str == "method_id") return tok_method_id;
 | 
			
		||||
        break;
 | 
			
		||||
      case 10:
 | 
			
		||||
        if (str == "inline_ref") return tok_inlineref;
 | 
			
		||||
        if (str == "auto_apply") return tok_autoapply;
 | 
			
		||||
        break;
 | 
			
		||||
      default:
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    return tok_empty;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  bool parse(Lexer* lex) const override {
 | 
			
		||||
    const char* sym_begin = lex->c_str();
 | 
			
		||||
    lex->skip_chars(1);
 | 
			
		||||
    while (!lex->is_eof()) {
 | 
			
		||||
      char c = lex->char_at();
 | 
			
		||||
      // the pattern of valid identifier first symbol is provided in trie, here we test for identifier middle
 | 
			
		||||
      bool allowed_in_identifier = std::isalnum(c) || c == '_' || c == '$' || c == ':' || c == '?' || c == '!' || c == '\'';
 | 
			
		||||
      if (!allowed_in_identifier) {
 | 
			
		||||
        break;
 | 
			
		||||
      }
 | 
			
		||||
      lex->skip_chars(1);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::string_view str_val(sym_begin, lex->c_str() - sym_begin);
 | 
			
		||||
    if (TokenType kw_tok = maybe_keyword(str_val)) {
 | 
			
		||||
      lex->add_token(kw_tok, str_val);
 | 
			
		||||
    } else {
 | 
			
		||||
      symbols.lookup_add(static_cast<std::string>(str_val));
 | 
			
		||||
      lex->add_token(tok_identifier, str_val);
 | 
			
		||||
    }
 | 
			
		||||
    return true;
 | 
			
		||||
}
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
int Lexem::classify() {
 | 
			
		||||
  if (tp != Unknown) {
 | 
			
		||||
    return tp;
 | 
			
		||||
// Like in Kotlin, `backticks` can be used to wrap identifiers (both in declarations/usage, both for vars/functions).
 | 
			
		||||
// E.g.: function `do`() { var `with spaces` = 1; }
 | 
			
		||||
// This could be useful to use reserved names as identifiers (in a probable codegen from TL, for example).
 | 
			
		||||
struct ChunkIdentifierInBackticks final : ChunkLexerBase {
 | 
			
		||||
  bool parse(Lexer* lex) const override {
 | 
			
		||||
    const char* str_begin = lex->c_str();
 | 
			
		||||
    lex->skip_chars(1);
 | 
			
		||||
    while (!lex->is_eof() && lex->char_at() != '`' && lex->char_at() != '\n') {
 | 
			
		||||
      if (std::isspace(lex->char_at())) { // probably, I'll remove this restriction after rewriting symtable and cur_sym_idx
 | 
			
		||||
        lex->error("An identifier can't have a space in its name (even inside backticks)");
 | 
			
		||||
      }
 | 
			
		||||
  sym_idx_t i = symbols.lookup(str);
 | 
			
		||||
  if (i) {
 | 
			
		||||
    assert(str == symbols[i]->str);
 | 
			
		||||
    str = symbols[i]->str;
 | 
			
		||||
    sym_idx_t idx = symbols[i]->idx;
 | 
			
		||||
    tp = (idx < 0 ? -idx : Ident);
 | 
			
		||||
    val = i;
 | 
			
		||||
  } else if (is_number(str)) {
 | 
			
		||||
    tp = Number;
 | 
			
		||||
  } else {
 | 
			
		||||
    tp = 0;
 | 
			
		||||
      lex->skip_chars(1);
 | 
			
		||||
    }
 | 
			
		||||
  if (tp == Unknown) {
 | 
			
		||||
    tp = Ident;
 | 
			
		||||
    val = symbols.lookup(str, 1);
 | 
			
		||||
    if (lex->char_at() != '`') {
 | 
			
		||||
      lex->error("Unclosed backtick `");
 | 
			
		||||
    }
 | 
			
		||||
  return tp;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int Lexem::set(std::string _str, const SrcLocation& _loc, int _tp, int _val) {
 | 
			
		||||
  str = _str;
 | 
			
		||||
  loc = _loc;
 | 
			
		||||
  tp = _tp;
 | 
			
		||||
  val = _val;
 | 
			
		||||
  return classify();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
Lexer::Lexer(SourceReader& _src, std::string active_chars, std::string quote_chars, std::string multiline_quote)
 | 
			
		||||
    : src(_src), eof(false), lexem("", src.here(), Lexem::Undefined), peek_lexem("", {}, Lexem::Undefined),
 | 
			
		||||
      multiline_quote(std::move(multiline_quote)) {
 | 
			
		||||
  std::memset(char_class, 0, sizeof(char_class));
 | 
			
		||||
  unsigned char activity = cc::active;
 | 
			
		||||
  for (char c : active_chars) {
 | 
			
		||||
    if (c == ' ') {
 | 
			
		||||
      if (!--activity) {
 | 
			
		||||
        activity = cc::allow_repeat;
 | 
			
		||||
      }
 | 
			
		||||
    } else if ((unsigned)c < 0x80) {
 | 
			
		||||
      char_class[(unsigned)c] |= activity;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  for (int c : quote_chars) {
 | 
			
		||||
    if (c > ' ' && c <= 0x7f) {
 | 
			
		||||
      char_class[(unsigned)c] |= cc::quote_char;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Lexer::set_comment_tokens(const std::string &eol_cmts, const std::string &open_cmts, const std::string &close_cmts) {
 | 
			
		||||
  set_spec(eol_cmt, eol_cmts);
 | 
			
		||||
  set_spec(cmt_op, open_cmts);
 | 
			
		||||
  set_spec(cmt_cl, close_cmts);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Lexer::set_comment2_tokens(const std::string &eol_cmts2, const std::string &open_cmts2, const std::string &close_cmts2) {
 | 
			
		||||
  set_spec(eol_cmt2, eol_cmts2);
 | 
			
		||||
  set_spec(cmt_op2, open_cmts2);
 | 
			
		||||
  set_spec(cmt_cl2, close_cmts2);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Lexer::start_parsing() {
 | 
			
		||||
  next();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Lexer::set_spec(std::array<int, 3>& arr, std::string setup) {
 | 
			
		||||
  arr[0] = arr[1] = arr[2] = -0x100;
 | 
			
		||||
  std::size_t n = setup.size(), i;
 | 
			
		||||
  for (i = 0; i < n; i++) {
 | 
			
		||||
    if (setup[i] == ' ') {
 | 
			
		||||
      continue;
 | 
			
		||||
    }
 | 
			
		||||
    if (i == n - 1 || setup[i + 1] == ' ') {
 | 
			
		||||
      arr[0] = setup[i];
 | 
			
		||||
    } else if (i == n - 2 || (i < n - 2 && setup[i + 2] == ' ')) {
 | 
			
		||||
      arr[1] = setup[i];
 | 
			
		||||
      arr[2] = setup[++i];
 | 
			
		||||
    } else {
 | 
			
		||||
      while (i < n && setup[i] != ' ') {
 | 
			
		||||
        i++;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool Lexer::is_multiline_quote(const char* begin, const char* end) {
 | 
			
		||||
  if (multiline_quote.empty()) {
 | 
			
		||||
    return false;
 | 
			
		||||
  }
 | 
			
		||||
  for (const char& c : multiline_quote) {
 | 
			
		||||
    if (begin == end || *begin != c) {
 | 
			
		||||
      return false;
 | 
			
		||||
    }
 | 
			
		||||
    ++begin;
 | 
			
		||||
  }
 | 
			
		||||
    std::string_view str_val(str_begin + 1, lex->c_str() - str_begin - 1);
 | 
			
		||||
    lex->skip_chars(1);
 | 
			
		||||
    symbols.lookup_add(static_cast<std::string>(str_val));
 | 
			
		||||
    lex->add_token(tok_identifier, str_val);
 | 
			
		||||
    return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Lexer::expect(int exp_tp, const char* msg) {
 | 
			
		||||
  if (tp() != exp_tp) {
 | 
			
		||||
    throw ParseError{lexem.loc, (msg ? std::string{msg} : Lexem::lexem_name_str(exp_tp)) + " expected instead of " +
 | 
			
		||||
                                    cur().name_str()};
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
//
 | 
			
		||||
// ----------------------------------------------------------------------
 | 
			
		||||
// Here we define a grammar of Tolk.
 | 
			
		||||
// All valid chunks prefixes are stored in trie.
 | 
			
		||||
//
 | 
			
		||||
 | 
			
		||||
struct TolkLanguageGrammar {
 | 
			
		||||
  static LexingTrie trie;
 | 
			
		||||
 | 
			
		||||
  static bool parse_next_chunk(Lexer* lex) {
 | 
			
		||||
    const ChunkLexerBase* best = trie.get_deepest(lex->c_str());
 | 
			
		||||
    return best && best->parse(lex);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static bool parse_next_chunk_special(Lexer* lex, TokenType parse_next_as) {
 | 
			
		||||
    switch (parse_next_as) {
 | 
			
		||||
      case tok_pragma_name:
 | 
			
		||||
        return ChunkSpecialParsing::parse_pragma_name(lex);
 | 
			
		||||
      case tok_semver:
 | 
			
		||||
        return ChunkSpecialParsing::parse_semver(lex);
 | 
			
		||||
      default:
 | 
			
		||||
        assert(false);
 | 
			
		||||
        return false;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static void register_token(const char* str, int len, TokenType tp) {
 | 
			
		||||
    trie.add_prefix(str, new ChunkSimpleToken(tp, len));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static void init() {
 | 
			
		||||
    trie.add_prefix("//", singleton<ChunkInlineComment>());
 | 
			
		||||
    trie.add_prefix(";;", singleton<ChunkInlineComment>());
 | 
			
		||||
    trie.add_prefix("/*", singleton<ChunkMultilineComment>());
 | 
			
		||||
    trie.add_prefix("{-", singleton<ChunkMultilineComment>());
 | 
			
		||||
    trie.add_prefix(R"(")", singleton<ChunkString>());
 | 
			
		||||
    trie.add_prefix(R"(""")", singleton<ChunkMultilineString>());
 | 
			
		||||
    trie.add_prefix(" ", singleton<ChunkSkipWhitespace>());
 | 
			
		||||
    trie.add_prefix("\t", singleton<ChunkSkipWhitespace>());
 | 
			
		||||
    trie.add_prefix("\r", singleton<ChunkSkipWhitespace>());
 | 
			
		||||
    trie.add_prefix("\n", singleton<ChunkSkipWhitespace>());
 | 
			
		||||
    trie.add_prefix("#", singleton<ChunkCompilerDirective>());
 | 
			
		||||
 | 
			
		||||
    trie.add_pattern("[0-9]", singleton<ChunkNumber>());
 | 
			
		||||
    // todo think of . ~
 | 
			
		||||
    trie.add_pattern("[a-zA-Z_$.~]", singleton<ChunkIdentifierOrKeyword>());
 | 
			
		||||
    trie.add_prefix("`", singleton<ChunkIdentifierInBackticks>());
 | 
			
		||||
 | 
			
		||||
    register_token("+", 1, tok_plus);
 | 
			
		||||
    register_token("-", 1, tok_minus);
 | 
			
		||||
    register_token("*", 1, tok_mul);
 | 
			
		||||
    register_token("/", 1, tok_div);
 | 
			
		||||
    register_token("%", 1, tok_mod);
 | 
			
		||||
    register_token("?", 1, tok_question);
 | 
			
		||||
    register_token(":", 1, tok_colon);
 | 
			
		||||
    register_token(",", 1, tok_comma);
 | 
			
		||||
    register_token(";", 1, tok_semicolon);
 | 
			
		||||
    register_token("(", 1, tok_oppar);
 | 
			
		||||
    register_token(")", 1, tok_clpar);
 | 
			
		||||
    register_token("[", 1, tok_opbracket);
 | 
			
		||||
    register_token("]", 1, tok_clbracket);
 | 
			
		||||
    register_token("{", 1, tok_opbrace);
 | 
			
		||||
    register_token("}", 1, tok_clbrace);
 | 
			
		||||
    register_token("=", 1, tok_assign);
 | 
			
		||||
    register_token("<", 1, tok_lt);
 | 
			
		||||
    register_token(">", 1, tok_gt);
 | 
			
		||||
    register_token("&", 1, tok_bitwise_and);
 | 
			
		||||
    register_token("|", 1, tok_bitwise_or);
 | 
			
		||||
    register_token("^", 1, tok_bitwise_xor);
 | 
			
		||||
    register_token("==", 2, tok_eq);
 | 
			
		||||
    register_token("!=", 2, tok_neq);
 | 
			
		||||
    register_token("<=", 2, tok_leq);
 | 
			
		||||
    register_token(">=", 2, tok_geq);
 | 
			
		||||
    register_token("<<", 2, tok_lshift);
 | 
			
		||||
    register_token(">>", 2, tok_rshift);
 | 
			
		||||
    register_token("~/", 2, tok_divR);
 | 
			
		||||
    register_token("^/", 2, tok_divC);
 | 
			
		||||
    register_token("~%", 2, tok_modR);
 | 
			
		||||
    register_token("^%", 2, tok_modC);
 | 
			
		||||
    register_token("/%", 2, tok_divmod);
 | 
			
		||||
    register_token("+=", 2, tok_set_plus);
 | 
			
		||||
    register_token("-=", 2, tok_set_minus);
 | 
			
		||||
    register_token("*=", 2, tok_set_mul);
 | 
			
		||||
    register_token("/=", 2, tok_set_div);
 | 
			
		||||
    register_token("%=", 2, tok_set_mod);
 | 
			
		||||
    register_token("&=", 2, tok_set_bitwise_and);
 | 
			
		||||
    register_token("|=", 2, tok_set_bitwise_or);
 | 
			
		||||
    register_token("^=", 2, tok_set_bitwise_xor);
 | 
			
		||||
    register_token("->", 2, tok_mapsto);
 | 
			
		||||
    register_token("<=>", 3, tok_spaceship);
 | 
			
		||||
    register_token("~>>", 3, tok_rshiftR);
 | 
			
		||||
    register_token("^>>", 3, tok_rshiftC);
 | 
			
		||||
    register_token("~/=", 3, tok_set_divR);
 | 
			
		||||
    register_token("^/=", 3, tok_set_divC);
 | 
			
		||||
    register_token("~%=", 3, tok_set_modR);
 | 
			
		||||
    register_token("^%=", 3, tok_set_modC);
 | 
			
		||||
    register_token("<<=", 3, tok_set_lshift);
 | 
			
		||||
    register_token(">>=", 3, tok_set_rshift);
 | 
			
		||||
    register_token("~>>=", 4, tok_set_rshiftR);
 | 
			
		||||
    register_token("^>>=", 4, tok_set_rshiftC);
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
LexingTrie TolkLanguageGrammar::trie;
 | 
			
		||||
 | 
			
		||||
//
 | 
			
		||||
// ----------------------------------------------------------------------
 | 
			
		||||
// The Lexer class is to be used outside (by parser, which constructs AST from tokens).
 | 
			
		||||
// It's streaming. It means, that `next()` parses a next token on demand
 | 
			
		||||
// (instead of parsing all file contents to vector<Token> and iterating over it).
 | 
			
		||||
// Parsing on demand uses effectively less memory.
 | 
			
		||||
// Note, that chunks, being parsed, call `add_token()`, and a chunk may add multiple tokens at once.
 | 
			
		||||
// That's why a small circular buffer for tokens is used.
 | 
			
		||||
// `last_token_idx` actually means a number of total tokens added.
 | 
			
		||||
// `cur_token_idx` is the number of tokens already returned by `next()`.
 | 
			
		||||
// It's assumed that an input file has already been loaded, its contents is present and won't be deleted
 | 
			
		||||
// (`start`, `cur` and `end`, as well as every Token str_val, points inside file->text).
 | 
			
		||||
//
 | 
			
		||||
 | 
			
		||||
// Sets up the lexer over the text of `file` and immediately parses the first token,
// so a current token is available right after construction.
// The pointers below point into file->text.
Lexer::Lexer(const SrcFile* file)
  : file(file),
    p_start(file->text.data()),
    p_end(p_start + file->text.size()),
    p_next(p_start),
    location(file) {
  next();
}
 | 
			
		||||
 | 
			
		||||
const Lexem& Lexer::next() {
 | 
			
		||||
  if (peek_lexem.valid()) {
 | 
			
		||||
    lexem = std::move(peek_lexem);
 | 
			
		||||
    peek_lexem.clear({}, Lexem::Undefined);
 | 
			
		||||
    eof = (lexem.tp == Lexem::Eof);
 | 
			
		||||
    return lexem;
 | 
			
		||||
  }
 | 
			
		||||
  if (eof) {
 | 
			
		||||
    return lexem.clear(src.here(), Lexem::Eof);
 | 
			
		||||
  }
 | 
			
		||||
  long long comm = 1;
 | 
			
		||||
  // the code below is very complicated, because it tried to support one-symbol start/end and nesting
 | 
			
		||||
  // in Tolk, we decided to stop supporting nesting (it was never used in practice and almost impossible for js highlighters)
 | 
			
		||||
  // later on I'll simplify this code (more precisely, rewrite lexer from scratch)
 | 
			
		||||
  while (!src.seek_eof()) {
 | 
			
		||||
    int cc = src.cur_char(), nc = src.next_char();
 | 
			
		||||
    // note, that in practice, [0]-th element is -256, condition for [0]-th is always false
 | 
			
		||||
    // todo rewrite this all in the future
 | 
			
		||||
    if (cc == eol_cmt[0] || (cc == eol_cmt[1] && nc == eol_cmt[2]) || cc == eol_cmt2[0] || (cc == eol_cmt2[1] && nc == eol_cmt2[2])) {
 | 
			
		||||
      if (comm == 1) {    // just "//" — skip a whole line
 | 
			
		||||
        src.load_line();
 | 
			
		||||
      } else {            // if "//" is nested into "/*", continue reading, since "*/" may be met
 | 
			
		||||
        src.advance(1);
 | 
			
		||||
      }
 | 
			
		||||
    } else if (cc == cmt_op[1] && nc == cmt_op[2] || cc == cmt_op2[1] && nc == cmt_op2[2]) {
 | 
			
		||||
      src.advance(2);
 | 
			
		||||
      comm = comm * 2 + 1;
 | 
			
		||||
    } else if (cc == cmt_op[0] || cc == cmt_op2[0]) {  // always false
 | 
			
		||||
      src.advance(1);
 | 
			
		||||
      comm *= 2;
 | 
			
		||||
    } else if (comm == 1) {
 | 
			
		||||
      break; // means that we are not inside a comment
 | 
			
		||||
    } else if (cc == cmt_cl[1] && nc == cmt_cl[2] || cc == cmt_cl2[1] && nc == cmt_cl2[2]) {
 | 
			
		||||
      if (!(comm & 1)) { // always false
 | 
			
		||||
        src.error(std::string{"a `"} + (char)cmt_op[0] + "` comment closed by `" + (char)cmt_cl[1] + (char)cmt_cl[2] +
 | 
			
		||||
                  "`");
 | 
			
		||||
      }
 | 
			
		||||
      // note that {- may be closed with */, but assume it's ok (we'll get rid of {- in the future)
 | 
			
		||||
      comm = 1;
 | 
			
		||||
      src.advance(2);
 | 
			
		||||
    } else if (cc == cmt_cl[0] || cc == cmt_cl2[0]) { // always false
 | 
			
		||||
      if (!(comm & 1)) {
 | 
			
		||||
        src.error(std::string{"a `"} + (char)cmt_op[1] + (char)cmt_op[2] + "` comment closed by `" + (char)cmt_cl[0] +
 | 
			
		||||
                  "`");
 | 
			
		||||
      }
 | 
			
		||||
      comm = 1;
 | 
			
		||||
      src.advance(1);
 | 
			
		||||
    } else {
 | 
			
		||||
      src.advance(1);
 | 
			
		||||
    }
 | 
			
		||||
    if (comm < 0) {
 | 
			
		||||
      src.error("too many nested comments");
 | 
			
		||||
void Lexer::next() {
 | 
			
		||||
  while (cur_token_idx == last_token_idx && !is_eof()) {
 | 
			
		||||
    update_location();
 | 
			
		||||
    if (!TolkLanguageGrammar::parse_next_chunk(this)) {
 | 
			
		||||
      error("Failed to parse");
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  if (src.seek_eof()) {
 | 
			
		||||
    eof = true;
 | 
			
		||||
    if (comm > 1) {
 | 
			
		||||
      src.error("comment extends past end of file");
 | 
			
		||||
  if (is_eof()) {
 | 
			
		||||
    add_token(tok_eof, file->text);
 | 
			
		||||
  }
 | 
			
		||||
    return lexem.clear(src.here(), Lexem::Eof);
 | 
			
		||||
  }
 | 
			
		||||
  if (is_multiline_quote(src.get_ptr(), src.get_end_ptr())) {
 | 
			
		||||
    src.advance(multiline_quote.size());
 | 
			
		||||
    const char* end = nullptr;
 | 
			
		||||
    SrcLocation here = src.here();
 | 
			
		||||
    std::string body;
 | 
			
		||||
    while (!src.is_eof()) {
 | 
			
		||||
      if (src.is_eoln()) {
 | 
			
		||||
        body.push_back('\n');
 | 
			
		||||
        src.load_line();
 | 
			
		||||
        continue;
 | 
			
		||||
      }
 | 
			
		||||
      if (is_multiline_quote(src.get_ptr(), src.get_end_ptr())) {
 | 
			
		||||
        end = src.get_ptr();
 | 
			
		||||
        src.advance(multiline_quote.size());
 | 
			
		||||
        break;
 | 
			
		||||
      }
 | 
			
		||||
      body.push_back(src.cur_char());
 | 
			
		||||
      src.advance(1);
 | 
			
		||||
    }
 | 
			
		||||
    if (!end) {
 | 
			
		||||
      src.error("string extends past end of file");
 | 
			
		||||
    }
 | 
			
		||||
    lexem.set(body, here, Lexem::String);
 | 
			
		||||
    int c = src.cur_char();
 | 
			
		||||
    if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
 | 
			
		||||
      lexem.val = c;
 | 
			
		||||
      src.advance(1);
 | 
			
		||||
    }
 | 
			
		||||
    return lexem;
 | 
			
		||||
  }
 | 
			
		||||
  int c = src.cur_char();
 | 
			
		||||
  const char* end = src.get_ptr();
 | 
			
		||||
  if (is_quote_char(c) || c == '`') {
 | 
			
		||||
    int qc = c;
 | 
			
		||||
    ++end;
 | 
			
		||||
    while (end < src.get_end_ptr() && *end != qc) {
 | 
			
		||||
      ++end;
 | 
			
		||||
    }
 | 
			
		||||
    if (*end != qc) {
 | 
			
		||||
      src.error(qc == '`' ? "a `back-quoted` token extends past end of line" : "string extends past end of line");
 | 
			
		||||
    }
 | 
			
		||||
    lexem.set(std::string{src.get_ptr() + 1, end}, src.here(), qc == '`' ? Lexem::Unknown : Lexem::String);
 | 
			
		||||
    src.set_ptr(end + 1);
 | 
			
		||||
    c = src.cur_char();
 | 
			
		||||
    if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
 | 
			
		||||
      lexem.val = c;
 | 
			
		||||
      src.set_ptr(end + 2);
 | 
			
		||||
    }
 | 
			
		||||
    // std::cerr << lexem.name_str() << ' ' << lexem.str << std::endl;
 | 
			
		||||
    return lexem;
 | 
			
		||||
  }
 | 
			
		||||
  int len = 0, pc = -0x100;
 | 
			
		||||
  while (end < src.get_end_ptr()) {
 | 
			
		||||
    c = *end;
 | 
			
		||||
    bool repeated = (c == pc && is_repeatable(c));
 | 
			
		||||
    if (c == ' ' || c == 9 || (len && is_left_active(c) && !repeated)) {
 | 
			
		||||
      break;
 | 
			
		||||
    }
 | 
			
		||||
    ++len;
 | 
			
		||||
    ++end;
 | 
			
		||||
    if (is_right_active(c) && !repeated) {
 | 
			
		||||
      break;
 | 
			
		||||
    }
 | 
			
		||||
    pc = c;
 | 
			
		||||
  }
 | 
			
		||||
  lexem.set(std::string{src.get_ptr(), end}, src.here());
 | 
			
		||||
  src.set_ptr(end);
 | 
			
		||||
  // std::cerr << lexem.name_str() << ' ' << lexem.str << std::endl;
 | 
			
		||||
  return lexem;
 | 
			
		||||
  cur_token = tokens_circularbuf[++cur_token_idx & 7];
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
const Lexem& Lexer::peek() {
 | 
			
		||||
  if (peek_lexem.valid()) {
 | 
			
		||||
    return peek_lexem;
 | 
			
		||||
// Parses the upcoming chunk of input as a token of the explicitly requested kind `parse_next_as`
// and makes it the current token.
// Raises a parse error "<str_expected> expected" when the grammar cannot parse the chunk that way.
void Lexer::next_special(TokenType parse_next_as, const char* str_expected) {
  // the current token must be the most recently produced one (nothing parsed ahead)
  assert(cur_token_idx == last_token_idx);

  skip_spaces();
  update_location();

  if (!TolkLanguageGrammar::parse_next_chunk_special(this, parse_next_as)) {
    error(std::string(str_expected) + " expected");
  }

  // advance to the next slot of the 8-entry ring buffer
  ++cur_token_idx;
  cur_token = tokens_circularbuf[cur_token_idx & 7];
}
 | 
			
		||||
 | 
			
		||||
int Lexer::cur_sym_idx() const {
 | 
			
		||||
  assert(tok() == tok_identifier);
 | 
			
		||||
  return symbols.lookup_add(cur_str_std_string());
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Lexer::error(const std::string& err_msg) const {
 | 
			
		||||
  throw ParseError(cur_location(), err_msg);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Lexer::error_at(const std::string& prefix, const std::string& suffix) const {
 | 
			
		||||
  throw ParseError(cur_location(), prefix + cur_str_std_string() + suffix);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Lexer::on_expect_call_failed(const char* str_expected) const {
 | 
			
		||||
  throw ParseError(cur_location(), std::string(str_expected) + " expected instead of `" + cur_str_std_string() + "`");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Lexer setup entry point: delegates to TolkLanguageGrammar::init().
// NOTE(review): presumably has to run before the first Lexer is used — confirm against callers.
void lexer_init() {
  TolkLanguageGrammar::init();
}
 | 
			
		||||
 | 
			
		||||
// todo #ifdef TOLK_PROFILING
 | 
			
		||||
// As told above, `next()` produces tokens on demand, while AST is being generated.
 | 
			
		||||
// Hence, it's difficult to measure Lexer performance separately.
 | 
			
		||||
// This function can be called just to measure Lexer performance: it simply scans all input files.
 | 
			
		||||
// There is no sense to use it in production, but when refactoring and optimizing Lexer, it's useful.
 | 
			
		||||
void lexer_measure_performance(const std::vector<SrcFile*>& files_to_just_parse) {
 | 
			
		||||
  for (const SrcFile* file : files_to_just_parse) {
 | 
			
		||||
    Lexer lex(file);
 | 
			
		||||
    while (!lex.is_eof()) {
 | 
			
		||||
      lex.next();
 | 
			
		||||
    }
 | 
			
		||||
  if (eof) {
 | 
			
		||||
    return lexem.clear(src.here(), Lexem::Eof);
 | 
			
		||||
  }
 | 
			
		||||
  Lexem keep = std::move(lexem);
 | 
			
		||||
  next();
 | 
			
		||||
  peek_lexem = std::move(lexem);
 | 
			
		||||
  lexem = std::move(keep);
 | 
			
		||||
  eof = false;
 | 
			
		||||
  return peek_lexem;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
}  // namespace tolk
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										293
									
								
								tolk/lexer.h
									
										
									
									
									
								
							
							
						
						
									
										293
									
								
								tolk/lexer.h
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -15,104 +15,225 @@
 | 
			
		|||
    along with TON Blockchain Library.  If not, see <http://www.gnu.org/licenses/>.
 | 
			
		||||
*/
 | 
			
		||||
#pragma once
 | 
			
		||||
#include "srcread.h"
 | 
			
		||||
#include <array>
 | 
			
		||||
#include <memory>
 | 
			
		||||
#include <cstring>
 | 
			
		||||
 | 
			
		||||
#include "platform-utils.h"
 | 
			
		||||
#include "src-file.h"
 | 
			
		||||
#include <string>
 | 
			
		||||
 | 
			
		||||
namespace tolk {
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 *
 | 
			
		||||
 *   LEXER
 | 
			
		||||
 *
 | 
			
		||||
 */
 | 
			
		||||
enum TokenType {
 | 
			
		||||
  tok_empty,
 | 
			
		||||
 | 
			
		||||
struct Lexem {
 | 
			
		||||
  enum { Undefined = -2, Eof = -1, Unknown = 0, Ident = 0, Number = 1, Special = 2, String = 3 };
 | 
			
		||||
  int tp;
 | 
			
		||||
  int val;
 | 
			
		||||
  std::string str;
 | 
			
		||||
  SrcLocation loc;
 | 
			
		||||
  int classify();
 | 
			
		||||
  Lexem(std::string _str = "", const SrcLocation& _loc = {}, int _tp = Unknown, int _val = 0)
 | 
			
		||||
      : tp(_tp), val(_val), str(_str), loc(_loc) {
 | 
			
		||||
    classify();
 | 
			
		||||
  }
 | 
			
		||||
  int set(std::string _str = "", const SrcLocation& _loc = {}, int _tp = Unknown, int _val = 0);
 | 
			
		||||
  Lexem& clear(const SrcLocation& _loc = {}, int _tp = Unknown, int _val = 0) {
 | 
			
		||||
    tp = _tp;
 | 
			
		||||
    val = _val;
 | 
			
		||||
    loc = _loc;
 | 
			
		||||
    str = "";
 | 
			
		||||
    return *this;
 | 
			
		||||
  }
 | 
			
		||||
  bool valid() const {
 | 
			
		||||
    return tp != Undefined;
 | 
			
		||||
  }
 | 
			
		||||
  std::string name_str() const;
 | 
			
		||||
  void error(std::string _str) const {
 | 
			
		||||
    throw ParseError{loc, _str};
 | 
			
		||||
  }
 | 
			
		||||
  void error_at(std::string str1, std::string str2) const {
 | 
			
		||||
    error(str1 + str + str2);
 | 
			
		||||
  }
 | 
			
		||||
  tok_int_const,
 | 
			
		||||
  tok_string_const,
 | 
			
		||||
  tok_string_modifier,
 | 
			
		||||
 | 
			
		||||
  static std::string lexem_name_str(int idx);
 | 
			
		||||
  tok_identifier,
 | 
			
		||||
 | 
			
		||||
  tok_plus,
 | 
			
		||||
  tok_minus,
 | 
			
		||||
  tok_mul,
 | 
			
		||||
  tok_div,
 | 
			
		||||
  tok_mod,
 | 
			
		||||
  tok_question,
 | 
			
		||||
  tok_colon,
 | 
			
		||||
  tok_comma,
 | 
			
		||||
  tok_semicolon,
 | 
			
		||||
  tok_oppar,
 | 
			
		||||
  tok_clpar,
 | 
			
		||||
  tok_opbracket,
 | 
			
		||||
  tok_clbracket,
 | 
			
		||||
  tok_opbrace,
 | 
			
		||||
  tok_clbrace,
 | 
			
		||||
  tok_assign,
 | 
			
		||||
  tok_underscore,
 | 
			
		||||
  tok_lt,
 | 
			
		||||
  tok_gt,
 | 
			
		||||
  tok_bitwise_and,
 | 
			
		||||
  tok_bitwise_or,
 | 
			
		||||
  tok_bitwise_xor,
 | 
			
		||||
  tok_bitwise_not,
 | 
			
		||||
  tok_dot,
 | 
			
		||||
 | 
			
		||||
  tok_eq,
 | 
			
		||||
  tok_neq,
 | 
			
		||||
  tok_leq,
 | 
			
		||||
  tok_geq,
 | 
			
		||||
  tok_spaceship,
 | 
			
		||||
  tok_lshift,
 | 
			
		||||
  tok_rshift,
 | 
			
		||||
  tok_rshiftR,
 | 
			
		||||
  tok_rshiftC,
 | 
			
		||||
  tok_divR,
 | 
			
		||||
  tok_divC,
 | 
			
		||||
  tok_modR,
 | 
			
		||||
  tok_modC,
 | 
			
		||||
  tok_divmod,
 | 
			
		||||
  tok_set_plus,
 | 
			
		||||
  tok_set_minus,
 | 
			
		||||
  tok_set_mul,
 | 
			
		||||
  tok_set_div,
 | 
			
		||||
  tok_set_divR,
 | 
			
		||||
  tok_set_divC,
 | 
			
		||||
  tok_set_mod,
 | 
			
		||||
  tok_set_modR,
 | 
			
		||||
  tok_set_modC,
 | 
			
		||||
  tok_set_lshift,
 | 
			
		||||
  tok_set_rshift,
 | 
			
		||||
  tok_set_rshiftR,
 | 
			
		||||
  tok_set_rshiftC,
 | 
			
		||||
  tok_set_bitwise_and,
 | 
			
		||||
  tok_set_bitwise_or,
 | 
			
		||||
  tok_set_bitwise_xor,
 | 
			
		||||
 | 
			
		||||
  tok_return,
 | 
			
		||||
  tok_var,
 | 
			
		||||
  tok_repeat,
 | 
			
		||||
  tok_do,
 | 
			
		||||
  tok_while,
 | 
			
		||||
  tok_until,
 | 
			
		||||
  tok_try,
 | 
			
		||||
  tok_catch,
 | 
			
		||||
  tok_if,
 | 
			
		||||
  tok_ifnot,
 | 
			
		||||
  tok_then,
 | 
			
		||||
  tok_else,
 | 
			
		||||
  tok_elseif,
 | 
			
		||||
  tok_elseifnot,
 | 
			
		||||
 | 
			
		||||
  tok_int,
 | 
			
		||||
  tok_cell,
 | 
			
		||||
  tok_slice,
 | 
			
		||||
  tok_builder,
 | 
			
		||||
  tok_cont,
 | 
			
		||||
  tok_tuple,
 | 
			
		||||
  tok_type,
 | 
			
		||||
  tok_mapsto,
 | 
			
		||||
  tok_forall,
 | 
			
		||||
 | 
			
		||||
  tok_extern,
 | 
			
		||||
  tok_global,
 | 
			
		||||
  tok_asm,
 | 
			
		||||
  tok_impure,
 | 
			
		||||
  tok_pure,
 | 
			
		||||
  tok_inline,
 | 
			
		||||
  tok_inlineref,
 | 
			
		||||
  tok_builtin,
 | 
			
		||||
  tok_autoapply,
 | 
			
		||||
  tok_method_id,
 | 
			
		||||
  tok_get,
 | 
			
		||||
  tok_operator,
 | 
			
		||||
  tok_infix,
 | 
			
		||||
  tok_infixl,
 | 
			
		||||
  tok_infixr,
 | 
			
		||||
  tok_const,
 | 
			
		||||
 | 
			
		||||
  tok_pragma,
 | 
			
		||||
  tok_pragma_name,
 | 
			
		||||
  tok_semver,
 | 
			
		||||
  tok_include,
 | 
			
		||||
 | 
			
		||||
  tok_eof
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// All tolk language is parsed into tokens.
 | 
			
		||||
// Lexer::next() advances to the next Token; read it via tok() and cur_str().
 | 
			
		||||
struct Token {
 | 
			
		||||
  TokenType type = tok_empty;
 | 
			
		||||
  std::string_view str_val;
 | 
			
		||||
 | 
			
		||||
  Token() = default;
 | 
			
		||||
  Token(TokenType type, std::string_view str_val): type(type), str_val(str_val) {}
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// Lexer::next() is a method to be used externally (while parsing tolk file to AST).
 | 
			
		||||
// It's streaming: `next()` parses a token on demand.
 | 
			
		||||
// For comments, see lexer.cpp, a comment above Lexer constructor.
 | 
			
		||||
class Lexer {
 | 
			
		||||
  SourceReader& src;
 | 
			
		||||
  bool eof;
 | 
			
		||||
  Lexem lexem, peek_lexem;
 | 
			
		||||
  unsigned char char_class[128];
 | 
			
		||||
  std::array<int, 3> eol_cmt, cmt_op, cmt_cl;    // for ;; {- -}
 | 
			
		||||
  std::array<int, 3> eol_cmt2, cmt_op2, cmt_cl2; // for // /* */
 | 
			
		||||
  std::string multiline_quote;
 | 
			
		||||
  enum cc { left_active = 2, right_active = 1, active = 3, allow_repeat = 4, quote_char = 8 };
 | 
			
		||||
  Token tokens_circularbuf[8]{};
 | 
			
		||||
  int last_token_idx = -1;
 | 
			
		||||
  int cur_token_idx = -1;
 | 
			
		||||
  Token cur_token;  // = tokens_circularbuf[cur_token_idx & 7]
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
  bool eof_found() const {
 | 
			
		||||
    return eof;
 | 
			
		||||
  }
 | 
			
		||||
  explicit Lexer(SourceReader& _src, std::string active_chars = ";,() ~.",
 | 
			
		||||
    std::string quote_chars = "\"", std::string multiline_quote = "\"\"\"");
 | 
			
		||||
  const SrcFile* file;
 | 
			
		||||
  const char *p_start, *p_end, *p_next;
 | 
			
		||||
  SrcLocation location;
 | 
			
		||||
 | 
			
		||||
  void set_comment_tokens(const std::string &eol_cmts, const std::string &open_cmts, const std::string &close_cmts);
 | 
			
		||||
  void set_comment2_tokens(const std::string &eol_cmts2, const std::string &open_cmts2, const std::string &close_cmts2);
 | 
			
		||||
  void start_parsing();
 | 
			
		||||
 | 
			
		||||
  const Lexem& next();
 | 
			
		||||
  const Lexem& cur() const {
 | 
			
		||||
    return lexem;
 | 
			
		||||
  }
 | 
			
		||||
  const Lexem& peek();
 | 
			
		||||
  int tp() const {
 | 
			
		||||
    return lexem.tp;
 | 
			
		||||
  }
 | 
			
		||||
  void expect(int exp_tp, const char* msg = 0);
 | 
			
		||||
  int classify_char(unsigned c) const {
 | 
			
		||||
    return c < 0x80 ? char_class[c] : 0;
 | 
			
		||||
  }
 | 
			
		||||
  bool is_active(int c) const {
 | 
			
		||||
    return (classify_char(c) & cc::active) == cc::active;
 | 
			
		||||
  }
 | 
			
		||||
  bool is_left_active(int c) const {
 | 
			
		||||
    return (classify_char(c) & cc::left_active);
 | 
			
		||||
  }
 | 
			
		||||
  bool is_right_active(int c) const {
 | 
			
		||||
    return (classify_char(c) & cc::right_active);
 | 
			
		||||
  }
 | 
			
		||||
  bool is_repeatable(int c) const {
 | 
			
		||||
    return (classify_char(c) & cc::allow_repeat);
 | 
			
		||||
  }
 | 
			
		||||
  bool is_quote_char(int c) const {
 | 
			
		||||
    return (classify_char(c) & cc::quote_char);
 | 
			
		||||
  // Points `location` at the character about to be read: the distance of p_next from p_start.
  void update_location() {
    const auto consumed = p_next - p_start;
    location.char_offset = static_cast<int>(consumed);
  }
 | 
			
		||||
 | 
			
		||||
 private:
 | 
			
		||||
  void set_spec(std::array<int, 3>& arr, std::string setup);
 | 
			
		||||
  bool is_multiline_quote(const char* begin, const char* end);
 | 
			
		||||
  GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
 | 
			
		||||
  void on_expect_call_failed(const char* str_expected) const;
 | 
			
		||||
 | 
			
		||||
public:
 | 
			
		||||
 | 
			
		||||
  explicit Lexer(const SrcFile* file);
 | 
			
		||||
  Lexer(const Lexer&) = delete;
 | 
			
		||||
  Lexer &operator=(const Lexer&) = delete;
 | 
			
		||||
 | 
			
		||||
  // Stores a freshly parsed token in the next slot of the 8-entry ring buffer
  // (the slot index wraps around via `& 7`, so the oldest entry is overwritten).
  void add_token(TokenType type, std::string_view str) {
    ++last_token_idx;
    tokens_circularbuf[last_token_idx & 7] = Token(type, str);
  }
 | 
			
		||||
 | 
			
		||||
  void skip_spaces() {
 | 
			
		||||
    while (std::isspace(*p_next)) {
 | 
			
		||||
      ++p_next;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void skip_line() {
 | 
			
		||||
    while (p_next < p_end && *p_next != '\n' && *p_next != '\r') {
 | 
			
		||||
      ++p_next;
 | 
			
		||||
    }
 | 
			
		||||
    while (*p_next == '\n' || *p_next == '\r') {
 | 
			
		||||
      ++p_next;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Moves the read pointer forward by `n` characters; no bounds check is performed here.
  void skip_chars(int n) {
    p_next = p_next + n;
  }
 | 
			
		||||
 | 
			
		||||
  bool is_eof() const {
 | 
			
		||||
    return p_next >= p_end;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Character at the current read position.
  char char_at() const {
    return p_next[0];
  }
 | 
			
		||||
  // Character `shift` positions away from the current read position; no bounds check is performed here.
  char char_at(int shift) const {
    return p_next[shift];
  }
 | 
			
		||||
  // Raw pointer to the current read position.
  const char* c_str() const {
    return p_next;
  }
 | 
			
		||||
 | 
			
		||||
  // Type of the current token.
  TokenType tok() const {
    return cur_token.type;
  }
 | 
			
		||||
  // Text of the current token as a non-owning view.
  std::string_view cur_str() const {
    return cur_token.str_val;
  }
 | 
			
		||||
  // Text of the current token copied into an owning std::string.
  std::string cur_str_std_string() const {
    return std::string(cur_token.str_val);
  }
 | 
			
		||||
  // Copy of the lexer's current source location.
  SrcLocation cur_location() const {
    return location;
  }
 | 
			
		||||
  int cur_sym_idx() const;
 | 
			
		||||
 | 
			
		||||
  void next();
 | 
			
		||||
  void next_special(TokenType parse_next_as, const char* str_expected);
 | 
			
		||||
 | 
			
		||||
  // Verifies that the current token has type `next_tok` without consuming it.
  // On mismatch, reports "<str_expected> expected ..." through on_expect_call_failed().
  void check(TokenType next_tok, const char* str_expected) const {
    if (cur_token.type == next_tok) {
      return;
    }
    on_expect_call_failed(str_expected);  // unlikely path, declared noreturn, not inlined
  }
 | 
			
		||||
  // Requires the current token to be of type `next_tok`, then advances to the following token.
  // On mismatch, reports "<str_expected> expected ..." through on_expect_call_failed() (declared noreturn).
  void expect(TokenType next_tok, const char* str_expected) {
    const bool matches = (cur_token.type == next_tok);
    if (!matches) {
      on_expect_call_failed(str_expected);
    }
    next();
  }
 | 
			
		||||
 | 
			
		||||
  GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
 | 
			
		||||
  void error(const std::string& err_msg) const;
 | 
			
		||||
  GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
 | 
			
		||||
  void error_at(const std::string& prefix, const std::string& suffix) const;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
void lexer_init();
 | 
			
		||||
 | 
			
		||||
// todo #ifdef TOLK_PROFILING
 | 
			
		||||
void lexer_measure_performance(const std::vector<SrcFile*>& files_to_just_parse);
 | 
			
		||||
 | 
			
		||||
}  // namespace tolk
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -612,7 +612,7 @@ bool Optimizer::optimize() {
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
AsmOpConsList optimize_code_head(AsmOpConsList op_list, int mode) {
 | 
			
		||||
  Optimizer opt(std::move(op_list), op_rewrite_comments, mode);
 | 
			
		||||
  Optimizer opt(std::move(op_list), false, mode);
 | 
			
		||||
  opt.optimize();
 | 
			
		||||
  return opt.extract_code();
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										1191
									
								
								tolk/parse-tolk.cpp
									
										
									
									
									
								
							
							
						
						
									
										1191
									
								
								tolk/parse-tolk.cpp
									
										
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										44
									
								
								tolk/platform-utils.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								tolk/platform-utils.h
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,44 @@
 | 
			
		|||
/*
 | 
			
		||||
    This file is part of TON Blockchain source code.
 | 
			
		||||
 | 
			
		||||
    TON Blockchain is free software; you can redistribute it and/or
 | 
			
		||||
    modify it under the terms of the GNU General Public License
 | 
			
		||||
    as published by the Free Software Foundation; either version 2
 | 
			
		||||
    of the License, or (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    TON Blockchain is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License
 | 
			
		||||
    along with TON Blockchain.  If not, see <http://www.gnu.org/licenses/>.
 | 
			
		||||
 | 
			
		||||
    In addition, as a special exception, the copyright holders give permission
 | 
			
		||||
    to link the code of portions of this program with the OpenSSL library.
 | 
			
		||||
    You must obey the GNU General Public License in all respects for all
 | 
			
		||||
    of the code used other than OpenSSL. If you modify file(s) with this
 | 
			
		||||
    exception, you may extend this exception to your version of the file(s),
 | 
			
		||||
    but you are not obligated to do so. If you do not wish to do so, delete this
 | 
			
		||||
    exception statement from your version. If you delete this exception statement
 | 
			
		||||
    from all source files in the program, then also delete it here.
 | 
			
		||||
*/
 | 
			
		||||
#pragma once
 | 
			
		||||
 | 
			
		||||
// Compiler-specific function attributes.
// Compilers that define __GNUC__ get the [[gnu::...]] spellings; everything else keeps only the
// standard [[noreturn]] and drops the optimization-only hints.
// `defined(__GNUC__)` is used (rather than `#if __GNUC__`) so that the test never evaluates an
// undefined macro and matches the check used for LIKELY/UNLIKELY in this header.
#if defined(__GNUC__)
#define GNU_ATTRIBUTE_COLD [[gnu::cold]]
#define GNU_ATTRIBUTE_NORETURN [[gnu::noreturn]]
#define GNU_ATTRIBUTE_ALWAYS_INLINE [[gnu::always_inline]]
#else
#define GNU_ATTRIBUTE_COLD
#define GNU_ATTRIBUTE_NORETURN [[noreturn]]
#define GNU_ATTRIBUTE_ALWAYS_INLINE
#endif
 | 
			
		||||
 | 
			
		||||
// Branch-prediction hints for conditions.
// The argument is normalized with `!!(x)` so that any contextually-boolean expression works
// (pointers, class types with operator bool, expressions with low-precedence operators):
// __builtin_expect takes a `long`, so passing e.g. a pointer directly does not compile.
// Both macros evaluate to the truth value of `x` (0 or 1 / false or true).
#if defined(__GNUC__)
#define LIKELY(x) __builtin_expect(!!(x), 1)
#define UNLIKELY(x) __builtin_expect(!!(x), 0)
#else
#define LIKELY(x) (!!(x))
#define UNLIKELY(x) (!!(x))
#endif
 | 
			
		||||
							
								
								
									
										164
									
								
								tolk/src-file.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										164
									
								
								tolk/src-file.cpp
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,164 @@
 | 
			
		|||
/*
 | 
			
		||||
    This file is part of TON Blockchain Library.
 | 
			
		||||
 | 
			
		||||
    TON Blockchain Library is free software: you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU Lesser General Public License as published by
 | 
			
		||||
    the Free Software Foundation, either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    TON Blockchain Library is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU Lesser General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU Lesser General Public License
 | 
			
		||||
    along with TON Blockchain Library.  If not, see <http://www.gnu.org/licenses/>.
 | 
			
		||||
*/
 | 
			
		||||
#include "src-file.h"
 | 
			
		||||
#include <iostream>
 | 
			
		||||
 | 
			
		||||
namespace tolk {
 | 
			
		||||
 | 
			
		||||
extern AllRegisteredSrcFiles all_src_files;
 | 
			
		||||
extern std::string stdlib_filename;
 | 
			
		||||
 | 
			
		||||
static_assert(sizeof(SrcLocation) == 8);
 | 
			
		||||
 | 
			
		||||
// Looks up a registered file by its numeric id (linear scan).
// Returns nullptr when no registered file has that id.
const SrcFile* AllRegisteredSrcFiles::find_file(int file_id) const {
  for (auto it = all_src_files.begin(); it != all_src_files.end(); ++it) {
    const SrcFile* candidate = *it;
    if (candidate->file_id == file_id) {
      return candidate;
    }
  }
  return nullptr;
}
 | 
			
		||||
 | 
			
		||||
// Looks up a registered file by its absolute filename (linear scan, exact string match).
// Returns nullptr when no registered file has that name.
const SrcFile* AllRegisteredSrcFiles::find_file(const std::string& abs_filename) const {
  for (auto it = all_src_files.begin(); it != all_src_files.end(); ++it) {
    const SrcFile* candidate = *it;
    if (candidate->abs_filename == abs_filename) {
      return candidate;
    }
  }
  return nullptr;
}
 | 
			
		||||
 | 
			
		||||
// Creates a SrcFile with the next sequential file id, takes over `text`, stores the file
// in the registry and returns a pointer to it.
// The SrcFile is heap-allocated and kept as a raw pointer; nothing in this function releases it.
const SrcFile* AllRegisteredSrcFiles::register_file(const std::string& rel_filename, const std::string& abs_filename, std::string&& text, const SrcFile* included_from) {
  const int new_file_id = ++last_file_id;
  SrcFile* registered = new SrcFile(new_file_id, rel_filename, abs_filename, std::move(text), included_from);
  all_src_files.push_back(registered);
  return registered;
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
bool SrcFile::is_entrypoint_file() const {
 | 
			
		||||
  return file_id == (stdlib_filename.empty() ? 0 : 1);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool SrcFile::is_offset_valid(int offset) const {
 | 
			
		||||
  return offset >= 0 && offset < static_cast<int>(text.size());
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Converts a character offset within `text` into a 1-based (line, column) position,
// together with a view of the whole line containing that offset (without the trailing '\n').
// For an invalid offset, returns {offset, -1, -1, "invalid offset"}.
// Only '\n' is treated as a line separator; any other character (including '\r') counts as a column.
SrcFile::SrcPosition SrcFile::convert_offset(int offset) const {
  if (!is_offset_valid(offset)) {
    return SrcPosition{offset, -1, -1, "invalid offset"};
  }

  // Scan everything before `offset`: count line breaks and remember where the current line starts.
  int newlines_before = 0;
  int line_start = 0;
  for (int pos = 0; pos < offset; ++pos) {
    if (text[pos] == '\n') {
      ++newlines_before;
      line_start = pos + 1;
    }
  }

  // The line ends at the next '\n', or at the end of the text if it is the last line.
  const size_t next_newline = text.find('\n', line_start);
  const size_t line_end = (next_newline == std::string::npos) ? text.size() : next_newline;
  const std::string_view line_str(text.data() + line_start, line_end - line_start);

  return SrcPosition{offset, newlines_before + 1, offset - line_start + 1, line_str};
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
std::ostream& operator<<(std::ostream& os, const SrcFile* src_file) {
 | 
			
		||||
  return os << (src_file ? src_file->rel_filename : "unknown-location");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
std::ostream& operator<<(std::ostream& os, const Fatal& fatal) {
 | 
			
		||||
  return os << fatal.what();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Resolves this location's file_id through the global file registry.
// Returns whatever find_file() yields, i.e. nullptr when the id is not registered.
const SrcFile* SrcLocation::get_src_file() const {
  const AllRegisteredSrcFiles& registry = all_src_files;
  return registry.find_file(file_id);
}
 | 
			
		||||
 | 
			
		||||
void SrcLocation::show(std::ostream& os) const {
 | 
			
		||||
  const SrcFile* src_file = get_src_file();
 | 
			
		||||
  os << src_file;
 | 
			
		||||
  if (src_file && src_file->is_offset_valid(char_offset)) {
 | 
			
		||||
    SrcFile::SrcPosition pos = src_file->convert_offset(char_offset);
 | 
			
		||||
    os << ':' << pos.line_no <<  ':' << pos.char_no;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void SrcLocation::show_context(std::ostream& os) const {
 | 
			
		||||
  const SrcFile* src_file = get_src_file();
 | 
			
		||||
  if (!src_file || !src_file->is_offset_valid(char_offset)) {
 | 
			
		||||
    return;
 | 
			
		||||
  }
 | 
			
		||||
  SrcFile::SrcPosition pos = src_file->convert_offset(char_offset);
 | 
			
		||||
  os << "  "  << pos.line_str << "\n";
 | 
			
		||||
 | 
			
		||||
  os << "  ";
 | 
			
		||||
  for (int i = 1; i < pos.char_no; ++i) {
 | 
			
		||||
    os << ' ';
 | 
			
		||||
  }
 | 
			
		||||
  os << '^' << "\n";
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
std::ostream& operator<<(std::ostream& os, SrcLocation loc) {
 | 
			
		||||
  loc.show(os);
 | 
			
		||||
  return os;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Prints a diagnostic of the form "<location>[: <err_type>]: <message>" followed by the
// source-line context. The ": <err_type>" part is omitted when err_type is empty.
void SrcLocation::show_general_error(std::ostream& os, const std::string& message, const std::string& err_type) const {
  show(os);

  const bool has_type = !err_type.empty();
  if (has_type) {
    os << ": " << err_type;
  }

  os << ": " << message << std::endl;
  show_context(os);
}
 | 
			
		||||
 | 
			
		||||
// Prints `err_msg` to std::cerr as a diagnostic of type "note" at this location.
void SrcLocation::show_note(const std::string& err_msg) const {
  const std::string kind = "note";
  show_general_error(std::cerr, err_msg, kind);
}
 | 
			
		||||
 | 
			
		||||
// Prints `err_msg` to std::cerr as a diagnostic of type "warning" at this location.
void SrcLocation::show_warning(const std::string& err_msg) const {
  const std::string kind = "warning";
  show_general_error(std::cerr, err_msg, kind);
}
 | 
			
		||||
 | 
			
		||||
// Prints `err_msg` to std::cerr as a diagnostic of type "error" at this location.
void SrcLocation::show_error(const std::string& err_msg) const {
  const std::string kind = "error";
  show_general_error(std::cerr, err_msg, kind);
}
 | 
			
		||||
 | 
			
		||||
std::ostream& operator<<(std::ostream& os, const ParseError& error) {
 | 
			
		||||
  error.show(os);
 | 
			
		||||
  return os;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void ParseError::show(std::ostream& os) const {
 | 
			
		||||
  os << where << ": error: " << message << std::endl;
 | 
			
		||||
  where.show_context(os);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
}  // namespace tolk
 | 
			
		||||
							
								
								
									
										120
									
								
								tolk/src-file.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										120
									
								
								tolk/src-file.h
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,120 @@
 | 
			
		|||
/*
 | 
			
		||||
    This file is part of TON Blockchain Library.
 | 
			
		||||
 | 
			
		||||
    TON Blockchain Library is free software: you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU Lesser General Public License as published by
 | 
			
		||||
    the Free Software Foundation, either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    TON Blockchain Library is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU Lesser General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU Lesser General Public License
 | 
			
		||||
    along with TON Blockchain Library.  If not, see <http://www.gnu.org/licenses/>.
 | 
			
		||||
*/
 | 
			
		||||
#pragma once
 | 
			
		||||
 | 
			
		||||
#include <string>
 | 
			
		||||
#include <vector>
 | 
			
		||||
 | 
			
		||||
namespace tolk {
 | 
			
		||||
 | 
			
		||||
// One loaded source file: its id, names, full text and (optionally) the file that included it.
// Non-copyable: copy construction and copy assignment are deleted.
struct SrcFile {
  // Result of resolving a raw character offset (see convert_offset).
  struct SrcPosition {
    int offset;                  // the offset that was converted
    int line_no;                 // line number
    int char_no;                 // column number
    std::string_view line_str;   // non-owning view of the line's text
  };

  int file_id;
  std::string rel_filename;
  std::string abs_filename;
  std::string text;
  const SrcFile* included_from{nullptr};

  SrcFile(int file_id,
          std::string rel_filename,
          std::string abs_filename,
          std::string&& text,
          const SrcFile* included_from)
    : file_id(file_id),
      rel_filename(std::move(rel_filename)),
      abs_filename(std::move(abs_filename)),
      text(std::move(text)),
      included_from(included_from) {}

  SrcFile(const SrcFile& other) = delete;
  SrcFile& operator=(const SrcFile&) = delete;

  bool is_entrypoint_file() const;
  bool is_offset_valid(int offset) const;
  SrcPosition convert_offset(int offset) const;
};
 | 
			
		||||
 | 
			
		||||
class AllRegisteredSrcFiles {
 | 
			
		||||
  std::vector<SrcFile*> all_src_files;
 | 
			
		||||
  int last_file_id = -1;
 | 
			
		||||
 | 
			
		||||
public:
 | 
			
		||||
  const SrcFile *find_file(int file_id) const;
 | 
			
		||||
  const SrcFile* find_file(const std::string& abs_filename) const;
 | 
			
		||||
  const SrcFile* register_file(const std::string& rel_filename, const std::string& abs_filename, std::string&& text, const SrcFile* included_from);
 | 
			
		||||
  const std::vector<SrcFile*>& get_all_files() const { return all_src_files; }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// An exception that carries only a text message (no source location).
struct Fatal final : std::exception {
  std::string message;

  explicit Fatal(std::string _msg)
    : message(std::move(_msg)) {}

  // Points into `message`, so it stays valid only as long as this object and its message are unchanged.
  const char* what() const noexcept override {
    return message.c_str();
  }
};
 | 
			
		||||
 | 
			
		||||
std::ostream& operator<<(std::ostream& os, const Fatal& fatal);
 | 
			
		||||
 | 
			
		||||
// SrcLocation points to a location (line, column) in some loaded .tolk source SrcFile.
 | 
			
		||||
// Note, that instead of storing src_file, line_no, etc., only 2 ints are stored.
 | 
			
		||||
// The purpose is: sizeof(SrcLocation) == 8, so it's just passed/stored without pointers/refs, just like int64_t.
 | 
			
		||||
// When decoding SrcLocation into human-readable format, it's converted to SrcFile::SrcPosition via offset.
 | 
			
		||||
class SrcLocation {
 | 
			
		||||
  friend class Lexer;
 | 
			
		||||
 | 
			
		||||
  int file_id = -1;       // file_id from AllRegisteredSrcFiles
 | 
			
		||||
  int char_offset = -1;   // offset from SrcFile::text
 | 
			
		||||
 | 
			
		||||
public:
 | 
			
		||||
 | 
			
		||||
  SrcLocation() = default;
 | 
			
		||||
  explicit SrcLocation(const SrcFile* src_file) : file_id(src_file->file_id) {
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  bool is_defined() const { return file_id != -1; }
 | 
			
		||||
  const SrcFile* get_src_file() const;
 | 
			
		||||
 | 
			
		||||
  void show(std::ostream& os) const;
 | 
			
		||||
  void show_context(std::ostream& os) const;
 | 
			
		||||
 | 
			
		||||
  void show_general_error(std::ostream& os, const std::string& message, const std::string& err_type) const;
 | 
			
		||||
  void show_note(const std::string& err_msg) const;
 | 
			
		||||
  void show_warning(const std::string& err_msg) const;
 | 
			
		||||
  void show_error(const std::string& err_msg) const;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
std::ostream& operator<<(std::ostream& os, SrcLocation loc);
 | 
			
		||||
 | 
			
		||||
struct ParseError : std::exception {
 | 
			
		||||
  SrcLocation where;
 | 
			
		||||
  std::string message;
 | 
			
		||||
  ParseError(SrcLocation _where, std::string _msg) : where(_where), message(std::move(_msg)) {
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  const char* what() const noexcept override {
 | 
			
		||||
    return message.c_str();
 | 
			
		||||
  }
 | 
			
		||||
  void show(std::ostream& os) const;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
std::ostream& operator<<(std::ostream& os, const ParseError& error);
 | 
			
		||||
 | 
			
		||||
}  // namespace tolk
 | 
			
		||||
							
								
								
									
										228
									
								
								tolk/srcread.cpp
									
										
									
									
									
								
							
							
						
						
									
										228
									
								
								tolk/srcread.cpp
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -1,228 +0,0 @@
 | 
			
		|||
/*
 | 
			
		||||
    This file is part of TON Blockchain Library.
 | 
			
		||||
 | 
			
		||||
    TON Blockchain Library is free software: you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU Lesser General Public License as published by
 | 
			
		||||
    the Free Software Foundation, either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    TON Blockchain Library is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU Lesser General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU Lesser General Public License
 | 
			
		||||
    along with TON Blockchain Library.  If not, see <http://www.gnu.org/licenses/>.
 | 
			
		||||
*/
 | 
			
		||||
#include "srcread.h"
 | 
			
		||||
#include <algorithm>
 | 
			
		||||
 | 
			
		||||
namespace tolk {
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 *
 | 
			
		||||
 *   SOURCE FILE READER
 | 
			
		||||
 *
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
std::ostream& operator<<(std::ostream& os, const FileDescr* fdescr) {
 | 
			
		||||
  return os << (fdescr ? (fdescr->is_stdin ? "stdin" : fdescr->filename) : "unknown-location");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
std::ostream& operator<<(std::ostream& os, const Fatal& fatal) {
 | 
			
		||||
  return os << fatal.get_msg();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
const char* FileDescr::convert_offset(long offset, long* line_no, long* line_pos, long* line_size) const {
 | 
			
		||||
  long lno = 0, lpos = -1, lsize = 0;
 | 
			
		||||
  const char* lstart = nullptr;
 | 
			
		||||
  if (offset >= 0 && offset < (long)text.size()) {
 | 
			
		||||
    auto it = std::upper_bound(line_offs.begin(), line_offs.end(), offset);
 | 
			
		||||
    lno = it - line_offs.begin();
 | 
			
		||||
    if (lno && it != line_offs.end()) {
 | 
			
		||||
      lsize = it[0] - it[-1];
 | 
			
		||||
      lpos = offset - it[-1];
 | 
			
		||||
      lstart = text.data() + it[-1];
 | 
			
		||||
    }
 | 
			
		||||
  } else {
 | 
			
		||||
    lno = (long)line_offs.size();
 | 
			
		||||
  }
 | 
			
		||||
  if (line_no) {
 | 
			
		||||
    *line_no = lno;
 | 
			
		||||
  }
 | 
			
		||||
  if (line_pos) {
 | 
			
		||||
    *line_pos = lpos;
 | 
			
		||||
  }
 | 
			
		||||
  if (line_size) {
 | 
			
		||||
    *line_size = lsize;
 | 
			
		||||
  }
 | 
			
		||||
  return lstart;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Appends `new_line` plus a terminating '\0' to `text` and records the new end
// offset in line_offs (seeding line_offs with 0 on first use).
// Returns a pointer to the start of the appended line inside `text`; a later
// append may reallocate `text`, so the pointer should not be kept across calls.
const char* FileDescr::push_line(std::string new_line) {
  if (line_offs.empty()) {
    line_offs.push_back(0);
  }
  const std::size_t line_start = text.size();
  text.append(new_line);
  text.push_back('\0');
  line_offs.push_back(static_cast<long>(text.size()));
  return text.data() + line_start;
}
 | 
			
		||||
 | 
			
		||||
void SrcLocation::show(std::ostream& os) const {
 | 
			
		||||
  os << fdescr;
 | 
			
		||||
  long line_no, line_pos;
 | 
			
		||||
  if (fdescr && convert_pos(&line_no, &line_pos)) {
 | 
			
		||||
    os << ':' << line_no;
 | 
			
		||||
    if (line_pos >= 0) {
 | 
			
		||||
      os << ':' << (line_pos + 1);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool SrcLocation::show_context(std::ostream& os) const {
 | 
			
		||||
  long line_no, line_pos, line_size;
 | 
			
		||||
  if (!fdescr || !convert_pos(&line_no, &line_pos, &line_size)) {
 | 
			
		||||
    return false;
 | 
			
		||||
  }
 | 
			
		||||
  bool skip_left = (line_pos > 200), skip_right = (line_pos + 200u < line_size);
 | 
			
		||||
  const char* here = fdescr->text.data() + char_offs;
 | 
			
		||||
  const char* base = here - line_pos;
 | 
			
		||||
  const char* start = skip_left ? here - 100 : base;
 | 
			
		||||
  const char* end = skip_right ? here + 100 : base + line_size;
 | 
			
		||||
  os << "  ";
 | 
			
		||||
  if (skip_left) {
 | 
			
		||||
    os << "... ";
 | 
			
		||||
  }
 | 
			
		||||
  for (const char* ptr = start; ptr < end; ptr++) {
 | 
			
		||||
    os << (char)*ptr;
 | 
			
		||||
  }
 | 
			
		||||
  if (skip_right) {
 | 
			
		||||
    os << " ...";
 | 
			
		||||
  }
 | 
			
		||||
  os << std::endl;
 | 
			
		||||
  os << "  ";
 | 
			
		||||
  if (skip_left) {
 | 
			
		||||
    os << "... ";
 | 
			
		||||
  }
 | 
			
		||||
  for (const char* ptr = start; ptr < here; ptr++) {
 | 
			
		||||
    char c = *ptr;
 | 
			
		||||
    os << (c == 9 || c == 10 ? c : ' ');
 | 
			
		||||
  }
 | 
			
		||||
  os << '^' << std::endl;
 | 
			
		||||
  return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
std::ostream& operator<<(std::ostream& os, const SrcLocation& loc) {
 | 
			
		||||
  loc.show(os);
 | 
			
		||||
  return os;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Prints a diagnostic of the form "<location>[: <err_type>]: <message>"
// followed by the source context (when it is available).
// An empty err_type omits the type part.
void SrcLocation::show_gen_error(std::ostream& os, std::string message, std::string err_type) const {
  show(os);
  const bool has_type = !err_type.empty();
  if (has_type) {
    os << ": " << err_type;
  }
  os << ": " << message << std::endl;
  show_context(os);
}
 | 
			
		||||
 | 
			
		||||
std::ostream& operator<<(std::ostream& os, const Error& error) {
 | 
			
		||||
  error.show(os);
 | 
			
		||||
  return os;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void ParseError::show(std::ostream& os) const {
 | 
			
		||||
  os << where << ": error: " << message << std::endl;
 | 
			
		||||
  where.show_context(os);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
SourceReader::SourceReader(std::istream* _is, FileDescr* _fdescr)
 | 
			
		||||
    : ifs(_is), fdescr(_fdescr), loc(_fdescr), eof(false), cur_line_len(0), start(0), cur(0), end(0) {
 | 
			
		||||
  load_line();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void SourceReader::set_eof() {
 | 
			
		||||
  if (!eof) {
 | 
			
		||||
    eof = true;
 | 
			
		||||
    start = cur = end = 0;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int SourceReader::skip_spc() {
 | 
			
		||||
  if (!cur) {
 | 
			
		||||
    return 0;
 | 
			
		||||
  }
 | 
			
		||||
  const char* ptr = cur;
 | 
			
		||||
  int res = 0;
 | 
			
		||||
  while (*ptr == ' ' || *ptr == 9) {
 | 
			
		||||
    ++ptr;
 | 
			
		||||
    ++res;
 | 
			
		||||
  }
 | 
			
		||||
  set_ptr(ptr);
 | 
			
		||||
  return res;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool SourceReader::seek_eof() {
 | 
			
		||||
  while (seek_eoln()) {
 | 
			
		||||
    if (!load_line()) {
 | 
			
		||||
      return true;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  return false;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
const char* SourceReader::set_ptr(const char* ptr) {
 | 
			
		||||
  if (ptr != cur) {
 | 
			
		||||
    if (ptr < cur || ptr > end) {
 | 
			
		||||
      error("parsing position went outside of line");
 | 
			
		||||
    }
 | 
			
		||||
    loc.char_offs += ptr - cur;
 | 
			
		||||
    cur = ptr;
 | 
			
		||||
  }
 | 
			
		||||
  return ptr;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool SourceReader::load_line() {
 | 
			
		||||
  if (eof) {
 | 
			
		||||
    return false;
 | 
			
		||||
  }
 | 
			
		||||
  loc.set_eof();
 | 
			
		||||
  if (ifs->eof()) {
 | 
			
		||||
    set_eof();
 | 
			
		||||
    return false;
 | 
			
		||||
  }
 | 
			
		||||
  std::getline(*ifs, cur_line);
 | 
			
		||||
  if (ifs->fail()) {
 | 
			
		||||
    set_eof();
 | 
			
		||||
    if (!ifs->eof()) {
 | 
			
		||||
      error("cannot read line from source stream");
 | 
			
		||||
    }
 | 
			
		||||
    return false;
 | 
			
		||||
  }
 | 
			
		||||
  std::size_t len = cur_line.size();
 | 
			
		||||
  if (len > 0xffffff) {
 | 
			
		||||
    set_eof();
 | 
			
		||||
    error("line too long");
 | 
			
		||||
    return false;
 | 
			
		||||
  }
 | 
			
		||||
  if (len && cur_line.back() == '\r') {
 | 
			
		||||
    // CP/M line breaks support
 | 
			
		||||
    cur_line.pop_back();
 | 
			
		||||
    --len;
 | 
			
		||||
  }
 | 
			
		||||
  cur_line_len = (int)len;
 | 
			
		||||
  if (fdescr) {
 | 
			
		||||
    cur = start = fdescr->push_line(std::move(cur_line));
 | 
			
		||||
    end = start + len;
 | 
			
		||||
    loc.char_offs = (std::size_t)(cur - fdescr->text.data());
 | 
			
		||||
    cur_line.clear();
 | 
			
		||||
  } else {
 | 
			
		||||
    cur = start = cur_line.c_str();
 | 
			
		||||
    end = start + cur_line_len;
 | 
			
		||||
  }
 | 
			
		||||
  return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
}  // namespace tolk
 | 
			
		||||
							
								
								
									
										162
									
								
								tolk/srcread.h
									
										
									
									
									
								
							
							
						
						
									
										162
									
								
								tolk/srcread.h
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -1,162 +0,0 @@
 | 
			
		|||
/*
 | 
			
		||||
    This file is part of TON Blockchain Library.
 | 
			
		||||
 | 
			
		||||
    TON Blockchain Library is free software: you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU Lesser General Public License as published by
 | 
			
		||||
    the Free Software Foundation, either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    TON Blockchain Library is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU Lesser General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU Lesser General Public License
 | 
			
		||||
    along with TON Blockchain Library.  If not, see <http://www.gnu.org/licenses/>.
 | 
			
		||||
*/
 | 
			
		||||
#pragma once
 | 
			
		||||
 | 
			
		||||
#include <string>
 | 
			
		||||
#include <vector>
 | 
			
		||||
#include <iostream>
 | 
			
		||||
 | 
			
		||||
namespace tolk {
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 *
 | 
			
		||||
 *   SOURCE FILE READER
 | 
			
		||||
 *
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
// Describes one source input: its name, the text accumulated so far and the
// offsets used to map character positions back to lines.
struct FileDescr {
  std::string filename;          // name used when printing locations
  std::string text;              // text of all lines pushed so far
  std::vector<long> line_offs;   // line boundary offsets inside `text`
  bool is_stdin;                 // printed as "stdin" instead of the filename
  bool is_main{false};

  // Implicit construction from a file name is allowed on purpose
  // (the constructor is not explicit).
  FileDescr(std::string _fname, bool _stdin = false) : filename{std::move(_fname)}, is_stdin{_stdin} {
  }

  // Appends a line to `text`; returns a pointer to its first character.
  const char* push_line(std::string new_line);

  // Maps an offset in `text` to line number / position / size; each output
  // pointer may be null. Returns the start of the line or nullptr.
  const char* convert_offset(long offset, long* line_no, long* line_pos, long* line_size = nullptr) const;
};
 | 
			
		||||
 | 
			
		||||
// Unrecoverable error carrying only a text message.
struct Fatal {
  std::string message;

  // Not explicit: a Fatal can be built implicitly from a string.
  Fatal(std::string _msg) : message{std::move(_msg)} {
  }

  // Returns a copy of the stored message.
  std::string get_msg() const {
    std::string copy = message;
    return copy;
  }
};
 | 
			
		||||
 | 
			
		||||
std::ostream& operator<<(std::ostream& os, const Fatal& fatal);
 | 
			
		||||
 | 
			
		||||
struct SrcLocation {
 | 
			
		||||
  const FileDescr* fdescr;
 | 
			
		||||
  long char_offs;
 | 
			
		||||
  SrcLocation() : fdescr(nullptr), char_offs(-1) {
 | 
			
		||||
  }
 | 
			
		||||
  SrcLocation(const FileDescr* _fdescr, long offs = -1) : fdescr(_fdescr), char_offs(-1) {
 | 
			
		||||
  }
 | 
			
		||||
  bool defined() const {
 | 
			
		||||
    return fdescr;
 | 
			
		||||
  }
 | 
			
		||||
  bool eof() const {
 | 
			
		||||
    return char_offs == -1;
 | 
			
		||||
  }
 | 
			
		||||
  void set_eof() {
 | 
			
		||||
    char_offs = -1;
 | 
			
		||||
  }
 | 
			
		||||
  const char* convert_pos(long* line_no, long* line_pos, long* line_size = nullptr) const {
 | 
			
		||||
    return defined() ? fdescr->convert_offset(char_offs, line_no, line_pos, line_size) : nullptr;
 | 
			
		||||
  }
 | 
			
		||||
  void show(std::ostream& os) const;
 | 
			
		||||
  bool show_context(std::ostream& os) const;
 | 
			
		||||
  void show_gen_error(std::ostream& os, std::string message, std::string err_type = "") const;
 | 
			
		||||
  void show_note(std::string err_msg) const {
 | 
			
		||||
    show_gen_error(std::cerr, err_msg, "note");
 | 
			
		||||
  }
 | 
			
		||||
  void show_warning(std::string err_msg) const {
 | 
			
		||||
    show_gen_error(std::cerr, err_msg, "warning");
 | 
			
		||||
  }
 | 
			
		||||
  void show_error(std::string err_msg) const {
 | 
			
		||||
    show_gen_error(std::cerr, err_msg, "error");
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
std::ostream& operator<<(std::ostream& os, const SrcLocation& loc);
 | 
			
		||||
 | 
			
		||||
// Abstract base for errors that know how to print themselves.
struct Error {
  // Virtual destructor so derived errors can be destroyed through Error.
  virtual ~Error() = default;

  // Writes a human-readable description of the error to `os`.
  virtual void show(std::ostream& os) const = 0;
};
 | 
			
		||||
 | 
			
		||||
std::ostream& operator<<(std::ostream& os, const Error& error);
 | 
			
		||||
 | 
			
		||||
struct ParseError : Error {
 | 
			
		||||
  SrcLocation where;
 | 
			
		||||
  std::string message;
 | 
			
		||||
  ParseError(const SrcLocation& _where, std::string _msg) : where(_where), message(_msg) {
 | 
			
		||||
  }
 | 
			
		||||
  ParseError(const SrcLocation* _where, std::string _msg) : message(_msg) {
 | 
			
		||||
    if (_where) {
 | 
			
		||||
      where = *_where;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  ~ParseError() override = default;
 | 
			
		||||
  void show(std::ostream& os) const override;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class SourceReader {
 | 
			
		||||
  std::istream* ifs;
 | 
			
		||||
  FileDescr* fdescr;
 | 
			
		||||
  SrcLocation loc;
 | 
			
		||||
  bool eof;
 | 
			
		||||
  std::string cur_line;
 | 
			
		||||
  int cur_line_len;
 | 
			
		||||
  void set_eof();
 | 
			
		||||
  const char *start, *cur, *end;
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
  SourceReader(std::istream* _is, FileDescr* _fdescr);
 | 
			
		||||
  bool load_line();
 | 
			
		||||
  bool is_eof() const {
 | 
			
		||||
    return eof;
 | 
			
		||||
  }
 | 
			
		||||
  int is_eoln() const {
 | 
			
		||||
    return cur == end;
 | 
			
		||||
  }
 | 
			
		||||
  int skip_spc();
 | 
			
		||||
  bool seek_eoln() {
 | 
			
		||||
    skip_spc();
 | 
			
		||||
    return is_eoln();
 | 
			
		||||
  }
 | 
			
		||||
  bool seek_eof();
 | 
			
		||||
  const char* cur_line_cstr() const {
 | 
			
		||||
    return cur_line.c_str();
 | 
			
		||||
  }
 | 
			
		||||
  const SrcLocation& here() const {
 | 
			
		||||
    return loc;
 | 
			
		||||
  }
 | 
			
		||||
  char cur_char() const {
 | 
			
		||||
    return *cur;
 | 
			
		||||
  }
 | 
			
		||||
  char next_char() const {
 | 
			
		||||
    return cur[1];
 | 
			
		||||
  }
 | 
			
		||||
  const char* get_ptr() const {
 | 
			
		||||
    return cur;
 | 
			
		||||
  }
 | 
			
		||||
  const char* get_end_ptr() const {
 | 
			
		||||
    return end;
 | 
			
		||||
  }
 | 
			
		||||
  const char* set_ptr(const char* ptr);
 | 
			
		||||
  void advance(int n) {
 | 
			
		||||
    set_ptr(get_ptr() + n);
 | 
			
		||||
  }
 | 
			
		||||
  void error(std::string err_msg) {
 | 
			
		||||
    throw ParseError{loc, err_msg};
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
}  // namespace tolk
 | 
			
		||||
| 
						 | 
				
			
			@ -28,13 +28,19 @@ namespace tolk {
 | 
			
		|||
 | 
			
		||||
int scope_level;
 | 
			
		||||
 | 
			
		||||
SymTable<100003> symbols;
 | 
			
		||||
SymTable symbols;
 | 
			
		||||
 | 
			
		||||
SymDef* sym_def[symbols.hprime + 1];
 | 
			
		||||
SymDef* global_sym_def[symbols.hprime + 1];
 | 
			
		||||
SymDef* sym_def[symbols.SIZE_PRIME + 1];
 | 
			
		||||
SymDef* global_sym_def[symbols.SIZE_PRIME + 1];
 | 
			
		||||
std::vector<std::pair<int, SymDef>> symbol_stack;
 | 
			
		||||
std::vector<SrcLocation> scope_opened_at;
 | 
			
		||||
 | 
			
		||||
// Builds a symbol and classifies it by its first character:
//   '.' -> dot_identifier, '~' -> tilde_identifier, anything else -> undef.
Symbol::Symbol(std::string str, sym_idx_t idx) : str(std::move(str)), idx(idx) {
  // The parameter was moved from, so the member has to be inspected.
  // For an empty name, operator[](0) yields '\0', which falls into `default`.
  switch (this->str[0]) {
    case '.':
      subclass = SymbolSubclass::dot_identifier;
      break;
    case '~':
      subclass = SymbolSubclass::tilde_identifier;
      break;
    default:
      subclass = SymbolSubclass::undef;
      break;
  }
}
 | 
			
		||||
 | 
			
		||||
std::string Symbol::unknown_symbol_name(sym_idx_t i) {
 | 
			
		||||
  if (!i) {
 | 
			
		||||
    return "_";
 | 
			
		||||
| 
						 | 
				
			
			@ -45,57 +51,43 @@ std::string Symbol::unknown_symbol_name(sym_idx_t i) {
 | 
			
		|||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sym_idx_t SymTableBase::gen_lookup(std::string str, int mode, sym_idx_t idx) {
 | 
			
		||||
sym_idx_t SymTable::gen_lookup(std::string_view str, int mode, sym_idx_t idx) {
 | 
			
		||||
  unsigned long long h1 = 1, h2 = 1;
 | 
			
		||||
  for (char c : str) {
 | 
			
		||||
    h1 = ((h1 * 239) + (unsigned char)(c)) % p;
 | 
			
		||||
    h2 = ((h2 * 17) + (unsigned char)(c)) % (p - 1);
 | 
			
		||||
    h1 = ((h1 * 239) + (unsigned char)(c)) % SIZE_PRIME;
 | 
			
		||||
    h2 = ((h2 * 17) + (unsigned char)(c)) % (SIZE_PRIME - 1);
 | 
			
		||||
  }
 | 
			
		||||
  ++h2;
 | 
			
		||||
  ++h1;
 | 
			
		||||
  while (true) {
 | 
			
		||||
    if (sym_table[h1]) {
 | 
			
		||||
      if (sym_table[h1]->str == str) {
 | 
			
		||||
    if (sym[h1]) {
 | 
			
		||||
      if (sym[h1]->str == str) {
 | 
			
		||||
        return (mode & 2) ? not_found : sym_idx_t(h1);
 | 
			
		||||
      }
 | 
			
		||||
      h1 += h2;
 | 
			
		||||
      if (h1 > p) {
 | 
			
		||||
        h1 -= p;
 | 
			
		||||
      if (h1 > SIZE_PRIME) {
 | 
			
		||||
        h1 -= SIZE_PRIME;
 | 
			
		||||
      }
 | 
			
		||||
    } else {
 | 
			
		||||
      if (!(mode & 1)) {
 | 
			
		||||
        return not_found;
 | 
			
		||||
      }
 | 
			
		||||
      if (def_sym >= ((long long)p * 3) / 4) {
 | 
			
		||||
      if (def_sym >= ((long long)SIZE_PRIME * 3) / 4) {
 | 
			
		||||
        throw SymTableOverflow{def_sym};
 | 
			
		||||
      }
 | 
			
		||||
      sym_table[h1] = std::make_unique<Symbol>(str, idx <= 0 ? sym_idx_t(h1) : -idx);
 | 
			
		||||
      sym[h1] = std::make_unique<Symbol>(static_cast<std::string>(str), idx <= 0 ? sym_idx_t(h1) : -idx);
 | 
			
		||||
      ++def_sym;
 | 
			
		||||
      return sym_idx_t(h1);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Registers `str` as a keyword.
// A non-positive `idx` requests the next automatically assigned keyword
// index. Throws SymTableKwRedef when the lookup returns 0 for the string
// (with mode -1 that happens when it is already present in the table).
// Returns *this so that registrations can be chained.
SymTableBase& SymTableBase::add_keyword(std::string str, sym_idx_t idx) {
  const sym_idx_t kw_idx = (idx <= 0) ? ++def_kw : idx;
  const sym_idx_t slot = gen_lookup(str, -1, kw_idx);
  if (slot == 0) {
    throw SymTableKwRedef{str};
  }
  // Only indexes below max_kw_idx fit into the keyword table.
  if (kw_idx < max_kw_idx) {
    keywords[kw_idx] = slot;
  }
  return *this;
}
 | 
			
		||||
 | 
			
		||||
void open_scope(Lexer& lex) {
 | 
			
		||||
void open_scope(SrcLocation loc) {
 | 
			
		||||
  ++scope_level;
 | 
			
		||||
  scope_opened_at.push_back(lex.cur().loc);
 | 
			
		||||
  scope_opened_at.push_back(loc);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void close_scope(Lexer& lex) {
 | 
			
		||||
void close_scope(SrcLocation loc) {
 | 
			
		||||
  if (!scope_level) {
 | 
			
		||||
    throw Fatal{"cannot close the outer scope"};
 | 
			
		||||
  }
 | 
			
		||||
| 
						 | 
				
			
			@ -124,24 +116,20 @@ void close_scope(Lexer& lex) {
 | 
			
		|||
  scope_opened_at.pop_back();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
SymDef* lookup_symbol(sym_idx_t idx, int flags) {
 | 
			
		||||
SymDef* lookup_symbol(sym_idx_t idx) {
 | 
			
		||||
  if (!idx) {
 | 
			
		||||
    return nullptr;
 | 
			
		||||
  }
 | 
			
		||||
  if ((flags & 1) && sym_def[idx]) {
 | 
			
		||||
  if (sym_def[idx]) {
 | 
			
		||||
    return sym_def[idx];
 | 
			
		||||
  }
 | 
			
		||||
  if ((flags & 2) && global_sym_def[idx]) {
 | 
			
		||||
  if (global_sym_def[idx]) {
 | 
			
		||||
    return global_sym_def[idx];
 | 
			
		||||
  }
 | 
			
		||||
  return nullptr;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
SymDef* lookup_symbol(std::string name, int flags) {
 | 
			
		||||
  return lookup_symbol(symbols.lookup(name), flags);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new, const SrcLocation& loc) {
 | 
			
		||||
SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc) {
 | 
			
		||||
  if (!name_idx) {
 | 
			
		||||
    return nullptr;
 | 
			
		||||
  }
 | 
			
		||||
| 
						 | 
				
			
			@ -156,7 +144,7 @@ SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new, const SrcLocati
 | 
			
		|||
  return found;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
SymDef* define_symbol(sym_idx_t name_idx, bool force_new, const SrcLocation& loc) {
 | 
			
		||||
SymDef* define_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc) {
 | 
			
		||||
  if (!name_idx) {
 | 
			
		||||
    return nullptr;
 | 
			
		||||
  }
 | 
			
		||||
| 
						 | 
				
			
			@ -176,7 +164,7 @@ SymDef* define_symbol(sym_idx_t name_idx, bool force_new, const SrcLocation& loc
 | 
			
		|||
    return found;
 | 
			
		||||
  }
 | 
			
		||||
  found = sym_def[name_idx] = new SymDef(scope_level, name_idx, loc);
 | 
			
		||||
  symbol_stack.push_back(std::make_pair(scope_level, SymDef{0, name_idx}));
 | 
			
		||||
  symbol_stack.push_back(std::make_pair(scope_level, SymDef{0, name_idx, loc}));
 | 
			
		||||
#ifdef TOLK_DEBUG
 | 
			
		||||
  found->sym_name = found->name();
 | 
			
		||||
  symbol_stack.back().second.sym_name = found->name();
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										116
									
								
								tolk/symtable.h
									
										
									
									
									
								
							
							
						
						
									
										116
									
								
								tolk/symtable.h
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -15,8 +15,9 @@
 | 
			
		|||
    along with TON Blockchain Library.  If not, see <http://www.gnu.org/licenses/>.
 | 
			
		||||
*/
 | 
			
		||||
#pragma once
 | 
			
		||||
#include "srcread.h"
 | 
			
		||||
#include "lexer.h"
 | 
			
		||||
#include "src-file.h"
 | 
			
		||||
#include <functional>
 | 
			
		||||
#include <memory>
 | 
			
		||||
#include <vector>
 | 
			
		||||
 | 
			
		||||
namespace tolk {
 | 
			
		||||
| 
						 | 
				
			
			@ -29,11 +30,12 @@ namespace tolk {
 | 
			
		|||
 | 
			
		||||
typedef int var_idx_t;
 | 
			
		||||
 | 
			
		||||
enum class SymValKind { _Param, _Var, _Func, _Typename, _GlobVar, _Const };
 | 
			
		||||
 | 
			
		||||
struct SymValBase {
 | 
			
		||||
  enum { _Param, _Var, _Func, _Typename, _GlobVar, _Const };
 | 
			
		||||
  int type;
 | 
			
		||||
  SymValKind kind;
 | 
			
		||||
  int idx;
 | 
			
		||||
  SymValBase(int _type, int _idx) : type(_type), idx(_idx) {
 | 
			
		||||
  SymValBase(SymValKind kind, int idx) : kind(kind), idx(idx) {
 | 
			
		||||
  }
 | 
			
		||||
  virtual ~SymValBase() = default;
 | 
			
		||||
};
 | 
			
		||||
| 
						 | 
				
			
			@ -44,92 +46,69 @@ struct SymValBase {
 | 
			
		|||
 *
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
// defined outside this module (by the end user)
 | 
			
		||||
int compute_symbol_subclass(std::string str);  // return 0 if unneeded
 | 
			
		||||
// Classification of a symbol by the first character of its name.
enum class SymbolSubclass {
  // no special prefix
  undef = 0,
  // begins with . (a const method)
  dot_identifier = 1,
  // begins with ~ (a non-const method)
  tilde_identifier = 2
};
 | 
			
		||||
 | 
			
		||||
typedef int sym_idx_t;
 | 
			
		||||
 | 
			
		||||
struct Symbol {
 | 
			
		||||
  std::string str;
 | 
			
		||||
  sym_idx_t idx;
 | 
			
		||||
  int subclass;
 | 
			
		||||
  Symbol(std::string _str, sym_idx_t _idx, int _sc) : str(_str), idx(_idx), subclass(_sc) {
 | 
			
		||||
  }
 | 
			
		||||
  Symbol(std::string _str, sym_idx_t _idx) : str(_str), idx(_idx) {
 | 
			
		||||
    subclass = compute_symbol_subclass(std::move(_str));
 | 
			
		||||
  }
 | 
			
		||||
  SymbolSubclass subclass;
 | 
			
		||||
 | 
			
		||||
  Symbol(std::string str, sym_idx_t idx);
 | 
			
		||||
 | 
			
		||||
  static std::string unknown_symbol_name(sym_idx_t i);
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class SymTableBase {
 | 
			
		||||
  unsigned p;
 | 
			
		||||
  std::unique_ptr<Symbol>* sym_table;
 | 
			
		||||
  sym_idx_t def_kw, def_sym;
 | 
			
		||||
class SymTable {
 | 
			
		||||
public:
 | 
			
		||||
  static constexpr int SIZE_PRIME = 100003;
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
  sym_idx_t def_sym{0};
 | 
			
		||||
  std::unique_ptr<Symbol> sym[SIZE_PRIME + 1];
 | 
			
		||||
  sym_idx_t gen_lookup(std::string_view str, int mode = 0, sym_idx_t idx = 0);
 | 
			
		||||
 | 
			
		||||
  static constexpr int max_kw_idx = 10000;
 | 
			
		||||
  sym_idx_t keywords[max_kw_idx];
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
  SymTableBase(unsigned p_, std::unique_ptr<Symbol>* sym_table_)
 | 
			
		||||
      : p(p_), sym_table(sym_table_), def_kw(0x100), def_sym(0) {
 | 
			
		||||
    std::memset(keywords, 0, sizeof(keywords));
 | 
			
		||||
  }
 | 
			
		||||
public:
 | 
			
		||||
 | 
			
		||||
  static constexpr sym_idx_t not_found = 0;
 | 
			
		||||
  SymTableBase& add_keyword(std::string str, sym_idx_t idx = 0);
 | 
			
		||||
  SymTableBase& add_kw_char(char c) {
 | 
			
		||||
    return add_keyword(std::string{c}, c);
 | 
			
		||||
  }
 | 
			
		||||
  sym_idx_t lookup(std::string str, int mode = 0) {
 | 
			
		||||
  sym_idx_t lookup(const std::string_view& str, int mode = 0) {
 | 
			
		||||
    return gen_lookup(str, mode);
 | 
			
		||||
  }
 | 
			
		||||
  sym_idx_t lookup_add(std::string str) {
 | 
			
		||||
  sym_idx_t lookup_add(const std::string& str) {
 | 
			
		||||
    return gen_lookup(str, 1);
 | 
			
		||||
  }
 | 
			
		||||
  Symbol* operator[](sym_idx_t i) const {
 | 
			
		||||
    return sym_table[i].get();
 | 
			
		||||
    return sym[i].get();
 | 
			
		||||
  }
 | 
			
		||||
  bool is_keyword(sym_idx_t i) const {
 | 
			
		||||
    return sym_table[i] && sym_table[i]->idx < 0;
 | 
			
		||||
    return sym[i] && sym[i]->idx < 0;
 | 
			
		||||
  }
 | 
			
		||||
  std::string get_name(sym_idx_t i) const {
 | 
			
		||||
    return sym_table[i] ? sym_table[i]->str : Symbol::unknown_symbol_name(i);
 | 
			
		||||
    return sym[i] ? sym[i]->str : Symbol::unknown_symbol_name(i);
 | 
			
		||||
  }
 | 
			
		||||
  int get_subclass(sym_idx_t i) const {
 | 
			
		||||
    return sym_table[i] ? sym_table[i]->subclass : 0;
 | 
			
		||||
  SymbolSubclass get_subclass(sym_idx_t i) const {
 | 
			
		||||
    return sym[i] ? sym[i]->subclass : SymbolSubclass::undef;
 | 
			
		||||
  }
 | 
			
		||||
  Symbol* get_keyword(int i) const {
 | 
			
		||||
    return ((unsigned)i < (unsigned)max_kw_idx) ? sym_table[keywords[i]].get() : nullptr;
 | 
			
		||||
    return ((unsigned)i < (unsigned)max_kw_idx) ? sym[keywords[i]].get() : nullptr;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 protected:
 | 
			
		||||
  sym_idx_t gen_lookup(std::string str, int mode = 0, sym_idx_t idx = 0);
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <unsigned pp>
 | 
			
		||||
class SymTable : public SymTableBase {
 | 
			
		||||
 public:
 | 
			
		||||
  static constexpr int hprime = pp;
 | 
			
		||||
  static int size() {
 | 
			
		||||
    return pp + 1;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 private:
 | 
			
		||||
  std::unique_ptr<Symbol> sym[pp + 1];
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
  SymTable() : SymTableBase(pp, sym) {
 | 
			
		||||
  }
 | 
			
		||||
  SymTable& add_keyword(std::string str, sym_idx_t idx = 0) {
 | 
			
		||||
    SymTableBase::add_keyword(str, idx);
 | 
			
		||||
    return *this;
 | 
			
		||||
  }
 | 
			
		||||
  SymTable& add_kw_char(char c) {
 | 
			
		||||
    return add_keyword(std::string{c}, c);
 | 
			
		||||
  SymTable() {
 | 
			
		||||
    std::memset(keywords, 0, sizeof(keywords));
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct SymTableOverflow {
 | 
			
		||||
  int sym_def;
 | 
			
		||||
  SymTableOverflow(int x) : sym_def(x) {
 | 
			
		||||
  explicit SymTableOverflow(int x) : sym_def(x) {
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -139,7 +118,7 @@ struct SymTableKwRedef {
 | 
			
		|||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
extern SymTable<100003> symbols;
 | 
			
		||||
extern SymTable symbols;
 | 
			
		||||
 | 
			
		||||
extern int scope_level;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -151,7 +130,7 @@ struct SymDef {
 | 
			
		|||
#ifdef TOLK_DEBUG
 | 
			
		||||
  std::string sym_name;
 | 
			
		||||
#endif
 | 
			
		||||
  SymDef(int lvl, sym_idx_t idx, const SrcLocation& _loc = {}, SymValBase* val = 0)
 | 
			
		||||
  SymDef(int lvl, sym_idx_t idx, SrcLocation _loc, SymValBase* val = nullptr)
 | 
			
		||||
      : level(lvl), sym_idx(idx), value(val), loc(_loc) {
 | 
			
		||||
  }
 | 
			
		||||
  bool has_name() const {
 | 
			
		||||
| 
						 | 
				
			
			@ -162,17 +141,16 @@ struct SymDef {
 | 
			
		|||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
extern SymDef* sym_def[symbols.hprime + 1];
 | 
			
		||||
extern SymDef* global_sym_def[symbols.hprime + 1];
 | 
			
		||||
extern SymDef* sym_def[symbols.SIZE_PRIME + 1];
 | 
			
		||||
extern SymDef* global_sym_def[symbols.SIZE_PRIME + 1];
 | 
			
		||||
extern std::vector<std::pair<int, SymDef>> symbol_stack;
 | 
			
		||||
extern std::vector<SrcLocation> scope_opened_at;
 | 
			
		||||
 | 
			
		||||
void open_scope(Lexer& lex);
 | 
			
		||||
void close_scope(Lexer& lex);
 | 
			
		||||
SymDef* lookup_symbol(sym_idx_t idx, int flags = 3);
 | 
			
		||||
SymDef* lookup_symbol(std::string name, int flags = 3);
 | 
			
		||||
void open_scope(SrcLocation loc);
 | 
			
		||||
void close_scope(SrcLocation loc);
 | 
			
		||||
SymDef* lookup_symbol(sym_idx_t idx);
 | 
			
		||||
 | 
			
		||||
SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new = false, const SrcLocation& loc = {});
 | 
			
		||||
SymDef* define_symbol(sym_idx_t name_idx, bool force_new = false, const SrcLocation& loc = {});
 | 
			
		||||
SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new = false, SrcLocation loc = {});
 | 
			
		||||
SymDef* define_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc);
 | 
			
		||||
 | 
			
		||||
}  // namespace tolk
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -30,67 +30,41 @@
 | 
			
		|||
 | 
			
		||||
void usage(const char* progname) {
 | 
			
		||||
  std::cerr
 | 
			
		||||
      << "usage: " << progname
 | 
			
		||||
      << " [-vIAPSR][-O<level>][-i<indent-spc>][-o<output-filename>][-W<boc-filename>] {<filename.tolk> ...}\n"
 | 
			
		||||
         "\tGenerates Fift TVM assembler code from a Tolk source\n"
 | 
			
		||||
         "-I\tEnables interactive mode (parse stdin)\n"
 | 
			
		||||
         "-o<fift-output-filename>\tWrites generated code into specified file instead of stdout\n"
 | 
			
		||||
         "-v\tIncreases verbosity level (extra information output into stderr)\n"
 | 
			
		||||
         "-i<indent>\tSets indentation for the output code (in two-space units)\n"
 | 
			
		||||
         "-A\tPrefix code with `\"Asm.fif\" include` preamble\n"
 | 
			
		||||
      << "usage: " << progname << " [options] <filename.tolk>\n"
 | 
			
		||||
         "\tGenerates Fift TVM assembler code from a .tolk file\n"
 | 
			
		||||
         "-o<fif-filename>\tWrites generated code into specified .fif file instead of stdout\n"
 | 
			
		||||
         "-b<boc-filename>\tGenerate Fift instructions to save TVM bytecode into .boc file\n"
 | 
			
		||||
         "-O<level>\tSets optimization level (2 by default)\n"
 | 
			
		||||
         "-P\tEnvelope code into PROGRAM{ ... }END>c\n"
 | 
			
		||||
         "-S\tInclude stack layout comments in the output code\n"
 | 
			
		||||
         "-R\tInclude operation rewrite comments in the output code\n"
 | 
			
		||||
         "-W<output-boc-file>\tInclude Fift code to serialize and save generated code into specified BoC file. Enables "
 | 
			
		||||
         "-A and -P.\n"
 | 
			
		||||
         "\t-s\tOutput semantic version of Tolk and exit\n"
 | 
			
		||||
         "\t-V<version>\tShow Tolk build information\n";
 | 
			
		||||
         "-S\tDon't include stack layout comments into Fift output\n"
 | 
			
		||||
         "-e\tIncreases verbosity level (extra output into stderr)\n"
 | 
			
		||||
         "-v\tOutput version of Tolk and exit\n";
 | 
			
		||||
  std::exit(2);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int main(int argc, char* const argv[]) {
 | 
			
		||||
  int i;
 | 
			
		||||
  std::string output_filename;
 | 
			
		||||
  while ((i = getopt(argc, argv, "Ahi:Io:O:PRsSvW:V")) != -1) {
 | 
			
		||||
  while ((i = getopt(argc, argv, "o:b:O:Sevh")) != -1) {
 | 
			
		||||
    switch (i) {
 | 
			
		||||
      case 'A':
 | 
			
		||||
        tolk::asm_preamble = true;
 | 
			
		||||
        break;
 | 
			
		||||
      case 'I':
 | 
			
		||||
        tolk::interactive = true;
 | 
			
		||||
        break;
 | 
			
		||||
      case 'i':
 | 
			
		||||
        tolk::indent = std::max(0, atoi(optarg));
 | 
			
		||||
        break;
 | 
			
		||||
      case 'o':
 | 
			
		||||
        output_filename = optarg;
 | 
			
		||||
        break;
 | 
			
		||||
      case 'b':
 | 
			
		||||
        tolk::boc_output_filename = optarg;
 | 
			
		||||
        break;
 | 
			
		||||
      case 'O':
 | 
			
		||||
        tolk::opt_level = std::max(0, atoi(optarg));
 | 
			
		||||
        break;
 | 
			
		||||
      case 'P':
 | 
			
		||||
        tolk::program_envelope = true;
 | 
			
		||||
        break;
 | 
			
		||||
      case 'R':
 | 
			
		||||
        tolk::op_rewrite_comments = true;
 | 
			
		||||
        break;
 | 
			
		||||
      case 'S':
 | 
			
		||||
        tolk::stack_layout_comments = true;
 | 
			
		||||
        tolk::stack_layout_comments = false;
 | 
			
		||||
        break;
 | 
			
		||||
      case 'v':
 | 
			
		||||
      case 'e':
 | 
			
		||||
        ++tolk::verbosity;
 | 
			
		||||
        break;
 | 
			
		||||
      case 'W':
 | 
			
		||||
        tolk::boc_output_filename = optarg;
 | 
			
		||||
        tolk::asm_preamble = tolk::program_envelope = true;
 | 
			
		||||
        break;
 | 
			
		||||
      case 's':
 | 
			
		||||
        std::cout << tolk::tolk_version << "\n";
 | 
			
		||||
        std::exit(0);
 | 
			
		||||
      case 'V':
 | 
			
		||||
        std::cout << "Tolk semantic version: v" << tolk::tolk_version << "\n";
 | 
			
		||||
        std::cout << "Build information: [ Commit: " << GitMetadata::CommitSHA1() << ", Date: " << GitMetadata::CommitDate() << "]\n";
 | 
			
		||||
      case 'v':
 | 
			
		||||
        std::cout << "Tolk compiler v" << tolk::tolk_version << "\n";
 | 
			
		||||
        std::cout << "Build commit: " << GitMetadata::CommitSHA1() << "\n";
 | 
			
		||||
        std::cout << "Build date: " << GitMetadata::CommitDate() << "\n";
 | 
			
		||||
        std::exit(0);
 | 
			
		||||
      case 'h':
 | 
			
		||||
      default:
 | 
			
		||||
| 
						 | 
				
			
			@ -110,13 +84,14 @@ int main(int argc, char* const argv[]) {
 | 
			
		|||
    outs = fs.get();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  std::vector<std::string> sources;
 | 
			
		||||
 | 
			
		||||
  while (optind < argc) {
 | 
			
		||||
    sources.push_back(std::string(argv[optind++]));
 | 
			
		||||
  if (optind != argc - 1) {
 | 
			
		||||
    std::cerr << "invalid usage: should specify exactly one input file.tolk";
 | 
			
		||||
    return 2;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  std::string entrypoint_file_name = argv[optind];
 | 
			
		||||
 | 
			
		||||
  tolk::read_callback = tolk::fs_read_callback;
 | 
			
		||||
 | 
			
		||||
  return tolk::tolk_proceed(sources, *outs, std::cerr);
 | 
			
		||||
  return tolk::tolk_proceed(entrypoint_file_name, *outs, std::cerr);
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -31,81 +31,58 @@
 | 
			
		|||
#include "td/utils/Status.h"
 | 
			
		||||
#include <sstream>
 | 
			
		||||
#include <iomanip>
 | 
			
		||||
#include "vm/boc.h"
 | 
			
		||||
 | 
			
		||||
td::Result<std::string> compile_internal(char *config_json) {
 | 
			
		||||
  TRY_RESULT(input_json, td::json_decode(td::MutableSlice(config_json)))
 | 
			
		||||
  auto &obj = input_json.get_object();
 | 
			
		||||
  td::JsonObject& config = input_json.get_object();
 | 
			
		||||
 | 
			
		||||
  TRY_RESULT(opt_level, td::get_json_object_int_field(obj, "optLevel", false));
 | 
			
		||||
  TRY_RESULT(sources_obj, td::get_json_object_field(obj, "sources", td::JsonValue::Type::Array, false));
 | 
			
		||||
 | 
			
		||||
  auto &sources_arr = sources_obj.get_array();
 | 
			
		||||
 | 
			
		||||
  std::vector<std::string> sources;
 | 
			
		||||
 | 
			
		||||
  for (auto &item : sources_arr) {
 | 
			
		||||
    sources.push_back(item.get_string().str());
 | 
			
		||||
  }
 | 
			
		||||
  TRY_RESULT(opt_level, td::get_json_object_int_field(config, "optimizationLevel", true, 2));
 | 
			
		||||
  TRY_RESULT(stack_comments, td::get_json_object_bool_field(config, "withStackComments", true, false));
 | 
			
		||||
  TRY_RESULT(entrypoint_file_name, td::get_json_object_string_field(config, "entrypointFileName", false));
 | 
			
		||||
 | 
			
		||||
  tolk::opt_level = std::max(0, opt_level);
 | 
			
		||||
  tolk::program_envelope = true;
 | 
			
		||||
  tolk::verbosity = 0;
 | 
			
		||||
  tolk::indent = 1;
 | 
			
		||||
  tolk::stack_layout_comments = stack_comments;
 | 
			
		||||
 | 
			
		||||
  std::ostringstream outs, errs;
 | 
			
		||||
  auto compile_res = tolk::tolk_proceed(sources, outs, errs);
 | 
			
		||||
 | 
			
		||||
  if (compile_res != 0) {
 | 
			
		||||
    return td::Status::Error(std::string("Tolk compilation error: ") + errs.str());
 | 
			
		||||
  int tolk_res = tolk::tolk_proceed(entrypoint_file_name, outs, errs);
 | 
			
		||||
  if (tolk_res != 0) {
 | 
			
		||||
    return td::Status::Error("Tolk compilation error: " + errs.str());
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  TRY_RESULT(code_cell, fift::compile_asm(outs.str(), "/fiftlib/", false));
 | 
			
		||||
  TRY_RESULT(boc, vm::std_boc_serialize(code_cell));
 | 
			
		||||
  TRY_RESULT(fift_res, fift::compile_asm_program(outs.str(), "/fiftlib/"));
 | 
			
		||||
 | 
			
		||||
  td::JsonBuilder result_json;
 | 
			
		||||
  auto result_obj = result_json.enter_object();
 | 
			
		||||
  result_obj("status", "ok");
 | 
			
		||||
  result_obj("codeBoc", td::base64_encode(boc));
 | 
			
		||||
  result_obj("fiftCode", outs.str());
 | 
			
		||||
  result_obj("codeHashHex", code_cell->get_hash().to_hex());
 | 
			
		||||
  result_obj.leave();
 | 
			
		||||
 | 
			
		||||
  outs.clear();
 | 
			
		||||
  errs.clear();
 | 
			
		||||
  auto obj = result_json.enter_object();
 | 
			
		||||
  obj("status", "ok");
 | 
			
		||||
  obj("fiftCode", fift_res.fiftCode);
 | 
			
		||||
  obj("codeBoc64", fift_res.codeBoc64);
 | 
			
		||||
  obj("codeHashHex", fift_res.codeHashHex);
 | 
			
		||||
  obj.leave();
 | 
			
		||||
 | 
			
		||||
  return result_json.string_builder().as_cslice().str();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Callback used to retrieve additional source files or data.
 | 
			
		||||
///
 | 
			
		||||
/// @param _kind The kind of callback (a string).
 | 
			
		||||
/// @param _data The data for the callback (a string).
 | 
			
		||||
/// @param o_contents A pointer to the contents of the file, if found. Allocated via malloc().
 | 
			
		||||
/// @param o_error A pointer to an error message, if there is one. Allocated via malloc().
 | 
			
		||||
///
 | 
			
		||||
/// The callback implementor must use malloc() to allocate storage for
 | 
			
		||||
/// contents or error. The callback implementor must use free() to free
 | 
			
		||||
/// said storage after tolk_compile returns.
 | 
			
		||||
///
 | 
			
		||||
/// If the callback is not supported, *o_contents and *o_error must be set to NULL.
 | 
			
		||||
typedef void (*CStyleReadFileCallback)(char const* _kind, char const* _data, char** o_contents, char** o_error);
 | 
			
		||||
/// Callback used to retrieve file contents from a source other than the real file system. See tolk-js for implementation.
 | 
			
		||||
/// The callback must fill either destContents or destError.
 | 
			
		||||
/// The implementor must use malloc() for them and use free() after tolk_compile returns.
 | 
			
		||||
typedef void (*CStyleReadFileCallback)(int kind, char const* data, char** destContents, char** destError);
 | 
			
		||||
 | 
			
		||||
tolk::ReadCallback::Callback wrapReadCallback(CStyleReadFileCallback _readCallback)
 | 
			
		||||
{
 | 
			
		||||
  tolk::ReadCallback::Callback readCallback;
 | 
			
		||||
  if (_readCallback) {
 | 
			
		||||
    readCallback = [=](tolk::ReadCallback::Kind _kind, char const* _data) -> td::Result<std::string> {
 | 
			
		||||
      char* contents_c = nullptr;
 | 
			
		||||
      char* error_c = nullptr;
 | 
			
		||||
      _readCallback(tolk::ReadCallback::kindString(_kind).data(), _data, &contents_c, &error_c);
 | 
			
		||||
      if (!contents_c && !error_c) {
 | 
			
		||||
    readCallback = [=](tolk::ReadCallback::Kind kind, char const* data) -> td::Result<std::string> {
 | 
			
		||||
      char* destContents = nullptr;
 | 
			
		||||
      char* destError = nullptr;
 | 
			
		||||
      _readCallback(static_cast<int>(kind), data, &destContents, &destError);
 | 
			
		||||
      if (!destContents && !destError) {
 | 
			
		||||
        return td::Status::Error("Callback not supported");
 | 
			
		||||
      }
 | 
			
		||||
      if (contents_c) {
 | 
			
		||||
        return contents_c;
 | 
			
		||||
      if (destContents) {
 | 
			
		||||
        return destContents;
 | 
			
		||||
      }
 | 
			
		||||
      return td::Status::Error(std::string(error_c));
 | 
			
		||||
      return td::Status::Error(std::string(destError));
 | 
			
		||||
    };
 | 
			
		||||
  }
 | 
			
		||||
  return readCallback;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										112
									
								
								tolk/tolk.cpp
									
										
									
									
									
								
							
							
						
						
									
										112
									
								
								tolk/tolk.cpp
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -24,18 +24,17 @@
 | 
			
		|||
    from all source files in the program, then also delete it here.
 | 
			
		||||
*/
 | 
			
		||||
#include "tolk.h"
 | 
			
		||||
#include "srcread.h"
 | 
			
		||||
#include "lexer.h"
 | 
			
		||||
#include <getopt.h>
 | 
			
		||||
#include "git.h"
 | 
			
		||||
#include <fstream>
 | 
			
		||||
#include "td/utils/port/path.h"
 | 
			
		||||
#include <sys/stat.h>
 | 
			
		||||
 | 
			
		||||
namespace tolk {
 | 
			
		||||
 | 
			
		||||
int verbosity, indent, opt_level = 2;
 | 
			
		||||
bool stack_layout_comments, op_rewrite_comments, program_envelope, asm_preamble;
 | 
			
		||||
bool interactive = false;
 | 
			
		||||
int verbosity = 0, opt_level = 2;
 | 
			
		||||
bool stack_layout_comments = true;
 | 
			
		||||
GlobalPragma pragma_allow_post_modification{"allow-post-modification"};
 | 
			
		||||
GlobalPragma pragma_compute_asm_ltr{"compute-asm-ltr"};
 | 
			
		||||
GlobalPragma pragma_remove_unused_functions{"remove-unused-functions"};
 | 
			
		||||
| 
						 | 
				
			
			@ -82,23 +81,13 @@ void GlobalPragma::enable(SrcLocation loc) {
 | 
			
		|||
                     ". Please, remove this line from your code.");
 | 
			
		||||
    return;
 | 
			
		||||
  }
 | 
			
		||||
  if (!loc.get_src_file()->is_entrypoint_file()) {
 | 
			
		||||
    // todo generally it's not true; rework pragmas completely
 | 
			
		||||
    loc.show_warning(PSTRING() << "#pragma " << name_ <<
 | 
			
		||||
                     " should be used in the main file only.");
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  enabled_ = true;
 | 
			
		||||
  locs_.push_back(std::move(loc));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void GlobalPragma::check_enable_in_libs() {
 | 
			
		||||
  if (locs_.empty()) {
 | 
			
		||||
    return;
 | 
			
		||||
  }
 | 
			
		||||
  for (const SrcLocation& loc : locs_) {
 | 
			
		||||
    if (loc.fdescr->is_main) {
 | 
			
		||||
      return;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  locs_[0].show_warning(PSTRING() << "#pragma " << name_
 | 
			
		||||
                        << " is enabled in included libraries, it may change the behavior of your code. "
 | 
			
		||||
                        << "Add this #pragma to the main source file to suppress this warning.");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void GlobalPragma::always_on_and_deprecated(const char *deprecated_from_v) {
 | 
			
		||||
| 
						 | 
				
			
			@ -109,14 +98,19 @@ void GlobalPragma::always_on_and_deprecated(const char *deprecated_from_v) {
 | 
			
		|||
td::Result<std::string> fs_read_callback(ReadCallback::Kind kind, const char* query) {
 | 
			
		||||
  switch (kind) {
 | 
			
		||||
    case ReadCallback::Kind::ReadFile: {
 | 
			
		||||
      std::ifstream ifs{query};
 | 
			
		||||
      if (ifs.fail()) {
 | 
			
		||||
        auto msg = std::string{"cannot open source file `"} + query + "`";
 | 
			
		||||
        return td::Status::Error(msg);
 | 
			
		||||
      struct stat f_stat;
 | 
			
		||||
      int res = stat(query, &f_stat);
 | 
			
		||||
      if (res != 0) {
 | 
			
		||||
        return td::Status::Error(std::string{"cannot open source file: "} + query);
 | 
			
		||||
      }
 | 
			
		||||
      std::stringstream ss;
 | 
			
		||||
      ss << ifs.rdbuf();
 | 
			
		||||
      return ss.str();
 | 
			
		||||
 | 
			
		||||
      size_t file_size = static_cast<size_t>(f_stat.st_size);
 | 
			
		||||
      std::string str;
 | 
			
		||||
      str.resize(file_size);
 | 
			
		||||
      FILE* f = fopen(query, "r");
 | 
			
		||||
      fread(str.data(), file_size, 1, f);
 | 
			
		||||
      fclose(f);
 | 
			
		||||
      return std::move(str);
 | 
			
		||||
    }
 | 
			
		||||
    case ReadCallback::Kind::Realpath: {
 | 
			
		||||
      return td::realpath(td::CSlice(query));
 | 
			
		||||
| 
						 | 
				
			
			@ -241,7 +235,7 @@ void generate_output_func(SymDef* func_sym, std::ostream &outs, std::ostream &er
 | 
			
		|||
    } else if (func_val->is_inline_ref()) {
 | 
			
		||||
      modifier = "REF";
 | 
			
		||||
    }
 | 
			
		||||
    outs << std::string(indent * 2, ' ') << name << " PROC" << modifier << ":<{\n";
 | 
			
		||||
    outs << std::string(2, ' ') << name << " PROC" << modifier << ":<{\n";
 | 
			
		||||
    int mode = 0;
 | 
			
		||||
    if (stack_layout_comments) {
 | 
			
		||||
      mode |= Stack::_StkCmt | Stack::_CptStkCmt;
 | 
			
		||||
| 
						 | 
				
			
			@ -255,8 +249,8 @@ void generate_output_func(SymDef* func_sym, std::ostream &outs, std::ostream &er
 | 
			
		|||
    if (func_val->is_inline() || func_val->is_inline_ref()) {
 | 
			
		||||
      mode |= Stack::_InlineAny;
 | 
			
		||||
    }
 | 
			
		||||
    code.generate_code(outs, mode, indent + 1);
 | 
			
		||||
    outs << std::string(indent * 2, ' ') << "}>\n";
 | 
			
		||||
    code.generate_code(outs, mode, 2);
 | 
			
		||||
    outs << std::string(2, ' ') << "}>\n";
 | 
			
		||||
    if (verbosity >= 2) {
 | 
			
		||||
      errs << "--------------\n";
 | 
			
		||||
    }
 | 
			
		||||
| 
						 | 
				
			
			@ -264,13 +258,9 @@ void generate_output_func(SymDef* func_sym, std::ostream &outs, std::ostream &er
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
int generate_output(std::ostream &outs, std::ostream &errs) {
 | 
			
		||||
  if (asm_preamble) {
 | 
			
		||||
  outs << "\"Asm.fif\" include\n";
 | 
			
		||||
  }
 | 
			
		||||
  outs << "// automatically generated from " << generated_from << std::endl;
 | 
			
		||||
  if (program_envelope) {
 | 
			
		||||
  outs << "PROGRAM{\n";
 | 
			
		||||
  }
 | 
			
		||||
  mark_used_symbols();
 | 
			
		||||
  for (SymDef* func_sym : glob_func) {
 | 
			
		||||
    SymValCodeFunc* func_val = dynamic_cast<SymValCodeFunc*>(func_sym->value);
 | 
			
		||||
| 
						 | 
				
			
			@ -283,7 +273,7 @@ int generate_output(std::ostream &outs, std::ostream &errs) {
 | 
			
		|||
    }
 | 
			
		||||
 | 
			
		||||
    std::string name = symbols.get_name(func_sym->sym_idx);
 | 
			
		||||
    outs << std::string(indent * 2, ' ');
 | 
			
		||||
    outs << std::string(2, ' ');
 | 
			
		||||
    if (func_val->method_id.is_null()) {
 | 
			
		||||
      outs << "DECLPROC " << name << "\n";
 | 
			
		||||
    } else {
 | 
			
		||||
| 
						 | 
				
			
			@ -300,7 +290,7 @@ int generate_output(std::ostream &outs, std::ostream &errs) {
 | 
			
		|||
      continue;
 | 
			
		||||
    }
 | 
			
		||||
    std::string name = symbols.get_name(gvar_sym->sym_idx);
 | 
			
		||||
    outs << std::string(indent * 2, ' ') << "DECLGLOBVAR " << name << "\n";
 | 
			
		||||
    outs << std::string(2, ' ') << "DECLGLOBVAR " << name << "\n";
 | 
			
		||||
  }
 | 
			
		||||
  int errors = 0;
 | 
			
		||||
  for (SymDef* func_sym : glob_func) {
 | 
			
		||||
| 
						 | 
				
			
			@ -310,76 +300,46 @@ int generate_output(std::ostream &outs, std::ostream &errs) {
 | 
			
		|||
    }
 | 
			
		||||
    try {
 | 
			
		||||
      generate_output_func(func_sym, outs, errs);
 | 
			
		||||
    } catch (Error& err) {
 | 
			
		||||
    } catch (ParseError& err) {
 | 
			
		||||
      errs << "cannot generate code for function `" << symbols.get_name(func_sym->sym_idx) << "`:\n"
 | 
			
		||||
                << err << std::endl;
 | 
			
		||||
      ++errors;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  if (program_envelope) {
 | 
			
		||||
  outs << "}END>c\n";
 | 
			
		||||
  }
 | 
			
		||||
  if (!boc_output_filename.empty()) {
 | 
			
		||||
    outs << "2 boc+>B \"" << boc_output_filename << "\" B>file\n";
 | 
			
		||||
    outs << "boc>B \"" << boc_output_filename << "\" B>file\n";
 | 
			
		||||
  }
 | 
			
		||||
  return errors;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void output_inclusion_stack(std::ostream &errs) {
 | 
			
		||||
  while (!inclusion_locations.empty()) {
 | 
			
		||||
    SrcLocation loc = inclusion_locations.top();
 | 
			
		||||
    inclusion_locations.pop();
 | 
			
		||||
    if (loc.fdescr) {
 | 
			
		||||
      errs << "note: included from ";
 | 
			
		||||
      loc.show(errs);
 | 
			
		||||
      errs << std::endl;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
int tolk_proceed(const std::vector<std::string> &sources, std::ostream &outs, std::ostream &errs) {
 | 
			
		||||
  if (program_envelope && !indent) {
 | 
			
		||||
    indent = 1;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  define_keywords();
 | 
			
		||||
int tolk_proceed(const std::string &entrypoint_file_name, std::ostream &outs, std::ostream &errs) {
 | 
			
		||||
  define_builtins();
 | 
			
		||||
  lexer_init();
 | 
			
		||||
  pragma_allow_post_modification.always_on_and_deprecated("0.5.0");
 | 
			
		||||
  pragma_compute_asm_ltr.always_on_and_deprecated("0.5.0");
 | 
			
		||||
 | 
			
		||||
  int ok = 0, proc = 0;
 | 
			
		||||
  try {
 | 
			
		||||
    for (auto src : sources) {
 | 
			
		||||
      ok += parse_source_file(src.c_str(), {}, true);
 | 
			
		||||
      proc++;
 | 
			
		||||
    }
 | 
			
		||||
    if (interactive) {
 | 
			
		||||
      generated_from += "stdin ";
 | 
			
		||||
      ok += parse_source_stdin();
 | 
			
		||||
      proc++;
 | 
			
		||||
    }
 | 
			
		||||
    if (ok < proc) {
 | 
			
		||||
    bool ok = parse_source_file(entrypoint_file_name.c_str(), {});
 | 
			
		||||
    if (!ok) {
 | 
			
		||||
      throw Fatal{"output code generation omitted because of errors"};
 | 
			
		||||
    }
 | 
			
		||||
    if (!proc) {
 | 
			
		||||
      throw Fatal{"no source files, no output"};
 | 
			
		||||
    }
 | 
			
		||||
    pragma_remove_unused_functions.check_enable_in_libs();
 | 
			
		||||
 | 
			
		||||
    // todo #ifdef TOLK_PROFILING + comment
 | 
			
		||||
    // lexer_measure_performance(all_src_files.get_all_files());
 | 
			
		||||
 | 
			
		||||
    return generate_output(outs, errs);
 | 
			
		||||
  } catch (Fatal& fatal) {
 | 
			
		||||
    errs << "fatal: " << fatal << std::endl;
 | 
			
		||||
    output_inclusion_stack(errs);
 | 
			
		||||
    return 2;
 | 
			
		||||
  } catch (Error& error) {
 | 
			
		||||
  } catch (ParseError& error) {
 | 
			
		||||
    errs << error << std::endl;
 | 
			
		||||
    output_inclusion_stack(errs);
 | 
			
		||||
    return 2;
 | 
			
		||||
  } catch (UnifyError& unif_err) {
 | 
			
		||||
    errs << "fatal: ";
 | 
			
		||||
    unif_err.print_message(errs);
 | 
			
		||||
    errs << std::endl;
 | 
			
		||||
    output_inclusion_stack(errs);
 | 
			
		||||
    return 2;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										244
									
								
								tolk/tolk.h
									
										
									
									
									
								
							
							
						
						
									
										244
									
								
								tolk/tolk.h
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -15,6 +15,7 @@
 | 
			
		|||
    along with TON Blockchain Library.  If not, see <http://www.gnu.org/licenses/>.
 | 
			
		||||
*/
 | 
			
		||||
#pragma once
 | 
			
		||||
#include <utility>
 | 
			
		||||
#include <vector>
 | 
			
		||||
#include <string>
 | 
			
		||||
#include <set>
 | 
			
		||||
| 
						 | 
				
			
			@ -26,7 +27,7 @@
 | 
			
		|||
#include "common/refcnt.hpp"
 | 
			
		||||
#include "common/bigint.hpp"
 | 
			
		||||
#include "common/refint.h"
 | 
			
		||||
#include "srcread.h"
 | 
			
		||||
#include "src-file.h"
 | 
			
		||||
#include "lexer.h"
 | 
			
		||||
#include "symtable.h"
 | 
			
		||||
#include "td/utils/Status.h"
 | 
			
		||||
| 
						 | 
				
			
			@ -45,104 +46,6 @@ constexpr int optimize_depth = 20;
 | 
			
		|||
 | 
			
		||||
const std::string tolk_version{"0.4.5"};
 | 
			
		||||
 | 
			
		||||
enum Keyword {
 | 
			
		||||
  _Eof = -1,
 | 
			
		||||
  _Ident = 0,
 | 
			
		||||
  _Number,
 | 
			
		||||
  _Special,
 | 
			
		||||
  _String,
 | 
			
		||||
  _Return = 0x80,
 | 
			
		||||
  _Var,
 | 
			
		||||
  _Repeat,
 | 
			
		||||
  _Do,
 | 
			
		||||
  _While,
 | 
			
		||||
  _Until,
 | 
			
		||||
  _Try,
 | 
			
		||||
  _Catch,
 | 
			
		||||
  _If,
 | 
			
		||||
  _Ifnot,
 | 
			
		||||
  _Then,
 | 
			
		||||
  _Else,
 | 
			
		||||
  _Elseif,
 | 
			
		||||
  _Elseifnot,
 | 
			
		||||
  _Eq,
 | 
			
		||||
  _Neq,
 | 
			
		||||
  _Leq,
 | 
			
		||||
  _Geq,
 | 
			
		||||
  _Spaceship,
 | 
			
		||||
  _Lshift,
 | 
			
		||||
  _Rshift,
 | 
			
		||||
  _RshiftR,
 | 
			
		||||
  _RshiftC,
 | 
			
		||||
  _DivR,
 | 
			
		||||
  _DivC,
 | 
			
		||||
  _ModR,
 | 
			
		||||
  _ModC,
 | 
			
		||||
  _DivMod,
 | 
			
		||||
  _PlusLet,
 | 
			
		||||
  _MinusLet,
 | 
			
		||||
  _TimesLet,
 | 
			
		||||
  _DivLet,
 | 
			
		||||
  _DivRLet,
 | 
			
		||||
  _DivCLet,
 | 
			
		||||
  _ModLet,
 | 
			
		||||
  _ModRLet,
 | 
			
		||||
  _ModCLet,
 | 
			
		||||
  _LshiftLet,
 | 
			
		||||
  _RshiftLet,
 | 
			
		||||
  _RshiftRLet,
 | 
			
		||||
  _RshiftCLet,
 | 
			
		||||
  _AndLet,
 | 
			
		||||
  _OrLet,
 | 
			
		||||
  _XorLet,
 | 
			
		||||
  _Int,
 | 
			
		||||
  _Cell,
 | 
			
		||||
  _Slice,
 | 
			
		||||
  _Builder,
 | 
			
		||||
  _Cont,
 | 
			
		||||
  _Tuple,
 | 
			
		||||
  _Type,
 | 
			
		||||
  _Mapsto,
 | 
			
		||||
  _Forall,
 | 
			
		||||
  _Asm,
 | 
			
		||||
  _Impure,
 | 
			
		||||
  _Pure,
 | 
			
		||||
  _Global,
 | 
			
		||||
  _Extern,
 | 
			
		||||
  _Inline,
 | 
			
		||||
  _InlineRef,
 | 
			
		||||
  _Builtin,
 | 
			
		||||
  _AutoApply,
 | 
			
		||||
  _MethodId,
 | 
			
		||||
  _Get,
 | 
			
		||||
  _Operator,
 | 
			
		||||
  _Infix,
 | 
			
		||||
  _Infixl,
 | 
			
		||||
  _Infixr,
 | 
			
		||||
  _Const,
 | 
			
		||||
  _PragmaHashtag,
 | 
			
		||||
  _IncludeHashtag
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
void define_keywords();
 | 
			
		||||
 | 
			
		||||
class IdSc {
 | 
			
		||||
  int cls;
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
  enum { undef = 0, dotid = 1, tildeid = 2 };
 | 
			
		||||
  IdSc(int _cls = undef) : cls(_cls) {
 | 
			
		||||
  }
 | 
			
		||||
  operator int() {
 | 
			
		||||
    return cls;
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// symbol subclass:
 | 
			
		||||
// 1 = begins with . (a const method)
 | 
			
		||||
// 2 = begins with ~ (a non-const method)
 | 
			
		||||
// 0 = else
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * 
 | 
			
		||||
 *   TYPE EXPRESSIONS
 | 
			
		||||
| 
						 | 
				
			
			@ -152,13 +55,13 @@ class IdSc {
 | 
			
		|||
struct TypeExpr {
 | 
			
		||||
  enum te_type { te_Unknown, te_Var, te_Indirect, te_Atomic, te_Tensor, te_Tuple, te_Map, te_ForAll } constr;
 | 
			
		||||
  enum AtomicType {
 | 
			
		||||
    _Int = Keyword::_Int,
 | 
			
		||||
    _Cell = Keyword::_Cell,
 | 
			
		||||
    _Slice = Keyword::_Slice,
 | 
			
		||||
    _Builder = Keyword::_Builder,
 | 
			
		||||
    _Cont = Keyword::_Cont,
 | 
			
		||||
    _Tuple = Keyword::_Tuple,
 | 
			
		||||
    _Type = Keyword::_Type
 | 
			
		||||
    _Int = tok_int,
 | 
			
		||||
    _Cell = tok_cell,
 | 
			
		||||
    _Slice = tok_slice,
 | 
			
		||||
    _Builder = tok_builder,
 | 
			
		||||
    _Cont = tok_cont,
 | 
			
		||||
    _Tuple = tok_tuple,
 | 
			
		||||
    _Type = tok_type
 | 
			
		||||
  };
 | 
			
		||||
  int value;
 | 
			
		||||
  int minw, maxw;
 | 
			
		||||
| 
						 | 
				
			
			@ -279,14 +182,18 @@ struct TypeExpr {
 | 
			
		|||
 | 
			
		||||
std::ostream& operator<<(std::ostream& os, TypeExpr* type_expr);
 | 
			
		||||
 | 
			
		||||
struct UnifyError {
 | 
			
		||||
struct UnifyError : std::exception {
 | 
			
		||||
  TypeExpr* te1;
 | 
			
		||||
  TypeExpr* te2;
 | 
			
		||||
  std::string msg;
 | 
			
		||||
  UnifyError(TypeExpr* _te1, TypeExpr* _te2, std::string _msg = "") : te1(_te1), te2(_te2), msg(_msg) {
 | 
			
		||||
 | 
			
		||||
  UnifyError(TypeExpr* _te1, TypeExpr* _te2, std::string _msg = "") : te1(_te1), te2(_te2), msg(std::move(_msg)) {
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void print_message(std::ostream& os) const;
 | 
			
		||||
  std::string message() const;
 | 
			
		||||
  const char* what() const noexcept override {
 | 
			
		||||
    return msg.c_str();
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
std::ostream& operator<<(std::ostream& os, const UnifyError& ue);
 | 
			
		||||
| 
						 | 
				
			
			@ -310,18 +217,13 @@ struct TmpVar {
 | 
			
		|||
  int cls;
 | 
			
		||||
  sym_idx_t name;
 | 
			
		||||
  int coord;
 | 
			
		||||
  std::unique_ptr<SrcLocation> where;
 | 
			
		||||
  std::vector<std::function<void(const SrcLocation &)>> on_modification;
 | 
			
		||||
  bool undefined = false;
 | 
			
		||||
  TmpVar(var_idx_t _idx, int _cls, TypeExpr* _type = 0, SymDef* sym = 0, const SrcLocation* loc = 0);
 | 
			
		||||
  SrcLocation where;
 | 
			
		||||
  std::vector<std::function<void(SrcLocation)>> on_modification;
 | 
			
		||||
 | 
			
		||||
  TmpVar(var_idx_t _idx, int _cls, TypeExpr* _type, SymDef* sym, SrcLocation loc);
 | 
			
		||||
  void show(std::ostream& os, int omit_idx = 0) const;
 | 
			
		||||
  void dump(std::ostream& os) const;
 | 
			
		||||
  void set_location(const SrcLocation& loc);
 | 
			
		||||
  std::string to_string() const {
 | 
			
		||||
    std::ostringstream s;
 | 
			
		||||
    show(s, 2);
 | 
			
		||||
    return s.str();
 | 
			
		||||
  }
 | 
			
		||||
  void set_location(SrcLocation loc);
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct VarDescr {
 | 
			
		||||
| 
						 | 
				
			
			@ -566,25 +468,25 @@ struct Op {
 | 
			
		|||
  std::unique_ptr<Op> block0, block1;
 | 
			
		||||
  td::RefInt256 int_const;
 | 
			
		||||
  std::string str_const;
 | 
			
		||||
  Op(const SrcLocation& _where = {}, OpKind _cl = _Undef) : cl(_cl), flags(0), fun_ref(nullptr), where(_where) {
 | 
			
		||||
  Op(SrcLocation _where = {}, OpKind _cl = _Undef) : cl(_cl), flags(0), fun_ref(nullptr), where(_where) {
 | 
			
		||||
  }
 | 
			
		||||
  Op(const SrcLocation& _where, OpKind _cl, const std::vector<var_idx_t>& _left)
 | 
			
		||||
  Op(SrcLocation _where, OpKind _cl, const std::vector<var_idx_t>& _left)
 | 
			
		||||
      : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left) {
 | 
			
		||||
  }
 | 
			
		||||
  Op(const SrcLocation& _where, OpKind _cl, std::vector<var_idx_t>&& _left)
 | 
			
		||||
  Op(SrcLocation _where, OpKind _cl, std::vector<var_idx_t>&& _left)
 | 
			
		||||
      : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(std::move(_left)) {
 | 
			
		||||
  }
 | 
			
		||||
  Op(const SrcLocation& _where, OpKind _cl, const std::vector<var_idx_t>& _left, td::RefInt256 _const)
 | 
			
		||||
  Op(SrcLocation _where, OpKind _cl, const std::vector<var_idx_t>& _left, td::RefInt256 _const)
 | 
			
		||||
      : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left), int_const(_const) {
 | 
			
		||||
  }
 | 
			
		||||
  Op(const SrcLocation& _where, OpKind _cl, const std::vector<var_idx_t>& _left, std::string _const)
 | 
			
		||||
  Op(SrcLocation _where, OpKind _cl, const std::vector<var_idx_t>& _left, std::string _const)
 | 
			
		||||
      : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left), str_const(_const) {
 | 
			
		||||
  }
 | 
			
		||||
  Op(const SrcLocation& _where, OpKind _cl, const std::vector<var_idx_t>& _left, const std::vector<var_idx_t>& _right,
 | 
			
		||||
  Op(SrcLocation _where, OpKind _cl, const std::vector<var_idx_t>& _left, const std::vector<var_idx_t>& _right,
 | 
			
		||||
     SymDef* _fun = nullptr)
 | 
			
		||||
      : cl(_cl), flags(0), fun_ref(_fun), where(_where), left(_left), right(_right) {
 | 
			
		||||
  }
 | 
			
		||||
  Op(const SrcLocation& _where, OpKind _cl, std::vector<var_idx_t>&& _left, std::vector<var_idx_t>&& _right,
 | 
			
		||||
  Op(SrcLocation _where, OpKind _cl, std::vector<var_idx_t>&& _left, std::vector<var_idx_t>&& _right,
 | 
			
		||||
     SymDef* _fun = nullptr)
 | 
			
		||||
      : cl(_cl), flags(0), fun_ref(_fun), where(_where), left(std::move(_left)), right(std::move(_right)) {
 | 
			
		||||
  }
 | 
			
		||||
| 
						 | 
				
			
			@ -700,8 +602,8 @@ struct CodeBlob {
 | 
			
		|||
    return res;
 | 
			
		||||
  }
 | 
			
		||||
  bool import_params(FormalArgList arg_list);
 | 
			
		||||
  var_idx_t create_var(int cls, TypeExpr* var_type = 0, SymDef* sym = 0, const SrcLocation* loc = 0);
 | 
			
		||||
  var_idx_t create_tmp_var(TypeExpr* var_type = 0, const SrcLocation* loc = 0) {
 | 
			
		||||
  var_idx_t create_var(int cls, TypeExpr* var_type, SymDef* sym, SrcLocation loc);
 | 
			
		||||
  var_idx_t create_tmp_var(TypeExpr* var_type, SrcLocation loc) {
 | 
			
		||||
    return create_var(TmpVar::_Tmp, var_type, nullptr, loc);
 | 
			
		||||
  }
 | 
			
		||||
  int split_vars(bool strict = false);
 | 
			
		||||
| 
						 | 
				
			
			@ -712,14 +614,14 @@ struct CodeBlob {
 | 
			
		|||
    cur_ops_stack.push(cur_ops);
 | 
			
		||||
    cur_ops = &new_cur_ops;
 | 
			
		||||
  }
 | 
			
		||||
  void close_blk(const SrcLocation& location) {
 | 
			
		||||
  void close_blk(SrcLocation location) {
 | 
			
		||||
    *cur_ops = std::make_unique<Op>(location, Op::_Nop);
 | 
			
		||||
  }
 | 
			
		||||
  void pop_cur() {
 | 
			
		||||
    cur_ops = cur_ops_stack.top();
 | 
			
		||||
    cur_ops_stack.pop();
 | 
			
		||||
  }
 | 
			
		||||
  void close_pop_cur(const SrcLocation& location) {
 | 
			
		||||
  void close_pop_cur(SrcLocation location) {
 | 
			
		||||
    close_blk(location);
 | 
			
		||||
    pop_cur();
 | 
			
		||||
  }
 | 
			
		||||
| 
						 | 
				
			
			@ -730,7 +632,7 @@ struct CodeBlob {
 | 
			
		|||
  void generate_code(AsmOpList& out_list, int mode = 0);
 | 
			
		||||
  void generate_code(std::ostream& os, int mode = 0, int indent = 0);
 | 
			
		||||
 | 
			
		||||
  void on_var_modification(var_idx_t idx, const SrcLocation& here) const {
 | 
			
		||||
  void on_var_modification(var_idx_t idx, SrcLocation here) const {
 | 
			
		||||
    for (auto& f : vars.at(idx).on_modification) {
 | 
			
		||||
      f(here);
 | 
			
		||||
    }
 | 
			
		||||
| 
						 | 
				
			
			@ -746,8 +648,8 @@ struct CodeBlob {
 | 
			
		|||
struct SymVal : SymValBase {
 | 
			
		||||
  TypeExpr* sym_type;
 | 
			
		||||
  bool auto_apply{false};
 | 
			
		||||
  SymVal(int _type, int _idx, TypeExpr* _stype = nullptr)
 | 
			
		||||
      : SymValBase(_type, _idx), sym_type(_stype) {
 | 
			
		||||
  SymVal(SymValKind kind, int idx, TypeExpr* sym_type = nullptr)
 | 
			
		||||
      : SymValBase(kind, idx), sym_type(sym_type) {
 | 
			
		||||
  }
 | 
			
		||||
  ~SymVal() override = default;
 | 
			
		||||
  TypeExpr* get_type() const {
 | 
			
		||||
| 
						 | 
				
			
			@ -774,9 +676,9 @@ struct SymValFunc : SymVal {
 | 
			
		|||
#endif
 | 
			
		||||
  ~SymValFunc() override = default;
 | 
			
		||||
  SymValFunc(int val, TypeExpr* _ft, bool marked_as_pure)
 | 
			
		||||
      : SymVal(_Func, val, _ft), flags(marked_as_pure ? flagMarkedAsPure : 0) {}
 | 
			
		||||
      : SymVal(SymValKind::_Func, val, _ft), flags(marked_as_pure ? flagMarkedAsPure : 0) {}
 | 
			
		||||
  SymValFunc(int val, TypeExpr* _ft, std::initializer_list<int> _arg_order, std::initializer_list<int> _ret_order, bool marked_as_pure)
 | 
			
		||||
      : SymVal(_Func, val, _ft), flags(marked_as_pure ? flagMarkedAsPure : 0), arg_order(_arg_order), ret_order(_ret_order) {
 | 
			
		||||
      : SymVal(SymValKind::_Func, val, _ft), flags(marked_as_pure ? flagMarkedAsPure : 0), arg_order(_arg_order), ret_order(_ret_order) {
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  const std::vector<int>* get_arg_order() const {
 | 
			
		||||
| 
						 | 
				
			
			@ -818,7 +720,7 @@ struct SymValCodeFunc : SymValFunc {
 | 
			
		|||
 | 
			
		||||
struct SymValType : SymValBase {
 | 
			
		||||
  TypeExpr* sym_type;
 | 
			
		||||
  SymValType(int _type, int _idx, TypeExpr* _stype = nullptr) : SymValBase(_type, _idx), sym_type(_stype) {
 | 
			
		||||
  SymValType(SymValKind kind, int idx, TypeExpr* _stype = nullptr) : SymValBase(kind, idx), sym_type(_stype) {
 | 
			
		||||
  }
 | 
			
		||||
  ~SymValType() override = default;
 | 
			
		||||
  TypeExpr* get_type() const {
 | 
			
		||||
| 
						 | 
				
			
			@ -834,7 +736,7 @@ struct SymValGlobVar : SymValBase {
 | 
			
		|||
  std::string name; // seeing variable name in debugger makes it much easier to delve into Tolk sources
 | 
			
		||||
#endif
 | 
			
		||||
  SymValGlobVar(int val, TypeExpr* gvtype, int oidx = 0)
 | 
			
		||||
      : SymValBase(_GlobVar, val), sym_type(gvtype), out_idx(oidx) {
 | 
			
		||||
      : SymValBase(SymValKind::_GlobVar, val), sym_type(gvtype), out_idx(oidx) {
 | 
			
		||||
  }
 | 
			
		||||
  ~SymValGlobVar() override = default;
 | 
			
		||||
  TypeExpr* get_type() const {
 | 
			
		||||
| 
						 | 
				
			
			@ -843,16 +745,16 @@ struct SymValGlobVar : SymValBase {
 | 
			
		|||
};
 | 
			
		||||
 | 
			
		||||
struct SymValConst : SymValBase {
 | 
			
		||||
  enum ConstKind { IntConst, SliceConst };
 | 
			
		||||
 | 
			
		||||
  td::RefInt256 intval;
 | 
			
		||||
  std::string strval;
 | 
			
		||||
  Keyword type;
 | 
			
		||||
  ConstKind kind;
 | 
			
		||||
  SymValConst(int idx, td::RefInt256 value)
 | 
			
		||||
      : SymValBase(_Const, idx), intval(value) {
 | 
			
		||||
    type = _Int;
 | 
			
		||||
      : SymValBase(SymValKind::_Const, idx), intval(value), kind(IntConst) {
 | 
			
		||||
  }
 | 
			
		||||
  SymValConst(int idx, std::string value)
 | 
			
		||||
      : SymValBase(_Const, idx), strval(value) {
 | 
			
		||||
    type = _Slice;
 | 
			
		||||
      : SymValBase(SymValKind::_Const, idx), strval(value), kind(SliceConst) {
 | 
			
		||||
  }
 | 
			
		||||
  ~SymValConst() override = default;
 | 
			
		||||
  td::RefInt256 get_int_value() const {
 | 
			
		||||
| 
						 | 
				
			
			@ -861,8 +763,8 @@ struct SymValConst : SymValBase {
 | 
			
		|||
  std::string get_str_value() const {
 | 
			
		||||
    return strval;
 | 
			
		||||
  }
 | 
			
		||||
  Keyword get_type() const {
 | 
			
		||||
    return type;
 | 
			
		||||
  ConstKind get_kind() const {
 | 
			
		||||
    return kind;
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -882,35 +784,21 @@ public:
 | 
			
		|||
  ReadCallback(ReadCallback const&) = delete;
 | 
			
		||||
  ReadCallback& operator=(ReadCallback const&) = delete;
 | 
			
		||||
 | 
			
		||||
  enum class Kind
 | 
			
		||||
  {
 | 
			
		||||
  enum class Kind {
 | 
			
		||||
    Realpath,
 | 
			
		||||
    ReadFile,
 | 
			
		||||
    Realpath
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  static std::string kindString(Kind _kind)
 | 
			
		||||
  {
 | 
			
		||||
    switch (_kind)
 | 
			
		||||
    {
 | 
			
		||||
    case Kind::ReadFile:
 | 
			
		||||
      return "source";
 | 
			
		||||
    case Kind::Realpath:
 | 
			
		||||
      return "realpath";
 | 
			
		||||
    default:
 | 
			
		||||
      throw ""; // todo ?
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /// File reading or generic query callback.
 | 
			
		||||
  using Callback = std::function<td::Result<std::string>(ReadCallback::Kind, const char*)>;
 | 
			
		||||
  using Callback = std::function<td::Result<std::string>(Kind, const char*)>;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// defined in parse-tolk.cpp
 | 
			
		||||
bool parse_source(std::istream* is, const FileDescr* fdescr);
 | 
			
		||||
bool parse_source_file(const char* filename, Lexem lex = {}, bool is_main = false);
 | 
			
		||||
bool parse_source_stdin();
 | 
			
		||||
void parse_source(const SrcFile* file);
 | 
			
		||||
bool parse_source_file(const char* filename, SrcLocation loc_included_from);
 | 
			
		||||
 | 
			
		||||
extern std::stack<SrcLocation> inclusion_locations;
 | 
			
		||||
extern AllRegisteredSrcFiles all_src_files;
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * 
 | 
			
		||||
| 
						 | 
				
			
			@ -949,7 +837,7 @@ struct Expr {
 | 
			
		|||
  std::vector<Expr*> args;
 | 
			
		||||
  explicit Expr(ExprCls c = _None) : cls(c) {
 | 
			
		||||
  }
 | 
			
		||||
  Expr(ExprCls c, const SrcLocation& loc) : cls(c), here(loc) {
 | 
			
		||||
  Expr(ExprCls c, SrcLocation loc) : cls(c), here(loc) {
 | 
			
		||||
  }
 | 
			
		||||
  Expr(ExprCls c, std::vector<Expr*> _args) : cls(c), args(std::move(_args)) {
 | 
			
		||||
  }
 | 
			
		||||
| 
						 | 
				
			
			@ -990,14 +878,13 @@ struct Expr {
 | 
			
		|||
  bool is_mktuple() const {
 | 
			
		||||
    return cls == _MkTuple;
 | 
			
		||||
  }
 | 
			
		||||
  void chk_rvalue(const Lexem& lem) const;
 | 
			
		||||
  void chk_lvalue(const Lexem& lem) const;
 | 
			
		||||
  void chk_type(const Lexem& lem) const;
 | 
			
		||||
  bool deduce_type(const Lexem& lem);
 | 
			
		||||
  void set_location(const SrcLocation& loc) {
 | 
			
		||||
  void chk_rvalue(const Lexer& lex) const;  // todo here and below: strange to pass Lexer
 | 
			
		||||
  void chk_lvalue(const Lexer& lex) const;
 | 
			
		||||
  bool deduce_type(const Lexer& lex);
 | 
			
		||||
  void set_location(SrcLocation loc) {
 | 
			
		||||
    here = loc;
 | 
			
		||||
  }
 | 
			
		||||
  const SrcLocation& get_location() const {
 | 
			
		||||
  SrcLocation get_location() const {
 | 
			
		||||
    return here;
 | 
			
		||||
  }
 | 
			
		||||
  int define_new_vars(CodeBlob& code);
 | 
			
		||||
| 
						 | 
				
			
			@ -1699,11 +1586,11 @@ struct Stack {
 | 
			
		|||
 * 
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
typedef std::function<AsmOp(std::vector<VarDescr>&, std::vector<VarDescr>&, const SrcLocation)> simple_compile_func_t;
 | 
			
		||||
typedef std::function<AsmOp(std::vector<VarDescr>&, std::vector<VarDescr>&, SrcLocation)> simple_compile_func_t;
 | 
			
		||||
typedef std::function<bool(AsmOpList&, std::vector<VarDescr>&, std::vector<VarDescr>&)> compile_func_t;
 | 
			
		||||
 | 
			
		||||
inline simple_compile_func_t make_simple_compile(AsmOp op) {
 | 
			
		||||
  return [op](std::vector<VarDescr>& out, std::vector<VarDescr>& in, const SrcLocation&) -> AsmOp { return op; };
 | 
			
		||||
  return [op](std::vector<VarDescr>& out, std::vector<VarDescr>& in, SrcLocation) -> AsmOp { return op; };
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
inline compile_func_t make_ext_compile(std::vector<AsmOp>&& ops) {
 | 
			
		||||
| 
						 | 
				
			
			@ -1739,7 +1626,7 @@ struct SymValAsmFunc : SymValFunc {
 | 
			
		|||
                std::initializer_list<int> ret_order = {}, bool marked_as_pure = false)
 | 
			
		||||
      : SymValFunc(-1, ft, arg_order, ret_order, marked_as_pure), ext_compile(std::move(_compile)) {
 | 
			
		||||
  }
 | 
			
		||||
  bool compile(AsmOpList& dest, std::vector<VarDescr>& out, std::vector<VarDescr>& in, const SrcLocation& where) const;
 | 
			
		||||
  bool compile(AsmOpList& dest, std::vector<VarDescr>& out, std::vector<VarDescr>& in, SrcLocation where) const;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// defined in builtins.cpp
 | 
			
		||||
| 
						 | 
				
			
			@ -1753,8 +1640,8 @@ AsmOp push_const(td::RefInt256 x);
 | 
			
		|||
void define_builtins();
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
extern int verbosity, indent, opt_level;
 | 
			
		||||
extern bool stack_layout_comments, op_rewrite_comments, program_envelope, asm_preamble, interactive;
 | 
			
		||||
extern int verbosity, opt_level;
 | 
			
		||||
extern bool stack_layout_comments;
 | 
			
		||||
extern std::string generated_from, boc_output_filename;
 | 
			
		||||
extern ReadCallback::Callback read_callback;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1764,6 +1651,7 @@ class GlobalPragma {
 | 
			
		|||
 public:
 | 
			
		||||
  explicit GlobalPragma(std::string name) : name_(std::move(name)) {
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  const std::string& name() const {
 | 
			
		||||
    return name_;
 | 
			
		||||
  }
 | 
			
		||||
| 
						 | 
				
			
			@ -1771,14 +1659,12 @@ class GlobalPragma {
 | 
			
		|||
    return enabled_;
 | 
			
		||||
  }
 | 
			
		||||
  void enable(SrcLocation loc);
 | 
			
		||||
  void check_enable_in_libs();
 | 
			
		||||
  void always_on_and_deprecated(const char *deprecated_from_v);
 | 
			
		||||
 | 
			
		||||
 private:
 | 
			
		||||
  std::string name_;
 | 
			
		||||
  bool enabled_ = false;
 | 
			
		||||
  const char *deprecated_from_v_ = nullptr;
 | 
			
		||||
  std::vector<SrcLocation> locs_;
 | 
			
		||||
};
 | 
			
		||||
extern GlobalPragma pragma_allow_post_modification, pragma_compute_asm_ltr, pragma_remove_unused_functions;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1788,7 +1674,7 @@ extern GlobalPragma pragma_allow_post_modification, pragma_compute_asm_ltr, prag
 | 
			
		|||
 *
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
int tolk_proceed(const std::vector<std::string> &sources, std::ostream &outs, std::ostream &errs);
 | 
			
		||||
int tolk_proceed(const std::string &entrypoint_file_name, std::ostream &outs, std::ostream &errs);
 | 
			
		||||
 | 
			
		||||
}  // namespace tolk
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -354,12 +354,6 @@ std::ostream& operator<<(std::ostream& os, const UnifyError& ue) {
 | 
			
		|||
  return os;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
std::string UnifyError::message() const {
 | 
			
		||||
  std::ostringstream os;
 | 
			
		||||
  print_message(os);
 | 
			
		||||
  return os.str();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void check_width_compat(TypeExpr* te1, TypeExpr* te2) {
 | 
			
		||||
  if (te1->minw > te2->maxw || te2->minw > te1->maxw) {
 | 
			
		||||
    std::ostringstream os{"cannot unify types of widths ", std::ios_base::ate};
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue