1
0
Fork 0
mirror of https://github.com/ton-blockchain/ton synced 2025-02-12 19:22:37 +00:00

[Tolk] Rewrite lexer, spaces are not mandatory anymore

The new lexer is noticeably faster and more memory-efficient
(although splitting a file into tokens is a negligible part of the whole pipeline).

But the purpose of rewriting the lexer was not just to speed it up,
but to allow writing code without spaces:
`2+2` is now 4, not a valid identifier as it was earlier.

The variety of symbols allowed in an identifier has been greatly reduced
and is now similar to other languages.

SrcLocation became 8 bytes on stack everywhere.

Command-line flags were also reworked:
- the input for the Tolk compiler is now only a single file; it's parsed, and parsing continues as new #include directives are resolved
- flags like -A, -P and so on are actually no longer needed
This commit is contained in:
tolk-vm 2024-10-31 10:59:23 +04:00
parent 0bcc0b3c12
commit f0e6470d0b
No known key found for this signature in database
GPG key ID: 7905DD7FE0324B12
26 changed files with 2042 additions and 2129 deletions

View file

@ -211,20 +211,39 @@ td::Result<fift::SourceLookup> create_mem_source_lookup(std::string main, std::s
fift_dir); fift_dir);
} }
td::Result<td::Ref<vm::Cell>> compile_asm(td::Slice asm_code, std::string fift_dir, bool is_raw) { td::Result<td::Ref<vm::Cell>> compile_asm(td::Slice asm_code) {
std::stringstream ss; std::stringstream ss;
std::string sb; std::string sb;
sb.reserve(asm_code.size() + 100); sb.reserve(asm_code.size() + 100);
sb.append("\"Asm.fif\" include\n "); sb.append("\"Asm.fif\" include\n <{\n");
sb.append(is_raw ? "<{" : "");
sb.append(asm_code.data(), asm_code.size()); sb.append(asm_code.data(), asm_code.size());
sb.append(is_raw ? "}>c" : ""); sb.append("\n}>c boc>B \"res\" B>file");
sb.append(" boc>B \"res\" B>file");
TRY_RESULT(source_lookup, create_source_lookup(std::move(sb), true, true, true, false, false, false, false, fift_dir)); TRY_RESULT(source_lookup, create_source_lookup(std::move(sb), true, true, true, false, false, false, false));
TRY_RESULT(res, run_fift(std::move(source_lookup), &ss)); TRY_RESULT(res, run_fift(std::move(source_lookup), &ss));
TRY_RESULT(boc, res.read_file("res")); TRY_RESULT(boc, res.read_file("res"));
return vm::std_boc_deserialize(std::move(boc.data)); return vm::std_boc_deserialize(std::move(boc.data));
} }
// Runs a ready-made Fift program (program_code) and collects two artifacts that the
// appended Fift tail dumps into files: "hex" (becomes codeHashHex) and "boc" (becomes codeBoc64).
// fift_dir is forwarded as-is to create_source_lookup().
// NOTE(review): each TRY_RESULT step presumably returns early with the error of the failed call — confirm.
td::Result<CompiledProgramOutput> compile_asm_program(std::string&& program_code, const std::string& fift_dir) {
  std::string script;
  script.reserve(program_code.size() + 100);  // headroom for the two short tails appended below
  script += program_code;
  script += R"( dup hashB B>X $>B "hex" B>file)";   // dump codeHashHex into file "hex"
  script += R"( boc>B B>base64 $>B "boc" B>file)";  // dump codeBoc64 into file "boc"

  std::stringstream fift_stdout;  // handed to run_fift() as its output stream; not read afterwards
  TRY_RESULT(lookup, create_source_lookup(std::move(script), true, true, false, false, false, false, false, fift_dir));
  TRY_RESULT(fift_run, run_fift(std::move(lookup), &fift_stdout));
  TRY_RESULT(boc_file, fift_run.read_file("boc"));
  TRY_RESULT(hex_file, fift_run.read_file("hex"));

  // positional init: fiftCode, codeBoc64, codeHashHex
  return CompiledProgramOutput{
      std::move(program_code),
      std::move(boc_file.data),
      std::move(hex_file.data),
  };
}
} // namespace fift } // namespace fift

View file

@ -26,11 +26,21 @@ struct FiftOutput {
SourceLookup source_lookup; SourceLookup source_lookup;
std::string output; std::string output;
}; };
// Result of compile_asm_program(): given valid Fift code `PROGRAM{ ... }END>c`,
// that function runs it and returns the three strings below.
// Currently used primarily for wasm output (see tolk-js, for example).
// Member order matters: the struct is brace-initialized positionally.
struct CompiledProgramOutput {
// the Fift program text that was passed in for compilation
std::string fiftCode;
// contents of the "boc" file written by the Fift run (code BoC, base64-encoded)
std::string codeBoc64;
// contents of the "hex" file written by the Fift run (code hash, hex-encoded)
std::string codeHashHex;
};
td::Result<fift::SourceLookup> create_mem_source_lookup(std::string main, std::string fift_dir = "", td::Result<fift::SourceLookup> create_mem_source_lookup(std::string main, std::string fift_dir = "",
bool need_preamble = true, bool need_asm = true, bool need_preamble = true, bool need_asm = true,
bool need_ton_util = true, bool need_lisp = true, bool need_ton_util = true, bool need_lisp = true,
bool need_w3_code = true); bool need_w3_code = true);
td::Result<FiftOutput> mem_run_fift(std::string source, std::vector<std::string> args = {}, std::string fift_dir = ""); td::Result<FiftOutput> mem_run_fift(std::string source, std::vector<std::string> args = {}, std::string fift_dir = "");
td::Result<FiftOutput> mem_run_fift(SourceLookup source_lookup, std::vector<std::string> args); td::Result<FiftOutput> mem_run_fift(SourceLookup source_lookup, std::vector<std::string> args);
td::Result<td::Ref<vm::Cell>> compile_asm(td::Slice asm_code, std::string fift_dir = "", bool is_raw = true); td::Result<td::Ref<vm::Cell>> compile_asm(td::Slice asm_code);
td::Result<CompiledProgramOutput> compile_asm_program(std::string&& program_code, const std::string& fift_dir);
} // namespace fift } // namespace fift

View file

@ -37,10 +37,10 @@
td::Result<std::string> compile_internal(char *config_json) { td::Result<std::string> compile_internal(char *config_json) {
TRY_RESULT(input_json, td::json_decode(td::MutableSlice(config_json))) TRY_RESULT(input_json, td::json_decode(td::MutableSlice(config_json)))
auto &obj = input_json.get_object(); td::JsonObject& config = input_json.get_object();
TRY_RESULT(opt_level, td::get_json_object_int_field(obj, "optLevel", false)); TRY_RESULT(opt_level, td::get_json_object_int_field(config, "optLevel", false));
TRY_RESULT(sources_obj, td::get_json_object_field(obj, "sources", td::JsonValue::Type::Array, false)); TRY_RESULT(sources_obj, td::get_json_object_field(config, "sources", td::JsonValue::Type::Array, false));
auto &sources_arr = sources_obj.get_array(); auto &sources_arr = sources_obj.get_array();
@ -52,29 +52,25 @@ td::Result<std::string> compile_internal(char *config_json) {
funC::opt_level = std::max(0, opt_level); funC::opt_level = std::max(0, opt_level);
funC::program_envelope = true; funC::program_envelope = true;
funC::asm_preamble = true;
funC::verbosity = 0; funC::verbosity = 0;
funC::indent = 1; funC::indent = 1;
std::ostringstream outs, errs; std::ostringstream outs, errs;
auto compile_res = funC::func_proceed(sources, outs, errs); int funC_res = funC::func_proceed(sources, outs, errs);
if (funC_res != 0) {
if (compile_res != 0) { return td::Status::Error("FunC compilation error: " + errs.str());
return td::Status::Error(std::string("Func compilation error: ") + errs.str());
} }
TRY_RESULT(code_cell, fift::compile_asm(outs.str(), "/fiftlib/", false)); TRY_RESULT(fift_res, fift::compile_asm_program(outs.str(), "/fiftlib/"));
TRY_RESULT(boc, vm::std_boc_serialize(code_cell));
td::JsonBuilder result_json; td::JsonBuilder result_json;
auto result_obj = result_json.enter_object(); auto obj = result_json.enter_object();
result_obj("status", "ok"); obj("status", "ok");
result_obj("codeBoc", td::base64_encode(boc)); obj("fiftCode", std::move(fift_res.fiftCode));
result_obj("fiftCode", outs.str()); obj("codeBoc", std::move(fift_res.codeBoc64));
result_obj("codeHashHex", code_cell->get_hash().to_hex()); obj("codeHashHex", std::move(fift_res.codeHashHex));
result_obj.leave(); obj.leave();
outs.clear();
errs.clear();
return result_json.string_builder().as_cslice().str(); return result_json.string_builder().as_cslice().str();
} }

View file

@ -572,9 +572,9 @@ int atanh_f261(int x, int n) inline_ref {
s -= 1; s -= 1;
} }
x += t; x += t;
int 2x = 2 * x; int `2x` = 2 * x;
int y = lshift256divr(2x, (x >> 1) - t); int y = lshift256divr(`2x`, (x >> 1) - t);
;; y = 2x - (mulrshiftr256(2x, y) ~>> 2); ;; this line could improve precision on very rare occasions ;; y = `2x` - (mulrshiftr256(2x, y) ~>> 2); ;; this line could improve precision on very rare occasions
return (atanh_f258(y, 36), s); return (atanh_f258(y, 36), s);
} }

View file

@ -1,10 +1,9 @@
cmake_minimum_required(VERSION 3.5 FATAL_ERROR) cmake_minimum_required(VERSION 3.5 FATAL_ERROR)
set(TOLK_SOURCE set(TOLK_SOURCE
srcread.cpp src-file.cpp
lexer.cpp lexer.cpp
symtable.cpp symtable.cpp
keywords.cpp
unify-types.cpp unify-types.cpp
parse-tolk.cpp parse-tolk.cpp
abscode.cpp abscode.cpp

View file

@ -24,29 +24,19 @@ namespace tolk {
* *
*/ */
TmpVar::TmpVar(var_idx_t _idx, int _cls, TypeExpr* _type, SymDef* sym, const SrcLocation* loc) TmpVar::TmpVar(var_idx_t _idx, int _cls, TypeExpr* _type, SymDef* sym, SrcLocation loc)
: v_type(_type), idx(_idx), cls(_cls), coord(0) { : v_type(_type), idx(_idx), cls(_cls), coord(0), where(loc) {
if (sym) { if (sym) {
name = sym->sym_idx; name = sym->sym_idx;
sym->value->idx = _idx; sym->value->idx = _idx;
} }
if (loc) {
where = std::make_unique<SrcLocation>(*loc);
}
if (!_type) { if (!_type) {
v_type = TypeExpr::new_hole(); v_type = TypeExpr::new_hole();
} }
if (cls == _Named) {
undefined = true;
}
} }
void TmpVar::set_location(const SrcLocation& loc) { void TmpVar::set_location(SrcLocation loc) {
if (where) { where = loc;
*where = loc;
} else {
where = std::make_unique<SrcLocation>(loc);
}
} }
void TmpVar::dump(std::ostream& os) const { void TmpVar::dump(std::ostream& os) const {
@ -469,10 +459,10 @@ void CodeBlob::print(std::ostream& os, int flags) const {
if ((flags & 8) != 0) { if ((flags & 8) != 0) {
for (const auto& var : vars) { for (const auto& var : vars) {
var.dump(os); var.dump(os);
if (var.where && (flags & 1) != 0) { if (var.where.is_defined() && (flags & 1) != 0) {
var.where->show(os); var.where.show(os);
os << " defined here:\n"; os << " defined here:\n";
var.where->show_context(os); var.where.show_context(os);
} }
} }
} }
@ -483,7 +473,7 @@ void CodeBlob::print(std::ostream& os, int flags) const {
os << "-------- END ---------\n\n"; os << "-------- END ---------\n\n";
} }
var_idx_t CodeBlob::create_var(int cls, TypeExpr* var_type, SymDef* sym, const SrcLocation* location) { var_idx_t CodeBlob::create_var(int cls, TypeExpr* var_type, SymDef* sym, SrcLocation location) {
vars.emplace_back(var_cnt, cls, var_type, sym, location); vars.emplace_back(var_cnt, cls, var_type, sym, location);
if (sym) { if (sym) {
sym->value->idx = var_cnt; sym->value->idx = var_cnt;
@ -501,7 +491,7 @@ bool CodeBlob::import_params(FormalArgList arg_list) {
SymDef* arg_sym; SymDef* arg_sym;
SrcLocation arg_loc; SrcLocation arg_loc;
std::tie(arg_type, arg_sym, arg_loc) = par; std::tie(arg_type, arg_sym, arg_loc) = par;
list.push_back(create_var(arg_sym ? (TmpVar::_In | TmpVar::_Named) : TmpVar::_In, arg_type, arg_sym, &arg_loc)); list.push_back(create_var(arg_sym ? (TmpVar::_In | TmpVar::_Named) : TmpVar::_In, arg_type, arg_sym, arg_loc));
} }
emplace_back(loc, Op::_Import, list); emplace_back(loc, Op::_Import, list);
in_var_cnt = var_cnt; in_var_cnt = var_cnt;

View file

@ -36,7 +36,7 @@ int CodeBlob::split_vars(bool strict) {
for (int j = 0; j < var_cnt; j++) { for (int j = 0; j < var_cnt; j++) {
TmpVar& var = vars[j]; TmpVar& var = vars[j];
if (strict && var.v_type->minw != var.v_type->maxw) { if (strict && var.v_type->minw != var.v_type->maxw) {
throw ParseError{var.where.get(), "variable does not have fixed width, cannot manipulate it"}; throw ParseError{var.where, "variable does not have fixed width, cannot manipulate it"};
} }
std::vector<TypeExpr*> comp_types; std::vector<TypeExpr*> comp_types;
int k = var.v_type->extract_components(comp_types); int k = var.v_type->extract_components(comp_types);
@ -45,7 +45,7 @@ int CodeBlob::split_vars(bool strict) {
if (k != 1) { if (k != 1) {
var.coord = ~((n << 8) + k); var.coord = ~((n << 8) + k);
for (int i = 0; i < k; i++) { for (int i = 0; i < k; i++) {
auto v = create_var(vars[j].cls, comp_types[i], 0, vars[j].where.get()); auto v = create_var(vars[j].cls, comp_types[i], 0, vars[j].where);
tolk_assert(v == n + i); tolk_assert(v == n + i);
tolk_assert(vars[v].idx == v); tolk_assert(vars[v].idx == v);
vars[v].name = vars[j].name; vars[v].name = vars[j].name;
@ -54,7 +54,7 @@ int CodeBlob::split_vars(bool strict) {
n += k; n += k;
++changes; ++changes;
} else if (strict && var.v_type->minw != 1) { } else if (strict && var.v_type->minw != 1) {
throw ParseError{var.where.get(), throw ParseError{var.where,
"cannot work with variable or variable component of width greater than one"}; "cannot work with variable or variable component of width greater than one"};
} }
} }

View file

@ -95,7 +95,7 @@ SymDef* define_builtin_const(std::string name, TypeExpr* const_type, Args&&... a
} }
bool SymValAsmFunc::compile(AsmOpList& dest, std::vector<VarDescr>& out, std::vector<VarDescr>& in, bool SymValAsmFunc::compile(AsmOpList& dest, std::vector<VarDescr>& out, std::vector<VarDescr>& in,
const SrcLocation& where) const { SrcLocation where) const {
if (simple_compile) { if (simple_compile) {
return dest.append(simple_compile(out, in, where)); return dest.append(simple_compile(out, in, where));
} else if (ext_compile) { } else if (ext_compile) {
@ -186,7 +186,7 @@ int emulate_mul(int a, int b) {
return r; return r;
} }
int emulate_and(int a, int b) { int emulate_bitwise_and(int a, int b) {
int both = a & b, any = a | b; int both = a & b, any = a | b;
int r = VarDescr::_Int; int r = VarDescr::_Int;
if (any & VarDescr::_Nan) { if (any & VarDescr::_Nan) {
@ -204,7 +204,7 @@ int emulate_and(int a, int b) {
return r; return r;
} }
int emulate_or(int a, int b) { int emulate_bitwise_or(int a, int b) {
if (b & VarDescr::_Zero) { if (b & VarDescr::_Zero) {
return a; return a;
} else if (a & VarDescr::_Zero) { } else if (a & VarDescr::_Zero) {
@ -222,7 +222,7 @@ int emulate_or(int a, int b) {
return r; return r;
} }
int emulate_xor(int a, int b) { int emulate_bitwise_xor(int a, int b) {
if (b & VarDescr::_Zero) { if (b & VarDescr::_Zero) {
return a; return a;
} else if (a & VarDescr::_Zero) { } else if (a & VarDescr::_Zero) {
@ -241,7 +241,7 @@ int emulate_xor(int a, int b) {
return r; return r;
} }
int emulate_not(int a) { int emulate_bitwise_not(int a) {
if ((a & VarDescr::ConstZero) == VarDescr::ConstZero) { if ((a & VarDescr::ConstZero) == VarDescr::ConstZero) {
return VarDescr::ConstTrue; return VarDescr::ConstTrue;
} }
@ -436,7 +436,7 @@ AsmOp push_const(td::RefInt256 x) {
return AsmOp::IntConst(std::move(x)); return AsmOp::IntConst(std::move(x));
} }
AsmOp compile_add(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where) { AsmOp compile_add(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where) {
tolk_assert(res.size() == 1 && args.size() == 2); tolk_assert(res.size() == 1 && args.size() == 2);
VarDescr &r = res[0], &x = args[0], &y = args[1]; VarDescr &r = res[0], &x = args[0], &y = args[1];
if (x.is_int_const() && y.is_int_const()) { if (x.is_int_const() && y.is_int_const()) {
@ -478,7 +478,7 @@ AsmOp compile_add(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const
return exec_op("ADD", 2); return exec_op("ADD", 2);
} }
AsmOp compile_sub(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where) { AsmOp compile_sub(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where) {
tolk_assert(res.size() == 1 && args.size() == 2); tolk_assert(res.size() == 1 && args.size() == 2);
VarDescr &r = res[0], &x = args[0], &y = args[1]; VarDescr &r = res[0], &x = args[0], &y = args[1];
if (x.is_int_const() && y.is_int_const()) { if (x.is_int_const() && y.is_int_const()) {
@ -511,7 +511,7 @@ AsmOp compile_sub(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const
return exec_op("SUB", 2); return exec_op("SUB", 2);
} }
AsmOp compile_negate(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where) { AsmOp compile_unary_minus(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where) {
tolk_assert(res.size() == 1 && args.size() == 1); tolk_assert(res.size() == 1 && args.size() == 1);
VarDescr &r = res[0], &x = args[0]; VarDescr &r = res[0], &x = args[0];
if (x.is_int_const()) { if (x.is_int_const()) {
@ -526,7 +526,19 @@ AsmOp compile_negate(std::vector<VarDescr>& res, std::vector<VarDescr>& args, co
return exec_op("NEGATE", 1); return exec_op("NEGATE", 1);
} }
AsmOp compile_and(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where) { AsmOp compile_unary_plus(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where) {
tolk_assert(res.size() == 1 && args.size() == 1);
VarDescr &r = res[0], &x = args[0];
if (x.is_int_const()) {
r.set_const(x.int_const);
x.unused();
return push_const(r.int_const);
}
r.val = x.val;
return AsmOp::Nop();
}
AsmOp compile_bitwise_and(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where) {
tolk_assert(res.size() == 1 && args.size() == 2); tolk_assert(res.size() == 1 && args.size() == 2);
VarDescr &r = res[0], &x = args[0], &y = args[1]; VarDescr &r = res[0], &x = args[0], &y = args[1];
if (x.is_int_const() && y.is_int_const()) { if (x.is_int_const() && y.is_int_const()) {
@ -535,11 +547,11 @@ AsmOp compile_and(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const
y.unused(); y.unused();
return push_const(r.int_const); return push_const(r.int_const);
} }
r.val = emulate_and(x.val, y.val); r.val = emulate_bitwise_and(x.val, y.val);
return exec_op("AND", 2); return exec_op("AND", 2);
} }
AsmOp compile_or(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where) { AsmOp compile_bitwise_or(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where) {
tolk_assert(res.size() == 1 && args.size() == 2); tolk_assert(res.size() == 1 && args.size() == 2);
VarDescr &r = res[0], &x = args[0], &y = args[1]; VarDescr &r = res[0], &x = args[0], &y = args[1];
if (x.is_int_const() && y.is_int_const()) { if (x.is_int_const() && y.is_int_const()) {
@ -548,11 +560,11 @@ AsmOp compile_or(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const
y.unused(); y.unused();
return push_const(r.int_const); return push_const(r.int_const);
} }
r.val = emulate_or(x.val, y.val); r.val = emulate_bitwise_or(x.val, y.val);
return exec_op("OR", 2); return exec_op("OR", 2);
} }
AsmOp compile_xor(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where) { AsmOp compile_bitwise_xor(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where) {
tolk_assert(res.size() == 1 && args.size() == 2); tolk_assert(res.size() == 1 && args.size() == 2);
VarDescr &r = res[0], &x = args[0], &y = args[1]; VarDescr &r = res[0], &x = args[0], &y = args[1];
if (x.is_int_const() && y.is_int_const()) { if (x.is_int_const() && y.is_int_const()) {
@ -561,11 +573,11 @@ AsmOp compile_xor(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const
y.unused(); y.unused();
return push_const(r.int_const); return push_const(r.int_const);
} }
r.val = emulate_xor(x.val, y.val); r.val = emulate_bitwise_xor(x.val, y.val);
return exec_op("XOR", 2); return exec_op("XOR", 2);
} }
AsmOp compile_not(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where) { AsmOp compile_bitwise_not(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where) {
tolk_assert(res.size() == 1 && args.size() == 1); tolk_assert(res.size() == 1 && args.size() == 1);
VarDescr &r = res[0], &x = args[0]; VarDescr &r = res[0], &x = args[0];
if (x.is_int_const()) { if (x.is_int_const()) {
@ -573,11 +585,11 @@ AsmOp compile_not(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const
x.unused(); x.unused();
return push_const(r.int_const); return push_const(r.int_const);
} }
r.val = emulate_not(x.val); r.val = emulate_bitwise_not(x.val);
return exec_op("NOT", 1); return exec_op("NOT", 1);
} }
AsmOp compile_mul_internal(VarDescr& r, VarDescr& x, VarDescr& y, const SrcLocation& where) { AsmOp compile_mul_internal(VarDescr& r, VarDescr& x, VarDescr& y, SrcLocation where) {
if (x.is_int_const() && y.is_int_const()) { if (x.is_int_const() && y.is_int_const()) {
r.set_const(x.int_const * y.int_const); r.set_const(x.int_const * y.int_const);
if (!r.int_const->is_valid()) { if (!r.int_const->is_valid()) {
@ -645,12 +657,12 @@ AsmOp compile_mul_internal(VarDescr& r, VarDescr& x, VarDescr& y, const SrcLocat
return exec_op("MUL", 2); return exec_op("MUL", 2);
} }
AsmOp compile_mul(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where) { AsmOp compile_mul(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where) {
tolk_assert(res.size() == 1 && args.size() == 2); tolk_assert(res.size() == 1 && args.size() == 2);
return compile_mul_internal(res[0], args[0], args[1], where); return compile_mul_internal(res[0], args[0], args[1], where);
} }
AsmOp compile_lshift(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where) { AsmOp compile_lshift(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where) {
tolk_assert(res.size() == 1 && args.size() == 2); tolk_assert(res.size() == 1 && args.size() == 2);
VarDescr &r = res[0], &x = args[0], &y = args[1]; VarDescr &r = res[0], &x = args[0], &y = args[1];
if (y.is_int_const()) { if (y.is_int_const()) {
@ -692,7 +704,7 @@ AsmOp compile_lshift(std::vector<VarDescr>& res, std::vector<VarDescr>& args, co
return exec_op("LSHIFT", 2); return exec_op("LSHIFT", 2);
} }
AsmOp compile_rshift(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where, AsmOp compile_rshift(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where,
int round_mode) { int round_mode) {
tolk_assert(res.size() == 1 && args.size() == 2); tolk_assert(res.size() == 1 && args.size() == 2);
VarDescr &r = res[0], &x = args[0], &y = args[1]; VarDescr &r = res[0], &x = args[0], &y = args[1];
@ -722,7 +734,7 @@ AsmOp compile_rshift(std::vector<VarDescr>& res, std::vector<VarDescr>& args, co
return exec_op(rshift, 2); return exec_op(rshift, 2);
} }
AsmOp compile_div_internal(VarDescr& r, VarDescr& x, VarDescr& y, const SrcLocation& where, int round_mode) { AsmOp compile_div_internal(VarDescr& r, VarDescr& x, VarDescr& y, SrcLocation where, int round_mode) {
if (x.is_int_const() && y.is_int_const()) { if (x.is_int_const() && y.is_int_const()) {
r.set_const(div(x.int_const, y.int_const, round_mode)); r.set_const(div(x.int_const, y.int_const, round_mode));
if (!r.int_const->is_valid()) { if (!r.int_const->is_valid()) {
@ -762,12 +774,12 @@ AsmOp compile_div_internal(VarDescr& r, VarDescr& x, VarDescr& y, const SrcLocat
return exec_op(op, 2); return exec_op(op, 2);
} }
AsmOp compile_div(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where, int round_mode) { AsmOp compile_div(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where, int round_mode) {
tolk_assert(res.size() == 1 && args.size() == 2); tolk_assert(res.size() == 1 && args.size() == 2);
return compile_div_internal(res[0], args[0], args[1], where, round_mode); return compile_div_internal(res[0], args[0], args[1], where, round_mode);
} }
AsmOp compile_mod(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where, AsmOp compile_mod(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where,
int round_mode) { int round_mode) {
tolk_assert(res.size() == 1 && args.size() == 2); tolk_assert(res.size() == 1 && args.size() == 2);
VarDescr &r = res[0], &x = args[0], &y = args[1]; VarDescr &r = res[0], &x = args[0], &y = args[1];
@ -808,7 +820,7 @@ AsmOp compile_mod(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const
return exec_op(op, 2); return exec_op(op, 2);
} }
AsmOp compile_muldiv(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where, AsmOp compile_muldiv(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where,
int round_mode) { int round_mode) {
tolk_assert(res.size() == 1 && args.size() == 3); tolk_assert(res.size() == 1 && args.size() == 3);
VarDescr &r = res[0], &x = args[0], &y = args[1], &z = args[2]; VarDescr &r = res[0], &x = args[0], &y = args[1], &z = args[2];
@ -978,7 +990,7 @@ AsmOp compile_cmp_int(std::vector<VarDescr>& res, std::vector<VarDescr>& args, i
return exec_op(cmp_names[mode], 2); return exec_op(cmp_names[mode], 2);
} }
AsmOp compile_throw(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation&) { AsmOp compile_throw(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation) {
tolk_assert(res.empty() && args.size() == 1); tolk_assert(res.empty() && args.size() == 1);
VarDescr& x = args[0]; VarDescr& x = args[0];
if (x.is_int_const() && x.int_const->unsigned_fits_bits(11)) { if (x.is_int_const() && x.int_const->unsigned_fits_bits(11)) {
@ -1010,7 +1022,7 @@ AsmOp compile_cond_throw(std::vector<VarDescr>& res, std::vector<VarDescr>& args
} }
} }
AsmOp compile_throw_arg(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation&) { AsmOp compile_throw_arg(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation) {
tolk_assert(res.empty() && args.size() == 2); tolk_assert(res.empty() && args.size() == 2);
VarDescr &x = args[1]; VarDescr &x = args[1];
if (x.is_int_const() && x.int_const->unsigned_fits_bits(11)) { if (x.is_int_const() && x.int_const->unsigned_fits_bits(11)) {
@ -1101,7 +1113,7 @@ AsmOp compile_fetch_slice(std::vector<VarDescr>& res, std::vector<VarDescr>& arg
} }
// <type> <type>_at(tuple t, int index) asm "INDEXVAR"; // <type> <type>_at(tuple t, int index) asm "INDEXVAR";
AsmOp compile_tuple_at(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation&) { AsmOp compile_tuple_at(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation) {
tolk_assert(args.size() == 2 && res.size() == 1); tolk_assert(args.size() == 2 && res.size() == 1);
auto& y = args[1]; auto& y = args[1];
if (y.is_int_const() && y.int_const >= 0 && y.int_const < 16) { if (y.is_int_const() && y.int_const >= 0 && y.int_const < 16) {
@ -1112,7 +1124,7 @@ AsmOp compile_tuple_at(std::vector<VarDescr>& res, std::vector<VarDescr>& args,
} }
// int null?(X arg) // int null?(X arg)
AsmOp compile_is_null(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation&) { AsmOp compile_is_null(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation) {
tolk_assert(args.size() == 1 && res.size() == 1); tolk_assert(args.size() == 1 && res.size() == 1);
auto &x = args[0], &r = res[0]; auto &x = args[0], &r = res[0];
if (x.always_null() || x.always_not_null()) { if (x.always_null() || x.always_not_null()) {
@ -1128,12 +1140,12 @@ AsmOp compile_is_null(std::vector<VarDescr>& res, std::vector<VarDescr>& args, c
void define_builtins() { void define_builtins() {
using namespace std::placeholders; using namespace std::placeholders;
auto Unit = TypeExpr::new_unit(); auto Unit = TypeExpr::new_unit();
auto Int = TypeExpr::new_atomic(_Int); auto Int = TypeExpr::new_atomic(TypeExpr::_Int);
auto Cell = TypeExpr::new_atomic(_Cell); auto Cell = TypeExpr::new_atomic(TypeExpr::_Cell);
auto Slice = TypeExpr::new_atomic(_Slice); auto Slice = TypeExpr::new_atomic(TypeExpr::_Slice);
auto Builder = TypeExpr::new_atomic(_Builder); auto Builder = TypeExpr::new_atomic(TypeExpr::_Builder);
// auto Null = TypeExpr::new_atomic(_Null); // auto Null = TypeExpr::new_atomic(TypeExpr::_Null);
auto Tuple = TypeExpr::new_atomic(_Tuple); auto Tuple = TypeExpr::new_atomic(TypeExpr::_Tuple);
auto Int2 = TypeExpr::new_tensor({Int, Int}); auto Int2 = TypeExpr::new_tensor({Int, Int});
auto Int3 = TypeExpr::new_tensor({Int, Int, Int}); auto Int3 = TypeExpr::new_tensor({Int, Int, Int});
auto TupleInt = TypeExpr::new_tensor({Tuple, Int}); auto TupleInt = TypeExpr::new_tensor({Tuple, Int});
@ -1156,9 +1168,16 @@ void define_builtins() {
//auto arith_null_op = TypeExpr::new_map(TypeExpr::new_unit(), Int); //auto arith_null_op = TypeExpr::new_map(TypeExpr::new_unit(), Int);
auto throw_arg_op = TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_tensor({X, Int}), Unit)); auto throw_arg_op = TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_tensor({X, Int}), Unit));
auto cond_throw_arg_op = TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_tensor({X, Int, Int}), Unit)); auto cond_throw_arg_op = TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_tensor({X, Int, Int}), Unit));
// prevent unused vars warnings (these vars are created to acquire initial id of TypeExpr::value)
static_cast<void>(Z);
static_cast<void>(XY);
static_cast<void>(Cell);
define_builtin_func("_+_", arith_bin_op, compile_add); define_builtin_func("_+_", arith_bin_op, compile_add);
define_builtin_func("_-_", arith_bin_op, compile_sub); define_builtin_func("_-_", arith_bin_op, compile_sub);
define_builtin_func("-_", arith_un_op, compile_negate); define_builtin_func("-_", arith_un_op, compile_unary_minus);
define_builtin_func("+_", arith_un_op, compile_unary_plus);
define_builtin_func("_*_", arith_bin_op, compile_mul); define_builtin_func("_*_", arith_bin_op, compile_mul);
define_builtin_func("_/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, -1)); define_builtin_func("_/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, -1));
define_builtin_func("_~/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 0)); define_builtin_func("_~/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 0));
@ -1175,10 +1194,10 @@ void define_builtins() {
define_builtin_func("_>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1)); define_builtin_func("_>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1));
define_builtin_func("_~>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 0)); define_builtin_func("_~>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 0));
define_builtin_func("_^>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 1)); define_builtin_func("_^>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 1));
define_builtin_func("_&_", arith_bin_op, compile_and); define_builtin_func("_&_", arith_bin_op, compile_bitwise_and);
define_builtin_func("_|_", arith_bin_op, compile_or); define_builtin_func("_|_", arith_bin_op, compile_bitwise_or);
define_builtin_func("_^_", arith_bin_op, compile_xor); define_builtin_func("_^_", arith_bin_op, compile_bitwise_xor);
define_builtin_func("~_", arith_un_op, compile_not); define_builtin_func("~_", arith_un_op, compile_bitwise_not);
define_builtin_func("^_+=_", arith_bin_op, compile_add); define_builtin_func("^_+=_", arith_bin_op, compile_add);
define_builtin_func("^_-=_", arith_bin_op, compile_sub); define_builtin_func("^_-=_", arith_bin_op, compile_sub);
define_builtin_func("^_*=_", arith_bin_op, compile_mul); define_builtin_func("^_*=_", arith_bin_op, compile_mul);
@ -1192,9 +1211,9 @@ void define_builtins() {
define_builtin_func("^_>>=_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1)); define_builtin_func("^_>>=_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1));
define_builtin_func("^_~>>=_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 0)); define_builtin_func("^_~>>=_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 0));
define_builtin_func("^_^>>=_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 1)); define_builtin_func("^_^>>=_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 1));
define_builtin_func("^_&=_", arith_bin_op, compile_and); define_builtin_func("^_&=_", arith_bin_op, compile_bitwise_and);
define_builtin_func("^_|=_", arith_bin_op, compile_or); define_builtin_func("^_|=_", arith_bin_op, compile_bitwise_or);
define_builtin_func("^_^=_", arith_bin_op, compile_xor); define_builtin_func("^_^=_", arith_bin_op, compile_bitwise_xor);
define_builtin_func("muldiv", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, -1)); define_builtin_func("muldiv", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, -1));
define_builtin_func("muldivr", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 0)); define_builtin_func("muldivr", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 0));
define_builtin_func("muldivc", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 1)); define_builtin_func("muldivc", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 1));

View file

@ -41,25 +41,19 @@ Expr::Expr(ExprCls c, sym_idx_t name_idx, std::initializer_list<Expr*> _arglist)
} }
} }
void Expr::chk_rvalue(const Lexem& lem) const { void Expr::chk_rvalue(const Lexer& lex) const {
if (!is_rvalue()) { if (!is_rvalue()) {
lem.error_at("rvalue expected before `", "`"); lex.error_at("rvalue expected before `", "`");
} }
} }
void Expr::chk_lvalue(const Lexem& lem) const { void Expr::chk_lvalue(const Lexer& lex) const {
if (!is_lvalue()) { if (!is_lvalue()) {
lem.error_at("lvalue expected before `", "`"); lex.error_at("lvalue expected before `", "`");
} }
} }
void Expr::chk_type(const Lexem& lem) const { bool Expr::deduce_type(const Lexer& lex) {
if (!is_type()) {
lem.error_at("type expression expected before `", "`");
}
}
bool Expr::deduce_type(const Lexem& lem) {
if (e_type) { if (e_type) {
return true; return true;
} }
@ -83,7 +77,7 @@ bool Expr::deduce_type(const Lexem& lem) {
std::ostringstream os; std::ostringstream os;
os << "cannot apply function " << sym->name() << " : " << sym_val->get_type() << " to arguments of type " os << "cannot apply function " << sym->name() << " : " << sym_val->get_type() << " to arguments of type "
<< fun_type->args[0] << ": " << ue; << fun_type->args[0] << ": " << ue;
lem.error(os.str()); lex.error(os.str());
} }
e_type = fun_type->args[1]; e_type = fun_type->args[1];
TypeExpr::remove_indirect(e_type); TypeExpr::remove_indirect(e_type);
@ -98,7 +92,7 @@ bool Expr::deduce_type(const Lexem& lem) {
std::ostringstream os; std::ostringstream os;
os << "cannot apply expression of type " << args[0]->e_type << " to an expression of type " << args[1]->e_type os << "cannot apply expression of type " << args[0]->e_type << " to an expression of type " << args[1]->e_type
<< ": " << ue; << ": " << ue;
lem.error(os.str()); lex.error(os.str());
} }
e_type = fun_type->args[1]; e_type = fun_type->args[1];
TypeExpr::remove_indirect(e_type); TypeExpr::remove_indirect(e_type);
@ -113,7 +107,7 @@ bool Expr::deduce_type(const Lexem& lem) {
std::ostringstream os; std::ostringstream os;
os << "cannot assign an expression of type " << args[1]->e_type << " to a variable or pattern of type " os << "cannot assign an expression of type " << args[1]->e_type << " to a variable or pattern of type "
<< args[0]->e_type << ": " << ue; << args[0]->e_type << ": " << ue;
lem.error(os.str()); lex.error(os.str());
} }
e_type = args[0]->e_type; e_type = args[0]->e_type;
TypeExpr::remove_indirect(e_type); TypeExpr::remove_indirect(e_type);
@ -130,7 +124,7 @@ bool Expr::deduce_type(const Lexem& lem) {
os << "cannot implicitly assign an expression of type " << args[1]->e_type os << "cannot implicitly assign an expression of type " << args[1]->e_type
<< " to a variable or pattern of type " << rhs_type << " in modifying method `" << symbols.get_name(val) << " to a variable or pattern of type " << rhs_type << " in modifying method `" << symbols.get_name(val)
<< "` : " << ue; << "` : " << ue;
lem.error(os.str()); lex.error(os.str());
} }
e_type = rhs_type->args[1]; e_type = rhs_type->args[1];
TypeExpr::remove_indirect(e_type); TypeExpr::remove_indirect(e_type);
@ -139,13 +133,13 @@ bool Expr::deduce_type(const Lexem& lem) {
} }
case _CondExpr: { case _CondExpr: {
tolk_assert(args.size() == 3); tolk_assert(args.size() == 3);
auto flag_type = TypeExpr::new_atomic(_Int); auto flag_type = TypeExpr::new_atomic(TypeExpr::_Int);
try { try {
unify(args[0]->e_type, flag_type); unify(args[0]->e_type, flag_type);
} catch (UnifyError& ue) { } catch (UnifyError& ue) {
std::ostringstream os; std::ostringstream os;
os << "condition in a conditional expression has non-integer type " << args[0]->e_type << ": " << ue; os << "condition in a conditional expression has non-integer type " << args[0]->e_type << ": " << ue;
lem.error(os.str()); lex.error(os.str());
} }
try { try {
unify(args[1]->e_type, args[2]->e_type); unify(args[1]->e_type, args[2]->e_type);
@ -153,7 +147,7 @@ bool Expr::deduce_type(const Lexem& lem) {
std::ostringstream os; std::ostringstream os;
os << "the two variants in a conditional expression have different types " << args[1]->e_type << " and " os << "the two variants in a conditional expression have different types " << args[1]->e_type << " and "
<< args[2]->e_type << " : " << ue; << args[2]->e_type << " : " << ue;
lem.error(os.str()); lex.error(os.str());
} }
e_type = args[1]->e_type; e_type = args[1]->e_type;
TypeExpr::remove_indirect(e_type); TypeExpr::remove_indirect(e_type);
@ -176,13 +170,13 @@ int Expr::define_new_vars(CodeBlob& code) {
} }
case _Var: case _Var:
if (val < 0) { if (val < 0) {
val = code.create_var(TmpVar::_Named, e_type, sym, &here); val = code.create_var(TmpVar::_Named, e_type, sym, here);
return 1; return 1;
} }
break; break;
case _Hole: case _Hole:
if (val < 0) { if (val < 0) {
val = code.create_var(TmpVar::_Tmp, e_type, nullptr, &here); val = code.create_var(TmpVar::_Tmp, e_type, nullptr, here);
} }
break; break;
} }
@ -202,7 +196,7 @@ int Expr::predefine_vars() {
} }
case _Var: case _Var:
if (!sym) { if (!sym) {
tolk_assert(val < 0 && here.defined()); tolk_assert(val < 0 && here.is_defined());
if (prohibited_var_names.count(symbols.get_name(~val))) { if (prohibited_var_names.count(symbols.get_name(~val))) {
throw ParseError{ throw ParseError{
here, PSTRING() << "symbol `" << symbols.get_name(~val) << "` cannot be redefined as a variable"}; here, PSTRING() << "symbol `" << symbols.get_name(~val) << "` cannot be redefined as a variable"};
@ -212,7 +206,7 @@ int Expr::predefine_vars() {
if (!sym) { if (!sym) {
throw ParseError{here, std::string{"redefined variable `"} + symbols.get_name(~val) + "`"}; throw ParseError{here, std::string{"redefined variable `"} + symbols.get_name(~val) + "`"};
} }
sym->value = new SymVal{SymVal::_Var, -1, e_type}; sym->value = new SymVal{SymValKind::_Var, -1, e_type};
return 1; return 1;
} }
break; break;
@ -221,17 +215,17 @@ int Expr::predefine_vars() {
} }
var_idx_t Expr::new_tmp(CodeBlob& code) const { var_idx_t Expr::new_tmp(CodeBlob& code) const {
return code.create_tmp_var(e_type, &here); return code.create_tmp_var(e_type, here);
} }
void add_set_globs(CodeBlob& code, std::vector<std::pair<SymDef*, var_idx_t>>& globs, const SrcLocation& here) { void add_set_globs(CodeBlob& code, std::vector<std::pair<SymDef*, var_idx_t>>& globs, SrcLocation here) {
for (const auto& p : globs) { for (const auto& p : globs) {
auto& op = code.emplace_back(here, Op::_SetGlob, std::vector<var_idx_t>{}, std::vector<var_idx_t>{ p.second }, p.first); auto& op = code.emplace_back(here, Op::_SetGlob, std::vector<var_idx_t>{}, std::vector<var_idx_t>{ p.second }, p.first);
op.set_impure(code); op.set_impure(code);
} }
} }
std::vector<var_idx_t> pre_compile_let(CodeBlob& code, Expr* lhs, Expr* rhs, const SrcLocation& here) { std::vector<var_idx_t> pre_compile_let(CodeBlob& code, Expr* lhs, Expr* rhs, SrcLocation here) {
while (lhs->is_type_apply()) { while (lhs->is_type_apply()) {
lhs = lhs->args.at(0); lhs = lhs->args.at(0);
} }
@ -245,7 +239,7 @@ std::vector<var_idx_t> pre_compile_let(CodeBlob& code, Expr* lhs, Expr* rhs, con
auto right = rhs->pre_compile(code); auto right = rhs->pre_compile(code);
TypeExpr::remove_indirect(rhs->e_type); TypeExpr::remove_indirect(rhs->e_type);
auto unpacked_type = rhs->e_type->args.at(0); auto unpacked_type = rhs->e_type->args.at(0);
std::vector<var_idx_t> tmp{code.create_tmp_var(unpacked_type, &rhs->here)}; std::vector<var_idx_t> tmp{code.create_tmp_var(unpacked_type, rhs->here)};
code.emplace_back(lhs->here, Op::_UnTuple, tmp, std::move(right)); code.emplace_back(lhs->here, Op::_UnTuple, tmp, std::move(right));
auto tvar = new Expr{Expr::_Var}; auto tvar = new Expr{Expr::_Var};
tvar->set_val(tmp[0]); tvar->set_val(tmp[0]);
@ -286,14 +280,14 @@ std::vector<var_idx_t> pre_compile_tensor(const std::vector<Expr *>& args, CodeB
for (size_t j = 0; j < res_lists[i].size(); ++j) { for (size_t j = 0; j < res_lists[i].size(); ++j) {
TmpVar& var = code.vars.at(res_lists[i][j]); TmpVar& var = code.vars.at(res_lists[i][j]);
if (!lval_globs && (var.cls & TmpVar::_Named)) { if (!lval_globs && (var.cls & TmpVar::_Named)) {
var.on_modification.push_back([&modified_vars, i, j, cur_ops = code.cur_ops, done = false](const SrcLocation &here) mutable { var.on_modification.push_back([&modified_vars, i, j, cur_ops = code.cur_ops, done = false](SrcLocation here) mutable {
if (!done) { if (!done) {
done = true; done = true;
modified_vars.push_back({i, j, cur_ops}); modified_vars.push_back({i, j, cur_ops});
} }
}); });
} else { } else {
var.on_modification.push_back([](const SrcLocation &) { var.on_modification.push_back([](SrcLocation) {
}); });
} }
} }
@ -307,8 +301,8 @@ std::vector<var_idx_t> pre_compile_tensor(const std::vector<Expr *>& args, CodeB
for (size_t idx = modified_vars.size(); idx--; ) { for (size_t idx = modified_vars.size(); idx--; ) {
const ModifiedVar &m = modified_vars[idx]; const ModifiedVar &m = modified_vars[idx];
var_idx_t orig_v = res_lists[m.i][m.j]; var_idx_t orig_v = res_lists[m.i][m.j];
var_idx_t tmp_v = code.create_tmp_var(code.vars[orig_v].v_type, code.vars[orig_v].where.get()); var_idx_t tmp_v = code.create_tmp_var(code.vars[orig_v].v_type, code.vars[orig_v].where);
std::unique_ptr<Op> op = std::make_unique<Op>(*code.vars[orig_v].where, Op::_Let); std::unique_ptr<Op> op = std::make_unique<Op>(code.vars[orig_v].where, Op::_Let);
op->left = {tmp_v}; op->left = {tmp_v};
op->right = {orig_v}; op->right = {orig_v};
op->next = std::move((*m.cur_ops)); op->next = std::move((*m.cur_ops));

View file

@ -1,129 +0,0 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tolk.h"
namespace tolk {
/*
*
* KEYWORD DEFINITION
*
*/
// Registers the lexer's keyword set in the `symbols` table.
// Single characters are added with add_kw_char(); multi-character operators and
// reserved words are added with add_keyword() together with their Keyword:: constant.
// Takes no arguments and returns nothing; its only effect is on `symbols`.
void define_keywords() {
// single-character punctuation and operators
symbols.add_kw_char('+')
.add_kw_char('-')
.add_kw_char('*')
.add_kw_char('/')
.add_kw_char('%')
.add_kw_char('?')
.add_kw_char(':')
.add_kw_char(',')
.add_kw_char(';')
.add_kw_char('(')
.add_kw_char(')')
.add_kw_char('[')
.add_kw_char(']')
.add_kw_char('{')
.add_kw_char('}')
.add_kw_char('=')
.add_kw_char('_')
.add_kw_char('<')
.add_kw_char('>')
.add_kw_char('&')
.add_kw_char('|')
.add_kw_char('^')
.add_kw_char('~');
// multi-character operators: comparisons, shifts, division/modulo variants,
// and the compound-assignment ("...Let") forms of the binary operators
symbols.add_keyword("==", Keyword::_Eq)
.add_keyword("!=", Keyword::_Neq)
.add_keyword("<=", Keyword::_Leq)
.add_keyword(">=", Keyword::_Geq)
.add_keyword("<=>", Keyword::_Spaceship)
.add_keyword("<<", Keyword::_Lshift)
.add_keyword(">>", Keyword::_Rshift)
.add_keyword("~>>", Keyword::_RshiftR)
.add_keyword("^>>", Keyword::_RshiftC)
.add_keyword("~/", Keyword::_DivR)
.add_keyword("^/", Keyword::_DivC)
.add_keyword("~%", Keyword::_ModR)
.add_keyword("^%", Keyword::_ModC)
.add_keyword("/%", Keyword::_DivMod)
.add_keyword("+=", Keyword::_PlusLet)
.add_keyword("-=", Keyword::_MinusLet)
.add_keyword("*=", Keyword::_TimesLet)
.add_keyword("/=", Keyword::_DivLet)
.add_keyword("~/=", Keyword::_DivRLet)
.add_keyword("^/=", Keyword::_DivCLet)
.add_keyword("%=", Keyword::_ModLet)
.add_keyword("~%=", Keyword::_ModRLet)
.add_keyword("^%=", Keyword::_ModCLet)
.add_keyword("<<=", Keyword::_LshiftLet)
.add_keyword(">>=", Keyword::_RshiftLet)
.add_keyword("~>>=", Keyword::_RshiftRLet)
.add_keyword("^>>=", Keyword::_RshiftCLet)
.add_keyword("&=", Keyword::_AndLet)
.add_keyword("|=", Keyword::_OrLet)
.add_keyword("^=", Keyword::_XorLet);
// statement and control-flow words
symbols.add_keyword("return", Keyword::_Return)
.add_keyword("var", Keyword::_Var)
.add_keyword("repeat", Keyword::_Repeat)
.add_keyword("do", Keyword::_Do)
.add_keyword("while", Keyword::_While)
.add_keyword("until", Keyword::_Until)
.add_keyword("try", Keyword::_Try)
.add_keyword("catch", Keyword::_Catch)
.add_keyword("if", Keyword::_If)
.add_keyword("ifnot", Keyword::_Ifnot)
.add_keyword("then", Keyword::_Then)
.add_keyword("else", Keyword::_Else)
.add_keyword("elseif", Keyword::_Elseif)
.add_keyword("elseifnot", Keyword::_Elseifnot);
// type names, plus `type`, the `->` arrow and `forall`
symbols.add_keyword("int", Keyword::_Int)
.add_keyword("cell", Keyword::_Cell)
.add_keyword("slice", Keyword::_Slice)
.add_keyword("builder", Keyword::_Builder)
.add_keyword("cont", Keyword::_Cont)
.add_keyword("tuple", Keyword::_Tuple)
.add_keyword("type", Keyword::_Type)
.add_keyword("->", Keyword::_Mapsto)
.add_keyword("forall", Keyword::_Forall);
// declaration-level words: storage, function specifiers, operator/fixity words, `const`
symbols.add_keyword("extern", Keyword::_Extern)
.add_keyword("global", Keyword::_Global)
.add_keyword("asm", Keyword::_Asm)
.add_keyword("impure", Keyword::_Impure)
.add_keyword("pure", Keyword::_Pure)
.add_keyword("inline", Keyword::_Inline)
.add_keyword("inline_ref", Keyword::_InlineRef)
.add_keyword("builtin", Keyword::_Builtin)
.add_keyword("auto_apply", Keyword::_AutoApply)
.add_keyword("method_id", Keyword::_MethodId)
.add_keyword("get", Keyword::_Get)
.add_keyword("operator", Keyword::_Operator)
.add_keyword("infix", Keyword::_Infix)
.add_keyword("infixl", Keyword::_Infixl)
.add_keyword("infixr", Keyword::_Infixr)
.add_keyword("const", Keyword::_Const);
// directives introduced by '#'
symbols.add_keyword("#pragma", Keyword::_PragmaHashtag)
.add_keyword("#include", Keyword::_IncludeHashtag);
}
} // namespace tolk

View file

@ -16,335 +16,632 @@
*/ */
#include "lexer.h" #include "lexer.h"
#include "symtable.h" #include "symtable.h"
#include <sstream>
#include <cassert> #include <cassert>
namespace tolk { namespace tolk {
/* // By 'chunk' in lexer I mean a token or a list of tokens parsed simultaneously.
* // E.g., when we meet "str", ChunkString is called, it emits tok_string.
* LEXER // E.g., when we meet "str"x, ChunkString emits not only tok_string, but tok_string_modifier.
* // E.g., when we meet //, ChunkInlineComment is called, it emits nothing (just skips a line).
*/ // We store all valid chunks lexers in a prefix tree (LexingTrie), see below.
struct ChunkLexerBase {
ChunkLexerBase(const ChunkLexerBase&) = delete;
ChunkLexerBase &operator=(const ChunkLexerBase&) = delete;
ChunkLexerBase() = default;
std::string Lexem::lexem_name_str(int idx) { virtual bool parse(Lexer* lex) const = 0;
if (idx == Eof) { virtual ~ChunkLexerBase() = default;
return "end of file"; };
} else if (idx == Ident) {
return "identifier"; template <class T>
} else if (idx == Number) { static T* singleton() {
return "number"; static T obj;
} else if (idx == String) { return &obj;
return "string";
} else if (idx == Special) {
return "special";
} else if (symbols.get_keyword(idx)) {
return "`" + symbols.get_keyword(idx)->str + "`";
} else {
std::ostringstream os{"<unknown lexem of type "};
os << idx << ">";
return os.str();
}
} }
std::string Lexem::name_str() const { // LexingTrie is a prefix tree storing all available Tolk language constructs.
if (tp == Ident) { // It's effectively a map of a prefix to ChunkLexerBase.
return std::string{"identifier `"} + symbols.get_name(val) + "`"; class LexingTrie {
} else if (tp == String) { LexingTrie** next{nullptr}; // either nullptr or [256]
return std::string{"string \""} + str + '"'; ChunkLexerBase* val{nullptr}; // non-null for leafs
} else {
return lexem_name_str(tp);
}
}
bool is_number(std::string str) { GNU_ATTRIBUTE_ALWAYS_INLINE void ensure_next_allocated() {
auto st = str.begin(), en = str.end(); if (next == nullptr) {
if (st == en) { next = new LexingTrie*[256];
return false; std::memset(next, 0, 256 * sizeof(LexingTrie*));
}
if (*st == '-') {
st++;
}
bool hex = false;
if (st + 1 < en && *st == '0' && st[1] == 'x') {
st += 2;
hex = true;
}
if (st == en) {
return false;
}
while (st < en) {
int c = *st;
if (c >= '0' && c <= '9') {
++st;
continue;
} }
if (!hex) { }
return false;
GNU_ATTRIBUTE_ALWAYS_INLINE void ensure_symbol_allocated(uint8_t symbol) const {
if (next[symbol] == nullptr) {
next[symbol] = new LexingTrie;
} }
c |= 0x20; }
if (c < 'a' || c > 'f') {
return false; public:
// Maps a prefix onto a chunk lexer.
// E.g. " -> ChunkString
// E.g. """ -> ChunkMultilineString
void add_prefix(const char* s, ChunkLexerBase* val) {
LexingTrie* cur = this;
for (; *s; ++s) {
uint8_t symbol = static_cast<uint8_t>(*s);
cur->ensure_next_allocated();
cur->ensure_symbol_allocated(symbol);
cur = cur->next[symbol];
} }
++st;
}
return true;
}
int Lexem::classify() { #ifdef TOLK_DEBUG
if (tp != Unknown) { assert(!cur->val);
return tp; #endif
cur->val = val;
} }
sym_idx_t i = symbols.lookup(str);
if (i) {
assert(str == symbols[i]->str);
str = symbols[i]->str;
sym_idx_t idx = symbols[i]->idx;
tp = (idx < 0 ? -idx : Ident);
val = i;
} else if (is_number(str)) {
tp = Number;
} else {
tp = 0;
}
if (tp == Unknown) {
tp = Ident;
val = symbols.lookup(str, 1);
}
return tp;
}
int Lexem::set(std::string _str, const SrcLocation& _loc, int _tp, int _val) { // Maps a pattern onto a chunk lexer.
str = _str; // E.g. -[0-9] -> ChunkNegativeNumber
loc = _loc; // Internally, it expands the pattern to all possible prefixes: -0, -1, etc.
tp = _tp; // (for example, [0-9][a-z_$] gives 10*28=280 prefixes)
val = _val; void add_pattern(const char* pattern, ChunkLexerBase* val) {
return classify(); std::vector<LexingTrie*> all_possible_trie{this};
}
Lexer::Lexer(SourceReader& _src, std::string active_chars, std::string quote_chars, std::string multiline_quote) for (const char* c = pattern; *c; ++c) {
: src(_src), eof(false), lexem("", src.here(), Lexem::Undefined), peek_lexem("", {}, Lexem::Undefined), std::string to_append;
multiline_quote(std::move(multiline_quote)) { if (*c == '[') {
std::memset(char_class, 0, sizeof(char_class)); c++;
unsigned char activity = cc::active; while (*c != ']') { // assume that input is correct, no out-of-string checks
for (char c : active_chars) { if (*(c + 1) == '-') {
if (c == ' ') { char l = *c, r = *(c + 2);
if (!--activity) { for (char symbol = l; symbol <= r; ++symbol) {
activity = cc::allow_repeat; to_append += symbol;
}
c += 3;
} else {
to_append += *c;
c++;
}
}
} else {
to_append += *c;
} }
} else if ((unsigned)c < 0x80) {
char_class[(unsigned)c] |= activity; std::vector<LexingTrie*> next_all_possible_trie;
next_all_possible_trie.reserve(all_possible_trie.size() * to_append.size());
for (LexingTrie* cur : all_possible_trie) {
cur->ensure_next_allocated();
for (uint8_t symbol : to_append) {
cur->ensure_symbol_allocated(symbol);
next_all_possible_trie.emplace_back(cur->next[symbol]);
}
}
all_possible_trie = std::move(next_all_possible_trie);
}
for (LexingTrie* trie : all_possible_trie) {
trie->val = val;
} }
} }
for (int c : quote_chars) {
if (c > ' ' && c <= 0x7f) {
char_class[(unsigned)c] |= cc::quote_char;
}
}
}
void Lexer::set_comment_tokens(const std::string &eol_cmts, const std::string &open_cmts, const std::string &close_cmts) { // Looks up a chunk lexer given a string (in practice, s points to cur position in the middle of the file).
set_spec(eol_cmt, eol_cmts); // It returns the deepest case: pointing to ", it will return ChunkMultilineString if """, or ChunkString otherwise.
set_spec(cmt_op, open_cmts); ChunkLexerBase* get_deepest(const char* s) const {
set_spec(cmt_cl, close_cmts); const LexingTrie* best = this;
}
void Lexer::set_comment2_tokens(const std::string &eol_cmts2, const std::string &open_cmts2, const std::string &close_cmts2) { for (const LexingTrie* cur = this; cur && cur->next; ++s) {
set_spec(eol_cmt2, eol_cmts2); cur = cur->next[static_cast<uint8_t>(*s)]; // if s reaches \0, cur will just become nullptr, and loop will end
set_spec(cmt_op2, open_cmts2); if (cur && cur->val) {
set_spec(cmt_cl2, close_cmts2); best = cur;
}
void Lexer::start_parsing() {
next();
}
void Lexer::set_spec(std::array<int, 3>& arr, std::string setup) {
arr[0] = arr[1] = arr[2] = -0x100;
std::size_t n = setup.size(), i;
for (i = 0; i < n; i++) {
if (setup[i] == ' ') {
continue;
}
if (i == n - 1 || setup[i + 1] == ' ') {
arr[0] = setup[i];
} else if (i == n - 2 || (i < n - 2 && setup[i + 2] == ' ')) {
arr[1] = setup[i];
arr[2] = setup[++i];
} else {
while (i < n && setup[i] != ' ') {
i++;
} }
} }
}
}
bool Lexer::is_multiline_quote(const char* begin, const char* end) { return best->val;
if (multiline_quote.empty()) {
return false;
} }
for (const char& c : multiline_quote) { };
if (begin == end || *begin != c) {
return false;
}
++begin;
}
return true;
}
void Lexer::expect(int exp_tp, const char* msg) { //
if (tp() != exp_tp) { // ----------------------------------------------------------------------
throw ParseError{lexem.loc, (msg ? std::string{msg} : Lexem::lexem_name_str(exp_tp)) + " expected instead of " + // A list of valid parsed chunks.
cur().name_str()}; //
}
next();
}
const Lexem& Lexer::next() { // An inline comment, starting from '//'
if (peek_lexem.valid()) { struct ChunkInlineComment final : ChunkLexerBase {
lexem = std::move(peek_lexem); bool parse(Lexer* lex) const override {
peek_lexem.clear({}, Lexem::Undefined); lex->skip_line();
eof = (lexem.tp == Lexem::Eof); return true;
return lexem;
} }
if (eof) { };
return lexem.clear(src.here(), Lexem::Eof);
} // A multiline comment, starting from '/*'
long long comm = 1; // Note, that nested comments are not supported.
// the code below is very complicated, because it tried to support one-symbol start/end and nesting struct ChunkMultilineComment final : ChunkLexerBase {
// in Tolk, we decided to stop supporting nesting (it was never used in practice and almost impossible for js highlighters) bool parse(Lexer* lex) const override {
// later on I'll simplify this code (more precisely, rewrite lexer from scratch) while (!lex->is_eof()) {
while (!src.seek_eof()) { // todo drop -} later
int cc = src.cur_char(), nc = src.next_char(); if ((lex->char_at() == '-' && lex->char_at(1) == '}') || (lex->char_at() == '*' && lex->char_at(1) == '/')) {
// note, that in practice, [0]-th element is -256, condition for [0]-th is always false lex->skip_chars(2);
// todo rewrite this all in the future return true;
if (cc == eol_cmt[0] || (cc == eol_cmt[1] && nc == eol_cmt[2]) || cc == eol_cmt2[0] || (cc == eol_cmt2[1] && nc == eol_cmt2[2])) {
if (comm == 1) { // just "//" — skip a whole line
src.load_line();
} else { // if "//" is nested into "/*", continue reading, since "*/" may be met
src.advance(1);
} }
} else if (cc == cmt_op[1] && nc == cmt_op[2] || cc == cmt_op2[1] && nc == cmt_op2[2]) { lex->skip_chars(1);
src.advance(2);
comm = comm * 2 + 1;
} else if (cc == cmt_op[0] || cc == cmt_op2[0]) { // always false
src.advance(1);
comm *= 2;
} else if (comm == 1) {
break; // means that we are not inside a comment
} else if (cc == cmt_cl[1] && nc == cmt_cl[2] || cc == cmt_cl2[1] && nc == cmt_cl2[2]) {
if (!(comm & 1)) { // always false
src.error(std::string{"a `"} + (char)cmt_op[0] + "` comment closed by `" + (char)cmt_cl[1] + (char)cmt_cl[2] +
"`");
}
// note that {- may be closed with */, but assume it's ok (we'll get rid of {- in the future)
comm = 1;
src.advance(2);
} else if (cc == cmt_cl[0] || cc == cmt_cl2[0]) { // always false
if (!(comm & 1)) {
src.error(std::string{"a `"} + (char)cmt_op[1] + (char)cmt_op[2] + "` comment closed by `" + (char)cmt_cl[0] +
"`");
}
comm = 1;
src.advance(1);
} else {
src.advance(1);
}
if (comm < 0) {
src.error("too many nested comments");
} }
return true; // it's okay if comment extends past end of file
} }
if (src.seek_eof()) { };
eof = true;
if (comm > 1) { // A string, starting from "
src.error("comment extends past end of file"); // Note, that there are no escape symbols inside: the purpose of strings in Tolk just doesn't need it.
// After a closing quote, a string modifier may be present, like "Ef8zMzMzMzMzMzMzMzMzMzM0vF"a.
// If present, it emits a separate tok_string_modifier.
struct ChunkString final : ChunkLexerBase {
bool parse(Lexer* lex) const override {
const char* str_begin = lex->c_str();
lex->skip_chars(1);
while (!lex->is_eof() && lex->char_at() != '"' && lex->char_at() != '\n') {
lex->skip_chars(1);
} }
return lexem.clear(src.here(), Lexem::Eof); if (lex->char_at() != '"') {
lex->error("string extends past end of line");
}
std::string_view str_val(str_begin + 1, lex->c_str() - str_begin - 1);
lex->skip_chars(1);
lex->add_token(tok_string_const, str_val);
if (std::isalpha(lex->char_at())) {
std::string_view modifier_val(lex->c_str(), 1);
lex->skip_chars(1);
lex->add_token(tok_string_modifier, modifier_val);
}
return true;
} }
if (is_multiline_quote(src.get_ptr(), src.get_end_ptr())) { };
src.advance(multiline_quote.size());
const char* end = nullptr; // A string starting from """
SrcLocation here = src.here(); // Used for multiline asm constructions. Can not have a postfix modifier.
std::string body; struct ChunkMultilineString final : ChunkLexerBase {
while (!src.is_eof()) { bool parse(Lexer* lex) const override {
if (src.is_eoln()) { const char* str_begin = lex->c_str();
body.push_back('\n'); lex->skip_chars(3);
src.load_line(); while (!lex->is_eof()) {
continue; if (lex->char_at() == '"' && lex->char_at(1) == '"' && lex->char_at(2) == '"') {
}
if (is_multiline_quote(src.get_ptr(), src.get_end_ptr())) {
end = src.get_ptr();
src.advance(multiline_quote.size());
break; break;
} }
body.push_back(src.cur_char()); lex->skip_chars(1);
src.advance(1);
} }
if (!end) { if (lex->is_eof()) {
src.error("string extends past end of file"); lex->error("string extends past end of file");
} }
lexem.set(body, here, Lexem::String);
int c = src.cur_char(); std::string_view str_val(str_begin + 3, lex->c_str() - str_begin - 3);
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { lex->skip_chars(3);
lexem.val = c; lex->add_token(tok_string_const, str_val);
src.advance(1); return true;
}
return lexem;
} }
int c = src.cur_char(); };
const char* end = src.get_ptr();
if (is_quote_char(c) || c == '`') { // A number, may be a hex one.
int qc = c; struct ChunkNumber final : ChunkLexerBase {
++end; bool parse(Lexer* lex) const override {
while (end < src.get_end_ptr() && *end != qc) { const char* str_begin = lex->c_str();
++end; bool hex = false;
if (lex->char_at() == '0' && lex->char_at(1) == 'x') {
lex->skip_chars(2);
hex = true;
} }
if (*end != qc) { if (lex->is_eof()) {
src.error(qc == '`' ? "a `back-quoted` token extends past end of line" : "string extends past end of line"); return false;
} }
lexem.set(std::string{src.get_ptr() + 1, end}, src.here(), qc == '`' ? Lexem::Unknown : Lexem::String); while (!lex->is_eof()) {
src.set_ptr(end + 1); char c = lex->char_at();
c = src.cur_char(); if (c >= '0' && c <= '9') {
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { lex->skip_chars(1);
lexem.val = c; continue;
src.set_ptr(end + 2); }
if (!hex) {
break;
}
c |= 0x20;
if (c < 'a' || c > 'f') {
break;
}
lex->skip_chars(1);
} }
// std::cerr << lexem.name_str() << ' ' << lexem.str << std::endl;
return lexem; std::string_view str_val(str_begin, lex->c_str() - str_begin);
lex->add_token(tok_int_const, str_val);
return true;
} }
int len = 0, pc = -0x100; };
while (end < src.get_end_ptr()) {
c = *end; // Anything starting from # is a compiler directive.
bool repeated = (c == pc && is_repeatable(c)); // Technically, #include and #pragma can be mapped as separate chunks,
if (c == ' ' || c == 9 || (len && is_left_active(c) && !repeated)) { // but storing such long strings in a trie increases its memory usage.
break; struct ChunkCompilerDirective final : ChunkLexerBase {
bool parse(Lexer* lex) const override {
const char* str_begin = lex->c_str();
lex->skip_chars(1);
while (std::isalnum(lex->char_at())) {
lex->skip_chars(1);
} }
++len;
++end; std::string_view str_val(str_begin, lex->c_str() - str_begin);
if (is_right_active(c) && !repeated) { if (str_val == "#include") {
break; lex->add_token(tok_include, str_val);
return true;
} }
pc = c; if (str_val == "#pragma") {
lex->add_token(tok_pragma, str_val);
return true;
}
lex->error("unknown compiler directive");
} }
lexem.set(std::string{src.get_ptr(), end}, src.here()); };
src.set_ptr(end);
// std::cerr << lexem.name_str() << ' ' << lexem.str << std::endl; // Tokens like !=, &, etc. emit just a simple TokenType.
return lexem; // Since they are stored in trie, "parsing" them is just skipping len chars.
struct ChunkSimpleToken final : ChunkLexerBase {
  TokenType tp;  // token kind emitted by this chunk
  int len;       // how many source characters the token spans

  ChunkSimpleToken(TokenType tp, int len) : tp(tp), len(len) {}

  // Emits `tp` with a view over the next `len` characters, then steps over them.
  // Always reports success.
  bool parse(Lexer* lex) const override {
    lex->add_token(tp, std::string_view(lex->c_str(), len));
    lex->skip_chars(len);
    return true;
  }
};
// Whitespace chunk: emits no token at all.
// It steps over the character that selected this chunk and then hands over to
// Lexer::skip_spaces() for whatever follows.
struct ChunkSkipWhitespace final : ChunkLexerBase {
  bool parse(Lexer* lex) const override {
    lex->skip_chars(1);  // the character that matched in the trie
    lex->skip_spaces();  // the rest of the run
    return true;         // always succeeds
  }
};
// Here we handle corner cases of grammar that are requested on demand.
// E.g., for 'pragma version >0.5.0', '0.5.0' should be parsed specially to emit tok_semver.
// See TolkLanguageGrammar::parse_next_chunk_special().
//
// Both helpers scan forward from the current lexer position, emit exactly one token
// covering the scanned characters and return true; if nothing was scanned they emit
// nothing and return false.
struct ChunkSpecialParsing {
  // Scans a run of alphanumeric characters and '-' and emits it as tok_pragma_name.
  static bool parse_pragma_name(Lexer* lex) {
    const char* str_begin = lex->c_str();
    // <cctype> classifiers require a value representable as unsigned char:
    // a raw (possibly negative) char for bytes >= 0x80 would be undefined behavior
    while (std::isalnum(static_cast<unsigned char>(lex->char_at())) || lex->char_at() == '-') {
      lex->skip_chars(1);
    }

    std::string_view str_val(str_begin, lex->c_str() - str_begin);
    if (str_val.empty()) {
      return false;
    }
    lex->add_token(tok_pragma_name, str_val);
    return true;
  }

  // Scans a run of decimal digits and '.' and emits it as tok_semver.
  static bool parse_semver(Lexer* lex) {
    const char* str_begin = lex->c_str();
    // same unsigned-char requirement as above
    while (std::isdigit(static_cast<unsigned char>(lex->char_at())) || lex->char_at() == '.') {
      lex->skip_chars(1);
    }

    std::string_view str_val(str_begin, lex->c_str() - str_begin);
    if (str_val.empty()) {
      return false;
    }
    lex->add_token(tok_semver, str_val);
    return true;
  }
};
// Anything starting from a valid identifier beginning symbol is parsed as an identifier.
// But if a resulting string is a keyword, a corresponding token is emitted instead of tok_identifier.
struct ChunkIdentifierOrKeyword final : ChunkLexerBase {
// having parsed str up to the valid end, look up whether it's a valid keyword
// in the future, this could be a bit more effective than just comparing strings (e.g. gperf),
// but nevertheless, performance of the naive code below is reasonably good
static TokenType maybe_keyword(std::string_view str) {
switch (str.size()) {
case 1:
if (str == "~") return tok_bitwise_not; // todo attention
if (str == "_") return tok_underscore; // todo attention
break;
case 2:
if (str == "do") return tok_do;
if (str == "if") return tok_if;
break;
case 3:
if (str == "int") return tok_int;
if (str == "var") return tok_var;
if (str == "asm") return tok_asm;
if (str == "get") return tok_get;
if (str == "try") return tok_try;
break;
case 4:
if (str == "else") return tok_else;
if (str == "pure") return tok_pure;
if (str == "then") return tok_then;
if (str == "cell") return tok_cell;
if (str == "cont") return tok_cont;
if (str == "type") return tok_type; // todo unused token?
break;
case 5:
if (str == "slice") return tok_slice;
if (str == "tuple") return tok_tuple;
if (str == "const") return tok_const;
if (str == "while") return tok_while;
if (str == "until") return tok_until;
if (str == "catch") return tok_catch;
if (str == "ifnot") return tok_ifnot;
break;
case 6:
if (str == "return") return tok_return;
if (str == "repeat") return tok_repeat;
if (str == "elseif") return tok_elseif;
if (str == "forall") return tok_forall;
if (str == "extern") return tok_extern;
if (str == "global") return tok_global;
if (str == "impure") return tok_impure;
if (str == "inline") return tok_inline;
break;
case 7:
if (str == "builder") return tok_builder;
if (str == "builtin") return tok_builtin;
break;
case 8:
if (str == "operator") return tok_operator;
break;
case 9:
if (str == "elseifnot") return tok_elseifnot;
if (str == "method_id") return tok_method_id;
break;
case 10:
if (str == "inline_ref") return tok_inlineref;
if (str == "auto_apply") return tok_autoapply;
break;
default:
break;
}
return tok_empty;
}
// Scans the rest of an identifier or keyword and emits exactly one token for it.
// The first character was already matched by the trie pattern, so only the
// following characters are validated here. Always returns true.
bool parse(Lexer* lex) const override {
  const char* sym_begin = lex->c_str();
  lex->skip_chars(1);
  while (!lex->is_eof()) {
    // convert to unsigned char first: passing a negative char (any non-ASCII byte
    // where char is signed) to std::isalnum is undefined behavior
    const unsigned char c = static_cast<unsigned char>(lex->char_at());
    // the pattern of valid identifier first symbol is provided in trie, here we test for identifier middle
    const bool allowed_in_identifier =
        std::isalnum(c) || c == '_' || c == '$' || c == ':' || c == '?' || c == '!' || c == '\'';
    if (!allowed_in_identifier) {
      break;
    }
    lex->skip_chars(1);
  }
  std::string_view str_val(sym_begin, lex->c_str() - sym_begin);
  if (TokenType kw_tok = maybe_keyword(str_val)) {
    lex->add_token(kw_tok, str_val);
  } else {
    // not a keyword: pass the name to symbols.lookup_add, then emit it as an identifier
    symbols.lookup_add(static_cast<std::string>(str_val));
    lex->add_token(tok_identifier, str_val);
  }
  return true;
}
};
// Like in Kotlin, `backticks` can be used to wrap identifiers (both in declarations/usage, both for vars/functions).
// E.g.: function `do`() { var `with spaces` = 1; }
// This could be useful to use reserved names as identifiers (in a probable codegen from TL, for example).
struct ChunkIdentifierInBackticks final : ChunkLexerBase {
  // Parses an identifier wrapped in backticks, starting at the opening backtick.
  // Emits tok_identifier whose text excludes both backticks.
  // Reports an error (via lex->error) for a whitespace character inside the name
  // and for a name that is not closed by a backtick before end of line / end of input.
  bool parse(Lexer* lex) const override {
    const char* str_begin = lex->c_str();
    lex->skip_chars(1);  // the opening backtick
    while (!lex->is_eof() && lex->char_at() != '`' && lex->char_at() != '\n') {
      // cast to unsigned char: std::isspace on a negative char value is undefined behavior
      if (std::isspace(static_cast<unsigned char>(lex->char_at()))) {  // probably, I'll remove this restriction after rewriting symtable and cur_sym_idx
        lex->error("An identifier can't have a space in its name (even inside backticks)");
      }
      lex->skip_chars(1);
    }
    // test for end of input explicitly instead of relying on the byte located past the last character
    if (lex->is_eof() || lex->char_at() != '`') {
      lex->error("Unclosed backtick `");
    }
    std::string_view str_val(str_begin + 1, lex->c_str() - str_begin - 1);
    lex->skip_chars(1);  // the closing backtick
    symbols.lookup_add(static_cast<std::string>(str_val));
    lex->add_token(tok_identifier, str_val);
    return true;
  }
};
//
// ----------------------------------------------------------------------
// Here we define a grammar of Tolk.
// All valid chunks prefixes are stored in trie.
//
// Holds the trie of chunk prefixes and the entry points that pick a chunk parser for the current input position.
struct TolkLanguageGrammar {
static LexingTrie trie;
// Asks the trie for the deepest registered chunk at the current position and lets it parse.
// Returns false when nothing is registered for the upcoming characters or the chunk's parse() returns false.
static bool parse_next_chunk(Lexer* lex) {
const ChunkLexerBase* best = trie.get_deepest(lex->c_str());
return best && best->parse(lex);
}
// Bypasses the trie: the caller states which kind of token must come next.
// Only tok_pragma_name and tok_semver are handled; any other value trips the assert.
static bool parse_next_chunk_special(Lexer* lex, TokenType parse_next_as) {
switch (parse_next_as) {
case tok_pragma_name:
return ChunkSpecialParsing::parse_pragma_name(lex);
case tok_semver:
return ChunkSpecialParsing::parse_semver(lex);
default:
assert(false);
return false;
}
}
// Registers a fixed spelling `str` of `len` characters as a simple token of type `tp`.
// The ChunkSimpleToken is allocated with `new` and handed to the trie; nothing in this struct frees it.
static void register_token(const char* str, int len, TokenType tp) {
trie.add_prefix(str, new ChunkSimpleToken(tp, len));
}
// Fills the trie: comments, strings, whitespace, directives, numbers, identifiers, then operators/punctuation.
// NOTE(review): the registration order is kept as is — whether the trie depends on it is not visible here.
static void init() {
trie.add_prefix("//", singleton<ChunkInlineComment>());
trie.add_prefix(";;", singleton<ChunkInlineComment>());
trie.add_prefix("/*", singleton<ChunkMultilineComment>());
trie.add_prefix("{-", singleton<ChunkMultilineComment>());
trie.add_prefix(R"(")", singleton<ChunkString>());
trie.add_prefix(R"(""")", singleton<ChunkMultilineString>());
trie.add_prefix(" ", singleton<ChunkSkipWhitespace>());
trie.add_prefix("\t", singleton<ChunkSkipWhitespace>());
trie.add_prefix("\r", singleton<ChunkSkipWhitespace>());
trie.add_prefix("\n", singleton<ChunkSkipWhitespace>());
trie.add_prefix("#", singleton<ChunkCompilerDirective>());
trie.add_pattern("[0-9]", singleton<ChunkNumber>());
// todo think of . ~
// the identifier start pattern includes `~`, which is also the first character of several operators registered below
trie.add_pattern("[a-zA-Z_$.~]", singleton<ChunkIdentifierOrKeyword>());
trie.add_prefix("`", singleton<ChunkIdentifierInBackticks>());
// single-character tokens
register_token("+", 1, tok_plus);
register_token("-", 1, tok_minus);
register_token("*", 1, tok_mul);
register_token("/", 1, tok_div);
register_token("%", 1, tok_mod);
register_token("?", 1, tok_question);
register_token(":", 1, tok_colon);
register_token(",", 1, tok_comma);
register_token(";", 1, tok_semicolon);
register_token("(", 1, tok_oppar);
register_token(")", 1, tok_clpar);
register_token("[", 1, tok_opbracket);
register_token("]", 1, tok_clbracket);
register_token("{", 1, tok_opbrace);
register_token("}", 1, tok_clbrace);
register_token("=", 1, tok_assign);
register_token("<", 1, tok_lt);
register_token(">", 1, tok_gt);
register_token("&", 1, tok_bitwise_and);
register_token("|", 1, tok_bitwise_or);
register_token("^", 1, tok_bitwise_xor);
// two-character tokens
register_token("==", 2, tok_eq);
register_token("!=", 2, tok_neq);
register_token("<=", 2, tok_leq);
register_token(">=", 2, tok_geq);
register_token("<<", 2, tok_lshift);
register_token(">>", 2, tok_rshift);
register_token("~/", 2, tok_divR);
register_token("^/", 2, tok_divC);
register_token("~%", 2, tok_modR);
register_token("^%", 2, tok_modC);
register_token("/%", 2, tok_divmod);
register_token("+=", 2, tok_set_plus);
register_token("-=", 2, tok_set_minus);
register_token("*=", 2, tok_set_mul);
register_token("/=", 2, tok_set_div);
register_token("%=", 2, tok_set_mod);
register_token("&=", 2, tok_set_bitwise_and);
register_token("|=", 2, tok_set_bitwise_or);
register_token("^=", 2, tok_set_bitwise_xor);
register_token("->", 2, tok_mapsto);
// three-character tokens
register_token("<=>", 3, tok_spaceship);
register_token("~>>", 3, tok_rshiftR);
register_token("^>>", 3, tok_rshiftC);
register_token("~/=", 3, tok_set_divR);
register_token("^/=", 3, tok_set_divC);
register_token("~%=", 3, tok_set_modR);
register_token("^%=", 3, tok_set_modC);
register_token("<<=", 3, tok_set_lshift);
register_token(">>=", 3, tok_set_rshift);
// four-character tokens
register_token("~>>=", 4, tok_set_rshiftR);
register_token("^>>=", 4, tok_set_rshiftC);
}
};
LexingTrie TolkLanguageGrammar::trie;
//
// ----------------------------------------------------------------------
// The Lexer class is to be used outside (by parser, which constructs AST from tokens).
// It's streaming. It means, that `next()` parses a next token on demand
// (instead of parsing all file contents to vector<Token> and iterating over it).
// Parsing on demand uses effectively less memory.
// Note, that chunks, being parsed, call `add_token()`, and a chunk may add multiple tokens at once.
// That's why a small circular buffer for tokens is used.
// `last_token_idx` actually means a number of total tokens added.
// `cur_token_idx` is a number of returned by `next()`.
// It's assumed that an input file has already been loaded, its contents is present and won't be deleted
// (`start`, `cur` and `end`, as well as every Token str_val, points inside file->text).
//
Lexer::Lexer(const SrcFile* file)
: file(file)
, p_start(file->text.data())
, p_end(p_start + file->text.size())
, p_next(p_start)
, location(file) {
next();
} }
const Lexem& Lexer::peek() { void Lexer::next() {
if (peek_lexem.valid()) { while (cur_token_idx == last_token_idx && !is_eof()) {
return peek_lexem; update_location();
if (!TolkLanguageGrammar::parse_next_chunk(this)) {
error("Failed to parse");
}
} }
if (eof) { if (is_eof()) {
return lexem.clear(src.here(), Lexem::Eof); add_token(tok_eof, file->text);
}
cur_token = tokens_circularbuf[++cur_token_idx & 7];
}
// Parses the next chunk in a mode dictated by the caller (`parse_next_as`) instead of consulting the trie.
// Must be called only when every token added so far has been consumed (asserted).
// Reports "<str_expected> expected" when the special parser does not accept the input.
void Lexer::next_special(TokenType parse_next_as, const char* str_expected) {
  assert(cur_token_idx == last_token_idx);
  skip_spaces();
  update_location();
  const bool parsed = TolkLanguageGrammar::parse_next_chunk_special(this, parse_next_as);
  if (!parsed) {
    error(std::string(str_expected) + " expected");
  }
  // advance to the token the special parser has just added
  ++cur_token_idx;
  cur_token = tokens_circularbuf[cur_token_idx & 7];
}
int Lexer::cur_sym_idx() const {
assert(tok() == tok_identifier);
return symbols.lookup_add(cur_str_std_string());
}
// Throws ParseError carrying `err_msg` and the lexer's current location; never returns.
void Lexer::error(const std::string& err_msg) const {
throw ParseError(cur_location(), err_msg);
}
// Throws ParseError whose message is `prefix`, then the text of the current token, then `suffix`.
void Lexer::error_at(const std::string& prefix, const std::string& suffix) const {
  std::string message = prefix;
  message += cur_str_std_string();
  message += suffix;
  throw ParseError(cur_location(), message);
}
// Cold path of check()/expect(): throws ParseError saying what was expected and which token text was found instead.
void Lexer::on_expect_call_failed(const char* str_expected) const {
  std::string message(str_expected);
  message += " expected instead of `";
  message += cur_str_std_string();
  message += "`";
  throw ParseError(cur_location(), message);
}
// Registers all chunk prefixes and tokens in the grammar trie (delegates to TolkLanguageGrammar::init).
void lexer_init() {
TolkLanguageGrammar::init();
}
// todo #ifdef TOLK_PROFILING
// As told above, `next()` produces tokens on demand, while AST is being generated.
// Hence, it's difficult to measure Lexer performance separately.
// This function can be called just to tick Lexer performance, it just scans all input files.
// There is no sense to use it in production, but when refactoring and optimizing Lexer, it's useful.
void lexer_measure_performance(const std::vector<SrcFile*>& files_to_just_parse) {
for (const SrcFile* file : files_to_just_parse) {
Lexer lex(file);
while (!lex.is_eof()) {
lex.next();
}
} }
Lexem keep = std::move(lexem);
next();
peek_lexem = std::move(lexem);
lexem = std::move(keep);
eof = false;
return peek_lexem;
} }
} // namespace tolk } // namespace tolk

View file

@ -15,104 +15,225 @@
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>. along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/ */
#pragma once #pragma once
#include "srcread.h"
#include <array> #include "platform-utils.h"
#include <memory> #include "src-file.h"
#include <cstring> #include <string>
namespace tolk { namespace tolk {
/* enum TokenType {
* tok_empty,
* LEXER
*
*/
struct Lexem { tok_int_const,
enum { Undefined = -2, Eof = -1, Unknown = 0, Ident = 0, Number = 1, Special = 2, String = 3 }; tok_string_const,
int tp; tok_string_modifier,
int val;
std::string str;
SrcLocation loc;
int classify();
Lexem(std::string _str = "", const SrcLocation& _loc = {}, int _tp = Unknown, int _val = 0)
: tp(_tp), val(_val), str(_str), loc(_loc) {
classify();
}
int set(std::string _str = "", const SrcLocation& _loc = {}, int _tp = Unknown, int _val = 0);
Lexem& clear(const SrcLocation& _loc = {}, int _tp = Unknown, int _val = 0) {
tp = _tp;
val = _val;
loc = _loc;
str = "";
return *this;
}
bool valid() const {
return tp != Undefined;
}
std::string name_str() const;
void error(std::string _str) const {
throw ParseError{loc, _str};
}
void error_at(std::string str1, std::string str2) const {
error(str1 + str + str2);
}
static std::string lexem_name_str(int idx); tok_identifier,
tok_plus,
tok_minus,
tok_mul,
tok_div,
tok_mod,
tok_question,
tok_colon,
tok_comma,
tok_semicolon,
tok_oppar,
tok_clpar,
tok_opbracket,
tok_clbracket,
tok_opbrace,
tok_clbrace,
tok_assign,
tok_underscore,
tok_lt,
tok_gt,
tok_bitwise_and,
tok_bitwise_or,
tok_bitwise_xor,
tok_bitwise_not,
tok_dot,
tok_eq,
tok_neq,
tok_leq,
tok_geq,
tok_spaceship,
tok_lshift,
tok_rshift,
tok_rshiftR,
tok_rshiftC,
tok_divR,
tok_divC,
tok_modR,
tok_modC,
tok_divmod,
tok_set_plus,
tok_set_minus,
tok_set_mul,
tok_set_div,
tok_set_divR,
tok_set_divC,
tok_set_mod,
tok_set_modR,
tok_set_modC,
tok_set_lshift,
tok_set_rshift,
tok_set_rshiftR,
tok_set_rshiftC,
tok_set_bitwise_and,
tok_set_bitwise_or,
tok_set_bitwise_xor,
tok_return,
tok_var,
tok_repeat,
tok_do,
tok_while,
tok_until,
tok_try,
tok_catch,
tok_if,
tok_ifnot,
tok_then,
tok_else,
tok_elseif,
tok_elseifnot,
tok_int,
tok_cell,
tok_slice,
tok_builder,
tok_cont,
tok_tuple,
tok_type,
tok_mapsto,
tok_forall,
tok_extern,
tok_global,
tok_asm,
tok_impure,
tok_pure,
tok_inline,
tok_inlineref,
tok_builtin,
tok_autoapply,
tok_method_id,
tok_get,
tok_operator,
tok_infix,
tok_infixl,
tok_infixr,
tok_const,
tok_pragma,
tok_pragma_name,
tok_semver,
tok_include,
tok_eof
}; };
// All tolk language is parsed into tokens.
// Lexer::next() returns a Token.
// One lexed token: its kind plus a view of its text.
// `str_val` is a non-owning view, so the characters it refers to must outlive the token.
struct Token {
  TokenType type{tok_empty};
  std::string_view str_val{};

  Token() = default;
  Token(TokenType type, std::string_view str_val)
    : type(type)
    , str_val(str_val) {
  }
};
// Lexer::next() is a method to be used externally (while parsing tolk file to AST).
// It's streaming: `next()` parses a token on demand.
// For comments, see lexer.cpp, a comment above Lexer constructor.
class Lexer { class Lexer {
SourceReader& src; Token tokens_circularbuf[8]{};
bool eof; int last_token_idx = -1;
Lexem lexem, peek_lexem; int cur_token_idx = -1;
unsigned char char_class[128]; Token cur_token; // = tokens_circularbuf[cur_token_idx & 7]
std::array<int, 3> eol_cmt, cmt_op, cmt_cl; // for ;; {- -}
std::array<int, 3> eol_cmt2, cmt_op2, cmt_cl2; // for // /* */
std::string multiline_quote;
enum cc { left_active = 2, right_active = 1, active = 3, allow_repeat = 4, quote_char = 8 };
public: const SrcFile* file;
bool eof_found() const { const char *p_start, *p_end, *p_next;
return eof; SrcLocation location;
}
explicit Lexer(SourceReader& _src, std::string active_chars = ";,() ~.",
std::string quote_chars = "\"", std::string multiline_quote = "\"\"\"");
void set_comment_tokens(const std::string &eol_cmts, const std::string &open_cmts, const std::string &close_cmts); void update_location() {
void set_comment2_tokens(const std::string &eol_cmts2, const std::string &open_cmts2, const std::string &close_cmts2); location.char_offset = static_cast<int>(p_next - p_start);
void start_parsing();
const Lexem& next();
const Lexem& cur() const {
return lexem;
}
const Lexem& peek();
int tp() const {
return lexem.tp;
}
void expect(int exp_tp, const char* msg = 0);
int classify_char(unsigned c) const {
return c < 0x80 ? char_class[c] : 0;
}
bool is_active(int c) const {
return (classify_char(c) & cc::active) == cc::active;
}
bool is_left_active(int c) const {
return (classify_char(c) & cc::left_active);
}
bool is_right_active(int c) const {
return (classify_char(c) & cc::right_active);
}
bool is_repeatable(int c) const {
return (classify_char(c) & cc::allow_repeat);
}
bool is_quote_char(int c) const {
return (classify_char(c) & cc::quote_char);
} }
private: GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
void set_spec(std::array<int, 3>& arr, std::string setup); void on_expect_call_failed(const char* str_expected) const;
bool is_multiline_quote(const char* begin, const char* end);
public:
explicit Lexer(const SrcFile* file);
Lexer(const Lexer&) = delete;
Lexer &operator=(const Lexer&) = delete;
// Appends a token to the 8-slot circular buffer; `last_token_idx` counts tokens added so far.
void add_token(TokenType type, std::string_view str) {
  ++last_token_idx;
  tokens_circularbuf[last_token_idx & 7] = Token(type, str);
}
void skip_spaces() {
while (std::isspace(*p_next)) {
++p_next;
}
}
// Advances to the start of the next non-empty line: skips everything up to the first
// line break, then skips all consecutive '\n' / '\r' characters.
void skip_line() {
  while (p_next < p_end && *p_next != '\n' && *p_next != '\r') {
    ++p_next;
  }
  // same p_end bound as the loop above, so the scan never depends on what is stored past the input
  while (p_next < p_end && (*p_next == '\n' || *p_next == '\r')) {
    ++p_next;
  }
}
// Moves the read position forward by `n` characters; no bounds check is performed here.
void skip_chars(int n) {
p_next += n;
}
// True once the read position has reached (or passed) the end of the file text.
bool is_eof() const {
return p_next >= p_end;
}
// Character at the read position (no bounds check).
char char_at() const { return *p_next; }
// Character `shift` positions after the read position (no bounds check).
char char_at(int shift) const { return *(p_next + shift); }
// Pointer to the read position inside the file text.
const char* c_str() const { return p_next; }
// Type of the current token.
TokenType tok() const { return cur_token.type; }
// Text of the current token as a non-owning view.
std::string_view cur_str() const { return cur_token.str_val; }
// Text of the current token copied into a std::string.
std::string cur_str_std_string() const { return static_cast<std::string>(cur_token.str_val); }
// Copy of the location last stored by update_location().
SrcLocation cur_location() const { return location; }
int cur_sym_idx() const;
void next();
void next_special(TokenType parse_next_as, const char* str_expected);
// Verifies that the current token has type `next_tok` without consuming it;
// otherwise reports "<str_expected> expected ..." through on_expect_call_failed.
void check(TokenType next_tok, const char* str_expected) const {
  if (cur_token.type == next_tok) {
    return;
  }
  on_expect_call_failed(str_expected);  // unlikely path, not inlined
}
// Verifies that the current token has type `next_tok` and then advances to the next token;
// a mismatch is reported through on_expect_call_failed.
void expect(TokenType next_tok, const char* str_expected) {
  const bool matches = cur_token.type == next_tok;
  if (!matches) {
    on_expect_call_failed(str_expected);
  }
  next();
}
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
void error(const std::string& err_msg) const;
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
void error_at(const std::string& prefix, const std::string& suffix) const;
}; };
void lexer_init();
// todo #ifdef TOLK_PROFILING
void lexer_measure_performance(const std::vector<SrcFile*>& files_to_just_parse);
} // namespace tolk } // namespace tolk

View file

@ -612,7 +612,7 @@ bool Optimizer::optimize() {
} }
AsmOpConsList optimize_code_head(AsmOpConsList op_list, int mode) { AsmOpConsList optimize_code_head(AsmOpConsList op_list, int mode) {
Optimizer opt(std::move(op_list), op_rewrite_comments, mode); Optimizer opt(std::move(op_list), false, mode);
opt.optimize(); opt.optimize();
return opt.extract_code(); return opt.extract_code();
} }

File diff suppressed because it is too large Load diff

44
tolk/platform-utils.h Normal file
View file

@ -0,0 +1,44 @@
/*
This file is part of TON Blockchain source code.
TON Blockchain is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
TON Blockchain is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with TON Blockchain. If not, see <http://www.gnu.org/licenses/>.
In addition, as a special exception, the copyright holders give permission
to link the code of portions of this program with the OpenSSL library.
You must obey the GNU General Public License in all respects for all
of the code used other than OpenSSL. If you modify file(s) with this
exception, you may extend this exception to your version of the file(s),
but you are not obligated to do so. If you do not wish to do so, delete this
exception statement from your version. If you delete this exception statement
from all source files in the program, then also delete it here.
*/
#pragma once
#if __GNUC__
#define GNU_ATTRIBUTE_COLD [[gnu::cold]]
#define GNU_ATTRIBUTE_NORETURN [[gnu::noreturn]]
#define GNU_ATTRIBUTE_ALWAYS_INLINE [[gnu::always_inline]]
#else
#define GNU_ATTRIBUTE_COLD
#define GNU_ATTRIBUTE_NORETURN [[noreturn]]
#define GNU_ATTRIBUTE_ALWAYS_INLINE
#endif
#if defined(__GNUC__)
#define LIKELY(x) __builtin_expect(x, true)
#define UNLIKELY(x) __builtin_expect(x, false)
#else
#define LIKELY(x) (x)
#define UNLIKELY(x) (x)
#endif

164
tolk/src-file.cpp Normal file
View file

@ -0,0 +1,164 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#include "src-file.h"
#include <iostream>
namespace tolk {
extern AllRegisteredSrcFiles all_src_files;
extern std::string stdlib_filename;
static_assert(sizeof(SrcLocation) == 8);
// Linear search for a registered file by its id; returns nullptr when there is no such file.
const SrcFile* AllRegisteredSrcFiles::find_file(int file_id) const {
  const SrcFile* found = nullptr;
  for (auto it = all_src_files.begin(); it != all_src_files.end() && found == nullptr; ++it) {
    if ((*it)->file_id == file_id) {
      found = *it;
    }
  }
  return found;
}
// Linear search for a registered file by its absolute name (exact string match);
// returns nullptr when there is no such file.
const SrcFile* AllRegisteredSrcFiles::find_file(const std::string& abs_filename) const {
  for (std::size_t i = 0; i < all_src_files.size(); ++i) {
    const SrcFile* candidate = all_src_files[i];
    if (candidate->abs_filename == abs_filename) {
      return candidate;
    }
  }
  return nullptr;
}
// Creates a SrcFile with the next sequential file_id, takes over `text`, stores it in the registry and returns it.
// The object is allocated with `new`; this function does not release it.
const SrcFile* AllRegisteredSrcFiles::register_file(const std::string& rel_filename, const std::string& abs_filename, std::string&& text, const SrcFile* included_from) {
  const int new_file_id = ++last_file_id;
  SrcFile* registered = new SrcFile(new_file_id, rel_filename, abs_filename, std::move(text), included_from);
  all_src_files.push_back(registered);
  return registered;
}
// The entrypoint is the file with id 1 when a stdlib filename is set, and the file with id 0 otherwise.
bool SrcFile::is_entrypoint_file() const {
  const int entrypoint_file_id = stdlib_filename.empty() ? 0 : 1;
  return file_id == entrypoint_file_id;
}
// True when `offset` indexes an existing character of `text`.
bool SrcFile::is_offset_valid(int offset) const {
  if (offset < 0) {
    return false;
  }
  return offset < static_cast<int>(text.size());
}
// Converts a character offset into a 1-based line number, a 1-based column and a view of that line
// (without the trailing '\n'). For an invalid offset returns {offset, -1, -1, "invalid offset"}.
// The returned line_str points into `text`.
SrcFile::SrcPosition SrcFile::convert_offset(int offset) const {
  if (!is_offset_valid(offset)) {
    return SrcPosition{offset, -1, -1, "invalid offset"};
  }

  // one pass over the prefix: count finished lines and remember where the current one starts
  int newlines_before = 0;
  int line_start = 0;
  for (int pos = 0; pos < offset; ++pos) {
    if (text[pos] == '\n') {
      ++newlines_before;
      line_start = pos + 1;
    }
  }

  // the line lasts until the next '\n' or, if there is none, until the end of the text
  const size_t eol = text.find('\n', line_start);
  const size_t line_end = eol == std::string::npos ? text.size() : eol;
  std::string_view line_str(text.data() + line_start, line_end - line_start);

  return SrcPosition{offset, newlines_before + 1, offset - line_start + 1, line_str};
}
// Prints the relative filename of `src_file`, or "unknown-location" for a null pointer.
std::ostream& operator<<(std::ostream& os, const SrcFile* src_file) {
  if (src_file == nullptr) {
    return os << "unknown-location";
  }
  return os << src_file->rel_filename;
}
// Prints the message of a Fatal error (what()).
std::ostream& operator<<(std::ostream& os, const Fatal& fatal) {
return os << fatal.what();
}
// Resolves the stored file_id through the global file registry; the lookup returns nullptr for an unknown id.
const SrcFile* SrcLocation::get_src_file() const {
return all_src_files.find_file(file_id);
}
// Prints "<file>" and, when the file is known and the offset is valid, ":<line>:<column>".
void SrcLocation::show(std::ostream& os) const {
  const SrcFile* src_file = get_src_file();
  os << src_file;
  if (src_file == nullptr || !src_file->is_offset_valid(char_offset)) {
    return;
  }
  const SrcFile::SrcPosition pos = src_file->convert_offset(char_offset);
  os << ':' << pos.line_no << ':' << pos.char_no;
}
// Prints the source line this location points to and, on the next line, a '^' under the column.
// Prints nothing when the file is unknown or the offset is invalid.
void SrcLocation::show_context(std::ostream& os) const {
  const SrcFile* src_file = get_src_file();
  const bool can_show = src_file != nullptr && src_file->is_offset_valid(char_offset);
  if (!can_show) {
    return;
  }
  const SrcFile::SrcPosition pos = src_file->convert_offset(char_offset);
  os << " " << pos.line_str << "\n";
  os << " ";
  // char_no is 1-based, so char_no - 1 spaces put the caret under the character
  for (int pad = pos.char_no - 1; pad > 0; --pad) {
    os << ' ';
  }
  os << '^' << "\n";
}
// Streams a location in the format produced by SrcLocation::show().
std::ostream& operator<<(std::ostream& os, SrcLocation loc) {
loc.show(os);
return os;
}
// Prints "<location>[: <err_type>]: <message>", ends the line (flushing the stream),
// then prints the source context of this location.
void SrcLocation::show_general_error(std::ostream& os, const std::string& message, const std::string& err_type) const {
  show(os);
  const bool has_type = !err_type.empty();
  if (has_type) {
    os << ": ";
    os << err_type;
  }
  os << ": ";
  os << message;
  os << std::endl;
  show_context(os);
}
// Prints `err_msg` to std::cerr as a "note" attached to this location.
void SrcLocation::show_note(const std::string& err_msg) const {
show_general_error(std::cerr, err_msg, "note");
}
// Prints `err_msg` to std::cerr as a "warning" attached to this location.
void SrcLocation::show_warning(const std::string& err_msg) const {
show_general_error(std::cerr, err_msg, "warning");
}
// Prints `err_msg` to std::cerr as an "error" attached to this location.
void SrcLocation::show_error(const std::string& err_msg) const {
show_general_error(std::cerr, err_msg, "error");
}
// Streams a ParseError in the format produced by ParseError::show().
std::ostream& operator<<(std::ostream& os, const ParseError& error) {
error.show(os);
return os;
}
// Prints "<location>: error: <message>", ends the line (flushing the stream), then the source context of `where`.
void ParseError::show(std::ostream& os) const {
os << where << ": error: " << message << std::endl;
where.show_context(os);
}
} // namespace tolk

120
tolk/src-file.h Normal file
View file

@ -0,0 +1,120 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <string>
#include <vector>
namespace tolk {
// A loaded source file: its id, its names and its full text.
// Non-copyable: copy construction and copy assignment are deleted.
struct SrcFile {
  // Human-readable position produced by convert_offset().
  struct SrcPosition {
    int offset;
    int line_no;
    int char_no;
    std::string_view line_str;
  };

  int file_id;
  std::string rel_filename;
  std::string abs_filename;
  std::string text;
  const SrcFile* included_from{nullptr};

  // Takes over `text`; the filenames are taken by value and moved into place.
  SrcFile(int file_id, std::string rel_filename, std::string abs_filename, std::string&& text, const SrcFile* included_from)
    : file_id(file_id),
      rel_filename(std::move(rel_filename)),
      abs_filename(std::move(abs_filename)),
      text(std::move(text)),
      included_from(included_from) {
  }

  SrcFile(const SrcFile& other) = delete;
  SrcFile& operator=(const SrcFile&) = delete;

  bool is_entrypoint_file() const;
  bool is_offset_valid(int offset) const;
  SrcPosition convert_offset(int offset) const;
};
// Registry of every SrcFile created through register_file().
// Stores raw pointers; no destructor is declared here, so the class does not free the files.
class AllRegisteredSrcFiles {
std::vector<SrcFile*> all_src_files;
// id given to the most recently registered file; starts at -1, so the first file gets id 0
int last_file_id = -1;
public:
// Both lookups return nullptr when nothing matches.
const SrcFile *find_file(int file_id) const;
const SrcFile* find_file(const std::string& abs_filename) const;
// Creates and stores a new SrcFile that takes over `text`.
const SrcFile* register_file(const std::string& rel_filename, const std::string& abs_filename, std::string&& text, const SrcFile* included_from);
const std::vector<SrcFile*>& get_all_files() const { return all_src_files; }
};
// Exception that carries only a text message.
struct Fatal final : std::exception {
  std::string message;

  explicit Fatal(std::string _msg)
    : message(std::move(_msg)) {
  }

  // The returned pointer refers to `message` and is valid while this object is alive and unmodified.
  const char* what() const noexcept override {
    return message.c_str();
  }
};
std::ostream& operator<<(std::ostream& os, const Fatal& fatal);
// SrcLocation points to a location (line, column) in some loaded .tolk source SrcFile.
// Note, that instead of storing src_file, line_no, etc., only 2 ints are stored.
// The purpose is: sizeof(SrcLocation) == 8, so it's just passed/stored without pointers/refs, just like int64_t.
// When decoding SrcLocation into human-readable format, it's converted to SrcFile::SrcPosition via offset.
// Compact reference to a position in a registered source file: a file id plus a character offset.
class SrcLocation {
// Lexer writes char_offset directly
friend class Lexer;
int file_id = -1; // file_id from AllRegisteredSrcFiles
int char_offset = -1; // offset from SrcFile::text
public:
// A default-constructed location is undefined: both fields are -1.
SrcLocation() = default;
// Points into `src_file`, which must not be null; char_offset stays -1 until it is set.
explicit SrcLocation(const SrcFile* src_file) : file_id(src_file->file_id) {
}
bool is_defined() const { return file_id != -1; }
const SrcFile* get_src_file() const;
void show(std::ostream& os) const;
void show_context(std::ostream& os) const;
void show_general_error(std::ostream& os, const std::string& message, const std::string& err_type) const;
void show_note(const std::string& err_msg) const;
void show_warning(const std::string& err_msg) const;
void show_error(const std::string& err_msg) const;
};
std::ostream& operator<<(std::ostream& os, SrcLocation loc);
// Exception that carries a message together with the source location it refers to.
struct ParseError : std::exception {
SrcLocation where;
std::string message;
ParseError(SrcLocation _where, std::string _msg) : where(_where), message(std::move(_msg)) {
}
// Returns the message only; the location is printed by show().
const char* what() const noexcept override {
return message.c_str();
}
void show(std::ostream& os) const;
};
std::ostream& operator<<(std::ostream& os, const ParseError& error);
} // namespace tolk

View file

@ -1,228 +0,0 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#include "srcread.h"
#include <algorithm>
namespace tolk {
/*
*
* SOURCE FILE READER
*
*/
// Prints "unknown-location" for a null descriptor, "stdin" for the stdin descriptor, the filename otherwise.
std::ostream& operator<<(std::ostream& os, const FileDescr* fdescr) {
  if (!fdescr) {
    return os << "unknown-location";
  }
  if (fdescr->is_stdin) {
    return os << "stdin";
  }
  return os << fdescr->filename;
}
// Prints the message of a Fatal error (get_msg()).
std::ostream& operator<<(std::ostream& os, const Fatal& fatal) {
return os << fatal.get_msg();
}
// Translates a character offset inside `text` into line information using the recorded line offsets.
// Each non-null out-parameter is written:
//   *line_no   - number of recorded line offsets that are <= offset, or line_offs.size() for an offset outside the text;
//   *line_pos  - offset relative to the start of its line, or -1 when the line was not determined;
//   *line_size - distance between this line's start and the next recorded offset, or 0.
// Returns a pointer to the start of the line inside `text`, or nullptr when the line was not determined.
const char* FileDescr::convert_offset(long offset, long* line_no, long* line_pos, long* line_size) const {
  long found_line = 0;
  long found_pos = -1;
  long found_size = 0;
  const char* line_start = nullptr;

  const bool offset_in_text = offset >= 0 && offset < (long)text.size();
  if (!offset_in_text) {
    found_line = (long)line_offs.size();
  } else {
    // first recorded offset that is strictly greater than `offset`, i.e. the start of the following line
    auto next_line = std::upper_bound(line_offs.begin(), line_offs.end(), offset);
    found_line = next_line - line_offs.begin();
    if (found_line && next_line != line_offs.end()) {
      const long this_line_offs = next_line[-1];
      found_size = next_line[0] - this_line_offs;
      found_pos = offset - this_line_offs;
      line_start = text.data() + this_line_offs;
    }
  }

  if (line_no) {
    *line_no = found_line;
  }
  if (line_pos) {
    *line_pos = found_pos;
  }
  if (line_size) {
    *line_size = found_size;
  }
  return line_start;
}
// Appends `new_line` followed by a '\0' to `text` and records the offset just past it in `line_offs`.
// Returns a pointer to the start of the appended line inside `text`.
const char* FileDescr::push_line(std::string new_line) {
  if (line_offs.empty()) {
    line_offs.push_back(0);  // the first line starts at offset 0
  }
  const std::size_t line_begin = text.size();
  text.append(new_line);
  text.push_back('\0');
  line_offs.push_back((long)text.size());
  return text.data() + line_begin;
}
// Prints the file designation and, when the position can be converted, ":<line>" and ":<column>"
// (the column is printed 1-based and only when line_pos is non-negative).
void SrcLocation::show(std::ostream& os) const {
  os << fdescr;
  if (!fdescr) {
    return;
  }
  long line_no, line_pos;
  if (!convert_pos(&line_no, &line_pos)) {
    return;
  }
  os << ':' << line_no;
  if (line_pos >= 0) {
    os << ':' << (line_pos + 1);
  }
}
// Prints the source line around this location and a '^' marker under the position.
// Returns false (printing nothing) when there is no file descriptor or the position cannot be converted.
bool SrcLocation::show_context(std::ostream& os) const {
long line_no, line_pos, line_size;
if (!fdescr || !convert_pos(&line_no, &line_pos, &line_size)) {
return false;
}
// long lines are truncated: more than 200 chars on a side is cut down to 100 chars on that side
// NOTE(review): `line_pos + 200u < line_size` mixes signed long with unsigned int — confirm the intended arithmetic
bool skip_left = (line_pos > 200), skip_right = (line_pos + 200u < line_size);
const char* here = fdescr->text.data() + char_offs;
const char* base = here - line_pos;
const char* start = skip_left ? here - 100 : base;
const char* end = skip_right ? here + 100 : base + line_size;
os << " ";
if (skip_left) {
os << "... ";
}
for (const char* ptr = start; ptr < end; ptr++) {
os << (char)*ptr;
}
if (skip_right) {
os << " ...";
}
os << std::endl;
os << " ";
if (skip_left) {
os << "... ";
}
// second line: tabs and newlines (codes 9 and 10) are echoed as is, every other character becomes a space
for (const char* ptr = start; ptr < here; ptr++) {
char c = *ptr;
os << (c == 9 || c == 10 ? c : ' ');
}
os << '^' << std::endl;
return true;
}
// Streams a location in the format produced by SrcLocation::show().
std::ostream& operator<<(std::ostream& os, const SrcLocation& loc) {
loc.show(os);
return os;
}
// Prints "<location>[: <err_type>]: <message>", ends the line (flushing the stream),
// then prints the source context of this location.
void SrcLocation::show_gen_error(std::ostream& os, std::string message, std::string err_type) const {
  show(os);
  const bool has_type = !err_type.empty();
  if (has_type) {
    os << ": ";
    os << err_type;
  }
  os << ": ";
  os << message;
  os << std::endl;
  show_context(os);
}
// Streams any Error through its virtual show().
std::ostream& operator<<(std::ostream& os, const Error& error) {
error.show(os);
return os;
}
// Prints "<location>: error: <message>", ends the line (flushing the stream), then the source context of `where`.
void ParseError::show(std::ostream& os) const {
os << where << ": error: " << message << std::endl;
where.show_context(os);
}
// Binds the reader to an input stream and an optional file descriptor, then immediately loads the first line.
SourceReader::SourceReader(std::istream* _is, FileDescr* _fdescr)
: ifs(_is), fdescr(_fdescr), loc(_fdescr), eof(false), cur_line_len(0), start(0), cur(0), end(0) {
load_line();
}
// Marks the reader as finished and clears the current-line pointers; does nothing if already at eof.
void SourceReader::set_eof() {
  if (eof) {
    return;
  }
  eof = true;
  start = nullptr;
  cur = nullptr;
  end = nullptr;
}
// Skips spaces and tabs at the current position and returns how many characters were skipped.
// Returns 0 when there is no current line.
int SourceReader::skip_spc() {
  if (!cur) {
    return 0;
  }
  int skipped = 0;
  while (cur[skipped] == ' ' || cur[skipped] == '\t') {
    ++skipped;
  }
  set_ptr(cur + skipped);
  return skipped;
}
// Skips blanks and empty remainders of lines, loading further lines as needed.
// Returns true when the input is exhausted, false as soon as a non-blank character is found.
bool SourceReader::seek_eof() {
  for (;;) {
    if (!seek_eoln()) {
      return false;
    }
    if (!load_line()) {
      return true;
    }
  }
}
// Moves the current position forward to `ptr` (which must lie within [cur, end]) and
// advances the location offset by the same distance. Returns `ptr`.
const char* SourceReader::set_ptr(const char* ptr) {
  if (ptr == cur) {
    return ptr;
  }
  const bool outside_line = ptr < cur || ptr > end;
  if (outside_line) {
    error("parsing position went outside of line");
  }
  loc.char_offs += ptr - cur;
  cur = ptr;
  return ptr;
}
// Reads the next line from the input stream and makes it the current line.
// Returns true on success, false once the input is exhausted (the reader is then marked eof).
// Calls error() on a stream read failure and on a line longer than 0xffffff characters.
bool SourceReader::load_line() {
if (eof) {
return false;
}
// the location offset is reset to -1 until a new line is successfully loaded
loc.set_eof();
if (ifs->eof()) {
set_eof();
return false;
}
std::getline(*ifs, cur_line);
if (ifs->fail()) {
set_eof();
// a failure that is not caused by reaching end of stream is a real read error
if (!ifs->eof()) {
error("cannot read line from source stream");
}
return false;
}
std::size_t len = cur_line.size();
if (len > 0xffffff) {
set_eof();
error("line too long");
return false;
}
if (len && cur_line.back() == '\r') {
// CP/M line breaks support
cur_line.pop_back();
--len;
}
cur_line_len = (int)len;
if (fdescr) {
// with a descriptor, the line is stored in fdescr->text and the pointers refer to that storage
cur = start = fdescr->push_line(std::move(cur_line));
end = start + len;
loc.char_offs = (std::size_t)(cur - fdescr->text.data());
cur_line.clear();
} else {
// without a descriptor, the pointers refer to the reader's own cur_line buffer
cur = start = cur_line.c_str();
end = start + cur_line_len;
}
return true;
}
} // namespace tolk

View file

@ -1,162 +0,0 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <string>
#include <vector>
#include <iostream>
namespace tolk {
/*
*
* SOURCE FILE READER
*
*/
// Describes one input source: its name, the accumulated text and the offsets of line starts within that text.
struct FileDescr {
  std::string filename;
  std::string text;
  std::vector<long> line_offs;
  bool is_stdin;
  bool is_main{false};

  FileDescr(std::string _fname, bool _stdin = false)
    : filename(std::move(_fname)),
      is_stdin(_stdin) {
  }

  // Appends a line to `text`; returns a pointer to its first character.
  const char* push_line(std::string new_line);
  // Converts an offset within `text` to line information (see the definition for the out-parameters).
  const char* convert_offset(long offset, long* line_no, long* line_pos, long* line_size = nullptr) const;
};
// Error that carries only a text message.
struct Fatal {
  std::string message;

  Fatal(std::string _msg)
    : message(std::move(_msg)) {
  }

  // Returns a copy of the message.
  std::string get_msg() const {
    return message;
  }
};
std::ostream& operator<<(std::ostream& os, const Fatal& fatal);
struct SrcLocation {
const FileDescr* fdescr;
long char_offs;
SrcLocation() : fdescr(nullptr), char_offs(-1) {
}
SrcLocation(const FileDescr* _fdescr, long offs = -1) : fdescr(_fdescr), char_offs(-1) {
}
bool defined() const {
return fdescr;
}
bool eof() const {
return char_offs == -1;
}
void set_eof() {
char_offs = -1;
}
const char* convert_pos(long* line_no, long* line_pos, long* line_size = nullptr) const {
return defined() ? fdescr->convert_offset(char_offs, line_no, line_pos, line_size) : nullptr;
}
void show(std::ostream& os) const;
bool show_context(std::ostream& os) const;
void show_gen_error(std::ostream& os, std::string message, std::string err_type = "") const;
void show_note(std::string err_msg) const {
show_gen_error(std::cerr, err_msg, "note");
}
void show_warning(std::string err_msg) const {
show_gen_error(std::cerr, err_msg, "warning");
}
void show_error(std::string err_msg) const {
show_gen_error(std::cerr, err_msg, "error");
}
};
std::ostream& operator<<(std::ostream& os, const SrcLocation& loc);
/// Abstract base for errors that know how to print themselves.
/// Concrete errors implement show().
struct Error {
  /// Writes a description of the error to `os`.
  virtual void show(std::ostream& os) const = 0;

  /// Virtual so that derived errors are destroyed correctly through a base reference/pointer.
  virtual ~Error() = default;
};

/// Streams an Error to `os`; defined out of line.
std::ostream& operator<<(std::ostream& os, const Error& error);
struct ParseError : Error {
SrcLocation where;
std::string message;
ParseError(const SrcLocation& _where, std::string _msg) : where(_where), message(_msg) {
}
ParseError(const SrcLocation* _where, std::string _msg) : message(_msg) {
if (_where) {
where = *_where;
}
}
~ParseError() override = default;
void show(std::ostream& os) const override;
};
// Line-oriented reader over an input stream.
// Keeps a cursor (`cur`) inside the current line, delimited by `start` and `end`,
// and a SrcLocation (`loc`) used when reporting errors.
class SourceReader {
// Input stream the reader was constructed with.
std::istream* ifs;
// Optional file descriptor; when set, load_line() hands each line to fdescr->push_line().
FileDescr* fdescr;
// Location attached to errors thrown by error() and returned by here().
SrcLocation loc;
// End-of-input flag reported by is_eof().
bool eof;
// Buffer for the line being read. load_line() moves it into fdescr and clears it
// when a file descriptor is present, so it may be empty afterwards.
std::string cur_line;
// Length of the current line as computed by load_line() (after stripping a trailing '\r').
int cur_line_len;
void set_eof();
// Bounds of the current line and the cursor within it; load_line() sets
// cur = start and end = start + length.
const char *start, *cur, *end;
public:
SourceReader(std::istream* _is, FileDescr* _fdescr);
// Reads the next line and resets the cursor to its beginning.
// The visible part of the implementation returns false on a read failure or when
// the line exceeds 0xffffff characters, and true otherwise.
bool load_line();
// True once the end-of-input flag has been set.
bool is_eof() const {
return eof;
}
// Non-zero when the cursor has reached the end of the current line.
// Declared as int, but the value is the result of a pointer comparison (0 or 1).
int is_eoln() const {
return cur == end;
}
// Defined out of line. NOTE(review): presumably skips whitespace at the cursor -- confirm in the .cpp.
int skip_spc();
// Calls skip_spc() and then reports whether the cursor is at the end of the line.
bool seek_eoln() {
skip_spc();
return is_eoln();
}
// Defined out of line; semantics not visible here.
bool seek_eof();
// C string view of the internal line buffer (see the note on cur_line: it can be
// empty when lines are stored in a FileDescr).
const char* cur_line_cstr() const {
return cur_line.c_str();
}
// Current location as tracked by the reader.
const SrcLocation& here() const {
return loc;
}
// Character under the cursor. No bounds check is performed here.
char cur_char() const {
return *cur;
}
// Character right after the cursor. No bounds check is performed here;
// NOTE(review): relies on the underlying buffer being readable one past `cur` -- confirm.
char next_char() const {
return cur[1];
}
// Raw cursor pointer.
const char* get_ptr() const {
return cur;
}
// Pointer one past the last character of the current line.
const char* get_end_ptr() const {
return end;
}
// Moves the cursor to `ptr`; defined out of line.
const char* set_ptr(const char* ptr);
// Moves the cursor by `n` characters relative to its current position via set_ptr().
void advance(int n) {
set_ptr(get_ptr() + n);
}
// Throws a ParseError carrying `err_msg` at the reader's current location; never returns normally.
void error(std::string err_msg) {
throw ParseError{loc, err_msg};
}
};
} // namespace tolk

View file

@ -28,13 +28,19 @@ namespace tolk {
int scope_level; int scope_level;
SymTable<100003> symbols; SymTable symbols;
SymDef* sym_def[symbols.hprime + 1]; SymDef* sym_def[symbols.SIZE_PRIME + 1];
SymDef* global_sym_def[symbols.hprime + 1]; SymDef* global_sym_def[symbols.SIZE_PRIME + 1];
std::vector<std::pair<int, SymDef>> symbol_stack; std::vector<std::pair<int, SymDef>> symbol_stack;
std::vector<SrcLocation> scope_opened_at; std::vector<SrcLocation> scope_opened_at;
Symbol::Symbol(std::string str, sym_idx_t idx) : str(std::move(str)), idx(idx) {
subclass = this->str[0] == '.' ? SymbolSubclass::dot_identifier
: this->str[0] == '~' ? SymbolSubclass::tilde_identifier
: SymbolSubclass::undef;
}
std::string Symbol::unknown_symbol_name(sym_idx_t i) { std::string Symbol::unknown_symbol_name(sym_idx_t i) {
if (!i) { if (!i) {
return "_"; return "_";
@ -45,57 +51,43 @@ std::string Symbol::unknown_symbol_name(sym_idx_t i) {
} }
} }
sym_idx_t SymTableBase::gen_lookup(std::string str, int mode, sym_idx_t idx) { sym_idx_t SymTable::gen_lookup(std::string_view str, int mode, sym_idx_t idx) {
unsigned long long h1 = 1, h2 = 1; unsigned long long h1 = 1, h2 = 1;
for (char c : str) { for (char c : str) {
h1 = ((h1 * 239) + (unsigned char)(c)) % p; h1 = ((h1 * 239) + (unsigned char)(c)) % SIZE_PRIME;
h2 = ((h2 * 17) + (unsigned char)(c)) % (p - 1); h2 = ((h2 * 17) + (unsigned char)(c)) % (SIZE_PRIME - 1);
} }
++h2; ++h2;
++h1; ++h1;
while (true) { while (true) {
if (sym_table[h1]) { if (sym[h1]) {
if (sym_table[h1]->str == str) { if (sym[h1]->str == str) {
return (mode & 2) ? not_found : sym_idx_t(h1); return (mode & 2) ? not_found : sym_idx_t(h1);
} }
h1 += h2; h1 += h2;
if (h1 > p) { if (h1 > SIZE_PRIME) {
h1 -= p; h1 -= SIZE_PRIME;
} }
} else { } else {
if (!(mode & 1)) { if (!(mode & 1)) {
return not_found; return not_found;
} }
if (def_sym >= ((long long)p * 3) / 4) { if (def_sym >= ((long long)SIZE_PRIME * 3) / 4) {
throw SymTableOverflow{def_sym}; throw SymTableOverflow{def_sym};
} }
sym_table[h1] = std::make_unique<Symbol>(str, idx <= 0 ? sym_idx_t(h1) : -idx); sym[h1] = std::make_unique<Symbol>(static_cast<std::string>(str), idx <= 0 ? sym_idx_t(h1) : -idx);
++def_sym; ++def_sym;
return sym_idx_t(h1); return sym_idx_t(h1);
} }
} }
} }
SymTableBase& SymTableBase::add_keyword(std::string str, sym_idx_t idx) { void open_scope(SrcLocation loc) {
if (idx <= 0) {
idx = ++def_kw;
}
sym_idx_t res = gen_lookup(str, -1, idx);
if (!res) {
throw SymTableKwRedef{str};
}
if (idx < max_kw_idx) {
keywords[idx] = res;
}
return *this;
}
void open_scope(Lexer& lex) {
++scope_level; ++scope_level;
scope_opened_at.push_back(lex.cur().loc); scope_opened_at.push_back(loc);
} }
void close_scope(Lexer& lex) { void close_scope(SrcLocation loc) {
if (!scope_level) { if (!scope_level) {
throw Fatal{"cannot close the outer scope"}; throw Fatal{"cannot close the outer scope"};
} }
@ -124,24 +116,20 @@ void close_scope(Lexer& lex) {
scope_opened_at.pop_back(); scope_opened_at.pop_back();
} }
SymDef* lookup_symbol(sym_idx_t idx, int flags) { SymDef* lookup_symbol(sym_idx_t idx) {
if (!idx) { if (!idx) {
return nullptr; return nullptr;
} }
if ((flags & 1) && sym_def[idx]) { if (sym_def[idx]) {
return sym_def[idx]; return sym_def[idx];
} }
if ((flags & 2) && global_sym_def[idx]) { if (global_sym_def[idx]) {
return global_sym_def[idx]; return global_sym_def[idx];
} }
return nullptr; return nullptr;
} }
SymDef* lookup_symbol(std::string name, int flags) { SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc) {
return lookup_symbol(symbols.lookup(name), flags);
}
SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new, const SrcLocation& loc) {
if (!name_idx) { if (!name_idx) {
return nullptr; return nullptr;
} }
@ -156,7 +144,7 @@ SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new, const SrcLocati
return found; return found;
} }
SymDef* define_symbol(sym_idx_t name_idx, bool force_new, const SrcLocation& loc) { SymDef* define_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc) {
if (!name_idx) { if (!name_idx) {
return nullptr; return nullptr;
} }
@ -176,7 +164,7 @@ SymDef* define_symbol(sym_idx_t name_idx, bool force_new, const SrcLocation& loc
return found; return found;
} }
found = sym_def[name_idx] = new SymDef(scope_level, name_idx, loc); found = sym_def[name_idx] = new SymDef(scope_level, name_idx, loc);
symbol_stack.push_back(std::make_pair(scope_level, SymDef{0, name_idx})); symbol_stack.push_back(std::make_pair(scope_level, SymDef{0, name_idx, loc}));
#ifdef TOLK_DEBUG #ifdef TOLK_DEBUG
found->sym_name = found->name(); found->sym_name = found->name();
symbol_stack.back().second.sym_name = found->name(); symbol_stack.back().second.sym_name = found->name();

View file

@ -15,8 +15,9 @@
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>. along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/ */
#pragma once #pragma once
#include "srcread.h" #include "src-file.h"
#include "lexer.h" #include <functional>
#include <memory>
#include <vector> #include <vector>
namespace tolk { namespace tolk {
@ -29,11 +30,12 @@ namespace tolk {
typedef int var_idx_t; typedef int var_idx_t;
enum class SymValKind { _Param, _Var, _Func, _Typename, _GlobVar, _Const };
struct SymValBase { struct SymValBase {
enum { _Param, _Var, _Func, _Typename, _GlobVar, _Const }; SymValKind kind;
int type;
int idx; int idx;
SymValBase(int _type, int _idx) : type(_type), idx(_idx) { SymValBase(SymValKind kind, int idx) : kind(kind), idx(idx) {
} }
virtual ~SymValBase() = default; virtual ~SymValBase() = default;
}; };
@ -44,92 +46,69 @@ struct SymValBase {
* *
*/ */
// defined outside this module (by the end user) enum class SymbolSubclass {
int compute_symbol_subclass(std::string str); // return 0 if unneeded undef = 0,
dot_identifier = 1, // begins with . (a const method)
tilde_identifier = 2 // begins with ~ (a non-const method)
};
typedef int sym_idx_t; typedef int sym_idx_t;
struct Symbol { struct Symbol {
std::string str; std::string str;
sym_idx_t idx; sym_idx_t idx;
int subclass; SymbolSubclass subclass;
Symbol(std::string _str, sym_idx_t _idx, int _sc) : str(_str), idx(_idx), subclass(_sc) {
} Symbol(std::string str, sym_idx_t idx);
Symbol(std::string _str, sym_idx_t _idx) : str(_str), idx(_idx) {
subclass = compute_symbol_subclass(std::move(_str));
}
static std::string unknown_symbol_name(sym_idx_t i); static std::string unknown_symbol_name(sym_idx_t i);
}; };
class SymTableBase { class SymTable {
unsigned p; public:
std::unique_ptr<Symbol>* sym_table; static constexpr int SIZE_PRIME = 100003;
sym_idx_t def_kw, def_sym;
private:
sym_idx_t def_sym{0};
std::unique_ptr<Symbol> sym[SIZE_PRIME + 1];
sym_idx_t gen_lookup(std::string_view str, int mode = 0, sym_idx_t idx = 0);
static constexpr int max_kw_idx = 10000; static constexpr int max_kw_idx = 10000;
sym_idx_t keywords[max_kw_idx]; sym_idx_t keywords[max_kw_idx];
public: public:
SymTableBase(unsigned p_, std::unique_ptr<Symbol>* sym_table_)
: p(p_), sym_table(sym_table_), def_kw(0x100), def_sym(0) {
std::memset(keywords, 0, sizeof(keywords));
}
static constexpr sym_idx_t not_found = 0; static constexpr sym_idx_t not_found = 0;
SymTableBase& add_keyword(std::string str, sym_idx_t idx = 0); sym_idx_t lookup(const std::string_view& str, int mode = 0) {
SymTableBase& add_kw_char(char c) {
return add_keyword(std::string{c}, c);
}
sym_idx_t lookup(std::string str, int mode = 0) {
return gen_lookup(str, mode); return gen_lookup(str, mode);
} }
sym_idx_t lookup_add(std::string str) { sym_idx_t lookup_add(const std::string& str) {
return gen_lookup(str, 1); return gen_lookup(str, 1);
} }
Symbol* operator[](sym_idx_t i) const { Symbol* operator[](sym_idx_t i) const {
return sym_table[i].get(); return sym[i].get();
} }
bool is_keyword(sym_idx_t i) const { bool is_keyword(sym_idx_t i) const {
return sym_table[i] && sym_table[i]->idx < 0; return sym[i] && sym[i]->idx < 0;
} }
std::string get_name(sym_idx_t i) const { std::string get_name(sym_idx_t i) const {
return sym_table[i] ? sym_table[i]->str : Symbol::unknown_symbol_name(i); return sym[i] ? sym[i]->str : Symbol::unknown_symbol_name(i);
} }
int get_subclass(sym_idx_t i) const { SymbolSubclass get_subclass(sym_idx_t i) const {
return sym_table[i] ? sym_table[i]->subclass : 0; return sym[i] ? sym[i]->subclass : SymbolSubclass::undef;
} }
Symbol* get_keyword(int i) const { Symbol* get_keyword(int i) const {
return ((unsigned)i < (unsigned)max_kw_idx) ? sym_table[keywords[i]].get() : nullptr; return ((unsigned)i < (unsigned)max_kw_idx) ? sym[keywords[i]].get() : nullptr;
} }
protected: SymTable() {
sym_idx_t gen_lookup(std::string str, int mode = 0, sym_idx_t idx = 0); std::memset(keywords, 0, sizeof(keywords));
};
template <unsigned pp>
class SymTable : public SymTableBase {
public:
static constexpr int hprime = pp;
static int size() {
return pp + 1;
}
private:
std::unique_ptr<Symbol> sym[pp + 1];
public:
SymTable() : SymTableBase(pp, sym) {
}
SymTable& add_keyword(std::string str, sym_idx_t idx = 0) {
SymTableBase::add_keyword(str, idx);
return *this;
}
SymTable& add_kw_char(char c) {
return add_keyword(std::string{c}, c);
} }
}; };
struct SymTableOverflow { struct SymTableOverflow {
int sym_def; int sym_def;
SymTableOverflow(int x) : sym_def(x) { explicit SymTableOverflow(int x) : sym_def(x) {
} }
}; };
@ -139,7 +118,7 @@ struct SymTableKwRedef {
} }
}; };
extern SymTable<100003> symbols; extern SymTable symbols;
extern int scope_level; extern int scope_level;
@ -151,7 +130,7 @@ struct SymDef {
#ifdef TOLK_DEBUG #ifdef TOLK_DEBUG
std::string sym_name; std::string sym_name;
#endif #endif
SymDef(int lvl, sym_idx_t idx, const SrcLocation& _loc = {}, SymValBase* val = 0) SymDef(int lvl, sym_idx_t idx, SrcLocation _loc, SymValBase* val = nullptr)
: level(lvl), sym_idx(idx), value(val), loc(_loc) { : level(lvl), sym_idx(idx), value(val), loc(_loc) {
} }
bool has_name() const { bool has_name() const {
@ -162,17 +141,16 @@ struct SymDef {
} }
}; };
extern SymDef* sym_def[symbols.hprime + 1]; extern SymDef* sym_def[symbols.SIZE_PRIME + 1];
extern SymDef* global_sym_def[symbols.hprime + 1]; extern SymDef* global_sym_def[symbols.SIZE_PRIME + 1];
extern std::vector<std::pair<int, SymDef>> symbol_stack; extern std::vector<std::pair<int, SymDef>> symbol_stack;
extern std::vector<SrcLocation> scope_opened_at; extern std::vector<SrcLocation> scope_opened_at;
void open_scope(Lexer& lex); void open_scope(SrcLocation loc);
void close_scope(Lexer& lex); void close_scope(SrcLocation loc);
SymDef* lookup_symbol(sym_idx_t idx, int flags = 3); SymDef* lookup_symbol(sym_idx_t idx);
SymDef* lookup_symbol(std::string name, int flags = 3);
SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new = false, const SrcLocation& loc = {}); SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new = false, SrcLocation loc = {});
SymDef* define_symbol(sym_idx_t name_idx, bool force_new = false, const SrcLocation& loc = {}); SymDef* define_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc);
} // namespace tolk } // namespace tolk

View file

@ -30,67 +30,41 @@
void usage(const char* progname) { void usage(const char* progname) {
std::cerr std::cerr
<< "usage: " << progname << "usage: " << progname << " [options] <filename.tolk>\n"
<< " [-vIAPSR][-O<level>][-i<indent-spc>][-o<output-filename>][-W<boc-filename>] {<filename.tolk> ...}\n" "\tGenerates Fift TVM assembler code from a .tolk file\n"
"\tGenerates Fift TVM assembler code from a Tolk source\n" "-o<fif-filename>\tWrites generated code into specified .fif file instead of stdout\n"
"-I\tEnables interactive mode (parse stdin)\n" "-b<boc-filename>\tGenerate Fift instructions to save TVM bytecode into .boc file\n"
"-o<fift-output-filename>\tWrites generated code into specified file instead of stdout\n"
"-v\tIncreases verbosity level (extra information output into stderr)\n"
"-i<indent>\tSets indentation for the output code (in two-space units)\n"
"-A\tPrefix code with `\"Asm.fif\" include` preamble\n"
"-O<level>\tSets optimization level (2 by default)\n" "-O<level>\tSets optimization level (2 by default)\n"
"-P\tEnvelope code into PROGRAM{ ... }END>c\n" "-S\tDon't include stack layout comments into Fift output\n"
"-S\tInclude stack layout comments in the output code\n" "-e\tIncreases verbosity level (extra output into stderr)\n"
"-R\tInclude operation rewrite comments in the output code\n" "-v\tOutput version of Tolk and exit\n";
"-W<output-boc-file>\tInclude Fift code to serialize and save generated code into specified BoC file. Enables "
"-A and -P.\n"
"\t-s\tOutput semantic version of Tolk and exit\n"
"\t-V<version>\tShow Tolk build information\n";
std::exit(2); std::exit(2);
} }
int main(int argc, char* const argv[]) { int main(int argc, char* const argv[]) {
int i; int i;
std::string output_filename; std::string output_filename;
while ((i = getopt(argc, argv, "Ahi:Io:O:PRsSvW:V")) != -1) { while ((i = getopt(argc, argv, "o:b:O:Sevh")) != -1) {
switch (i) { switch (i) {
case 'A':
tolk::asm_preamble = true;
break;
case 'I':
tolk::interactive = true;
break;
case 'i':
tolk::indent = std::max(0, atoi(optarg));
break;
case 'o': case 'o':
output_filename = optarg; output_filename = optarg;
break; break;
case 'b':
tolk::boc_output_filename = optarg;
break;
case 'O': case 'O':
tolk::opt_level = std::max(0, atoi(optarg)); tolk::opt_level = std::max(0, atoi(optarg));
break; break;
case 'P':
tolk::program_envelope = true;
break;
case 'R':
tolk::op_rewrite_comments = true;
break;
case 'S': case 'S':
tolk::stack_layout_comments = true; tolk::stack_layout_comments = false;
break; break;
case 'v': case 'e':
++tolk::verbosity; ++tolk::verbosity;
break; break;
case 'W': case 'v':
tolk::boc_output_filename = optarg; std::cout << "Tolk compiler v" << tolk::tolk_version << "\n";
tolk::asm_preamble = tolk::program_envelope = true; std::cout << "Build commit: " << GitMetadata::CommitSHA1() << "\n";
break; std::cout << "Build date: " << GitMetadata::CommitDate() << "\n";
case 's':
std::cout << tolk::tolk_version << "\n";
std::exit(0);
case 'V':
std::cout << "Tolk semantic version: v" << tolk::tolk_version << "\n";
std::cout << "Build information: [ Commit: " << GitMetadata::CommitSHA1() << ", Date: " << GitMetadata::CommitDate() << "]\n";
std::exit(0); std::exit(0);
case 'h': case 'h':
default: default:
@ -110,13 +84,14 @@ int main(int argc, char* const argv[]) {
outs = fs.get(); outs = fs.get();
} }
std::vector<std::string> sources; if (optind != argc - 1) {
std::cerr << "invalid usage: should specify exactly one input file.tolk";
while (optind < argc) { return 2;
sources.push_back(std::string(argv[optind++]));
} }
std::string entrypoint_file_name = argv[optind];
tolk::read_callback = tolk::fs_read_callback; tolk::read_callback = tolk::fs_read_callback;
return tolk::tolk_proceed(sources, *outs, std::cerr); return tolk::tolk_proceed(entrypoint_file_name, *outs, std::cerr);
} }

View file

@ -31,81 +31,58 @@
#include "td/utils/Status.h" #include "td/utils/Status.h"
#include <sstream> #include <sstream>
#include <iomanip> #include <iomanip>
#include "vm/boc.h"
td::Result<std::string> compile_internal(char *config_json) { td::Result<std::string> compile_internal(char *config_json) {
TRY_RESULT(input_json, td::json_decode(td::MutableSlice(config_json))) TRY_RESULT(input_json, td::json_decode(td::MutableSlice(config_json)))
auto &obj = input_json.get_object(); td::JsonObject& config = input_json.get_object();
TRY_RESULT(opt_level, td::get_json_object_int_field(obj, "optLevel", false)); TRY_RESULT(opt_level, td::get_json_object_int_field(config, "optimizationLevel", true, 2));
TRY_RESULT(sources_obj, td::get_json_object_field(obj, "sources", td::JsonValue::Type::Array, false)); TRY_RESULT(stack_comments, td::get_json_object_bool_field(config, "withStackComments", true, false));
TRY_RESULT(entrypoint_file_name, td::get_json_object_string_field(config, "entrypointFileName", false));
auto &sources_arr = sources_obj.get_array();
std::vector<std::string> sources;
for (auto &item : sources_arr) {
sources.push_back(item.get_string().str());
}
tolk::opt_level = std::max(0, opt_level); tolk::opt_level = std::max(0, opt_level);
tolk::program_envelope = true;
tolk::verbosity = 0; tolk::verbosity = 0;
tolk::indent = 1; tolk::stack_layout_comments = stack_comments;
std::ostringstream outs, errs; std::ostringstream outs, errs;
auto compile_res = tolk::tolk_proceed(sources, outs, errs); int tolk_res = tolk::tolk_proceed(entrypoint_file_name, outs, errs);
if (tolk_res != 0) {
if (compile_res != 0) { return td::Status::Error("Tolk compilation error: " + errs.str());
return td::Status::Error(std::string("Tolk compilation error: ") + errs.str());
} }
TRY_RESULT(code_cell, fift::compile_asm(outs.str(), "/fiftlib/", false)); TRY_RESULT(fift_res, fift::compile_asm_program(outs.str(), "/fiftlib/"));
TRY_RESULT(boc, vm::std_boc_serialize(code_cell));
td::JsonBuilder result_json; td::JsonBuilder result_json;
auto result_obj = result_json.enter_object(); auto obj = result_json.enter_object();
result_obj("status", "ok"); obj("status", "ok");
result_obj("codeBoc", td::base64_encode(boc)); obj("fiftCode", fift_res.fiftCode);
result_obj("fiftCode", outs.str()); obj("codeBoc64", fift_res.codeBoc64);
result_obj("codeHashHex", code_cell->get_hash().to_hex()); obj("codeHashHex", fift_res.codeHashHex);
result_obj.leave(); obj.leave();
outs.clear();
errs.clear();
return result_json.string_builder().as_cslice().str(); return result_json.string_builder().as_cslice().str();
} }
/// Callback used to retrieve additional source files or data. /// Callback used to retrieve file contents from a "not file system". See tolk-js for implementation.
/// /// The callback must fill either destContents or destError.
/// @param _kind The kind of callback (a string). /// The implementor must use malloc() for them and use free() after tolk_compile returns.
/// @param _data The data for the callback (a string). typedef void (*CStyleReadFileCallback)(int kind, char const* data, char** destContents, char** destError);
/// @param o_contents A pointer to the contents of the file, if found. Allocated via malloc().
/// @param o_error A pointer to an error message, if there is one. Allocated via malloc().
///
/// The callback implementor must use malloc() to allocate storage for
/// contents or error. The callback implementor must use free() to free
/// said storage after tolk_compile returns.
///
/// If the callback is not supported, *o_contents and *o_error must be set to NULL.
typedef void (*CStyleReadFileCallback)(char const* _kind, char const* _data, char** o_contents, char** o_error);
tolk::ReadCallback::Callback wrapReadCallback(CStyleReadFileCallback _readCallback) tolk::ReadCallback::Callback wrapReadCallback(CStyleReadFileCallback _readCallback)
{ {
tolk::ReadCallback::Callback readCallback; tolk::ReadCallback::Callback readCallback;
if (_readCallback) { if (_readCallback) {
readCallback = [=](tolk::ReadCallback::Kind _kind, char const* _data) -> td::Result<std::string> { readCallback = [=](tolk::ReadCallback::Kind kind, char const* data) -> td::Result<std::string> {
char* contents_c = nullptr; char* destContents = nullptr;
char* error_c = nullptr; char* destError = nullptr;
_readCallback(tolk::ReadCallback::kindString(_kind).data(), _data, &contents_c, &error_c); _readCallback(static_cast<int>(kind), data, &destContents, &destError);
if (!contents_c && !error_c) { if (!destContents && !destError) {
return td::Status::Error("Callback not supported"); return td::Status::Error("Callback not supported");
} }
if (contents_c) { if (destContents) {
return contents_c; return destContents;
} }
return td::Status::Error(std::string(error_c)); return td::Status::Error(std::string(destError));
}; };
} }
return readCallback; return readCallback;

View file

@ -24,18 +24,17 @@
from all source files in the program, then also delete it here. from all source files in the program, then also delete it here.
*/ */
#include "tolk.h" #include "tolk.h"
#include "srcread.h"
#include "lexer.h" #include "lexer.h"
#include <getopt.h> #include <getopt.h>
#include "git.h" #include "git.h"
#include <fstream> #include <fstream>
#include "td/utils/port/path.h" #include "td/utils/port/path.h"
#include <sys/stat.h>
namespace tolk { namespace tolk {
int verbosity, indent, opt_level = 2; int verbosity = 0, opt_level = 2;
bool stack_layout_comments, op_rewrite_comments, program_envelope, asm_preamble; bool stack_layout_comments = true;
bool interactive = false;
GlobalPragma pragma_allow_post_modification{"allow-post-modification"}; GlobalPragma pragma_allow_post_modification{"allow-post-modification"};
GlobalPragma pragma_compute_asm_ltr{"compute-asm-ltr"}; GlobalPragma pragma_compute_asm_ltr{"compute-asm-ltr"};
GlobalPragma pragma_remove_unused_functions{"remove-unused-functions"}; GlobalPragma pragma_remove_unused_functions{"remove-unused-functions"};
@ -82,23 +81,13 @@ void GlobalPragma::enable(SrcLocation loc) {
". Please, remove this line from your code."); ". Please, remove this line from your code.");
return; return;
} }
if (!loc.get_src_file()->is_entrypoint_file()) {
// todo generally it's not true; rework pragmas completely
loc.show_warning(PSTRING() << "#pragma " << name_ <<
" should be used in the main file only.");
}
enabled_ = true; enabled_ = true;
locs_.push_back(std::move(loc));
}
void GlobalPragma::check_enable_in_libs() {
if (locs_.empty()) {
return;
}
for (const SrcLocation& loc : locs_) {
if (loc.fdescr->is_main) {
return;
}
}
locs_[0].show_warning(PSTRING() << "#pragma " << name_
<< " is enabled in included libraries, it may change the behavior of your code. "
<< "Add this #pragma to the main source file to suppress this warning.");
} }
void GlobalPragma::always_on_and_deprecated(const char *deprecated_from_v) { void GlobalPragma::always_on_and_deprecated(const char *deprecated_from_v) {
@ -109,14 +98,19 @@ void GlobalPragma::always_on_and_deprecated(const char *deprecated_from_v) {
td::Result<std::string> fs_read_callback(ReadCallback::Kind kind, const char* query) { td::Result<std::string> fs_read_callback(ReadCallback::Kind kind, const char* query) {
switch (kind) { switch (kind) {
case ReadCallback::Kind::ReadFile: { case ReadCallback::Kind::ReadFile: {
std::ifstream ifs{query}; struct stat f_stat;
if (ifs.fail()) { int res = stat(query, &f_stat);
auto msg = std::string{"cannot open source file `"} + query + "`"; if (res != 0) {
return td::Status::Error(msg); return td::Status::Error(std::string{"cannot open source file: "} + query);
} }
std::stringstream ss;
ss << ifs.rdbuf(); size_t file_size = static_cast<size_t>(f_stat.st_size);
return ss.str(); std::string str;
str.resize(file_size);
FILE* f = fopen(query, "r");
fread(str.data(), file_size, 1, f);
fclose(f);
return std::move(str);
} }
case ReadCallback::Kind::Realpath: { case ReadCallback::Kind::Realpath: {
return td::realpath(td::CSlice(query)); return td::realpath(td::CSlice(query));
@ -241,7 +235,7 @@ void generate_output_func(SymDef* func_sym, std::ostream &outs, std::ostream &er
} else if (func_val->is_inline_ref()) { } else if (func_val->is_inline_ref()) {
modifier = "REF"; modifier = "REF";
} }
outs << std::string(indent * 2, ' ') << name << " PROC" << modifier << ":<{\n"; outs << std::string(2, ' ') << name << " PROC" << modifier << ":<{\n";
int mode = 0; int mode = 0;
if (stack_layout_comments) { if (stack_layout_comments) {
mode |= Stack::_StkCmt | Stack::_CptStkCmt; mode |= Stack::_StkCmt | Stack::_CptStkCmt;
@ -255,8 +249,8 @@ void generate_output_func(SymDef* func_sym, std::ostream &outs, std::ostream &er
if (func_val->is_inline() || func_val->is_inline_ref()) { if (func_val->is_inline() || func_val->is_inline_ref()) {
mode |= Stack::_InlineAny; mode |= Stack::_InlineAny;
} }
code.generate_code(outs, mode, indent + 1); code.generate_code(outs, mode, 2);
outs << std::string(indent * 2, ' ') << "}>\n"; outs << std::string(2, ' ') << "}>\n";
if (verbosity >= 2) { if (verbosity >= 2) {
errs << "--------------\n"; errs << "--------------\n";
} }
@ -264,13 +258,9 @@ void generate_output_func(SymDef* func_sym, std::ostream &outs, std::ostream &er
} }
int generate_output(std::ostream &outs, std::ostream &errs) { int generate_output(std::ostream &outs, std::ostream &errs) {
if (asm_preamble) { outs << "\"Asm.fif\" include\n";
outs << "\"Asm.fif\" include\n";
}
outs << "// automatically generated from " << generated_from << std::endl; outs << "// automatically generated from " << generated_from << std::endl;
if (program_envelope) { outs << "PROGRAM{\n";
outs << "PROGRAM{\n";
}
mark_used_symbols(); mark_used_symbols();
for (SymDef* func_sym : glob_func) { for (SymDef* func_sym : glob_func) {
SymValCodeFunc* func_val = dynamic_cast<SymValCodeFunc*>(func_sym->value); SymValCodeFunc* func_val = dynamic_cast<SymValCodeFunc*>(func_sym->value);
@ -283,7 +273,7 @@ int generate_output(std::ostream &outs, std::ostream &errs) {
} }
std::string name = symbols.get_name(func_sym->sym_idx); std::string name = symbols.get_name(func_sym->sym_idx);
outs << std::string(indent * 2, ' '); outs << std::string(2, ' ');
if (func_val->method_id.is_null()) { if (func_val->method_id.is_null()) {
outs << "DECLPROC " << name << "\n"; outs << "DECLPROC " << name << "\n";
} else { } else {
@ -300,7 +290,7 @@ int generate_output(std::ostream &outs, std::ostream &errs) {
continue; continue;
} }
std::string name = symbols.get_name(gvar_sym->sym_idx); std::string name = symbols.get_name(gvar_sym->sym_idx);
outs << std::string(indent * 2, ' ') << "DECLGLOBVAR " << name << "\n"; outs << std::string(2, ' ') << "DECLGLOBVAR " << name << "\n";
} }
int errors = 0; int errors = 0;
for (SymDef* func_sym : glob_func) { for (SymDef* func_sym : glob_func) {
@ -310,76 +300,46 @@ int generate_output(std::ostream &outs, std::ostream &errs) {
} }
try { try {
generate_output_func(func_sym, outs, errs); generate_output_func(func_sym, outs, errs);
} catch (Error& err) { } catch (ParseError& err) {
errs << "cannot generate code for function `" << symbols.get_name(func_sym->sym_idx) << "`:\n" errs << "cannot generate code for function `" << symbols.get_name(func_sym->sym_idx) << "`:\n"
<< err << std::endl; << err << std::endl;
++errors; ++errors;
} }
} }
if (program_envelope) { outs << "}END>c\n";
outs << "}END>c\n";
}
if (!boc_output_filename.empty()) { if (!boc_output_filename.empty()) {
outs << "2 boc+>B \"" << boc_output_filename << "\" B>file\n"; outs << "boc>B \"" << boc_output_filename << "\" B>file\n";
} }
return errors; return errors;
} }
void output_inclusion_stack(std::ostream &errs) {
while (!inclusion_locations.empty()) {
SrcLocation loc = inclusion_locations.top();
inclusion_locations.pop();
if (loc.fdescr) {
errs << "note: included from ";
loc.show(errs);
errs << std::endl;
}
}
}
int tolk_proceed(const std::string &entrypoint_file_name, std::ostream &outs, std::ostream &errs) {
int tolk_proceed(const std::vector<std::string> &sources, std::ostream &outs, std::ostream &errs) {
if (program_envelope && !indent) {
indent = 1;
}
define_keywords();
define_builtins(); define_builtins();
lexer_init();
pragma_allow_post_modification.always_on_and_deprecated("0.5.0"); pragma_allow_post_modification.always_on_and_deprecated("0.5.0");
pragma_compute_asm_ltr.always_on_and_deprecated("0.5.0"); pragma_compute_asm_ltr.always_on_and_deprecated("0.5.0");
int ok = 0, proc = 0;
try { try {
for (auto src : sources) { bool ok = parse_source_file(entrypoint_file_name.c_str(), {});
ok += parse_source_file(src.c_str(), {}, true); if (!ok) {
proc++;
}
if (interactive) {
generated_from += "stdin ";
ok += parse_source_stdin();
proc++;
}
if (ok < proc) {
throw Fatal{"output code generation omitted because of errors"}; throw Fatal{"output code generation omitted because of errors"};
} }
if (!proc) {
throw Fatal{"no source files, no output"}; // todo #ifdef TOLK_PROFILING + comment
} // lexer_measure_performance(all_src_files.get_all_files());
pragma_remove_unused_functions.check_enable_in_libs();
return generate_output(outs, errs); return generate_output(outs, errs);
} catch (Fatal& fatal) { } catch (Fatal& fatal) {
errs << "fatal: " << fatal << std::endl; errs << "fatal: " << fatal << std::endl;
output_inclusion_stack(errs);
return 2; return 2;
} catch (Error& error) { } catch (ParseError& error) {
errs << error << std::endl; errs << error << std::endl;
output_inclusion_stack(errs);
return 2; return 2;
} catch (UnifyError& unif_err) { } catch (UnifyError& unif_err) {
errs << "fatal: "; errs << "fatal: ";
unif_err.print_message(errs); unif_err.print_message(errs);
errs << std::endl; errs << std::endl;
output_inclusion_stack(errs);
return 2; return 2;
} }

View file

@ -15,6 +15,7 @@
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>. along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/ */
#pragma once #pragma once
#include <utility>
#include <vector> #include <vector>
#include <string> #include <string>
#include <set> #include <set>
@ -26,7 +27,7 @@
#include "common/refcnt.hpp" #include "common/refcnt.hpp"
#include "common/bigint.hpp" #include "common/bigint.hpp"
#include "common/refint.h" #include "common/refint.h"
#include "srcread.h" #include "src-file.h"
#include "lexer.h" #include "lexer.h"
#include "symtable.h" #include "symtable.h"
#include "td/utils/Status.h" #include "td/utils/Status.h"
@ -45,104 +46,6 @@ constexpr int optimize_depth = 20;
const std::string tolk_version{"0.4.5"}; const std::string tolk_version{"0.4.5"};
enum Keyword {
_Eof = -1,
_Ident = 0,
_Number,
_Special,
_String,
_Return = 0x80,
_Var,
_Repeat,
_Do,
_While,
_Until,
_Try,
_Catch,
_If,
_Ifnot,
_Then,
_Else,
_Elseif,
_Elseifnot,
_Eq,
_Neq,
_Leq,
_Geq,
_Spaceship,
_Lshift,
_Rshift,
_RshiftR,
_RshiftC,
_DivR,
_DivC,
_ModR,
_ModC,
_DivMod,
_PlusLet,
_MinusLet,
_TimesLet,
_DivLet,
_DivRLet,
_DivCLet,
_ModLet,
_ModRLet,
_ModCLet,
_LshiftLet,
_RshiftLet,
_RshiftRLet,
_RshiftCLet,
_AndLet,
_OrLet,
_XorLet,
_Int,
_Cell,
_Slice,
_Builder,
_Cont,
_Tuple,
_Type,
_Mapsto,
_Forall,
_Asm,
_Impure,
_Pure,
_Global,
_Extern,
_Inline,
_InlineRef,
_Builtin,
_AutoApply,
_MethodId,
_Get,
_Operator,
_Infix,
_Infixl,
_Infixr,
_Const,
_PragmaHashtag,
_IncludeHashtag
};
void define_keywords();
class IdSc {
int cls;
public:
enum { undef = 0, dotid = 1, tildeid = 2 };
IdSc(int _cls = undef) : cls(_cls) {
}
operator int() {
return cls;
}
};
// symbol subclass:
// 1 = begins with . (a const method)
// 2 = begins with ~ (a non-const method)
// 0 = else
/* /*
* *
* TYPE EXPRESSIONS * TYPE EXPRESSIONS
@ -152,13 +55,13 @@ class IdSc {
struct TypeExpr { struct TypeExpr {
enum te_type { te_Unknown, te_Var, te_Indirect, te_Atomic, te_Tensor, te_Tuple, te_Map, te_ForAll } constr; enum te_type { te_Unknown, te_Var, te_Indirect, te_Atomic, te_Tensor, te_Tuple, te_Map, te_ForAll } constr;
enum AtomicType { enum AtomicType {
_Int = Keyword::_Int, _Int = tok_int,
_Cell = Keyword::_Cell, _Cell = tok_cell,
_Slice = Keyword::_Slice, _Slice = tok_slice,
_Builder = Keyword::_Builder, _Builder = tok_builder,
_Cont = Keyword::_Cont, _Cont = tok_cont,
_Tuple = Keyword::_Tuple, _Tuple = tok_tuple,
_Type = Keyword::_Type _Type = tok_type
}; };
int value; int value;
int minw, maxw; int minw, maxw;
@ -279,14 +182,18 @@ struct TypeExpr {
std::ostream& operator<<(std::ostream& os, TypeExpr* type_expr); std::ostream& operator<<(std::ostream& os, TypeExpr* type_expr);
struct UnifyError { struct UnifyError : std::exception {
TypeExpr* te1; TypeExpr* te1;
TypeExpr* te2; TypeExpr* te2;
std::string msg; std::string msg;
UnifyError(TypeExpr* _te1, TypeExpr* _te2, std::string _msg = "") : te1(_te1), te2(_te2), msg(_msg) {
UnifyError(TypeExpr* _te1, TypeExpr* _te2, std::string _msg = "") : te1(_te1), te2(_te2), msg(std::move(_msg)) {
} }
void print_message(std::ostream& os) const; void print_message(std::ostream& os) const;
std::string message() const; const char* what() const noexcept override {
return msg.c_str();
}
}; };
std::ostream& operator<<(std::ostream& os, const UnifyError& ue); std::ostream& operator<<(std::ostream& os, const UnifyError& ue);
@ -310,18 +217,13 @@ struct TmpVar {
int cls; int cls;
sym_idx_t name; sym_idx_t name;
int coord; int coord;
std::unique_ptr<SrcLocation> where; SrcLocation where;
std::vector<std::function<void(const SrcLocation &)>> on_modification; std::vector<std::function<void(SrcLocation)>> on_modification;
bool undefined = false;
TmpVar(var_idx_t _idx, int _cls, TypeExpr* _type = 0, SymDef* sym = 0, const SrcLocation* loc = 0); TmpVar(var_idx_t _idx, int _cls, TypeExpr* _type, SymDef* sym, SrcLocation loc);
void show(std::ostream& os, int omit_idx = 0) const; void show(std::ostream& os, int omit_idx = 0) const;
void dump(std::ostream& os) const; void dump(std::ostream& os) const;
void set_location(const SrcLocation& loc); void set_location(SrcLocation loc);
std::string to_string() const {
std::ostringstream s;
show(s, 2);
return s.str();
}
}; };
struct VarDescr { struct VarDescr {
@ -566,25 +468,25 @@ struct Op {
std::unique_ptr<Op> block0, block1; std::unique_ptr<Op> block0, block1;
td::RefInt256 int_const; td::RefInt256 int_const;
std::string str_const; std::string str_const;
Op(const SrcLocation& _where = {}, OpKind _cl = _Undef) : cl(_cl), flags(0), fun_ref(nullptr), where(_where) { Op(SrcLocation _where = {}, OpKind _cl = _Undef) : cl(_cl), flags(0), fun_ref(nullptr), where(_where) {
} }
Op(const SrcLocation& _where, OpKind _cl, const std::vector<var_idx_t>& _left) Op(SrcLocation _where, OpKind _cl, const std::vector<var_idx_t>& _left)
: cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left) { : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left) {
} }
Op(const SrcLocation& _where, OpKind _cl, std::vector<var_idx_t>&& _left) Op(SrcLocation _where, OpKind _cl, std::vector<var_idx_t>&& _left)
: cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(std::move(_left)) { : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(std::move(_left)) {
} }
Op(const SrcLocation& _where, OpKind _cl, const std::vector<var_idx_t>& _left, td::RefInt256 _const) Op(SrcLocation _where, OpKind _cl, const std::vector<var_idx_t>& _left, td::RefInt256 _const)
: cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left), int_const(_const) { : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left), int_const(_const) {
} }
Op(const SrcLocation& _where, OpKind _cl, const std::vector<var_idx_t>& _left, std::string _const) Op(SrcLocation _where, OpKind _cl, const std::vector<var_idx_t>& _left, std::string _const)
: cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left), str_const(_const) { : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left), str_const(_const) {
} }
Op(const SrcLocation& _where, OpKind _cl, const std::vector<var_idx_t>& _left, const std::vector<var_idx_t>& _right, Op(SrcLocation _where, OpKind _cl, const std::vector<var_idx_t>& _left, const std::vector<var_idx_t>& _right,
SymDef* _fun = nullptr) SymDef* _fun = nullptr)
: cl(_cl), flags(0), fun_ref(_fun), where(_where), left(_left), right(_right) { : cl(_cl), flags(0), fun_ref(_fun), where(_where), left(_left), right(_right) {
} }
Op(const SrcLocation& _where, OpKind _cl, std::vector<var_idx_t>&& _left, std::vector<var_idx_t>&& _right, Op(SrcLocation _where, OpKind _cl, std::vector<var_idx_t>&& _left, std::vector<var_idx_t>&& _right,
SymDef* _fun = nullptr) SymDef* _fun = nullptr)
: cl(_cl), flags(0), fun_ref(_fun), where(_where), left(std::move(_left)), right(std::move(_right)) { : cl(_cl), flags(0), fun_ref(_fun), where(_where), left(std::move(_left)), right(std::move(_right)) {
} }
@ -700,8 +602,8 @@ struct CodeBlob {
return res; return res;
} }
bool import_params(FormalArgList arg_list); bool import_params(FormalArgList arg_list);
var_idx_t create_var(int cls, TypeExpr* var_type = 0, SymDef* sym = 0, const SrcLocation* loc = 0); var_idx_t create_var(int cls, TypeExpr* var_type, SymDef* sym, SrcLocation loc);
var_idx_t create_tmp_var(TypeExpr* var_type = 0, const SrcLocation* loc = 0) { var_idx_t create_tmp_var(TypeExpr* var_type, SrcLocation loc) {
return create_var(TmpVar::_Tmp, var_type, nullptr, loc); return create_var(TmpVar::_Tmp, var_type, nullptr, loc);
} }
int split_vars(bool strict = false); int split_vars(bool strict = false);
@ -712,14 +614,14 @@ struct CodeBlob {
cur_ops_stack.push(cur_ops); cur_ops_stack.push(cur_ops);
cur_ops = &new_cur_ops; cur_ops = &new_cur_ops;
} }
void close_blk(const SrcLocation& location) { void close_blk(SrcLocation location) {
*cur_ops = std::make_unique<Op>(location, Op::_Nop); *cur_ops = std::make_unique<Op>(location, Op::_Nop);
} }
void pop_cur() { void pop_cur() {
cur_ops = cur_ops_stack.top(); cur_ops = cur_ops_stack.top();
cur_ops_stack.pop(); cur_ops_stack.pop();
} }
void close_pop_cur(const SrcLocation& location) { void close_pop_cur(SrcLocation location) {
close_blk(location); close_blk(location);
pop_cur(); pop_cur();
} }
@ -730,7 +632,7 @@ struct CodeBlob {
void generate_code(AsmOpList& out_list, int mode = 0); void generate_code(AsmOpList& out_list, int mode = 0);
void generate_code(std::ostream& os, int mode = 0, int indent = 0); void generate_code(std::ostream& os, int mode = 0, int indent = 0);
void on_var_modification(var_idx_t idx, const SrcLocation& here) const { void on_var_modification(var_idx_t idx, SrcLocation here) const {
for (auto& f : vars.at(idx).on_modification) { for (auto& f : vars.at(idx).on_modification) {
f(here); f(here);
} }
@ -746,8 +648,8 @@ struct CodeBlob {
struct SymVal : SymValBase { struct SymVal : SymValBase {
TypeExpr* sym_type; TypeExpr* sym_type;
bool auto_apply{false}; bool auto_apply{false};
SymVal(int _type, int _idx, TypeExpr* _stype = nullptr) SymVal(SymValKind kind, int idx, TypeExpr* sym_type = nullptr)
: SymValBase(_type, _idx), sym_type(_stype) { : SymValBase(kind, idx), sym_type(sym_type) {
} }
~SymVal() override = default; ~SymVal() override = default;
TypeExpr* get_type() const { TypeExpr* get_type() const {
@ -774,9 +676,9 @@ struct SymValFunc : SymVal {
#endif #endif
~SymValFunc() override = default; ~SymValFunc() override = default;
SymValFunc(int val, TypeExpr* _ft, bool marked_as_pure) SymValFunc(int val, TypeExpr* _ft, bool marked_as_pure)
: SymVal(_Func, val, _ft), flags(marked_as_pure ? flagMarkedAsPure : 0) {} : SymVal(SymValKind::_Func, val, _ft), flags(marked_as_pure ? flagMarkedAsPure : 0) {}
SymValFunc(int val, TypeExpr* _ft, std::initializer_list<int> _arg_order, std::initializer_list<int> _ret_order, bool marked_as_pure) SymValFunc(int val, TypeExpr* _ft, std::initializer_list<int> _arg_order, std::initializer_list<int> _ret_order, bool marked_as_pure)
: SymVal(_Func, val, _ft), flags(marked_as_pure ? flagMarkedAsPure : 0), arg_order(_arg_order), ret_order(_ret_order) { : SymVal(SymValKind::_Func, val, _ft), flags(marked_as_pure ? flagMarkedAsPure : 0), arg_order(_arg_order), ret_order(_ret_order) {
} }
const std::vector<int>* get_arg_order() const { const std::vector<int>* get_arg_order() const {
@ -818,7 +720,7 @@ struct SymValCodeFunc : SymValFunc {
struct SymValType : SymValBase { struct SymValType : SymValBase {
TypeExpr* sym_type; TypeExpr* sym_type;
SymValType(int _type, int _idx, TypeExpr* _stype = nullptr) : SymValBase(_type, _idx), sym_type(_stype) { SymValType(SymValKind kind, int idx, TypeExpr* _stype = nullptr) : SymValBase(kind, idx), sym_type(_stype) {
} }
~SymValType() override = default; ~SymValType() override = default;
TypeExpr* get_type() const { TypeExpr* get_type() const {
@ -834,7 +736,7 @@ struct SymValGlobVar : SymValBase {
std::string name; // seeing variable name in debugger makes it much easier to delve into Tolk sources std::string name; // seeing variable name in debugger makes it much easier to delve into Tolk sources
#endif #endif
SymValGlobVar(int val, TypeExpr* gvtype, int oidx = 0) SymValGlobVar(int val, TypeExpr* gvtype, int oidx = 0)
: SymValBase(_GlobVar, val), sym_type(gvtype), out_idx(oidx) { : SymValBase(SymValKind::_GlobVar, val), sym_type(gvtype), out_idx(oidx) {
} }
~SymValGlobVar() override = default; ~SymValGlobVar() override = default;
TypeExpr* get_type() const { TypeExpr* get_type() const {
@ -843,16 +745,16 @@ struct SymValGlobVar : SymValBase {
}; };
struct SymValConst : SymValBase { struct SymValConst : SymValBase {
enum ConstKind { IntConst, SliceConst };
td::RefInt256 intval; td::RefInt256 intval;
std::string strval; std::string strval;
Keyword type; ConstKind kind;
SymValConst(int idx, td::RefInt256 value) SymValConst(int idx, td::RefInt256 value)
: SymValBase(_Const, idx), intval(value) { : SymValBase(SymValKind::_Const, idx), intval(value), kind(IntConst) {
type = _Int;
} }
SymValConst(int idx, std::string value) SymValConst(int idx, std::string value)
: SymValBase(_Const, idx), strval(value) { : SymValBase(SymValKind::_Const, idx), strval(value), kind(SliceConst) {
type = _Slice;
} }
~SymValConst() override = default; ~SymValConst() override = default;
td::RefInt256 get_int_value() const { td::RefInt256 get_int_value() const {
@ -861,8 +763,8 @@ struct SymValConst : SymValBase {
std::string get_str_value() const { std::string get_str_value() const {
return strval; return strval;
} }
Keyword get_type() const { ConstKind get_kind() const {
return type; return kind;
} }
}; };
@ -882,35 +784,21 @@ public:
ReadCallback(ReadCallback const&) = delete; ReadCallback(ReadCallback const&) = delete;
ReadCallback& operator=(ReadCallback const&) = delete; ReadCallback& operator=(ReadCallback const&) = delete;
enum class Kind enum class Kind {
{ Realpath,
ReadFile, ReadFile,
Realpath
}; };
static std::string kindString(Kind _kind)
{
switch (_kind)
{
case Kind::ReadFile:
return "source";
case Kind::Realpath:
return "realpath";
default:
throw ""; // todo ?
}
}
/// File reading or generic query callback. /// File reading or generic query callback.
using Callback = std::function<td::Result<std::string>(ReadCallback::Kind, const char*)>; using Callback = std::function<td::Result<std::string>(Kind, const char*)>;
}; };
// defined in parse-tolk.cpp // defined in parse-tolk.cpp
bool parse_source(std::istream* is, const FileDescr* fdescr); void parse_source(const SrcFile* file);
bool parse_source_file(const char* filename, Lexem lex = {}, bool is_main = false); bool parse_source_file(const char* filename, SrcLocation loc_included_from);
bool parse_source_stdin();
extern std::stack<SrcLocation> inclusion_locations; extern std::stack<SrcLocation> inclusion_locations;
extern AllRegisteredSrcFiles all_src_files;
/* /*
* *
@ -949,7 +837,7 @@ struct Expr {
std::vector<Expr*> args; std::vector<Expr*> args;
explicit Expr(ExprCls c = _None) : cls(c) { explicit Expr(ExprCls c = _None) : cls(c) {
} }
Expr(ExprCls c, const SrcLocation& loc) : cls(c), here(loc) { Expr(ExprCls c, SrcLocation loc) : cls(c), here(loc) {
} }
Expr(ExprCls c, std::vector<Expr*> _args) : cls(c), args(std::move(_args)) { Expr(ExprCls c, std::vector<Expr*> _args) : cls(c), args(std::move(_args)) {
} }
@ -990,14 +878,13 @@ struct Expr {
bool is_mktuple() const { bool is_mktuple() const {
return cls == _MkTuple; return cls == _MkTuple;
} }
void chk_rvalue(const Lexem& lem) const; void chk_rvalue(const Lexer& lex) const; // todo here and below: strange to pass Lexer
void chk_lvalue(const Lexem& lem) const; void chk_lvalue(const Lexer& lex) const;
void chk_type(const Lexem& lem) const; bool deduce_type(const Lexer& lex);
bool deduce_type(const Lexem& lem); void set_location(SrcLocation loc) {
void set_location(const SrcLocation& loc) {
here = loc; here = loc;
} }
const SrcLocation& get_location() const { SrcLocation get_location() const {
return here; return here;
} }
int define_new_vars(CodeBlob& code); int define_new_vars(CodeBlob& code);
@ -1699,11 +1586,11 @@ struct Stack {
* *
*/ */
typedef std::function<AsmOp(std::vector<VarDescr>&, std::vector<VarDescr>&, const SrcLocation)> simple_compile_func_t; typedef std::function<AsmOp(std::vector<VarDescr>&, std::vector<VarDescr>&, SrcLocation)> simple_compile_func_t;
typedef std::function<bool(AsmOpList&, std::vector<VarDescr>&, std::vector<VarDescr>&)> compile_func_t; typedef std::function<bool(AsmOpList&, std::vector<VarDescr>&, std::vector<VarDescr>&)> compile_func_t;
inline simple_compile_func_t make_simple_compile(AsmOp op) { inline simple_compile_func_t make_simple_compile(AsmOp op) {
return [op](std::vector<VarDescr>& out, std::vector<VarDescr>& in, const SrcLocation&) -> AsmOp { return op; }; return [op](std::vector<VarDescr>& out, std::vector<VarDescr>& in, SrcLocation) -> AsmOp { return op; };
} }
inline compile_func_t make_ext_compile(std::vector<AsmOp>&& ops) { inline compile_func_t make_ext_compile(std::vector<AsmOp>&& ops) {
@ -1739,7 +1626,7 @@ struct SymValAsmFunc : SymValFunc {
std::initializer_list<int> ret_order = {}, bool marked_as_pure = false) std::initializer_list<int> ret_order = {}, bool marked_as_pure = false)
: SymValFunc(-1, ft, arg_order, ret_order, marked_as_pure), ext_compile(std::move(_compile)) { : SymValFunc(-1, ft, arg_order, ret_order, marked_as_pure), ext_compile(std::move(_compile)) {
} }
bool compile(AsmOpList& dest, std::vector<VarDescr>& out, std::vector<VarDescr>& in, const SrcLocation& where) const; bool compile(AsmOpList& dest, std::vector<VarDescr>& out, std::vector<VarDescr>& in, SrcLocation where) const;
}; };
// defined in builtins.cpp // defined in builtins.cpp
@ -1753,8 +1640,8 @@ AsmOp push_const(td::RefInt256 x);
void define_builtins(); void define_builtins();
extern int verbosity, indent, opt_level; extern int verbosity, opt_level;
extern bool stack_layout_comments, op_rewrite_comments, program_envelope, asm_preamble, interactive; extern bool stack_layout_comments;
extern std::string generated_from, boc_output_filename; extern std::string generated_from, boc_output_filename;
extern ReadCallback::Callback read_callback; extern ReadCallback::Callback read_callback;
@ -1764,6 +1651,7 @@ class GlobalPragma {
public: public:
explicit GlobalPragma(std::string name) : name_(std::move(name)) { explicit GlobalPragma(std::string name) : name_(std::move(name)) {
} }
const std::string& name() const { const std::string& name() const {
return name_; return name_;
} }
@ -1771,14 +1659,12 @@ class GlobalPragma {
return enabled_; return enabled_;
} }
void enable(SrcLocation loc); void enable(SrcLocation loc);
void check_enable_in_libs();
void always_on_and_deprecated(const char *deprecated_from_v); void always_on_and_deprecated(const char *deprecated_from_v);
private: private:
std::string name_; std::string name_;
bool enabled_ = false; bool enabled_ = false;
const char *deprecated_from_v_ = nullptr; const char *deprecated_from_v_ = nullptr;
std::vector<SrcLocation> locs_;
}; };
extern GlobalPragma pragma_allow_post_modification, pragma_compute_asm_ltr, pragma_remove_unused_functions; extern GlobalPragma pragma_allow_post_modification, pragma_compute_asm_ltr, pragma_remove_unused_functions;
@ -1788,7 +1674,7 @@ extern GlobalPragma pragma_allow_post_modification, pragma_compute_asm_ltr, prag
* *
*/ */
int tolk_proceed(const std::vector<std::string> &sources, std::ostream &outs, std::ostream &errs); int tolk_proceed(const std::string &entrypoint_file_name, std::ostream &outs, std::ostream &errs);
} // namespace tolk } // namespace tolk

View file

@ -354,12 +354,6 @@ std::ostream& operator<<(std::ostream& os, const UnifyError& ue) {
return os; return os;
} }
std::string UnifyError::message() const {
std::ostringstream os;
print_message(os);
return os.str();
}
void check_width_compat(TypeExpr* te1, TypeExpr* te2) { void check_width_compat(TypeExpr* te1, TypeExpr* te2) {
if (te1->minw > te2->maxw || te2->minw > te1->maxw) { if (te1->minw > te2->maxw || te2->minw > te1->maxw) {
std::ostringstream os{"cannot unify types of widths ", std::ios_base::ate}; std::ostringstream os{"cannot unify types of widths ", std::ios_base::ate};