mirror of https://github.com/ton-blockchain/ton synced 2025-02-12 11:12:16 +00:00

[Tolk] Rewrite lexer, spaces are not mandatory anymore

The new lexer is noticeably faster and more memory-efficient
(although splitting a file into tokens is a negligible part of the whole pipeline).

But the purpose of rewriting the lexer was not just speed,
but to allow writing code without spaces:
`2+2` is now 4, not a valid identifier as before.

The variety of symbols allowed in identifiers has been greatly reduced
and is now similar to other languages.
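
For example (an illustrative sketch in the current FunC-like Tolk syntax; the function and
variable names are made up, not taken from this commit):

int calc() {
    int four = 2+2;    ;; now lexed as `2` `+` `2`; with the old lexer, `2+2` was a single identifier
    int `2x` = four;   ;; exotic names like `2x` are still possible, but only when wrapped in backticks
    return `2x`;
}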

SrcLocation is now 8 bytes and is passed by value (on the stack) everywhere.

Command-line flags were also reworked:
- the input to the Tolk compiler is now a single file; it is parsed, and parsing continues as new #include directives are resolved (a minimal example follows this list)
- flags like -A, -P, and so on are no longer needed
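
A minimal sketch of such a single input file (the file name "utils.tolk" is made up; the version
constraint echoes the '0.5.0' example from the lexer comments further down):

#pragma version >0.5.0;   ;; '0.5.0' is lexed specially as tok_semver
#include "utils.tolk";    ;; parsing continues inside each included file as it is resolved
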
tolk-vm 2024-10-31 10:59:23 +04:00
parent 0bcc0b3c12
commit f0e6470d0b
GPG key ID: 7905DD7FE0324B12
26 changed files with 2042 additions and 2129 deletions

View file

@ -211,20 +211,39 @@ td::Result<fift::SourceLookup> create_mem_source_lookup(std::string main, std::s
fift_dir);
}
td::Result<td::Ref<vm::Cell>> compile_asm(td::Slice asm_code, std::string fift_dir, bool is_raw) {
td::Result<td::Ref<vm::Cell>> compile_asm(td::Slice asm_code) {
std::stringstream ss;
std::string sb;
sb.reserve(asm_code.size() + 100);
sb.append("\"Asm.fif\" include\n ");
sb.append(is_raw ? "<{" : "");
sb.append("\"Asm.fif\" include\n <{\n");
sb.append(asm_code.data(), asm_code.size());
sb.append(is_raw ? "}>c" : "");
sb.append(" boc>B \"res\" B>file");
sb.append("\n}>c boc>B \"res\" B>file");
TRY_RESULT(source_lookup, create_source_lookup(std::move(sb), true, true, true, false, false, false, false, fift_dir));
TRY_RESULT(source_lookup, create_source_lookup(std::move(sb), true, true, true, false, false, false, false));
TRY_RESULT(res, run_fift(std::move(source_lookup), &ss));
TRY_RESULT(boc, res.read_file("res"));
return vm::std_boc_deserialize(std::move(boc.data));
}
td::Result<CompiledProgramOutput> compile_asm_program(std::string&& program_code, const std::string& fift_dir) {
std::string main_fif;
main_fif.reserve(program_code.size() + 100);
main_fif.append(program_code.data(), program_code.size());
main_fif.append(R"( dup hashB B>X $>B "hex" B>file)"); // write codeHashHex to a file
main_fif.append(R"( boc>B B>base64 $>B "boc" B>file)"); // write codeBoc64 to a file
std::stringstream fift_output_stream;
TRY_RESULT(source_lookup, create_source_lookup(std::move(main_fif), true, true, false, false, false, false, false, fift_dir));
TRY_RESULT(res, run_fift(std::move(source_lookup), &fift_output_stream));
TRY_RESULT(boc, res.read_file("boc"));
TRY_RESULT(hex, res.read_file("hex"));
return CompiledProgramOutput{
std::move(program_code),
std::move(boc.data),
std::move(hex.data),
};
}
} // namespace fift

View file

@ -26,11 +26,21 @@ struct FiftOutput {
SourceLookup source_lookup;
std::string output;
};
// given a valid Fift code PROGRAM{ ... }END>c, compile_asm_program() returns this output
// now it's used primarily for wasm output (see tolk-js, for example)
struct CompiledProgramOutput {
std::string fiftCode;
std::string codeBoc64;
std::string codeHashHex;
};
td::Result<fift::SourceLookup> create_mem_source_lookup(std::string main, std::string fift_dir = "",
bool need_preamble = true, bool need_asm = true,
bool need_ton_util = true, bool need_lisp = true,
bool need_w3_code = true);
td::Result<FiftOutput> mem_run_fift(std::string source, std::vector<std::string> args = {}, std::string fift_dir = "");
td::Result<FiftOutput> mem_run_fift(SourceLookup source_lookup, std::vector<std::string> args);
td::Result<td::Ref<vm::Cell>> compile_asm(td::Slice asm_code, std::string fift_dir = "", bool is_raw = true);
td::Result<td::Ref<vm::Cell>> compile_asm(td::Slice asm_code);
td::Result<CompiledProgramOutput> compile_asm_program(std::string&& program_code, const std::string& fift_dir);
} // namespace fift

View file

@ -37,10 +37,10 @@
td::Result<std::string> compile_internal(char *config_json) {
TRY_RESULT(input_json, td::json_decode(td::MutableSlice(config_json)))
auto &obj = input_json.get_object();
td::JsonObject& config = input_json.get_object();
TRY_RESULT(opt_level, td::get_json_object_int_field(obj, "optLevel", false));
TRY_RESULT(sources_obj, td::get_json_object_field(obj, "sources", td::JsonValue::Type::Array, false));
TRY_RESULT(opt_level, td::get_json_object_int_field(config, "optLevel", false));
TRY_RESULT(sources_obj, td::get_json_object_field(config, "sources", td::JsonValue::Type::Array, false));
auto &sources_arr = sources_obj.get_array();
@ -52,29 +52,25 @@ td::Result<std::string> compile_internal(char *config_json) {
funC::opt_level = std::max(0, opt_level);
funC::program_envelope = true;
funC::asm_preamble = true;
funC::verbosity = 0;
funC::indent = 1;
std::ostringstream outs, errs;
auto compile_res = funC::func_proceed(sources, outs, errs);
if (compile_res != 0) {
return td::Status::Error(std::string("Func compilation error: ") + errs.str());
int funC_res = funC::func_proceed(sources, outs, errs);
if (funC_res != 0) {
return td::Status::Error("FunC compilation error: " + errs.str());
}
TRY_RESULT(code_cell, fift::compile_asm(outs.str(), "/fiftlib/", false));
TRY_RESULT(boc, vm::std_boc_serialize(code_cell));
TRY_RESULT(fift_res, fift::compile_asm_program(outs.str(), "/fiftlib/"));
td::JsonBuilder result_json;
auto result_obj = result_json.enter_object();
result_obj("status", "ok");
result_obj("codeBoc", td::base64_encode(boc));
result_obj("fiftCode", outs.str());
result_obj("codeHashHex", code_cell->get_hash().to_hex());
result_obj.leave();
outs.clear();
errs.clear();
auto obj = result_json.enter_object();
obj("status", "ok");
obj("fiftCode", std::move(fift_res.fiftCode));
obj("codeBoc", std::move(fift_res.codeBoc64));
obj("codeHashHex", std::move(fift_res.codeHashHex));
obj.leave();
return result_json.string_builder().as_cslice().str();
}

View file

@ -572,9 +572,9 @@ int atanh_f261(int x, int n) inline_ref {
s -= 1;
}
x += t;
int 2x = 2 * x;
int y = lshift256divr(2x, (x >> 1) - t);
;; y = 2x - (mulrshiftr256(2x, y) ~>> 2); ;; this line could improve precision on very rare occasions
int `2x` = 2 * x;
int y = lshift256divr(`2x`, (x >> 1) - t);
;; y = `2x` - (mulrshiftr256(2x, y) ~>> 2); ;; this line could improve precision on very rare occasions
return (atanh_f258(y, 36), s);
}

View file

@ -1,10 +1,9 @@
cmake_minimum_required(VERSION 3.5 FATAL_ERROR)
set(TOLK_SOURCE
srcread.cpp
src-file.cpp
lexer.cpp
symtable.cpp
keywords.cpp
unify-types.cpp
parse-tolk.cpp
abscode.cpp

View file

@ -24,29 +24,19 @@ namespace tolk {
*
*/
TmpVar::TmpVar(var_idx_t _idx, int _cls, TypeExpr* _type, SymDef* sym, const SrcLocation* loc)
: v_type(_type), idx(_idx), cls(_cls), coord(0) {
TmpVar::TmpVar(var_idx_t _idx, int _cls, TypeExpr* _type, SymDef* sym, SrcLocation loc)
: v_type(_type), idx(_idx), cls(_cls), coord(0), where(loc) {
if (sym) {
name = sym->sym_idx;
sym->value->idx = _idx;
}
if (loc) {
where = std::make_unique<SrcLocation>(*loc);
}
if (!_type) {
v_type = TypeExpr::new_hole();
}
if (cls == _Named) {
undefined = true;
}
}
void TmpVar::set_location(const SrcLocation& loc) {
if (where) {
*where = loc;
} else {
where = std::make_unique<SrcLocation>(loc);
}
void TmpVar::set_location(SrcLocation loc) {
where = loc;
}
void TmpVar::dump(std::ostream& os) const {
@ -469,10 +459,10 @@ void CodeBlob::print(std::ostream& os, int flags) const {
if ((flags & 8) != 0) {
for (const auto& var : vars) {
var.dump(os);
if (var.where && (flags & 1) != 0) {
var.where->show(os);
if (var.where.is_defined() && (flags & 1) != 0) {
var.where.show(os);
os << " defined here:\n";
var.where->show_context(os);
var.where.show_context(os);
}
}
}
@ -483,7 +473,7 @@ void CodeBlob::print(std::ostream& os, int flags) const {
os << "-------- END ---------\n\n";
}
var_idx_t CodeBlob::create_var(int cls, TypeExpr* var_type, SymDef* sym, const SrcLocation* location) {
var_idx_t CodeBlob::create_var(int cls, TypeExpr* var_type, SymDef* sym, SrcLocation location) {
vars.emplace_back(var_cnt, cls, var_type, sym, location);
if (sym) {
sym->value->idx = var_cnt;
@ -501,7 +491,7 @@ bool CodeBlob::import_params(FormalArgList arg_list) {
SymDef* arg_sym;
SrcLocation arg_loc;
std::tie(arg_type, arg_sym, arg_loc) = par;
list.push_back(create_var(arg_sym ? (TmpVar::_In | TmpVar::_Named) : TmpVar::_In, arg_type, arg_sym, &arg_loc));
list.push_back(create_var(arg_sym ? (TmpVar::_In | TmpVar::_Named) : TmpVar::_In, arg_type, arg_sym, arg_loc));
}
emplace_back(loc, Op::_Import, list);
in_var_cnt = var_cnt;

View file

@ -36,7 +36,7 @@ int CodeBlob::split_vars(bool strict) {
for (int j = 0; j < var_cnt; j++) {
TmpVar& var = vars[j];
if (strict && var.v_type->minw != var.v_type->maxw) {
throw ParseError{var.where.get(), "variable does not have fixed width, cannot manipulate it"};
throw ParseError{var.where, "variable does not have fixed width, cannot manipulate it"};
}
std::vector<TypeExpr*> comp_types;
int k = var.v_type->extract_components(comp_types);
@ -45,7 +45,7 @@ int CodeBlob::split_vars(bool strict) {
if (k != 1) {
var.coord = ~((n << 8) + k);
for (int i = 0; i < k; i++) {
auto v = create_var(vars[j].cls, comp_types[i], 0, vars[j].where.get());
auto v = create_var(vars[j].cls, comp_types[i], 0, vars[j].where);
tolk_assert(v == n + i);
tolk_assert(vars[v].idx == v);
vars[v].name = vars[j].name;
@ -54,7 +54,7 @@ int CodeBlob::split_vars(bool strict) {
n += k;
++changes;
} else if (strict && var.v_type->minw != 1) {
throw ParseError{var.where.get(),
throw ParseError{var.where,
"cannot work with variable or variable component of width greater than one"};
}
}

View file

@ -95,7 +95,7 @@ SymDef* define_builtin_const(std::string name, TypeExpr* const_type, Args&&... a
}
bool SymValAsmFunc::compile(AsmOpList& dest, std::vector<VarDescr>& out, std::vector<VarDescr>& in,
const SrcLocation& where) const {
SrcLocation where) const {
if (simple_compile) {
return dest.append(simple_compile(out, in, where));
} else if (ext_compile) {
@ -186,7 +186,7 @@ int emulate_mul(int a, int b) {
return r;
}
int emulate_and(int a, int b) {
int emulate_bitwise_and(int a, int b) {
int both = a & b, any = a | b;
int r = VarDescr::_Int;
if (any & VarDescr::_Nan) {
@ -204,7 +204,7 @@ int emulate_and(int a, int b) {
return r;
}
int emulate_or(int a, int b) {
int emulate_bitwise_or(int a, int b) {
if (b & VarDescr::_Zero) {
return a;
} else if (a & VarDescr::_Zero) {
@ -222,7 +222,7 @@ int emulate_or(int a, int b) {
return r;
}
int emulate_xor(int a, int b) {
int emulate_bitwise_xor(int a, int b) {
if (b & VarDescr::_Zero) {
return a;
} else if (a & VarDescr::_Zero) {
@ -241,7 +241,7 @@ int emulate_xor(int a, int b) {
return r;
}
int emulate_not(int a) {
int emulate_bitwise_not(int a) {
if ((a & VarDescr::ConstZero) == VarDescr::ConstZero) {
return VarDescr::ConstTrue;
}
@ -436,7 +436,7 @@ AsmOp push_const(td::RefInt256 x) {
return AsmOp::IntConst(std::move(x));
}
AsmOp compile_add(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where) {
AsmOp compile_add(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where) {
tolk_assert(res.size() == 1 && args.size() == 2);
VarDescr &r = res[0], &x = args[0], &y = args[1];
if (x.is_int_const() && y.is_int_const()) {
@ -478,7 +478,7 @@ AsmOp compile_add(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const
return exec_op("ADD", 2);
}
AsmOp compile_sub(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where) {
AsmOp compile_sub(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where) {
tolk_assert(res.size() == 1 && args.size() == 2);
VarDescr &r = res[0], &x = args[0], &y = args[1];
if (x.is_int_const() && y.is_int_const()) {
@ -511,7 +511,7 @@ AsmOp compile_sub(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const
return exec_op("SUB", 2);
}
AsmOp compile_negate(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where) {
AsmOp compile_unary_minus(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where) {
tolk_assert(res.size() == 1 && args.size() == 1);
VarDescr &r = res[0], &x = args[0];
if (x.is_int_const()) {
@ -526,7 +526,19 @@ AsmOp compile_negate(std::vector<VarDescr>& res, std::vector<VarDescr>& args, co
return exec_op("NEGATE", 1);
}
AsmOp compile_and(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where) {
AsmOp compile_unary_plus(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where) {
tolk_assert(res.size() == 1 && args.size() == 1);
VarDescr &r = res[0], &x = args[0];
if (x.is_int_const()) {
r.set_const(x.int_const);
x.unused();
return push_const(r.int_const);
}
r.val = x.val;
return AsmOp::Nop();
}
AsmOp compile_bitwise_and(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where) {
tolk_assert(res.size() == 1 && args.size() == 2);
VarDescr &r = res[0], &x = args[0], &y = args[1];
if (x.is_int_const() && y.is_int_const()) {
@ -535,11 +547,11 @@ AsmOp compile_and(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const
y.unused();
return push_const(r.int_const);
}
r.val = emulate_and(x.val, y.val);
r.val = emulate_bitwise_and(x.val, y.val);
return exec_op("AND", 2);
}
AsmOp compile_or(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where) {
AsmOp compile_bitwise_or(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where) {
tolk_assert(res.size() == 1 && args.size() == 2);
VarDescr &r = res[0], &x = args[0], &y = args[1];
if (x.is_int_const() && y.is_int_const()) {
@ -548,11 +560,11 @@ AsmOp compile_or(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const
y.unused();
return push_const(r.int_const);
}
r.val = emulate_or(x.val, y.val);
r.val = emulate_bitwise_or(x.val, y.val);
return exec_op("OR", 2);
}
AsmOp compile_xor(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where) {
AsmOp compile_bitwise_xor(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where) {
tolk_assert(res.size() == 1 && args.size() == 2);
VarDescr &r = res[0], &x = args[0], &y = args[1];
if (x.is_int_const() && y.is_int_const()) {
@ -561,11 +573,11 @@ AsmOp compile_xor(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const
y.unused();
return push_const(r.int_const);
}
r.val = emulate_xor(x.val, y.val);
r.val = emulate_bitwise_xor(x.val, y.val);
return exec_op("XOR", 2);
}
AsmOp compile_not(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where) {
AsmOp compile_bitwise_not(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where) {
tolk_assert(res.size() == 1 && args.size() == 1);
VarDescr &r = res[0], &x = args[0];
if (x.is_int_const()) {
@ -573,11 +585,11 @@ AsmOp compile_not(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const
x.unused();
return push_const(r.int_const);
}
r.val = emulate_not(x.val);
r.val = emulate_bitwise_not(x.val);
return exec_op("NOT", 1);
}
AsmOp compile_mul_internal(VarDescr& r, VarDescr& x, VarDescr& y, const SrcLocation& where) {
AsmOp compile_mul_internal(VarDescr& r, VarDescr& x, VarDescr& y, SrcLocation where) {
if (x.is_int_const() && y.is_int_const()) {
r.set_const(x.int_const * y.int_const);
if (!r.int_const->is_valid()) {
@ -645,12 +657,12 @@ AsmOp compile_mul_internal(VarDescr& r, VarDescr& x, VarDescr& y, const SrcLocat
return exec_op("MUL", 2);
}
AsmOp compile_mul(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where) {
AsmOp compile_mul(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where) {
tolk_assert(res.size() == 1 && args.size() == 2);
return compile_mul_internal(res[0], args[0], args[1], where);
}
AsmOp compile_lshift(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where) {
AsmOp compile_lshift(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where) {
tolk_assert(res.size() == 1 && args.size() == 2);
VarDescr &r = res[0], &x = args[0], &y = args[1];
if (y.is_int_const()) {
@ -692,7 +704,7 @@ AsmOp compile_lshift(std::vector<VarDescr>& res, std::vector<VarDescr>& args, co
return exec_op("LSHIFT", 2);
}
AsmOp compile_rshift(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where,
AsmOp compile_rshift(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where,
int round_mode) {
tolk_assert(res.size() == 1 && args.size() == 2);
VarDescr &r = res[0], &x = args[0], &y = args[1];
@ -722,7 +734,7 @@ AsmOp compile_rshift(std::vector<VarDescr>& res, std::vector<VarDescr>& args, co
return exec_op(rshift, 2);
}
AsmOp compile_div_internal(VarDescr& r, VarDescr& x, VarDescr& y, const SrcLocation& where, int round_mode) {
AsmOp compile_div_internal(VarDescr& r, VarDescr& x, VarDescr& y, SrcLocation where, int round_mode) {
if (x.is_int_const() && y.is_int_const()) {
r.set_const(div(x.int_const, y.int_const, round_mode));
if (!r.int_const->is_valid()) {
@ -762,12 +774,12 @@ AsmOp compile_div_internal(VarDescr& r, VarDescr& x, VarDescr& y, const SrcLocat
return exec_op(op, 2);
}
AsmOp compile_div(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where, int round_mode) {
AsmOp compile_div(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where, int round_mode) {
tolk_assert(res.size() == 1 && args.size() == 2);
return compile_div_internal(res[0], args[0], args[1], where, round_mode);
}
AsmOp compile_mod(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where,
AsmOp compile_mod(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where,
int round_mode) {
tolk_assert(res.size() == 1 && args.size() == 2);
VarDescr &r = res[0], &x = args[0], &y = args[1];
@ -808,7 +820,7 @@ AsmOp compile_mod(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const
return exec_op(op, 2);
}
AsmOp compile_muldiv(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation& where,
AsmOp compile_muldiv(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation where,
int round_mode) {
tolk_assert(res.size() == 1 && args.size() == 3);
VarDescr &r = res[0], &x = args[0], &y = args[1], &z = args[2];
@ -978,7 +990,7 @@ AsmOp compile_cmp_int(std::vector<VarDescr>& res, std::vector<VarDescr>& args, i
return exec_op(cmp_names[mode], 2);
}
AsmOp compile_throw(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation&) {
AsmOp compile_throw(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation) {
tolk_assert(res.empty() && args.size() == 1);
VarDescr& x = args[0];
if (x.is_int_const() && x.int_const->unsigned_fits_bits(11)) {
@ -1010,7 +1022,7 @@ AsmOp compile_cond_throw(std::vector<VarDescr>& res, std::vector<VarDescr>& args
}
}
AsmOp compile_throw_arg(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation&) {
AsmOp compile_throw_arg(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation) {
tolk_assert(res.empty() && args.size() == 2);
VarDescr &x = args[1];
if (x.is_int_const() && x.int_const->unsigned_fits_bits(11)) {
@ -1101,7 +1113,7 @@ AsmOp compile_fetch_slice(std::vector<VarDescr>& res, std::vector<VarDescr>& arg
}
// <type> <type>_at(tuple t, int index) asm "INDEXVAR";
AsmOp compile_tuple_at(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation&) {
AsmOp compile_tuple_at(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation) {
tolk_assert(args.size() == 2 && res.size() == 1);
auto& y = args[1];
if (y.is_int_const() && y.int_const >= 0 && y.int_const < 16) {
@ -1112,7 +1124,7 @@ AsmOp compile_tuple_at(std::vector<VarDescr>& res, std::vector<VarDescr>& args,
}
// int null?(X arg)
AsmOp compile_is_null(std::vector<VarDescr>& res, std::vector<VarDescr>& args, const SrcLocation&) {
AsmOp compile_is_null(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation) {
tolk_assert(args.size() == 1 && res.size() == 1);
auto &x = args[0], &r = res[0];
if (x.always_null() || x.always_not_null()) {
@ -1128,12 +1140,12 @@ AsmOp compile_is_null(std::vector<VarDescr>& res, std::vector<VarDescr>& args, c
void define_builtins() {
using namespace std::placeholders;
auto Unit = TypeExpr::new_unit();
auto Int = TypeExpr::new_atomic(_Int);
auto Cell = TypeExpr::new_atomic(_Cell);
auto Slice = TypeExpr::new_atomic(_Slice);
auto Builder = TypeExpr::new_atomic(_Builder);
// auto Null = TypeExpr::new_atomic(_Null);
auto Tuple = TypeExpr::new_atomic(_Tuple);
auto Int = TypeExpr::new_atomic(TypeExpr::_Int);
auto Cell = TypeExpr::new_atomic(TypeExpr::_Cell);
auto Slice = TypeExpr::new_atomic(TypeExpr::_Slice);
auto Builder = TypeExpr::new_atomic(TypeExpr::_Builder);
// auto Null = TypeExpr::new_atomic(TypeExpr::_Null);
auto Tuple = TypeExpr::new_atomic(TypeExpr::_Tuple);
auto Int2 = TypeExpr::new_tensor({Int, Int});
auto Int3 = TypeExpr::new_tensor({Int, Int, Int});
auto TupleInt = TypeExpr::new_tensor({Tuple, Int});
@ -1156,9 +1168,16 @@ void define_builtins() {
//auto arith_null_op = TypeExpr::new_map(TypeExpr::new_unit(), Int);
auto throw_arg_op = TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_tensor({X, Int}), Unit));
auto cond_throw_arg_op = TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_tensor({X, Int, Int}), Unit));
// prevent unused vars warnings (these vars are created to acquire initial id of TypeExpr::value)
static_cast<void>(Z);
static_cast<void>(XY);
static_cast<void>(Cell);
define_builtin_func("_+_", arith_bin_op, compile_add);
define_builtin_func("_-_", arith_bin_op, compile_sub);
define_builtin_func("-_", arith_un_op, compile_negate);
define_builtin_func("-_", arith_un_op, compile_unary_minus);
define_builtin_func("+_", arith_un_op, compile_unary_plus);
define_builtin_func("_*_", arith_bin_op, compile_mul);
define_builtin_func("_/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, -1));
define_builtin_func("_~/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 0));
@ -1175,10 +1194,10 @@ void define_builtins() {
define_builtin_func("_>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1));
define_builtin_func("_~>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 0));
define_builtin_func("_^>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 1));
define_builtin_func("_&_", arith_bin_op, compile_and);
define_builtin_func("_|_", arith_bin_op, compile_or);
define_builtin_func("_^_", arith_bin_op, compile_xor);
define_builtin_func("~_", arith_un_op, compile_not);
define_builtin_func("_&_", arith_bin_op, compile_bitwise_and);
define_builtin_func("_|_", arith_bin_op, compile_bitwise_or);
define_builtin_func("_^_", arith_bin_op, compile_bitwise_xor);
define_builtin_func("~_", arith_un_op, compile_bitwise_not);
define_builtin_func("^_+=_", arith_bin_op, compile_add);
define_builtin_func("^_-=_", arith_bin_op, compile_sub);
define_builtin_func("^_*=_", arith_bin_op, compile_mul);
@ -1192,9 +1211,9 @@ void define_builtins() {
define_builtin_func("^_>>=_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1));
define_builtin_func("^_~>>=_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 0));
define_builtin_func("^_^>>=_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 1));
define_builtin_func("^_&=_", arith_bin_op, compile_and);
define_builtin_func("^_|=_", arith_bin_op, compile_or);
define_builtin_func("^_^=_", arith_bin_op, compile_xor);
define_builtin_func("^_&=_", arith_bin_op, compile_bitwise_and);
define_builtin_func("^_|=_", arith_bin_op, compile_bitwise_or);
define_builtin_func("^_^=_", arith_bin_op, compile_bitwise_xor);
define_builtin_func("muldiv", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, -1));
define_builtin_func("muldivr", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 0));
define_builtin_func("muldivc", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 1));

View file

@ -41,25 +41,19 @@ Expr::Expr(ExprCls c, sym_idx_t name_idx, std::initializer_list<Expr*> _arglist)
}
}
void Expr::chk_rvalue(const Lexem& lem) const {
void Expr::chk_rvalue(const Lexer& lex) const {
if (!is_rvalue()) {
lem.error_at("rvalue expected before `", "`");
lex.error_at("rvalue expected before `", "`");
}
}
void Expr::chk_lvalue(const Lexem& lem) const {
void Expr::chk_lvalue(const Lexer& lex) const {
if (!is_lvalue()) {
lem.error_at("lvalue expected before `", "`");
lex.error_at("lvalue expected before `", "`");
}
}
void Expr::chk_type(const Lexem& lem) const {
if (!is_type()) {
lem.error_at("type expression expected before `", "`");
}
}
bool Expr::deduce_type(const Lexem& lem) {
bool Expr::deduce_type(const Lexer& lex) {
if (e_type) {
return true;
}
@ -83,7 +77,7 @@ bool Expr::deduce_type(const Lexem& lem) {
std::ostringstream os;
os << "cannot apply function " << sym->name() << " : " << sym_val->get_type() << " to arguments of type "
<< fun_type->args[0] << ": " << ue;
lem.error(os.str());
lex.error(os.str());
}
e_type = fun_type->args[1];
TypeExpr::remove_indirect(e_type);
@ -98,7 +92,7 @@ bool Expr::deduce_type(const Lexem& lem) {
std::ostringstream os;
os << "cannot apply expression of type " << args[0]->e_type << " to an expression of type " << args[1]->e_type
<< ": " << ue;
lem.error(os.str());
lex.error(os.str());
}
e_type = fun_type->args[1];
TypeExpr::remove_indirect(e_type);
@ -113,7 +107,7 @@ bool Expr::deduce_type(const Lexem& lem) {
std::ostringstream os;
os << "cannot assign an expression of type " << args[1]->e_type << " to a variable or pattern of type "
<< args[0]->e_type << ": " << ue;
lem.error(os.str());
lex.error(os.str());
}
e_type = args[0]->e_type;
TypeExpr::remove_indirect(e_type);
@ -130,7 +124,7 @@ bool Expr::deduce_type(const Lexem& lem) {
os << "cannot implicitly assign an expression of type " << args[1]->e_type
<< " to a variable or pattern of type " << rhs_type << " in modifying method `" << symbols.get_name(val)
<< "` : " << ue;
lem.error(os.str());
lex.error(os.str());
}
e_type = rhs_type->args[1];
TypeExpr::remove_indirect(e_type);
@ -139,13 +133,13 @@ bool Expr::deduce_type(const Lexem& lem) {
}
case _CondExpr: {
tolk_assert(args.size() == 3);
auto flag_type = TypeExpr::new_atomic(_Int);
auto flag_type = TypeExpr::new_atomic(TypeExpr::_Int);
try {
unify(args[0]->e_type, flag_type);
} catch (UnifyError& ue) {
std::ostringstream os;
os << "condition in a conditional expression has non-integer type " << args[0]->e_type << ": " << ue;
lem.error(os.str());
lex.error(os.str());
}
try {
unify(args[1]->e_type, args[2]->e_type);
@ -153,7 +147,7 @@ bool Expr::deduce_type(const Lexem& lem) {
std::ostringstream os;
os << "the two variants in a conditional expression have different types " << args[1]->e_type << " and "
<< args[2]->e_type << " : " << ue;
lem.error(os.str());
lex.error(os.str());
}
e_type = args[1]->e_type;
TypeExpr::remove_indirect(e_type);
@ -176,13 +170,13 @@ int Expr::define_new_vars(CodeBlob& code) {
}
case _Var:
if (val < 0) {
val = code.create_var(TmpVar::_Named, e_type, sym, &here);
val = code.create_var(TmpVar::_Named, e_type, sym, here);
return 1;
}
break;
case _Hole:
if (val < 0) {
val = code.create_var(TmpVar::_Tmp, e_type, nullptr, &here);
val = code.create_var(TmpVar::_Tmp, e_type, nullptr, here);
}
break;
}
@ -202,7 +196,7 @@ int Expr::predefine_vars() {
}
case _Var:
if (!sym) {
tolk_assert(val < 0 && here.defined());
tolk_assert(val < 0 && here.is_defined());
if (prohibited_var_names.count(symbols.get_name(~val))) {
throw ParseError{
here, PSTRING() << "symbol `" << symbols.get_name(~val) << "` cannot be redefined as a variable"};
@ -212,7 +206,7 @@ int Expr::predefine_vars() {
if (!sym) {
throw ParseError{here, std::string{"redefined variable `"} + symbols.get_name(~val) + "`"};
}
sym->value = new SymVal{SymVal::_Var, -1, e_type};
sym->value = new SymVal{SymValKind::_Var, -1, e_type};
return 1;
}
break;
@ -221,17 +215,17 @@ int Expr::predefine_vars() {
}
var_idx_t Expr::new_tmp(CodeBlob& code) const {
return code.create_tmp_var(e_type, &here);
return code.create_tmp_var(e_type, here);
}
void add_set_globs(CodeBlob& code, std::vector<std::pair<SymDef*, var_idx_t>>& globs, const SrcLocation& here) {
void add_set_globs(CodeBlob& code, std::vector<std::pair<SymDef*, var_idx_t>>& globs, SrcLocation here) {
for (const auto& p : globs) {
auto& op = code.emplace_back(here, Op::_SetGlob, std::vector<var_idx_t>{}, std::vector<var_idx_t>{ p.second }, p.first);
op.set_impure(code);
}
}
std::vector<var_idx_t> pre_compile_let(CodeBlob& code, Expr* lhs, Expr* rhs, const SrcLocation& here) {
std::vector<var_idx_t> pre_compile_let(CodeBlob& code, Expr* lhs, Expr* rhs, SrcLocation here) {
while (lhs->is_type_apply()) {
lhs = lhs->args.at(0);
}
@ -245,7 +239,7 @@ std::vector<var_idx_t> pre_compile_let(CodeBlob& code, Expr* lhs, Expr* rhs, con
auto right = rhs->pre_compile(code);
TypeExpr::remove_indirect(rhs->e_type);
auto unpacked_type = rhs->e_type->args.at(0);
std::vector<var_idx_t> tmp{code.create_tmp_var(unpacked_type, &rhs->here)};
std::vector<var_idx_t> tmp{code.create_tmp_var(unpacked_type, rhs->here)};
code.emplace_back(lhs->here, Op::_UnTuple, tmp, std::move(right));
auto tvar = new Expr{Expr::_Var};
tvar->set_val(tmp[0]);
@ -286,14 +280,14 @@ std::vector<var_idx_t> pre_compile_tensor(const std::vector<Expr *>& args, CodeB
for (size_t j = 0; j < res_lists[i].size(); ++j) {
TmpVar& var = code.vars.at(res_lists[i][j]);
if (!lval_globs && (var.cls & TmpVar::_Named)) {
var.on_modification.push_back([&modified_vars, i, j, cur_ops = code.cur_ops, done = false](const SrcLocation &here) mutable {
var.on_modification.push_back([&modified_vars, i, j, cur_ops = code.cur_ops, done = false](SrcLocation here) mutable {
if (!done) {
done = true;
modified_vars.push_back({i, j, cur_ops});
}
});
} else {
var.on_modification.push_back([](const SrcLocation &) {
var.on_modification.push_back([](SrcLocation) {
});
}
}
@ -307,8 +301,8 @@ std::vector<var_idx_t> pre_compile_tensor(const std::vector<Expr *>& args, CodeB
for (size_t idx = modified_vars.size(); idx--; ) {
const ModifiedVar &m = modified_vars[idx];
var_idx_t orig_v = res_lists[m.i][m.j];
var_idx_t tmp_v = code.create_tmp_var(code.vars[orig_v].v_type, code.vars[orig_v].where.get());
std::unique_ptr<Op> op = std::make_unique<Op>(*code.vars[orig_v].where, Op::_Let);
var_idx_t tmp_v = code.create_tmp_var(code.vars[orig_v].v_type, code.vars[orig_v].where);
std::unique_ptr<Op> op = std::make_unique<Op>(code.vars[orig_v].where, Op::_Let);
op->left = {tmp_v};
op->right = {orig_v};
op->next = std::move((*m.cur_ops));

View file

@ -1,129 +0,0 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tolk.h"
namespace tolk {
/*
*
* KEYWORD DEFINITION
*
*/
void define_keywords() {
symbols.add_kw_char('+')
.add_kw_char('-')
.add_kw_char('*')
.add_kw_char('/')
.add_kw_char('%')
.add_kw_char('?')
.add_kw_char(':')
.add_kw_char(',')
.add_kw_char(';')
.add_kw_char('(')
.add_kw_char(')')
.add_kw_char('[')
.add_kw_char(']')
.add_kw_char('{')
.add_kw_char('}')
.add_kw_char('=')
.add_kw_char('_')
.add_kw_char('<')
.add_kw_char('>')
.add_kw_char('&')
.add_kw_char('|')
.add_kw_char('^')
.add_kw_char('~');
symbols.add_keyword("==", Keyword::_Eq)
.add_keyword("!=", Keyword::_Neq)
.add_keyword("<=", Keyword::_Leq)
.add_keyword(">=", Keyword::_Geq)
.add_keyword("<=>", Keyword::_Spaceship)
.add_keyword("<<", Keyword::_Lshift)
.add_keyword(">>", Keyword::_Rshift)
.add_keyword("~>>", Keyword::_RshiftR)
.add_keyword("^>>", Keyword::_RshiftC)
.add_keyword("~/", Keyword::_DivR)
.add_keyword("^/", Keyword::_DivC)
.add_keyword("~%", Keyword::_ModR)
.add_keyword("^%", Keyword::_ModC)
.add_keyword("/%", Keyword::_DivMod)
.add_keyword("+=", Keyword::_PlusLet)
.add_keyword("-=", Keyword::_MinusLet)
.add_keyword("*=", Keyword::_TimesLet)
.add_keyword("/=", Keyword::_DivLet)
.add_keyword("~/=", Keyword::_DivRLet)
.add_keyword("^/=", Keyword::_DivCLet)
.add_keyword("%=", Keyword::_ModLet)
.add_keyword("~%=", Keyword::_ModRLet)
.add_keyword("^%=", Keyword::_ModCLet)
.add_keyword("<<=", Keyword::_LshiftLet)
.add_keyword(">>=", Keyword::_RshiftLet)
.add_keyword("~>>=", Keyword::_RshiftRLet)
.add_keyword("^>>=", Keyword::_RshiftCLet)
.add_keyword("&=", Keyword::_AndLet)
.add_keyword("|=", Keyword::_OrLet)
.add_keyword("^=", Keyword::_XorLet);
symbols.add_keyword("return", Keyword::_Return)
.add_keyword("var", Keyword::_Var)
.add_keyword("repeat", Keyword::_Repeat)
.add_keyword("do", Keyword::_Do)
.add_keyword("while", Keyword::_While)
.add_keyword("until", Keyword::_Until)
.add_keyword("try", Keyword::_Try)
.add_keyword("catch", Keyword::_Catch)
.add_keyword("if", Keyword::_If)
.add_keyword("ifnot", Keyword::_Ifnot)
.add_keyword("then", Keyword::_Then)
.add_keyword("else", Keyword::_Else)
.add_keyword("elseif", Keyword::_Elseif)
.add_keyword("elseifnot", Keyword::_Elseifnot);
symbols.add_keyword("int", Keyword::_Int)
.add_keyword("cell", Keyword::_Cell)
.add_keyword("slice", Keyword::_Slice)
.add_keyword("builder", Keyword::_Builder)
.add_keyword("cont", Keyword::_Cont)
.add_keyword("tuple", Keyword::_Tuple)
.add_keyword("type", Keyword::_Type)
.add_keyword("->", Keyword::_Mapsto)
.add_keyword("forall", Keyword::_Forall);
symbols.add_keyword("extern", Keyword::_Extern)
.add_keyword("global", Keyword::_Global)
.add_keyword("asm", Keyword::_Asm)
.add_keyword("impure", Keyword::_Impure)
.add_keyword("pure", Keyword::_Pure)
.add_keyword("inline", Keyword::_Inline)
.add_keyword("inline_ref", Keyword::_InlineRef)
.add_keyword("builtin", Keyword::_Builtin)
.add_keyword("auto_apply", Keyword::_AutoApply)
.add_keyword("method_id", Keyword::_MethodId)
.add_keyword("get", Keyword::_Get)
.add_keyword("operator", Keyword::_Operator)
.add_keyword("infix", Keyword::_Infix)
.add_keyword("infixl", Keyword::_Infixl)
.add_keyword("infixr", Keyword::_Infixr)
.add_keyword("const", Keyword::_Const);
symbols.add_keyword("#pragma", Keyword::_PragmaHashtag)
.add_keyword("#include", Keyword::_IncludeHashtag);
}
} // namespace tolk

View file

@ -16,335 +16,632 @@
*/
#include "lexer.h"
#include "symtable.h"
#include <sstream>
#include <cassert>
namespace tolk {
/*
*
* LEXER
*
*/
// By 'chunk' in lexer I mean a token or a list of tokens parsed simultaneously.
// E.g., when we meet "str", ChunkString is called, it emits tok_string.
// E.g., when we meet "str"x, ChunkString emits not only tok_string, but tok_string_modifier.
// E.g., when we meet //, ChunkInlineComment is called, it emits nothing (just skips a line).
// We store all valid chunks lexers in a prefix tree (LexingTrie), see below.
struct ChunkLexerBase {
ChunkLexerBase(const ChunkLexerBase&) = delete;
ChunkLexerBase &operator=(const ChunkLexerBase&) = delete;
ChunkLexerBase() = default;
std::string Lexem::lexem_name_str(int idx) {
if (idx == Eof) {
return "end of file";
} else if (idx == Ident) {
return "identifier";
} else if (idx == Number) {
return "number";
} else if (idx == String) {
return "string";
} else if (idx == Special) {
return "special";
} else if (symbols.get_keyword(idx)) {
return "`" + symbols.get_keyword(idx)->str + "`";
} else {
std::ostringstream os{"<unknown lexem of type "};
os << idx << ">";
return os.str();
}
virtual bool parse(Lexer* lex) const = 0;
virtual ~ChunkLexerBase() = default;
};
template <class T>
static T* singleton() {
static T obj;
return &obj;
}
std::string Lexem::name_str() const {
if (tp == Ident) {
return std::string{"identifier `"} + symbols.get_name(val) + "`";
} else if (tp == String) {
return std::string{"string \""} + str + '"';
} else {
return lexem_name_str(tp);
}
}
// LexingTrie is a prefix tree storing all available Tolk language constructs.
// It's effectively a map of a prefix to ChunkLexerBase.
class LexingTrie {
LexingTrie** next{nullptr}; // either nullptr or [256]
ChunkLexerBase* val{nullptr}; // non-null for leafs
bool is_number(std::string str) {
auto st = str.begin(), en = str.end();
if (st == en) {
return false;
}
if (*st == '-') {
st++;
}
bool hex = false;
if (st + 1 < en && *st == '0' && st[1] == 'x') {
st += 2;
hex = true;
}
if (st == en) {
return false;
}
while (st < en) {
int c = *st;
if (c >= '0' && c <= '9') {
++st;
continue;
GNU_ATTRIBUTE_ALWAYS_INLINE void ensure_next_allocated() {
if (next == nullptr) {
next = new LexingTrie*[256];
std::memset(next, 0, 256 * sizeof(LexingTrie*));
}
if (!hex) {
return false;
}
GNU_ATTRIBUTE_ALWAYS_INLINE void ensure_symbol_allocated(uint8_t symbol) const {
if (next[symbol] == nullptr) {
next[symbol] = new LexingTrie;
}
c |= 0x20;
if (c < 'a' || c > 'f') {
return false;
}
public:
// Maps a prefix onto a chunk lexer.
// E.g. " -> ChunkString
// E.g. """ -> ChunkMultilineString
void add_prefix(const char* s, ChunkLexerBase* val) {
LexingTrie* cur = this;
for (; *s; ++s) {
uint8_t symbol = static_cast<uint8_t>(*s);
cur->ensure_next_allocated();
cur->ensure_symbol_allocated(symbol);
cur = cur->next[symbol];
}
++st;
}
return true;
}
int Lexem::classify() {
if (tp != Unknown) {
return tp;
#ifdef TOLK_DEBUG
assert(!cur->val);
#endif
cur->val = val;
}
sym_idx_t i = symbols.lookup(str);
if (i) {
assert(str == symbols[i]->str);
str = symbols[i]->str;
sym_idx_t idx = symbols[i]->idx;
tp = (idx < 0 ? -idx : Ident);
val = i;
} else if (is_number(str)) {
tp = Number;
} else {
tp = 0;
}
if (tp == Unknown) {
tp = Ident;
val = symbols.lookup(str, 1);
}
return tp;
}
int Lexem::set(std::string _str, const SrcLocation& _loc, int _tp, int _val) {
str = _str;
loc = _loc;
tp = _tp;
val = _val;
return classify();
}
// Maps a pattern onto a chunk lexer.
// E.g. -[0-9] -> ChunkNegativeNumber
// Internally, it expands the pattern to all possible prefixes: -0, -1, etc.
// (for example, [0-9][a-z_$] gives 10*28=280 prefixes)
void add_pattern(const char* pattern, ChunkLexerBase* val) {
std::vector<LexingTrie*> all_possible_trie{this};
Lexer::Lexer(SourceReader& _src, std::string active_chars, std::string quote_chars, std::string multiline_quote)
: src(_src), eof(false), lexem("", src.here(), Lexem::Undefined), peek_lexem("", {}, Lexem::Undefined),
multiline_quote(std::move(multiline_quote)) {
std::memset(char_class, 0, sizeof(char_class));
unsigned char activity = cc::active;
for (char c : active_chars) {
if (c == ' ') {
if (!--activity) {
activity = cc::allow_repeat;
for (const char* c = pattern; *c; ++c) {
std::string to_append;
if (*c == '[') {
c++;
while (*c != ']') { // assume that input is correct, no out-of-string checks
if (*(c + 1) == '-') {
char l = *c, r = *(c + 2);
for (char symbol = l; symbol <= r; ++symbol) {
to_append += symbol;
}
c += 3;
} else {
to_append += *c;
c++;
}
}
} else {
to_append += *c;
}
} else if ((unsigned)c < 0x80) {
char_class[(unsigned)c] |= activity;
std::vector<LexingTrie*> next_all_possible_trie;
next_all_possible_trie.reserve(all_possible_trie.size() * to_append.size());
for (LexingTrie* cur : all_possible_trie) {
cur->ensure_next_allocated();
for (uint8_t symbol : to_append) {
cur->ensure_symbol_allocated(symbol);
next_all_possible_trie.emplace_back(cur->next[symbol]);
}
}
all_possible_trie = std::move(next_all_possible_trie);
}
for (LexingTrie* trie : all_possible_trie) {
trie->val = val;
}
}
for (int c : quote_chars) {
if (c > ' ' && c <= 0x7f) {
char_class[(unsigned)c] |= cc::quote_char;
}
}
}
void Lexer::set_comment_tokens(const std::string &eol_cmts, const std::string &open_cmts, const std::string &close_cmts) {
set_spec(eol_cmt, eol_cmts);
set_spec(cmt_op, open_cmts);
set_spec(cmt_cl, close_cmts);
}
// Looks up a chunk lexer given a string (in practice, s points to cur position in the middle of the file).
// It returns the deepest case: pointing to ", it will return ChunkMultilineString if """, or ChunkString otherwise.
ChunkLexerBase* get_deepest(const char* s) const {
const LexingTrie* best = this;
void Lexer::set_comment2_tokens(const std::string &eol_cmts2, const std::string &open_cmts2, const std::string &close_cmts2) {
set_spec(eol_cmt2, eol_cmts2);
set_spec(cmt_op2, open_cmts2);
set_spec(cmt_cl2, close_cmts2);
}
void Lexer::start_parsing() {
next();
}
void Lexer::set_spec(std::array<int, 3>& arr, std::string setup) {
arr[0] = arr[1] = arr[2] = -0x100;
std::size_t n = setup.size(), i;
for (i = 0; i < n; i++) {
if (setup[i] == ' ') {
continue;
}
if (i == n - 1 || setup[i + 1] == ' ') {
arr[0] = setup[i];
} else if (i == n - 2 || (i < n - 2 && setup[i + 2] == ' ')) {
arr[1] = setup[i];
arr[2] = setup[++i];
} else {
while (i < n && setup[i] != ' ') {
i++;
for (const LexingTrie* cur = this; cur && cur->next; ++s) {
cur = cur->next[static_cast<uint8_t>(*s)]; // if s reaches \0, cur will just become nullptr, and loop will end
if (cur && cur->val) {
best = cur;
}
}
}
}
bool Lexer::is_multiline_quote(const char* begin, const char* end) {
if (multiline_quote.empty()) {
return false;
return best->val;
}
for (const char& c : multiline_quote) {
if (begin == end || *begin != c) {
return false;
}
++begin;
}
return true;
}
};
void Lexer::expect(int exp_tp, const char* msg) {
if (tp() != exp_tp) {
throw ParseError{lexem.loc, (msg ? std::string{msg} : Lexem::lexem_name_str(exp_tp)) + " expected instead of " +
cur().name_str()};
}
next();
}
//
// ----------------------------------------------------------------------
// A list of valid parsed chunks.
//
const Lexem& Lexer::next() {
if (peek_lexem.valid()) {
lexem = std::move(peek_lexem);
peek_lexem.clear({}, Lexem::Undefined);
eof = (lexem.tp == Lexem::Eof);
return lexem;
// An inline comment, starting from '//'
struct ChunkInlineComment final : ChunkLexerBase {
bool parse(Lexer* lex) const override {
lex->skip_line();
return true;
}
if (eof) {
return lexem.clear(src.here(), Lexem::Eof);
}
long long comm = 1;
// the code below is very complicated, because it tried to support one-symbol start/end and nesting
// in Tolk, we decided to stop supporting nesting (it was never used in practice and almost impossible for js highlighters)
// later on I'll simplify this code (more precisely, rewrite lexer from scratch)
while (!src.seek_eof()) {
int cc = src.cur_char(), nc = src.next_char();
// note, that in practice, [0]-th element is -256, condition for [0]-th is always false
// todo rewrite this all in the future
if (cc == eol_cmt[0] || (cc == eol_cmt[1] && nc == eol_cmt[2]) || cc == eol_cmt2[0] || (cc == eol_cmt2[1] && nc == eol_cmt2[2])) {
if (comm == 1) { // just "//" — skip a whole line
src.load_line();
} else { // if "//" is nested into "/*", continue reading, since "*/" may be met
src.advance(1);
};
// A multiline comment, starting from '/*'
// Note, that nested comments are not supported.
struct ChunkMultilineComment final : ChunkLexerBase {
bool parse(Lexer* lex) const override {
while (!lex->is_eof()) {
// todo drop -} later
if ((lex->char_at() == '-' && lex->char_at(1) == '}') || (lex->char_at() == '*' && lex->char_at(1) == '/')) {
lex->skip_chars(2);
return true;
}
} else if (cc == cmt_op[1] && nc == cmt_op[2] || cc == cmt_op2[1] && nc == cmt_op2[2]) {
src.advance(2);
comm = comm * 2 + 1;
} else if (cc == cmt_op[0] || cc == cmt_op2[0]) { // always false
src.advance(1);
comm *= 2;
} else if (comm == 1) {
break; // means that we are not inside a comment
} else if (cc == cmt_cl[1] && nc == cmt_cl[2] || cc == cmt_cl2[1] && nc == cmt_cl2[2]) {
if (!(comm & 1)) { // always false
src.error(std::string{"a `"} + (char)cmt_op[0] + "` comment closed by `" + (char)cmt_cl[1] + (char)cmt_cl[2] +
"`");
}
// note that {- may be closed with */, but assume it's ok (we'll get rid of {- in the future)
comm = 1;
src.advance(2);
} else if (cc == cmt_cl[0] || cc == cmt_cl2[0]) { // always false
if (!(comm & 1)) {
src.error(std::string{"a `"} + (char)cmt_op[1] + (char)cmt_op[2] + "` comment closed by `" + (char)cmt_cl[0] +
"`");
}
comm = 1;
src.advance(1);
} else {
src.advance(1);
}
if (comm < 0) {
src.error("too many nested comments");
lex->skip_chars(1);
}
return true; // it's okay if comment extends past end of file
}
if (src.seek_eof()) {
eof = true;
if (comm > 1) {
src.error("comment extends past end of file");
};
// A string, starting from "
// Note, that there are no escape symbols inside: the purpose of strings in Tolk just doesn't need it.
// After a closing quote, a string modifier may be present, like "Ef8zMzMzMzMzMzMzMzMzMzM0vF"a.
// If present, it emits a separate tok_string_modifier.
struct ChunkString final : ChunkLexerBase {
bool parse(Lexer* lex) const override {
const char* str_begin = lex->c_str();
lex->skip_chars(1);
while (!lex->is_eof() && lex->char_at() != '"' && lex->char_at() != '\n') {
lex->skip_chars(1);
}
return lexem.clear(src.here(), Lexem::Eof);
if (lex->char_at() != '"') {
lex->error("string extends past end of line");
}
std::string_view str_val(str_begin + 1, lex->c_str() - str_begin - 1);
lex->skip_chars(1);
lex->add_token(tok_string_const, str_val);
if (std::isalpha(lex->char_at())) {
std::string_view modifier_val(lex->c_str(), 1);
lex->skip_chars(1);
lex->add_token(tok_string_modifier, modifier_val);
}
return true;
}
if (is_multiline_quote(src.get_ptr(), src.get_end_ptr())) {
src.advance(multiline_quote.size());
const char* end = nullptr;
SrcLocation here = src.here();
std::string body;
while (!src.is_eof()) {
if (src.is_eoln()) {
body.push_back('\n');
src.load_line();
continue;
}
if (is_multiline_quote(src.get_ptr(), src.get_end_ptr())) {
end = src.get_ptr();
src.advance(multiline_quote.size());
};
// A string starting from """
// Used for multiline asm constructions. Can not have a postfix modifier.
struct ChunkMultilineString final : ChunkLexerBase {
bool parse(Lexer* lex) const override {
const char* str_begin = lex->c_str();
lex->skip_chars(3);
while (!lex->is_eof()) {
if (lex->char_at() == '"' && lex->char_at(1) == '"' && lex->char_at(2) == '"') {
break;
}
body.push_back(src.cur_char());
src.advance(1);
lex->skip_chars(1);
}
if (!end) {
src.error("string extends past end of file");
if (lex->is_eof()) {
lex->error("string extends past end of file");
}
lexem.set(body, here, Lexem::String);
int c = src.cur_char();
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
lexem.val = c;
src.advance(1);
}
return lexem;
std::string_view str_val(str_begin + 3, lex->c_str() - str_begin - 3);
lex->skip_chars(3);
lex->add_token(tok_string_const, str_val);
return true;
}
int c = src.cur_char();
const char* end = src.get_ptr();
if (is_quote_char(c) || c == '`') {
int qc = c;
++end;
while (end < src.get_end_ptr() && *end != qc) {
++end;
};
// A number, may be a hex one.
struct ChunkNumber final : ChunkLexerBase {
bool parse(Lexer* lex) const override {
const char* str_begin = lex->c_str();
bool hex = false;
if (lex->char_at() == '0' && lex->char_at(1) == 'x') {
lex->skip_chars(2);
hex = true;
}
if (*end != qc) {
src.error(qc == '`' ? "a `back-quoted` token extends past end of line" : "string extends past end of line");
if (lex->is_eof()) {
return false;
}
lexem.set(std::string{src.get_ptr() + 1, end}, src.here(), qc == '`' ? Lexem::Unknown : Lexem::String);
src.set_ptr(end + 1);
c = src.cur_char();
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
lexem.val = c;
src.set_ptr(end + 2);
while (!lex->is_eof()) {
char c = lex->char_at();
if (c >= '0' && c <= '9') {
lex->skip_chars(1);
continue;
}
if (!hex) {
break;
}
c |= 0x20;
if (c < 'a' || c > 'f') {
break;
}
lex->skip_chars(1);
}
// std::cerr << lexem.name_str() << ' ' << lexem.str << std::endl;
return lexem;
std::string_view str_val(str_begin, lex->c_str() - str_begin);
lex->add_token(tok_int_const, str_val);
return true;
}
int len = 0, pc = -0x100;
while (end < src.get_end_ptr()) {
c = *end;
bool repeated = (c == pc && is_repeatable(c));
if (c == ' ' || c == 9 || (len && is_left_active(c) && !repeated)) {
break;
};
// Anything starting from # is a compiler directive.
// Technically, #include and #pragma can be mapped as separate chunks,
// but storing such long strings in a trie increases its memory usage.
struct ChunkCompilerDirective final : ChunkLexerBase {
bool parse(Lexer* lex) const override {
const char* str_begin = lex->c_str();
lex->skip_chars(1);
while (std::isalnum(lex->char_at())) {
lex->skip_chars(1);
}
++len;
++end;
if (is_right_active(c) && !repeated) {
break;
std::string_view str_val(str_begin, lex->c_str() - str_begin);
if (str_val == "#include") {
lex->add_token(tok_include, str_val);
return true;
}
pc = c;
if (str_val == "#pragma") {
lex->add_token(tok_pragma, str_val);
return true;
}
lex->error("unknown compiler directive");
}
lexem.set(std::string{src.get_ptr(), end}, src.here());
src.set_ptr(end);
// std::cerr << lexem.name_str() << ' ' << lexem.str << std::endl;
return lexem;
};
// Tokens like !=, &, etc. emit just a simple TokenType.
// Since they are stored in trie, "parsing" them is just skipping len chars.
struct ChunkSimpleToken final : ChunkLexerBase {
TokenType tp;
int len;
ChunkSimpleToken(TokenType tp, int len) : tp(tp), len(len) {}
bool parse(Lexer* lex) const override {
std::string_view str_val(lex->c_str(), len);
lex->add_token(tp, str_val);
lex->skip_chars(len);
return true;
}
};
// Spaces and other space-like symbols are just skipped.
struct ChunkSkipWhitespace final : ChunkLexerBase {
bool parse(Lexer* lex) const override {
lex->skip_chars(1);
lex->skip_spaces();
return true;
}
};
// Here we handle corner cases of grammar that are requested on demand.
// E.g., for 'pragma version >0.5.0', '0.5.0' should be parsed specially to emit tok_semver.
// See TolkLanguageGrammar::parse_next_chunk_special().
struct ChunkSpecialParsing {
static bool parse_pragma_name(Lexer* lex) {
const char* str_begin = lex->c_str();
while (std::isalnum(lex->char_at()) || lex->char_at() == '-') {
lex->skip_chars(1);
}
std::string_view str_val(str_begin, lex->c_str() - str_begin);
if (str_val.empty()) {
return false;
}
lex->add_token(tok_pragma_name, str_val);
return true;
}
static bool parse_semver(Lexer* lex) {
const char* str_begin = lex->c_str();
while (std::isdigit(lex->char_at()) || lex->char_at() == '.') {
lex->skip_chars(1);
}
std::string_view str_val(str_begin, lex->c_str() - str_begin);
if (str_val.empty()) {
return false;
}
lex->add_token(tok_semver, str_val);
return true;
}
};
// Anything starting from a valid identifier beginning symbol is parsed as an identifier.
// But if a resulting string is a keyword, a corresponding token is emitted instead of tok_identifier.
struct ChunkIdentifierOrKeyword final : ChunkLexerBase {
// having parsed str up to the valid end, look up whether it's a valid keyword
// in the future, this could be a bit more effective than just comparing strings (e.g. gperf),
// but nevertheless, performance of the naive code below is reasonably good
static TokenType maybe_keyword(std::string_view str) {
switch (str.size()) {
case 1:
if (str == "~") return tok_bitwise_not; // todo attention
if (str == "_") return tok_underscore; // todo attention
break;
case 2:
if (str == "do") return tok_do;
if (str == "if") return tok_if;
break;
case 3:
if (str == "int") return tok_int;
if (str == "var") return tok_var;
if (str == "asm") return tok_asm;
if (str == "get") return tok_get;
if (str == "try") return tok_try;
break;
case 4:
if (str == "else") return tok_else;
if (str == "pure") return tok_pure;
if (str == "then") return tok_then;
if (str == "cell") return tok_cell;
if (str == "cont") return tok_cont;
if (str == "type") return tok_type; // todo unused token?
break;
case 5:
if (str == "slice") return tok_slice;
if (str == "tuple") return tok_tuple;
if (str == "const") return tok_const;
if (str == "while") return tok_while;
if (str == "until") return tok_until;
if (str == "catch") return tok_catch;
if (str == "ifnot") return tok_ifnot;
break;
case 6:
if (str == "return") return tok_return;
if (str == "repeat") return tok_repeat;
if (str == "elseif") return tok_elseif;
if (str == "forall") return tok_forall;
if (str == "extern") return tok_extern;
if (str == "global") return tok_global;
if (str == "impure") return tok_impure;
if (str == "inline") return tok_inline;
break;
case 7:
if (str == "builder") return tok_builder;
if (str == "builtin") return tok_builtin;
break;
case 8:
if (str == "operator") return tok_operator;
break;
case 9:
if (str == "elseifnot") return tok_elseifnot;
if (str == "method_id") return tok_method_id;
break;
case 10:
if (str == "inline_ref") return tok_inlineref;
if (str == "auto_apply") return tok_autoapply;
break;
default:
break;
}
return tok_empty;
}
bool parse(Lexer* lex) const override {
const char* sym_begin = lex->c_str();
lex->skip_chars(1);
while (!lex->is_eof()) {
char c = lex->char_at();
// the pattern of valid identifier first symbol is provided in trie, here we test for identifier middle
bool allowed_in_identifier = std::isalnum(c) || c == '_' || c == '$' || c == ':' || c == '?' || c == '!' || c == '\'';
if (!allowed_in_identifier) {
break;
}
lex->skip_chars(1);
}
std::string_view str_val(sym_begin, lex->c_str() - sym_begin);
if (TokenType kw_tok = maybe_keyword(str_val)) {
lex->add_token(kw_tok, str_val);
} else {
symbols.lookup_add(static_cast<std::string>(str_val));
lex->add_token(tok_identifier, str_val);
}
return true;
}
};
// Like in Kotlin, `backticks` can be used to wrap identifiers (both in declarations/usage, both for vars/functions).
// E.g.: function `do`() { var `with spaces` = 1; }
// This could be useful to use reserved names as identifiers (in a probable codegen from TL, for example).
struct ChunkIdentifierInBackticks final : ChunkLexerBase {
bool parse(Lexer* lex) const override {
const char* str_begin = lex->c_str();
lex->skip_chars(1);
while (!lex->is_eof() && lex->char_at() != '`' && lex->char_at() != '\n') {
if (std::isspace(lex->char_at())) { // probably, I'll remove this restriction after rewriting symtable and cur_sym_idx
lex->error("An identifier can't have a space in its name (even inside backticks)");
}
lex->skip_chars(1);
}
if (lex->char_at() != '`') {
lex->error("Unclosed backtick `");
}
std::string_view str_val(str_begin + 1, lex->c_str() - str_begin - 1);
lex->skip_chars(1);
symbols.lookup_add(static_cast<std::string>(str_val));
lex->add_token(tok_identifier, str_val);
return true;
}
};
//
// ----------------------------------------------------------------------
// Here we define a grammar of Tolk.
// All valid chunks prefixes are stored in trie.
//
struct TolkLanguageGrammar {
static LexingTrie trie;
static bool parse_next_chunk(Lexer* lex) {
const ChunkLexerBase* best = trie.get_deepest(lex->c_str());
return best && best->parse(lex);
}
static bool parse_next_chunk_special(Lexer* lex, TokenType parse_next_as) {
switch (parse_next_as) {
case tok_pragma_name:
return ChunkSpecialParsing::parse_pragma_name(lex);
case tok_semver:
return ChunkSpecialParsing::parse_semver(lex);
default:
assert(false);
return false;
}
}
static void register_token(const char* str, int len, TokenType tp) {
trie.add_prefix(str, new ChunkSimpleToken(tp, len));
}
static void init() {
trie.add_prefix("//", singleton<ChunkInlineComment>());
trie.add_prefix(";;", singleton<ChunkInlineComment>());
trie.add_prefix("/*", singleton<ChunkMultilineComment>());
trie.add_prefix("{-", singleton<ChunkMultilineComment>());
trie.add_prefix(R"(")", singleton<ChunkString>());
trie.add_prefix(R"(""")", singleton<ChunkMultilineString>());
trie.add_prefix(" ", singleton<ChunkSkipWhitespace>());
trie.add_prefix("\t", singleton<ChunkSkipWhitespace>());
trie.add_prefix("\r", singleton<ChunkSkipWhitespace>());
trie.add_prefix("\n", singleton<ChunkSkipWhitespace>());
trie.add_prefix("#", singleton<ChunkCompilerDirective>());
trie.add_pattern("[0-9]", singleton<ChunkNumber>());
// todo think of . ~
trie.add_pattern("[a-zA-Z_$.~]", singleton<ChunkIdentifierOrKeyword>());
trie.add_prefix("`", singleton<ChunkIdentifierInBackticks>());
register_token("+", 1, tok_plus);
register_token("-", 1, tok_minus);
register_token("*", 1, tok_mul);
register_token("/", 1, tok_div);
register_token("%", 1, tok_mod);
register_token("?", 1, tok_question);
register_token(":", 1, tok_colon);
register_token(",", 1, tok_comma);
register_token(";", 1, tok_semicolon);
register_token("(", 1, tok_oppar);
register_token(")", 1, tok_clpar);
register_token("[", 1, tok_opbracket);
register_token("]", 1, tok_clbracket);
register_token("{", 1, tok_opbrace);
register_token("}", 1, tok_clbrace);
register_token("=", 1, tok_assign);
register_token("<", 1, tok_lt);
register_token(">", 1, tok_gt);
register_token("&", 1, tok_bitwise_and);
register_token("|", 1, tok_bitwise_or);
register_token("^", 1, tok_bitwise_xor);
register_token("==", 2, tok_eq);
register_token("!=", 2, tok_neq);
register_token("<=", 2, tok_leq);
register_token(">=", 2, tok_geq);
register_token("<<", 2, tok_lshift);
register_token(">>", 2, tok_rshift);
register_token("~/", 2, tok_divR);
register_token("^/", 2, tok_divC);
register_token("~%", 2, tok_modR);
register_token("^%", 2, tok_modC);
register_token("/%", 2, tok_divmod);
register_token("+=", 2, tok_set_plus);
register_token("-=", 2, tok_set_minus);
register_token("*=", 2, tok_set_mul);
register_token("/=", 2, tok_set_div);
register_token("%=", 2, tok_set_mod);
register_token("&=", 2, tok_set_bitwise_and);
register_token("|=", 2, tok_set_bitwise_or);
register_token("^=", 2, tok_set_bitwise_xor);
register_token("->", 2, tok_mapsto);
register_token("<=>", 3, tok_spaceship);
register_token("~>>", 3, tok_rshiftR);
register_token("^>>", 3, tok_rshiftC);
register_token("~/=", 3, tok_set_divR);
register_token("^/=", 3, tok_set_divC);
register_token("~%=", 3, tok_set_modR);
register_token("^%=", 3, tok_set_modC);
register_token("<<=", 3, tok_set_lshift);
register_token(">>=", 3, tok_set_rshift);
register_token("~>>=", 4, tok_set_rshiftR);
register_token("^>>=", 4, tok_set_rshiftC);
}
};
LexingTrie TolkLanguageGrammar::trie;
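// Illustrative expectations for the grammar above (a sketch, not a test from this commit):
// thanks to longest-prefix matching in the trie, operators no longer require surrounding spaces, e.g.
//   "2+2"    ->  tok_int_const("2")   tok_plus        tok_int_const("2")
//   "x>>=1"  ->  tok_identifier("x")  tok_set_rshift  tok_int_const("1")
//   "a<=>b"  ->  tok_identifier("a")  tok_spaceship   tok_identifier("b")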
//
// ----------------------------------------------------------------------
// The Lexer class is to be used outside (by the parser, which constructs an AST from tokens).
// It's streaming: `next()` parses the next token on demand
// (instead of parsing all file contents into a vector<Token> and iterating over it).
// Parsing on demand effectively uses less memory.
// Note that chunks, while being parsed, call `add_token()`, and a chunk may add multiple tokens at once.
// That's why a small circular buffer for tokens is used.
// `last_token_idx` is actually the total number of tokens added.
// `cur_token_idx` is the number of tokens returned by `next()`.
// It's assumed that an input file has already been loaded, its contents are present and won't be deleted
// (`start`, `cur` and `end`, as well as every Token str_val, point inside file->text).
//
Lexer::Lexer(const SrcFile* file)
: file(file)
, p_start(file->text.data())
, p_end(p_start + file->text.size())
, p_next(p_start)
, location(file) {
next();
}
const Lexem& Lexer::peek() {
if (peek_lexem.valid()) {
return peek_lexem;
void Lexer::next() {
while (cur_token_idx == last_token_idx && !is_eof()) {
update_location();
if (!TolkLanguageGrammar::parse_next_chunk(this)) {
error("Failed to parse");
}
}
if (eof) {
return lexem.clear(src.here(), Lexem::Eof);
if (is_eof()) {
add_token(tok_eof, file->text);
}
cur_token = tokens_circularbuf[++cur_token_idx & 7];
}
void Lexer::next_special(TokenType parse_next_as, const char* str_expected) {
assert(cur_token_idx == last_token_idx);
skip_spaces();
update_location();
if (!TolkLanguageGrammar::parse_next_chunk_special(this, parse_next_as)) {
error(std::string(str_expected) + " expected");
}
cur_token = tokens_circularbuf[++cur_token_idx & 7];
}
int Lexer::cur_sym_idx() const {
assert(tok() == tok_identifier);
return symbols.lookup_add(cur_str_std_string());
}
void Lexer::error(const std::string& err_msg) const {
throw ParseError(cur_location(), err_msg);
}
void Lexer::error_at(const std::string& prefix, const std::string& suffix) const {
throw ParseError(cur_location(), prefix + cur_str_std_string() + suffix);
}
void Lexer::on_expect_call_failed(const char* str_expected) const {
throw ParseError(cur_location(), std::string(str_expected) + " expected instead of `" + cur_str_std_string() + "`");
}
void lexer_init() {
TolkLanguageGrammar::init();
}
// todo #ifdef TOLK_PROFILING
// As noted above, `next()` produces tokens on demand, while the AST is being generated.
// Hence, it's difficult to measure Lexer performance separately.
// This function can be called just to check Lexer performance: it just scans all input files.
// There is no sense in using it in production, but it's useful when refactoring and optimizing the Lexer.
void lexer_measure_performance(const std::vector<SrcFile*>& files_to_just_parse) {
for (const SrcFile* file : files_to_just_parse) {
Lexer lex(file);
while (!lex.is_eof()) {
lex.next();
}
}
Lexem keep = std::move(lexem);
next();
peek_lexem = std::move(lexem);
lexem = std::move(keep);
eof = false;
return peek_lexem;
}
} // namespace tolk
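A minimal usage sketch of the new streaming interface (illustrative only, not part of this commit; count_identifiers is a made-up helper). It is similar in spirit to the scan loop of lexer_measure_performance() above: the constructor already produces the first token, and iteration stops at tok_eof.
#include "lexer.h"
#include "src-file.h"
namespace tolk {
// hypothetical helper, not part of the compiler
static int count_identifiers(const SrcFile* file) {
  Lexer lex(file);                  // the constructor parses the first token
  int n_idents = 0;
  while (lex.tok() != tok_eof) {
    if (lex.tok() == tok_identifier) {
      ++n_idents;                   // lex.cur_str() is a view into file->text
    }
    lex.next();
  }
  return n_idents;
}
} // namespace tolk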

View file

@ -15,104 +15,225 @@
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "srcread.h"
#include <array>
#include <memory>
#include <cstring>
#include "platform-utils.h"
#include "src-file.h"
#include <string>
namespace tolk {
/*
*
* LEXER
*
*/
enum TokenType {
tok_empty,
struct Lexem {
enum { Undefined = -2, Eof = -1, Unknown = 0, Ident = 0, Number = 1, Special = 2, String = 3 };
int tp;
int val;
std::string str;
SrcLocation loc;
int classify();
Lexem(std::string _str = "", const SrcLocation& _loc = {}, int _tp = Unknown, int _val = 0)
: tp(_tp), val(_val), str(_str), loc(_loc) {
classify();
}
int set(std::string _str = "", const SrcLocation& _loc = {}, int _tp = Unknown, int _val = 0);
Lexem& clear(const SrcLocation& _loc = {}, int _tp = Unknown, int _val = 0) {
tp = _tp;
val = _val;
loc = _loc;
str = "";
return *this;
}
bool valid() const {
return tp != Undefined;
}
std::string name_str() const;
void error(std::string _str) const {
throw ParseError{loc, _str};
}
void error_at(std::string str1, std::string str2) const {
error(str1 + str + str2);
}
tok_int_const,
tok_string_const,
tok_string_modifier,
static std::string lexem_name_str(int idx);
tok_identifier,
tok_plus,
tok_minus,
tok_mul,
tok_div,
tok_mod,
tok_question,
tok_colon,
tok_comma,
tok_semicolon,
tok_oppar,
tok_clpar,
tok_opbracket,
tok_clbracket,
tok_opbrace,
tok_clbrace,
tok_assign,
tok_underscore,
tok_lt,
tok_gt,
tok_bitwise_and,
tok_bitwise_or,
tok_bitwise_xor,
tok_bitwise_not,
tok_dot,
tok_eq,
tok_neq,
tok_leq,
tok_geq,
tok_spaceship,
tok_lshift,
tok_rshift,
tok_rshiftR,
tok_rshiftC,
tok_divR,
tok_divC,
tok_modR,
tok_modC,
tok_divmod,
tok_set_plus,
tok_set_minus,
tok_set_mul,
tok_set_div,
tok_set_divR,
tok_set_divC,
tok_set_mod,
tok_set_modR,
tok_set_modC,
tok_set_lshift,
tok_set_rshift,
tok_set_rshiftR,
tok_set_rshiftC,
tok_set_bitwise_and,
tok_set_bitwise_or,
tok_set_bitwise_xor,
tok_return,
tok_var,
tok_repeat,
tok_do,
tok_while,
tok_until,
tok_try,
tok_catch,
tok_if,
tok_ifnot,
tok_then,
tok_else,
tok_elseif,
tok_elseifnot,
tok_int,
tok_cell,
tok_slice,
tok_builder,
tok_cont,
tok_tuple,
tok_type,
tok_mapsto,
tok_forall,
tok_extern,
tok_global,
tok_asm,
tok_impure,
tok_pure,
tok_inline,
tok_inlineref,
tok_builtin,
tok_autoapply,
tok_method_id,
tok_get,
tok_operator,
tok_infix,
tok_infixl,
tok_infixr,
tok_const,
tok_pragma,
tok_pragma_name,
tok_semver,
tok_include,
tok_eof
};
// All Tolk source code is parsed into tokens.
// Lexer::next() returns a Token.
struct Token {
TokenType type = tok_empty;
std::string_view str_val;
Token() = default;
Token(TokenType type, std::string_view str_val): type(type), str_val(str_val) {}
};
// Lexer::next() is a method to be used externally (while parsing a tolk file into an AST).
// It's streaming: `next()` parses a token on demand.
// For details, see lexer.cpp, the comment above the Lexer constructor.
class Lexer {
SourceReader& src;
bool eof;
Lexem lexem, peek_lexem;
unsigned char char_class[128];
std::array<int, 3> eol_cmt, cmt_op, cmt_cl; // for ;; {- -}
std::array<int, 3> eol_cmt2, cmt_op2, cmt_cl2; // for // /* */
std::string multiline_quote;
enum cc { left_active = 2, right_active = 1, active = 3, allow_repeat = 4, quote_char = 8 };
Token tokens_circularbuf[8]{};
int last_token_idx = -1;
int cur_token_idx = -1;
Token cur_token; // = tokens_circularbuf[cur_token_idx & 7]
public:
bool eof_found() const {
return eof;
}
explicit Lexer(SourceReader& _src, std::string active_chars = ";,() ~.",
std::string quote_chars = "\"", std::string multiline_quote = "\"\"\"");
const SrcFile* file;
const char *p_start, *p_end, *p_next;
SrcLocation location;
void set_comment_tokens(const std::string &eol_cmts, const std::string &open_cmts, const std::string &close_cmts);
void set_comment2_tokens(const std::string &eol_cmts2, const std::string &open_cmts2, const std::string &close_cmts2);
void start_parsing();
const Lexem& next();
const Lexem& cur() const {
return lexem;
}
const Lexem& peek();
int tp() const {
return lexem.tp;
}
void expect(int exp_tp, const char* msg = 0);
int classify_char(unsigned c) const {
return c < 0x80 ? char_class[c] : 0;
}
bool is_active(int c) const {
return (classify_char(c) & cc::active) == cc::active;
}
bool is_left_active(int c) const {
return (classify_char(c) & cc::left_active);
}
bool is_right_active(int c) const {
return (classify_char(c) & cc::right_active);
}
bool is_repeatable(int c) const {
return (classify_char(c) & cc::allow_repeat);
}
bool is_quote_char(int c) const {
return (classify_char(c) & cc::quote_char);
void update_location() {
location.char_offset = static_cast<int>(p_next - p_start);
}
private:
void set_spec(std::array<int, 3>& arr, std::string setup);
bool is_multiline_quote(const char* begin, const char* end);
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
void on_expect_call_failed(const char* str_expected) const;
public:
explicit Lexer(const SrcFile* file);
Lexer(const Lexer&) = delete;
Lexer &operator=(const Lexer&) = delete;
void add_token(TokenType type, std::string_view str) {
tokens_circularbuf[++last_token_idx & 7] = Token(type, str);
}
void skip_spaces() {
while (std::isspace(*p_next)) {
++p_next;
}
}
void skip_line() {
while (p_next < p_end && *p_next != '\n' && *p_next != '\r') {
++p_next;
}
while (*p_next == '\n' || *p_next == '\r') {
++p_next;
}
}
void skip_chars(int n) {
p_next += n;
}
bool is_eof() const {
return p_next >= p_end;
}
char char_at() const { return *p_next; }
char char_at(int shift) const { return *(p_next + shift); }
const char* c_str() const { return p_next; }
TokenType tok() const { return cur_token.type; }
std::string_view cur_str() const { return cur_token.str_val; }
std::string cur_str_std_string() const { return static_cast<std::string>(cur_token.str_val); }
SrcLocation cur_location() const { return location; }
int cur_sym_idx() const;
void next();
void next_special(TokenType parse_next_as, const char* str_expected);
void check(TokenType next_tok, const char* str_expected) const {
if (cur_token.type != next_tok) {
on_expect_call_failed(str_expected); // unlikely path, not inlined
}
}
void expect(TokenType next_tok, const char* str_expected) {
if (cur_token.type != next_tok) {
on_expect_call_failed(str_expected);
}
next();
}
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
void error(const std::string& err_msg) const;
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
void error_at(const std::string& prefix, const std::string& suffix) const;
};
void lexer_init();
// todo #ifdef TOLK_PROFILING
void lexer_measure_performance(const std::vector<SrcFile*>& files_to_just_parse);
} // namespace tolk

View file

@ -612,7 +612,7 @@ bool Optimizer::optimize() {
}
AsmOpConsList optimize_code_head(AsmOpConsList op_list, int mode) {
Optimizer opt(std::move(op_list), op_rewrite_comments, mode);
Optimizer opt(std::move(op_list), false, mode);
opt.optimize();
return opt.extract_code();
}

File diff suppressed because it is too large

44
tolk/platform-utils.h Normal file
View file

@ -0,0 +1,44 @@
/*
This file is part of TON Blockchain source code.
TON Blockchain is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
TON Blockchain is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with TON Blockchain. If not, see <http://www.gnu.org/licenses/>.
In addition, as a special exception, the copyright holders give permission
to link the code of portions of this program with the OpenSSL library.
You must obey the GNU General Public License in all respects for all
of the code used other than OpenSSL. If you modify file(s) with this
exception, you may extend this exception to your version of the file(s),
but you are not obligated to do so. If you do not wish to do so, delete this
exception statement from your version. If you delete this exception statement
from all source files in the program, then also delete it here.
*/
#pragma once
#if __GNUC__
#define GNU_ATTRIBUTE_COLD [[gnu::cold]]
#define GNU_ATTRIBUTE_NORETURN [[gnu::noreturn]]
#define GNU_ATTRIBUTE_ALWAYS_INLINE [[gnu::always_inline]]
#else
#define GNU_ATTRIBUTE_COLD
#define GNU_ATTRIBUTE_NORETURN [[noreturn]]
#define GNU_ATTRIBUTE_ALWAYS_INLINE
#endif
#if defined(__GNUC__)
#define LIKELY(x) __builtin_expect(x, true)
#define UNLIKELY(x) __builtin_expect(x, false)
#else
#define LIKELY(x) (x)
#define UNLIKELY(x) (x)
#endif

164
tolk/src-file.cpp Normal file
View file

@ -0,0 +1,164 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#include "src-file.h"
#include <iostream>
namespace tolk {
extern AllRegisteredSrcFiles all_src_files;
extern std::string stdlib_filename;
static_assert(sizeof(SrcLocation) == 8);
const SrcFile* AllRegisteredSrcFiles::find_file(int file_id) const {
for (const SrcFile* file : all_src_files) {
if (file->file_id == file_id) {
return file;
}
}
return nullptr;
}
const SrcFile* AllRegisteredSrcFiles::find_file(const std::string& abs_filename) const {
for (const SrcFile* file : all_src_files) {
if (file->abs_filename == abs_filename) {
return file;
}
}
return nullptr;
}
const SrcFile* AllRegisteredSrcFiles::register_file(const std::string& rel_filename, const std::string& abs_filename, std::string&& text, const SrcFile* included_from) {
SrcFile* created = new SrcFile(++last_file_id, rel_filename, abs_filename, std::move(text), included_from);
all_src_files.push_back(created);
return created;
}
bool SrcFile::is_entrypoint_file() const {
return file_id == (stdlib_filename.empty() ? 0 : 1);
}
bool SrcFile::is_offset_valid(int offset) const {
return offset >= 0 && offset < static_cast<int>(text.size());
}
SrcFile::SrcPosition SrcFile::convert_offset(int offset) const {
if (!is_offset_valid(offset)) {
return SrcPosition{offset, -1, -1, "invalid offset"};
}
int line_idx = 0;
int char_idx = 0;
int line_offset = 0;
for (int i = 0; i < offset; ++i) {
char c = text[i];
if (c == '\n') {
line_idx++;
char_idx = 0;
line_offset = i + 1;
} else {
char_idx++;
}
}
size_t line_len = text.size() - line_offset;
for (int i = line_offset; i < static_cast<int>(text.size()); ++i) {
if (text[i] == '\n') {
line_len = i - line_offset;
break;
}
}
std::string_view line_str(text.data() + line_offset, line_len);
return SrcPosition{offset, line_idx + 1, char_idx + 1, line_str};
}
std::ostream& operator<<(std::ostream& os, const SrcFile* src_file) {
return os << (src_file ? src_file->rel_filename : "unknown-location");
}
std::ostream& operator<<(std::ostream& os, const Fatal& fatal) {
return os << fatal.what();
}
const SrcFile* SrcLocation::get_src_file() const {
return all_src_files.find_file(file_id);
}
void SrcLocation::show(std::ostream& os) const {
const SrcFile* src_file = get_src_file();
os << src_file;
if (src_file && src_file->is_offset_valid(char_offset)) {
SrcFile::SrcPosition pos = src_file->convert_offset(char_offset);
os << ':' << pos.line_no << ':' << pos.char_no;
}
}
void SrcLocation::show_context(std::ostream& os) const {
const SrcFile* src_file = get_src_file();
if (!src_file || !src_file->is_offset_valid(char_offset)) {
return;
}
SrcFile::SrcPosition pos = src_file->convert_offset(char_offset);
os << " " << pos.line_str << "\n";
os << " ";
for (int i = 1; i < pos.char_no; ++i) {
os << ' ';
}
os << '^' << "\n";
}
std::ostream& operator<<(std::ostream& os, SrcLocation loc) {
loc.show(os);
return os;
}
void SrcLocation::show_general_error(std::ostream& os, const std::string& message, const std::string& err_type) const {
show(os);
if (!err_type.empty()) {
os << ": " << err_type;
}
os << ": " << message << std::endl;
show_context(os);
}
void SrcLocation::show_note(const std::string& err_msg) const {
show_general_error(std::cerr, err_msg, "note");
}
void SrcLocation::show_warning(const std::string& err_msg) const {
show_general_error(std::cerr, err_msg, "warning");
}
void SrcLocation::show_error(const std::string& err_msg) const {
show_general_error(std::cerr, err_msg, "error");
}
std::ostream& operator<<(std::ostream& os, const ParseError& error) {
error.show(os);
return os;
}
void ParseError::show(std::ostream& os) const {
os << where << ": error: " << message << std::endl;
where.show_context(os);
}
} // namespace tolk
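A small worked example of the offset-to-position conversion implemented above (an illustrative check, not a test from this commit; the file names are made up):
#include <cassert>
#include "src-file.h"
void convert_offset_example() {
  using namespace tolk;
  SrcFile file(0, "demo.tolk", "/abs/demo.tolk", std::string("int a;\nint b;"), nullptr);
  SrcFile::SrcPosition pos = file.convert_offset(8);  // offset 8 is the 'n' of the second "int"
  assert(pos.line_no == 2 && pos.char_no == 2);       // 1-based line and column
  assert(pos.line_str == "int b;");                   // the full line, used for error context
}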

120
tolk/src-file.h Normal file
View file

@ -0,0 +1,120 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <string>
#include <vector>
namespace tolk {
struct SrcFile {
struct SrcPosition {
int offset;
int line_no;
int char_no;
std::string_view line_str;
};
int file_id;
std::string rel_filename;
std::string abs_filename;
std::string text;
const SrcFile* included_from{nullptr};
SrcFile(int file_id, std::string rel_filename, std::string abs_filename, std::string&& text, const SrcFile* included_from)
: file_id(file_id)
, rel_filename(std::move(rel_filename))
, abs_filename(std::move(abs_filename))
, text(std::move(text))
, included_from(included_from) { }
SrcFile(const SrcFile& other) = delete;
SrcFile &operator=(const SrcFile&) = delete;
bool is_entrypoint_file() const;
bool is_offset_valid(int offset) const;
SrcPosition convert_offset(int offset) const;
};
class AllRegisteredSrcFiles {
std::vector<SrcFile*> all_src_files;
int last_file_id = -1;
public:
const SrcFile *find_file(int file_id) const;
const SrcFile* find_file(const std::string& abs_filename) const;
const SrcFile* register_file(const std::string& rel_filename, const std::string& abs_filename, std::string&& text, const SrcFile* included_from);
const std::vector<SrcFile*>& get_all_files() const { return all_src_files; }
};
struct Fatal final : std::exception {
std::string message;
explicit Fatal(std::string _msg) : message(std::move(_msg)) {
}
const char* what() const noexcept override {
return message.c_str();
}
};
std::ostream& operator<<(std::ostream& os, const Fatal& fatal);
// SrcLocation points to a location (line, column) in some loaded .tolk source SrcFile.
// Note that instead of storing src_file, line_no, etc., only 2 ints are stored.
// The purpose: sizeof(SrcLocation) == 8, so it's passed/stored by value without pointers/refs, just like an int64_t.
// When decoding a SrcLocation into a human-readable format, it's converted to SrcFile::SrcPosition via the offset.
class SrcLocation {
friend class Lexer;
int file_id = -1; // file_id from AllRegisteredSrcFiles
int char_offset = -1; // offset from SrcFile::text
public:
SrcLocation() = default;
explicit SrcLocation(const SrcFile* src_file) : file_id(src_file->file_id) {
}
bool is_defined() const { return file_id != -1; }
const SrcFile* get_src_file() const;
void show(std::ostream& os) const;
void show_context(std::ostream& os) const;
void show_general_error(std::ostream& os, const std::string& message, const std::string& err_type) const;
void show_note(const std::string& err_msg) const;
void show_warning(const std::string& err_msg) const;
void show_error(const std::string& err_msg) const;
};
std::ostream& operator<<(std::ostream& os, SrcLocation loc);
struct ParseError : std::exception {
SrcLocation where;
std::string message;
ParseError(SrcLocation _where, std::string _msg) : where(_where), message(std::move(_msg)) {
}
const char* what() const noexcept override {
return message.c_str();
}
void show(std::ostream& os) const;
};
std::ostream& operator<<(std::ostream& os, const ParseError& error);
} // namespace tolk

View file

@ -1,228 +0,0 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#include "srcread.h"
#include <algorithm>
namespace tolk {
/*
*
* SOURCE FILE READER
*
*/
std::ostream& operator<<(std::ostream& os, const FileDescr* fdescr) {
return os << (fdescr ? (fdescr->is_stdin ? "stdin" : fdescr->filename) : "unknown-location");
}
std::ostream& operator<<(std::ostream& os, const Fatal& fatal) {
return os << fatal.get_msg();
}
const char* FileDescr::convert_offset(long offset, long* line_no, long* line_pos, long* line_size) const {
long lno = 0, lpos = -1, lsize = 0;
const char* lstart = nullptr;
if (offset >= 0 && offset < (long)text.size()) {
auto it = std::upper_bound(line_offs.begin(), line_offs.end(), offset);
lno = it - line_offs.begin();
if (lno && it != line_offs.end()) {
lsize = it[0] - it[-1];
lpos = offset - it[-1];
lstart = text.data() + it[-1];
}
} else {
lno = (long)line_offs.size();
}
if (line_no) {
*line_no = lno;
}
if (line_pos) {
*line_pos = lpos;
}
if (line_size) {
*line_size = lsize;
}
return lstart;
}
const char* FileDescr::push_line(std::string new_line) {
if (line_offs.empty()) {
line_offs.push_back(0);
}
std::size_t cur_size = text.size();
text += new_line;
text += '\0';
line_offs.push_back((long)text.size());
return text.data() + cur_size;
}
void SrcLocation::show(std::ostream& os) const {
os << fdescr;
long line_no, line_pos;
if (fdescr && convert_pos(&line_no, &line_pos)) {
os << ':' << line_no;
if (line_pos >= 0) {
os << ':' << (line_pos + 1);
}
}
}
bool SrcLocation::show_context(std::ostream& os) const {
long line_no, line_pos, line_size;
if (!fdescr || !convert_pos(&line_no, &line_pos, &line_size)) {
return false;
}
bool skip_left = (line_pos > 200), skip_right = (line_pos + 200u < line_size);
const char* here = fdescr->text.data() + char_offs;
const char* base = here - line_pos;
const char* start = skip_left ? here - 100 : base;
const char* end = skip_right ? here + 100 : base + line_size;
os << " ";
if (skip_left) {
os << "... ";
}
for (const char* ptr = start; ptr < end; ptr++) {
os << (char)*ptr;
}
if (skip_right) {
os << " ...";
}
os << std::endl;
os << " ";
if (skip_left) {
os << "... ";
}
for (const char* ptr = start; ptr < here; ptr++) {
char c = *ptr;
os << (c == 9 || c == 10 ? c : ' ');
}
os << '^' << std::endl;
return true;
}
std::ostream& operator<<(std::ostream& os, const SrcLocation& loc) {
loc.show(os);
return os;
}
void SrcLocation::show_gen_error(std::ostream& os, std::string message, std::string err_type) const {
show(os);
if (!err_type.empty()) {
os << ": " << err_type;
}
os << ": " << message << std::endl;
show_context(os);
}
std::ostream& operator<<(std::ostream& os, const Error& error) {
error.show(os);
return os;
}
void ParseError::show(std::ostream& os) const {
os << where << ": error: " << message << std::endl;
where.show_context(os);
}
SourceReader::SourceReader(std::istream* _is, FileDescr* _fdescr)
: ifs(_is), fdescr(_fdescr), loc(_fdescr), eof(false), cur_line_len(0), start(0), cur(0), end(0) {
load_line();
}
void SourceReader::set_eof() {
if (!eof) {
eof = true;
start = cur = end = 0;
}
}
int SourceReader::skip_spc() {
if (!cur) {
return 0;
}
const char* ptr = cur;
int res = 0;
while (*ptr == ' ' || *ptr == 9) {
++ptr;
++res;
}
set_ptr(ptr);
return res;
}
bool SourceReader::seek_eof() {
while (seek_eoln()) {
if (!load_line()) {
return true;
}
}
return false;
}
const char* SourceReader::set_ptr(const char* ptr) {
if (ptr != cur) {
if (ptr < cur || ptr > end) {
error("parsing position went outside of line");
}
loc.char_offs += ptr - cur;
cur = ptr;
}
return ptr;
}
bool SourceReader::load_line() {
if (eof) {
return false;
}
loc.set_eof();
if (ifs->eof()) {
set_eof();
return false;
}
std::getline(*ifs, cur_line);
if (ifs->fail()) {
set_eof();
if (!ifs->eof()) {
error("cannot read line from source stream");
}
return false;
}
std::size_t len = cur_line.size();
if (len > 0xffffff) {
set_eof();
error("line too long");
return false;
}
if (len && cur_line.back() == '\r') {
// CP/M line breaks support
cur_line.pop_back();
--len;
}
cur_line_len = (int)len;
if (fdescr) {
cur = start = fdescr->push_line(std::move(cur_line));
end = start + len;
loc.char_offs = (std::size_t)(cur - fdescr->text.data());
cur_line.clear();
} else {
cur = start = cur_line.c_str();
end = start + cur_line_len;
}
return true;
}
} // namespace tolk

View file

@ -1,162 +0,0 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <string>
#include <vector>
#include <iostream>
namespace tolk {
/*
*
* SOURCE FILE READER
*
*/
struct FileDescr {
std::string filename;
std::string text;
std::vector<long> line_offs;
bool is_stdin;
bool is_main = false;
FileDescr(std::string _fname, bool _stdin = false) : filename(std::move(_fname)), is_stdin(_stdin) {
}
const char* push_line(std::string new_line);
const char* convert_offset(long offset, long* line_no, long* line_pos, long* line_size = nullptr) const;
};
struct Fatal {
std::string message;
Fatal(std::string _msg) : message(std::move(_msg)) {
}
std::string get_msg() const {
return message;
}
};
std::ostream& operator<<(std::ostream& os, const Fatal& fatal);
struct SrcLocation {
const FileDescr* fdescr;
long char_offs;
SrcLocation() : fdescr(nullptr), char_offs(-1) {
}
SrcLocation(const FileDescr* _fdescr, long offs = -1) : fdescr(_fdescr), char_offs(-1) {
}
bool defined() const {
return fdescr;
}
bool eof() const {
return char_offs == -1;
}
void set_eof() {
char_offs = -1;
}
const char* convert_pos(long* line_no, long* line_pos, long* line_size = nullptr) const {
return defined() ? fdescr->convert_offset(char_offs, line_no, line_pos, line_size) : nullptr;
}
void show(std::ostream& os) const;
bool show_context(std::ostream& os) const;
void show_gen_error(std::ostream& os, std::string message, std::string err_type = "") const;
void show_note(std::string err_msg) const {
show_gen_error(std::cerr, err_msg, "note");
}
void show_warning(std::string err_msg) const {
show_gen_error(std::cerr, err_msg, "warning");
}
void show_error(std::string err_msg) const {
show_gen_error(std::cerr, err_msg, "error");
}
};
std::ostream& operator<<(std::ostream& os, const SrcLocation& loc);
struct Error {
virtual ~Error() = default;
virtual void show(std::ostream& os) const = 0;
};
std::ostream& operator<<(std::ostream& os, const Error& error);
struct ParseError : Error {
SrcLocation where;
std::string message;
ParseError(const SrcLocation& _where, std::string _msg) : where(_where), message(_msg) {
}
ParseError(const SrcLocation* _where, std::string _msg) : message(_msg) {
if (_where) {
where = *_where;
}
}
~ParseError() override = default;
void show(std::ostream& os) const override;
};
class SourceReader {
std::istream* ifs;
FileDescr* fdescr;
SrcLocation loc;
bool eof;
std::string cur_line;
int cur_line_len;
void set_eof();
const char *start, *cur, *end;
public:
SourceReader(std::istream* _is, FileDescr* _fdescr);
bool load_line();
bool is_eof() const {
return eof;
}
int is_eoln() const {
return cur == end;
}
int skip_spc();
bool seek_eoln() {
skip_spc();
return is_eoln();
}
bool seek_eof();
const char* cur_line_cstr() const {
return cur_line.c_str();
}
const SrcLocation& here() const {
return loc;
}
char cur_char() const {
return *cur;
}
char next_char() const {
return cur[1];
}
const char* get_ptr() const {
return cur;
}
const char* get_end_ptr() const {
return end;
}
const char* set_ptr(const char* ptr);
void advance(int n) {
set_ptr(get_ptr() + n);
}
void error(std::string err_msg) {
throw ParseError{loc, err_msg};
}
};
} // namespace tolk

View file

@ -28,13 +28,19 @@ namespace tolk {
int scope_level;
SymTable<100003> symbols;
SymTable symbols;
SymDef* sym_def[symbols.hprime + 1];
SymDef* global_sym_def[symbols.hprime + 1];
SymDef* sym_def[symbols.SIZE_PRIME + 1];
SymDef* global_sym_def[symbols.SIZE_PRIME + 1];
std::vector<std::pair<int, SymDef>> symbol_stack;
std::vector<SrcLocation> scope_opened_at;
Symbol::Symbol(std::string str, sym_idx_t idx) : str(std::move(str)), idx(idx) {
subclass = this->str[0] == '.' ? SymbolSubclass::dot_identifier
: this->str[0] == '~' ? SymbolSubclass::tilde_identifier
: SymbolSubclass::undef;
}
std::string Symbol::unknown_symbol_name(sym_idx_t i) {
if (!i) {
return "_";
@ -45,57 +51,43 @@ std::string Symbol::unknown_symbol_name(sym_idx_t i) {
}
}
sym_idx_t SymTableBase::gen_lookup(std::string str, int mode, sym_idx_t idx) {
sym_idx_t SymTable::gen_lookup(std::string_view str, int mode, sym_idx_t idx) {
unsigned long long h1 = 1, h2 = 1;
for (char c : str) {
h1 = ((h1 * 239) + (unsigned char)(c)) % p;
h2 = ((h2 * 17) + (unsigned char)(c)) % (p - 1);
h1 = ((h1 * 239) + (unsigned char)(c)) % SIZE_PRIME;
h2 = ((h2 * 17) + (unsigned char)(c)) % (SIZE_PRIME - 1);
}
++h2;
++h1;
while (true) {
if (sym_table[h1]) {
if (sym_table[h1]->str == str) {
if (sym[h1]) {
if (sym[h1]->str == str) {
return (mode & 2) ? not_found : sym_idx_t(h1);
}
h1 += h2;
if (h1 > p) {
h1 -= p;
if (h1 > SIZE_PRIME) {
h1 -= SIZE_PRIME;
}
} else {
if (!(mode & 1)) {
return not_found;
}
if (def_sym >= ((long long)p * 3) / 4) {
if (def_sym >= ((long long)SIZE_PRIME * 3) / 4) {
throw SymTableOverflow{def_sym};
}
sym_table[h1] = std::make_unique<Symbol>(str, idx <= 0 ? sym_idx_t(h1) : -idx);
sym[h1] = std::make_unique<Symbol>(static_cast<std::string>(str), idx <= 0 ? sym_idx_t(h1) : -idx);
++def_sym;
return sym_idx_t(h1);
}
}
}
SymTableBase& SymTableBase::add_keyword(std::string str, sym_idx_t idx) {
if (idx <= 0) {
idx = ++def_kw;
}
sym_idx_t res = gen_lookup(str, -1, idx);
if (!res) {
throw SymTableKwRedef{str};
}
if (idx < max_kw_idx) {
keywords[idx] = res;
}
return *this;
}
void open_scope(Lexer& lex) {
void open_scope(SrcLocation loc) {
++scope_level;
scope_opened_at.push_back(lex.cur().loc);
scope_opened_at.push_back(loc);
}
void close_scope(Lexer& lex) {
void close_scope(SrcLocation loc) {
if (!scope_level) {
throw Fatal{"cannot close the outer scope"};
}
@ -124,24 +116,20 @@ void close_scope(Lexer& lex) {
scope_opened_at.pop_back();
}
SymDef* lookup_symbol(sym_idx_t idx, int flags) {
SymDef* lookup_symbol(sym_idx_t idx) {
if (!idx) {
return nullptr;
}
if ((flags & 1) && sym_def[idx]) {
if (sym_def[idx]) {
return sym_def[idx];
}
if ((flags & 2) && global_sym_def[idx]) {
if (global_sym_def[idx]) {
return global_sym_def[idx];
}
return nullptr;
}
SymDef* lookup_symbol(std::string name, int flags) {
return lookup_symbol(symbols.lookup(name), flags);
}
SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new, const SrcLocation& loc) {
SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc) {
if (!name_idx) {
return nullptr;
}
@ -156,7 +144,7 @@ SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new, const SrcLocati
return found;
}
SymDef* define_symbol(sym_idx_t name_idx, bool force_new, const SrcLocation& loc) {
SymDef* define_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc) {
if (!name_idx) {
return nullptr;
}
@ -176,7 +164,7 @@ SymDef* define_symbol(sym_idx_t name_idx, bool force_new, const SrcLocation& loc
return found;
}
found = sym_def[name_idx] = new SymDef(scope_level, name_idx, loc);
symbol_stack.push_back(std::make_pair(scope_level, SymDef{0, name_idx}));
symbol_stack.push_back(std::make_pair(scope_level, SymDef{0, name_idx, loc}));
#ifdef TOLK_DEBUG
found->sym_name = found->name();
symbol_stack.back().second.sym_name = found->name();

View file

@ -15,8 +15,9 @@
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "srcread.h"
#include "lexer.h"
#include "src-file.h"
#include <functional>
#include <memory>
#include <vector>
namespace tolk {
@ -29,11 +30,12 @@ namespace tolk {
typedef int var_idx_t;
enum class SymValKind { _Param, _Var, _Func, _Typename, _GlobVar, _Const };
struct SymValBase {
enum { _Param, _Var, _Func, _Typename, _GlobVar, _Const };
int type;
SymValKind kind;
int idx;
SymValBase(int _type, int _idx) : type(_type), idx(_idx) {
SymValBase(SymValKind kind, int idx) : kind(kind), idx(idx) {
}
virtual ~SymValBase() = default;
};
@ -44,92 +46,69 @@ struct SymValBase {
*
*/
// defined outside this module (by the end user)
int compute_symbol_subclass(std::string str); // return 0 if unneeded
enum class SymbolSubclass {
undef = 0,
dot_identifier = 1, // begins with . (a const method)
tilde_identifier = 2 // begins with ~ (a non-const method)
};
typedef int sym_idx_t;
struct Symbol {
std::string str;
sym_idx_t idx;
int subclass;
Symbol(std::string _str, sym_idx_t _idx, int _sc) : str(_str), idx(_idx), subclass(_sc) {
}
Symbol(std::string _str, sym_idx_t _idx) : str(_str), idx(_idx) {
subclass = compute_symbol_subclass(std::move(_str));
}
SymbolSubclass subclass;
Symbol(std::string str, sym_idx_t idx);
static std::string unknown_symbol_name(sym_idx_t i);
};
class SymTableBase {
unsigned p;
std::unique_ptr<Symbol>* sym_table;
sym_idx_t def_kw, def_sym;
class SymTable {
public:
static constexpr int SIZE_PRIME = 100003;
private:
sym_idx_t def_sym{0};
std::unique_ptr<Symbol> sym[SIZE_PRIME + 1];
sym_idx_t gen_lookup(std::string_view str, int mode = 0, sym_idx_t idx = 0);
static constexpr int max_kw_idx = 10000;
sym_idx_t keywords[max_kw_idx];
public:
SymTableBase(unsigned p_, std::unique_ptr<Symbol>* sym_table_)
: p(p_), sym_table(sym_table_), def_kw(0x100), def_sym(0) {
std::memset(keywords, 0, sizeof(keywords));
}
public:
static constexpr sym_idx_t not_found = 0;
SymTableBase& add_keyword(std::string str, sym_idx_t idx = 0);
SymTableBase& add_kw_char(char c) {
return add_keyword(std::string{c}, c);
}
sym_idx_t lookup(std::string str, int mode = 0) {
sym_idx_t lookup(const std::string_view& str, int mode = 0) {
return gen_lookup(str, mode);
}
sym_idx_t lookup_add(std::string str) {
sym_idx_t lookup_add(const std::string& str) {
return gen_lookup(str, 1);
}
Symbol* operator[](sym_idx_t i) const {
return sym_table[i].get();
return sym[i].get();
}
bool is_keyword(sym_idx_t i) const {
return sym_table[i] && sym_table[i]->idx < 0;
return sym[i] && sym[i]->idx < 0;
}
std::string get_name(sym_idx_t i) const {
return sym_table[i] ? sym_table[i]->str : Symbol::unknown_symbol_name(i);
return sym[i] ? sym[i]->str : Symbol::unknown_symbol_name(i);
}
int get_subclass(sym_idx_t i) const {
return sym_table[i] ? sym_table[i]->subclass : 0;
SymbolSubclass get_subclass(sym_idx_t i) const {
return sym[i] ? sym[i]->subclass : SymbolSubclass::undef;
}
Symbol* get_keyword(int i) const {
return ((unsigned)i < (unsigned)max_kw_idx) ? sym_table[keywords[i]].get() : nullptr;
return ((unsigned)i < (unsigned)max_kw_idx) ? sym[keywords[i]].get() : nullptr;
}
protected:
sym_idx_t gen_lookup(std::string str, int mode = 0, sym_idx_t idx = 0);
};
template <unsigned pp>
class SymTable : public SymTableBase {
public:
static constexpr int hprime = pp;
static int size() {
return pp + 1;
}
private:
std::unique_ptr<Symbol> sym[pp + 1];
public:
SymTable() : SymTableBase(pp, sym) {
}
SymTable& add_keyword(std::string str, sym_idx_t idx = 0) {
SymTableBase::add_keyword(str, idx);
return *this;
}
SymTable& add_kw_char(char c) {
return add_keyword(std::string{c}, c);
SymTable() {
std::memset(keywords, 0, sizeof(keywords));
}
};
struct SymTableOverflow {
int sym_def;
SymTableOverflow(int x) : sym_def(x) {
explicit SymTableOverflow(int x) : sym_def(x) {
}
};
@ -139,7 +118,7 @@ struct SymTableKwRedef {
}
};
extern SymTable<100003> symbols;
extern SymTable symbols;
extern int scope_level;
@ -151,7 +130,7 @@ struct SymDef {
#ifdef TOLK_DEBUG
std::string sym_name;
#endif
SymDef(int lvl, sym_idx_t idx, const SrcLocation& _loc = {}, SymValBase* val = 0)
SymDef(int lvl, sym_idx_t idx, SrcLocation _loc, SymValBase* val = nullptr)
: level(lvl), sym_idx(idx), value(val), loc(_loc) {
}
bool has_name() const {
@ -162,17 +141,16 @@ struct SymDef {
}
};
extern SymDef* sym_def[symbols.hprime + 1];
extern SymDef* global_sym_def[symbols.hprime + 1];
extern SymDef* sym_def[symbols.SIZE_PRIME + 1];
extern SymDef* global_sym_def[symbols.SIZE_PRIME + 1];
extern std::vector<std::pair<int, SymDef>> symbol_stack;
extern std::vector<SrcLocation> scope_opened_at;
void open_scope(Lexer& lex);
void close_scope(Lexer& lex);
SymDef* lookup_symbol(sym_idx_t idx, int flags = 3);
SymDef* lookup_symbol(std::string name, int flags = 3);
void open_scope(SrcLocation loc);
void close_scope(SrcLocation loc);
SymDef* lookup_symbol(sym_idx_t idx);
SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new = false, const SrcLocation& loc = {});
SymDef* define_symbol(sym_idx_t name_idx, bool force_new = false, const SrcLocation& loc = {});
SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new = false, SrcLocation loc = {});
SymDef* define_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc);
} // namespace tolk
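A usage sketch for the symbol table declared above (illustrative, not from the repository; symtable_usage_example is a made-up function):
#include <cassert>
#include "symtable.h"
void symtable_usage_example() {
  using namespace tolk;
  sym_idx_t idx = symbols.lookup_add("main");         // inserted on first use
  assert(symbols.lookup("main") == idx);              // later lookups return the same index
  assert(symbols.get_name(idx) == "main");
  assert(symbols.get_subclass(idx) == SymbolSubclass::undef);  // no leading '.' or '~'
}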

View file

@ -30,67 +30,41 @@
void usage(const char* progname) {
std::cerr
<< "usage: " << progname
<< " [-vIAPSR][-O<level>][-i<indent-spc>][-o<output-filename>][-W<boc-filename>] {<filename.tolk> ...}\n"
"\tGenerates Fift TVM assembler code from a Tolk source\n"
"-I\tEnables interactive mode (parse stdin)\n"
"-o<fift-output-filename>\tWrites generated code into specified file instead of stdout\n"
"-v\tIncreases verbosity level (extra information output into stderr)\n"
"-i<indent>\tSets indentation for the output code (in two-space units)\n"
"-A\tPrefix code with `\"Asm.fif\" include` preamble\n"
<< "usage: " << progname << " [options] <filename.tolk>\n"
"\tGenerates Fift TVM assembler code from a .tolk file\n"
"-o<fif-filename>\tWrites generated code into specified .fif file instead of stdout\n"
"-b<boc-filename>\tGenerate Fift instructions to save TVM bytecode into .boc file\n"
"-O<level>\tSets optimization level (2 by default)\n"
"-P\tEnvelope code into PROGRAM{ ... }END>c\n"
"-S\tInclude stack layout comments in the output code\n"
"-R\tInclude operation rewrite comments in the output code\n"
"-W<output-boc-file>\tInclude Fift code to serialize and save generated code into specified BoC file. Enables "
"-A and -P.\n"
"\t-s\tOutput semantic version of Tolk and exit\n"
"\t-V<version>\tShow Tolk build information\n";
"-S\tDon't include stack layout comments into Fift output\n"
"-e\tIncreases verbosity level (extra output into stderr)\n"
"-v\tOutput version of Tolk and exit\n";
std::exit(2);
}
int main(int argc, char* const argv[]) {
int i;
std::string output_filename;
while ((i = getopt(argc, argv, "Ahi:Io:O:PRsSvW:V")) != -1) {
while ((i = getopt(argc, argv, "o:b:O:Sevh")) != -1) {
switch (i) {
case 'A':
tolk::asm_preamble = true;
break;
case 'I':
tolk::interactive = true;
break;
case 'i':
tolk::indent = std::max(0, atoi(optarg));
break;
case 'o':
output_filename = optarg;
break;
case 'b':
tolk::boc_output_filename = optarg;
break;
case 'O':
tolk::opt_level = std::max(0, atoi(optarg));
break;
case 'P':
tolk::program_envelope = true;
break;
case 'R':
tolk::op_rewrite_comments = true;
break;
case 'S':
tolk::stack_layout_comments = true;
tolk::stack_layout_comments = false;
break;
case 'v':
case 'e':
++tolk::verbosity;
break;
case 'W':
tolk::boc_output_filename = optarg;
tolk::asm_preamble = tolk::program_envelope = true;
break;
case 's':
std::cout << tolk::tolk_version << "\n";
std::exit(0);
case 'V':
std::cout << "Tolk semantic version: v" << tolk::tolk_version << "\n";
std::cout << "Build information: [ Commit: " << GitMetadata::CommitSHA1() << ", Date: " << GitMetadata::CommitDate() << "]\n";
case 'v':
std::cout << "Tolk compiler v" << tolk::tolk_version << "\n";
std::cout << "Build commit: " << GitMetadata::CommitSHA1() << "\n";
std::cout << "Build date: " << GitMetadata::CommitDate() << "\n";
std::exit(0);
case 'h':
default:
@ -110,13 +84,14 @@ int main(int argc, char* const argv[]) {
outs = fs.get();
}
std::vector<std::string> sources;
while (optind < argc) {
sources.push_back(std::string(argv[optind++]));
if (optind != argc - 1) {
std::cerr << "invalid usage: should specify exactly one input file.tolk";
return 2;
}
std::string entrypoint_file_name = argv[optind];
tolk::read_callback = tolk::fs_read_callback;
return tolk::tolk_proceed(sources, *outs, std::cerr);
return tolk::tolk_proceed(entrypoint_file_name, *outs, std::cerr);
}

View file

@ -31,81 +31,58 @@
#include "td/utils/Status.h"
#include <sstream>
#include <iomanip>
#include "vm/boc.h"
td::Result<std::string> compile_internal(char *config_json) {
TRY_RESULT(input_json, td::json_decode(td::MutableSlice(config_json)))
auto &obj = input_json.get_object();
td::JsonObject& config = input_json.get_object();
TRY_RESULT(opt_level, td::get_json_object_int_field(obj, "optLevel", false));
TRY_RESULT(sources_obj, td::get_json_object_field(obj, "sources", td::JsonValue::Type::Array, false));
auto &sources_arr = sources_obj.get_array();
std::vector<std::string> sources;
for (auto &item : sources_arr) {
sources.push_back(item.get_string().str());
}
TRY_RESULT(opt_level, td::get_json_object_int_field(config, "optimizationLevel", true, 2));
TRY_RESULT(stack_comments, td::get_json_object_bool_field(config, "withStackComments", true, false));
TRY_RESULT(entrypoint_file_name, td::get_json_object_string_field(config, "entrypointFileName", false));
tolk::opt_level = std::max(0, opt_level);
tolk::program_envelope = true;
tolk::verbosity = 0;
tolk::indent = 1;
tolk::stack_layout_comments = stack_comments;
std::ostringstream outs, errs;
auto compile_res = tolk::tolk_proceed(sources, outs, errs);
if (compile_res != 0) {
return td::Status::Error(std::string("Tolk compilation error: ") + errs.str());
int tolk_res = tolk::tolk_proceed(entrypoint_file_name, outs, errs);
if (tolk_res != 0) {
return td::Status::Error("Tolk compilation error: " + errs.str());
}
TRY_RESULT(code_cell, fift::compile_asm(outs.str(), "/fiftlib/", false));
TRY_RESULT(boc, vm::std_boc_serialize(code_cell));
TRY_RESULT(fift_res, fift::compile_asm_program(outs.str(), "/fiftlib/"));
td::JsonBuilder result_json;
auto result_obj = result_json.enter_object();
result_obj("status", "ok");
result_obj("codeBoc", td::base64_encode(boc));
result_obj("fiftCode", outs.str());
result_obj("codeHashHex", code_cell->get_hash().to_hex());
result_obj.leave();
outs.clear();
errs.clear();
auto obj = result_json.enter_object();
obj("status", "ok");
obj("fiftCode", fift_res.fiftCode);
obj("codeBoc64", fift_res.codeBoc64);
obj("codeHashHex", fift_res.codeHashHex);
obj.leave();
return result_json.string_builder().as_cslice().str();
}
/// Callback used to retrieve additional source files or data.
///
/// @param _kind The kind of callback (a string).
/// @param _data The data for the callback (a string).
/// @param o_contents A pointer to the contents of the file, if found. Allocated via malloc().
/// @param o_error A pointer to an error message, if there is one. Allocated via malloc().
///
/// The callback implementor must use malloc() to allocate storage for
/// contents or error. The callback implementor must use free() to free
/// said storage after tolk_compile returns.
///
/// If the callback is not supported, *o_contents and *o_error must be set to NULL.
typedef void (*CStyleReadFileCallback)(char const* _kind, char const* _data, char** o_contents, char** o_error);
/// Callback used to retrieve file contents from a "not file system". See tolk-js for implementation.
/// The callback must fill either destContents or destError.
/// The implementor must use malloc() for them and use free() after tolk_compile returns.
typedef void (*CStyleReadFileCallback)(int kind, char const* data, char** destContents, char** destError);
tolk::ReadCallback::Callback wrapReadCallback(CStyleReadFileCallback _readCallback)
{
tolk::ReadCallback::Callback readCallback;
if (_readCallback) {
readCallback = [=](tolk::ReadCallback::Kind _kind, char const* _data) -> td::Result<std::string> {
char* contents_c = nullptr;
char* error_c = nullptr;
_readCallback(tolk::ReadCallback::kindString(_kind).data(), _data, &contents_c, &error_c);
if (!contents_c && !error_c) {
readCallback = [=](tolk::ReadCallback::Kind kind, char const* data) -> td::Result<std::string> {
char* destContents = nullptr;
char* destError = nullptr;
_readCallback(static_cast<int>(kind), data, &destContents, &destError);
if (!destContents && !destError) {
return td::Status::Error("Callback not supported");
}
if (contents_c) {
return contents_c;
if (destContents) {
return destContents;
}
return td::Status::Error(std::string(error_c));
return td::Status::Error(std::string(destError));
};
}
return readCallback;
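For reference, a sketch of the JSON shapes handled by compile_internal() above (field names are taken from the code; the concrete values are made-up examples):
// input config passed by tolk-js:
//   {
//     "optimizationLevel": 2,            // optional, defaults to 2
//     "withStackComments": false,        // optional, defaults to false
//     "entrypointFileName": "main.tolk"  // required
//   }
// successful result, returned as a JSON string:
//   { "status": "ok", "fiftCode": "...", "codeBoc64": "...", "codeHashHex": "..." }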

View file

@ -24,18 +24,17 @@
from all source files in the program, then also delete it here.
*/
#include "tolk.h"
#include "srcread.h"
#include "lexer.h"
#include <getopt.h>
#include "git.h"
#include <fstream>
#include "td/utils/port/path.h"
#include <sys/stat.h>
namespace tolk {
int verbosity, indent, opt_level = 2;
bool stack_layout_comments, op_rewrite_comments, program_envelope, asm_preamble;
bool interactive = false;
int verbosity = 0, opt_level = 2;
bool stack_layout_comments = true;
GlobalPragma pragma_allow_post_modification{"allow-post-modification"};
GlobalPragma pragma_compute_asm_ltr{"compute-asm-ltr"};
GlobalPragma pragma_remove_unused_functions{"remove-unused-functions"};
@ -82,23 +81,13 @@ void GlobalPragma::enable(SrcLocation loc) {
". Please, remove this line from your code.");
return;
}
if (!loc.get_src_file()->is_entrypoint_file()) {
// todo generally it's not true; rework pragmas completely
loc.show_warning(PSTRING() << "#pragma " << name_ <<
" should be used in the main file only.");
}
enabled_ = true;
locs_.push_back(std::move(loc));
}
void GlobalPragma::check_enable_in_libs() {
if (locs_.empty()) {
return;
}
for (const SrcLocation& loc : locs_) {
if (loc.fdescr->is_main) {
return;
}
}
locs_[0].show_warning(PSTRING() << "#pragma " << name_
<< " is enabled in included libraries, it may change the behavior of your code. "
<< "Add this #pragma to the main source file to suppress this warning.");
}
void GlobalPragma::always_on_and_deprecated(const char *deprecated_from_v) {
@ -109,14 +98,19 @@ void GlobalPragma::always_on_and_deprecated(const char *deprecated_from_v) {
td::Result<std::string> fs_read_callback(ReadCallback::Kind kind, const char* query) {
switch (kind) {
case ReadCallback::Kind::ReadFile: {
std::ifstream ifs{query};
if (ifs.fail()) {
auto msg = std::string{"cannot open source file `"} + query + "`";
return td::Status::Error(msg);
struct stat f_stat;
int res = stat(query, &f_stat);
if (res != 0) {
return td::Status::Error(std::string{"cannot open source file: "} + query);
}
std::stringstream ss;
ss << ifs.rdbuf();
return ss.str();
size_t file_size = static_cast<size_t>(f_stat.st_size);
std::string str;
str.resize(file_size);
FILE* f = fopen(query, "r");
fread(str.data(), file_size, 1, f);
fclose(f);
return std::move(str);
}
case ReadCallback::Kind::Realpath: {
return td::realpath(td::CSlice(query));
@ -241,7 +235,7 @@ void generate_output_func(SymDef* func_sym, std::ostream &outs, std::ostream &er
} else if (func_val->is_inline_ref()) {
modifier = "REF";
}
outs << std::string(indent * 2, ' ') << name << " PROC" << modifier << ":<{\n";
outs << std::string(2, ' ') << name << " PROC" << modifier << ":<{\n";
int mode = 0;
if (stack_layout_comments) {
mode |= Stack::_StkCmt | Stack::_CptStkCmt;
@ -255,8 +249,8 @@ void generate_output_func(SymDef* func_sym, std::ostream &outs, std::ostream &er
if (func_val->is_inline() || func_val->is_inline_ref()) {
mode |= Stack::_InlineAny;
}
code.generate_code(outs, mode, indent + 1);
outs << std::string(indent * 2, ' ') << "}>\n";
code.generate_code(outs, mode, 2);
outs << std::string(2, ' ') << "}>\n";
if (verbosity >= 2) {
errs << "--------------\n";
}
@ -264,13 +258,9 @@ void generate_output_func(SymDef* func_sym, std::ostream &outs, std::ostream &er
}
int generate_output(std::ostream &outs, std::ostream &errs) {
if (asm_preamble) {
outs << "\"Asm.fif\" include\n";
}
outs << "\"Asm.fif\" include\n";
outs << "// automatically generated from " << generated_from << std::endl;
if (program_envelope) {
outs << "PROGRAM{\n";
}
outs << "PROGRAM{\n";
mark_used_symbols();
for (SymDef* func_sym : glob_func) {
SymValCodeFunc* func_val = dynamic_cast<SymValCodeFunc*>(func_sym->value);
@ -283,7 +273,7 @@ int generate_output(std::ostream &outs, std::ostream &errs) {
}
std::string name = symbols.get_name(func_sym->sym_idx);
outs << std::string(indent * 2, ' ');
outs << std::string(2, ' ');
if (func_val->method_id.is_null()) {
outs << "DECLPROC " << name << "\n";
} else {
@ -300,7 +290,7 @@ int generate_output(std::ostream &outs, std::ostream &errs) {
continue;
}
std::string name = symbols.get_name(gvar_sym->sym_idx);
outs << std::string(indent * 2, ' ') << "DECLGLOBVAR " << name << "\n";
outs << std::string(2, ' ') << "DECLGLOBVAR " << name << "\n";
}
int errors = 0;
for (SymDef* func_sym : glob_func) {
@ -310,76 +300,46 @@ int generate_output(std::ostream &outs, std::ostream &errs) {
}
try {
generate_output_func(func_sym, outs, errs);
} catch (Error& err) {
} catch (ParseError& err) {
errs << "cannot generate code for function `" << symbols.get_name(func_sym->sym_idx) << "`:\n"
<< err << std::endl;
++errors;
}
}
if (program_envelope) {
outs << "}END>c\n";
}
outs << "}END>c\n";
if (!boc_output_filename.empty()) {
outs << "2 boc+>B \"" << boc_output_filename << "\" B>file\n";
outs << "boc>B \"" << boc_output_filename << "\" B>file\n";
}
return errors;
}
void output_inclusion_stack(std::ostream &errs) {
while (!inclusion_locations.empty()) {
SrcLocation loc = inclusion_locations.top();
inclusion_locations.pop();
if (loc.fdescr) {
errs << "note: included from ";
loc.show(errs);
errs << std::endl;
}
}
}
int tolk_proceed(const std::vector<std::string> &sources, std::ostream &outs, std::ostream &errs) {
if (program_envelope && !indent) {
indent = 1;
}
define_keywords();
int tolk_proceed(const std::string &entrypoint_file_name, std::ostream &outs, std::ostream &errs) {
define_builtins();
lexer_init();
pragma_allow_post_modification.always_on_and_deprecated("0.5.0");
pragma_compute_asm_ltr.always_on_and_deprecated("0.5.0");
int ok = 0, proc = 0;
try {
for (auto src : sources) {
ok += parse_source_file(src.c_str(), {}, true);
proc++;
}
if (interactive) {
generated_from += "stdin ";
ok += parse_source_stdin();
proc++;
}
if (ok < proc) {
bool ok = parse_source_file(entrypoint_file_name.c_str(), {});
if (!ok) {
throw Fatal{"output code generation omitted because of errors"};
}
if (!proc) {
throw Fatal{"no source files, no output"};
}
pragma_remove_unused_functions.check_enable_in_libs();
// todo #ifdef TOLK_PROFILING + comment
// lexer_measure_performance(all_src_files.get_all_files());
return generate_output(outs, errs);
} catch (Fatal& fatal) {
errs << "fatal: " << fatal << std::endl;
output_inclusion_stack(errs);
return 2;
} catch (Error& error) {
} catch (ParseError& error) {
errs << error << std::endl;
output_inclusion_stack(errs);
return 2;
} catch (UnifyError& unif_err) {
errs << "fatal: ";
unif_err.print_message(errs);
errs << std::endl;
output_inclusion_stack(errs);
return 2;
}

View file

@ -15,6 +15,7 @@
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <utility>
#include <vector>
#include <string>
#include <set>
@ -26,7 +27,7 @@
#include "common/refcnt.hpp"
#include "common/bigint.hpp"
#include "common/refint.h"
#include "srcread.h"
#include "src-file.h"
#include "lexer.h"
#include "symtable.h"
#include "td/utils/Status.h"
@ -45,104 +46,6 @@ constexpr int optimize_depth = 20;
const std::string tolk_version{"0.4.5"};
enum Keyword {
_Eof = -1,
_Ident = 0,
_Number,
_Special,
_String,
_Return = 0x80,
_Var,
_Repeat,
_Do,
_While,
_Until,
_Try,
_Catch,
_If,
_Ifnot,
_Then,
_Else,
_Elseif,
_Elseifnot,
_Eq,
_Neq,
_Leq,
_Geq,
_Spaceship,
_Lshift,
_Rshift,
_RshiftR,
_RshiftC,
_DivR,
_DivC,
_ModR,
_ModC,
_DivMod,
_PlusLet,
_MinusLet,
_TimesLet,
_DivLet,
_DivRLet,
_DivCLet,
_ModLet,
_ModRLet,
_ModCLet,
_LshiftLet,
_RshiftLet,
_RshiftRLet,
_RshiftCLet,
_AndLet,
_OrLet,
_XorLet,
_Int,
_Cell,
_Slice,
_Builder,
_Cont,
_Tuple,
_Type,
_Mapsto,
_Forall,
_Asm,
_Impure,
_Pure,
_Global,
_Extern,
_Inline,
_InlineRef,
_Builtin,
_AutoApply,
_MethodId,
_Get,
_Operator,
_Infix,
_Infixl,
_Infixr,
_Const,
_PragmaHashtag,
_IncludeHashtag
};
void define_keywords();
class IdSc {
int cls;
public:
enum { undef = 0, dotid = 1, tildeid = 2 };
IdSc(int _cls = undef) : cls(_cls) {
}
operator int() {
return cls;
}
};
// symbol subclass:
// 1 = begins with . (a const method)
// 2 = begins with ~ (a non-const method)
// 0 = else
/*
*
* TYPE EXPRESSIONS
@ -152,13 +55,13 @@ class IdSc {
struct TypeExpr {
enum te_type { te_Unknown, te_Var, te_Indirect, te_Atomic, te_Tensor, te_Tuple, te_Map, te_ForAll } constr;
enum AtomicType {
_Int = Keyword::_Int,
_Cell = Keyword::_Cell,
_Slice = Keyword::_Slice,
_Builder = Keyword::_Builder,
_Cont = Keyword::_Cont,
_Tuple = Keyword::_Tuple,
_Type = Keyword::_Type
_Int = tok_int,
_Cell = tok_cell,
_Slice = tok_slice,
_Builder = tok_builder,
_Cont = tok_cont,
_Tuple = tok_tuple,
_Type = tok_type
};
int value;
int minw, maxw;
@ -279,14 +182,18 @@ struct TypeExpr {
std::ostream& operator<<(std::ostream& os, TypeExpr* type_expr);
struct UnifyError {
struct UnifyError : std::exception {
TypeExpr* te1;
TypeExpr* te2;
std::string msg;
UnifyError(TypeExpr* _te1, TypeExpr* _te2, std::string _msg = "") : te1(_te1), te2(_te2), msg(_msg) {
UnifyError(TypeExpr* _te1, TypeExpr* _te2, std::string _msg = "") : te1(_te1), te2(_te2), msg(std::move(_msg)) {
}
void print_message(std::ostream& os) const;
std::string message() const;
const char* what() const noexcept override {
return msg.c_str();
}
};
std::ostream& operator<<(std::ostream& os, const UnifyError& ue);
@ -310,18 +217,13 @@ struct TmpVar {
int cls;
sym_idx_t name;
int coord;
std::unique_ptr<SrcLocation> where;
std::vector<std::function<void(const SrcLocation &)>> on_modification;
bool undefined = false;
TmpVar(var_idx_t _idx, int _cls, TypeExpr* _type = 0, SymDef* sym = 0, const SrcLocation* loc = 0);
SrcLocation where;
std::vector<std::function<void(SrcLocation)>> on_modification;
TmpVar(var_idx_t _idx, int _cls, TypeExpr* _type, SymDef* sym, SrcLocation loc);
void show(std::ostream& os, int omit_idx = 0) const;
void dump(std::ostream& os) const;
void set_location(const SrcLocation& loc);
std::string to_string() const {
std::ostringstream s;
show(s, 2);
return s.str();
}
void set_location(SrcLocation loc);
};
struct VarDescr {
@ -566,25 +468,25 @@ struct Op {
std::unique_ptr<Op> block0, block1;
td::RefInt256 int_const;
std::string str_const;
Op(const SrcLocation& _where = {}, OpKind _cl = _Undef) : cl(_cl), flags(0), fun_ref(nullptr), where(_where) {
Op(SrcLocation _where = {}, OpKind _cl = _Undef) : cl(_cl), flags(0), fun_ref(nullptr), where(_where) {
}
Op(const SrcLocation& _where, OpKind _cl, const std::vector<var_idx_t>& _left)
Op(SrcLocation _where, OpKind _cl, const std::vector<var_idx_t>& _left)
: cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left) {
}
Op(const SrcLocation& _where, OpKind _cl, std::vector<var_idx_t>&& _left)
Op(SrcLocation _where, OpKind _cl, std::vector<var_idx_t>&& _left)
: cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(std::move(_left)) {
}
Op(const SrcLocation& _where, OpKind _cl, const std::vector<var_idx_t>& _left, td::RefInt256 _const)
Op(SrcLocation _where, OpKind _cl, const std::vector<var_idx_t>& _left, td::RefInt256 _const)
: cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left), int_const(_const) {
}
Op(const SrcLocation& _where, OpKind _cl, const std::vector<var_idx_t>& _left, std::string _const)
Op(SrcLocation _where, OpKind _cl, const std::vector<var_idx_t>& _left, std::string _const)
: cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left), str_const(_const) {
}
Op(const SrcLocation& _where, OpKind _cl, const std::vector<var_idx_t>& _left, const std::vector<var_idx_t>& _right,
Op(SrcLocation _where, OpKind _cl, const std::vector<var_idx_t>& _left, const std::vector<var_idx_t>& _right,
SymDef* _fun = nullptr)
: cl(_cl), flags(0), fun_ref(_fun), where(_where), left(_left), right(_right) {
}
Op(const SrcLocation& _where, OpKind _cl, std::vector<var_idx_t>&& _left, std::vector<var_idx_t>&& _right,
Op(SrcLocation _where, OpKind _cl, std::vector<var_idx_t>&& _left, std::vector<var_idx_t>&& _right,
SymDef* _fun = nullptr)
: cl(_cl), flags(0), fun_ref(_fun), where(_where), left(std::move(_left)), right(std::move(_right)) {
}
@ -700,8 +602,8 @@ struct CodeBlob {
return res;
}
bool import_params(FormalArgList arg_list);
var_idx_t create_var(int cls, TypeExpr* var_type = 0, SymDef* sym = 0, const SrcLocation* loc = 0);
var_idx_t create_tmp_var(TypeExpr* var_type = 0, const SrcLocation* loc = 0) {
var_idx_t create_var(int cls, TypeExpr* var_type, SymDef* sym, SrcLocation loc);
var_idx_t create_tmp_var(TypeExpr* var_type, SrcLocation loc) {
return create_var(TmpVar::_Tmp, var_type, nullptr, loc);
}
int split_vars(bool strict = false);
@ -712,14 +614,14 @@ struct CodeBlob {
cur_ops_stack.push(cur_ops);
cur_ops = &new_cur_ops;
}
void close_blk(const SrcLocation& location) {
void close_blk(SrcLocation location) {
*cur_ops = std::make_unique<Op>(location, Op::_Nop);
}
void pop_cur() {
cur_ops = cur_ops_stack.top();
cur_ops_stack.pop();
}
void close_pop_cur(const SrcLocation& location) {
void close_pop_cur(SrcLocation location) {
close_blk(location);
pop_cur();
}
@ -730,7 +632,7 @@ struct CodeBlob {
void generate_code(AsmOpList& out_list, int mode = 0);
void generate_code(std::ostream& os, int mode = 0, int indent = 0);
void on_var_modification(var_idx_t idx, const SrcLocation& here) const {
void on_var_modification(var_idx_t idx, SrcLocation here) const {
for (auto& f : vars.at(idx).on_modification) {
f(here);
}
@ -746,8 +648,8 @@ struct CodeBlob {
struct SymVal : SymValBase {
TypeExpr* sym_type;
bool auto_apply{false};
SymVal(int _type, int _idx, TypeExpr* _stype = nullptr)
: SymValBase(_type, _idx), sym_type(_stype) {
SymVal(SymValKind kind, int idx, TypeExpr* sym_type = nullptr)
: SymValBase(kind, idx), sym_type(sym_type) {
}
~SymVal() override = default;
TypeExpr* get_type() const {
@ -774,9 +676,9 @@ struct SymValFunc : SymVal {
#endif
~SymValFunc() override = default;
SymValFunc(int val, TypeExpr* _ft, bool marked_as_pure)
: SymVal(_Func, val, _ft), flags(marked_as_pure ? flagMarkedAsPure : 0) {}
: SymVal(SymValKind::_Func, val, _ft), flags(marked_as_pure ? flagMarkedAsPure : 0) {}
SymValFunc(int val, TypeExpr* _ft, std::initializer_list<int> _arg_order, std::initializer_list<int> _ret_order, bool marked_as_pure)
: SymVal(_Func, val, _ft), flags(marked_as_pure ? flagMarkedAsPure : 0), arg_order(_arg_order), ret_order(_ret_order) {
: SymVal(SymValKind::_Func, val, _ft), flags(marked_as_pure ? flagMarkedAsPure : 0), arg_order(_arg_order), ret_order(_ret_order) {
}
const std::vector<int>* get_arg_order() const {
@ -818,7 +720,7 @@ struct SymValCodeFunc : SymValFunc {
struct SymValType : SymValBase {
TypeExpr* sym_type;
SymValType(int _type, int _idx, TypeExpr* _stype = nullptr) : SymValBase(_type, _idx), sym_type(_stype) {
SymValType(SymValKind kind, int idx, TypeExpr* _stype = nullptr) : SymValBase(kind, idx), sym_type(_stype) {
}
~SymValType() override = default;
TypeExpr* get_type() const {
@ -834,7 +736,7 @@ struct SymValGlobVar : SymValBase {
std::string name; // seeing variable name in debugger makes it much easier to delve into Tolk sources
#endif
SymValGlobVar(int val, TypeExpr* gvtype, int oidx = 0)
: SymValBase(_GlobVar, val), sym_type(gvtype), out_idx(oidx) {
: SymValBase(SymValKind::_GlobVar, val), sym_type(gvtype), out_idx(oidx) {
}
~SymValGlobVar() override = default;
TypeExpr* get_type() const {
@ -843,16 +745,16 @@ struct SymValGlobVar : SymValBase {
};
struct SymValConst : SymValBase {
enum ConstKind { IntConst, SliceConst };
td::RefInt256 intval;
std::string strval;
Keyword type;
ConstKind kind;
SymValConst(int idx, td::RefInt256 value)
: SymValBase(_Const, idx), intval(value) {
type = _Int;
: SymValBase(SymValKind::_Const, idx), intval(value), kind(IntConst) {
}
SymValConst(int idx, std::string value)
: SymValBase(_Const, idx), strval(value) {
type = _Slice;
: SymValBase(SymValKind::_Const, idx), strval(value), kind(SliceConst) {
}
~SymValConst() override = default;
td::RefInt256 get_int_value() const {
@ -861,8 +763,8 @@ struct SymValConst : SymValBase {
std::string get_str_value() const {
return strval;
}
Keyword get_type() const {
return type;
ConstKind get_kind() const {
return kind;
}
};
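Call sites that used to compare the Keyword returned by get_type() now branch on the dedicated ConstKind. A hedged sketch of such a consumer (print_const_value is a hypothetical helper, not from this commit):
void print_const_value(const tolk::SymValConst& c, std::ostream& os) {
  if (c.get_kind() == tolk::SymValConst::IntConst) {
    os << c.get_int_value();                  // integer constant
  } else {
    os << '"' << c.get_str_value() << '"';    // slice constant, stored as a string
  }
}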
@ -882,35 +784,21 @@ public:

ReadCallback(ReadCallback const&) = delete;
ReadCallback& operator=(ReadCallback const&) = delete;
enum class Kind
{
enum class Kind {
Realpath,
ReadFile,
Realpath
};
static std::string kindString(Kind _kind)
{
switch (_kind)
{
case Kind::ReadFile:
return "source";
case Kind::Realpath:
return "realpath";
default:
throw ""; // todo ?
}
}
/// File reading or generic query callback.
using Callback = std::function<td::Result<std::string>(ReadCallback::Kind, const char*)>;
using Callback = std::function<td::Result<std::string>(Kind, const char*)>;
};
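A plausible installation of the callback in a driver (an assumption, not shown in this diff), using td::realpath and td::read_file_str from tdutils to serve both query kinds:
tolk::read_callback = [](tolk::ReadCallback::Kind kind, const char* query) -> td::Result<std::string> {
  if (kind == tolk::ReadCallback::Kind::Realpath) {
    return td::realpath(td::CSlice(query));   // resolve an #include path
  }
  return td::read_file_str(query);            // Kind::ReadFile: return the file contents
};
The global tolk::read_callback itself is declared further down in this header.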
// defined in parse-tolk.cpp
bool parse_source(std::istream* is, const FileDescr* fdescr);
bool parse_source_file(const char* filename, Lexem lex = {}, bool is_main = false);
bool parse_source_stdin();
void parse_source(const SrcFile* file);
bool parse_source_file(const char* filename, SrcLocation loc_included_from);
extern std::stack<SrcLocation> inclusion_locations;
extern AllRegisteredSrcFiles all_src_files;
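In line with the new single-file pipeline, the entrypoint is presumably parsed with an empty "included from" location, while files pulled in via #include pass the location of the include statement. A hedged sketch (entrypoint_file_name is assumed to hold the path given on the command line):
bool ok = tolk::parse_source_file(entrypoint_file_name.c_str(), tolk::SrcLocation{});
// each #include met during parsing triggers another parse_source_file() call,
// this time with loc_included_from pointing at the #include line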
/*
*
@ -949,7 +837,7 @@ struct Expr {
std::vector<Expr*> args;
explicit Expr(ExprCls c = _None) : cls(c) {
}
Expr(ExprCls c, const SrcLocation& loc) : cls(c), here(loc) {
Expr(ExprCls c, SrcLocation loc) : cls(c), here(loc) {
}
Expr(ExprCls c, std::vector<Expr*> _args) : cls(c), args(std::move(_args)) {
}
@ -990,14 +878,13 @@ struct Expr {
bool is_mktuple() const {
return cls == _MkTuple;
}
void chk_rvalue(const Lexem& lem) const;
void chk_lvalue(const Lexem& lem) const;
void chk_type(const Lexem& lem) const;
bool deduce_type(const Lexem& lem);
void set_location(const SrcLocation& loc) {
void chk_rvalue(const Lexer& lex) const; // todo here and below: strange to pass Lexer
void chk_lvalue(const Lexer& lex) const;
bool deduce_type(const Lexer& lex);
void set_location(SrcLocation loc) {
here = loc;
}
const SrcLocation& get_location() const {
SrcLocation get_location() const {
return here;
}
int define_new_vars(CodeBlob& code);
@ -1699,11 +1586,11 @@ struct Stack {
*
*/
typedef std::function<AsmOp(std::vector<VarDescr>&, std::vector<VarDescr>&, const SrcLocation)> simple_compile_func_t;
typedef std::function<AsmOp(std::vector<VarDescr>&, std::vector<VarDescr>&, SrcLocation)> simple_compile_func_t;
typedef std::function<bool(AsmOpList&, std::vector<VarDescr>&, std::vector<VarDescr>&)> compile_func_t;
inline simple_compile_func_t make_simple_compile(AsmOp op) {
return [op](std::vector<VarDescr>& out, std::vector<VarDescr>& in, const SrcLocation&) -> AsmOp { return op; };
return [op](std::vector<VarDescr>& out, std::vector<VarDescr>& in, SrcLocation) -> AsmOp { return op; };
}
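With SrcLocation now passed by value in simple_compile_func_t, wrapping a fixed instruction stays a one-liner. A hedged usage sketch (AsmOp::Nop() is assumed to exist, as in the FunC codebase Tolk derives from):
simple_compile_func_t compile_nothing = make_simple_compile(AsmOp::Nop());  // ignores inputs, outputs and location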
inline compile_func_t make_ext_compile(std::vector<AsmOp>&& ops) {
@ -1739,7 +1626,7 @@ struct SymValAsmFunc : SymValFunc {
std::initializer_list<int> ret_order = {}, bool marked_as_pure = false)
: SymValFunc(-1, ft, arg_order, ret_order, marked_as_pure), ext_compile(std::move(_compile)) {
}
bool compile(AsmOpList& dest, std::vector<VarDescr>& out, std::vector<VarDescr>& in, const SrcLocation& where) const;
bool compile(AsmOpList& dest, std::vector<VarDescr>& out, std::vector<VarDescr>& in, SrcLocation where) const;
};
// defined in builtins.cpp
@ -1753,8 +1640,8 @@ AsmOp push_const(td::RefInt256 x);
void define_builtins();
extern int verbosity, indent, opt_level;
extern bool stack_layout_comments, op_rewrite_comments, program_envelope, asm_preamble, interactive;
extern int verbosity, opt_level;
extern bool stack_layout_comments;
extern std::string generated_from, boc_output_filename;
extern ReadCallback::Callback read_callback;
@ -1764,6 +1651,7 @@ class GlobalPragma {
public:
explicit GlobalPragma(std::string name) : name_(std::move(name)) {
}
const std::string& name() const {
return name_;
}
@ -1771,14 +1659,12 @@ class GlobalPragma {
return enabled_;
}
void enable(SrcLocation loc);
void check_enable_in_libs();
void always_on_and_deprecated(const char *deprecated_from_v);
private:
std::string name_;
bool enabled_ = false;
const char *deprecated_from_v_ = nullptr;
std::vector<SrcLocation> locs_;
};
extern GlobalPragma pragma_allow_post_modification, pragma_compute_asm_ltr, pragma_remove_unused_functions;
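A hedged sketch of how these pragma objects are meant to be used, based only on the methods declared above (loc is an assumed SrcLocation variable):
// when the parser meets e.g. `#pragma allow-post-modification`, it records where:
tolk::pragma_allow_post_modification.enable(loc);
// later passes only consult the flag:
bool allowed = tolk::pragma_allow_post_modification.enabled();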
@ -1788,7 +1674,7 @@ extern GlobalPragma pragma_allow_post_modification, pragma_compute_asm_ltr, prag
*
*/
int tolk_proceed(const std::vector<std::string> &sources, std::ostream &outs, std::ostream &errs);
int tolk_proceed(const std::string &entrypoint_file_name, std::ostream &outs, std::ostream &errs);
} // namespace tolk

View file

@ -354,12 +354,6 @@ std::ostream& operator<<(std::ostream& os, const UnifyError& ue) {
return os;
}
std::string UnifyError::message() const {
std::ostringstream os;
print_message(os);
return os.str();
}
void check_width_compat(TypeExpr* te1, TypeExpr* te2) {
if (te1->minw > te2->maxw || te2->minw > te1->maxw) {
std::ostringstream os{"cannot unify types of widths ", std::ios_base::ate};