diff --git a/tolk/CMakeLists.txt b/tolk/CMakeLists.txt
index a47c7614..5306354d 100644
--- a/tolk/CMakeLists.txt
+++ b/tolk/CMakeLists.txt
@@ -5,8 +5,10 @@ set(TOLK_SOURCE
lexer.cpp
symtable.cpp
compiler-state.cpp
+ ast.cpp
+ ast-from-tokens.cpp
+ ast-to-legacy.cpp
unify-types.cpp
- parse-tolk.cpp
abscode.cpp
gen-abscode.cpp
analyzer.cpp
diff --git a/tolk/abscode.cpp b/tolk/abscode.cpp
index c028a531..0702b1b9 100644
--- a/tolk/abscode.cpp
+++ b/tolk/abscode.cpp
@@ -25,8 +25,8 @@ namespace tolk {
*
*/
-TmpVar::TmpVar(var_idx_t _idx, int _cls, TypeExpr* _type, SymDef* sym, SrcLocation loc)
- : v_type(_type), idx(_idx), cls(_cls), coord(0), where(loc) {
+TmpVar::TmpVar(var_idx_t _idx, bool _is_tmp_unnamed, TypeExpr* _type, SymDef* sym, SrcLocation loc)
+ : v_type(_type), idx(_idx), is_tmp_unnamed(_is_tmp_unnamed), coord(0), where(loc) {
if (sym) {
name = sym->sym_idx;
sym->value->idx = _idx;
@@ -59,9 +59,9 @@ void TmpVar::dump(std::ostream& os) const {
}
void TmpVar::show(std::ostream& os, int omit_idx) const {
- if (cls & _Named) {
+ if (!is_tmp_unnamed) {
os << G.symbols.get_name(name);
- if (omit_idx && (omit_idx >= 2 || (cls & _UniqueName))) {
+ if (omit_idx >= 2) {
return;
}
}
@@ -474,8 +474,8 @@ void CodeBlob::print(std::ostream& os, int flags) const {
os << "-------- END ---------\n\n";
}
-var_idx_t CodeBlob::create_var(int cls, TypeExpr* var_type, SymDef* sym, SrcLocation location) {
- vars.emplace_back(var_cnt, cls, var_type, sym, location);
+var_idx_t CodeBlob::create_var(bool is_tmp_unnamed, TypeExpr* var_type, SymDef* sym, SrcLocation location) {
+ vars.emplace_back(var_cnt, is_tmp_unnamed, var_type, sym, location);
if (sym) {
sym->value->idx = var_cnt;
}
@@ -492,7 +492,7 @@ bool CodeBlob::import_params(FormalArgList arg_list) {
SymDef* arg_sym;
SrcLocation arg_loc;
std::tie(arg_type, arg_sym, arg_loc) = par;
- list.push_back(create_var(arg_sym ? (TmpVar::_In | TmpVar::_Named) : TmpVar::_In, arg_type, arg_sym, arg_loc));
+ list.push_back(create_var(arg_sym == nullptr, arg_type, arg_sym, arg_loc));
}
emplace_back(loc, Op::_Import, list);
in_var_cnt = var_cnt;
diff --git a/tolk/analyzer.cpp b/tolk/analyzer.cpp
index 91b66ae9..cefa83b9 100644
--- a/tolk/analyzer.cpp
+++ b/tolk/analyzer.cpp
@@ -46,7 +46,7 @@ int CodeBlob::split_vars(bool strict) {
if (k != 1) {
var.coord = ~((n << 8) + k);
for (int i = 0; i < k; i++) {
- auto v = create_var(vars[j].cls, comp_types[i], 0, vars[j].where);
+ auto v = create_var(vars[j].is_tmp_unnamed, comp_types[i], 0, vars[j].where);
tolk_assert(v == n + i);
tolk_assert(vars[v].idx == v);
vars[v].name = vars[j].name;
diff --git a/tolk/ast-from-tokens.cpp b/tolk/ast-from-tokens.cpp
new file mode 100644
index 00000000..38657684
--- /dev/null
+++ b/tolk/ast-from-tokens.cpp
@@ -0,0 +1,877 @@
+/*
+ This file is part of TON Blockchain Library.
+
+ TON Blockchain Library is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ TON Blockchain Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
+*/
+#include "ast-from-tokens.h"
+#include "ast.h"
+#include "platform-utils.h"
+#include "type-expr.h"
+
+/*
+ * Here we construct AST for a tolk file.
+ * While constructing, no global state is modified.
+ * Historically, in FunC, there was no AST: while lexing, symbols were registered, types were inferred, and so on.
+ * That made any non-trivial semantic analysis practically impossible.
+ * Having an AST is a big step forward for future modifications and stability.
+ */
+
+namespace tolk {
+
+// given a token, determine whether it's a comparison operator: < > <= >= == != <=>
+static bool is_comparison_binary_op(TokenType tok) {
+ return tok == tok_lt || tok == tok_gt || tok == tok_leq || tok == tok_geq || tok == tok_eq || tok == tok_neq || tok == tok_spaceship;
+}
+
+// given a token, determine whether it's a bitwise operator: & | ^
+// (in Tolk, they also serve as logical operators, since there is no boolean type and no && || operators)
+static bool is_bitwise_binary_op(TokenType tok) {
+ return tok == tok_bitwise_and || tok == tok_bitwise_or || tok == tok_bitwise_xor;
+}
+
+// given a token, determine whether it's a binary + or - (addition/subtraction)
+static bool is_add_or_sub_binary_op(TokenType tok) {
+ return tok == tok_plus || tok == tok_minus;
+}
+
+// fire an error for a case "flags & 0xFF != 0" (equivalent to "flags & 1", probably unexpected)
+// a warning might seem more appropriate, but we decided to make it a strict error; this function always throws
+GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
+static void fire_error_lower_precedence(SrcLocation loc, std::string_view op_lower, std::string_view op_higher) {
+ std::string name_lower = static_cast(op_lower); // owning copies, so that operator+ below can concatenate them
+ std::string name_higher = static_cast(op_higher);
+ throw ParseError(loc, name_lower + " has lower precedence than " + name_higher +
+ ", probably this code won't work as you expected. "
+ "Use parenthesis: either (... " + name_lower + " ...) to evaluate it first, or (... " + name_higher + " ...) to suppress this error.");
+}
+
+// fire an error for a case "arg1 & arg2 | arg3" (two different bitwise operators chained without parentheses); always throws
+GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
+static void fire_error_mix_bitwise_and_or(SrcLocation loc, std::string_view op1, std::string_view op2) {
+ std::string name1 = static_cast(op1); // owning copies, so that operator+ below can concatenate them
+ std::string name2 = static_cast(op2);
+ throw ParseError(loc, "mixing " + name1 + " with " + name2 + " without parenthesis"
+ ", probably this code won't work as you expected. "
+ "Use parenthesis to emphasize operator precedence.");
+}
+
+// diagnose bitwise operators that are probably used in a wrong way due to tricky precedence
+// example: "flags & 0xFF != 0" is equivalent to "flags & 1", which is most likely unexpected
+// the only way for the programmer to suppress this error is to use parentheses
+// (how do we detect the presence of parentheses? simple: (0!=1) is ast_parenthesized_expr{ast_binary_operator},
+// that's why if rhs->type == ast_binary_operator, it's not surrounded by parentheses)
+static void diagnose_bitwise_precedence(SrcLocation loc, std::string_view operator_name, AnyV lhs, AnyV rhs) {
+ // handle "flags & 0xFF != 0" (rhs = "0xFF != 0")
+ if (rhs->type == ast_binary_operator && is_comparison_binary_op(rhs->as()->tok)) {
+ fire_error_lower_precedence(loc, operator_name, rhs->as()->operator_name);
+ }
+
+ // handle "0 != flags & 0xFF" (lhs = "0 != flags")
+ if (lhs->type == ast_binary_operator && is_comparison_binary_op(lhs->as()->tok)) {
+ fire_error_lower_precedence(loc, operator_name, lhs->as()->operator_name);
+ }
+
+ // handle "arg1 & arg2 | arg3" (lhs = "arg1 & arg2"); chaining the same operator, like "a & b & c", is allowed
+ if (lhs->type == ast_binary_operator && is_bitwise_binary_op(lhs->as()->tok) && lhs->as()->operator_name != operator_name) {
+ fire_error_mix_bitwise_and_or(loc, lhs->as()->operator_name, operator_name);
+ }
+}
+
+// diagnose "a << 8 + 1" (equivalent to "a << 9", probably unexpected); only an unparenthesized + or - on the right is reported
+static void diagnose_addition_in_bitshift(SrcLocation loc, std::string_view bitshift_operator_name, AnyV rhs) {
+ if (rhs->type == ast_binary_operator && is_add_or_sub_binary_op(rhs->as()->tok)) {
+ fire_error_lower_precedence(loc, bitshift_operator_name, rhs->as()->operator_name);
+ }
+}
+
+/*
+ *
+ * PARSE SOURCE
+ *
+ */
+
+// TE ::= TA | TA -> TE
+// TA ::= int | ... | cont | var | _ | () | ( TE { , TE } ) | [ TE { , TE } ]
+TypeExpr* parse_type(Lexer& lex, V forall_list);
+
+TypeExpr* parse_type1(Lexer& lex, V forall_list) { // parses TA from the grammar above: an atomic type, a hole, a forall variable, or a ()/[] group
+ switch (lex.tok()) {
+ case tok_int:
+ lex.next();
+ return TypeExpr::new_atomic(TypeExpr::_Int);
+ case tok_cell:
+ lex.next();
+ return TypeExpr::new_atomic(TypeExpr::_Cell);
+ case tok_slice:
+ lex.next();
+ return TypeExpr::new_atomic(TypeExpr::_Slice);
+ case tok_builder:
+ lex.next();
+ return TypeExpr::new_atomic(TypeExpr::_Builder);
+ case tok_cont:
+ lex.next();
+ return TypeExpr::new_atomic(TypeExpr::_Cont);
+ case tok_tuple:
+ lex.next();
+ return TypeExpr::new_atomic(TypeExpr::_Tuple);
+ case tok_var:
+ case tok_underscore: // both `var` and `_` mean "type not specified": a hole
+ lex.next();
+ return TypeExpr::new_hole();
+ case tok_identifier: { // an identifier is a type only if it names a variable from the forall list
+ if (int idx = forall_list ? forall_list->lookup_idx(lex.cur_str()) : -1; idx != -1) {
+ lex.next();
+ return forall_list->get_item(idx)->created_type;
+ }
+ lex.error("Is not a type identifier"); // NOTE(review): presumably does not return, otherwise control falls through to default
+ }
+ default:
+ break;
+ }
+ TokenType c; // the closing token that matches the opening one
+ if (lex.tok() == tok_opbracket) {
+ lex.next();
+ c = tok_clbracket;
+ } else {
+ lex.expect(tok_oppar, "");
+ c = tok_clpar;
+ }
+ if (lex.tok() == c) { // empty group: () is the unit type, [] is an empty tuple
+ lex.next();
+ return c == tok_clpar ? TypeExpr::new_unit() : TypeExpr::new_tuple({});
+ }
+ auto t1 = parse_type(lex, forall_list);
+ if (lex.tok() == tok_clpar) { // (T) is just T; the check is for ')' specifically, so [T] continues below
+ lex.expect(c, c == tok_clpar ? "')'" : "']'");
+ return t1;
+ }
+ std::vector tlist{1, t1}; // presumably the (count, value) constructor: a single element t1 — TODO confirm
+ while (lex.tok() == tok_comma) {
+ lex.next();
+ tlist.push_back(parse_type(lex, forall_list));
+ }
+ lex.expect(c, c == tok_clpar ? "')'" : "']'");
+ return c == tok_clpar ? TypeExpr::new_tensor(std::move(tlist)) : TypeExpr::new_tuple(std::move(tlist));
+}
+
+TypeExpr* parse_type(Lexer& lex, V forall_list) { // parses TE from the grammar above: TA, optionally followed by "-> TE"
+ TypeExpr* res = parse_type1(lex, forall_list);
+ if (lex.tok() == tok_mapsto) {
+ lex.next();
+ TypeExpr* to = parse_type(lex, forall_list); // recursion on the right side, so "a -> b -> c" groups as "a -> (b -> c)"
+ return TypeExpr::new_map(res, to);
+ }
+ return res;
+}
+
+AnyV parse_argument(Lexer& lex, V forall_list) { // parses one parameter: [type] [name]; either part may be omitted or be `_`
+ TypeExpr* arg_type = nullptr;
+ SrcLocation loc = lex.cur_location();
+ if (lex.tok() == tok_underscore) {
+ lex.next();
+ if (lex.tok() == tok_comma || lex.tok() == tok_clpar) { // a lone `_`: unnamed parameter of an unspecified type
+ return createV(loc, "", TypeExpr::new_hole());
+ }
+ arg_type = TypeExpr::new_hole(); // `_` stood at the type position; a name (or another `_`) follows
+ loc = lex.cur_location();
+ } else if (lex.tok() != tok_identifier) { // int, cell, [X], etc.
+ arg_type = parse_type(lex, forall_list);
+ } else {
+ // an identifier: either a forall type variable (then it's a type to parse),
+ // or the parameter name itself (then the type is omitted and becomes a hole)
+ if (forall_list && forall_list->lookup_idx(lex.cur_str()) != -1) {
+ arg_type = parse_type(lex, forall_list);
+ } else {
+ arg_type = TypeExpr::new_hole();
+ }
+ }
+ if (lex.tok() == tok_underscore || lex.tok() == tok_comma || lex.tok() == tok_clpar) { // no name given: unnamed parameter
+ if (lex.tok() == tok_underscore) {
+ loc = lex.cur_location();
+ lex.next();
+ }
+ return createV(loc, "", arg_type);
+ }
+ lex.check(tok_identifier, "parameter name");
+ loc = lex.cur_location(); // a named parameter is located at its name, not at its type
+ std::string_view arg_name = lex.cur_str();
+ lex.next();
+ return createV(loc, arg_name, arg_type);
+}
+
+AnyV parse_global_var_declaration(Lexer& lex) { // parses one item of a `global` list: [type] name
+ TypeExpr* declared_type = nullptr; // stays nullptr when the declaration starts directly with the name
+ SrcLocation loc = lex.cur_location();
+ if (lex.tok() == tok_underscore) {
+ lex.next();
+ declared_type = TypeExpr::new_hole();
+ loc = lex.cur_location();
+ } else if (lex.tok() != tok_identifier) {
+ declared_type = parse_type(lex, nullptr); // no forall list is available at the global scope
+ }
+ lex.check(tok_identifier, "global variable name");
+ std::string_view var_name = lex.cur_str();
+ lex.next();
+ return createV(loc, var_name, declared_type);
+}
+
+AnyV parse_expr(Lexer& lex);
+
+AnyV parse_constant_declaration(Lexer& lex) { // parses one item of a `const` list: [int | slice] name = expr
+ TypeExpr *declared_type = nullptr; // only `int` and `slice` are recognized as explicit types; otherwise it stays nullptr
+ if (lex.tok() == tok_int) {
+ declared_type = TypeExpr::new_atomic(TypeExpr::_Int);
+ lex.next();
+ } else if (lex.tok() == tok_slice) {
+ declared_type = TypeExpr::new_atomic(TypeExpr::_Slice);
+ lex.next();
+ }
+ lex.check(tok_identifier, "constant name");
+ SrcLocation loc = lex.cur_location(); // the declaration is located at the constant's name
+ std::string_view const_name = lex.cur_str();
+ lex.next();
+ lex.expect(tok_assign, "'='");
+ AnyV init_value = parse_expr(lex);
+ return createV(loc, const_name, declared_type, init_value);
+}
+
+AnyV parse_argument_list(Lexer& lex, V forall_list) { // parses "(" [ argument { "," argument } ] ")"
+ SrcLocation loc = lex.cur_location();
+ std::vector args;
+ lex.expect(tok_oppar, "argument list");
+ if (lex.tok() != tok_clpar) { // "()" yields an empty list
+ args.push_back(parse_argument(lex, forall_list));
+ while (lex.tok() == tok_comma) {
+ lex.next();
+ args.push_back(parse_argument(lex, forall_list));
+ }
+ }
+ lex.expect(tok_clpar, "')'");
+ return createV(loc, std::move(args));
+}
+
+AnyV parse_constant_declaration_list(Lexer& lex) { // parses "const" declaration { "," declaration } ";"
+ std::vector consts;
+ SrcLocation loc = lex.cur_location();
+ lex.expect(tok_const, "'const'");
+ while (true) { // at least one declaration is required
+ consts.push_back(parse_constant_declaration(lex));
+ if (lex.tok() != tok_comma) {
+ break;
+ }
+ lex.expect(tok_comma, "','");
+ }
+ lex.expect(tok_semicolon, "';'");
+ return createV(loc, std::move(consts));
+}
+
+AnyV parse_global_var_declaration_list(Lexer& lex) { // parses "global" declaration { "," declaration } ";"
+ std::vector globals;
+ SrcLocation loc = lex.cur_location();
+ lex.expect(tok_global, "'global'");
+ while (true) { // at least one declaration is required
+ globals.push_back(parse_global_var_declaration(lex));
+ if (lex.tok() != tok_comma) {
+ break;
+ }
+ lex.expect(tok_comma, "','");
+ }
+ lex.expect(tok_semicolon, "';'");
+ return createV(loc, std::move(globals));
+}
+
+// parse a primary expression: ( E { , E } ) | () | [ E { , E } ] | [] | id | num | string | type keyword | true | false | nil | _
+AnyV parse_expr100(Lexer& lex) {
+ SrcLocation loc = lex.cur_location();
+ if (lex.tok() == tok_oppar) {
+ lex.next();
+ if (lex.tok() == tok_clpar) { // "()"
+ lex.next();
+ return createV(loc, {});
+ }
+ AnyV res = parse_expr(lex);
+ if (lex.tok() == tok_clpar) { // "(E)": a single expression in parentheses gets its own wrapping node
+ lex.next();
+ return createV(loc, res);
+ }
+ std::vector items;
+ bool is_type_expression = res->type == ast_type_expression; // to distinguish `(a,b)` from `(int,slice)`
+ items.emplace_back(res);
+ while (lex.tok() == tok_comma) {
+ lex.next();
+ AnyV item = parse_expr(lex);
+ if (is_type_expression != (item->type == ast_type_expression)) { // all items must be of the same kind as the first one
+ lex.error("mixing type and non-type expressions inside the same tuple");
+ }
+ items.emplace_back(item);
+ }
+ lex.expect(tok_clpar, "')'");
+ if (is_type_expression) { // `(int,slice)`: merge the item types into a single tensor type expression
+ std::vector types;
+ types.reserve(items.size());
+ for (AnyV item : items) {
+ types.emplace_back(item->as()->declared_type);
+ }
+ return createV(loc, TypeExpr::new_tensor(std::move(types)));
+ }
+ return createV(loc, std::move(items));
+ }
+ if (lex.tok() == tok_opbracket) {
+ lex.next();
+ if (lex.tok() == tok_clbracket) { // "[]"
+ lex.next();
+ return createV(loc, {});
+ }
+ AnyV res = parse_expr(lex); // unlike "(E)", a single "[E]" is not special-cased: it becomes a one-item list below
+ std::vector items;
+ bool is_type_expression = res->type == ast_type_expression; // to distinguish `[a,b]` from `[int,slice]`
+ items.emplace_back(res);
+ while (lex.tok() == tok_comma) {
+ lex.next();
+ AnyV item = parse_expr(lex);
+ if (is_type_expression != (item->type == ast_type_expression)) { // all items must be of the same kind as the first one
+ lex.error("mixing type and non-type expressions inside the same tuple");
+ }
+ items.emplace_back(item);
+ }
+ lex.expect(tok_clbracket, "']'");
+ if (is_type_expression) { // `[int,slice]`: a tuple type built on top of the tensor of item types
+ std::vector types;
+ types.reserve(items.size());
+ for (AnyV item : items) {
+ types.emplace_back(item->as()->declared_type);
+ }
+ return createV(loc, TypeExpr::new_tuple(TypeExpr::new_tensor(std::move(types))));
+ }
+ return createV(loc, std::move(items));
+ }
+ TokenType t = lex.tok();
+ if (t == tok_int_const) {
+ std::string_view int_val = lex.cur_str();
+ lex.next();
+ return createV(loc, int_val);
+ }
+ if (t == tok_string_const) {
+ std::string_view str_val = lex.cur_str();
+ lex.next();
+ char modifier = 0; // 0 means "no modifier"; otherwise it's the first char of the modifier token right after the string
+ if (lex.tok() == tok_string_modifier) {
+ modifier = lex.cur_str()[0];
+ lex.next();
+ }
+ return createV(loc, str_val, modifier);
+ }
+ if (t == tok_underscore) {
+ lex.next();
+ return createV(loc);
+ }
+ if (t == tok_var) { // `var` in expression position is a type expression with a hole type
+ lex.next();
+ return createV(loc, TypeExpr::new_hole());
+ }
+ if (t == tok_int || t == tok_cell || t == tok_slice || t == tok_builder || t == tok_cont || t == tok_tuple) {
+ lex.next();
+ return createV(loc, TypeExpr::new_atomic(t)); // NOTE(review): passes the token itself — presumably atomic type ids share values with these tokens, confirm
+ }
+ if (t == tok_true || t == tok_false) {
+ lex.next();
+ return createV(loc, t == tok_true);
+ }
+ if (t == tok_nil) {
+ lex.next();
+ return createV(loc);
+ }
+ if (t == tok_identifier) {
+ std::string_view str_val = lex.cur_str();
+ lex.next();
+ return createV(loc, str_val);
+ }
+ lex.expect(tok_identifier, "identifier"); // nothing matched: the token is not an identifier here, so this reports the error
+ return nullptr;
+}
+
+// parse E [ E ]: a primary expression optionally followed by one more (a call argument, or a variable declared after a type)
+AnyV parse_expr90(Lexer& lex) {
+ AnyV res = parse_expr100(lex);
+ while (lex.tok() == tok_oppar || lex.tok() == tok_opbracket || (lex.tok() == tok_identifier && lex.cur_str()[0] != '.' && lex.cur_str()[0] != '~')) { // NOTE(review): both branches return, so this `while` runs at most once
+ if (const auto* v_type_expr = res->try_as()) { // a type followed by an expression, like `int x` or `(int, int) (a, b)`
+ AnyV dest = parse_expr100(lex);
+ return createV(v_type_expr->loc, v_type_expr->declared_type, dest);
+ } else { // anything else followed by an expression: res is applied to arg
+ AnyV arg = parse_expr100(lex);
+ return createV(res->loc, res, arg);
+ }
+ }
+ return res;
+}
+
+// parse E { .method E | ~method E }: a chain of method calls, left-associative
+AnyV parse_expr80(Lexer& lex) {
+ AnyV lhs = parse_expr90(lex);
+ while (lex.tok() == tok_identifier && (lex.cur_str()[0] == '.' || lex.cur_str()[0] == '~')) { // the leading . or ~ is part of the identifier token
+ std::string_view method_name = lex.cur_str(); // kept together with its leading . or ~
+ SrcLocation loc = lex.cur_location();
+ lex.next();
+ const ASTNodeBase *arg = parse_expr100(lex);
+ lhs = createV(loc, method_name, lhs, arg); // the previous result becomes the object of the next call
+ }
+ return lhs;
+}
+
+// parse [ ~ | - | + ] E: prefix unary operators
+AnyV parse_expr75(Lexer& lex) {
+ TokenType t = lex.tok();
+ if (t == tok_bitwise_not || t == tok_minus || t == tok_plus) {
+ SrcLocation loc = lex.cur_location();
+ std::string_view operator_name = lex.cur_str();
+ lex.next();
+ AnyV rhs = parse_expr75(lex); // recursion allows stacked prefixes, like "- ~ x"
+ return createV(loc, operator_name, t, rhs);
+ } else {
+ return parse_expr80(lex);
+ }
+}
+
+// parse E { (* | / | % | /% | ^/ | ~/ | ^% | ~% ) E }: multiplicative operators, left-associative
+AnyV parse_expr30(Lexer& lex) {
+ AnyV lhs = parse_expr75(lex);
+ TokenType t = lex.tok();
+ while (t == tok_mul || t == tok_div || t == tok_mod || t == tok_divmod || t == tok_divC ||
+ t == tok_divR || t == tok_modC || t == tok_modR) {
+ SrcLocation loc = lex.cur_location(); // a binary operator node is located at the operator token
+ std::string_view operator_name = lex.cur_str();
+ lex.next();
+ AnyV rhs = parse_expr75(lex);
+ lhs = createV(loc, operator_name, t, lhs, rhs);
+ t = lex.tok();
+ }
+ return lhs;
+}
+
+// parse E { (+ | -) E }: additive operators, left-associative
+AnyV parse_expr20(Lexer& lex) {
+ AnyV lhs = parse_expr30(lex);
+ TokenType t = lex.tok();
+ while (t == tok_minus || t == tok_plus) {
+ SrcLocation loc = lex.cur_location(); // a binary operator node is located at the operator token
+ std::string_view operator_name = lex.cur_str();
+ lex.next();
+ AnyV rhs = parse_expr30(lex);
+ lhs = createV(loc, operator_name, t, lhs, rhs);
+ t = lex.tok();
+ }
+ return lhs;
+}
+
+// parse E { ( << | >> | ~>> | ^>> ) E }: bit shifts, left-associative
+AnyV parse_expr17(Lexer& lex) {
+ AnyV lhs = parse_expr20(lex);
+ TokenType t = lex.tok();
+ while (t == tok_lshift || t == tok_rshift || t == tok_rshiftC || t == tok_rshiftR) {
+ SrcLocation loc = lex.cur_location();
+ std::string_view operator_name = lex.cur_str();
+ lex.next();
+ AnyV rhs = parse_expr20(lex);
+ diagnose_addition_in_bitshift(loc, operator_name, rhs); // rejects "a << 8 + 1" unless the right side is parenthesized
+ lhs = createV(loc, operator_name, t, lhs, rhs);
+ t = lex.tok();
+ }
+ return lhs;
+}
+
+// parse E [ (== | < | > | <= | >= | != | <=> ) E ]: at most one comparison, they do not chain ("a < b < c" is not parsed here)
+AnyV parse_expr15(Lexer& lex) {
+ AnyV lhs = parse_expr17(lex);
+ TokenType t = lex.tok();
+ if (t == tok_eq || t == tok_lt || t == tok_gt || t == tok_leq || t == tok_geq || t == tok_neq || t == tok_spaceship) {
+ SrcLocation loc = lex.cur_location();
+ std::string_view operator_name = lex.cur_str();
+ lex.next();
+ AnyV rhs = parse_expr17(lex);
+ lhs = createV(loc, operator_name, t, lhs, rhs);
+ }
+ return lhs;
+}
+
+// parse E { ( & | `|` | ^ ) E }: bitwise operators, left-associative, all three at the same precedence level
+AnyV parse_expr14(Lexer& lex) {
+ AnyV lhs = parse_expr15(lex);
+ TokenType t = lex.tok();
+ while (t == tok_bitwise_and || t == tok_bitwise_or || t == tok_bitwise_xor) {
+ SrcLocation loc = lex.cur_location();
+ std::string_view operator_name = lex.cur_str();
+ lex.next();
+ AnyV rhs = parse_expr15(lex);
+ diagnose_bitwise_precedence(loc, operator_name, lhs, rhs); // rejects unparenthesized comparisons and mixed & | ^ chains
+ lhs = createV(loc, operator_name, t, lhs, rhs);
+ t = lex.tok();
+ }
+ return lhs;
+}
+
+// parse E [ ? E : E ]: the ternary operator
+AnyV parse_expr13(Lexer& lex) {
+ AnyV res = parse_expr14(lex);
+ if (lex.tok() == tok_question) {
+ SrcLocation loc = lex.cur_location();
+ lex.next();
+ AnyV when_true = parse_expr(lex); // a full expression is allowed between ? and :
+ lex.expect(tok_colon, "':'");
+ AnyV when_false = parse_expr13(lex); // recursion here makes "a ? b : c ? d : e" group to the right
+ return createV(loc, res, when_true, when_false);
+ }
+ return res;
+}
+
+// parse LE1 (= | += | -= | ... ) E2: assignment and compound assignments
+AnyV parse_expr10(Lexer& lex) {
+ AnyV lhs = parse_expr13(lex);
+ TokenType t = lex.tok();
+ if (t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || t == tok_set_divR || t == tok_set_divC ||
+ t == tok_set_mod || t == tok_set_modC || t == tok_set_modR || t == tok_set_lshift || t == tok_set_rshift || t == tok_set_rshiftC ||
+ t == tok_set_rshiftR || t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor ||
+ t == tok_assign) {
+ SrcLocation loc = lex.cur_location();
+ std::string_view operator_name = lex.cur_str();
+ lex.next();
+ AnyV rhs = parse_expr10(lex); // recursion on the right side, so "a = b = c" groups as "a = (b = c)"
+ return createV(loc, operator_name, t, lhs, rhs);
+ }
+ return lhs;
+}
+
+AnyV parse_expr(Lexer& lex) { // entry point for parsing any expression
+ return parse_expr10(lex); // starts from the assignment level, which is the outermost one in this chain of parse_exprNN
+}
+
+AnyV parse_return_stmt(Lexer& lex) { // parses "return" E ";"
+ SrcLocation loc = lex.cur_location();
+ lex.expect(tok_return, "'return'");
+ AnyV child = parse_expr(lex); // an expression is always parsed here, it is not optional
+ lex.expect(tok_semicolon, "';'");
+ return createV(loc, child);
+}
+
+AnyV parse_statement(Lexer& lex);
+
+V parse_sequence(Lexer& lex) { // parses "{" { statement } "}"
+ SrcLocation loc = lex.cur_location();
+ lex.expect(tok_opbrace, "'{'");
+ std::vector items;
+ while (lex.tok() != tok_clbrace) {
+ items.push_back(parse_statement(lex));
+ }
+ SrcLocation loc_end = lex.cur_location(); // location of the closing brace, stored along with the opening one
+ lex.expect(tok_clbrace, "'}'");
+ return createV(loc, loc_end, items);
+}
+
+AnyV parse_repeat_statement(Lexer& lex) { // parses "repeat" E "{" ... "}"
+ SrcLocation loc = lex.cur_location();
+ lex.expect(tok_repeat, "'repeat'");
+ AnyV cond = parse_expr(lex); // the expression right after the `repeat` keyword
+ V body = parse_sequence(lex); // the body must be a braced block
+ return createV(loc, cond, body);
+}
+
+AnyV parse_while_statement(Lexer& lex) { // parses "while" E "{" ... "}"
+ SrcLocation loc = lex.cur_location();
+ lex.expect(tok_while, "'while'");
+ AnyV cond = parse_expr(lex);
+ V body = parse_sequence(lex); // the body must be a braced block
+ return createV(loc, cond, body);
+}
+
+ASTNodeBase* parse_do_until_statement(Lexer& lex) { // parses "do" "{" ... "}" "until" E; NOTE(review): sibling parsers return AnyV, this one ASTNodeBase*
+ SrcLocation loc = lex.cur_location();
+ lex.expect(tok_do, "'do'");
+ V body = parse_sequence(lex);
+ lex.expect(tok_until, "'until'");
+ AnyV cond = parse_expr(lex); // no ';' is consumed here after the condition
+ return createV(loc, body, cond);
+}
+
+AnyV parse_try_catch_statement(Lexer& lex) { // parses "try" "{" ... "}" "catch" E "{" ... "}"
+ SrcLocation loc = lex.cur_location();
+ lex.expect(tok_try, "'try'");
+ V try_body = parse_sequence(lex);
+ lex.expect(tok_catch, "'catch'");
+ AnyV catch_expr = parse_expr(lex); // what follows `catch` is parsed as an ordinary expression
+ V catch_body = parse_sequence(lex);
+ return createV(loc, try_body, catch_expr, catch_body);
+}
+
+AnyV parse_if_statement(Lexer& lex, bool is_ifnot) { // parses if/ifnot (and, recursively, elseif/elseifnot) with an optional else part
+ SrcLocation loc = lex.cur_location();
+ lex.next(); // skip the keyword the caller has seen: if / ifnot / elseif / elseifnot
+ AnyV cond = parse_expr(lex);
+ V if_body = parse_sequence(lex);
+ V else_body = nullptr; // always assigned below, so the resulting node never has a null else body
+ if (lex.tok() == tok_else) {
+ lex.next();
+ else_body = parse_sequence(lex);
+ } else if (lex.tok() == tok_elseif) {
+ AnyV v_inner_if = parse_if_statement(lex, false); // "elseif" becomes a nested `if`
+ else_body = createV(v_inner_if->loc, lex.cur_location(), {v_inner_if}); // wrapped into a one-statement sequence
+ } else if (lex.tok() == tok_elseifnot) {
+ AnyV v_inner_if = parse_if_statement(lex, true); // "elseifnot" becomes a nested `ifnot`
+ else_body = createV(v_inner_if->loc, lex.cur_location(), {v_inner_if}); // wrapped into a one-statement sequence
+ } else {
+ else_body = createV(lex.cur_location(), lex.cur_location(), {}); // no else part: an empty sequence
+ }
+ return createV(loc, is_ifnot, cond, if_body, else_body);
+}
+
+AnyV parse_statement(Lexer& lex) { // parses one statement, dispatching by its first token
+ switch (lex.tok()) {
+ case tok_return:
+ return parse_return_stmt(lex);
+ case tok_opbrace: // a nested block
+ return parse_sequence(lex);
+ case tok_repeat:
+ return parse_repeat_statement(lex);
+ case tok_if:
+ return parse_if_statement(lex, false);
+ case tok_ifnot:
+ return parse_if_statement(lex, true);
+ case tok_do:
+ return parse_do_until_statement(lex);
+ case tok_while:
+ return parse_while_statement(lex);
+ case tok_try:
+ return parse_try_catch_statement(lex);
+ case tok_semicolon: { // a lone ';' is a statement on its own
+ lex.next();
+ return createV;
+ }
+ default: { // anything else is an expression statement that must end with ';'
+ AnyV expr = parse_expr(lex);
+ lex.expect(tok_semicolon, "';'");
+ return expr;
+ }
+ }
+}
+
+AnyV parse_func_body(Lexer& lex) { // parses a regular (non-asm) function body
+ return parse_sequence(lex); // which is just a braced sequence of statements
+}
+
+AnyV parse_asm_func_body(Lexer& lex, V arg_list) { // parses "asm" [ "(" arg names [ "->" ret indices ] ")" ] "COMMAND" { "COMMAND" } ";"
+ SrcLocation loc = lex.cur_location();
+ lex.expect(tok_asm, "'asm'");
+ size_t n_args = arg_list->size();
+ if (n_args > 16) {
+ throw ParseError{loc, "assembler built-in function can have at most 16 arguments"};
+ }
+ std::vector arg_order, ret_order; // both stay empty when no "(...)" part is given
+ if (lex.tok() == tok_oppar) {
+ lex.next();
+ while (lex.tok() == tok_identifier || lex.tok() == tok_int_const) { // each token here must resolve to a parameter via lookup_idx
+ int arg_idx = arg_list->lookup_idx(lex.cur_str());
+ if (arg_idx == -1) {
+ lex.error("argument name expected");
+ }
+ arg_order.push_back(arg_idx);
+ lex.next();
+ }
+ if (lex.tok() == tok_mapsto) { // optional "-> N N ..." part: integers collected into ret_order
+ lex.next();
+ while (lex.tok() == tok_int_const) {
+ int ret_idx = std::atoi(static_cast(lex.cur_str()).c_str());
+ ret_order.push_back(ret_idx);
+ lex.next();
+ }
+ }
+ lex.expect(tok_clpar, "')'");
+ }
+ std::vector asm_commands;
+ lex.check(tok_string_const, "\"ASM COMMAND\""); // at least one command is required; the loop below still sees this token
+ while (lex.tok() == tok_string_const) {
+ std::string_view asm_command = lex.cur_str();
+ asm_commands.push_back(createV(lex.cur_location(), asm_command, 0));
+ lex.next();
+ }
+ lex.expect(tok_semicolon, "';'");
+ return createV(loc, std::move(arg_order), std::move(ret_order), std::move(asm_commands));
+}
+
+AnyV parse_forall(Lexer& lex) { // parses "forall" T { "," T } "->"
+ SrcLocation loc = lex.cur_location();
+ std::vector forall_items;
+ lex.expect(tok_forall, "'forall'");
+ int idx = 0; // sequential index given to every declared type variable, starting from 0
+ while (true) {
+ lex.check(tok_identifier, "T expected");
+ std::string_view nameT = lex.cur_str();
+ TypeExpr* type = TypeExpr::new_var(idx++);
+ forall_items.emplace_back(createV(lex.cur_location(), type, static_cast(nameT)));
+ lex.next();
+ if (lex.tok() != tok_comma) {
+ break;
+ }
+ lex.next();
+ }
+ lex.expect(tok_mapsto, "'->'");
+ return createV{loc, std::move(forall_items)};
+}
+
+AnyV parse_function_declaration(Lexer& lex) { // parses a whole function: [forall | get] ret_type name(args) specifiers body
+ SrcLocation loc = lex.cur_location();
+ V forall_list = nullptr;
+ bool is_get_method = false;
+ bool is_builtin = false;
+ bool marked_as_inline = false;
+ bool marked_as_inline_ref = false;
+ if (lex.tok() == tok_forall) { // `forall` and `get` are alternatives here: only one of them is consumed
+ forall_list = parse_forall(lex)->as();
+ } else if (lex.tok() == tok_get) {
+ is_get_method = true;
+ lex.next();
+ }
+ TypeExpr* ret_type = parse_type(lex, forall_list);
+ lex.check(tok_identifier, "function name identifier expected");
+ std::string func_name = static_cast(lex.cur_str());
+ lex.next();
+ V arg_list = parse_argument_list(lex, forall_list)->as();
+ bool marked_as_pure = false;
+ if (lex.tok() == tok_impure) { // accepted and skipped: it sets no flag, only triggers a deprecation warning
+ static bool warning_shown = false; // function-local static: the warning is printed only once
+ if (!warning_shown) {
+ lex.cur_location().show_warning("`impure` specifier is deprecated. All functions are impure by default, use `pure` to mark a function as pure");
+ warning_shown = true;
+ }
+ lex.next();
+ } else if (lex.tok() == tok_pure) {
+ marked_as_pure = true;
+ lex.next();
+ }
+ if (lex.tok() == tok_inline) { // `inline` and `inline_ref` are alternatives: at most one of them is consumed
+ marked_as_inline = true;
+ lex.next();
+ } else if (lex.tok() == tok_inlineref) {
+ marked_as_inline_ref = true;
+ lex.next();
+ }
+ V method_id = nullptr; // remains nullptr unless an explicit method_id(N) is given
+ if (lex.tok() == tok_method_id) {
+ if (is_get_method) {
+ lex.error("both `get` and `method_id` are not allowed");
+ }
+ lex.next();
+ if (lex.tok() == tok_oppar) { // method_id(N)
+ lex.next();
+ lex.check(tok_int_const, "number");
+ std::string_view int_val = lex.cur_str();
+ method_id = createV(lex.cur_location(), int_val);
+ lex.next();
+ lex.expect(tok_clpar, "')'");
+ } else { // bare `method_id` without a number: deprecated, treated the same as the `get` keyword
+ static bool warning_shown = false; // function-local static: the warning is printed only once
+ if (!warning_shown) {
+ lex.cur_location().show_warning("`method_id` specifier is deprecated, use `get` keyword.\nExample: `get int seqno() { ... }`");
+ warning_shown = true;
+ }
+ is_get_method = true;
+ }
+ }
+
+ AnyV body = nullptr;
+
+ if (lex.tok() == tok_builtin) { // "builtin;" — no body in the source
+ is_builtin = true;
+ body = createV;
+ lex.next();
+ lex.expect(tok_semicolon, "';'");
+ } else if (lex.tok() == tok_semicolon) {
+ // todo this is just a prototype, remove this "feature" in the future
+ lex.next();
+ body = createV;
+ } else if (lex.tok() == tok_opbrace) {
+ body = parse_func_body(lex);
+ } else if (lex.tok() == tok_asm) {
+ body = parse_asm_func_body(lex, arg_list);
+ } else {
+ lex.expect(tok_opbrace, "function body block"); // none of the above matched, so this reports the error
+ }
+
+ auto f_declaration = createV(loc, func_name, arg_list, body); // the remaining properties are assigned one by one below
+ f_declaration->ret_type = ret_type;
+ f_declaration->forall_list = forall_list;
+ f_declaration->marked_as_pure = marked_as_pure;
+ f_declaration->marked_as_get_method = is_get_method;
+ f_declaration->marked_as_builtin = is_builtin;
+ f_declaration->marked_as_inline = marked_as_inline;
+ f_declaration->marked_as_inline_ref = marked_as_inline_ref;
+ f_declaration->method_id = method_id;
+ return f_declaration;
+}
+
+AnyV parse_pragma(Lexer& lex) { // parses "#pragma version <cmp> <semver>;" or "#pragma <name>;"
+ SrcLocation loc = lex.cur_location();
+ lex.next_special(tok_pragma_name, "pragma name"); // the pragma name is requested from the lexer as a special token
+ std::string_view pragma_name = lex.cur_str();
+ if (pragma_name == "version") {
+ lex.next();
+ TokenType cmp_tok = lex.tok();
+ bool valid = cmp_tok == tok_gt || cmp_tok == tok_geq || cmp_tok == tok_lt || cmp_tok == tok_leq || cmp_tok == tok_eq || cmp_tok == tok_bitwise_xor; // `^` is accepted here as well
+ if (!valid) {
+ lex.error("invalid comparison operator");
+ }
+ lex.next_special(tok_semver, "semver"); // the version string is also requested as a special token
+ std::string_view semver = lex.cur_str();
+ lex.next();
+ lex.expect(tok_semicolon, "';'");
+ return createV(loc, cmp_tok, semver);
+ }
+ lex.next(); // any other pragma: only its name is stored, it is not validated here
+ lex.expect(tok_semicolon, "';'");
+ return createV(loc, pragma_name);
+}
+
+AnyV parse_include_statement(Lexer& lex) { // parses #include "file"; — the file itself is not opened here
+ SrcLocation loc = lex.cur_location();
+ lex.expect(tok_include, "#include");
+ lex.check(tok_string_const, "source file name");
+ std::string_view rel_filename = lex.cur_str(); // stored as written in the source
+ if (rel_filename.empty()) {
+ lex.error("imported file name is an empty string");
+ }
+ lex.next();
+ lex.expect(tok_semicolon, "';'");
+ return createV(loc, rel_filename);
+}
+
+// the main (exported) function: parses a whole file into a vertex holding the list of its top-level declarations
+AnyV parse_src_file_to_ast(SrcFile* file) {
+ file->was_parsed = true; // set before parsing starts
+
+ std::vector toplevel_declarations;
+ Lexer lex(file);
+ while (!lex.is_eof()) { // dispatch by the first token of every top-level declaration
+ if (lex.tok() == tok_pragma) {
+ toplevel_declarations.push_back(parse_pragma(lex));
+ } else if (lex.tok() == tok_include) {
+ toplevel_declarations.push_back(parse_include_statement(lex));
+ } else if (lex.tok() == tok_global) {
+ toplevel_declarations.push_back(parse_global_var_declaration_list(lex));
+ } else if (lex.tok() == tok_const) {
+ toplevel_declarations.push_back(parse_constant_declaration_list(lex));
+ } else { // anything else is expected to be a function declaration
+ toplevel_declarations.push_back(parse_function_declaration(lex));
+ }
+ }
+ return createV(file, std::move(toplevel_declarations));
+}
+
+} // namespace tolk
diff --git a/tolk/ast-from-tokens.h b/tolk/ast-from-tokens.h
new file mode 100644
index 00000000..65b82ad6
--- /dev/null
+++ b/tolk/ast-from-tokens.h
@@ -0,0 +1,27 @@
+/*
+ This file is part of TON Blockchain Library.
+
+ TON Blockchain Library is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ TON Blockchain Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
+*/
+#pragma once
+
+#include "src-file.h"
+
+namespace tolk {
+
+struct ASTNodeBase;
+
+const ASTNodeBase* parse_src_file_to_ast(SrcFile* file); // parses the given source file and returns the root vertex of its AST
+
+} // namespace tolk
diff --git a/tolk/ast-replacer.h b/tolk/ast-replacer.h
new file mode 100644
index 00000000..feae5616
--- /dev/null
+++ b/tolk/ast-replacer.h
@@ -0,0 +1,155 @@
+/*
+ This file is part of TON Blockchain Library.
+
+ TON Blockchain Library is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ TON Blockchain Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
+*/
+#pragma once
+
+#include "ast.h"
+#include "platform-utils.h"
+
+/*
+ * This module implements traversing a vertex tree and replacing any vertex with another one.
+ * For example, to replace a "beginCell()" call with "begin_cell()" in a function body (in V)
+ * regardless of the place where this call is performed, you need to iterate over the whole function AST,
+ * find ast_function_call(beginCell), create ast_function_call(begin_cell) instead, and replace
+ * the pointer inside its parent.
+ * Inheriting from ASTReplacer makes this task quite simple, without any boilerplate.
+ *
+ * If you need just to traverse a vertex tree without replacing vertices,
+ * consider another api: ast-visitor.h.
+ */
+
+namespace tolk {
+
+// Base class of all replacers.
+// Subclasses implement replace(); the protected replace_children() overloads (one per vertex shape:
+// leaf / unary / binary / vararg) call replace() for every child and store the returned pointer
+// back into the parent, so a vertex is modified in place and returned.
+class ASTReplacer {
+public:
+  virtual ~ASTReplacer() = default;
+
+  // returns the vertex that should stand in place of `v` (possibly `v` itself)
+  virtual AnyV replace(AnyV v) = 0;
+
+protected:
+  // a leaf has no children: nothing to replace
+  GNU_ATTRIBUTE_ALWAYS_INLINE static AnyV replace_children(const ASTNodeLeaf* v) {
+    return v;
+  }
+
+  // vertices are handed around as const pointers, so constness is cast away to update the child slot
+  GNU_ATTRIBUTE_ALWAYS_INLINE AnyV replace_children(const ASTNodeUnary* v) {
+    auto* editable = const_cast(v);
+    AnyV new_child = replace(editable->child);
+    editable->child = new_child;
+    return editable;
+  }
+
+  // lhs is processed before rhs
+  GNU_ATTRIBUTE_ALWAYS_INLINE AnyV replace_children(const ASTNodeBinary* v) {
+    auto* editable = const_cast(v);
+    AnyV new_lhs = replace(v->lhs);
+    editable->lhs = new_lhs;
+    AnyV new_rhs = replace(v->rhs);
+    editable->rhs = new_rhs;
+    return editable;
+  }
+
+  // children are processed in their stored order, each slot is overwritten with the result
+  GNU_ATTRIBUTE_ALWAYS_INLINE AnyV replace_children(const ASTNodeVararg* v) {
+    auto* editable = const_cast(v);
+    for (AnyV& slot : editable->children) {
+      slot = replace(slot);
+    }
+    return editable;
+  }
+};
+
+// A replacer for vertices that may occur inside a function body.
+// replace(AnyV) dispatches on v->type to a typed virtual replace() handler; every handler, unless
+// overridden in a subclass, just descends into the children of the vertex via replace_children().
+// A vertex type not listed in the dispatcher results in UnexpectedASTNodeType being thrown.
+class ASTReplacerInFunctionBody : public ASTReplacer {
+protected:
+  // alias for subclasses (presumably to call the default handler as parent::replace(v))
+  using parent = ASTReplacerInFunctionBody;
+
+  // one overridable handler per vertex type dispatched by replace(AnyV) below
+  virtual AnyV replace(V v) { return replace_children(v); }
+  virtual AnyV replace(V v) { return replace_children(v); }
+  virtual AnyV replace(V v) { return replace_children(v); }
+  virtual AnyV replace(V v) { return replace_children(v); }
+  virtual AnyV replace(V v) { return replace_children(v); }
+  virtual AnyV replace(V v) { return replace_children(v); }
+  virtual AnyV replace(V v) { return replace_children(v); }
+  virtual AnyV replace(V v) { return replace_children(v); }
+  virtual AnyV replace(V v) { return replace_children(v); }
+  virtual AnyV replace(V v) { return replace_children(v); }
+  virtual AnyV replace(V v) { return replace_children(v); }
+  virtual AnyV replace(V v) { return replace_children(v); }
+  virtual AnyV replace(V v) { return replace_children(v); }
+  virtual AnyV replace(V v) { return replace_children(v); }
+  virtual AnyV replace(V v) { return replace_children(v); }
+  virtual AnyV replace(V v) { return replace_children(v); }
+  virtual AnyV replace(V v) { return replace_children(v); }
+  virtual AnyV replace(V v) { return replace_children(v); }
+  virtual AnyV replace(V v) { return replace_children(v); }
+  virtual AnyV replace(V v) { return replace_children(v); }
+  virtual AnyV replace(V v) { return replace_children(v); }
+  virtual AnyV replace(V v) { return replace_children(v); }
+  virtual AnyV replace(V v) { return replace_children(v); }
+  virtual AnyV replace(V v) { return replace_children(v); }
+  virtual AnyV replace(V v) { return replace_children(v); }
+
+  // the untyped entry point: casts `v` according to its runtime type and calls the typed handler
+  AnyV replace(AnyV v) final {
+    switch (v->type) {
+      case ast_empty: return replace(v->as());
+      case ast_identifier: return replace(v->as());
+      case ast_int_const: return replace(v->as());
+      case ast_string_const: return replace(v->as());
+      case ast_bool_const: return replace(v->as());
+      case ast_nil_tuple: return replace(v->as());
+      case ast_function_call: return replace(v->as());
+      case ast_parenthesized_expr: return replace(v->as());
+      case ast_underscore: return replace(v->as());
+      case ast_type_expression: return replace(v->as());
+      case ast_variable_declaration: return replace(v->as());
+      case ast_tensor: return replace(v->as());
+      case ast_tensor_square: return replace(v->as());
+      case ast_dot_tilde_call: return replace(v->as());
+      case ast_unary_operator: return replace(v->as());
+      case ast_binary_operator: return replace(v->as());
+      case ast_ternary_operator: return replace(v->as());
+      case ast_return_statement: return replace(v->as());
+      case ast_sequence: return replace(v->as());
+      case ast_repeat_statement: return replace(v->as());
+      case ast_while_statement: return replace(v->as());
+      case ast_do_until_statement: return replace(v->as());
+      case ast_try_catch_statement: return replace(v->as());
+      case ast_if_statement: return replace(v->as());
+      case ast_asm_body: return replace(v->as());
+      default:
+        // the place name must point to this method (it used to say "::visit", a leftover from the visitor)
+        throw UnexpectedASTNodeType(v, "ASTReplacerInFunctionBody::replace");
+    }
+  }
+
+public:
+  // Runs the replacer over the body of the given function.
+  // NOTE: the value returned by replace() for the body vertex itself is not stored anywhere,
+  // so only vertices below the body root can effectively be substituted.
+  void start_replacing_in_function(V v) {
+    replace(v->get_body());
+  }
+};
+
+// A replacer that is run over every function of a file.
+// Subclasses decide per function, via should_enter_function(), whether its body is processed.
+class ASTReplacerAllFunctionsInFile : public ASTReplacerInFunctionBody {
+protected:
+  using parent = ASTReplacerAllFunctionsInFile;
+
+  // return true to run the replacer over the body of `v`
+  virtual bool should_enter_function(V v) = 0;
+
+public:
+  // Walks the top-level declarations of the file; declarations for which try_as() yields nothing
+  // are skipped, as well as functions rejected by should_enter_function().
+  void start_replacing_in_file(V v_file) {
+    for (AnyV v : v_file->get_toplevel_declarations()) {
+      auto v_function = v->try_as();
+      if (!v_function) {
+        continue;
+      }
+      if (!should_enter_function(v_function)) {
+        continue;
+      }
+      replace(v_function->get_body());
+    }
+  }
+};
+
+} // namespace tolk
diff --git a/tolk/ast-stringifier.h b/tolk/ast-stringifier.h
new file mode 100644
index 00000000..399017a7
--- /dev/null
+++ b/tolk/ast-stringifier.h
@@ -0,0 +1,233 @@
+/*
+ This file is part of TON Blockchain Library.
+
+ TON Blockchain Library is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ TON Blockchain Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
+*/
+#pragma once
+
+#ifdef TOLK_DEBUG
+
+#include "ast.h"
+#include "ast-visitor.h"
+#include
+
+/*
+ * ASTStringifier is used to print out the whole vertex tree in a human-readable format.
+ * To stringify any vertex, call v->debug_print(), which uses this class.
+ */
+
+namespace tolk {
+
+// Prints a vertex tree as text: one line per vertex (its type name plus an optional
+// vertex-specific postfix), children indented by two spaces per nesting level.
+class ASTStringifier final : public ASTVisitor {
+  // Node type -> printable name. The table is indexed by the numeric value of the node type
+  // (see ast_node_type_to_string), so entries must be listed in the same order as the enum.
+  constexpr static std::pair name_pairs[] = {
+    {ast_empty, "ast_empty"},
+    {ast_identifier, "ast_identifier"},
+    {ast_int_const, "ast_int_const"},
+    {ast_string_const, "ast_string_const"},
+    {ast_bool_const, "ast_bool_const"},
+    {ast_nil_tuple, "ast_nil_tuple"},
+    {ast_function_call, "ast_function_call"},
+    {ast_parenthesized_expr, "ast_parenthesized_expr"},
+    {ast_global_var_declaration, "ast_global_var_declaration"},
+    {ast_global_var_declaration_list, "ast_global_var_declaration_list"},
+    {ast_constant_declaration, "ast_constant_declaration"},
+    {ast_constant_declaration_list, "ast_constant_declaration_list"},
+    {ast_underscore, "ast_underscore"},
+    {ast_type_expression, "ast_type_expression"},
+    {ast_variable_declaration, "ast_variable_declaration"},
+    {ast_tensor, "ast_tensor"},
+    {ast_tensor_square, "ast_tensor_square"},
+    {ast_dot_tilde_call, "ast_dot_tilde_call"},
+    {ast_unary_operator, "ast_unary_operator"},
+    {ast_binary_operator, "ast_binary_operator"},
+    {ast_ternary_operator, "ast_ternary_operator"},
+    {ast_return_statement, "ast_return_statement"},
+    {ast_sequence, "ast_sequence"},
+    {ast_repeat_statement, "ast_repeat_statement"},
+    {ast_while_statement, "ast_while_statement"},
+    {ast_do_until_statement, "ast_do_until_statement"},
+    {ast_try_catch_statement, "ast_try_catch_statement"},
+    {ast_if_statement, "ast_if_statement"},
+    {ast_forall_item, "ast_forall_item"},
+    {ast_forall_list, "ast_forall_list"},
+    {ast_argument, "ast_argument"},
+    {ast_argument_list, "ast_argument_list"},
+    {ast_asm_body, "ast_asm_body"},
+    {ast_function_declaration, "ast_function_declaration"},
+    {ast_pragma_no_arg, "ast_pragma_no_arg"},
+    {ast_pragma_version, "ast_pragma_version"},
+    {ast_include_statement, "ast_include_statement"},
+    {ast_tolk_file, "ast_tolk_file"},
+  };
+
+  // compile-time variant; the static_assert catches a table that has fewer/more entries than
+  // node types exist (ast_tolk_file is expected to be the last one)
+  template
+  constexpr static const char* ast_node_type_to_string() {
+    static_assert(std::size(name_pairs) == ast_tolk_file + 1, "name_pairs needs to be updated");
+    return name_pairs[node_type].second;
+  }
+
+  int depth = 0;          // current nesting level, 2 spaces of indentation per level
+  std::string out;        // accumulated output of to_string_with_children()
+  bool colored = false;   // if set, the postfix is wrapped into ANSI color escapes instead of " // "
+
+  // appends one line for `v`, then visits its children one level deeper
+  template
+  void handle_vertex(V v) {
+    out += std::string(depth * 2, ' ');
+    out += ast_node_type_to_string();
+    if (std::string postfix = specific_str(v); !postfix.empty()) {
+      out += colored ? " \x1b[34m" : " // ";
+      out += postfix;
+      out += colored ? "\x1b[0m" : "";
+    }
+    out += '\n';
+    depth++;
+    visit_children(v);
+    depth--;
+  }
+
+  // returns vertex-specific details (a name, a value, a type, etc.) or an empty string
+  // for vertex types that have nothing specific to show
+  static std::string specific_str(AnyV node) {
+    switch (node->type) {
+      case ast_identifier:
+        return static_cast(node->as()->name);
+      case ast_int_const:
+        return static_cast(node->as()->int_val);
+      case ast_string_const:
+        if (char modifier = node->as()->modifier) {
+          return "\"" + static_cast(node->as()->str_val) + "\"" + std::string(1, modifier);
+        } else {
+          return "\"" + static_cast(node->as()->str_val) + "\"";
+        }
+      case ast_global_var_declaration:
+        return static_cast(node->as()->var_name);
+      case ast_constant_declaration:
+        return static_cast(node->as()->const_name);
+      case ast_type_expression: {
+        std::ostringstream os;
+        os << node->as()->declared_type;
+        return os.str();
+      }
+      case ast_variable_declaration: {
+        std::ostringstream os;
+        os << node->as()->declared_type;
+        return os.str();
+      }
+      case ast_dot_tilde_call:
+        return static_cast(node->as()->method_name);
+      case ast_unary_operator:
+        return static_cast(node->as()->operator_name);
+      case ast_binary_operator:
+        return static_cast(node->as()->operator_name);
+      case ast_sequence:
+        return "↓" + std::to_string(node->as()->get_items().size());
+      case ast_if_statement:
+        return node->as()->is_ifnot ? "ifnot" : "";
+      case ast_argument: {
+        std::ostringstream os;
+        os << node->as()->arg_type;
+        return static_cast(node->as()->arg_name) + ": " + os.str();
+      }
+      case ast_function_declaration: {
+        // "fun name(arg1,arg2,...)"
+        std::string arg_names;
+        for (int i = 0; i < node->as()->get_num_args(); i++) {
+          if (!arg_names.empty())
+            arg_names += ",";
+          arg_names += node->as()->get_arg(i)->arg_name;
+        }
+        return "fun " + node->as()->name + "(" + arg_names + ")";
+      }
+      case ast_pragma_no_arg:
+        return static_cast(node->as()->pragma_name);
+      case ast_pragma_version:
+        return static_cast(node->as()->semver);
+      case ast_include_statement:
+        return static_cast(node->as()->file_name);
+      case ast_tolk_file:
+        return node->as()->file->rel_filename;
+      default:
+        return {};
+    }
+  }
+
+public:
+  explicit ASTStringifier(bool colored) : colored(colored) {
+  }
+
+  // stringifies `v` and its whole subtree; the internal buffer is moved out to the caller
+  // (and cleared at the beginning of the next call)
+  std::string to_string_with_children(AnyV v) {
+    out.clear();
+    visit(v);
+    return std::move(out);
+  }
+
+  // a single-line description of `v` only: "<type name>[ <specific details>]"
+  static std::string to_string_without_children(AnyV v) {
+    std::string result = ast_node_type_to_string(v->type);
+    if (std::string postfix = specific_str(v); !postfix.empty()) {
+      result += ' ';
+      result += postfix;   // reuse the computed string instead of calling specific_str() again
+    }
+    return result;
+  }
+
+  // runtime variant of the lookup in name_pairs
+  static const char* ast_node_type_to_string(ASTNodeType node_type) {
+    return name_pairs[node_type].second;
+  }
+
+  // dispatches on the runtime type of `v`; unknown types result in UnexpectedASTNodeType
+  void visit(AnyV v) override {
+    switch (v->type) {
+      case ast_empty: return handle_vertex(v->as());
+      case ast_identifier: return handle_vertex(v->as());
+      case ast_int_const: return handle_vertex(v->as());
+      case ast_string_const: return handle_vertex(v->as());
+      case ast_bool_const: return handle_vertex(v->as());
+      case ast_nil_tuple: return handle_vertex(v->as());
+      case ast_function_call: return handle_vertex(v->as());
+      case ast_parenthesized_expr: return handle_vertex(v->as());
+      case ast_global_var_declaration: return handle_vertex(v->as());
+      case ast_global_var_declaration_list: return handle_vertex(v->as());
+      case ast_constant_declaration: return handle_vertex(v->as());
+      case ast_constant_declaration_list: return handle_vertex(v->as());
+      case ast_underscore: return handle_vertex(v->as());
+      case ast_type_expression: return handle_vertex(v->as());
+      case ast_variable_declaration: return handle_vertex(v->as());
+      case ast_tensor: return handle_vertex(v->as());
+      case ast_tensor_square: return handle_vertex(v->as());
+      case ast_dot_tilde_call: return handle_vertex(v->as());
+      case ast_unary_operator: return handle_vertex(v->as());
+      case ast_binary_operator: return handle_vertex(v->as());
+      case ast_ternary_operator: return handle_vertex(v->as());
+      case ast_return_statement: return handle_vertex(v->as());
+      case ast_sequence: return handle_vertex(v->as());
+      case ast_repeat_statement: return handle_vertex(v->as());
+      case ast_while_statement: return handle_vertex(v->as());
+      case ast_do_until_statement: return handle_vertex(v->as());
+      case ast_try_catch_statement: return handle_vertex(v->as());
+      case ast_if_statement: return handle_vertex(v->as());
+      case ast_forall_item: return handle_vertex(v->as());
+      case ast_forall_list: return handle_vertex(v->as());
+      case ast_argument: return handle_vertex(v->as());
+      case ast_argument_list: return handle_vertex(v->as());
+      case ast_asm_body: return handle_vertex(v->as());
+      case ast_function_declaration: return handle_vertex(v->as());
+      case ast_pragma_no_arg: return handle_vertex(v->as());
+      case ast_pragma_version: return handle_vertex(v->as());
+      case ast_include_statement: return handle_vertex(v->as());
+      case ast_tolk_file: return handle_vertex(v->as());
+      default:
+        throw UnexpectedASTNodeType(v, "ASTStringifier::visit");
+    }
+  }
+};
+
+} // namespace tolk
+
+#endif // TOLK_DEBUG
diff --git a/tolk/ast-to-legacy.cpp b/tolk/ast-to-legacy.cpp
new file mode 100644
index 00000000..8b0e7810
--- /dev/null
+++ b/tolk/ast-to-legacy.cpp
@@ -0,0 +1,1438 @@
+/*
+ This file is part of TON Blockchain Library.
+
+ TON Blockchain Library is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ TON Blockchain Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
+*/
+#include "ast-to-legacy.h"
+#include "ast.h"
+#include "ast-visitor.h"
+#include "ast-from-tokens.h" // todo should be deleted
+#include "compiler-state.h"
+#include "src-file.h"
+#include "tolk.h"
+#include "td/utils/crypto.h"
+#include "common/refint.h"
+#include "openssl/digest.hpp"
+#include "block/block.h"
+#include "block-parse.h"
+
+/*
+ * In this module, we convert modern AST representation to legacy representation
+ * (global state, Expr, CodeBlob, etc.) to make the rest of compiling process remain unchanged for now.
+ * Over time, I'll gradually get rid of the legacy parts, since most of the code analysis
+ * should be done at AST level.
+ */
+
+namespace tolk {
+
+// Returns the index that the global symbol table (G.symbols) reports for `sym_name` via lookup_add()
+// (judging by the name, the symbol is added if it was not present — confirm in the symbol table).
+static int calc_sym_idx(std::string_view sym_name) {
+  const int sym_idx = G.symbols.lookup_add(sym_name);
+  return sym_idx;
+}
+
+
+// Forward declaration of the overload accepting any vertex: the typed overloads below
+// call process_expr() for their sub-expressions (lhs / rhs) before this overload is defined.
+// `nv` is optional (false by default); its exact meaning is not visible at this point — see the definition.
+Expr* process_expr(AnyV v, CodeBlob& code, bool nv = false);
+
+// Creates a SymValCodeFunc for a new global function and registers it in the global state:
+// the value is attached to `func_sym`, the symbol is appended to G.glob_func,
+// and G.glob_func_cnt (passed as the first constructor argument) is incremented afterwards.
+// Returns the created value.
+static SymValCodeFunc* make_new_glob_func(SymDef* func_sym, TypeExpr* func_type, bool marked_as_pure) {
+  auto* func_val = new SymValCodeFunc{G.glob_func_cnt, func_type, marked_as_pure};
+#ifdef TOLK_DEBUG
+  func_val->name = func_sym->name();
+#endif
+  func_sym->value = func_val;
+  G.glob_func.push_back(func_sym);
+  ++G.glob_func_cnt;
+  return func_val;
+}
+
+// Checks that `func_name` can be used as a global function: the symbol must exist,
+// its value must pass the dynamic_cast below, and that value must have a type.
+// Returns true on success; every failure is reported by throwing ParseError at `loc`,
+// so the function never returns false.
+static bool check_global_func(SrcLocation loc, sym_idx_t func_name) {
+  SymDef* def = lookup_symbol(func_name);
+  if (!def) {
+    throw ParseError(loc, "undefined symbol `" + G.symbols.get_name(func_name) + "`");
+  }
+  SymVal* val = dynamic_cast(def->value);
+  if (!val) {
+    throw ParseError(loc, "symbol `" + G.symbols.get_name(func_name) + "` has no value and no type");
+  }
+  if (!val->get_type()) {
+    throw ParseError(loc, "symbol `" + G.symbols.get_name(func_name) + "` has no type, possibly not a function");
+  }
+  return true;
+}
+
+// Builds an expression applying `fun` to `x`.
+// - `fun` is a global function: Expr::_Apply of fun->sym; a tensor argument is unpacked
+//   (its args become the call arguments), any other argument is passed as a single one;
+//   the impure flag of `fun` is carried over to the result.
+// - otherwise: Expr::_VarApply with `fun` and `x` as its two args.
+// In both cases the result is marked as an rvalue.
+static Expr* make_func_apply(Expr* fun, Expr* x) {
+  if (fun->cls != Expr::_GlobFunc) {
+    Expr* var_apply = new Expr{Expr::_VarApply, {fun, x}};
+    var_apply->flags = Expr::_IsRvalue;
+    return var_apply;
+  }
+
+  Expr* apply = x->cls == Expr::_Tensor
+      ? new Expr{Expr::_Apply, fun->sym, x->args}
+      : new Expr{Expr::_Apply, fun->sym, {x}};
+  apply->flags = Expr::_IsRvalue | (fun->flags & Expr::_IsImpure);
+  return apply;
+}
+
+static void check_import_exists_when_using_sym(AnyV v_usage, const SymDef* used_sym) {
+ if (!v_usage->loc.is_symbol_from_same_or_builtin_file(used_sym->loc)) {
+ const SrcFile* declared_in = used_sym->loc.get_src_file();
+ bool has_import = false;
+ for (const SrcFile::ImportStatement& import_stmt : v_usage->loc.get_src_file()->imports) {
+ if (import_stmt.imported_file == declared_in) {
+ has_import = true;
+ }
+ }
+ if (!has_import) {
+ v_usage->error("Using a non-imported symbol `" + used_sym->name() + "`. Forgot to import \"" + declared_in->rel_filename + "\"?");
+ }
+ }
+}
+
+// Converts a binary operator vertex to a legacy Expr. Three groups of operators are handled:
+// 1) compound assignments (tok_set_*): Letop(copy of lhs, Apply("^_<op>_", lhs, rhs))
+// 2) plain assignment (tok_assign): Letop(lhs, rhs)
+// 3) arithmetic / bitwise / comparison / shift operators: Apply("_<op>_", lhs, rhs)
+// Any other token is reported via v->error(). `nv` is passed on to the lhs only; the rhs always gets false.
+Expr* process_expr(V v, CodeBlob& code, bool nv) {
+ TokenType t = v->tok;
+ std::string operator_name = static_cast(v->operator_name);
+
+ // group 1: compound assignments, the left side is both read and written
+ if (t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || t == tok_set_divR || t == tok_set_divC ||
+ t == tok_set_mod || t == tok_set_modC || t == tok_set_modR || t == tok_set_lshift || t == tok_set_rshift || t == tok_set_rshiftC ||
+ t == tok_set_rshiftR || t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor) {
+ Expr* x = process_expr(v->get_lhs(), code, nv);
+ x->chk_lvalue();
+ x->chk_rvalue();
+ // the operator is looked up as a global function named "^_<op>_" (throws if it doesn't exist)
+ sym_idx_t name = G.symbols.lookup_add("^_" + operator_name + "_");
+ check_global_func(v->loc, name);
+ Expr* y = process_expr(v->get_rhs(), code, false);
+ y->chk_rvalue();
+ // z is the computation itself ...
+ Expr* z = new Expr{Expr::_Apply, name, {x, y}};
+ z->here = v->loc;
+ z->set_val(t);
+ z->flags = Expr::_IsRvalue;
+ z->deduce_type();
+ // ... and res assigns its result back; x is already an argument of z, so its copy is used as the target
+ Expr* res = new Expr{Expr::_Letop, {x->copy(), z}};
+ res->here = v->loc;
+ res->flags = (x->flags & ~Expr::_IsType) | Expr::_IsRvalue;
+ res->set_val(t);
+ res->deduce_type();
+ return res;
+ }
+ // group 2: plain assignment
+ if (t == tok_assign) {
+ Expr* x = process_expr(v->get_lhs(), code, nv);
+ x->chk_lvalue();
+ Expr* y = process_expr(v->get_rhs(), code, false);
+ y->chk_rvalue();
+ // NOTE(review): presumably this declares variables introduced on the left side
+ // (after the right side has been processed) — confirm in Expr
+ x->predefine_vars();
+ x->define_new_vars(code);
+ Expr* res = new Expr{Expr::_Letop, {x, y}};
+ res->here = v->loc;
+ res->flags = (x->flags & ~Expr::_IsType) | Expr::_IsRvalue;
+ res->set_val(t);
+ res->deduce_type();
+ return res;
+ }
+ // group 3: operators without assignment, both sides are rvalues
+ if (t == tok_minus || t == tok_plus ||
+ t == tok_bitwise_and || t == tok_bitwise_or || t == tok_bitwise_xor ||
+ t == tok_eq || t == tok_lt || t == tok_gt || t == tok_leq || t == tok_geq || t == tok_neq || t == tok_spaceship ||
+ t == tok_lshift || t == tok_rshift || t == tok_rshiftC || t == tok_rshiftR ||
+ t == tok_mul || t == tok_div || t == tok_mod || t == tok_divmod ||
+ t == tok_divC || t == tok_divR || t == tok_modC || t == tok_modR) {
+ Expr* res = process_expr(v->get_lhs(), code, nv);
+ res->chk_rvalue();
+ // the operator is looked up as a global function named "_<op>_" (throws if it doesn't exist)
+ sym_idx_t name = G.symbols.lookup_add("_" + operator_name + "_");
+ check_global_func(v->loc, name);
+ Expr* x = process_expr(v->get_rhs(), code, false);
+ x->chk_rvalue();
+ res = new Expr{Expr::_Apply, name, {res, x}};
+ res->here = v->loc;
+ res->set_val(t);
+ res->flags = Expr::_IsRvalue;
+ res->deduce_type();
+ return res;
+ }
+
+ // no return after this call: error() is presumably declared as not returning — confirm in ast.h
+ v->error("unsupported binary operator");
+}
+
+// Converts a unary operator vertex to a legacy Expr.
+// The operator is looked up as a global function named "<op>_" (throws if it doesn't exist).
+// A constant operand is folded in place and returned as is; otherwise Apply("<op>_", operand) is created.
+Expr* process_expr(V v, CodeBlob& code) {
+ TokenType t = v->tok;
+ sym_idx_t name = G.symbols.lookup_add(static_cast(v->operator_name) + "_");
+ check_global_func(v->loc, name);
+ Expr* x = process_expr(v->get_rhs(), code, false);
+ x->chk_rvalue();
+
+ // here's an optimization to convert "-1" (tok_minus tok_int_const) to a const -1, not to Expr::Apply(-,1)
+ // without this, everything still works, but Tolk loses some vars/stack knowledge for now (to be fixed later)
+ // in FunC, it was:
+ // `var fst = -1;` // is constantly 1
+ // `var snd = - 1;` // is Expr::Apply(-), a comment "snd=1" is lost in stack layout comments, and so on
+ // hence, when after grammar modification tok_minus became a true unary operator (not a part of a number),
+ // and thus to preserve existing behavior until compiler parts are completely rewritten, handle this case here
+ if (x->cls == Expr::_Const) {
+ // only `~` and `-` modify the constant; for any other unary operator it is returned unchanged
+ if (t == tok_bitwise_not) {
+ x->intval = ~x->intval;
+ } else if (t == tok_minus) {
+ x->intval = -x->intval;
+ }
+ // the (possibly modified) constant must fit into 257 signed bits
+ if (!x->intval->signed_fits_bits(257)) {
+ v->error("integer overflow");
+ }
+ return x;
+ }
+
+ auto res = new Expr{Expr::_Apply, name, {x}};
+ res->here = v->loc;
+ res->set_val(t);
+ res->flags = Expr::_IsRvalue;
+ res->deduce_type();
+ return res;
+}
+
+Expr* process_expr(V v, CodeBlob& code, bool nv) {
+ Expr* res = process_expr(v->get_lhs(), code, nv);
+ bool modify = v->method_name[0] == '~';
+ Expr* obj = res;
+ if (modify) {
+ obj->chk_lvalue();
+ } else {
+ obj->chk_rvalue();
+ }
+ sym_idx_t name = calc_sym_idx(v->method_name);
+ const SymDef* sym = lookup_symbol(name);
+ if (!sym || !dynamic_cast(sym->value)) {
+ sym_idx_t name1 = G.symbols.lookup(v->method_name.substr(1));
+ if (name1) {
+ const SymDef* sym1 = lookup_symbol(name1);
+ if (sym1 && dynamic_cast