diff --git a/tolk/CMakeLists.txt b/tolk/CMakeLists.txt index a47c7614..5306354d 100644 --- a/tolk/CMakeLists.txt +++ b/tolk/CMakeLists.txt @@ -5,8 +5,10 @@ set(TOLK_SOURCE lexer.cpp symtable.cpp compiler-state.cpp + ast.cpp + ast-from-tokens.cpp + ast-to-legacy.cpp unify-types.cpp - parse-tolk.cpp abscode.cpp gen-abscode.cpp analyzer.cpp diff --git a/tolk/abscode.cpp b/tolk/abscode.cpp index c028a531..0702b1b9 100644 --- a/tolk/abscode.cpp +++ b/tolk/abscode.cpp @@ -25,8 +25,8 @@ namespace tolk { * */ -TmpVar::TmpVar(var_idx_t _idx, int _cls, TypeExpr* _type, SymDef* sym, SrcLocation loc) - : v_type(_type), idx(_idx), cls(_cls), coord(0), where(loc) { +TmpVar::TmpVar(var_idx_t _idx, bool _is_tmp_unnamed, TypeExpr* _type, SymDef* sym, SrcLocation loc) + : v_type(_type), idx(_idx), is_tmp_unnamed(_is_tmp_unnamed), coord(0), where(loc) { if (sym) { name = sym->sym_idx; sym->value->idx = _idx; @@ -59,9 +59,9 @@ void TmpVar::dump(std::ostream& os) const { } void TmpVar::show(std::ostream& os, int omit_idx) const { - if (cls & _Named) { + if (!is_tmp_unnamed) { os << G.symbols.get_name(name); - if (omit_idx && (omit_idx >= 2 || (cls & _UniqueName))) { + if (omit_idx >= 2) { return; } } @@ -474,8 +474,8 @@ void CodeBlob::print(std::ostream& os, int flags) const { os << "-------- END ---------\n\n"; } -var_idx_t CodeBlob::create_var(int cls, TypeExpr* var_type, SymDef* sym, SrcLocation location) { - vars.emplace_back(var_cnt, cls, var_type, sym, location); +var_idx_t CodeBlob::create_var(bool is_tmp_unnamed, TypeExpr* var_type, SymDef* sym, SrcLocation location) { + vars.emplace_back(var_cnt, is_tmp_unnamed, var_type, sym, location); if (sym) { sym->value->idx = var_cnt; } @@ -492,7 +492,7 @@ bool CodeBlob::import_params(FormalArgList arg_list) { SymDef* arg_sym; SrcLocation arg_loc; std::tie(arg_type, arg_sym, arg_loc) = par; - list.push_back(create_var(arg_sym ? 
(TmpVar::_In | TmpVar::_Named) : TmpVar::_In, arg_type, arg_sym, arg_loc)); + list.push_back(create_var(arg_sym == nullptr, arg_type, arg_sym, arg_loc)); } emplace_back(loc, Op::_Import, list); in_var_cnt = var_cnt; diff --git a/tolk/analyzer.cpp b/tolk/analyzer.cpp index 91b66ae9..cefa83b9 100644 --- a/tolk/analyzer.cpp +++ b/tolk/analyzer.cpp @@ -46,7 +46,7 @@ int CodeBlob::split_vars(bool strict) { if (k != 1) { var.coord = ~((n << 8) + k); for (int i = 0; i < k; i++) { - auto v = create_var(vars[j].cls, comp_types[i], 0, vars[j].where); + auto v = create_var(vars[j].is_tmp_unnamed, comp_types[i], 0, vars[j].where); tolk_assert(v == n + i); tolk_assert(vars[v].idx == v); vars[v].name = vars[j].name; diff --git a/tolk/ast-from-tokens.cpp b/tolk/ast-from-tokens.cpp new file mode 100644 index 00000000..38657684 --- /dev/null +++ b/tolk/ast-from-tokens.cpp @@ -0,0 +1,877 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "ast-from-tokens.h" +#include "ast.h" +#include "platform-utils.h" +#include "type-expr.h" + +/* + * Here we construct AST for a tolk file. + * While constructing, no global state is modified. + * Historically, in FunC, there was no AST: while lexing, symbols were registered, types were inferred, and so on. + * There was no way to perform any more or less semantic analysis. 
+ * Implementing AST gives a giant advance for future modifications and stability. + */ + +namespace tolk { + +// given a token, determine whether it's <, or >, or similar +static bool is_comparison_binary_op(TokenType tok) { + return tok == tok_lt || tok == tok_gt || tok == tok_leq || tok == tok_geq || tok == tok_eq || tok == tok_neq || tok == tok_spaceship; +} + +// same as above, but to detect bitwise operators: & | ^ +// (in Tolk, they are used as logical ones due to absence of a boolean type and && || operators) +static bool is_bitwise_binary_op(TokenType tok) { + return tok == tok_bitwise_and || tok == tok_bitwise_or || tok == tok_bitwise_xor; +} + +// same as above, but to detect addition/subtraction +static bool is_add_or_sub_binary_op(TokenType tok) { + return tok == tok_plus || tok == tok_minus; +} + +// fire an error for a case "flags & 0xFF != 0" (equivalent to "flags & 1", probably unexpected) +// it would better be a warning, but we decided to make it a strict error +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_lower_precedence(SrcLocation loc, std::string_view op_lower, std::string_view op_higher) { + std::string name_lower = static_cast(op_lower); + std::string name_higher = static_cast(op_higher); + throw ParseError(loc, name_lower + " has lower precedence than " + name_higher + + ", probably this code won't work as you expected. " + "Use parenthesis: either (... " + name_lower + " ...) to evaluate it first, or (... " + name_higher + " ...) to suppress this error."); +} + +// fire an error for a case "arg1 & arg2 | arg3" +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_mix_bitwise_and_or(SrcLocation loc, std::string_view op1, std::string_view op2) { + std::string name1 = static_cast(op1); + std::string name2 = static_cast(op2); + throw ParseError(loc, "mixing " + name1 + " with " + name2 + " without parenthesis" + ", probably this code won't work as you expected. 
" + "Use parenthesis to emphasize operator precedence."); +} + +// diagnose when bitwise operators are used in a probably wrong way due to tricky precedence +// example: "flags & 0xFF != 0" is equivalent to "flags & 1", most likely it's unexpected +// the only way to suppress this error for the programmer is to use parenthesis +// (how do we detect presence of parenthesis? simple: (0!=1) is ast_parenthesized_expr{ast_binary_operator}, +// that's why if rhs->type == ast_binary_operator, it's not surrounded by parenthesis) +static void diagnose_bitwise_precedence(SrcLocation loc, std::string_view operator_name, AnyV lhs, AnyV rhs) { + // handle "flags & 0xFF != 0" (rhs = "0xFF != 0") + if (rhs->type == ast_binary_operator && is_comparison_binary_op(rhs->as()->tok)) { + fire_error_lower_precedence(loc, operator_name, rhs->as()->operator_name); + } + + // handle "0 != flags & 0xFF" (lhs = "0 != flags") + if (lhs->type == ast_binary_operator && is_comparison_binary_op(lhs->as()->tok)) { + fire_error_lower_precedence(loc, operator_name, lhs->as()->operator_name); + } + + // handle "arg1 & arg2 | arg3" (lhs = "arg1 & arg2") + if (lhs->type == ast_binary_operator && is_bitwise_binary_op(lhs->as()->tok) && lhs->as()->operator_name != operator_name) { + fire_error_mix_bitwise_and_or(loc, lhs->as()->operator_name, operator_name); + } +} + +// diagnose "a << 8 + 1" (equivalent to "a << 9", probably unexpected) +static void diagnose_addition_in_bitshift(SrcLocation loc, std::string_view bitshift_operator_name, AnyV rhs) { + if (rhs->type == ast_binary_operator && is_add_or_sub_binary_op(rhs->as()->tok)) { + fire_error_lower_precedence(loc, bitshift_operator_name, rhs->as()->operator_name); + } +} + +/* + * + * PARSE SOURCE + * + */ + +// TE ::= TA | TA -> TE +// TA ::= int | ... 
| cont | var | _ | () | ( TE { , TE } ) | [ TE { , TE } ] +TypeExpr* parse_type(Lexer& lex, V forall_list); + +TypeExpr* parse_type1(Lexer& lex, V forall_list) { + switch (lex.tok()) { + case tok_int: + lex.next(); + return TypeExpr::new_atomic(TypeExpr::_Int); + case tok_cell: + lex.next(); + return TypeExpr::new_atomic(TypeExpr::_Cell); + case tok_slice: + lex.next(); + return TypeExpr::new_atomic(TypeExpr::_Slice); + case tok_builder: + lex.next(); + return TypeExpr::new_atomic(TypeExpr::_Builder); + case tok_cont: + lex.next(); + return TypeExpr::new_atomic(TypeExpr::_Cont); + case tok_tuple: + lex.next(); + return TypeExpr::new_atomic(TypeExpr::_Tuple); + case tok_var: + case tok_underscore: + lex.next(); + return TypeExpr::new_hole(); + case tok_identifier: { + if (int idx = forall_list ? forall_list->lookup_idx(lex.cur_str()) : -1; idx != -1) { + lex.next(); + return forall_list->get_item(idx)->created_type; + } + lex.error("Is not a type identifier"); + } + default: + break; + } + TokenType c; + if (lex.tok() == tok_opbracket) { + lex.next(); + c = tok_clbracket; + } else { + lex.expect(tok_oppar, ""); + c = tok_clpar; + } + if (lex.tok() == c) { + lex.next(); + return c == tok_clpar ? TypeExpr::new_unit() : TypeExpr::new_tuple({}); + } + auto t1 = parse_type(lex, forall_list); + if (lex.tok() == tok_clpar) { + lex.expect(c, c == tok_clpar ? "')'" : "']'"); + return t1; + } + std::vector tlist{1, t1}; + while (lex.tok() == tok_comma) { + lex.next(); + tlist.push_back(parse_type(lex, forall_list)); + } + lex.expect(c, c == tok_clpar ? "')'" : "']'"); + return c == tok_clpar ? 
TypeExpr::new_tensor(std::move(tlist)) : TypeExpr::new_tuple(std::move(tlist)); +} + +TypeExpr* parse_type(Lexer& lex, V forall_list) { + TypeExpr* res = parse_type1(lex, forall_list); + if (lex.tok() == tok_mapsto) { + lex.next(); + TypeExpr* to = parse_type(lex, forall_list); + return TypeExpr::new_map(res, to); + } + return res; +} + +AnyV parse_argument(Lexer& lex, V forall_list) { + TypeExpr* arg_type = nullptr; + SrcLocation loc = lex.cur_location(); + if (lex.tok() == tok_underscore) { + lex.next(); + if (lex.tok() == tok_comma || lex.tok() == tok_clpar) { + return createV(loc, "", TypeExpr::new_hole()); + } + arg_type = TypeExpr::new_hole(); + loc = lex.cur_location(); + } else if (lex.tok() != tok_identifier) { // int, cell, [X], etc. + arg_type = parse_type(lex, forall_list); + } else if (lex.tok() == tok_identifier) { + if (forall_list && forall_list->lookup_idx(lex.cur_str()) != -1) { + arg_type = parse_type(lex, forall_list); + } else { + arg_type = TypeExpr::new_hole(); + } + } else { + lex.error("Is not a type identifier"); + } + if (lex.tok() == tok_underscore || lex.tok() == tok_comma || lex.tok() == tok_clpar) { + if (lex.tok() == tok_underscore) { + loc = lex.cur_location(); + lex.next(); + } + return createV(loc, "", arg_type); + } + lex.check(tok_identifier, "parameter name"); + loc = lex.cur_location(); + std::string_view arg_name = lex.cur_str(); + lex.next(); + return createV(loc, arg_name, arg_type); +} + +AnyV parse_global_var_declaration(Lexer& lex) { + TypeExpr* declared_type = nullptr; + SrcLocation loc = lex.cur_location(); + if (lex.tok() == tok_underscore) { + lex.next(); + declared_type = TypeExpr::new_hole(); + loc = lex.cur_location(); + } else if (lex.tok() != tok_identifier) { + declared_type = parse_type(lex, nullptr); + } + lex.check(tok_identifier, "global variable name"); + std::string_view var_name = lex.cur_str(); + lex.next(); + return createV(loc, var_name, declared_type); +} + +AnyV parse_expr(Lexer& lex); + +AnyV 
parse_constant_declaration(Lexer& lex) { + TypeExpr *declared_type = nullptr; + if (lex.tok() == tok_int) { + declared_type = TypeExpr::new_atomic(TypeExpr::_Int); + lex.next(); + } else if (lex.tok() == tok_slice) { + declared_type = TypeExpr::new_atomic(TypeExpr::_Slice); + lex.next(); + } + lex.check(tok_identifier, "constant name"); + SrcLocation loc = lex.cur_location(); + std::string_view const_name = lex.cur_str(); + lex.next(); + lex.expect(tok_assign, "'='"); + AnyV init_value = parse_expr(lex); + return createV(loc, const_name, declared_type, init_value); +} + +AnyV parse_argument_list(Lexer& lex, V forall_list) { + SrcLocation loc = lex.cur_location(); + std::vector args; + lex.expect(tok_oppar, "argument list"); + if (lex.tok() != tok_clpar) { + args.push_back(parse_argument(lex, forall_list)); + while (lex.tok() == tok_comma) { + lex.next(); + args.push_back(parse_argument(lex, forall_list)); + } + } + lex.expect(tok_clpar, "')'"); + return createV(loc, std::move(args)); +} + +AnyV parse_constant_declaration_list(Lexer& lex) { + std::vector consts; + SrcLocation loc = lex.cur_location(); + lex.expect(tok_const, "'const'"); + while (true) { + consts.push_back(parse_constant_declaration(lex)); + if (lex.tok() != tok_comma) { + break; + } + lex.expect(tok_comma, "','"); + } + lex.expect(tok_semicolon, "';'"); + return createV(loc, std::move(consts)); +} + +AnyV parse_global_var_declaration_list(Lexer& lex) { + std::vector globals; + SrcLocation loc = lex.cur_location(); + lex.expect(tok_global, "'global'"); + while (true) { + globals.push_back(parse_global_var_declaration(lex)); + if (lex.tok() != tok_comma) { + break; + } + lex.expect(tok_comma, "','"); + } + lex.expect(tok_semicolon, "';'"); + return createV(loc, std::move(globals)); +} + +// parse ( E { , E } ) | () | [ E { , E } ] | [] | id | num | _ +AnyV parse_expr100(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + if (lex.tok() == tok_oppar) { + lex.next(); + if (lex.tok() == tok_clpar) { + 
lex.next(); + return createV(loc, {}); + } + AnyV res = parse_expr(lex); + if (lex.tok() == tok_clpar) { + lex.next(); + return createV(loc, res); + } + std::vector items; + bool is_type_expression = res->type == ast_type_expression; // to differ `(a,b)` and `(int,slice)` + items.emplace_back(res); + while (lex.tok() == tok_comma) { + lex.next(); + AnyV item = parse_expr(lex); + if (is_type_expression != (item->type == ast_type_expression)) { + lex.error("mixing type and non-type expressions inside the same tuple"); + } + items.emplace_back(item); + } + lex.expect(tok_clpar, "')'"); + if (is_type_expression) { + std::vector types; + types.reserve(items.size()); + for (AnyV item : items) { + types.emplace_back(item->as()->declared_type); + } + return createV(loc, TypeExpr::new_tensor(std::move(types))); + } + return createV(loc, std::move(items)); + } + if (lex.tok() == tok_opbracket) { + lex.next(); + if (lex.tok() == tok_clbracket) { + lex.next(); + return createV(loc, {}); + } + AnyV res = parse_expr(lex); + std::vector items; + bool is_type_expression = res->type == ast_type_expression; // to differ `(a,b)` and `(int,slice)` + items.emplace_back(res); + while (lex.tok() == tok_comma) { + lex.next(); + AnyV item = parse_expr(lex); + if (is_type_expression != (item->type == ast_type_expression)) { + lex.error("mixing type and non-type expressions inside the same tuple"); + } + items.emplace_back(item); + } + lex.expect(tok_clbracket, "']'"); + if (is_type_expression) { + std::vector types; + types.reserve(items.size()); + for (AnyV item : items) { + types.emplace_back(item->as()->declared_type); + } + return createV(loc, TypeExpr::new_tuple(TypeExpr::new_tensor(std::move(types)))); + } + return createV(loc, std::move(items)); + } + TokenType t = lex.tok(); + if (t == tok_int_const) { + std::string_view int_val = lex.cur_str(); + lex.next(); + return createV(loc, int_val); + } + if (t == tok_string_const) { + std::string_view str_val = lex.cur_str(); + lex.next(); 
+ char modifier = 0; + if (lex.tok() == tok_string_modifier) { + modifier = lex.cur_str()[0]; + lex.next(); + } + return createV(loc, str_val, modifier); + } + if (t == tok_underscore) { + lex.next(); + return createV(loc); + } + if (t == tok_var) { + lex.next(); + return createV(loc, TypeExpr::new_hole()); + } + if (t == tok_int || t == tok_cell || t == tok_slice || t == tok_builder || t == tok_cont || t == tok_tuple) { + lex.next(); + return createV(loc, TypeExpr::new_atomic(t)); + } + if (t == tok_true || t == tok_false) { + lex.next(); + return createV(loc, t == tok_true); + } + if (t == tok_nil) { + lex.next(); + return createV(loc); + } + if (t == tok_identifier) { + std::string_view str_val = lex.cur_str(); + lex.next(); + return createV(loc, str_val); + } + lex.expect(tok_identifier, "identifier"); + return nullptr; +} + +// parse E { E } +AnyV parse_expr90(Lexer& lex) { + AnyV res = parse_expr100(lex); + while (lex.tok() == tok_oppar || lex.tok() == tok_opbracket || (lex.tok() == tok_identifier && lex.cur_str()[0] != '.' && lex.cur_str()[0] != '~')) { + if (const auto* v_type_expr = res->try_as()) { + AnyV dest = parse_expr100(lex); + return createV(v_type_expr->loc, v_type_expr->declared_type, dest); + } else { + AnyV arg = parse_expr100(lex); + return createV(res->loc, res, arg); + } + } + return res; +} + +// parse E { .method E | ~method E } +AnyV parse_expr80(Lexer& lex) { + AnyV lhs = parse_expr90(lex); + while (lex.tok() == tok_identifier && (lex.cur_str()[0] == '.' 
|| lex.cur_str()[0] == '~')) { + std::string_view method_name = lex.cur_str(); + SrcLocation loc = lex.cur_location(); + lex.next(); + const ASTNodeBase *arg = parse_expr100(lex); + lhs = createV(loc, method_name, lhs, arg); + } + return lhs; +} + +// parse [ ~ | - | + ] E +AnyV parse_expr75(Lexer& lex) { + TokenType t = lex.tok(); + if (t == tok_bitwise_not || t == tok_minus || t == tok_plus) { + SrcLocation loc = lex.cur_location(); + std::string_view operator_name = lex.cur_str(); + lex.next(); + AnyV rhs = parse_expr75(lex); + return createV(loc, operator_name, t, rhs); + } else { + return parse_expr80(lex); + } +} + +// parse E { (* | / | % | /% | ^/ | ~/ | ^% | ~% ) E } +AnyV parse_expr30(Lexer& lex) { + AnyV lhs = parse_expr75(lex); + TokenType t = lex.tok(); + while (t == tok_mul || t == tok_div || t == tok_mod || t == tok_divmod || t == tok_divC || + t == tok_divR || t == tok_modC || t == tok_modR) { + SrcLocation loc = lex.cur_location(); + std::string_view operator_name = lex.cur_str(); + lex.next(); + AnyV rhs = parse_expr75(lex); + lhs = createV(loc, operator_name, t, lhs, rhs); + t = lex.tok(); + } + return lhs; +} + +// parse E { (+ | -) E } +AnyV parse_expr20(Lexer& lex) { + AnyV lhs = parse_expr30(lex); + TokenType t = lex.tok(); + while (t == tok_minus || t == tok_plus) { + SrcLocation loc = lex.cur_location(); + std::string_view operator_name = lex.cur_str(); + lex.next(); + AnyV rhs = parse_expr30(lex); + lhs = createV(loc, operator_name, t, lhs, rhs); + t = lex.tok(); + } + return lhs; +} + +// parse E { ( << | >> | ~>> | ^>> ) E } +AnyV parse_expr17(Lexer& lex) { + AnyV lhs = parse_expr20(lex); + TokenType t = lex.tok(); + while (t == tok_lshift || t == tok_rshift || t == tok_rshiftC || t == tok_rshiftR) { + SrcLocation loc = lex.cur_location(); + std::string_view operator_name = lex.cur_str(); + lex.next(); + AnyV rhs = parse_expr20(lex); + diagnose_addition_in_bitshift(loc, operator_name, rhs); + lhs = createV(loc, operator_name, t, lhs, 
rhs); + t = lex.tok(); + } + return lhs; +} + +// parse E [ (== | < | > | <= | >= | != | <=> ) E ] +AnyV parse_expr15(Lexer& lex) { + AnyV lhs = parse_expr17(lex); + TokenType t = lex.tok(); + if (t == tok_eq || t == tok_lt || t == tok_gt || t == tok_leq || t == tok_geq || t == tok_neq || t == tok_spaceship) { + SrcLocation loc = lex.cur_location(); + std::string_view operator_name = lex.cur_str(); + lex.next(); + AnyV rhs = parse_expr17(lex); + lhs = createV(loc, operator_name, t, lhs, rhs); + } + return lhs; +} + +// parse E { ( & | `|` | ^ ) E } +AnyV parse_expr14(Lexer& lex) { + AnyV lhs = parse_expr15(lex); + TokenType t = lex.tok(); + while (t == tok_bitwise_and || t == tok_bitwise_or || t == tok_bitwise_xor) { + SrcLocation loc = lex.cur_location(); + std::string_view operator_name = lex.cur_str(); + lex.next(); + AnyV rhs = parse_expr15(lex); + diagnose_bitwise_precedence(loc, operator_name, lhs, rhs); + lhs = createV(loc, operator_name, t, lhs, rhs); + t = lex.tok(); + } + return lhs; +} + +// parse E [ ? E : E ] +AnyV parse_expr13(Lexer& lex) { + AnyV res = parse_expr14(lex); + if (lex.tok() == tok_question) { + SrcLocation loc = lex.cur_location(); + lex.next(); + AnyV when_true = parse_expr(lex); + lex.expect(tok_colon, "':'"); + AnyV when_false = parse_expr13(lex); + return createV(loc, res, when_true, when_false); + } + return res; +} + +// parse LE1 (= | += | -= | ... 
) E2 +AnyV parse_expr10(Lexer& lex) { + AnyV lhs = parse_expr13(lex); + TokenType t = lex.tok(); + if (t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || t == tok_set_divR || t == tok_set_divC || + t == tok_set_mod || t == tok_set_modC || t == tok_set_modR || t == tok_set_lshift || t == tok_set_rshift || t == tok_set_rshiftC || + t == tok_set_rshiftR || t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor || + t == tok_assign) { + SrcLocation loc = lex.cur_location(); + std::string_view operator_name = lex.cur_str(); + lex.next(); + AnyV rhs = parse_expr10(lex); + return createV(loc, operator_name, t, lhs, rhs); + } + return lhs; +} + +AnyV parse_expr(Lexer& lex) { + return parse_expr10(lex); +} + +AnyV parse_return_stmt(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + lex.expect(tok_return, "'return'"); + AnyV child = parse_expr(lex); + lex.expect(tok_semicolon, "';'"); + return createV(loc, child); +} + +AnyV parse_statement(Lexer& lex); + +V parse_sequence(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + lex.expect(tok_opbrace, "'{'"); + std::vector items; + while (lex.tok() != tok_clbrace) { + items.push_back(parse_statement(lex)); + } + SrcLocation loc_end = lex.cur_location(); + lex.expect(tok_clbrace, "'}'"); + return createV(loc, loc_end, items); +} + +AnyV parse_repeat_statement(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + lex.expect(tok_repeat, "'repeat'"); + AnyV cond = parse_expr(lex); + V body = parse_sequence(lex); + return createV(loc, cond, body); +} + +AnyV parse_while_statement(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + lex.expect(tok_while, "'while'"); + AnyV cond = parse_expr(lex); + V body = parse_sequence(lex); + return createV(loc, cond, body); +} + +ASTNodeBase* parse_do_until_statement(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + lex.expect(tok_do, "'do'"); + V body = parse_sequence(lex); + lex.expect(tok_until, "'until'"); + AnyV 
cond = parse_expr(lex); + return createV(loc, body, cond); +} + +AnyV parse_try_catch_statement(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + lex.expect(tok_try, "'try'"); + V try_body = parse_sequence(lex); + lex.expect(tok_catch, "'catch'"); + AnyV catch_expr = parse_expr(lex); + V catch_body = parse_sequence(lex); + return createV(loc, try_body, catch_expr, catch_body); +} + +AnyV parse_if_statement(Lexer& lex, bool is_ifnot) { + SrcLocation loc = lex.cur_location(); + lex.next(); + AnyV cond = parse_expr(lex); + V if_body = parse_sequence(lex); + V else_body = nullptr; + if (lex.tok() == tok_else) { + lex.next(); + else_body = parse_sequence(lex); + } else if (lex.tok() == tok_elseif) { + AnyV v_inner_if = parse_if_statement(lex, false); + else_body = createV(v_inner_if->loc, lex.cur_location(), {v_inner_if}); + } else if (lex.tok() == tok_elseifnot) { + AnyV v_inner_if = parse_if_statement(lex, true); + else_body = createV(v_inner_if->loc, lex.cur_location(), {v_inner_if}); + } else { + else_body = createV(lex.cur_location(), lex.cur_location(), {}); + } + return createV(loc, is_ifnot, cond, if_body, else_body); +} + +AnyV parse_statement(Lexer& lex) { + switch (lex.tok()) { + case tok_return: + return parse_return_stmt(lex); + case tok_opbrace: + return parse_sequence(lex); + case tok_repeat: + return parse_repeat_statement(lex); + case tok_if: + return parse_if_statement(lex, false); + case tok_ifnot: + return parse_if_statement(lex, true); + case tok_do: + return parse_do_until_statement(lex); + case tok_while: + return parse_while_statement(lex); + case tok_try: + return parse_try_catch_statement(lex); + case tok_semicolon: { + lex.next(); + return createV; + } + default: { + AnyV expr = parse_expr(lex); + lex.expect(tok_semicolon, "';'"); + return expr; + } + } +} + +AnyV parse_func_body(Lexer& lex) { + return parse_sequence(lex); +} + +AnyV parse_asm_func_body(Lexer& lex, V arg_list) { + SrcLocation loc = lex.cur_location(); + 
lex.expect(tok_asm, "'asm'"); + size_t n_args = arg_list->size(); + if (n_args > 16) { + throw ParseError{loc, "assembler built-in function can have at most 16 arguments"}; + } + std::vector arg_order, ret_order; + if (lex.tok() == tok_oppar) { + lex.next(); + while (lex.tok() == tok_identifier || lex.tok() == tok_int_const) { + int arg_idx = arg_list->lookup_idx(lex.cur_str()); + if (arg_idx == -1) { + lex.error("argument name expected"); + } + arg_order.push_back(arg_idx); + lex.next(); + } + if (lex.tok() == tok_mapsto) { + lex.next(); + while (lex.tok() == tok_int_const) { + int ret_idx = std::atoi(static_cast(lex.cur_str()).c_str()); + ret_order.push_back(ret_idx); + lex.next(); + } + } + lex.expect(tok_clpar, "')'"); + } + std::vector asm_commands; + lex.check(tok_string_const, "\"ASM COMMAND\""); + while (lex.tok() == tok_string_const) { + std::string_view asm_command = lex.cur_str(); + asm_commands.push_back(createV(lex.cur_location(), asm_command, 0)); + lex.next(); + } + lex.expect(tok_semicolon, "';'"); + return createV(loc, std::move(arg_order), std::move(ret_order), std::move(asm_commands)); +} + +AnyV parse_forall(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + std::vector forall_items; + lex.expect(tok_forall, "'forall'"); + int idx = 0; + while (true) { + lex.check(tok_identifier, "T expected"); + std::string_view nameT = lex.cur_str(); + TypeExpr* type = TypeExpr::new_var(idx++); + forall_items.emplace_back(createV(lex.cur_location(), type, static_cast(nameT))); + lex.next(); + if (lex.tok() != tok_comma) { + break; + } + lex.next(); + } + lex.expect(tok_mapsto, "'->'"); + return createV{loc, std::move(forall_items)}; +} + +AnyV parse_function_declaration(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + V forall_list = nullptr; + bool is_get_method = false; + bool is_builtin = false; + bool marked_as_inline = false; + bool marked_as_inline_ref = false; + if (lex.tok() == tok_forall) { + forall_list = parse_forall(lex)->as(); + } else 
if (lex.tok() == tok_get) { + is_get_method = true; + lex.next(); + } + TypeExpr* ret_type = parse_type(lex, forall_list); + lex.check(tok_identifier, "function name identifier expected"); + std::string func_name = static_cast(lex.cur_str()); + lex.next(); + V arg_list = parse_argument_list(lex, forall_list)->as(); + bool marked_as_pure = false; + if (lex.tok() == tok_impure) { + static bool warning_shown = false; + if (!warning_shown) { + lex.cur_location().show_warning("`impure` specifier is deprecated. All functions are impure by default, use `pure` to mark a function as pure"); + warning_shown = true; + } + lex.next(); + } else if (lex.tok() == tok_pure) { + marked_as_pure = true; + lex.next(); + } + if (lex.tok() == tok_inline) { + marked_as_inline = true; + lex.next(); + } else if (lex.tok() == tok_inlineref) { + marked_as_inline_ref = true; + lex.next(); + } + V method_id = nullptr; + if (lex.tok() == tok_method_id) { + if (is_get_method) { + lex.error("both `get` and `method_id` are not allowed"); + } + lex.next(); + if (lex.tok() == tok_oppar) { // method_id(N) + lex.next(); + lex.check(tok_int_const, "number"); + std::string_view int_val = lex.cur_str(); + method_id = createV(lex.cur_location(), int_val); + lex.next(); + lex.expect(tok_clpar, "')'"); + } else { + static bool warning_shown = false; + if (!warning_shown) { + lex.cur_location().show_warning("`method_id` specifier is deprecated, use `get` keyword.\nExample: `get int seqno() { ... 
}`"); + warning_shown = true; + } + is_get_method = true; + } + } + + AnyV body = nullptr; + + if (lex.tok() == tok_builtin) { + is_builtin = true; + body = createV; + lex.next(); + lex.expect(tok_semicolon, "';'"); + } else if (lex.tok() == tok_semicolon) { + // todo this is just a prototype, remove this "feature" in the future + lex.next(); + body = createV; + } else if (lex.tok() == tok_opbrace) { + body = parse_func_body(lex); + } else if (lex.tok() == tok_asm) { + body = parse_asm_func_body(lex, arg_list); + } else { + lex.expect(tok_opbrace, "function body block"); + } + + auto f_declaration = createV(loc, func_name, arg_list, body); + f_declaration->ret_type = ret_type; + f_declaration->forall_list = forall_list; + f_declaration->marked_as_pure = marked_as_pure; + f_declaration->marked_as_get_method = is_get_method; + f_declaration->marked_as_builtin = is_builtin; + f_declaration->marked_as_inline = marked_as_inline; + f_declaration->marked_as_inline_ref = marked_as_inline_ref; + f_declaration->method_id = method_id; + return f_declaration; +} + +AnyV parse_pragma(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + lex.next_special(tok_pragma_name, "pragma name"); + std::string_view pragma_name = lex.cur_str(); + if (pragma_name == "version") { + lex.next(); + TokenType cmp_tok = lex.tok(); + bool valid = cmp_tok == tok_gt || cmp_tok == tok_geq || cmp_tok == tok_lt || cmp_tok == tok_leq || cmp_tok == tok_eq || cmp_tok == tok_bitwise_xor; + if (!valid) { + lex.error("invalid comparison operator"); + } + lex.next_special(tok_semver, "semver"); + std::string_view semver = lex.cur_str(); + lex.next(); + lex.expect(tok_semicolon, "';'"); + return createV(loc, cmp_tok, semver); + } + lex.next(); + lex.expect(tok_semicolon, "';'"); + return createV(loc, pragma_name); +} + +AnyV parse_include_statement(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + lex.expect(tok_include, "#include"); + lex.check(tok_string_const, "source file name"); + std::string_view 
rel_filename = lex.cur_str(); + if (rel_filename.empty()) { + lex.error("imported file name is an empty string"); + } + lex.next(); + lex.expect(tok_semicolon, "';'"); + return createV(loc, rel_filename); +} + +// the main (exported) function +AnyV parse_src_file_to_ast(SrcFile* file) { + file->was_parsed = true; + + std::vector toplevel_declarations; + Lexer lex(file); + while (!lex.is_eof()) { + if (lex.tok() == tok_pragma) { + toplevel_declarations.push_back(parse_pragma(lex)); + } else if (lex.tok() == tok_include) { + toplevel_declarations.push_back(parse_include_statement(lex)); + } else if (lex.tok() == tok_global) { + toplevel_declarations.push_back(parse_global_var_declaration_list(lex)); + } else if (lex.tok() == tok_const) { + toplevel_declarations.push_back(parse_constant_declaration_list(lex)); + } else { + toplevel_declarations.push_back(parse_function_declaration(lex)); + } + } + return createV(file, std::move(toplevel_declarations)); +} + +} // namespace tolk diff --git a/tolk/ast-from-tokens.h b/tolk/ast-from-tokens.h new file mode 100644 index 00000000..65b82ad6 --- /dev/null +++ b/tolk/ast-from-tokens.h @@ -0,0 +1,27 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#pragma once + +#include "src-file.h" + +namespace tolk { + +struct ASTNodeBase; + +const ASTNodeBase* parse_src_file_to_ast(SrcFile* file); + +} // namespace tolk diff --git a/tolk/ast-replacer.h b/tolk/ast-replacer.h new file mode 100644 index 00000000..feae5616 --- /dev/null +++ b/tolk/ast-replacer.h @@ -0,0 +1,155 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +#include "ast.h" +#include "platform-utils.h" + +/* + * A module of implementing traversing a vertex tree and replacing any vertex to another. + * For example, to replace "beginCell()" call to "begin_cell()" in a function body (in V) + * regardless of the place this call is performed, you need to iterate over all the function AST, + * to find ast_function_call(beginCell), create ast_function_call(begin_cell) instead and to replace + * a pointer inside its parent. + * Inheriting from ASTVisitor makes this task quite simple, without any boilerplate. + * + * If you need just to traverse a vertex tree without replacing vertices, + * consider another api: ast-visitor.h. 
+ */ + +namespace tolk { + +/* Base of all replacers. The replace_children() overloads (one per node shape: leaf / unary / binary / vararg) overwrite every child pointer of a node with the result of replace(child) and return the same node; const is cast away to do that in place. */ +class ASTReplacer { +protected: + GNU_ATTRIBUTE_ALWAYS_INLINE static AnyV replace_children(const ASTNodeLeaf* v) { + return v; + } + + GNU_ATTRIBUTE_ALWAYS_INLINE AnyV replace_children(const ASTNodeUnary* v) { + auto* v_mutable = const_cast(v); + v_mutable->child = replace(v_mutable->child); + return v_mutable; + } + + GNU_ATTRIBUTE_ALWAYS_INLINE AnyV replace_children(const ASTNodeBinary* v) { + auto* v_mutable = const_cast(v); + v_mutable->lhs = replace(v->lhs); + v_mutable->rhs = replace(v->rhs); + return v_mutable; + } + + GNU_ATTRIBUTE_ALWAYS_INLINE AnyV replace_children(const ASTNodeVararg* v) { + auto* v_mutable = const_cast(v); + for (AnyV& child : v_mutable->children) { + child = replace(child); + } + return v_mutable; + } + +public: + virtual ~ASTReplacer() = default; + + /* returns the vertex that should stand in place of v (possibly v itself) */ + virtual AnyV replace(AnyV v) = 0; +}; + +class ASTReplacerInFunctionBody : public ASTReplacer { +protected: + using parent = ASTReplacerInFunctionBody; + + /* one overridable replace() per vertex kind; by default each of them only recurses into the children */ + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } +
virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + + /* Dispatches on the runtime node type to the typed replace() overload for that vertex kind. Node types that are not listed here are rejected with UnexpectedASTNodeType. */ + AnyV replace(AnyV v) final { + switch (v->type) { + case ast_empty: return replace(v->as()); + case ast_identifier: return replace(v->as()); + case ast_int_const: return replace(v->as()); + case ast_string_const: return replace(v->as()); + case ast_bool_const: return replace(v->as()); + case ast_nil_tuple: return replace(v->as()); + case ast_function_call: return replace(v->as()); + case ast_parenthesized_expr: return replace(v->as()); + case ast_underscore: return replace(v->as()); + case ast_type_expression: return replace(v->as()); + case ast_variable_declaration: return replace(v->as()); + case ast_tensor: return replace(v->as()); + case ast_tensor_square: return replace(v->as()); + case ast_dot_tilde_call: return replace(v->as()); + case ast_unary_operator: return replace(v->as()); + case ast_binary_operator: return replace(v->as()); + case ast_ternary_operator: return replace(v->as()); + case ast_return_statement: return replace(v->as()); + case ast_sequence: return replace(v->as()); + case ast_repeat_statement: return replace(v->as()); + case ast_while_statement: return replace(v->as()); + case ast_do_until_statement: return replace(v->as()); + case ast_try_catch_statement: return replace(v->as()); + case ast_if_statement: return replace(v->as()); + case ast_asm_body: return replace(v->as()); + default: + /* the place string names this method, so that the diagnostic points at the replacer and not at a visitor */ + throw UnexpectedASTNodeType(v, "ASTReplacerInFunctionBody::replace"); + } + } + +public: + /* Entry point: runs the replacer over the body of the given function. Note that the vertex returned for the body itself is discarded. */ + void start_replacing_in_function(V v) { + replace(v->get_body()); + } +}; + +class
ASTReplacerAllFunctionsInFile : public ASTReplacerInFunctionBody { +protected: + using parent = ASTReplacerAllFunctionsInFile; + + /* filter implemented by subclasses: only functions for which this returns true are processed */ + virtual bool should_enter_function(V v) = 0; + +public: + /* For every top-level declaration of the file for which try_as() yields a non-null vertex and should_enter_function() returns true, runs replace() on its body. The vertex returned for the body itself is discarded. */ + void start_replacing_in_file(V v_file) { + for (AnyV v : v_file->get_toplevel_declarations()) { + if (auto v_function = v->try_as()) { + if (should_enter_function(v_function)) { + replace(v_function->get_body()); + } + } + } + } +}; + +} // namespace tolk diff --git a/tolk/ast-stringifier.h b/tolk/ast-stringifier.h new file mode 100644 index 00000000..399017a7 --- /dev/null +++ b/tolk/ast-stringifier.h @@ -0,0 +1,233 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>. +*/ +#pragma once + +#ifdef TOLK_DEBUG + +#include "ast.h" +#include "ast-visitor.h" +#include + +/* + * ASTStringifier is used to print out the whole vertex tree in a human-readable format. + * To stringify any vertex, call v->debug_print(), which uses this class.
+ */ + +namespace tolk { + +class ASTStringifier final : public ASTVisitor { + /* Printable name of every node type. The table is read as name_pairs[node_type], so the position of an entry must equal the numeric value of its node type; the static_assert below only checks the total number of entries. */ + constexpr static std::pair name_pairs[] = { + {ast_empty, "ast_empty"}, + {ast_identifier, "ast_identifier"}, + {ast_int_const, "ast_int_const"}, + {ast_string_const, "ast_string_const"}, + {ast_bool_const, "ast_bool_const"}, + {ast_nil_tuple, "ast_nil_tuple"}, + {ast_function_call, "ast_function_call"}, + {ast_parenthesized_expr, "ast_parenthesized_expr"}, + {ast_global_var_declaration, "ast_global_var_declaration"}, + {ast_global_var_declaration_list, "ast_global_var_declaration_list"}, + {ast_constant_declaration, "ast_constant_declaration"}, + {ast_constant_declaration_list, "ast_constant_declaration_list"}, + {ast_underscore, "ast_underscore"}, + {ast_type_expression, "ast_type_expression"}, + {ast_variable_declaration, "ast_variable_declaration"}, + {ast_tensor, "ast_tensor"}, + {ast_tensor_square, "ast_tensor_square"}, + {ast_dot_tilde_call, "ast_dot_tilde_call"}, + {ast_unary_operator, "ast_unary_operator"}, + {ast_binary_operator, "ast_binary_operator"}, + {ast_ternary_operator, "ast_ternary_operator"}, + {ast_return_statement, "ast_return_statement"}, + {ast_sequence, "ast_sequence"}, + {ast_repeat_statement, "ast_repeat_statement"}, + {ast_while_statement, "ast_while_statement"}, + {ast_do_until_statement, "ast_do_until_statement"}, + {ast_try_catch_statement, "ast_try_catch_statement"}, + {ast_if_statement, "ast_if_statement"}, + {ast_forall_item, "ast_forall_item"}, + {ast_forall_list, "ast_forall_list"}, + {ast_argument, "ast_argument"}, + {ast_argument_list, "ast_argument_list"}, + {ast_asm_body, "ast_asm_body"}, + {ast_function_declaration, "ast_function_declaration"}, + {ast_pragma_no_arg, "ast_pragma_no_arg"}, + {ast_pragma_version, "ast_pragma_version"}, + {ast_include_statement, "ast_include_statement"}, + {ast_tolk_file, "ast_tolk_file"}, + }; + + template + constexpr static const char* ast_node_type_to_string() { + static_assert(std::size(name_pairs) ==
ast_tolk_file + 1, "name_pairs needs to be updated"); + return name_pairs[node_type].second; + } + + /* current nesting level; every level is printed as two spaces of indentation */ + int depth = 0; + /* accumulated output text */ + std::string out; + /* when set, node details are wrapped into ANSI color escape sequences instead of being prefixed with a comment marker */ + bool colored = false; + + /* Appends one line for v (indentation, node type name, optional node details), then visits the children of v one level deeper. */ + template + void handle_vertex(V v) { + out += std::string(depth * 2, ' '); + out += ast_node_type_to_string(); + if (std::string postfix = specific_str(v); !postfix.empty()) { + out += colored ? " \x1b[34m" : " // "; + out += postfix; + out += colored ? "\x1b[0m" : ""; + } + out += '\n'; + depth++; + visit_children(v); + depth--; + } + + /* Node-specific details printed next to the type name (a name, a literal, a declared type, ...); an empty string for node types without a dedicated case. */ + static std::string specific_str(AnyV node) { + switch (node->type) { + case ast_identifier: + return static_cast(node->as()->name); + case ast_int_const: + return static_cast(node->as()->int_val); + case ast_string_const: + if (char modifier = node->as()->modifier) { + return "\"" + static_cast(node->as()->str_val) + "\"" + std::string(1, modifier); + } else { + return "\"" + static_cast(node->as()->str_val) + "\""; + } + case ast_global_var_declaration: + return static_cast(node->as()->var_name); + case ast_constant_declaration: + return static_cast(node->as()->const_name); + case ast_type_expression: { + std::ostringstream os; + os << node->as()->declared_type; + return os.str(); + } + case ast_variable_declaration: { + std::ostringstream os; + os << node->as()->declared_type; + return os.str(); + } + case ast_dot_tilde_call: + return static_cast(node->as()->method_name); + case ast_unary_operator: + return static_cast(node->as()->operator_name); + case ast_binary_operator: + return static_cast(node->as()->operator_name); + case ast_sequence: + return "↓" + std::to_string(node->as()->get_items().size()); + case ast_if_statement: + return node->as()->is_ifnot ?
"ifnot" : ""; + case ast_argument: { + std::ostringstream os; + os << node->as()->arg_type; + return static_cast(node->as()->arg_name) + ": " + os.str(); + } + case ast_function_declaration: { + /* "fun name(arg1,arg2,...)": only the argument names are listed */ + std::string arg_names; + for (int i = 0; i < node->as()->get_num_args(); i++) { + if (!arg_names.empty()) + arg_names += ","; + arg_names += node->as()->get_arg(i)->arg_name; + } + return "fun " + node->as()->name + "(" + arg_names + ")"; + } + case ast_pragma_no_arg: + return static_cast(node->as()->pragma_name); + case ast_pragma_version: + return static_cast(node->as()->semver); + case ast_include_statement: + return static_cast(node->as()->file_name); + case ast_tolk_file: + return node->as()->file->rel_filename; + default: + return {}; + } + } + +public: + explicit ASTStringifier(bool colored) : colored(colored) { + } + + /* Renders v and its whole subtree, one vertex per line. The internal buffer is moved out into the result, so the stringifier can be reused afterwards. */ + std::string to_string_with_children(AnyV v) { + out.clear(); + visit(v); + return std::move(out); + } + + /* Renders only v itself: the node type name, followed by a space and the node details when there are any. */ + static std::string to_string_without_children(AnyV v) { + std::string result = ast_node_type_to_string(v->type); + if (std::string postfix = specific_str(v); !postfix.empty()) { + result += ' '; + /* reuse the already computed details instead of building them a second time */ + result += postfix; + } + return result; + } + + /* runtime counterpart of the templated overload: plain table lookup by node type */ + static const char* ast_node_type_to_string(ASTNodeType node_type) { + return name_pairs[node_type].second; + } + + /* dispatches on the runtime node type to handle_vertex() for that vertex kind */ + void visit(AnyV v) override { + switch (v->type) { + case ast_empty: return handle_vertex(v->as()); + case ast_identifier: return handle_vertex(v->as()); + case ast_int_const: return handle_vertex(v->as()); + case ast_string_const: return handle_vertex(v->as()); + case ast_bool_const: return handle_vertex(v->as()); + case ast_nil_tuple: return handle_vertex(v->as()); + case ast_function_call: return handle_vertex(v->as()); + case ast_parenthesized_expr: return handle_vertex(v->as()); + case ast_global_var_declaration: return handle_vertex(v->as()); + case ast_global_var_declaration_list: return handle_vertex(v->as()); + case ast_constant_declaration: return handle_vertex(v->as()); +
case ast_constant_declaration_list: return handle_vertex(v->as()); + case ast_underscore: return handle_vertex(v->as()); + case ast_type_expression: return handle_vertex(v->as()); + case ast_variable_declaration: return handle_vertex(v->as()); + case ast_tensor: return handle_vertex(v->as()); + case ast_tensor_square: return handle_vertex(v->as()); + case ast_dot_tilde_call: return handle_vertex(v->as()); + case ast_unary_operator: return handle_vertex(v->as()); + case ast_binary_operator: return handle_vertex(v->as()); + case ast_ternary_operator: return handle_vertex(v->as()); + case ast_return_statement: return handle_vertex(v->as()); + case ast_sequence: return handle_vertex(v->as()); + case ast_repeat_statement: return handle_vertex(v->as()); + case ast_while_statement: return handle_vertex(v->as()); + case ast_do_until_statement: return handle_vertex(v->as()); + case ast_try_catch_statement: return handle_vertex(v->as()); + case ast_if_statement: return handle_vertex(v->as()); + case ast_forall_item: return handle_vertex(v->as()); + case ast_forall_list: return handle_vertex(v->as()); + case ast_argument: return handle_vertex(v->as()); + case ast_argument_list: return handle_vertex(v->as()); + case ast_asm_body: return handle_vertex(v->as()); + case ast_function_declaration: return handle_vertex(v->as()); + case ast_pragma_no_arg: return handle_vertex(v->as()); + case ast_pragma_version: return handle_vertex(v->as()); + case ast_include_statement: return handle_vertex(v->as()); + case ast_tolk_file: return handle_vertex(v->as()); + default: + /* a node type without a case above: report it together with the place where it was met */ + throw UnexpectedASTNodeType(v, "ASTStringifier::visit"); + } + } +}; + +} // namespace tolk + +#endif // TOLK_DEBUG diff --git a/tolk/ast-to-legacy.cpp b/tolk/ast-to-legacy.cpp new file mode 100644 index 00000000..8b0e7810 --- /dev/null +++ b/tolk/ast-to-legacy.cpp @@ -0,0 +1,1438 @@ +/* + This file is part of TON Blockchain Library.
+ + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>. +*/ +#include "ast-to-legacy.h" +#include "ast.h" +#include "ast-visitor.h" +#include "ast-from-tokens.h" // todo should be deleted +#include "compiler-state.h" +#include "src-file.h" +#include "tolk.h" +#include "td/utils/crypto.h" +#include "common/refint.h" +#include "openssl/digest.hpp" +#include "block/block.h" +#include "block-parse.h" + +/* + * In this module, we convert the modern AST representation to the legacy representation + * (global state, Expr, CodeBlob, etc.) so that the rest of the compilation process remains unchanged for now. + * As time goes on, I'll gradually get rid of the legacy parts, since most of the code analysis + * should be done at AST level.
+ */ + +namespace tolk { + +static int calc_sym_idx(std::string_view sym_name) { + return G.symbols.lookup_add(sym_name); +} + + +Expr* process_expr(AnyV v, CodeBlob& code, bool nv = false); + +static SymValCodeFunc* make_new_glob_func(SymDef* func_sym, TypeExpr* func_type, bool marked_as_pure) { + SymValCodeFunc* res = new SymValCodeFunc{G.glob_func_cnt, func_type, marked_as_pure}; +#ifdef TOLK_DEBUG + res->name = func_sym->name(); +#endif + func_sym->value = res; + G.glob_func.push_back(func_sym); + G.glob_func_cnt++; + return res; +} + +static bool check_global_func(SrcLocation loc, sym_idx_t func_name) { + SymDef* def = lookup_symbol(func_name); + if (!def) { + throw ParseError(loc, "undefined symbol `" + G.symbols.get_name(func_name) + "`"); + return false; + } + SymVal* val = dynamic_cast(def->value); + if (!val) { + throw ParseError(loc, "symbol `" + G.symbols.get_name(func_name) + "` has no value and no type"); + return false; + } else if (!val->get_type()) { + throw ParseError(loc, "symbol `" + G.symbols.get_name(func_name) + "` has no type, possibly not a function"); + return false; + } else { + return true; + } +} + +static Expr* make_func_apply(Expr* fun, Expr* x) { + Expr* res{nullptr}; + if (fun->cls == Expr::_GlobFunc) { + if (x->cls == Expr::_Tensor) { + res = new Expr{Expr::_Apply, fun->sym, x->args}; + } else { + res = new Expr{Expr::_Apply, fun->sym, {x}}; + } + res->flags = Expr::_IsRvalue | (fun->flags & Expr::_IsImpure); + } else { + res = new Expr{Expr::_VarApply, {fun, x}}; + res->flags = Expr::_IsRvalue; + } + return res; +} + +static void check_import_exists_when_using_sym(AnyV v_usage, const SymDef* used_sym) { + if (!v_usage->loc.is_symbol_from_same_or_builtin_file(used_sym->loc)) { + const SrcFile* declared_in = used_sym->loc.get_src_file(); + bool has_import = false; + for (const SrcFile::ImportStatement& import_stmt : v_usage->loc.get_src_file()->imports) { + if (import_stmt.imported_file == declared_in) { + has_import = true; + } + } 
+ if (!has_import) { + v_usage->error("Using a non-imported symbol `" + used_sym->name() + "`. Forgot to import \"" + declared_in->rel_filename + "\"?"); + } + } +} + +/* Converts a binary operator vertex to a legacy Expr. Three groups are recognized by token: compound assignments, plain assignment and ordinary binary operators; any other token is reported through v->error(). */ +Expr* process_expr(V v, CodeBlob& code, bool nv) { + TokenType t = v->tok; + std::string operator_name = static_cast(v->operator_name); + + if (t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || t == tok_set_divR || t == tok_set_divC || + t == tok_set_mod || t == tok_set_modC || t == tok_set_modR || t == tok_set_lshift || t == tok_set_rshift || t == tok_set_rshiftC || + t == tok_set_rshiftR || t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor) { + /* compound assignment: z = Apply("^_" + operator_name + "_", {x, y}); the result is Letop{copy of x, z}, so x must be both an lvalue and an rvalue */ + Expr* x = process_expr(v->get_lhs(), code, nv); + x->chk_lvalue(); + x->chk_rvalue(); + sym_idx_t name = G.symbols.lookup_add("^_" + operator_name + "_"); + check_global_func(v->loc, name); + Expr* y = process_expr(v->get_rhs(), code, false); + y->chk_rvalue(); + Expr* z = new Expr{Expr::_Apply, name, {x, y}}; + z->here = v->loc; + z->set_val(t); + z->flags = Expr::_IsRvalue; + z->deduce_type(); + Expr* res = new Expr{Expr::_Letop, {x->copy(), z}}; + res->here = v->loc; + res->flags = (x->flags & ~Expr::_IsType) | Expr::_IsRvalue; + res->set_val(t); + res->deduce_type(); + return res; + } + if (t == tok_assign) { + /* plain assignment: the right side is processed first, then predefine_vars() / define_new_vars() are called on the left side before Letop{x, y} is built */ + Expr* x = process_expr(v->get_lhs(), code, nv); + x->chk_lvalue(); + Expr* y = process_expr(v->get_rhs(), code, false); + y->chk_rvalue(); + x->predefine_vars(); + x->define_new_vars(code); + Expr* res = new Expr{Expr::_Letop, {x, y}}; + res->here = v->loc; + res->flags = (x->flags & ~Expr::_IsType) | Expr::_IsRvalue; + res->set_val(t); + res->deduce_type(); + return res; + } + if (t == tok_minus || t == tok_plus || + t == tok_bitwise_and || t == tok_bitwise_or || t == tok_bitwise_xor || + t == tok_eq || t == tok_lt || t == tok_gt || t == tok_leq || t == tok_geq || t == tok_neq || t == tok_spaceship || + t == tok_lshift || t == tok_rshift || t == tok_rshiftC || t == tok_rshiftR || + t
== tok_mul || t == tok_div || t == tok_mod || t == tok_divmod || + t == tok_divC || t == tok_divR || t == tok_modC || t == tok_modR) { + /* ordinary binary operator: Apply("_" + operator_name + "_", {lhs, rhs}), both sides must be rvalues */ + Expr* res = process_expr(v->get_lhs(), code, nv); + res->chk_rvalue(); + sym_idx_t name = G.symbols.lookup_add("_" + operator_name + "_"); + check_global_func(v->loc, name); + Expr* x = process_expr(v->get_rhs(), code, false); + x->chk_rvalue(); + res = new Expr{Expr::_Apply, name, {res, x}}; + res->here = v->loc; + res->set_val(t); + res->flags = Expr::_IsRvalue; + res->deduce_type(); + return res; + } + + v->error("unsupported binary operator"); +} + +/* Converts a unary operator vertex: the operand is applied to the global function named operator_name + "_", except for constant operands, which are folded in place (see below). */ +Expr* process_expr(V v, CodeBlob& code) { + TokenType t = v->tok; + sym_idx_t name = G.symbols.lookup_add(static_cast(v->operator_name) + "_"); + check_global_func(v->loc, name); + Expr* x = process_expr(v->get_rhs(), code, false); + x->chk_rvalue(); + + // here's an optimization to convert "-1" (tok_minus tok_int_const) to a const -1, not to Expr::Apply(-,1) + // without this, everything still works, but Tolk loses some vars/stack knowledge for now (to be fixed later) + // in FunC, it was: + // `var fst = -1;` // is constantly 1 + // `var snd = - 1;` // is Expr::Apply(-), a comment "snd=1" is lost in stack layout comments, and so on + // hence, when after grammar modification tok_minus became a true unary operator (not a part of a number), + // and thus to preserve existing behavior until compiler parts are completely rewritten, handle this case here + if (x->cls == Expr::_Const) { + if (t == tok_bitwise_not) { + x->intval = ~x->intval; + } else if (t == tok_minus) { + x->intval = -x->intval; + } + if (!x->intval->signed_fits_bits(257)) { + v->error("integer overflow"); + } + return x; + } + + auto res = new Expr{Expr::_Apply, name, {x}}; + res->here = v->loc; + res->set_val(t); + res->flags = Expr::_IsRvalue; + res->deduce_type(); + return res; +} + +Expr* process_expr(V v, CodeBlob& code, bool nv) { + Expr* res = process_expr(v->get_lhs(), code, nv); + bool modify =
v->method_name[0] == '~'; + /* a method name starting with '~' is a modifying call: the object must be an lvalue; otherwise it only has to be an rvalue */ + Expr* obj = res; + if (modify) { + obj->chk_lvalue(); + } else { + obj->chk_rvalue(); + } + sym_idx_t name = calc_sym_idx(v->method_name); + const SymDef* sym = lookup_symbol(name); + if (!sym || !dynamic_cast(sym->value)) { + /* nothing suitable under the full method name: retry with the name without its first character */ + sym_idx_t name1 = G.symbols.lookup(v->method_name.substr(1)); + if (name1) { + const SymDef* sym1 = lookup_symbol(name1); + if (sym1 && dynamic_cast(sym1->value)) { + name = name1; + sym = sym1; + } + } + } + check_global_func(v->loc, name); + if (G.is_verbosity(2)) { + std::cerr << "using symbol `" << G.symbols.get_name(name) << "` for method call of " << v->method_name << std::endl; + } + sym = lookup_symbol(name); + SymValFunc* val = sym ? dynamic_cast(sym->value) : nullptr; + if (!val) { + v->error("undefined method call"); + } + Expr* x = process_expr(v->get_arg(), code, false); + x->chk_rvalue(); + /* the object becomes the first argument of the call; a tensor argument is unpacked into the following arguments */ + if (x->cls == Expr::_Tensor) { + res = new Expr{Expr::_Apply, name, {obj}}; + res->args.insert(res->args.end(), x->args.begin(), x->args.end()); + } else { + res = new Expr{Expr::_Apply, name, {obj, x}}; + } + res->here = v->loc; + res->flags = Expr::_IsRvalue | (val->is_marked_as_pure() ?
0 : Expr::_IsImpure); + res->deduce_type(); + if (modify) { + /* modifying call: wrap the call into LetFirst{copy of obj, call}, keeping the flags of the call */ + auto tmp = res; + res = new Expr{Expr::_LetFirst, {obj->copy(), tmp}}; + res->here = v->loc; + res->flags = tmp->flags; + res->set_val(name); + res->deduce_type(); + } + return res; +} + +/* ternary operator: CondExpr{cond, when_true, when_false}; all three parts must be rvalues */ +Expr* process_expr(V v, CodeBlob& code, bool nv) { + Expr* cond = process_expr(v->get_cond(), code, nv); + cond->chk_rvalue(); + Expr* x = process_expr(v->get_when_true(), code, false); + x->chk_rvalue(); + Expr* y = process_expr(v->get_when_false(), code, false); + y->chk_rvalue(); + Expr* res = new Expr{Expr::_CondExpr, {cond, x, y}}; + res->here = v->loc; + res->flags = Expr::_IsRvalue; + res->deduce_type(); + return res; +} + +/* function call: the called expression and its argument are converted separately and combined by make_func_apply() */ +Expr* process_expr(V v, CodeBlob& code, bool nv) { + Expr* res = process_expr(v->get_called_f(), code, nv); + Expr* x = process_expr(v->get_called_arg(), code, false); + x->chk_rvalue(); + res = make_func_apply(res, x); + res->here = v->loc; + res->deduce_type(); + return res; +} + +/* tensor: an empty one is a unit-typed rvalue; otherwise the flags of the result are the bitwise AND of the flags of all items, and its type is the tensor of the item types */ +Expr* process_expr(V v, CodeBlob& code, bool nv) { + if (v->empty()) { + Expr* res = new Expr{Expr::_Tensor, {}}; + res->flags = Expr::_IsRvalue; + res->here = v->loc; + res->e_type = TypeExpr::new_unit(); + return res; + } + + Expr* res = process_expr(v->get_item(0), code, nv); + std::vector type_list; + type_list.push_back(res->e_type); + int f = res->flags; + res = new Expr{Expr::_Tensor, {res}}; + for (int i = 1; i < v->size(); ++i) { + Expr* x = process_expr(v->get_item(i), code, nv); + res->pb_arg(x); + f &= x->flags; + type_list.push_back(x->e_type); + } + res->here = v->loc; + res->flags = f; + res->e_type = TypeExpr::new_tensor(std::move(type_list)); + return res; +} + +/* variable declaration: the declared variable (or list) is processed with nv = true and wrapped into TypeApply carrying the declared type */ +Expr* process_expr(V v, CodeBlob& code) { + Expr* x = process_expr(v->get_variable_or_list(), code, true); + x->chk_lvalue(); // chk_lrvalue() ?
+ Expr* res = new Expr{Expr::_TypeApply, {x}}; + res->e_type = v->declared_type; + res->here = v->loc; + /* the declared type must unify with the type of the declared expression */ + try { + unify(res->e_type, x->e_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot transform expression of type " << x->e_type << " to explicitly requested type " << res->e_type + << ": " << ue; + v->error(os.str()); + } + res->flags = x->flags; + return res; +} + +/* square-bracket tensor: built like a regular tensor and then wrapped into MkTuple, whose type is the tuple of the inner tensor type */ +Expr* process_expr(V v, CodeBlob& code, bool nv) { + if (v->empty()) { + Expr* res = new Expr{Expr::_Tensor, {}}; + res->flags = Expr::_IsRvalue; + res->here = v->loc; + res->e_type = TypeExpr::new_unit(); + res = new Expr{Expr::_MkTuple, {res}}; + res->flags = Expr::_IsRvalue; + res->here = v->loc; + res->e_type = TypeExpr::new_tuple(res->args.at(0)->e_type); + return res; + } + + Expr* res = process_expr(v->get_item(0), code, nv); + std::vector type_list; + type_list.push_back(res->e_type); + int f = res->flags; + res = new Expr{Expr::_Tensor, {res}}; + for (int i = 1; i < v->size(); ++i) { + Expr* x = process_expr(v->get_item(i), code, nv); + res->pb_arg(x); + f &= x->flags; + type_list.push_back(x->e_type); + } + res->here = v->loc; + res->flags = f; + res->e_type = TypeExpr::new_tensor(std::move(type_list), false); + res = new Expr{Expr::_MkTuple, {res}}; + res->flags = f; + res->here = v->loc; + res->e_type = TypeExpr::new_tuple(res->args.at(0)->e_type); + return res; +} + +/* integer literal: must parse and fit into 257 signed bits, otherwise an error is reported */ +Expr* process_expr(V v) { + Expr* res = new Expr{Expr::_Const, v->loc}; + res->flags = Expr::_IsRvalue; + res->intval = td::string_to_int256(static_cast(v->int_val)); + if (res->intval.is_null() || !res->intval->signed_fits_bits(257)) { + v->error("invalid integer constant"); + } + res->e_type = TypeExpr::new_atomic(TypeExpr::_Int); + return res; +} + +/* string literal: the modifier first selects the kind of the result (slice constant or integer constant), and then how its value is computed from the text */ +Expr* process_expr(V v) { + std::string str = static_cast(v->str_val); + Expr* res; + switch (v->modifier) { + case 0: + case 's': + case 'a': + res = new Expr{Expr::_SliceConst, v->loc}; + res->e_type = TypeExpr::new_atomic(TypeExpr::_Slice); + break;
+ case 'u': + case 'h': + case 'H': + case 'c': + res = new Expr{Expr::_Const, v->loc}; + res->e_type = TypeExpr::new_atomic(TypeExpr::_Int); + break; + default: + v->error("invalid string modifier '" + std::string(1, v->modifier) + "'"); + } + res->flags = Expr::_IsRvalue; + /* second pass over the modifier: fill in strval (slice constants) or intval (integer constants) from the literal text */ + switch (v->modifier) { + case 0: { + /* no modifier: the text is stored hex-encoded */ + res->strval = td::hex_encode(str); + break; + } + case 's': { + /* the text is stored as is; it is parsed as a hex bitstring literal only to validate it */ + res->strval = str; + unsigned char buff[128]; + int bits = (int)td::bitstring::parse_bitstring_hex_literal(buff, sizeof(buff), str.data(), str.data() + str.size()); + if (bits < 0) { + v->error("Invalid hex bitstring constant '" + str + "'"); + } + break; + } + case 'a': { // MsgAddressInt + // todo rewrite stdaddress parsing (if done, CMake dep "ton_crypto" can be replaced with "ton_crypto_core") + block::StdAddress a; + if (a.parse_addr(str)) { + res->strval = block::tlb::MsgAddressInt().pack_std_address(a)->as_bitslice().to_hex(); + } else { + v->error("invalid standard address '" + str + "'"); + } + break; + } + case 'u': { + /* the bytes of the text, read as one hexadecimal number */ + res->intval = td::hex_string_to_int256(td::hex_encode(str)); + if (str.empty()) { + v->error("empty integer ascii-constant"); + } + if (res->intval.is_null()) { + v->error("too long integer ascii-constant"); + } + break; + } + case 'h': + case 'H': { + /* digest of the text: 32 bits of it for 'h', 256 bits for 'H' */ + unsigned char hash[32]; + digest::hash_str(hash, str.data(), str.size()); + res->intval = td::bits_to_refint(hash, (v->modifier == 'h') ? 32 : 256, false); + break; + } + case 'c': { + /* crc32 of the text */ + res->intval = td::make_refint(td::crc32(td::Slice{str})); + break; + } + default: + __builtin_unreachable(); + } + return res; +} + +Expr* process_expr(V v) { + SymDef* sym = lookup_symbol(calc_sym_idx(v->bool_val ?
"true" : "false")); + /* a bool literal is an application (without arguments) of the symbol named "true" or "false", which must exist */ + tolk_assert(sym); + Expr* res = new Expr{Expr::_Apply, sym, {}}; + res->flags = Expr::_IsRvalue; + res->deduce_type(); + return res; +} + +/* nil literal: an application (without arguments) of the symbol named "nil", which must exist */ +Expr* process_expr([[maybe_unused]] V v) { + SymDef* sym = lookup_symbol(calc_sym_idx("nil")); + tolk_assert(sym); + Expr* res = new Expr{Expr::_Apply, sym, {}}; + res->flags = Expr::_IsRvalue; + res->deduce_type(); + return res; +} + +/* Identifier. Depending on what the name resolves to, the result is a GlobVar, a Const / SliceConst (the value of a symbolic constant), a GlobFunc or a local Var. With nv set, a name that matched none of the first two cases becomes a new variable. */ +Expr* process_expr(V v, bool nv) { + SymDef* sym = lookup_symbol(calc_sym_idx(v->name)); + if (sym && dynamic_cast(sym->value)) { + check_import_exists_when_using_sym(v, sym); + auto val = dynamic_cast(sym->value); + Expr* res = new Expr{Expr::_GlobVar, v->loc}; + res->e_type = val->get_type(); + res->sym = sym; + res->flags = Expr::_IsLvalue | Expr::_IsRvalue | Expr::_IsImpure; + return res; + } + if (sym && dynamic_cast(sym->value)) { + check_import_exists_when_using_sym(v, sym); + auto val = dynamic_cast(sym->value); + Expr* res = new Expr{Expr::_None, v->loc}; + res->flags = Expr::_IsRvalue; + if (val->get_kind() == SymValConst::IntConst) { + res->cls = Expr::_Const; + res->intval = val->get_int_value(); + res->e_type = TypeExpr::new_atomic(tok_int); + } else if (val->get_kind() == SymValConst::SliceConst) { + res->cls = Expr::_SliceConst; + res->strval = val->get_str_value(); + res->e_type = TypeExpr::new_atomic(tok_slice); + } else { + v->error("Invalid symbolic constant type"); + } + return res; + } + if (sym && dynamic_cast(sym->value)) { + check_import_exists_when_using_sym(v, sym); + } + Expr* res = new Expr{Expr::_Var, v->loc}; + if (nv) { + res->val = ~calc_sym_idx(v->name); + res->e_type = TypeExpr::new_hole(); + res->flags = Expr::_IsLvalue; + // std::cerr << "defined new variable " << lex.cur().str << " : " << res->e_type << std::endl; + } else { + if (!sym) { + check_global_func(v->loc, calc_sym_idx(v->name)); + sym = lookup_symbol(calc_sym_idx(v->name)); + } + res->sym = sym; + SymVal* val = nullptr; + bool impure = false; + if (sym) { + val =
dynamic_cast(sym->value); + } + if (!val) { + v->error("undefined identifier '" + static_cast(v->name) + "'"); + } + /* a function symbol becomes GlobFunc (impure unless marked as pure); anything else is a variable referenced by its index */ + if (val->kind == SymValKind::_Func) { + res->e_type = val->get_type(); + res->cls = Expr::_GlobFunc; + impure = !dynamic_cast(val)->is_marked_as_pure(); + } else { + tolk_assert(val->idx >= 0); + res->val = val->idx; + res->e_type = val->get_type(); + // std::cerr << "accessing variable " << lex.cur().str << " : " << res->e_type << std::endl; + } + // std::cerr << "accessing symbol " << lex.cur().str << " : " << res->e_type << (val->impure ? " (impure)" : " (pure)") << std::endl; + res->flags = Expr::_IsLvalue | Expr::_IsRvalue | (impure ? Expr::_IsImpure : 0); + } + res->deduce_type(); + return res; +} + +/* Entry point for expressions: dispatches on the runtime node type to the typed process_expr() overload. Underscore and type expression vertices are converted inline; node types that are not expressions are rejected with UnexpectedASTNodeType. */ +Expr* process_expr(AnyV v, CodeBlob& code, bool nv) { + switch (v->type) { + case ast_binary_operator: + return process_expr(v->as(), code, nv); + case ast_unary_operator: + return process_expr(v->as(), code); + case ast_dot_tilde_call: + return process_expr(v->as(), code, nv); + case ast_ternary_operator: + return process_expr(v->as(), code, nv); + case ast_function_call: + return process_expr(v->as(), code, nv); + case ast_parenthesized_expr: + return process_expr(v->as()->get_expr(), code, nv); + case ast_variable_declaration: + return process_expr(v->as(), code); + case ast_tensor: + return process_expr(v->as(), code, nv); + case ast_tensor_square: + return process_expr(v->as(), code, nv); + case ast_int_const: + return process_expr(v->as()); + case ast_string_const: + return process_expr(v->as()); + case ast_bool_const: + return process_expr(v->as()); + case ast_nil_tuple: + return process_expr(v->as()); + case ast_identifier: + return process_expr(v->as(), nv); + + case ast_underscore: { + Expr* res = new Expr{Expr::_Hole, v->loc}; + res->val = -1; + res->flags = Expr::_IsLvalue; + res->e_type = TypeExpr::new_hole(); + return res; + } + case ast_type_expression: { + Expr* res = new Expr{Expr::_Type, v->loc}; + res->flags = Expr::_IsType; +
res->e_type = v->as()->declared_type; + return res; + } + default: + throw UnexpectedASTNodeType(v, "process_expr"); + } +} + +/* Bit flags returned by statement handlers. A return statement yields `ret`; a sequence starts from `init` (end | empty) and warns about unreachable code once the `end` bit has been cleared. combine(x, y), used for statements that follow one another: `ret` is set if y has it, `end` and `empty` are kept only if y has them too. combine_parallel(x, y): `ret` and `empty` are kept only if y has them too, `end` is set if y has it. */ +namespace blk_fl { +enum { end = 1, ret = 2, empty = 4 }; +typedef int val; +constexpr val init = end | empty; +void combine(val& x, const val y) { + x |= y & ret; + x &= y | ~(end | empty); +} +void combine_parallel(val& x, const val y) { + x &= y | ~(ret | empty); + x |= y & end; +} +} // namespace blk_fl + +blk_fl::val process_vertex(V v, CodeBlob& code) { + Expr* expr = process_expr(v->get_return_value(), code); + expr->chk_rvalue(); + try { + // std::cerr << "in return: "; + unify(expr->e_type, code.ret_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "previous function return type " << code.ret_type + << " cannot be unified with return statement expression type " << expr->e_type << ": " << ue; + v->error(os.str()); + } + std::vector tmp_vars = expr->pre_compile(code); + code.emplace_back(v->loc, Op::_Return, std::move(tmp_vars)); + return blk_fl::ret; +} + +void append_implicit_ret_stmt(V v, CodeBlob& code) { + TypeExpr* ret_type = TypeExpr::new_unit(); + try { + // std::cerr << "in implicit return: "; + unify(ret_type, code.ret_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "previous function return type " << code.ret_type + << " cannot be unified with implicit end-of-block return type " << ret_type << ": " << ue; + throw ParseError(v->loc_end, os.str()); + } + code.emplace_back(v->loc_end, Op::_Return); +} + +blk_fl::val process_stmt(AnyV v, CodeBlob& code); + +blk_fl::val process_vertex(V v, CodeBlob& code, bool no_new_scope = false) { + if (!no_new_scope) { + open_scope(v->loc); + } + blk_fl::val res = blk_fl::init; + bool warned = false; + for (AnyV item : v->get_items()) { + if (!(res & blk_fl::end) && !warned) { + item->loc.show_warning("unreachable code"); + warned = true; + } + blk_fl::combine(res, process_stmt(item, code)); + } + if (!no_new_scope) { + close_scope(); + } + return
res; +} + +blk_fl::val process_vertex(V v, CodeBlob& code) { + Expr* expr = process_expr(v->get_cond(), code); + expr->chk_rvalue(); + auto cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); + try { + unify(expr->e_type, cnt_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "repeat count value of type " << expr->e_type << " is not an integer: " << ue; + v->get_cond()->error(os.str()); + } + std::vector tmp_vars = expr->pre_compile(code); + if (tmp_vars.size() != 1) { + v->get_cond()->error("repeat count value is not a singleton"); + } + Op& repeat_op = code.emplace_back(v->loc, Op::_Repeat, tmp_vars); + code.push_set_cur(repeat_op.block0); + blk_fl::val res = process_vertex(v->get_body(), code); + code.close_pop_cur(v->get_body()->loc_end); + return res | blk_fl::end; +} + +blk_fl::val process_vertex(V v, CodeBlob& code) { + Expr* expr = process_expr(v->get_cond(), code); + expr->chk_rvalue(); + auto cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); + try { + unify(expr->e_type, cnt_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "while condition value of type " << expr->e_type << " is not an integer: " << ue; + v->get_cond()->error(os.str()); + } + Op& while_op = code.emplace_back(v->loc, Op::_While); + code.push_set_cur(while_op.block0); + while_op.left = expr->pre_compile(code); + code.close_pop_cur(v->get_body()->loc); + if (while_op.left.size() != 1) { + v->get_cond()->error("while condition value is not a singleton"); + } + code.push_set_cur(while_op.block1); + blk_fl::val res1 = process_vertex(v->get_body(), code); + code.close_pop_cur(v->get_body()->loc_end); + return res1 | blk_fl::end; +} + +blk_fl::val process_vertex(V v, CodeBlob& code) { + Op& until_op = code.emplace_back(v->loc, Op::_Until); + code.push_set_cur(until_op.block0); + open_scope(v->loc); + blk_fl::val res = process_vertex(v->get_body(), code, true); + Expr* expr = process_expr(v->get_cond(), code); + expr->chk_rvalue(); + close_scope(); + auto cnt_type 
= TypeExpr::new_atomic(TypeExpr::_Int); + try { + unify(expr->e_type, cnt_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "`until` condition value of type " << expr->e_type << " is not an integer: " << ue; + v->get_cond()->error(os.str()); + } + until_op.left = expr->pre_compile(code); + code.close_pop_cur(v->get_body()->loc_end); + if (until_op.left.size() != 1) { + v->get_cond()->error("`until` condition value is not a singleton"); + } + return res & ~blk_fl::empty; +} + +blk_fl::val process_vertex(V v, CodeBlob& code) { + code.require_callxargs = true; + Op& try_catch_op = code.emplace_back(v->loc, Op::_TryCatch); + code.push_set_cur(try_catch_op.block0); + blk_fl::val res0 = process_vertex(v->get_try_body(), code); + code.close_pop_cur(v->get_try_body()->loc_end); + code.push_set_cur(try_catch_op.block1); + open_scope(v->get_catch_expr()->loc); + Expr* expr = process_expr(v->get_catch_expr(), code, true); + expr->chk_lvalue(); + TypeExpr* tvm_error_type = TypeExpr::new_tensor(TypeExpr::new_var(), TypeExpr::new_atomic(TypeExpr::_Int)); + try { + unify(expr->e_type, tvm_error_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "`catch` arguments have incorrect type " << expr->e_type << ": " << ue; + v->get_catch_expr()->error(os.str()); + } + expr->predefine_vars(); + expr->define_new_vars(code); + try_catch_op.left = expr->pre_compile(code); + tolk_assert(try_catch_op.left.size() == 2 || try_catch_op.left.size() == 1); + blk_fl::val res1 = process_vertex(v->get_catch_body(), code); + close_scope(); + code.close_pop_cur(v->get_catch_body()->loc_end); + blk_fl::combine_parallel(res0, res1); + return res0; +} + +blk_fl::val process_vertex(V v, CodeBlob& code, TokenType first_lex = tok_if) { + Expr* expr = process_expr(v->get_cond(), code); + expr->chk_rvalue(); + auto flag_type = TypeExpr::new_atomic(TypeExpr::_Int); + try { + unify(expr->e_type, flag_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "`if` 
condition value of type " << expr->e_type << " is not an integer: " << ue; + v->get_cond()->error(os.str()); + } + std::vector tmp_vars = expr->pre_compile(code); + if (tmp_vars.size() != 1) { + v->get_cond()->error("condition value is not a singleton"); + } + Op& if_op = code.emplace_back(v->loc, Op::_If, tmp_vars); + code.push_set_cur(if_op.block0); + blk_fl::val res1 = process_vertex(v->get_if_body(), code); + blk_fl::val res2 = blk_fl::init; + code.close_pop_cur(v->get_if_body()->loc_end); + code.push_set_cur(if_op.block1); + res2 = process_vertex(v->get_else_body(), code); + code.close_pop_cur(v->get_else_body()->loc_end); + if (v->is_ifnot) { + std::swap(if_op.block0, if_op.block1); + } + blk_fl::combine_parallel(res1, res2); + return res1; +} + +blk_fl::val process_stmt(AnyV v, CodeBlob& code) { + switch (v->type) { + case ast_return_statement: + return process_vertex(v->as(), code); + case ast_sequence: + return process_vertex(v->as(), code); + case ast_empty: + return blk_fl::init; + case ast_repeat_statement: + return process_vertex(v->as(), code); + case ast_if_statement: + return process_vertex(v->as(), code); + case ast_do_until_statement: + return process_vertex(v->as(), code); + case ast_while_statement: + return process_vertex(v->as(), code); + case ast_try_catch_statement: + return process_vertex(v->as(), code); + default: { + auto expr = process_expr(v, code); + expr->chk_rvalue(); + expr->pre_compile(code); + return blk_fl::end; + } + } +} + +FormalArg process_vertex(V v, int fa_idx) { + if (v->arg_name.empty()) { + return std::make_tuple(v->arg_type, (SymDef*)nullptr, v->loc); + } + if (G.prohibited_var_names.count(static_cast(v->arg_name))) { + v->error("symbol `" + static_cast(v->arg_name) + "` cannot be redefined as a variable"); + } + SymDef* new_sym_def = define_symbol(calc_sym_idx(v->arg_name), true, v->loc); + if (!new_sym_def) { + v->error("cannot define symbol"); + } + if (new_sym_def->value) { + v->error("redefined argument"); + } + 
new_sym_def->value = new SymVal{SymValKind::_Param, fa_idx, v->arg_type}; + return std::make_tuple(v->arg_type, new_sym_def, v->loc); +} + +CodeBlob* process_vertex(V v_body, V arg_list, TypeExpr* ret_type, bool marked_as_pure) { + CodeBlob* blob = new CodeBlob{ret_type}; + if (marked_as_pure) { + blob->flags |= CodeBlob::_ForbidImpure; + } + FormalArgList legacy_arg_list; + for (int i = 0; i < arg_list->size(); ++i) { + legacy_arg_list.emplace_back(process_vertex(arg_list->get_arg(i), i)); + } + blob->import_params(std::move(legacy_arg_list)); + blk_fl::val res = blk_fl::init; + bool warned = false; + for (AnyV item : v_body->get_items()) { + if (!(res & blk_fl::end) && !warned) { + item->loc.show_warning("unreachable code"); + warned = true; + } + blk_fl::combine(res, process_stmt(item, *blob)); + } + if (res & blk_fl::end) { + append_implicit_ret_stmt(v_body, *blob); + } + blob->close_blk(v_body->loc_end); + return blob; +} + +SymValAsmFunc* process_vertex(V v_body, TypeExpr* func_type, V arg_list, TypeExpr* ret_type, + bool marked_as_pure) { + int cnt = arg_list->size(); + int width = ret_type->get_width(); + if (width < 0 || width > 16) { + v_body->error("return type of an assembler built-in function must have a well-defined fixed width"); + } + if (cnt > 16) { + v_body->error("assembler built-in function must have at most 16 arguments"); + } + std::vector cum_arg_width; + cum_arg_width.push_back(0); + int tot_width = 0; + for (int i = 0; i < cnt; ++i) { + V arg = arg_list->get_arg(i); + int arg_width = arg->arg_type->get_width(); + if (arg_width < 0 || arg_width > 16) { + arg->error("parameters of an assembler built-in function must have a well-defined fixed width"); + } + cum_arg_width.push_back(tot_width += arg_width); + } + std::vector asm_ops; + std::vector arg_order, ret_order; + if (!v_body->arg_order.empty()) { + if (static_cast(v_body->arg_order.size()) != cnt) { + v_body->error("arg_order of asm function must specify all arguments"); + } + 
std::vector visited(cnt, false); + for (int i = 0; i < cnt; ++i) { + int j = v_body->arg_order[i]; + if (visited[j]) { + v_body->error("arg_order of asm function contains duplicates"); + } + visited[j] = true; + int c1 = cum_arg_width[j], c2 = cum_arg_width[j + 1]; + while (c1 < c2) { + arg_order.push_back(c1++); + } + } + tolk_assert(arg_order.size() == (unsigned)tot_width); + } + if (!v_body->ret_order.empty()) { + if (static_cast(v_body->ret_order.size()) != width) { + v_body->error("ret_order of this asm function expected to be width = " + std::to_string(width)); + } + std::vector visited(width, false); + for (int i = 0; i < width; ++i) { + int j = v_body->ret_order[i]; + if (j < 0 || j >= width || visited[j]) { + v_body->error("ret_order contains invalid integer, not in range 0 .. width-1"); + } + visited[j] = true; + } + ret_order = v_body->ret_order; + } + for (AnyV v_child : v_body->get_asm_commands()) { + std::string_view ops = v_child->as()->str_val; // \n\n... + std::string op; + for (const char& c : ops) { + if (c == '\n' || c == '\r') { + if (!op.empty()) { + asm_ops.push_back(AsmOp::Parse(op, cnt, width)); + if (asm_ops.back().is_custom()) { + cnt = width; + } + op.clear(); + } + } else { + op.push_back(c); + } + } + if (!op.empty()) { + asm_ops.push_back(AsmOp::Parse(op, cnt, width)); + if (asm_ops.back().is_custom()) { + cnt = width; + } + } + } + std::string crc_s; + for (const AsmOp& asm_op : asm_ops) { + crc_s += asm_op.op; + } + crc_s.push_back(!marked_as_pure); + for (const int& x : arg_order) { + crc_s += std::string((const char*) (&x), (const char*) (&x + 1)); + } + for (const int& x : ret_order) { + crc_s += std::string((const char*) (&x), (const char*) (&x + 1)); + } + auto res = new SymValAsmFunc{func_type, std::move(asm_ops), marked_as_pure}; + res->arg_order = std::move(arg_order); + res->ret_order = std::move(ret_order); + res->crc = td::crc64(crc_s); + return res; +} + +// if a function looks like `T f(...args) { return 
anotherF(...args); }`, +// set a bit to flags +// then, all calls to `f(...)` will be effectively replaced with `anotherF(...)` +void detect_if_function_just_wraps_another(SymValCodeFunc* v_current, const td::RefInt256 &method_id) { + const std::string& function_name = v_current->code->name; + + // in "AST" representation, the first is Op::_Import (input arguments, even if none) + const auto& op_import = v_current->code->ops; + tolk_assert(op_import && op_import->cl == Op::_Import); + + // then Op::_Call (anotherF) + const Op* op_call = op_import->next.get(); + if (!op_call || op_call->cl != Op::_Call) + return; + tolk_assert(op_call->left.size() == 1); + + const auto& op_return = op_call->next; + if (!op_return || op_return->cl != Op::_Return || op_return->left.size() != 1) + return; + + bool indices_expected = op_import->left.size() == op_call->left[0] && op_call->left[0] == op_return->left[0]; + if (!indices_expected) + return; + + const SymDef* f_called = op_call->fun_ref; + const SymValFunc* v_called = dynamic_cast(f_called->value); + if (!v_called) + return; + + // `return` must use all arguments, e.g. 
`return (_0,_2,_1)`, not `return (_0,_1,_1)` + int args_used_mask = 0; + for (var_idx_t arg_idx : op_call->right) { + args_used_mask |= 1 << arg_idx; + } + if (args_used_mask != (1 << op_call->right.size()) - 1) + return; + + // detect getters (having method_id), they should not be treated as wrappers + // v_current->method_id will be assigned later; todo refactor function parsing completely, it's weird + // moreover, `recv_external()` and others are also exported, but FunC is unaware of method_id + // (it's assigned by Fift later) + // so, for now, just handle "special" function names, the same as in Asm.fif + if (!method_id.is_null()) + return; + if (function_name == "main" || function_name == "recv_internal" || function_name == "recv_external" || + function_name == "run_ticktock" || function_name == "split_prepare" || function_name == "split_install") + return; + + // all types must be strictly defined (on mismatch, a compilation error will be triggered anyway) + if (v_called->sym_type->has_unknown_inside() || v_current->sym_type->has_unknown_inside()) + return; + // avoid situations like `f(int a, (int,int) b)`, inlining will be cumbersome + if (v_current->get_arg_type()->get_width() != op_call->right.size()) + return; + // 'return true;' (false, nil) are (surprisingly) also function calls + if (f_called->name() == "true" || f_called->name() == "false" || f_called->name() == "nil") + return; + // if an original is marked `pure`, and this one doesn't, it's okay; just check for inline_ref storage + if (v_current->is_inline_ref()) + return; + + // ok, f_current is a wrapper + v_current->flags |= SymValFunc::flagWrapsAnotherF; + if (G.is_verbosity(2)) { + std::cerr << function_name << " -> " << f_called->name() << std::endl; + } +} + +static td::RefInt256 calculate_method_id_by_func_name(std::string_view func_name) { + unsigned int crc = td::crc16(static_cast(func_name)); + return td::make_refint((crc & 0xffff) | 0x10000); +} + +void process_vertex(V v_function) { 
+ open_scope(v_function->loc); + std::vector type_vars; + if (v_function->forall_list) { + type_vars.reserve(v_function->forall_list->size()); + for (int idx = 0; idx < v_function->forall_list->size(); ++idx) { + type_vars.emplace_back(v_function->forall_list->get_item(idx)->created_type); + } + } + std::string func_name = v_function->name; + int func_sym_idx = calc_sym_idx(func_name); + int flags_inline = 0; + if (v_function->marked_as_inline) { + flags_inline = SymValFunc::flagInline; + } else if (v_function->marked_as_inline_ref) { + flags_inline = SymValFunc::flagInlineRef; + } + td::RefInt256 method_id; + if (v_function->method_id) { + method_id = td::string_to_int256(static_cast(v_function->method_id->int_val)); + if (method_id.is_null()) { + v_function->method_id->error("invalid integer constant"); + } + } else if (v_function->marked_as_get_method) { + method_id = calculate_method_id_by_func_name(func_name); + for (const SymDef* other : G.glob_get_methods) { + if (!td::cmp(dynamic_cast(other->value)->method_id, method_id)) { + v_function->error(PSTRING() << "GET methods hash collision: `" << other->name() << "` and `" + func_name + "` produce the same hash. Consider renaming one of these functions."); + } + } + } + TypeExpr* arg_list_type = nullptr; + if (int n_args = v_function->get_num_args()) { + std::vector arg_types; + arg_types.reserve(n_args); + for (int idx = 0; idx < n_args; ++idx) { + arg_types.emplace_back(v_function->get_arg(idx)->arg_type); + } + arg_list_type = TypeExpr::new_tensor(std::move(arg_types)); + } else { + arg_list_type = TypeExpr::new_unit(); + } + TypeExpr* func_type = TypeExpr::new_map(arg_list_type, v_function->ret_type); + if (!type_vars.empty()) { + func_type = TypeExpr::new_forall(std::move(type_vars), func_type); + } + if (v_function->marked_as_builtin) { + const SymDef* builtin_func = lookup_symbol(G.symbols.lookup(func_name)); + const SymValFunc* func_val = builtin_func ? 
dynamic_cast(builtin_func->value) : nullptr; + if (!func_val || !func_val->is_builtin()) { + v_function->error("`builtin` used for non-builtin function"); + } +#ifdef TOLK_DEBUG + // in release, we don't need this check, since `builtin` is used only in stdlib.tolk, which is our responsibility + if (!func_val->sym_type->equals_to(func_type) || func_val->is_marked_as_pure() != v_function->marked_as_pure) { + v_function->error("declaration for `builtin` function doesn't match an actual one"); + } +#endif + close_scope(); + return; + } + if (G.is_verbosity(1)) { + std::cerr << "fun " << func_name << " : " << func_type << std::endl; + } + SymDef* func_sym = define_global_symbol(func_sym_idx, 0, v_function->loc); + tolk_assert(func_sym); + SymValFunc* func_sym_val = dynamic_cast(func_sym->value); + if (func_sym->value) { + // todo remove all about pre-declarations and prototypes + if (func_sym->value->kind != SymValKind::_Func || !func_sym_val) { + v_function->error("was not defined as a function before"); + } + try { + unify(func_sym_val->sym_type, func_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "previous type of function " << func_name << " : " << func_sym_val->sym_type + << " cannot be unified with new type " << func_type << ": " << ue; + v_function->error(os.str()); + } + } + if (v_function->get_body()->type == ast_empty) { + make_new_glob_func(func_sym, func_type, v_function->marked_as_pure); + } else if (const auto* v_seq = v_function->get_body()->try_as()) { + if (dynamic_cast(func_sym_val)) { + v_function->error("function `" + func_name + "` has been already defined as an assembler built-in"); + } + SymValCodeFunc* func_sym_code; + if (func_sym_val) { + func_sym_code = dynamic_cast(func_sym_val); + if (!func_sym_code) { + v_function->error("function `" + func_name + "` has been already defined in an yet-unknown way"); + } + } else { + func_sym_code = make_new_glob_func(func_sym, func_type, v_function->marked_as_pure); + } + if 
(func_sym_code->code) { + v_function->error("redefinition of function `" + func_name + "`"); + } + if (v_function->marked_as_pure && v_function->ret_type->get_width() == 0) { + v_function->error("a pure function should return something, otherwise it will be optimized out anyway"); + } + CodeBlob* code = process_vertex(v_seq, v_function->get_arg_list(), v_function->ret_type, v_function->marked_as_pure); + code->name = func_name; + code->loc = v_function->loc; + func_sym_code->code = code; + // todo it should be done not here, it should be on ast level, it should work when functions are declared swapped + detect_if_function_just_wraps_another(func_sym_code, method_id); + } else if (const auto* v_asm = v_function->get_body()->try_as()) { + SymValAsmFunc* asm_func = process_vertex(v_asm, func_type, v_function->get_arg_list(), v_function->ret_type, v_function->marked_as_pure); +#ifdef TOLK_DEBUG + asm_func->name = func_name; +#endif + if (func_sym_val) { + if (dynamic_cast(func_sym_val)) { + v_function->error("function `" + func_name + "` was already declared as an ordinary function"); + } + SymValAsmFunc* asm_func_old = dynamic_cast(func_sym_val); + if (asm_func_old) { + if (asm_func->crc != asm_func_old->crc) { + v_function->error("redefinition of built-in assembler function `" + func_name + "`"); + } + } else { + v_function->error("redefinition of previously (somehow) defined function `" + func_name + "`"); + } + } + func_sym->value = asm_func; + } + if (method_id.not_null()) { + auto val = dynamic_cast(func_sym->value); + if (!val) { + v_function->error("cannot set method id for unknown function `" + func_name + "`"); + } + if (val->method_id.is_null()) { + val->method_id = std::move(method_id); + } else if (td::cmp(val->method_id, method_id) != 0) { + v_function->error("integer method identifier for `" + func_name + "` changed from " + + val->method_id->to_dec_string() + " to a different value " + method_id->to_dec_string()); + } + } + if (flags_inline) { + auto 
val = dynamic_cast(func_sym->value); + if (!val) { + v_function->error("cannot set unknown function `" + func_name + "` as an inline"); + } + if (!val->is_inline() && !val->is_inline_ref()) { + val->flags |= flags_inline; + } else if ((val->flags & (SymValFunc::flagInline | SymValFunc::flagInlineRef)) != flags_inline) { + v_function->error("inline mode for `" + func_name + "` changed with respect to a previous declaration"); + } + } + if (v_function->marked_as_get_method) { + auto val = dynamic_cast(func_sym->value); + if (!val) { + v_function->error("cannot set unknown function `" + func_name + "` as a get method"); + } + val->flags |= SymValFunc::flagGetMethod; + G.glob_get_methods.push_back(func_sym); + } + close_scope(); +} + +td::Result locate_source_file(const std::string& rel_filename) { + td::Result path = G.settings.read_callback(CompilerSettings::FsReadCallbackKind::Realpath, rel_filename.c_str()); + if (path.is_error()) { + return path.move_as_error(); + } + + std::string abs_filename = path.move_as_ok(); + if (SrcFile* file = G.all_src_files.find_file(abs_filename)) { + return file; // file was already parsed (imported from somewhere else) + } + + td::Result text = G.settings.read_callback(CompilerSettings::FsReadCallbackKind::ReadFile, abs_filename.c_str()); + if (text.is_error()) { + return text.move_as_error(); + } + + return G.all_src_files.register_file(rel_filename, abs_filename, text.move_as_ok()); +} + +void process_vertex(V v) { + std::string_view pragma_name = v->pragma_name; + if (pragma_name == G.pragma_allow_post_modification.name()) { + G.pragma_allow_post_modification.enable(v->loc); + } else if (pragma_name == G.pragma_compute_asm_ltr.name()) { + G.pragma_compute_asm_ltr.enable(v->loc); + } else if (pragma_name == G.pragma_remove_unused_functions.name()) { + G.pragma_remove_unused_functions.enable(v->loc); + } else { + v->error("unknown pragma name"); + } +} + +void process_vertex(V v) { + char op = '='; bool eq = false; + TokenType 
cmp_tok = v->cmp_tok; + if (cmp_tok == tok_gt || cmp_tok == tok_geq) { + op = '>'; + eq = cmp_tok == tok_geq; + } else if (cmp_tok == tok_lt || cmp_tok == tok_leq) { + op = '<'; + eq = cmp_tok == tok_leq; + } else if (cmp_tok == tok_eq) { + op = '='; + } else if (cmp_tok == tok_bitwise_xor) { + op = '^'; + } else { + v->error("invalid comparison operator"); + } + std::string_view pragma_value = v->semver; + int sem_ver[3] = {0, 0, 0}; + char segs = 1; + auto stoi = [&](std::string_view s) { + auto R = td::to_integer_safe(static_cast(s)); + if (R.is_error()) { + v->error("invalid semver format"); + } + return R.move_as_ok(); + }; + std::istringstream iss_value(static_cast(pragma_value)); + for (int idx = 0; idx < 3; idx++) { + std::string s{"0"}; + std::getline(iss_value, s, '.'); + sem_ver[idx] = stoi(s); + } + // End reading semver from source code + int tolk_ver[3] = {0, 0, 0}; + std::istringstream iss(tolk_version); + for (int idx = 0; idx < 3; idx++) { + std::string s; + std::getline(iss, s, '.'); + tolk_ver[idx] = stoi(s); + } + // End parsing embedded semver + bool match = true; + switch (op) { + case '=': + if ((tolk_ver[0] != sem_ver[0]) || + (tolk_ver[1] != sem_ver[1]) || + (tolk_ver[2] != sem_ver[2])) { + match = false; + } + break; + case '>': + if ( ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] == sem_ver[2]) && !eq) || + ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] < sem_ver[2])) || + ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] < sem_ver[1])) || + ((tolk_ver[0] < sem_ver[0])) ) { + match = false; + } + break; + case '<': + if ( ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] == sem_ver[2]) && !eq) || + ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] > sem_ver[2])) || + ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] > sem_ver[1])) || + ((tolk_ver[0] > sem_ver[0])) ) { + match = false; + } + break; + case '^': + if ( ((segs == 3) && 
((tolk_ver[0] != sem_ver[0]) || (tolk_ver[1] != sem_ver[1]) || (tolk_ver[2] < sem_ver[2]))) + || ((segs == 2) && ((tolk_ver[0] != sem_ver[0]) || (tolk_ver[1] < sem_ver[1]))) + || ((segs == 1) && ((tolk_ver[0] < sem_ver[0]))) ) { + match = false; + } + break; + default: + __builtin_unreachable(); + } + if (!match) { + v->error("Tolk version " + tolk_version + " does not satisfy this condition"); + } +} + +void process_vertex(V v, SrcFile* current_file) { + std::string rel_filename = static_cast(v->file_name); + if (size_t rc = current_file->rel_filename.rfind('/'); rc != std::string::npos) { + rel_filename = current_file->rel_filename.substr(0, rc + 1) + rel_filename; + } + + td::Result locate_res = locate_source_file(rel_filename); + if (locate_res.is_error()) { + v->error("Failed to import: " + locate_res.move_as_error().message().str()); + } + + SrcFile* imported_file = locate_res.move_as_ok(); + current_file->imports.emplace_back(SrcFile::ImportStatement{imported_file}); + if (!imported_file->was_parsed) { + // todo it's wrong, but ok for now + process_file_ast(parse_src_file_to_ast(imported_file)); + } +} + +void process_vertex(V v) { + AnyV init_value = v->get_init_value(); + SymDef* sym_def = define_global_symbol(calc_sym_idx(v->const_name), false, v->loc); + if (!sym_def) { + v->error("cannot define global symbol"); + } + if (sym_def->value) { + v->error("symbol already exists"); + } + CodeBlob code; + Expr* x = process_expr(init_value, code, false); + if (!x->is_rvalue()) { + v->get_init_value()->error("expression is not strictly Rvalue"); + } + if (v->declared_type && !v->declared_type->equals_to(x->e_type)) { + v->error("expression type does not match declared type"); + } + SymValConst* new_value = nullptr; + if (x->cls == Expr::_Const) { // Integer constant + new_value = new SymValConst{G.const_cnt++, x->intval}; + } else if (x->cls == Expr::_SliceConst) { // Slice constant (string) + new_value = new SymValConst{G.const_cnt++, x->strval}; + } else if 
(x->cls == Expr::_Apply) { // even "1 + 2" is Expr::_Apply (it applies `_+_`) + code.emplace_back(v->loc, Op::_Import, std::vector()); + auto tmp_vars = x->pre_compile(code); + code.emplace_back(v->loc, Op::_Return, std::move(tmp_vars)); + code.emplace_back(v->loc, Op::_Nop); + // It is REQUIRED to execute "optimizations" as in tolk.cpp + code.simplify_var_types(); + code.prune_unreachable_code(); + code.split_vars(true); + for (int i = 0; i < 16; i++) { + code.compute_used_code_vars(); + code.fwd_analyze(); + code.prune_unreachable_code(); + } + code.mark_noreturn(); + AsmOpList out_list(0, &code.vars); + code.generate_code(out_list); + if (out_list.list_.size() != 1) { + init_value->error("precompiled expression must result in single operation"); + } + auto op = out_list.list_[0]; + if (!op.is_const()) { + init_value->error("precompiled expression must result in compilation time constant"); + } + if (op.origin.is_null() || !op.origin->is_valid()) { + init_value->error("precompiled expression did not result in a valid integer constant"); + } + new_value = new SymValConst{G.const_cnt++, op.origin}; + } else { + init_value->error("integer or slice literal or constant expected"); + } + sym_def->value = new_value; +} + +void process_vertex(V v) { + TypeExpr* var_type = v->declared_type; + SymDef* sym_def = define_global_symbol(calc_sym_idx(v->var_name), false, v->loc); + if (!sym_def) { + v->error("cannot define global symbol"); + } + if (sym_def->value) { + auto val = dynamic_cast(sym_def->value); + if (!val) { + v->error("symbol cannot be redefined as a global variable"); + } + try { + unify(var_type, val->sym_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot unify new type " << var_type << " of global variable `" << sym_def->name() + << "` with its previous type " << val->sym_type << ": " << ue; + v->error(os.str()); + } + } else { + sym_def->value = new SymValGlobVar{G.glob_var_cnt++, var_type}; +#ifdef TOLK_DEBUG + 
dynamic_cast(sym_def->value)->name = v->var_name; +#endif + G.glob_vars.push_back(sym_def); + } +} + +class FileToLegacyVisitor final : public ASTVisitorToplevelDeclarations { + SrcFile* current_file; + + // todo inline here all these + void on_pragma_no_arg(V v) override { + process_vertex(v); + } + + void on_pragma_version(V v) override { + process_vertex(v); + } + + void on_include_statement(V v) override { + process_vertex(v, current_file); + } + + void on_function_declaration(V v) override { + process_vertex(v); + } + + void on_constant_declaration(V v) override { + process_vertex(v); + } + + void on_global_var_declaration(V v) override { + process_vertex(v); + } + +public: + explicit FileToLegacyVisitor(SrcFile* file) : current_file(file) { + } +}; + +void process_file_ast(AnyV file_ast) { + auto v = file_ast->try_as(); + if (!v) { + throw UnexpectedASTNodeType(file_ast, "process_file_ast"); + } + + const SrcFile* file = v->file; + if (!file->is_stdlib_file()) { + // v->debug_print(); + G.generated_from += file->rel_filename; + G.generated_from += ", "; + } + + FileToLegacyVisitor(const_cast(file)).start_visiting_file(v); +} + +} // namespace tolk diff --git a/tolk/ast-to-legacy.h b/tolk/ast-to-legacy.h new file mode 100644 index 00000000..f7660f20 --- /dev/null +++ b/tolk/ast-to-legacy.h @@ -0,0 +1,28 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +#include "ast.h" + +namespace tolk { + +struct SrcFile; + +void process_file_ast(AnyV file_ast); + +} // namespace tolk + diff --git a/tolk/ast-visitor.h b/tolk/ast-visitor.h new file mode 100644 index 00000000..237a79f1 --- /dev/null +++ b/tolk/ast-visitor.h @@ -0,0 +1,199 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +#include "ast.h" +#include "platform-utils.h" + +/* + * A module implementing base functionality of read-only traversing a vertex tree. + * Since a vertex in general doesn't store a vector of children, iterating is possible only for concrete node_type. + * E.g., for ast_if_statement, visit nodes cond, if-body and else-body. For ast_string_const, nothing. And so on. + * Visitors below are helpers to inherit from and handle specific vertex types. + * + * Note, that absence of "children" in ASTNodeBase is not a drawback. Instead, it encourages you to think + * about types and match the type system. + * + * The visitor is read-only, it does not modify visited nodes (except if you purposely call mutating methods). + * For example, if you want to replace "beginCell()" call with "begin_cell", a visitor isn't enough for you. 
+ * To replace vertices, consider another API: ast-replacer.h. + */ + +namespace tolk { + +class ASTVisitor { +protected: + GNU_ATTRIBUTE_ALWAYS_INLINE static void visit_children(const ASTNodeLeaf* v) { + static_cast(v); + } + + GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTNodeUnary* v) { + visit(v->child); + } + + GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTNodeBinary* v) { + visit(v->lhs); + visit(v->rhs); + } + + GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTNodeVararg* v) { + for (AnyV child : v->children) { + visit(child); + } + } + + virtual void visit(AnyV v) = 0; + +public: + virtual ~ASTVisitor() = default; +}; + +class ASTVisitorFunctionBody : public ASTVisitor { +protected: + using parent = ASTVisitorFunctionBody; + + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return 
visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + + void visit(AnyV v) final { + switch (v->type) { + case ast_empty: return visit(v->as()); + case ast_identifier: return visit(v->as()); + case ast_int_const: return visit(v->as()); + case ast_string_const: return visit(v->as()); + case ast_bool_const: return visit(v->as()); + case ast_nil_tuple: return visit(v->as()); + case ast_function_call: return visit(v->as()); + case ast_parenthesized_expr: return visit(v->as()); + case ast_underscore: return visit(v->as()); + case ast_type_expression: return visit(v->as()); + case ast_variable_declaration: return visit(v->as()); + case ast_tensor: return visit(v->as()); + case ast_tensor_square: return visit(v->as()); + case ast_dot_tilde_call: return visit(v->as()); + case ast_unary_operator: return visit(v->as()); + case ast_binary_operator: return visit(v->as()); + case ast_ternary_operator: return visit(v->as()); + case ast_return_statement: return visit(v->as()); + case ast_sequence: return visit(v->as()); + case ast_repeat_statement: return visit(v->as()); + case ast_while_statement: return visit(v->as()); + case ast_do_until_statement: return visit(v->as()); + case ast_try_catch_statement: return visit(v->as()); + case ast_if_statement: return visit(v->as()); + case ast_asm_body: return visit(v->as()); + default: + throw UnexpectedASTNodeType(v, "ASTVisitorFunctionBody::visit"); + } + } + +public: + void start_visiting_function(V v_function) { + visit(v_function->get_body()); + } +}; + +class ASTVisitorAllFunctionsInFile : public ASTVisitorFunctionBody { +protected: + using parent = ASTVisitorAllFunctionsInFile; + + virtual bool should_enter_function(V v) = 0; + +public: + void start_visiting_file(V v_file) { + for (AnyV v : v_file->get_toplevel_declarations()) { + if (auto v_func = v->try_as()) { + if 
(should_enter_function(v_func)) { + visit(v_func->get_body()); + } + } + } + } +}; + +class ASTVisitorToplevelDeclarations : public ASTVisitor { +protected: + using parent = ASTVisitorToplevelDeclarations; + + virtual void on_pragma_no_arg(V v) = 0; + virtual void on_pragma_version(V v) = 0; + virtual void on_include_statement(V v) = 0; + virtual void on_constant_declaration(V v) = 0; + virtual void on_global_var_declaration(V v) = 0; + virtual void on_function_declaration(V v) = 0; + + void visit(AnyV v) final { + switch (v->type) { + case ast_pragma_no_arg: + on_pragma_no_arg(v->as()); + break; + case ast_pragma_version: + on_pragma_version(v->as()); + break; + case ast_include_statement: + on_include_statement(v->as()); + break; + case ast_constant_declaration_list: + for (const auto& v_decl : v->as()->get_declarations()) { + on_constant_declaration(v_decl->as()); + } + break; + case ast_global_var_declaration_list: + for (const auto& v_decl : v->as()->get_declarations()) { + on_global_var_declaration(v_decl->as()); + } + break; + case ast_function_declaration: + on_function_declaration(v->as()); + break; + default: + throw UnexpectedASTNodeType(v, "ASTVisitorToplevelDeclarations::visit"); + } + } + +public: + void start_visiting_file(V v_file) { + for (AnyV v : v_file->get_toplevel_declarations()) { + visit(v); + } + } +}; + +} // namespace tolk diff --git a/tolk/ast.cpp b/tolk/ast.cpp new file mode 100644 index 00000000..ec060c58 --- /dev/null +++ b/tolk/ast.cpp @@ -0,0 +1,70 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. 
+ + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "ast.h" +#include "ast-stringifier.h" +#include + +namespace tolk { + +static_assert(sizeof(ASTNodeBase) == 12); + +#ifdef TOLK_DEBUG + +std::string ASTNodeBase::to_debug_string(bool colored) const { + ASTStringifier s(colored); + return s.to_string_with_children(this); +} + +void ASTNodeBase::debug_print() const { + std::cerr << to_debug_string(true) << std::endl; +} + +#endif // TOLK_DEBUG + +UnexpectedASTNodeType::UnexpectedASTNodeType(AnyV v_unexpected, const char* place_where): v_unexpected(v_unexpected) { + message = "Unexpected ASTNodeType "; +#ifdef TOLK_DEBUG + message += ASTStringifier::ast_node_type_to_string(v_unexpected->type); + message += " "; +#endif + message += "in "; + message += place_where; +} + +void ASTNodeBase::error(const std::string& err_msg) const { + throw ParseError(loc, err_msg); +} + +int Vertex::lookup_idx(std::string_view nameT) const { + for (size_t idx = 0; idx < children.size(); ++idx) { + if (children[idx] && children[idx]->as()->nameT == nameT) { + return static_cast(idx); + } + } + return -1; +} + +int Vertex::lookup_idx(std::string_view arg_name) const { + for (size_t idx = 0; idx < children.size(); ++idx) { + if (children[idx] && children[idx]->as()->arg_name == arg_name) { + return static_cast(idx); + } + } + return -1; +} + +} // namespace tolk diff --git a/tolk/ast.h b/tolk/ast.h new file mode 100644 index 00000000..c3fe1394 --- /dev/null +++ b/tolk/ast.h @@ -0,0 +1,567 @@ +/* + This file is part of TON Blockchain Library. 
+ + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +#include +#include "platform-utils.h" +#include "src-file.h" +#include "type-expr.h" +#include "lexer.h" + +/* + * Here we introduce AST representation of Tolk source code. + * Historically, in FunC, there was no AST: while lexing, symbols were registered, types were inferred, and so on. + * There was no way to perform any more or less semantic analysis. + * In Tolk, I've implemented parsing .tolk files into AST at first, and then converting this AST + * into legacy representation (see ast-to-legacy.cpp). + * In the future, more and more code analysis will be moved out of legacy to AST-level. + * + * From the user's point of view, all AST vertices are constant. All API is based on constancy. + * Even though fields of vertex structs are public, they can't be modified, since vertices are accepted by const ref. 
+ * Generally, there are two ways of accepting a vertex: + * * AnyV (= const ASTNodeBase*) + * the only thing you can do with this vertex is to see v->type (ASTNodeType) and to cast via v->as<node_type>() + * * V<node_type> (= const Vertex<node_type>*) + * a specific type of vertex, you can use its fields and methods + * There is one way of creating a vertex: + * * createV<node_type>(...constructor_args) (= new Vertex<node_type>(...)) + * vertices are currently created on a heap, without any custom memory arena, just allocated and never deleted + * + * Having AnyV and knowing its node_type, a call + * v->as<node_type>() + * will return a typed vertex. + * There is also a shorthand v->try_as<node_type>() which returns V<node_type> or nullptr if types don't match: + * if (auto v_int = v->try_as<ast_int_const>()) + * Note, that these casts are NOT DYNAMIC. ASTNode is not a virtual base, it has no vtable. + * So, as<...>() is just a compile-time casting, without any runtime overhead. + * + * Note, that ASTNodeBase doesn't store any vector of children. That's why there is no way to loop over + * a random (unknown) vertex. Only a concrete Vertex stores its children (if any). + * Hence, to iterate over a custom vertex (e.g., a function body), one should inherit some kind of ASTVisitor. + * Besides read-only visiting, there is a "visit and replace" pattern. + * See ast-visitor.h and ast-replacer.h. 
+ */ + +namespace tolk { + +enum ASTNodeType { + ast_empty, + ast_identifier, + ast_int_const, + ast_string_const, + ast_bool_const, + ast_nil_tuple, + ast_function_call, + ast_parenthesized_expr, + ast_global_var_declaration, + ast_global_var_declaration_list, + ast_constant_declaration, + ast_constant_declaration_list, + ast_underscore, + ast_type_expression, + ast_variable_declaration, + ast_tensor, + ast_tensor_square, + ast_dot_tilde_call, + ast_unary_operator, + ast_binary_operator, + ast_ternary_operator, + ast_return_statement, + ast_sequence, + ast_repeat_statement, + ast_while_statement, + ast_do_until_statement, + ast_try_catch_statement, + ast_if_statement, + ast_forall_item, + ast_forall_list, + ast_argument, + ast_argument_list, + ast_asm_body, + ast_function_declaration, + ast_pragma_no_arg, + ast_pragma_version, + ast_include_statement, + ast_tolk_file, +}; + +struct ASTNodeBase; + +using AnyV = const ASTNodeBase*; + +template +struct Vertex; + +template +using V = const Vertex*; + +#define createV new Vertex + +struct UnexpectedASTNodeType final : std::exception { + AnyV v_unexpected; + std::string message; + + explicit UnexpectedASTNodeType(AnyV v_unexpected, const char* place_where); + + const char* what() const noexcept override { + return message.c_str(); + } +}; + +// --------------------------------------------------------- + +struct ASTNodeBase { + const ASTNodeType type; + const SrcLocation loc; + + ASTNodeBase(ASTNodeType type, SrcLocation loc) : type(type), loc(loc) {} + + template + V as() const { +#ifdef TOLK_DEBUG + if (type != node_type) { + throw Fatal("v->as<...> to wrong node_type"); + } +#endif + return static_cast>(this); + } + + template + V try_as() const { + return type == node_type ? 
static_cast>(this) : nullptr; + } + + #ifdef TOLK_DEBUG + std::string to_debug_string() const { return to_debug_string(false); } + std::string to_debug_string(bool colored) const; + void debug_print() const; +#endif + + GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD + void error(const std::string& err_msg) const; +}; + +struct ASTNodeLeaf : ASTNodeBase { + friend class ASTVisitor; + friend class ASTReplacer; + +protected: + ASTNodeLeaf(ASTNodeType type, SrcLocation loc) + : ASTNodeBase(type, loc) {} +}; + +struct ASTNodeUnary : ASTNodeBase { + friend class ASTVisitor; + friend class ASTReplacer; + +protected: + AnyV child; + + ASTNodeUnary(ASTNodeType type, SrcLocation loc, AnyV child) + : ASTNodeBase(type, loc), child(child) {} +}; + +struct ASTNodeBinary : ASTNodeBase { + friend class ASTVisitor; + friend class ASTReplacer; + +protected: + AnyV lhs; + AnyV rhs; + + ASTNodeBinary(ASTNodeType type, SrcLocation loc, AnyV lhs, AnyV rhs) + : ASTNodeBase(type, loc), lhs(lhs), rhs(rhs) {} +}; + +struct ASTNodeVararg : ASTNodeBase { + friend class ASTVisitor; + friend class ASTReplacer; + +protected: + std::vector children; + + ASTNodeVararg(ASTNodeType type, SrcLocation loc, std::vector children) + : ASTNodeBase(type, loc), children(std::move(children)) {} + +public: + int size() const { return static_cast(children.size()); } + bool empty() const { return children.empty(); } +}; + +// --------------------------------------------------------- + +template<> +struct Vertex final : ASTNodeLeaf { + Vertex() + : ASTNodeLeaf(ast_empty, SrcLocation()) {} +}; + +template<> +struct Vertex final : ASTNodeLeaf { + std::string_view name; + + Vertex(SrcLocation loc, std::string_view name) + : ASTNodeLeaf(ast_identifier, loc), name(name) {} +}; + +template<> +struct Vertex final : ASTNodeLeaf { + std::string_view int_val; + + Vertex(SrcLocation loc, std::string_view int_val) + : ASTNodeLeaf(ast_int_const, loc), int_val(int_val) {} +}; + +template<> +struct Vertex final : ASTNodeLeaf { + 
std::string_view str_val; + char modifier; + + Vertex(SrcLocation loc, std::string_view str_val, char modifier) + : ASTNodeLeaf(ast_string_const, loc), str_val(str_val), modifier(modifier) {} +}; + +template<> +struct Vertex final : ASTNodeLeaf { + bool bool_val; + + Vertex(SrcLocation loc, bool bool_val) + : ASTNodeLeaf(ast_bool_const, loc), bool_val(bool_val) {} +}; + +template<> +struct Vertex final : ASTNodeLeaf { + explicit Vertex(SrcLocation loc) + : ASTNodeLeaf(ast_nil_tuple, loc) {} +}; + +template<> +struct Vertex final : ASTNodeBinary { + // even for f(1,2,3), f (lhs) is called with a single arg (tensor "(1,2,3)") (rhs) + AnyV get_called_f() const { return lhs; } + AnyV get_called_arg() const { return rhs; } + + Vertex(SrcLocation loc, AnyV lhs_f, AnyV arg) + : ASTNodeBinary(ast_function_call, loc, lhs_f, arg) {} +}; + +template<> +struct Vertex final : ASTNodeUnary { + AnyV get_expr() const { return child; } + + Vertex(SrcLocation loc, AnyV expr) + : ASTNodeUnary(ast_parenthesized_expr, loc, expr) {} +}; + +template<> +struct Vertex final : ASTNodeLeaf { + std::string_view var_name; + TypeExpr* declared_type; // may be nullptr + + Vertex(SrcLocation loc, std::string_view var_name, TypeExpr* declared_type) + : ASTNodeLeaf(ast_global_var_declaration, loc), var_name(var_name), declared_type(declared_type) {} +}; + +template<> +struct Vertex final : ASTNodeVararg { + const std::vector& get_declarations() const { return children; } + + Vertex(SrcLocation loc, std::vector declarations) + : ASTNodeVararg(ast_global_var_declaration_list, loc, std::move(declarations)) {} +}; + +template<> +struct Vertex final : ASTNodeUnary { + std::string_view const_name; + TypeExpr* declared_type; // may be nullptr + + AnyV get_init_value() const { return child; } + + Vertex(SrcLocation loc, std::string_view const_name, TypeExpr* declared_type, AnyV init_value) + : ASTNodeUnary(ast_constant_declaration, loc, init_value), const_name(const_name), declared_type(declared_type) {} 
+}; + +template<> +struct Vertex final : ASTNodeVararg { + const std::vector& get_declarations() const { return children; } + + Vertex(SrcLocation loc, std::vector declarations) + : ASTNodeVararg(ast_constant_declaration_list, loc, std::move(declarations)) {} +}; + +template<> +struct Vertex final : ASTNodeLeaf { + explicit Vertex(SrcLocation loc) + : ASTNodeLeaf(ast_underscore, loc) {} +}; + +template<> +struct Vertex final : ASTNodeLeaf { + TypeExpr* declared_type; + + Vertex(SrcLocation loc, TypeExpr* declared_type) + : ASTNodeLeaf(ast_type_expression, loc), declared_type(declared_type) {} +}; + +template<> +struct Vertex final : ASTNodeUnary { + TypeExpr* declared_type; + + AnyV get_variable_or_list() const { return child; } // identifier, tuple, tensor + + Vertex(SrcLocation loc, TypeExpr* declared_type, AnyV dest) + : ASTNodeUnary(ast_variable_declaration, loc, dest), declared_type(declared_type) {} +}; + +template<> +struct Vertex final : ASTNodeVararg { + const std::vector& get_items() const { return children; } + AnyV get_item(int i) const { return children.at(i); } + + Vertex(SrcLocation loc, std::vector items) + : ASTNodeVararg(ast_tensor, loc, std::move(items)) {} +}; + +template<> +struct Vertex final : ASTNodeVararg { + const std::vector& get_items() const { return children; } + AnyV get_item(int i) const { return children.at(i); } + + Vertex(SrcLocation loc, std::vector items) + : ASTNodeVararg(ast_tensor_square, loc, std::move(items)) {} +}; + +template<> +struct Vertex final : ASTNodeBinary { + std::string_view method_name; // starts with . 
or ~ + + AnyV get_lhs() const { return lhs; } + AnyV get_arg() const { return rhs; } + + Vertex(SrcLocation loc, std::string_view method_name, AnyV lhs, AnyV rhs) + : ASTNodeBinary(ast_dot_tilde_call, loc, lhs, rhs), method_name(method_name) {} +}; + +template<> +struct Vertex final : ASTNodeUnary { + std::string_view operator_name; + TokenType tok; + + AnyV get_rhs() const { return child; } + + Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyV rhs) + : ASTNodeUnary(ast_unary_operator, loc, rhs), operator_name(operator_name), tok(tok) {} +}; + +template<> +struct Vertex final : ASTNodeBinary { + std::string_view operator_name; + TokenType tok; + + AnyV get_lhs() const { return lhs; } + AnyV get_rhs() const { return rhs; } + + Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyV lhs, AnyV rhs) + : ASTNodeBinary(ast_binary_operator, loc, lhs, rhs), operator_name(operator_name), tok(tok) {} +}; + +template<> +struct Vertex final : ASTNodeVararg { + AnyV get_cond() const { return children.at(0); } + AnyV get_when_true() const { return children.at(1); } + AnyV get_when_false() const { return children.at(2); } + + Vertex(SrcLocation loc, AnyV cond, AnyV when_true, AnyV when_false) + : ASTNodeVararg(ast_ternary_operator, loc, {cond, when_true, when_false}) {} +}; + +template<> +struct Vertex : ASTNodeUnary { + AnyV get_return_value() const { return child; } + + Vertex(SrcLocation loc, AnyV child) + : ASTNodeUnary(ast_return_statement, loc, child) {} +}; + +template<> +struct Vertex final : ASTNodeVararg { + SrcLocation loc_end; + + const std::vector& get_items() const { return children; } + AnyV get_item(int i) const { return children.at(i); } + + Vertex(SrcLocation loc, SrcLocation loc_end, std::vector items) + : ASTNodeVararg(ast_sequence, loc, std::move(items)), loc_end(loc_end) {} +}; + +template<> +struct Vertex final : ASTNodeBinary { + AnyV get_cond() const { return lhs; } + auto get_body() const { return rhs->as(); 
} + + Vertex(SrcLocation loc, AnyV cond, V body) + : ASTNodeBinary(ast_repeat_statement, loc, cond, body) {} +}; + +template<> +struct Vertex final : ASTNodeBinary { + AnyV get_cond() const { return lhs; } + auto get_body() const { return rhs->as(); } + + Vertex(SrcLocation loc, AnyV cond, V body) + : ASTNodeBinary(ast_while_statement, loc, cond, body) {} +}; + +template<> +struct Vertex final : ASTNodeBinary { + auto get_body() const { return lhs->as(); } + AnyV get_cond() const { return rhs; } + + Vertex(SrcLocation loc, V body, AnyV cond) + : ASTNodeBinary(ast_do_until_statement, loc, body, cond) {} +}; + +template<> +struct Vertex final : ASTNodeVararg { + auto get_try_body() const { return children.at(0)->as(); } + AnyV get_catch_expr() const { return children.at(1); } // it's a tensor + auto get_catch_body() const { return children.at(2)->as(); } + + Vertex(SrcLocation loc, V try_body, AnyV catch_expr, V catch_body) + : ASTNodeVararg(ast_try_catch_statement, loc, {try_body, catch_expr, catch_body}) {} +}; + +template<> +struct Vertex final : ASTNodeVararg { + bool is_ifnot; + + AnyV get_cond() const { return children.at(0); } + auto get_if_body() const { return children.at(1)->as(); } + auto get_else_body() const { return children.at(2)->as(); } // always exists (when else omitted, it's empty) + + Vertex(SrcLocation loc, bool is_ifnot, AnyV cond, V if_body, V else_body) + : ASTNodeVararg(ast_if_statement, loc, {cond, if_body, else_body}), is_ifnot(is_ifnot) {} +}; + +template<> +struct Vertex final : ASTNodeLeaf { + TypeExpr* created_type; // used to keep same pointer, since TypeExpr::new_var(i) always allocates + std::string nameT; + + Vertex(SrcLocation loc, TypeExpr* created_type, std::string nameT) + : ASTNodeLeaf(ast_forall_item, loc), created_type(created_type), nameT(std::move(nameT)) {} +}; + +template<> +struct Vertex final : ASTNodeVararg { + std::vector get_items() const { return children; } + auto get_item(int i) const { return 
children.at(i)->as(); } + + Vertex(SrcLocation loc, std::vector forall_items) + : ASTNodeVararg(ast_forall_list, loc, std::move(forall_items)) {} + + int lookup_idx(std::string_view nameT) const; +}; + +template<> +struct Vertex final : ASTNodeLeaf { + std::string_view arg_name; + TypeExpr* arg_type; + + Vertex(SrcLocation loc, std::string_view arg_name, TypeExpr* arg_type) + : ASTNodeLeaf(ast_argument, loc), arg_name(arg_name), arg_type(arg_type) {} +}; + +template<> +struct Vertex final : ASTNodeVararg { + std::vector arg_order; + std::vector ret_order; + + const std::vector& get_asm_commands() const { return children; } // ast_string_const[] + + Vertex(SrcLocation loc, std::vector arg_order, std::vector ret_order, std::vector asm_commands) + : ASTNodeVararg(ast_asm_body, loc, std::move(asm_commands)), arg_order(std::move(arg_order)), ret_order(std::move(ret_order)) {} +}; + +template<> +struct Vertex final : ASTNodeVararg { + const std::vector& get_args() const { return children; } + auto get_arg(int i) const { return children.at(i)->as(); } + + Vertex(SrcLocation loc, std::vector args) + : ASTNodeVararg(ast_argument_list, loc, std::move(args)) {} + + int lookup_idx(std::string_view arg_name) const; +}; + +template<> +struct Vertex final : ASTNodeBinary { + int get_num_args() const { return lhs->as()->size(); } + auto get_arg_list() const { return lhs->as(); } + auto get_arg(int i) const { return lhs->as()->get_arg(i); } + AnyV get_body() const { return rhs; } // ast_sequence / ast_asm_body / ast_empty + + std::string name; + TypeExpr* ret_type = nullptr; + V forall_list = nullptr; + bool marked_as_pure = false; + bool marked_as_builtin = false; + bool marked_as_get_method = false; + bool marked_as_inline = false; + bool marked_as_inline_ref = false; + V method_id = nullptr; + + Vertex(SrcLocation loc, std::string name, V args, AnyV body) + : ASTNodeBinary(ast_function_declaration, loc, args, body), name(std::move(name)) {} +}; + +template<> +struct Vertex final 
: ASTNodeLeaf { + std::string_view pragma_name; + + Vertex(SrcLocation loc, std::string_view pragma_name) + : ASTNodeLeaf(ast_pragma_no_arg, loc), pragma_name(pragma_name) {} +}; + +template<> +struct Vertex final : ASTNodeLeaf { + TokenType cmp_tok; + std::string_view semver; + + Vertex(SrcLocation loc, TokenType cmp_tok, std::string_view semver) + : ASTNodeLeaf(ast_pragma_version, loc), cmp_tok(cmp_tok), semver(semver) {} +}; + +template<> +struct Vertex final : ASTNodeLeaf { + std::string_view file_name; + + Vertex(SrcLocation loc, std::string_view file_name) + : ASTNodeLeaf(ast_include_statement, loc), file_name(file_name) {} +}; + +template<> +struct Vertex final : ASTNodeVararg { + const SrcFile* const file; + + const std::vector& get_toplevel_declarations() const { return children; } + + Vertex(const SrcFile* file, std::vector toplevel_declarations) + : ASTNodeVararg(ast_tolk_file, SrcLocation(file), std::move(toplevel_declarations)), file(file) {} +}; + +} // namespace tolk diff --git a/tolk/builtins.cpp b/tolk/builtins.cpp index 439228f4..ddcb2630 100644 --- a/tolk/builtins.cpp +++ b/tolk/builtins.cpp @@ -72,22 +72,6 @@ SymDef* define_builtin_func(const std::string& name, TypeExpr* func_type, const return define_builtin_func_impl(name, new SymValAsmFunc{func_type, make_simple_compile(macro), arg_order, ret_order, !impure}); } -SymDef* force_autoapply(SymDef* def) { - if (def) { - auto val = dynamic_cast(def->value); - if (val) { - val->auto_apply = true; - } - } - return def; -} - -template -SymDef* define_builtin_const(std::string name, TypeExpr* const_type, Args&&... 
args) { - return force_autoapply( - define_builtin_func(name, TypeExpr::new_map(TypeExpr::new_unit(), const_type), std::forward(args)...)); -} - bool SymValAsmFunc::compile(AsmOpList& dest, std::vector& out, std::vector& in, SrcLocation where) const { if (simple_compile) { @@ -1219,11 +1203,10 @@ void define_builtins() { define_builtin_func("_<=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 6)); define_builtin_func("_>=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 3)); define_builtin_func("_<=>_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 7)); - define_builtin_const("true", Int, /* AsmOp::Const("TRUE") */ std::bind(compile_bool_const, _1, _2, true)); - define_builtin_const("false", Int, /* AsmOp::Const("FALSE") */ std::bind(compile_bool_const, _1, _2, false)); + define_builtin_func("true", TypeExpr::new_map(TypeExpr::new_unit(), Int), /* AsmOp::Const("TRUE") */ std::bind(compile_bool_const, _1, _2, true)); + define_builtin_func("false", TypeExpr::new_map(TypeExpr::new_unit(), Int), /* AsmOp::Const("FALSE") */ std::bind(compile_bool_const, _1, _2, false)); // define_builtin_func("null", Null, AsmOp::Const("PUSHNULL")); - define_builtin_const("nil", Tuple, AsmOp::Const("PUSHNULL")); - define_builtin_const("Nil", Tuple, AsmOp::Const("NIL")); + define_builtin_func("nil", TypeExpr::new_map(TypeExpr::new_unit(), Tuple), AsmOp::Const("PUSHNULL")); define_builtin_func("null?", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Int)), compile_is_null); define_builtin_func("throw", impure_un_op, compile_throw, true); define_builtin_func("throw_if", impure_bin_op, std::bind(compile_cond_throw, _1, _2, true), true); diff --git a/tolk/gen-abscode.cpp b/tolk/gen-abscode.cpp index ed937416..b95b434a 100644 --- a/tolk/gen-abscode.cpp +++ b/tolk/gen-abscode.cpp @@ -41,19 +41,7 @@ Expr::Expr(ExprCls c, sym_idx_t name_idx, std::initializer_list _arglist) } } -void Expr::chk_rvalue(const Lexer& lex) const { - if (!is_rvalue()) { - lex.error_at("rvalue expected before 
`", "`"); - } -} - -void Expr::chk_lvalue(const Lexer& lex) const { - if (!is_lvalue()) { - lex.error_at("lvalue expected before `", "`"); - } -} - -bool Expr::deduce_type(const Lexer& lex) { +bool Expr::deduce_type() { if (e_type) { return true; } @@ -77,7 +65,7 @@ bool Expr::deduce_type(const Lexer& lex) { std::ostringstream os; os << "cannot apply function " << sym->name() << " : " << sym_val->get_type() << " to arguments of type " << fun_type->args[0] << ": " << ue; - lex.error(os.str()); + throw ParseError(here, os.str()); } e_type = fun_type->args[1]; TypeExpr::remove_indirect(e_type); @@ -92,7 +80,7 @@ bool Expr::deduce_type(const Lexer& lex) { std::ostringstream os; os << "cannot apply expression of type " << args[0]->e_type << " to an expression of type " << args[1]->e_type << ": " << ue; - lex.error(os.str()); + throw ParseError(here, os.str()); } e_type = fun_type->args[1]; TypeExpr::remove_indirect(e_type); @@ -107,7 +95,7 @@ bool Expr::deduce_type(const Lexer& lex) { std::ostringstream os; os << "cannot assign an expression of type " << args[1]->e_type << " to a variable or pattern of type " << args[0]->e_type << ": " << ue; - lex.error(os.str()); + throw ParseError(here, os.str()); } e_type = args[0]->e_type; TypeExpr::remove_indirect(e_type); @@ -124,7 +112,7 @@ bool Expr::deduce_type(const Lexer& lex) { os << "cannot implicitly assign an expression of type " << args[1]->e_type << " to a variable or pattern of type " << rhs_type << " in modifying method `" << G.symbols.get_name(val) << "` : " << ue; - lex.error(os.str()); + throw ParseError(here, os.str()); } e_type = rhs_type->args[1]; TypeExpr::remove_indirect(e_type); @@ -139,7 +127,7 @@ bool Expr::deduce_type(const Lexer& lex) { } catch (UnifyError& ue) { std::ostringstream os; os << "condition in a conditional expression has non-integer type " << args[0]->e_type << ": " << ue; - lex.error(os.str()); + throw ParseError(here, os.str()); } try { unify(args[1]->e_type, args[2]->e_type); @@ -147,7 
+135,7 @@ bool Expr::deduce_type(const Lexer& lex) { std::ostringstream os; os << "the two variants in a conditional expression have different types " << args[1]->e_type << " and " << args[2]->e_type << " : " << ue; - lex.error(os.str()); + throw ParseError(here, os.str()); } e_type = args[1]->e_type; TypeExpr::remove_indirect(e_type); @@ -170,13 +158,13 @@ int Expr::define_new_vars(CodeBlob& code) { } case _Var: if (val < 0) { - val = code.create_var(TmpVar::_Named, e_type, sym, here); + val = code.create_var(false, e_type, sym, here); return 1; } break; case _Hole: if (val < 0) { - val = code.create_var(TmpVar::_Tmp, e_type, nullptr, here); + val = code.create_var(true, e_type, nullptr, here); } break; } @@ -279,7 +267,7 @@ std::vector pre_compile_tensor(const std::vector& args, CodeB res_lists[i] = args[i]->pre_compile(code, lval_globs); for (size_t j = 0; j < res_lists[i].size(); ++j) { TmpVar& var = code.vars.at(res_lists[i][j]); - if (!lval_globs && (var.cls & TmpVar::_Named)) { + if (!lval_globs && !var.is_tmp_unnamed) { var.on_modification.push_back([&modified_vars, i, j, cur_ops = code.cur_ops, done = false](SrcLocation here) mutable { if (!done) { done = true; diff --git a/tolk/lexer.cpp b/tolk/lexer.cpp index 1a5fe569..a9682e52 100644 --- a/tolk/lexer.cpp +++ b/tolk/lexer.cpp @@ -361,19 +361,21 @@ struct ChunkIdentifierOrKeyword final : ChunkLexerBase { if (str == "asm") return tok_asm; if (str == "get") return tok_get; if (str == "try") return tok_try; + if (str == "nil") return tok_nil; break; case 4: if (str == "else") return tok_else; + if (str == "true") return tok_true; if (str == "pure") return tok_pure; if (str == "then") return tok_then; if (str == "cell") return tok_cell; if (str == "cont") return tok_cont; - if (str == "type") return tok_type; // todo unused token? 
break; case 5: if (str == "slice") return tok_slice; if (str == "tuple") return tok_tuple; if (str == "const") return tok_const; + if (str == "false") return tok_false; if (str == "while") return tok_while; if (str == "until") return tok_until; if (str == "catch") return tok_catch; @@ -427,7 +429,7 @@ struct ChunkIdentifierOrKeyword final : ChunkLexerBase { if (TokenType kw_tok = maybe_keyword(str_val)) { lex->add_token(kw_tok, str_val); } else { - G.symbols.lookup_add(static_cast(str_val)); + G.symbols.lookup_add(str_val); lex->add_token(tok_identifier, str_val); } return true; @@ -453,7 +455,7 @@ struct ChunkIdentifierInBackticks final : ChunkLexerBase { std::string_view str_val(str_begin + 1, lex->c_str() - str_begin - 1); lex->skip_chars(1); - G.symbols.lookup_add(static_cast(str_val)); + G.symbols.lookup_add(str_val); lex->add_token(tok_identifier, str_val); return true; } @@ -610,21 +612,12 @@ void Lexer::next_special(TokenType parse_next_as, const char* str_expected) { cur_token = tokens_circularbuf[++cur_token_idx & 7]; } -int Lexer::cur_sym_idx() const { - assert(tok() == tok_identifier); - return G.symbols.lookup_add(cur_str_std_string()); -} - void Lexer::error(const std::string& err_msg) const { throw ParseError(cur_location(), err_msg); } -void Lexer::error_at(const std::string& prefix, const std::string& suffix) const { - throw ParseError(cur_location(), prefix + cur_str_std_string() + suffix); -} - void Lexer::on_expect_call_failed(const char* str_expected) const { - throw ParseError(cur_location(), std::string(str_expected) + " expected instead of `" + cur_str_std_string() + "`"); + throw ParseError(cur_location(), std::string(str_expected) + " expected instead of `" + std::string(cur_str()) + "`"); } void lexer_init() { diff --git a/tolk/lexer.h b/tolk/lexer.h index 04fc025d..b24efa9d 100644 --- a/tolk/lexer.h +++ b/tolk/lexer.h @@ -31,6 +31,10 @@ enum TokenType { tok_identifier, + tok_true, + tok_false, + tok_nil, // todo "null" keyword is still 
absent, "nil" in FunC is an empty tuple + tok_plus, tok_minus, tok_mul, @@ -108,7 +112,6 @@ enum TokenType { tok_builder, tok_cont, tok_tuple, - tok_type, tok_mapsto, tok_forall, @@ -206,10 +209,8 @@ public: TokenType tok() const { return cur_token.type; } std::string_view cur_str() const { return cur_token.str_val; } - std::string cur_str_std_string() const { return static_cast(cur_token.str_val); } SrcLocation cur_location() const { return location; } const SrcFile* cur_file() const { return file; } - int cur_sym_idx() const; void next(); void next_special(TokenType parse_next_as, const char* str_expected); @@ -228,8 +229,6 @@ public: GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD void error(const std::string& err_msg) const; - GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD - void error_at(const std::string& prefix, const std::string& suffix) const; }; void lexer_init(); diff --git a/tolk/parse-tolk.cpp b/tolk/parse-tolk.cpp deleted file mode 100644 index 0b41152d..00000000 --- a/tolk/parse-tolk.cpp +++ /dev/null @@ -1,1983 +0,0 @@ -/* - This file is part of TON Blockchain Library. - - TON Blockchain Library is free software: you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - TON Blockchain Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with TON Blockchain Library. If not, see . 
-*/ -#include "tolk.h" -#include "platform-utils.h" -#include "compiler-state.h" -#include "td/utils/crypto.h" -#include "common/refint.h" -#include "openssl/digest.hpp" -#include "block/block.h" -#include "block-parse.h" - -namespace tolk { -using namespace std::literals::string_literals; - -inline bool is_dot_ident(sym_idx_t idx) { - return G.symbols.get_subclass(idx) == SymbolSubclass::dot_identifier; -} - -inline bool is_tilde_ident(sym_idx_t idx) { - return G.symbols.get_subclass(idx) == SymbolSubclass::tilde_identifier; -} - -inline bool is_special_ident(sym_idx_t idx) { - return G.symbols.get_subclass(idx) != SymbolSubclass::undef; -} - -// given Expr::_Apply (a function call / a variable call), determine whether it's <, or >, or similar -// (an expression `1 < 2` is expressed as `_<_(1,2)`, see builtins.cpp) -static bool is_comparison_binary_op(const Expr* e_apply) { - const std::string& name = e_apply->sym->name(); - const size_t len = name.size(); - if (len < 3 || len > 5 || name[0] != '_' || name[len-1] != '_') { - return false; // not "_<_" and similar - } - - char c1 = name[1]; - char c2 = name[2]; - // < > <= != == >= <=> - return (len == 3 && (c1 == '<' || c1 == '>')) || - (len == 4 && (c1 == '<' || c1 == '>' || c1 == '!' 
|| c1 == '=') && c2 == '=') || - (len == 5 && (c1 == '<' && c2 == '=' && name[3] == '>')); -} - -// same as above, but to detect bitwise operators: & | ^ -// (in Tolk, they are used as logical ones due to absence of a boolean type and && || operators) -static bool is_bitwise_binary_op(const Expr* e_apply) { - const std::string& name = e_apply->sym->name(); - const size_t len = name.size(); - if (len != 3 || name[0] != '_' || name[len-1] != '_') { - return false; - } - - char c1 = name[1]; - return c1 == '&' || c1 == '|' || c1 == '^'; -} - -// same as above, but to detect addition/subtraction -static bool is_add_or_sub_binary_op(const Expr* e_apply) { - const std::string& name = e_apply->sym->name(); - const size_t len = name.size(); - if (len != 3 || name[0] != '_' || name[len-1] != '_') { - return false; - } - - char c1 = name[1]; - return c1 == '+' || c1 == '-'; -} - -static inline std::string get_builtin_operator_name(sym_idx_t sym_builtin) { - std::string underscored = G.symbols.get_name(sym_builtin); - return underscored.substr(1, underscored.size() - 2); -} - -// fire an error for a case "flags & 0xFF != 0" (equivalent to "flags & 1", probably unexpected) -// it would better be a warning, but we decided to make it a strict error -GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_lower_precedence(SrcLocation loc, sym_idx_t op_lower, sym_idx_t op_higher) { - std::string name_lower = get_builtin_operator_name(op_lower); - std::string name_higher = get_builtin_operator_name(op_higher); - throw ParseError(loc, name_lower + " has lower precedence than " + name_higher + - ", probably this code won't work as you expected. " - "Use parenthesis: either (... " + name_lower + " ...) to evaluate it first, or (... " + name_higher + " ...) 
to suppress this error."); -} - -// fire an error for a case "arg1 & arg2 | arg3" -GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_mix_bitwise_and_or(SrcLocation loc, sym_idx_t op1, sym_idx_t op2) { - std::string name1 = get_builtin_operator_name(op1); - std::string name2 = get_builtin_operator_name(op2); - throw ParseError(loc, "mixing " + name1 + " with " + name2 + " without parenthesis" - ", probably this code won't work as you expected. " - "Use parenthesis to emphasize operator precedence."); -} - -// diagnose when bitwise operators are used in a probably wrong way due to tricky precedence -// example: "flags & 0xFF != 0" is equivalent to "flags & 1", most likely it's unexpected -// the only way to suppress this error for the programmer is to use parenthesis -static void diagnose_bitwise_precedence(SrcLocation loc, sym_idx_t bitwise_sym, const Expr* lhs, const Expr* rhs) { - // handle "0 != flags & 0xFF" (lhs = "0 != flags") - if (!lhs->is_inside_parenthesis() && - lhs->cls == Expr::_Apply && lhs->e_type->is_int() && // fast false if 100% not - is_comparison_binary_op(lhs)) { - fire_error_lower_precedence(loc, bitwise_sym, lhs->sym->sym_idx); - // there is a tiny bug: "flags & _!=_(0xFF,0)" will also suggest to wrap rhs into parenthesis - } - - // handle "flags & 0xFF != 0" (rhs = "0xFF != 0") - if (!rhs->is_inside_parenthesis() && - rhs->cls == Expr::_Apply && rhs->e_type->is_int() && - is_comparison_binary_op(rhs)) { - fire_error_lower_precedence(loc, bitwise_sym, rhs->sym->sym_idx); - } - - // handle "arg1 & arg2 | arg3" (lhs = "arg1 & arg2") - if (!lhs->is_inside_parenthesis() && - lhs->cls == Expr::_Apply && lhs->e_type->is_int() && - is_bitwise_binary_op(lhs) && - lhs->sym->sym_idx != bitwise_sym) { - fire_error_mix_bitwise_and_or(loc, lhs->sym->sym_idx, bitwise_sym); - } -} - -// diagnose "a << 8 + 1" (equivalent to "a << 9", probably unexpected) -static void diagnose_addition_in_bitshift(SrcLocation loc, sym_idx_t bitshift_sym, const 
Expr* rhs) { - if (!rhs->is_inside_parenthesis() && - rhs->cls == Expr::_Apply && rhs->e_type->is_int() && - is_add_or_sub_binary_op(rhs)) { - fire_error_lower_precedence(loc, bitshift_sym, rhs->sym->sym_idx); - } -} - -/* - * - * PARSE SOURCE - * - */ - -// TE ::= TA | TA -> TE -// TA ::= int | ... | cont | var | _ | () | ( TE { , TE } ) | [ TE { , TE } ] -TypeExpr* parse_type(Lexer& lex); - -TypeExpr* parse_type1(Lexer& lex) { - switch (lex.tok()) { - case tok_int: - lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Int); - case tok_cell: - lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Cell); - case tok_slice: - lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Slice); - case tok_builder: - lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Builder); - case tok_cont: - lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Cont); - case tok_tuple: - lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Tuple); - case tok_var: - case tok_underscore: - lex.next(); - return TypeExpr::new_hole(); - case tok_identifier: { - auto sym = lookup_symbol(lex.cur_sym_idx()); - if (sym && dynamic_cast(sym->value)) { - auto val = dynamic_cast(sym->value); - lex.next(); - return val->get_type(); - } - lex.error_at("`", "` is not a type identifier"); - } - default: - break; - } - TokenType c; - if (lex.tok() == tok_opbracket) { - lex.next(); - c = tok_clbracket; - } else { - lex.expect(tok_oppar, ""); - c = tok_clpar; - } - if (lex.tok() == c) { - lex.next(); - return c == tok_clpar ? TypeExpr::new_unit() : TypeExpr::new_tuple({}); - } - auto t1 = parse_type(lex); - if (lex.tok() == tok_clpar) { - lex.expect(c, c == tok_clpar ? "')'" : "']'"); - return t1; - } - std::vector tlist{1, t1}; - while (lex.tok() == tok_comma) { - lex.next(); - tlist.push_back(parse_type(lex)); - } - lex.expect(c, c == tok_clpar ? "')'" : "']'"); - return c == tok_clpar ? 
TypeExpr::new_tensor(std::move(tlist)) : TypeExpr::new_tuple(std::move(tlist)); -} - -TypeExpr* parse_type(Lexer& lex) { - auto res = parse_type1(lex); - if (lex.tok() == tok_mapsto) { - lex.next(); - auto to = parse_type(lex); - return TypeExpr::new_map(res, to); - } else { - return res; - } -} - -FormalArg parse_formal_arg(Lexer& lex, int fa_idx) { - TypeExpr* arg_type = 0; - SrcLocation loc = lex.cur_location(); - if (lex.tok() == tok_underscore) { - lex.next(); - if (lex.tok() == tok_comma || lex.tok() == tok_clpar) { - return std::make_tuple(TypeExpr::new_hole(), (SymDef*)nullptr, loc); - } - arg_type = TypeExpr::new_hole(); - loc = lex.cur_location(); - } else if (lex.tok() != tok_identifier) { - arg_type = parse_type(lex); - } else { - auto sym = lookup_symbol(lex.cur_sym_idx()); - if (sym && dynamic_cast(sym->value)) { - auto val = dynamic_cast(sym->value); - lex.next(); - arg_type = val->get_type(); - } else { - arg_type = TypeExpr::new_hole(); - } - } - if (lex.tok() == tok_underscore || lex.tok() == tok_comma || lex.tok() == tok_clpar) { - if (lex.tok() == tok_underscore) { - loc = lex.cur_location(); - lex.next(); - } - return std::make_tuple(arg_type, (SymDef*)nullptr, loc); - } - lex.check(tok_identifier, "formal parameter name"); - loc = lex.cur_location(); - if (G.prohibited_var_names.count(G.symbols.get_name(lex.cur_sym_idx()))) { - throw ParseError{ - loc, PSTRING() << "symbol `" << G.symbols.get_name(lex.cur_sym_idx()) << "` cannot be redefined as a variable"}; - } - SymDef* new_sym_def = define_symbol(lex.cur_sym_idx(), true, loc); - if (!new_sym_def) { - lex.error_at("cannot define symbol `", "`"); - } - if (new_sym_def->value) { - lex.error_at("redefined formal parameter `", "`"); - } - new_sym_def->value = new SymVal{SymValKind::_Param, fa_idx, arg_type}; - lex.next(); - return std::make_tuple(arg_type, new_sym_def, loc); -} - -void parse_global_var_decl(Lexer& lex) { - TypeExpr* var_type = 0; - SrcLocation loc = lex.cur_location(); - if 
(lex.tok() == tok_underscore) { - lex.next(); - var_type = TypeExpr::new_hole(); - loc = lex.cur_location(); - } else if (lex.tok() != tok_identifier) { - var_type = parse_type(lex); - } else { - auto sym = lookup_symbol(lex.cur_sym_idx()); - if (sym && dynamic_cast(sym->value)) { - auto val = dynamic_cast(sym->value); - lex.next(); - var_type = val->get_type(); - } else { - var_type = TypeExpr::new_hole(); - } - } - lex.check(tok_identifier, "global variable name"); - loc = lex.cur_location(); - SymDef* sym_def = define_global_symbol(lex.cur_sym_idx(), false, loc); - if (!sym_def) { - lex.error_at("cannot define global symbol `", "`"); - } - if (sym_def->value) { - auto val = dynamic_cast(sym_def->value); - if (!val) { - lex.error_at("symbol `", "` cannot be redefined as a global variable"); - } - try { - unify(var_type, val->sym_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "cannot unify new type " << var_type << " of global variable `" << sym_def->name() - << "` with its previous type " << val->sym_type << ": " << ue; - lex.error(os.str()); - } - } else { - sym_def->value = new SymValGlobVar{G.glob_var_cnt++, var_type}; -#ifdef TOLK_DEBUG - dynamic_cast(sym_def->value)->name = lex.cur_str(); -#endif - G.glob_vars.push_back(sym_def); - } - lex.next(); -} - -Expr* parse_expr(Lexer& lex, CodeBlob& code, bool nv = false); - -void parse_const_decl(Lexer& lex) { - SrcLocation loc = lex.cur_location(); - int wanted_type = Expr::_None; - if (lex.tok() == tok_int) { - wanted_type = Expr::_Const; - lex.next(); - } else if (lex.tok() == tok_slice) { - wanted_type = Expr::_SliceConst; - lex.next(); - } - lex.check(tok_identifier, "constant name"); - loc = lex.cur_location(); - SymDef* sym_def = define_global_symbol(lex.cur_sym_idx(), false, loc); - if (!sym_def) { - lex.error_at("cannot define global symbol `", "`"); - } - if (sym_def->value) { // todo below it was a check (for duplicate include?) 
- lex.error_at("global symbol `", "` already exists"); - } - lex.next(); - if (lex.tok() != tok_assign) { - lex.error_at("expected = instead of ", ""); - } - lex.next(); - CodeBlob code; - // Handles processing and resolution of literals and consts - auto x = parse_expr(lex, code, false); // also does lex.next() ! - if (!x->is_rvalue()) { - lex.error("expression is not strictly Rvalue"); - } - if ((wanted_type == Expr::_Const) && (x->cls == Expr::_Apply)) - wanted_type = Expr::_None; // Apply is additionally checked to result in an integer - if ((wanted_type != Expr::_None) && (x->cls != wanted_type)) { - lex.error("expression type does not match wanted type"); - } - SymValConst* new_value = nullptr; - if (x->cls == Expr::_Const) { // Integer constant - new_value = new SymValConst{G.const_cnt++, x->intval}; - } else if (x->cls == Expr::_SliceConst) { // Slice constant (string) - new_value = new SymValConst{G.const_cnt++, x->strval}; - } else if (x->cls == Expr::_Apply) { // even "1 + 2" is Expr::_Apply (it applies `_+_`) - code.emplace_back(loc, Op::_Import, std::vector()); - auto tmp_vars = x->pre_compile(code); - code.emplace_back(loc, Op::_Return, std::move(tmp_vars)); - code.emplace_back(loc, Op::_Nop); // This is neccessary to prevent SIGSEGV! 
- // It is REQUIRED to execute "optimizations" as in tolk.cpp - code.simplify_var_types(); - code.prune_unreachable_code(); - code.split_vars(true); - for (int i = 0; i < 16; i++) { - code.compute_used_code_vars(); - code.fwd_analyze(); - code.prune_unreachable_code(); - } - code.mark_noreturn(); - AsmOpList out_list(0, &code.vars); - code.generate_code(out_list); - if (out_list.list_.size() != 1) { - lex.error("precompiled expression must result in single operation"); - } - auto op = out_list.list_[0]; - if (!op.is_const()) { - lex.error("precompiled expression must result in compilation time constant"); - } - if (op.origin.is_null() || !op.origin->is_valid()) { - lex.error("precompiled expression did not result in a valid integer constant"); - } - new_value = new SymValConst{G.const_cnt++, op.origin}; - } else { - lex.error("integer or slice literal or constant expected"); - } - sym_def->value = new_value; -} - -FormalArgList parse_formal_args(Lexer& lex) { - FormalArgList args; - lex.expect(tok_oppar, "formal argument list"); - if (lex.tok() == tok_clpar) { - lex.next(); - return args; - } - int fa_idx = 0; - args.push_back(parse_formal_arg(lex, fa_idx++)); - while (lex.tok() == tok_comma) { - lex.next(); - args.push_back(parse_formal_arg(lex, fa_idx++)); - } - lex.expect(tok_clpar, "')'"); - return args; -} - -void parse_const_decls(Lexer& lex) { - lex.expect(tok_const, "'const'"); - while (true) { - parse_const_decl(lex); - if (lex.tok() != tok_comma) { - break; - } - lex.expect(tok_comma, "','"); - } - lex.expect(tok_semicolon, "';'"); -} - -TypeExpr* extract_total_arg_type(const FormalArgList& arg_list) { - if (arg_list.empty()) { - return TypeExpr::new_unit(); - } - if (arg_list.size() == 1) { - return std::get<0>(arg_list[0]); - } - std::vector type_list; - for (auto& x : arg_list) { - type_list.push_back(std::get<0>(x)); - } - return TypeExpr::new_tensor(std::move(type_list)); -} - -void parse_global_var_decls(Lexer& lex) { - lex.expect(tok_global, 
"'global'"); - while (true) { - parse_global_var_decl(lex); - if (lex.tok() != tok_comma) { - break; - } - lex.expect(tok_comma, "','"); - } - lex.expect(tok_semicolon, "';'"); -} - -SymValCodeFunc* make_new_glob_func(SymDef* func_sym, TypeExpr* func_type, bool marked_as_pure) { - SymValCodeFunc* res = new SymValCodeFunc{G.glob_func_cnt, func_type, marked_as_pure}; -#ifdef TOLK_DEBUG - res->name = func_sym->name(); -#endif - func_sym->value = res; - G.glob_func.push_back(func_sym); - G.glob_func_cnt++; - return res; -} - -bool check_global_func(const Lexer& lex, sym_idx_t func_name) { - SymDef* def = lookup_symbol(func_name); - if (!def) { - lex.error("undefined symbol `" + G.symbols.get_name(func_name) + "`"); - return false; - } - SymVal* val = dynamic_cast(def->value); - if (!val) { - lex.error(std::string{"symbol `"} + G.symbols.get_name(func_name) + "` has no value and no type"); - return false; - } else if (!val->get_type()) { - lex.error(std::string{"symbol `"} + G.symbols.get_name(func_name) + "` has no type, possibly not a function"); - return false; - } else { - return true; - } -} - -Expr* make_func_apply(Expr* fun, Expr* x) { - Expr* res{nullptr}; - if (fun->cls == Expr::_GlobFunc) { - if (x->cls == Expr::_Tensor) { - res = new Expr{Expr::_Apply, fun->sym, x->args}; - } else { - res = new Expr{Expr::_Apply, fun->sym, {x}}; - } - res->flags = Expr::_IsRvalue | (fun->flags & Expr::_IsImpure); - } else { - res = new Expr{Expr::_VarApply, {fun, x}}; - res->flags = Expr::_IsRvalue; - } - return res; -} - -void check_import_exists_when_using_sym(const Lexer& lex, const SymDef* used_sym) { - if (!lex.cur_location().is_symbol_from_same_or_builtin_file(used_sym->loc)) { - const SrcFile* declared_in = used_sym->loc.get_src_file(); - bool has_import = false; - for (const SrcFile::ImportStatement& import_stmt : lex.cur_file()->imports) { - if (import_stmt.imported_file == declared_in) { - has_import = true; - } - } - if (!has_import) { - lex.error("Using a 
non-imported symbol `" + used_sym->name() + "`. Forgot to import \"" + declared_in->rel_filename + "\"?"); - } - } -} - -// parse ( E { , E } ) | () | [ E { , E } ] | [] | id | num | _ -Expr* parse_expr100(Lexer& lex, CodeBlob& code, bool nv) { - if (lex.tok() == tok_oppar || lex.tok() == tok_opbracket) { - bool tf = (lex.tok() == tok_opbracket); - TokenType clbr = (tf ? tok_clbracket : tok_clpar); - SrcLocation loc{lex.cur_location()}; - lex.next(); - if (lex.tok() == clbr) { - lex.next(); - Expr* res = new Expr{Expr::_Tensor, {}}; - res->flags = Expr::_IsRvalue; - res->here = loc; - res->e_type = TypeExpr::new_unit(); - if (tf) { - res = new Expr{Expr::_MkTuple, {res}}; - res->flags = Expr::_IsRvalue; - res->here = loc; - res->e_type = TypeExpr::new_tuple(res->args.at(0)->e_type); - } - return res; - } - Expr* res = parse_expr(lex, code, nv); - if (lex.tok() == tok_clpar) { - lex.expect(clbr, clbr == tok_clbracket ? "']'" : "')'"); - res->flags |= Expr::_IsInsideParenthesis; - return res; - } - std::vector type_list; - type_list.push_back(res->e_type); - int f = res->flags; - res = new Expr{Expr::_Tensor, {res}}; - while (lex.tok() == tok_comma) { - lex.next(); - auto x = parse_expr(lex, code, nv); - res->pb_arg(x); - if ((f ^ x->flags) & Expr::_IsType) { - lex.error("mixing type and non-type expressions inside the same tuple"); - } - f &= x->flags; - type_list.push_back(x->e_type); - } - res->here = loc; - res->flags = f; - res->e_type = TypeExpr::new_tensor(std::move(type_list), !tf); - if (tf) { - res = new Expr{Expr::_MkTuple, {res}}; - res->flags = f; - res->here = loc; - res->e_type = TypeExpr::new_tuple(res->args.at(0)->e_type); - } - lex.expect(clbr, clbr == tok_clbracket ? 
"']'" : "')'"); - return res; - } - TokenType t = lex.tok(); - if (t == tok_int_const) { - Expr* res = new Expr{Expr::_Const, lex.cur_location()}; - res->flags = Expr::_IsRvalue; - res->intval = td::string_to_int256(lex.cur_str_std_string()); - if (res->intval.is_null() || !res->intval->signed_fits_bits(257)) { - lex.error_at("invalid integer constant `", "`"); - } - res->e_type = TypeExpr::new_atomic(TypeExpr::_Int); - lex.next(); - return res; - } - if (t == tok_string_const) { - std::string str = lex.cur_str_std_string(); - lex.next(); - char modifier = 0; - if (lex.tok() == tok_string_modifier) { - modifier = lex.cur_str()[0]; - lex.next(); - } - Expr* res; - switch (modifier) { - case 0: - case 's': - case 'a': - res = new Expr{Expr::_SliceConst, lex.cur_location()}; - res->e_type = TypeExpr::new_atomic(TypeExpr::_Slice); - break; - case 'u': - case 'h': - case 'H': - case 'c': - res = new Expr{Expr::_Const, lex.cur_location()}; - res->e_type = TypeExpr::new_atomic(TypeExpr::_Int); - break; - default: - lex.error("invalid string type `" + std::string(1, modifier) + "`"); - } - res->flags = Expr::_IsRvalue; - switch (modifier) { - case 0: { - res->strval = td::hex_encode(str); - break; - } - case 's': { - res->strval = str; - unsigned char buff[128]; - int bits = (int)td::bitstring::parse_bitstring_hex_literal(buff, sizeof(buff), str.data(), str.data() + str.size()); - if (bits < 0) { - lex.error_at("Invalid hex bitstring constant `", "`"); - } - break; - } - case 'a': { // MsgAddressInt - // todo rewrite stdaddress parsing (if done, CMake dep "ton_crypto" can be replaced with "ton_crypto_core") - block::StdAddress a; - if (a.parse_addr(str)) { - res->strval = block::tlb::MsgAddressInt().pack_std_address(a)->as_bitslice().to_hex(); - } else { - lex.error_at("invalid standard address `", "`"); - } - break; - } - case 'u': { - res->intval = td::hex_string_to_int256(td::hex_encode(str)); - if (str.empty()) { - lex.error("empty integer ascii-constant"); - } - if 
(res->intval.is_null()) { - lex.error_at("too long integer ascii-constant `", "`"); - } - break; - } - case 'h': - case 'H': { - unsigned char hash[32]; - digest::hash_str(hash, str.data(), str.size()); - res->intval = td::bits_to_refint(hash, (modifier == 'h') ? 32 : 256, false); - break; - } - case 'c': { - res->intval = td::make_refint(td::crc32(td::Slice{str})); - break; - } - default: - __builtin_unreachable(); - } - return res; - } - if (t == tok_underscore) { - Expr* res = new Expr{Expr::_Hole, lex.cur_location()}; - res->val = -1; - res->flags = Expr::_IsLvalue; - res->e_type = TypeExpr::new_hole(); - lex.next(); - return res; - } - if (t == tok_var) { - Expr* res = new Expr{Expr::_Type, lex.cur_location()}; - res->flags = Expr::_IsType; - res->e_type = TypeExpr::new_hole(); - lex.next(); - return res; - } - if (t == tok_int || t == tok_cell || t == tok_slice || t == tok_builder || t == tok_cont || t == tok_type || t == tok_tuple) { - Expr* res = new Expr{Expr::_Type, lex.cur_location()}; - res->flags = Expr::_IsType; - res->e_type = TypeExpr::new_atomic(t); - lex.next(); - return res; - } - if (t == tok_identifier) { - auto sym = lookup_symbol(lex.cur_sym_idx()); - if (sym && dynamic_cast(sym->value)) { - auto val = dynamic_cast(sym->value); - Expr* res = new Expr{Expr::_Type, lex.cur_location()}; - res->flags = Expr::_IsType; - res->e_type = val->get_type(); - lex.next(); - return res; - } - if (sym && dynamic_cast(sym->value)) { - check_import_exists_when_using_sym(lex, sym); - auto val = dynamic_cast(sym->value); - Expr* res = new Expr{Expr::_GlobVar, lex.cur_location()}; - res->e_type = val->get_type(); - res->sym = sym; - res->flags = Expr::_IsLvalue | Expr::_IsRvalue | Expr::_IsImpure; - lex.next(); - return res; - } - if (sym && dynamic_cast(sym->value)) { - check_import_exists_when_using_sym(lex, sym); - auto val = dynamic_cast(sym->value); - Expr* res = new Expr{Expr::_None, lex.cur_location()}; - res->flags = Expr::_IsRvalue; - if 
(val->get_kind() == SymValConst::IntConst) { - res->cls = Expr::_Const; - res->intval = val->get_int_value(); - res->e_type = TypeExpr::new_atomic(tok_int); - } - else if (val->get_kind() == SymValConst::SliceConst) { - res->cls = Expr::_SliceConst; - res->strval = val->get_str_value(); - res->e_type = TypeExpr::new_atomic(tok_slice); - } - else { - lex.error("Invalid symbolic constant type"); - } - lex.next(); - return res; - } - if (sym && dynamic_cast(sym->value)) { - check_import_exists_when_using_sym(lex, sym); - } - bool auto_apply = false; - Expr* res = new Expr{Expr::_Var, lex.cur_location()}; - if (nv) { - res->val = ~lex.cur_sym_idx(); - res->e_type = TypeExpr::new_hole(); - res->flags = Expr::_IsLvalue; - // std::cerr << "defined new variable " << lex.cur().str << " : " << res->e_type << std::endl; - } else { - if (!sym) { - check_global_func(lex, lex.cur_sym_idx()); - sym = lookup_symbol(lex.cur_sym_idx()); - } - res->sym = sym; - SymVal* val = nullptr; - bool impure = false; - if (sym) { - val = dynamic_cast(sym->value); - } - if (!val) { - lex.error_at("undefined identifier `", "`"); - } else if (val->kind == SymValKind::_Func) { - res->e_type = val->get_type(); - res->cls = Expr::_GlobFunc; - auto_apply = val->auto_apply; - impure = !dynamic_cast(val)->is_marked_as_pure(); - } else if (val->idx < 0) { - lex.error_at("accessing variable `", "` being defined"); - } else { - res->val = val->idx; - res->e_type = val->get_type(); - // std::cerr << "accessing variable " << lex.cur().str << " : " << res->e_type << std::endl; - } - // std::cerr << "accessing symbol " << lex.cur().str << " : " << res->e_type << (val->impure ? " (impure)" : " (pure)") << std::endl; - res->flags = Expr::_IsLvalue | Expr::_IsRvalue | (impure ? 
Expr::_IsImpure : 0); - } - if (auto_apply) { - int impure = res->flags & Expr::_IsImpure; - delete res; - res = new Expr{Expr::_Apply, sym, {}}; - res->flags = Expr::_IsRvalue | impure; - } - res->deduce_type(lex); - lex.next(); - return res; - } - lex.expect(tok_identifier, "identifier"); - return nullptr; -} - -// parse E { E } -Expr* parse_expr90(Lexer& lex, CodeBlob& code, bool nv) { - Expr* res = parse_expr100(lex, code, nv); - while (lex.tok() == tok_oppar || lex.tok() == tok_opbracket || (lex.tok() == tok_identifier && !is_special_ident(lex.cur_sym_idx()))) { - if (res->is_type()) { - Expr* x = parse_expr100(lex, code, true); - x->chk_lvalue(lex); // chk_lrvalue() ? - TypeExpr* tp = res->e_type; - delete res; - res = new Expr{Expr::_TypeApply, {x}}; - res->e_type = tp; - res->here = lex.cur_location(); - try { - unify(res->e_type, x->e_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "cannot transform expression of type " << x->e_type << " to explicitly requested type " << res->e_type - << ": " << ue; - lex.error(os.str()); - } - res->flags = x->flags; - } else { - Expr* x = parse_expr100(lex, code, false); - x->chk_rvalue(lex); - res = make_func_apply(res, x); - res->here = lex.cur_location(); - res->deduce_type(lex); - } - } - return res; -} - -// parse E { .method E | ~method E } -Expr* parse_expr80(Lexer& lex, CodeBlob& code, bool nv) { - Expr* res = parse_expr90(lex, code, nv); - while (lex.tok() == tok_identifier && is_special_ident(lex.cur_sym_idx())) { - auto modify = is_tilde_ident(lex.cur_sym_idx()); - auto obj = res; - if (modify) { - obj->chk_lvalue(lex); - } else { - obj->chk_rvalue(lex); - } - SrcLocation loc = lex.cur_location(); - sym_idx_t name = lex.cur_sym_idx(); - auto sym = lookup_symbol(name); - if (!sym || !dynamic_cast(sym->value)) { - auto name1 = G.symbols.lookup(lex.cur_str().substr(1)); - if (name1) { - auto sym1 = lookup_symbol(name1); - if (sym1 && dynamic_cast(sym1->value)) { - name = name1; - sym = sym1; - 
} - } - } - check_global_func(lex, name); - if (G.is_verbosity(2)) { - std::cerr << "using symbol `" << G.symbols.get_name(name) << "` for method call of " << lex.cur_str() << std::endl; - } - sym = lookup_symbol(name); - SymValFunc* val = sym ? dynamic_cast(sym->value) : nullptr; - if (!val) { - lex.error_at("undefined method identifier `", "`"); - } - lex.next(); - auto x = parse_expr100(lex, code, false); - x->chk_rvalue(lex); - if (x->cls == Expr::_Tensor) { - res = new Expr{Expr::_Apply, name, {obj}}; - res->args.insert(res->args.end(), x->args.begin(), x->args.end()); - } else { - res = new Expr{Expr::_Apply, name, {obj, x}}; - } - res->here = loc; - res->flags = Expr::_IsRvalue | (val->is_marked_as_pure() ? 0 : Expr::_IsImpure); - res->deduce_type(lex); - if (modify) { - auto tmp = res; - res = new Expr{Expr::_LetFirst, {obj->copy(), tmp}}; - res->here = loc; - res->flags = tmp->flags; - res->set_val(name); - res->deduce_type(lex); - } - } - return res; -} - -// parse [ ~ | - | + ] E -Expr* parse_expr75(Lexer& lex, CodeBlob& code, bool nv) { - if (lex.tok() == tok_bitwise_not || lex.tok() == tok_minus || lex.tok() == tok_plus) { - TokenType t = lex.tok(); - sym_idx_t name = G.symbols.lookup_add(lex.cur_str_std_string() + "_"); - check_global_func(lex, name); - SrcLocation loc{lex.cur_location()}; - lex.next(); - auto x = parse_expr75(lex, code, false); - x->chk_rvalue(lex); - - // here's an optimization to convert "-1" (tok_minus tok_int_const) to a const -1, not to Expr::Apply(-,1) - // without this, everything still works, but Tolk looses some vars/stack knowledge for now (to be fixed later) - // in FunC, it was: - // `var fst = -1;` // is constantly 1 - // `var snd = - 1;` // is Expr::Apply(-), a comment "snd=1" is lost in stack layout comments, and so on - // hence, when after grammar modification tok_minus became a true unary operator (not a part of a number), - // and thus to preserve existing behavior until compiler parts are completely rewritten, 
handle this case here - if (x->cls == Expr::_Const) { - if (t == tok_bitwise_not) { - x->intval = ~x->intval; - } else if (t == tok_minus) { - x->intval = -x->intval; - } - if (!x->intval->signed_fits_bits(257)) { - lex.error("integer overflow"); - } - return x; - } - - auto res = new Expr{Expr::_Apply, name, {x}}; - res->here = loc; - res->set_val(t); - res->flags = Expr::_IsRvalue; - res->deduce_type(lex); - return res; - } else { - return parse_expr80(lex, code, nv); - } -} - -// parse E { (* | / | % | /% | ^/ | ~/ | ^% | ~% ) E } -Expr* parse_expr30(Lexer& lex, CodeBlob& code, bool nv) { - Expr* res = parse_expr75(lex, code, nv); - while (lex.tok() == tok_mul || lex.tok() == tok_div || lex.tok() == tok_mod || lex.tok() == tok_divmod || lex.tok() == tok_divC || - lex.tok() == tok_divR || lex.tok() == tok_modC || lex.tok() == tok_modR) { - res->chk_rvalue(lex); - TokenType t = lex.tok(); - sym_idx_t name = G.symbols.lookup_add(std::string{"_"} + lex.cur_str_std_string() + "_"); - SrcLocation loc{lex.cur_location()}; - check_global_func(lex, name); - lex.next(); - auto x = parse_expr75(lex, code, false); - x->chk_rvalue(lex); - res = new Expr{Expr::_Apply, name, {res, x}}; - res->here = loc; - res->set_val(t); - res->flags = Expr::_IsRvalue; - res->deduce_type(lex); - } - return res; -} - -// parse E { (+ | -) E } -Expr* parse_expr20(Lexer& lex, CodeBlob& code, bool nv) { - Expr* res = parse_expr30(lex, code, nv); - while (lex.tok() == tok_minus || lex.tok() == tok_plus) { - res->chk_rvalue(lex); - TokenType t = lex.tok(); - sym_idx_t name = G.symbols.lookup_add(std::string{"_"} + lex.cur_str_std_string() + "_"); - check_global_func(lex, name); - SrcLocation loc{lex.cur_location()}; - lex.next(); - auto x = parse_expr30(lex, code, false); - x->chk_rvalue(lex); - res = new Expr{Expr::_Apply, name, {res, x}}; - res->here = loc; - res->set_val(t); - res->flags = Expr::_IsRvalue; - res->deduce_type(lex); - } - return res; -} - -// parse E { ( << | >> | ~>> | ^>> ) E } 
-Expr* parse_expr17(Lexer& lex, CodeBlob& code, bool nv) { - Expr* res = parse_expr20(lex, code, nv); - while (lex.tok() == tok_lshift || lex.tok() == tok_rshift || lex.tok() == tok_rshiftC || lex.tok() == tok_rshiftR) { - res->chk_rvalue(lex); - TokenType t = lex.tok(); - sym_idx_t name = G.symbols.lookup_add(std::string{"_"} + lex.cur_str_std_string() + "_"); - check_global_func(lex, name); - SrcLocation loc{lex.cur_location()}; - lex.next(); - auto x = parse_expr20(lex, code, false); - x->chk_rvalue(lex); - diagnose_addition_in_bitshift(loc, name, x); - res = new Expr{Expr::_Apply, name, {res, x}}; - res->here = loc; - res->set_val(t); - res->flags = Expr::_IsRvalue; - res->deduce_type(lex); - } - return res; -} - -// parse E [ (== | < | > | <= | >= | != | <=> ) E ] -Expr* parse_expr15(Lexer& lex, CodeBlob& code, bool nv) { - Expr* res = parse_expr17(lex, code, nv); - if (lex.tok() == tok_eq || lex.tok() == tok_lt || lex.tok() == tok_gt || lex.tok() == tok_leq || lex.tok() == tok_geq || - lex.tok() == tok_neq || lex.tok() == tok_spaceship) { - res->chk_rvalue(lex); - TokenType t = lex.tok(); - sym_idx_t name = G.symbols.lookup_add(std::string{"_"} + lex.cur_str_std_string() + "_"); - check_global_func(lex, name); - SrcLocation loc{lex.cur_location()}; - lex.next(); - auto x = parse_expr17(lex, code, false); - x->chk_rvalue(lex); - res = new Expr{Expr::_Apply, name, {res, x}}; - res->here = loc; - res->set_val(t); - res->flags = Expr::_IsRvalue; - res->deduce_type(lex); - } - return res; -} - -// parse E { ( & | `|` | ^ ) E } -Expr* parse_expr14(Lexer& lex, CodeBlob& code, bool nv) { - Expr* res = parse_expr15(lex, code, nv); - while (lex.tok() == tok_bitwise_and || lex.tok() == tok_bitwise_or || lex.tok() == tok_bitwise_xor) { - res->chk_rvalue(lex); - TokenType t = lex.tok(); - sym_idx_t name = G.symbols.lookup_add(std::string{"_"} + lex.cur_str_std_string() + "_"); - check_global_func(lex, name); - SrcLocation loc{lex.cur_location()}; - lex.next(); - auto x = 
parse_expr15(lex, code, false); - x->chk_rvalue(lex); - // diagnose tricky bitwise precedence, like "flags & 0xFF != 0" (& has lower precedence) - diagnose_bitwise_precedence(loc, name, res, x); - - res = new Expr{Expr::_Apply, name, {res, x}}; - res->here = loc; - res->set_val(t); - res->flags = Expr::_IsRvalue; - res->deduce_type(lex); - } - return res; -} - -// parse E [ ? E : E ] -Expr* parse_expr13(Lexer& lex, CodeBlob& code, bool nv) { - Expr* res = parse_expr14(lex, code, nv); - if (lex.tok() == tok_question) { - res->chk_rvalue(lex); - SrcLocation loc{lex.cur_location()}; - lex.next(); - auto x = parse_expr(lex, code, false); - x->chk_rvalue(lex); - lex.expect(tok_colon, "':'"); - auto y = parse_expr13(lex, code, false); - y->chk_rvalue(lex); - res = new Expr{Expr::_CondExpr, {res, x, y}}; - res->here = loc; - res->flags = Expr::_IsRvalue; - res->deduce_type(lex); - } - return res; -} - -// parse LE1 (= | += | -= | ... ) E2 -Expr* parse_expr10(Lexer& lex, CodeBlob& code, bool nv) { - auto x = parse_expr13(lex, code, nv); - TokenType t = lex.tok(); - if (t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || t == tok_set_divR || t == tok_set_divC || - t == tok_set_mod || t == tok_set_modC || t == tok_set_modR || t == tok_set_lshift || t == tok_set_rshift || t == tok_set_rshiftC || - t == tok_set_rshiftR || t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor) { - x->chk_lvalue(lex); - x->chk_rvalue(lex); - sym_idx_t name = G.symbols.lookup_add(std::string{"^_"} + lex.cur_str_std_string() + "_"); - check_global_func(lex, name); - SrcLocation loc{lex.cur_location()}; - lex.next(); - auto y = parse_expr10(lex, code, false); - y->chk_rvalue(lex); - Expr* z = new Expr{Expr::_Apply, name, {x, y}}; - z->here = loc; - z->set_val(t); - z->flags = Expr::_IsRvalue; - z->deduce_type(lex); - Expr* res = new Expr{Expr::_Letop, {x->copy(), z}}; - res->here = loc; - res->flags = (x->flags & ~Expr::_IsType) | 
Expr::_IsRvalue; - res->set_val(t); - res->deduce_type(lex); - return res; - } else if (t == tok_assign) { - x->chk_lvalue(lex); - SrcLocation loc{lex.cur_location()}; - lex.next(); - auto y = parse_expr10(lex, code, false); - y->chk_rvalue(lex); - x->predefine_vars(); - x->define_new_vars(code); - Expr* res = new Expr{Expr::_Letop, {x, y}}; - res->here = loc; - res->flags = (x->flags & ~Expr::_IsType) | Expr::_IsRvalue; - res->set_val(t); - res->deduce_type(lex); - return res; - } else { - return x; - } -} - -Expr* parse_expr(Lexer& lex, CodeBlob& code, bool nv) { - return parse_expr10(lex, code, nv); -} - -namespace blk_fl { -enum { end = 1, ret = 2, empty = 4 }; -typedef int val; -constexpr val init = end | empty; -void combine(val& x, const val y) { - x |= y & ret; - x &= y | ~(end | empty); -} -void combine_parallel(val& x, const val y) { - x &= y | ~(ret | empty); - x |= y & end; -} -} // namespace blk_fl - -blk_fl::val parse_return_stmt(Lexer& lex, CodeBlob& code) { - auto expr = parse_expr(lex, code); - expr->chk_rvalue(lex); - try { - // std::cerr << "in return: "; - unify(expr->e_type, code.ret_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "previous function return type " << code.ret_type - << " cannot be unified with return statement expression type " << expr->e_type << ": " << ue; - lex.error(os.str()); - } - std::vector tmp_vars = expr->pre_compile(code); - code.emplace_back(lex.cur_location(), Op::_Return, std::move(tmp_vars)); - lex.expect(tok_semicolon, "';'"); - return blk_fl::ret; -} - -blk_fl::val parse_implicit_ret_stmt(Lexer& lex, CodeBlob& code) { - auto ret_type = TypeExpr::new_unit(); - try { - // std::cerr << "in implicit return: "; - unify(ret_type, code.ret_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "previous function return type " << code.ret_type - << " cannot be unified with implicit end-of-block return type " << ret_type << ": " << ue; - lex.error(os.str()); - } - 
code.emplace_back(lex.cur_location(), Op::_Return); - return blk_fl::ret; -} - -blk_fl::val parse_stmt(Lexer& lex, CodeBlob& code); - -blk_fl::val parse_block_stmt(Lexer& lex, CodeBlob& code, bool no_new_scope = false) { - lex.expect(tok_opbrace, "'{'"); - if (!no_new_scope) { - open_scope(lex.cur_location()); - } - blk_fl::val res = blk_fl::init; - bool warned = false; - while (lex.tok() != tok_clbrace) { - if (!(res & blk_fl::end) && !warned) { - lex.cur_location().show_warning("unreachable code"); - warned = true; - } - blk_fl::combine(res, parse_stmt(lex, code)); - } - if (!no_new_scope) { - close_scope(lex.cur_location()); - } - lex.expect(tok_clbrace, "'}'"); - return res; -} - -blk_fl::val parse_repeat_stmt(Lexer& lex, CodeBlob& code) { - SrcLocation loc{lex.cur_location()}; - lex.expect(tok_repeat, "'repeat'"); - auto expr = parse_expr(lex, code); - expr->chk_rvalue(lex); - auto cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); - try { - unify(expr->e_type, cnt_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "repeat count value of type " << expr->e_type << " is not an integer: " << ue; - lex.error(os.str()); - } - std::vector tmp_vars = expr->pre_compile(code); - if (tmp_vars.size() != 1) { - lex.error("repeat count value is not a singleton"); - } - Op& repeat_op = code.emplace_back(loc, Op::_Repeat, tmp_vars); - code.push_set_cur(repeat_op.block0); - blk_fl::val res = parse_block_stmt(lex, code); - code.close_pop_cur(lex.cur_location()); - return res | blk_fl::end; -} - -blk_fl::val parse_while_stmt(Lexer& lex, CodeBlob& code) { - SrcLocation loc{lex.cur_location()}; - lex.expect(tok_while, "'while'"); - auto expr = parse_expr(lex, code); - expr->chk_rvalue(lex); - auto cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); - try { - unify(expr->e_type, cnt_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "while condition value of type " << expr->e_type << " is not an integer: " << ue; - lex.error(os.str()); - } - Op& 
while_op = code.emplace_back(loc, Op::_While); - code.push_set_cur(while_op.block0); - while_op.left = expr->pre_compile(code); - code.close_pop_cur(lex.cur_location()); - if (while_op.left.size() != 1) { - lex.error("while condition value is not a singleton"); - } - code.push_set_cur(while_op.block1); - blk_fl::val res1 = parse_block_stmt(lex, code); - code.close_pop_cur(lex.cur_location()); - return res1 | blk_fl::end; -} - -blk_fl::val parse_do_stmt(Lexer& lex, CodeBlob& code) { - Op& while_op = code.emplace_back(lex.cur_location(), Op::_Until); - lex.expect(tok_do, "'do'"); - code.push_set_cur(while_op.block0); - open_scope(lex.cur_location()); - blk_fl::val res = parse_block_stmt(lex, code, true); - lex.expect(tok_until, "'until'"); - auto expr = parse_expr(lex, code); - expr->chk_rvalue(lex); - close_scope(lex.cur_location()); - auto cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); - try { - unify(expr->e_type, cnt_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "`until` condition value of type " << expr->e_type << " is not an integer: " << ue; - lex.error(os.str()); - } - while_op.left = expr->pre_compile(code); - code.close_pop_cur(lex.cur_location()); - if (while_op.left.size() != 1) { - lex.error("`until` condition value is not a singleton"); - } - return res & ~blk_fl::empty; -} - -blk_fl::val parse_try_catch_stmt(Lexer& lex, CodeBlob& code) { - code.require_callxargs = true; - lex.expect(tok_try, "'try'"); - Op& try_catch_op = code.emplace_back(lex.cur_location(), Op::_TryCatch); - code.push_set_cur(try_catch_op.block0); - blk_fl::val res0 = parse_block_stmt(lex, code); - code.close_pop_cur(lex.cur_location()); - lex.expect(tok_catch, "'catch'"); - code.push_set_cur(try_catch_op.block1); - open_scope(lex.cur_location()); - Expr* expr = parse_expr(lex, code, true); - expr->chk_lvalue(lex); - TypeExpr* tvm_error_type = TypeExpr::new_tensor(TypeExpr::new_var(), TypeExpr::new_atomic(TypeExpr::_Int)); - try { - unify(expr->e_type, 
tvm_error_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "`catch` arguments have incorrect type " << expr->e_type << ": " << ue; - lex.error(os.str()); - } - expr->predefine_vars(); - expr->define_new_vars(code); - try_catch_op.left = expr->pre_compile(code); - tolk_assert(try_catch_op.left.size() == 2 || try_catch_op.left.size() == 1); - blk_fl::val res1 = parse_block_stmt(lex, code); - close_scope(lex.cur_location()); - code.close_pop_cur(lex.cur_location()); - blk_fl::combine_parallel(res0, res1); - return res0; -} - -blk_fl::val parse_if_stmt(Lexer& lex, CodeBlob& code, TokenType first_lex = tok_if) { - SrcLocation loc{lex.cur_location()}; - lex.next(); - auto expr = parse_expr(lex, code); - expr->chk_rvalue(lex); - auto flag_type = TypeExpr::new_atomic(TypeExpr::_Int); - try { - unify(expr->e_type, flag_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "`if` condition value of type " << expr->e_type << " is not an integer: " << ue; - lex.error(os.str()); - } - std::vector tmp_vars = expr->pre_compile(code); - if (tmp_vars.size() != 1) { - lex.error("condition value is not a singleton"); - } - Op& if_op = code.emplace_back(loc, Op::_If, tmp_vars); - code.push_set_cur(if_op.block0); - blk_fl::val res1 = parse_block_stmt(lex, code); - blk_fl::val res2 = blk_fl::init; - code.close_pop_cur(lex.cur_location()); - if (lex.tok() == tok_else) { - lex.expect(tok_else, "'else'"); - code.push_set_cur(if_op.block1); - res2 = parse_block_stmt(lex, code); - code.close_pop_cur(lex.cur_location()); - } else if (lex.tok() == tok_elseif || lex.tok() == tok_elseifnot) { - code.push_set_cur(if_op.block1); - res2 = parse_if_stmt(lex, code, lex.tok()); - code.close_pop_cur(lex.cur_location()); - } else { - if_op.block1 = std::make_unique(lex.cur_location(), Op::_Nop); - } - if (first_lex == tok_ifnot || first_lex == tok_elseifnot) { - std::swap(if_op.block0, if_op.block1); - } - blk_fl::combine_parallel(res1, res2); - return res1; -} - 
-blk_fl::val parse_stmt(Lexer& lex, CodeBlob& code) { - switch (lex.tok()) { - case tok_return: { - lex.next(); - return parse_return_stmt(lex, code); - } - case tok_opbrace: { - return parse_block_stmt(lex, code); - } - case tok_semicolon: { - lex.next(); - return blk_fl::init; - } - case tok_repeat: - return parse_repeat_stmt(lex, code); - case tok_if: - case tok_ifnot: - return parse_if_stmt(lex, code, lex.tok()); - case tok_do: - return parse_do_stmt(lex, code); - case tok_while: - return parse_while_stmt(lex, code); - case tok_try: - return parse_try_catch_stmt(lex, code); - default: { - auto expr = parse_expr(lex, code); - expr->chk_rvalue(lex); - expr->pre_compile(code); - lex.expect(tok_semicolon, "';'"); - return blk_fl::end; - } - } -} - -CodeBlob* parse_func_body(Lexer& lex, FormalArgList arg_list, TypeExpr* ret_type, bool marked_as_pure) { - lex.expect(tok_opbrace, "'{'"); - CodeBlob* blob = new CodeBlob{ret_type}; - if (marked_as_pure) { - blob->flags |= CodeBlob::_ForbidImpure; - } - blob->import_params(std::move(arg_list)); - blk_fl::val res = blk_fl::init; - bool warned = false; - while (lex.tok() != tok_clbrace) { - if (!(res & blk_fl::end) && !warned) { - lex.cur_location().show_warning("unreachable code"); - warned = true; - } - blk_fl::combine(res, parse_stmt(lex, *blob)); - } - if (res & blk_fl::end) { - parse_implicit_ret_stmt(lex, *blob); - } - blob->close_blk(lex.cur_location()); - lex.expect(tok_clbrace, "'}'"); - return blob; -} - -SymValAsmFunc* parse_asm_func_body(Lexer& lex, TypeExpr* func_type, const FormalArgList& arg_list, TypeExpr* ret_type, - bool marked_as_pure) { - SrcLocation loc = lex.cur_location(); - lex.expect(tok_asm, "'asm'"); - int cnt = (int)arg_list.size(); - int width = ret_type->get_width(); - if (width < 0 || width > 16) { - throw ParseError{loc, "return type of an assembler built-in function must have a well-defined fixed width"}; - } - if (arg_list.size() > 16) { - throw ParseError{loc, "assembler built-in function 
must have at most 16 arguments"}; - } - std::vector cum_arg_width; - cum_arg_width.push_back(0); - int tot_width = 0; - for (auto& arg : arg_list) { - int arg_width = std::get(arg)->get_width(); - if (arg_width < 0 || arg_width > 16) { - throw ParseError{std::get(arg), - "parameters of an assembler built-in function must have a well-defined fixed width"}; - } - cum_arg_width.push_back(tot_width += arg_width); - } - std::vector asm_ops; - std::vector arg_order, ret_order; - if (lex.tok() == tok_oppar) { - lex.next(); - if (lex.tok() != tok_mapsto) { - std::vector visited(cnt, false); - for (int i = 0; i < cnt; i++) { - lex.check(tok_identifier, "identifier"); - auto sym = lookup_symbol(lex.cur_sym_idx()); - int j; - for (j = 0; j < cnt; j++) { - if (std::get(arg_list[j]) == sym) { - break; - } - } - if (j == cnt) { - lex.error("formal argument name expected"); - } - if (visited[j]) { - lex.error("formal argument listed twice"); - } - visited[j] = true; - int c1 = cum_arg_width[j], c2 = cum_arg_width[j + 1]; - while (c1 < c2) { - arg_order.push_back(c1++); - } - lex.next(); - } - tolk_assert(arg_order.size() == (unsigned)tot_width); - } - if (lex.tok() == tok_mapsto) { - lex.next(); - std::vector visited(width, false); - for (int i = 0; i < width; i++) { - if (lex.tok() != tok_int_const || lex.cur_str().size() > 3) { - lex.expect(tok_int_const, "number"); - } - int j = atoi(lex.cur_str_std_string().c_str()); - if (j < 0 || j >= width || visited[j]) { - lex.error("expected integer return value index 0 .. width-1"); - } - visited[j] = true; - ret_order.push_back(j); - lex.next(); - } - } - lex.expect(tok_clpar, "')'"); - } - while (lex.tok() == tok_string_const) { - std::string ops = lex.cur_str_std_string(); // \n\n... 
- std::string op; - for (const char& c : ops) { - if (c == '\n' || c == '\r') { - if (!op.empty()) { - asm_ops.push_back(AsmOp::Parse(op, cnt, width)); - if (asm_ops.back().is_custom()) { - cnt = width; - } - op.clear(); - } - } else { - op.push_back(c); - } - } - if (!op.empty()) { - asm_ops.push_back(AsmOp::Parse(op, cnt, width)); - if (asm_ops.back().is_custom()) { - cnt = width; - } - } - lex.next(); - } - if (asm_ops.empty()) { - lex.error("string with assembler instruction expected"); - } - lex.expect(tok_semicolon, "';'"); - std::string crc_s; - for (const AsmOp& asm_op : asm_ops) { - crc_s += asm_op.op; - } - crc_s.push_back(!marked_as_pure); - for (const int& x : arg_order) { - crc_s += std::string((const char*) (&x), (const char*) (&x + 1)); - } - for (const int& x : ret_order) { - crc_s += std::string((const char*) (&x), (const char*) (&x + 1)); - } - auto res = new SymValAsmFunc{func_type, std::move(asm_ops), marked_as_pure}; - res->arg_order = std::move(arg_order); - res->ret_order = std::move(ret_order); - res->crc = td::crc64(crc_s); - return res; -} - -std::vector parse_type_var_list(Lexer& lex) { - std::vector res; - lex.expect(tok_forall, "'forall'"); - int idx = 0; - while (true) { - if (lex.tok() == tok_type) { - lex.next(); - } - if (lex.tok() != tok_identifier) { - lex.error("free type identifier expected"); - } - SrcLocation loc = lex.cur_location(); - if (G.prohibited_var_names.count(G.symbols.get_name(lex.cur_sym_idx()))) { - throw ParseError{loc, PSTRING() << "symbol `" << G.symbols.get_name(lex.cur_sym_idx()) - << "` cannot be redefined as a variable"}; - } - SymDef* new_sym_def = define_symbol(lex.cur_sym_idx(), true, loc); - if (!new_sym_def || new_sym_def->value) { - lex.error_at("redefined type variable `", "`"); - } - auto var = TypeExpr::new_var(idx); - new_sym_def->value = new SymValType{SymValKind::_Typename, idx++, var}; - res.push_back(var); - lex.next(); - if (lex.tok() != tok_comma) { - break; - } - lex.next(); - } - 
lex.expect(tok_mapsto, "'->'"); - return res; -} - -void type_var_usage(TypeExpr* expr, const std::vector& typevars, std::vector& used) { - if (expr->constr != TypeExpr::te_Var) { - for (auto arg : expr->args) { - type_var_usage(arg, typevars, used); - } - return; - } - for (std::size_t i = 0; i < typevars.size(); i++) { - if (typevars[i] == expr) { - used.at(i) = true; - return; - } - } - return; -} - -TypeExpr* compute_type_closure(TypeExpr* expr, const std::vector& typevars) { - if (typevars.empty()) { - return expr; - } - std::vector used(typevars.size(), false); - type_var_usage(expr, typevars, used); - std::vector used_vars; - for (std::size_t i = 0; i < typevars.size(); i++) { - if (used.at(i)) { - used_vars.push_back(typevars[i]); - } - } - if (!used_vars.empty()) { - expr = TypeExpr::new_forall(std::move(used_vars), expr); - } - return expr; -} - -// if a function looks like `T f(...args) { return anotherF(...args); }`, -// set a bit to flags -// then, all calls to `f(...)` will be effectively replaced with `anotherF(...)` -void detect_if_function_just_wraps_another(SymValCodeFunc* v_current, const td::RefInt256 &method_id) { - const std::string& function_name = v_current->code->name; - - // in "AST" representation, the first is Op::_Import (input arguments, even if none) - const auto& op_import = v_current->code->ops; - tolk_assert(op_import && op_import->cl == Op::_Import); - - // then Op::_Call (anotherF) - const Op* op_call = op_import->next.get(); - if (!op_call || op_call->cl != Op::_Call) - return; - tolk_assert(op_call->left.size() == 1); - - const auto& op_return = op_call->next; - if (!op_return || op_return->cl != Op::_Return || op_return->left.size() != 1) - return; - - bool indices_expected = static_cast(op_import->left.size()) == op_call->left[0] && op_call->left[0] == op_return->left[0]; - if (!indices_expected) - return; - - const SymDef* f_called = op_call->fun_ref; - const SymValFunc* v_called = dynamic_cast(f_called->value); - if 
(!v_called) - return; - - // `return` must use all arguments, e.g. `return (_0,_2,_1)`, not `return (_0,_1,_1)` - int args_used_mask = 0; - for (var_idx_t arg_idx : op_call->right) { - args_used_mask |= 1 << arg_idx; - } - if (args_used_mask != (1 << op_call->right.size()) - 1) - return; - - // detect getters (having method_id), they should not be treated as wrappers - // v_current->method_id will be assigned later; todo refactor function parsing completely, it's weird - // moreover, `recv_external()` and others are also exported, but FunC is unaware of method_id - // (it's assigned by Fift later) - // so, for now, just handle "special" function names, the same as in Asm.fif - if (!method_id.is_null()) - return; - if (function_name == "main" || function_name == "recv_internal" || function_name == "recv_external" || - function_name == "run_ticktock" || function_name == "split_prepare" || function_name == "split_install") - return; - - // all types must be strictly defined (on mismatch, a compilation error will be triggered anyway) - if (v_called->sym_type->has_unknown_inside() || v_current->sym_type->has_unknown_inside()) - return; - // avoid situations like `f(int a, (int,int) b)`, inlining will be cumbersome - if (v_current->get_arg_type()->get_width() != static_cast(op_call->right.size())) - return; - // 'return true;' (false, nil) are (surprisingly) also function calls, with auto_apply=true - if (v_called->auto_apply) - return; - // if an original is marked `pure`, and this one doesn't, it's okay; just check for inline_ref storage - if (v_current->is_inline_ref()) - return; - - // ok, f_current is a wrapper - v_current->flags |= SymValFunc::flagWrapsAnotherF; - if (G.is_verbosity(2)) { - std::cerr << function_name << " -> " << f_called->name() << std::endl; - } -} - -static td::RefInt256 calculate_method_id_by_func_name(std::string_view func_name) { - unsigned int crc = td::crc16(static_cast(func_name)); - return td::make_refint((crc & 0xffff) | 0x10000); -} - 
-// todo rewrite function declaration parsing completely, it's weird -void parse_func_def(Lexer& lex) { - SrcLocation loc = lex.cur_location(); - open_scope(loc); - std::vector type_vars; - bool is_get_method = false; - if (lex.tok() == tok_forall) { - type_vars = parse_type_var_list(lex); - } else if (lex.tok() == tok_get) { - is_get_method = true; - lex.next(); - } - auto ret_type = parse_type(lex); - if (lex.tok() != tok_identifier) { - lex.error("function name identifier expected"); - } - std::string func_name = lex.cur_str_std_string(); - int func_sym_idx = lex.cur_sym_idx(); - lex.next(); - FormalArgList arg_list = parse_formal_args(lex); - bool marked_as_pure = false; - if (lex.tok() == tok_impure) { - static bool warning_shown = false; - if (!warning_shown) { - lex.cur_location().show_warning("`impure` specifier is deprecated. All functions are impure by default, use `pure` to mark a function as pure"); - warning_shown = true; - } - lex.next(); - } else if (lex.tok() == tok_pure) { - marked_as_pure = true; - lex.next(); - } - int flags_inline = 0; - if (lex.tok() == tok_inline) { - flags_inline = SymValFunc::flagInline; - lex.next(); - } else if (lex.tok() == tok_inlineref) { - flags_inline = SymValFunc::flagInlineRef; - lex.next(); - } - td::RefInt256 method_id; - if (lex.tok() == tok_method_id) { - if (is_get_method) { - lex.error("both `get` and `method_id` are not allowed"); - } - lex.next(); - if (lex.tok() == tok_oppar) { // method_id(N) - lex.next(); - method_id = td::string_to_int256(lex.cur_str_std_string()); - lex.expect(tok_int_const, "number"); - if (method_id.is_null()) { - lex.error_at("invalid integer constant `", "`"); - } - lex.expect(tok_clpar, "')'"); - } else { - static bool warning_shown = false; - if (!warning_shown) { - lex.cur_location().show_warning("`method_id` specifier is deprecated, use `get` keyword.\nExample: `get int seqno() { ... 
}`"); - warning_shown = true; - } - method_id = calculate_method_id_by_func_name(func_name); - } - } - if (is_get_method) { - tolk_assert(method_id.is_null()); - method_id = calculate_method_id_by_func_name(func_name); - for (const SymDef* other : G.glob_get_methods) { - if (!td::cmp(dynamic_cast(other->value)->method_id, method_id)) { - lex.error(PSTRING() << "GET methods hash collision: `" << other->name() << "` and `" + func_name + "` produce the same hash. Consider renaming one of these functions."); - } - } - } - TypeExpr* func_type = TypeExpr::new_map(extract_total_arg_type(arg_list), ret_type); - func_type = compute_type_closure(func_type, type_vars); - if (lex.tok() == tok_builtin) { - const SymDef* builtin_func = lookup_symbol(G.symbols.lookup(func_name)); - const SymValFunc* func_val = builtin_func ? dynamic_cast(builtin_func->value) : nullptr; - if (!func_val || !func_val->is_builtin()) { - lex.error("`builtin` used for non-builtin function"); - } -#ifdef TOLK_DEBUG - // in release, we don't need this check, since `builtin` is used only in stdlib.tolk, which is our responsibility - if (!func_val->sym_type->equals_to(func_type) || func_val->is_marked_as_pure() != marked_as_pure) { - lex.error("declaration for `builtin` function doesn't match an actual one"); - } -#endif - lex.next(); - lex.expect(tok_semicolon, "';'"); - close_scope(lex.cur_location()); - return; - } - if (lex.tok() != tok_semicolon && lex.tok() != tok_opbrace && lex.tok() != tok_asm) { - lex.expect(tok_opbrace, "function body block"); - } - if (G.is_verbosity(1)) { - std::cerr << "function " << func_name << " : " << func_type << std::endl; - } - SymDef* func_sym = define_global_symbol(func_sym_idx, 0, loc); - tolk_assert(func_sym); - SymValFunc* func_sym_val = dynamic_cast(func_sym->value); - if (func_sym->value) { - if (func_sym->value->kind != SymValKind::_Func || !func_sym_val) { - lex.error("was not defined as a function before"); - } - try { - unify(func_sym_val->sym_type, 
func_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "previous type of function " << func_name << " : " << func_sym_val->sym_type - << " cannot be unified with new type " << func_type << ": " << ue; - lex.error(os.str()); - } - } - if (lex.tok() == tok_semicolon) { - make_new_glob_func(func_sym, func_type, marked_as_pure); - lex.next(); - } else if (lex.tok() == tok_opbrace) { - if (dynamic_cast(func_sym_val)) { - lex.error("function `" + func_name + "` has been already defined as an assembler built-in"); - } - SymValCodeFunc* func_sym_code; - if (func_sym_val) { - func_sym_code = dynamic_cast(func_sym_val); - if (!func_sym_code) { - lex.error("function `" + func_name + "` has been already defined in an yet-unknown way"); - } - } else { - func_sym_code = make_new_glob_func(func_sym, func_type, marked_as_pure); - } - if (func_sym_code->code) { - lex.error("redefinition of function `"s + func_name + "`"); - } - if (marked_as_pure && ret_type->get_width() == 0) { - lex.error("a pure function should return something, otherwise it will be optimized out anyway"); - } - CodeBlob* code = parse_func_body(lex, arg_list, ret_type, marked_as_pure); - code->name = func_name; - code->loc = loc; - // code->print(std::cerr); // !!!DEBUG!!! 
- func_sym_code->code = code; - detect_if_function_just_wraps_another(func_sym_code, method_id); - } else { - SrcLocation asm_location = lex.cur_location(); - SymValAsmFunc* asm_func = parse_asm_func_body(lex, func_type, arg_list, ret_type, marked_as_pure); -#ifdef TOLK_DEBUG - asm_func->name = func_name; -#endif - if (func_sym_val) { - if (dynamic_cast(func_sym_val)) { - throw ParseError(asm_location, "function `" + func_name + "` was already declared as an ordinary function"); - } - SymValAsmFunc* asm_func_old = dynamic_cast(func_sym_val); - if (asm_func_old) { - if (asm_func->crc != asm_func_old->crc) { - throw ParseError(asm_location, "redefinition of built-in assembler function `" + func_name + "`"); - } - } else { - throw ParseError(asm_location, "redefinition of previously (somehow) defined function `" + func_name + "`"); - } - } - func_sym->value = asm_func; - } - if (method_id.not_null()) { - auto val = dynamic_cast(func_sym->value); - if (!val) { - lex.error("cannot set method id for unknown function `" + func_name + "`"); - } - if (val->method_id.is_null()) { - val->method_id = std::move(method_id); - } else if (td::cmp(val->method_id, method_id) != 0) { - lex.error("integer method identifier for `" + func_name + "` changed from " + - val->method_id->to_dec_string() + " to a different value " + method_id->to_dec_string()); - } - } - if (flags_inline) { - auto val = dynamic_cast(func_sym->value); - if (!val) { - lex.error("cannot set unknown function `" + func_name + "` as an inline"); - } - if (!val->is_inline() && !val->is_inline_ref()) { - val->flags |= flags_inline; - } else if ((val->flags & (SymValFunc::flagInline | SymValFunc::flagInlineRef)) != flags_inline) { - lex.error("inline mode for `" + func_name + "` changed with respect to a previous declaration"); - } - } - if (is_get_method) { - auto val = dynamic_cast(func_sym->value); - if (!val) { - lex.error("cannot set unknown function `" + func_name + "` as a get method"); - } - val->flags |= 
SymValFunc::flagGetMethod; - G.glob_get_methods.push_back(func_sym); - } - if (G.is_verbosity(1)) { - std::cerr << "new type of function " << func_name << " : " << func_type << std::endl; - } - close_scope(lex.cur_location()); -} - -void parse_pragma(Lexer& lex) { - SrcLocation loc = lex.cur_location(); - lex.next_special(tok_pragma_name, "pragma name"); - std::string_view pragma_name = lex.cur_str(); - if (pragma_name == "version") { - lex.next(); - TokenType cmp_tok = lex.tok(); - char op = '='; bool eq = false; - if (cmp_tok == tok_gt || cmp_tok == tok_geq) { - op = '>'; - eq = cmp_tok == tok_geq; - } else if (cmp_tok == tok_lt || cmp_tok == tok_leq) { - op = '<'; - eq = cmp_tok == tok_leq; - } else if (cmp_tok == tok_eq) { - op = '='; - } else if (cmp_tok == tok_bitwise_xor) { - op = '^'; - } else { - lex.error("invalid comparison operator"); - } - lex.next_special(tok_semver, "semver"); - std::string_view pragma_value = lex.cur_str(); - int sem_ver[3] = {0, 0, 0}; - char segs = 1; - auto stoi = [&](std::string_view s) { - auto R = td::to_integer_safe(static_cast(s)); - if (R.is_error()) { - lex.error("invalid semver format"); - } - return R.move_as_ok(); - }; - std::istringstream iss_value(static_cast(pragma_value)); - for (int idx = 0; idx < 3; idx++) { - std::string s{"0"}; - std::getline(iss_value, s, '.'); - sem_ver[idx] = stoi(s); - } - // End reading semver from source code - int tolk_ver[3] = {0, 0, 0}; - std::istringstream iss(tolk_version); - for (int idx = 0; idx < 3; idx++) { - std::string s; - std::getline(iss, s, '.'); - tolk_ver[idx] = stoi(s); - } - // End parsing embedded semver - bool match = true; - switch (op) { - case '=': - if ((tolk_ver[0] != sem_ver[0]) || - (tolk_ver[1] != sem_ver[1]) || - (tolk_ver[2] != sem_ver[2])) { - match = false; - } - break; - case '>': - if ( ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] == sem_ver[2]) && !eq) || - ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && 
(tolk_ver[2] < sem_ver[2])) || - ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] < sem_ver[1])) || - ((tolk_ver[0] < sem_ver[0])) ) { - match = false; - } - break; - case '<': - if ( ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] == sem_ver[2]) && !eq) || - ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] > sem_ver[2])) || - ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] > sem_ver[1])) || - ((tolk_ver[0] > sem_ver[0])) ) { - match = false; - } - break; - case '^': - if ( ((segs == 3) && ((tolk_ver[0] != sem_ver[0]) || (tolk_ver[1] != sem_ver[1]) || (tolk_ver[2] < sem_ver[2]))) - || ((segs == 2) && ((tolk_ver[0] != sem_ver[0]) || (tolk_ver[1] < sem_ver[1]))) - || ((segs == 1) && ((tolk_ver[0] < sem_ver[0]))) ) { - match = false; - } - break; - default: - __builtin_unreachable(); - } - if (!match) { - throw ParseError(loc, std::string("Tolk version ") + tolk_version + " does not satisfy this condition"); - } - } else if (pragma_name == G.pragma_allow_post_modification.name()) { - G.pragma_allow_post_modification.enable(loc); - } else if (pragma_name == G.pragma_compute_asm_ltr.name()) { - G.pragma_compute_asm_ltr.enable(loc); - } else if (pragma_name == G.pragma_remove_unused_functions.name()) { - G.pragma_remove_unused_functions.enable(loc); - } else { - lex.error("unknown pragma name"); - } - lex.next(); - lex.expect(tok_semicolon, "';'"); -} - -void parse_include(Lexer& lex, SrcFile* parent_file) { - SrcLocation loc = lex.cur_location(); - lex.expect(tok_include, "#include"); - if (lex.tok() != tok_string_const) { - lex.expect(tok_string_const, "source file name"); - } - std::string rel_filename = lex.cur_str_std_string(); - if (rel_filename.empty()) { - lex.error("imported file name is an empty string"); - } - if (size_t rc = parent_file->rel_filename.rfind('/'); rc != std::string::npos) { - rel_filename = parent_file->rel_filename.substr(0, rc + 1) + rel_filename; - } - lex.next(); - lex.expect(tok_semicolon, 
"';'"); - - td::Result locate_res = locate_source_file(rel_filename); - if (locate_res.is_error()) { - throw ParseError(loc, "Failed to import: " + locate_res.move_as_error().message().str()); - } - - SrcFile* imported_file = locate_res.move_as_ok(); - parent_file->imports.emplace_back(SrcFile::ImportStatement{imported_file}); - if (!imported_file->was_parsed) { - parse_source_file(imported_file); - } -} - -// this function either throws (on any error) or returns nothing meaning success (filling global variables) -void parse_source_file(SrcFile* file) { - if (!file->is_stdlib_file()) { - G.generated_from += file->rel_filename; - G.generated_from += ", "; - } - file->was_parsed = true; - - Lexer lex(file); - while (!lex.is_eof()) { - if (lex.tok() == tok_pragma) { - parse_pragma(lex); - } else if (lex.tok() == tok_include) { - parse_include(lex, file); - } else if (lex.tok() == tok_global) { - parse_global_var_decls(lex); - } else if (lex.tok() == tok_const) { - parse_const_decls(lex); - } else { - parse_func_def(lex); - } - } -} - -td::Result locate_source_file(const std::string& rel_filename) { - td::Result path = G.settings.read_callback(CompilerSettings::FsReadCallbackKind::Realpath, rel_filename.c_str()); - if (path.is_error()) { - return path.move_as_error(); - } - - std::string abs_filename = path.move_as_ok(); - if (SrcFile* file = G.all_src_files.find_file(abs_filename)) { - return file; // file was already parsed (imported from somewhere else) - } - - td::Result text = G.settings.read_callback(CompilerSettings::FsReadCallbackKind::ReadFile, abs_filename.c_str()); - if (text.is_error()) { - return text.move_as_error(); - } - - return G.all_src_files.register_file(rel_filename, abs_filename, text.move_as_ok()); -} - -} // namespace tolk diff --git a/tolk/symtable.cpp b/tolk/symtable.cpp index 552abd11..9463dbbd 100644 --- a/tolk/symtable.cpp +++ b/tolk/symtable.cpp @@ -22,12 +22,6 @@ namespace tolk { -Symbol::Symbol(std::string str, sym_idx_t idx) : 
str(std::move(str)), idx(idx) { - subclass = this->str[0] == '.' ? SymbolSubclass::dot_identifier - : this->str[0] == '~' ? SymbolSubclass::tilde_identifier - : SymbolSubclass::undef; -} - std::string Symbol::unknown_symbol_name(sym_idx_t i) { if (!i) { return "_"; @@ -78,7 +72,7 @@ void open_scope(SrcLocation loc) { G.scope_opened_at.push_back(loc); } -void close_scope(SrcLocation loc) { +void close_scope() { if (!G.scope_level) { throw Fatal{"cannot close the outer scope"}; } diff --git a/tolk/symtable.h b/tolk/symtable.h index 0566122a..243437d3 100644 --- a/tolk/symtable.h +++ b/tolk/symtable.h @@ -36,18 +36,11 @@ struct SymValBase { }; -enum class SymbolSubclass { - undef = 0, - dot_identifier = 1, // begins with . (a const method) - tilde_identifier = 2 // begins with ~ (a non-const method) -}; - struct Symbol { std::string str; sym_idx_t idx; - SymbolSubclass subclass; - Symbol(std::string str, sym_idx_t idx); + Symbol(std::string str, sym_idx_t idx) : str(std::move(str)), idx(idx) {} static std::string unknown_symbol_name(sym_idx_t i); }; @@ -64,10 +57,10 @@ private: public: static constexpr sym_idx_t not_found = 0; - sym_idx_t lookup(const std::string_view& str, int mode = 0) { + sym_idx_t lookup(std::string_view str, int mode = 0) { return gen_lookup(str, mode); } - sym_idx_t lookup_add(const std::string& str) { + sym_idx_t lookup_add(std::string_view str) { return gen_lookup(str, 1); } Symbol* operator[](sym_idx_t i) const { @@ -76,9 +69,6 @@ public: std::string get_name(sym_idx_t i) const { return sym[i] ? sym[i]->str : Symbol::unknown_symbol_name(i); } - SymbolSubclass get_subclass(sym_idx_t i) const { - return sym[i] ? 
sym[i]->subclass : SymbolSubclass::undef; - } }; struct SymTableOverflow { @@ -104,7 +94,7 @@ struct SymDef { void open_scope(SrcLocation loc); -void close_scope(SrcLocation loc); +void close_scope(); SymDef* lookup_symbol(sym_idx_t idx); SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new = false, SrcLocation loc = {}); diff --git a/tolk/tolk.cpp b/tolk/tolk.cpp index 066fecbd..044d62f0 100644 --- a/tolk/tolk.cpp +++ b/tolk/tolk.cpp @@ -27,7 +27,8 @@ #include "compiler-state.h" #include "lexer.h" #include -#include "git.h" +#include "ast-from-tokens.h" +#include "ast-to-legacy.h" #include #include "td/utils/port/path.h" #include @@ -269,13 +270,13 @@ int tolk_proceed(const std::string &entrypoint_file_name) { if (locate_res.is_error()) { throw Fatal("Failed to locate stdlib: " + locate_res.error().message().str()); } - parse_source_file(locate_res.move_as_ok()); + process_file_ast(parse_src_file_to_ast(locate_res.move_as_ok())); } td::Result locate_res = locate_source_file(entrypoint_file_name); if (locate_res.is_error()) { throw Fatal("Failed to locate " + entrypoint_file_name + ": " + locate_res.error().message().str()); } - parse_source_file(locate_res.move_as_ok()); + process_file_ast(parse_src_file_to_ast(locate_res.move_as_ok())); // todo #ifdef TOLK_PROFILING + comment // lexer_measure_performance(all_src_files.get_all_files()); @@ -293,6 +294,10 @@ int tolk_proceed(const std::string &entrypoint_file_name) { unif_err.print_message(std::cerr); std::cerr << std::endl; return 2; + } catch (UnexpectedASTNodeType& error) { + std::cerr << "fatal: " << error.what() << std::endl; + std::cerr << "It's a compiler bug, please report to developers" << std::endl; + return 2; } } diff --git a/tolk/tolk.h b/tolk/tolk.h index 15aeba25..b62c6a58 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h @@ -17,7 +17,7 @@ #pragma once #include "src-file.h" -#include "lexer.h" +#include "type-expr.h" #include "symtable.h" #include "crypto/common/refint.h" #include 
"td/utils/Status.h" @@ -38,136 +38,6 @@ namespace tolk { * */ -struct TypeExpr { - enum te_type { te_Unknown, te_Var, te_Indirect, te_Atomic, te_Tensor, te_Tuple, te_Map, te_ForAll } constr; - enum AtomicType { - _Int = tok_int, - _Cell = tok_cell, - _Slice = tok_slice, - _Builder = tok_builder, - _Cont = tok_cont, - _Tuple = tok_tuple, - _Type = tok_type - }; - int value; - int minw, maxw; - static constexpr int w_inf = 1023; - std::vector args; - bool was_forall_var = false; - TypeExpr(te_type _constr, int _val = 0) : constr(_constr), value(_val), minw(0), maxw(w_inf) { - } - TypeExpr(te_type _constr, int _val, int width) : constr(_constr), value(_val), minw(width), maxw(width) { - } - TypeExpr(te_type _constr, std::vector list) - : constr(_constr), value((int)list.size()), args(std::move(list)) { - compute_width(); - } - TypeExpr(te_type _constr, std::initializer_list list) - : constr(_constr), value((int)list.size()), args(std::move(list)) { - compute_width(); - } - TypeExpr(te_type _constr, TypeExpr* elem0) : constr(_constr), value(1), args{elem0} { - compute_width(); - } - TypeExpr(te_type _constr, TypeExpr* elem0, std::vector list) - : constr(_constr), value((int)list.size() + 1), args{elem0} { - args.insert(args.end(), list.begin(), list.end()); - compute_width(); - } - TypeExpr(te_type _constr, TypeExpr* elem0, std::initializer_list list) - : constr(_constr), value((int)list.size() + 1), args{elem0} { - args.insert(args.end(), list.begin(), list.end()); - compute_width(); - } - bool is_atomic() const { - return constr == te_Atomic; - } - bool is_atomic(int v) const { - return constr == te_Atomic && value == v; - } - bool is_int() const { - return is_atomic(_Int); - } - bool is_var() const { - return constr == te_Var; - } - bool is_map() const { - return constr == te_Map; - } - bool is_tuple() const { - return constr == te_Tuple; - } - bool has_fixed_width() const { - return minw == maxw; - } - int get_width() const { - return has_fixed_width() ? 
minw : -1; - } - void compute_width(); - bool recompute_width(); - void show_width(std::ostream& os); - std::ostream& print(std::ostream& os, int prio = 0) const; - void replace_with(TypeExpr* te2); - int extract_components(std::vector& comp_list); - bool equals_to(const TypeExpr* rhs) const; - bool has_unknown_inside() const; - static int holes, type_vars; - static TypeExpr* new_hole() { - return new TypeExpr{te_Unknown, ++holes}; - } - static TypeExpr* new_hole(int width) { - return new TypeExpr{te_Unknown, ++holes, width}; - } - static TypeExpr* new_unit() { - return new TypeExpr{te_Tensor, 0, 0}; - } - static TypeExpr* new_atomic(int value) { - return new TypeExpr{te_Atomic, value, 1}; - } - static TypeExpr* new_map(TypeExpr* from, TypeExpr* to); - static TypeExpr* new_func() { - return new_map(new_hole(), new_hole()); - } - static TypeExpr* new_tensor(std::vector list, bool red = true) { - return red && list.size() == 1 ? list[0] : new TypeExpr{te_Tensor, std::move(list)}; - } - static TypeExpr* new_tensor(std::initializer_list list) { - return new TypeExpr{te_Tensor, std::move(list)}; - } - static TypeExpr* new_tensor(TypeExpr* te1, TypeExpr* te2) { - return new_tensor({te1, te2}); - } - static TypeExpr* new_tensor(TypeExpr* te1, TypeExpr* te2, TypeExpr* te3) { - return new_tensor({te1, te2, te3}); - } - static TypeExpr* new_tuple(TypeExpr* arg0) { - return new TypeExpr{te_Tuple, arg0}; - } - static TypeExpr* new_tuple(std::vector list, bool red = false) { - return new_tuple(new_tensor(std::move(list), red)); - } - static TypeExpr* new_tuple(std::initializer_list list) { - return new_tuple(new_tensor(std::move(list))); - } - static TypeExpr* new_var() { - return new TypeExpr{te_Var, --type_vars, 1}; - } - static TypeExpr* new_var(int idx) { - return new TypeExpr{te_Var, idx, 1}; - } - static TypeExpr* new_forall(std::vector list, TypeExpr* body) { - return new TypeExpr{te_ForAll, body, std::move(list)}; - } - static TypeExpr* new_forall(std::initializer_list 
list, TypeExpr* body) { - return new TypeExpr{te_ForAll, body, std::move(list)}; - } - static bool remove_indirect(TypeExpr*& te, TypeExpr* forbidden = nullptr); - static std::vector remove_forall(TypeExpr*& te); - static bool remove_forall_in(TypeExpr*& te, TypeExpr* te2, const std::vector& new_vars); -}; - -std::ostream& operator<<(std::ostream& os, TypeExpr* type_expr); - struct UnifyError : std::exception { TypeExpr* te1; TypeExpr* te2; @@ -197,14 +67,13 @@ using const_idx_t = int; struct TmpVar { TypeExpr* v_type; var_idx_t idx; - enum { _In = 1, _Named = 2, _Tmp = 4, _UniqueName = 0x20 }; - int cls; + bool is_tmp_unnamed; sym_idx_t name; int coord; SrcLocation where; std::vector> on_modification; - TmpVar(var_idx_t _idx, int _cls, TypeExpr* _type, SymDef* sym, SrcLocation loc); + TmpVar(var_idx_t _idx, bool _is_tmp_unnamed, TypeExpr* _type, SymDef* sym, SrcLocation loc); void show(std::ostream& os, int omit_idx = 0) const; void dump(std::ostream& os) const; void set_location(SrcLocation loc); @@ -586,9 +455,9 @@ struct CodeBlob { return res; } bool import_params(FormalArgList arg_list); - var_idx_t create_var(int cls, TypeExpr* var_type, SymDef* sym, SrcLocation loc); + var_idx_t create_var(bool is_tmp_unnamed, TypeExpr* var_type, SymDef* sym, SrcLocation loc); var_idx_t create_tmp_var(TypeExpr* var_type, SrcLocation loc) { - return create_var(TmpVar::_Tmp, var_type, nullptr, loc); + return create_var(true, var_type, nullptr, loc); } int split_vars(bool strict = false); bool compute_used_code_vars(); @@ -631,7 +500,6 @@ struct CodeBlob { struct SymVal : SymValBase { TypeExpr* sym_type; - bool auto_apply{false}; SymVal(SymValKind kind, int idx, TypeExpr* sym_type = nullptr) : SymValBase(kind, idx), sym_type(sym_type) { } @@ -702,16 +570,6 @@ struct SymValCodeFunc : SymValFunc { bool does_need_codegen() const; }; -struct SymValType : SymValBase { - TypeExpr* sym_type; - SymValType(SymValKind kind, int idx, TypeExpr* _stype = nullptr) : SymValBase(kind, idx), 
sym_type(_stype) { - } - ~SymValType() override = default; - TypeExpr* get_type() const { - return sym_type; - } -}; - struct SymValGlobVar : SymValBase { TypeExpr* sym_type; int out_idx{0}; @@ -762,7 +620,6 @@ struct SymValConst : SymValBase { // defined in parse-tolk.cpp td::Result locate_source_file(const std::string& rel_filename); -void parse_source_file(SrcFile* file); /* @@ -792,7 +649,7 @@ struct Expr { }; ExprCls cls; int val{0}; - enum { _IsType = 1, _IsRvalue = 2, _IsLvalue = 4, _IsImpure = 32, _IsInsideParenthesis = 64 }; + enum { _IsType = 1, _IsRvalue = 2, _IsLvalue = 4, _IsImpure = 32 }; int flags{0}; SrcLocation here; td::RefInt256 intval; @@ -834,18 +691,23 @@ struct Expr { bool is_type() const { return flags & _IsType; } - bool is_inside_parenthesis() const { - return flags & _IsInsideParenthesis; - } bool is_type_apply() const { return cls == _TypeApply; } bool is_mktuple() const { return cls == _MkTuple; } - void chk_rvalue(const Lexer& lex) const; // todo here and below: strange to pass Lexer - void chk_lvalue(const Lexer& lex) const; - bool deduce_type(const Lexer& lex); + void chk_rvalue() const { + if (!is_rvalue()) { + throw ParseError(here, "rvalue expected"); + } + } + void chk_lvalue() const { + if (!is_lvalue()) { + throw ParseError(here, "lvalue expected"); + } + } + bool deduce_type(); void set_location(SrcLocation loc) { here = loc; } diff --git a/tolk/type-expr.h b/tolk/type-expr.h new file mode 100644 index 00000000..4893df35 --- /dev/null +++ b/tolk/type-expr.h @@ -0,0 +1,140 @@ +#pragma once + +#include +#include +#include "lexer.h" + +namespace tolk { + +struct TypeExpr { + enum Kind { te_Unknown, te_Var, te_Indirect, te_Atomic, te_Tensor, te_Tuple, te_Map, te_ForAll }; + // todo not _ + enum AtomicType { + _Int = tok_int, + _Cell = tok_cell, + _Slice = tok_slice, + _Builder = tok_builder, + _Cont = tok_cont, + _Tuple = tok_tuple, + }; + Kind constr; + int value; + int minw, maxw; + static constexpr int w_inf = 1023; + 
std::vector args; + bool was_forall_var = false; + TypeExpr(Kind _constr, int _val = 0) : constr(_constr), value(_val), minw(0), maxw(w_inf) { + } + TypeExpr(Kind _constr, int _val, int width) : constr(_constr), value(_val), minw(width), maxw(width) { + } + TypeExpr(Kind _constr, std::vector list) + : constr(_constr), value((int)list.size()), args(std::move(list)) { + compute_width(); + } + TypeExpr(Kind _constr, std::initializer_list list) + : constr(_constr), value((int)list.size()), args(std::move(list)) { + compute_width(); + } + TypeExpr(Kind _constr, TypeExpr* elem0) : constr(_constr), value(1), args{elem0} { + compute_width(); + } + TypeExpr(Kind _constr, TypeExpr* elem0, std::vector list) + : constr(_constr), value((int)list.size() + 1), args{elem0} { + args.insert(args.end(), list.begin(), list.end()); + compute_width(); + } + TypeExpr(Kind _constr, TypeExpr* elem0, std::initializer_list list) + : constr(_constr), value((int)list.size() + 1), args{elem0} { + args.insert(args.end(), list.begin(), list.end()); + compute_width(); + } + bool is_atomic() const { + return constr == te_Atomic; + } + bool is_atomic(int v) const { + return constr == te_Atomic && value == v; + } + bool is_int() const { + return is_atomic(_Int); + } + bool is_var() const { + return constr == te_Var; + } + bool is_map() const { + return constr == te_Map; + } + bool is_tuple() const { + return constr == te_Tuple; + } + bool has_fixed_width() const { + return minw == maxw; + } + int get_width() const { + return has_fixed_width() ? 
minw : -1; + } + void compute_width(); + bool recompute_width(); + void show_width(std::ostream& os); + std::ostream& print(std::ostream& os, int prio = 0) const; + void replace_with(TypeExpr* te2); + int extract_components(std::vector& comp_list); + bool equals_to(const TypeExpr* rhs) const; + bool has_unknown_inside() const; + static int holes, type_vars; + static TypeExpr* new_hole() { + return new TypeExpr{te_Unknown, ++holes}; + } + static TypeExpr* new_hole(int width) { + return new TypeExpr{te_Unknown, ++holes, width}; + } + static TypeExpr* new_unit() { + return new TypeExpr{te_Tensor, 0, 0}; + } + static TypeExpr* new_atomic(int value) { + return new TypeExpr{te_Atomic, value, 1}; + } + static TypeExpr* new_map(TypeExpr* from, TypeExpr* to); + static TypeExpr* new_func() { + return new_map(new_hole(), new_hole()); + } + static TypeExpr* new_tensor(std::vector list, bool red = true) { + return red && list.size() == 1 ? list[0] : new TypeExpr{te_Tensor, std::move(list)}; + } + static TypeExpr* new_tensor(std::initializer_list list) { + return new TypeExpr{te_Tensor, std::move(list)}; + } + static TypeExpr* new_tensor(TypeExpr* te1, TypeExpr* te2) { + return new_tensor({te1, te2}); + } + static TypeExpr* new_tensor(TypeExpr* te1, TypeExpr* te2, TypeExpr* te3) { + return new_tensor({te1, te2, te3}); + } + static TypeExpr* new_tuple(TypeExpr* arg0) { + return new TypeExpr{te_Tuple, arg0}; + } + static TypeExpr* new_tuple(std::vector list, bool red = false) { + return new_tuple(new_tensor(std::move(list), red)); + } + static TypeExpr* new_tuple(std::initializer_list list) { + return new_tuple(new_tensor(list)); + } + static TypeExpr* new_var() { + return new TypeExpr{te_Var, --type_vars, 1}; + } + static TypeExpr* new_var(int idx) { + return new TypeExpr{te_Var, idx, 1}; + } + static TypeExpr* new_forall(std::vector list, TypeExpr* body) { + return new TypeExpr{te_ForAll, body, std::move(list)}; + } + static TypeExpr* new_forall(std::initializer_list list, 
TypeExpr* body) { + return new TypeExpr{te_ForAll, body, std::move(list)}; + } + static bool remove_indirect(TypeExpr*& te, TypeExpr* forbidden = nullptr); + static std::vector remove_forall(TypeExpr*& te); + static bool remove_forall_in(TypeExpr*& te, TypeExpr* te2, const std::vector& new_vars); +}; + +std::ostream& operator<<(std::ostream& os, TypeExpr* type_expr); + +} // namespace tolk diff --git a/tolk/unify-types.cpp b/tolk/unify-types.cpp index 04de323d..cc2073ed 100644 --- a/tolk/unify-types.cpp +++ b/tolk/unify-types.cpp @@ -268,8 +268,6 @@ std::ostream& TypeExpr::print(std::ostream& os, int lex_level) const { return os << "cont"; case _Tuple: return os << "tuple"; - case _Type: - return os << "type"; default: return os << "atomic-type-" << value; }