1
0
Fork 0
mirror of https://github.com/ton-blockchain/ton synced 2025-03-09 15:40:10 +00:00

[Tolk] Implement AST: intermediate representation of tolk files

Now, the whole .tolk file can be loaded as AST tree and
then converted to Expr/Op.
This gives a great ability to implement AST transformations.
In the future, more and more code analysis will be moved out of legacy to AST-level.
This commit is contained in:
tolk-vm 2024-10-31 11:03:33 +04:00
parent 6c30e5a7eb
commit 80001d1756
No known key found for this signature in database
GPG key ID: 7905DD7FE0324B12
23 changed files with 3798 additions and 2233 deletions

View file

@ -5,8 +5,10 @@ set(TOLK_SOURCE
lexer.cpp
symtable.cpp
compiler-state.cpp
ast.cpp
ast-from-tokens.cpp
ast-to-legacy.cpp
unify-types.cpp
parse-tolk.cpp
abscode.cpp
gen-abscode.cpp
analyzer.cpp

View file

@ -25,8 +25,8 @@ namespace tolk {
*
*/
TmpVar::TmpVar(var_idx_t _idx, int _cls, TypeExpr* _type, SymDef* sym, SrcLocation loc)
: v_type(_type), idx(_idx), cls(_cls), coord(0), where(loc) {
TmpVar::TmpVar(var_idx_t _idx, bool _is_tmp_unnamed, TypeExpr* _type, SymDef* sym, SrcLocation loc)
: v_type(_type), idx(_idx), is_tmp_unnamed(_is_tmp_unnamed), coord(0), where(loc) {
if (sym) {
name = sym->sym_idx;
sym->value->idx = _idx;
@ -59,9 +59,9 @@ void TmpVar::dump(std::ostream& os) const {
}
void TmpVar::show(std::ostream& os, int omit_idx) const {
if (cls & _Named) {
if (!is_tmp_unnamed) {
os << G.symbols.get_name(name);
if (omit_idx && (omit_idx >= 2 || (cls & _UniqueName))) {
if (omit_idx >= 2) {
return;
}
}
@ -474,8 +474,8 @@ void CodeBlob::print(std::ostream& os, int flags) const {
os << "-------- END ---------\n\n";
}
var_idx_t CodeBlob::create_var(int cls, TypeExpr* var_type, SymDef* sym, SrcLocation location) {
vars.emplace_back(var_cnt, cls, var_type, sym, location);
var_idx_t CodeBlob::create_var(bool is_tmp_unnamed, TypeExpr* var_type, SymDef* sym, SrcLocation location) {
vars.emplace_back(var_cnt, is_tmp_unnamed, var_type, sym, location);
if (sym) {
sym->value->idx = var_cnt;
}
@ -492,7 +492,7 @@ bool CodeBlob::import_params(FormalArgList arg_list) {
SymDef* arg_sym;
SrcLocation arg_loc;
std::tie(arg_type, arg_sym, arg_loc) = par;
list.push_back(create_var(arg_sym ? (TmpVar::_In | TmpVar::_Named) : TmpVar::_In, arg_type, arg_sym, arg_loc));
list.push_back(create_var(arg_sym == nullptr, arg_type, arg_sym, arg_loc));
}
emplace_back(loc, Op::_Import, list);
in_var_cnt = var_cnt;

View file

@ -46,7 +46,7 @@ int CodeBlob::split_vars(bool strict) {
if (k != 1) {
var.coord = ~((n << 8) + k);
for (int i = 0; i < k; i++) {
auto v = create_var(vars[j].cls, comp_types[i], 0, vars[j].where);
auto v = create_var(vars[j].is_tmp_unnamed, comp_types[i], 0, vars[j].where);
tolk_assert(v == n + i);
tolk_assert(vars[v].idx == v);
vars[v].name = vars[j].name;

877
tolk/ast-from-tokens.cpp Normal file
View file

@ -0,0 +1,877 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#include "ast-from-tokens.h"
#include "ast.h"
#include "platform-utils.h"
#include "type-expr.h"
/*
* Here we construct AST for a tolk file.
* While constructing, no global state is modified.
* Historically, in FunC, there was no AST: while lexing, symbols were registered, types were inferred, and so on.
* There was no way to perform any more or less semantic analysis.
* Implementing AST gives a giant advance for future modifications and stability.
*/
namespace tolk {
// tells whether a token is one of the comparison operators: < > <= >= == != <=>
static bool is_comparison_binary_op(TokenType tok) {
  switch (tok) {
    case tok_lt:
    case tok_gt:
    case tok_leq:
    case tok_geq:
    case tok_eq:
    case tok_neq:
    case tok_spaceship:
      return true;
    default:
      return false;
  }
}
// tells whether a token is one of the bitwise binary operators: & | ^
// (in Tolk, they are used as logical ones due to absence of a boolean type and && || operators)
static bool is_bitwise_binary_op(TokenType tok) {
  switch (tok) {
    case tok_bitwise_and:
    case tok_bitwise_or:
    case tok_bitwise_xor:
      return true;
    default:
      return false;
  }
}
// tells whether a token is binary + or binary -
static bool is_add_or_sub_binary_op(TokenType tok) {
  switch (tok) {
    case tok_plus:
    case tok_minus:
      return true;
    default:
      return false;
  }
}
// throws a ParseError for code like "flags & 0xFF != 0", where the operator written first (op_lower)
// binds weaker than the one written next to it (op_higher), so the expression is grouped
// differently from how it reads; it would better be a warning, but it was decided to make it a strict error
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
static void fire_error_lower_precedence(SrcLocation loc, std::string_view op_lower, std::string_view op_higher) {
  const std::string lower{op_lower};
  const std::string higher{op_higher};

  std::string message = lower;
  message += " has lower precedence than ";
  message += higher;
  message += ", probably this code won't work as you expected. ";
  message += "Use parenthesis: either (... ";
  message += lower;
  message += " ...) to evaluate it first, or (... ";
  message += higher;
  message += " ...) to suppress this error.";
  throw ParseError(loc, std::move(message));
}
// throws a ParseError for code like "arg1 & arg2 | arg3", where two different
// bitwise operators (op1, then op2) are chained without explicit grouping
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
static void fire_error_mix_bitwise_and_or(SrcLocation loc, std::string_view op1, std::string_view op2) {
  std::string message = "mixing ";
  message += std::string{op1};
  message += " with ";
  message += std::string{op2};
  message += " without parenthesis";
  message += ", probably this code won't work as you expected. ";
  message += "Use parenthesis to emphasize operator precedence.";
  throw ParseError(loc, std::move(message));
}
// checks the operands of a bitwise operator (& | ^) for constructs that are most likely a precedence mistake;
// example: "flags & 0xFF != 0" is grouped as "flags & (0xFF != 0)", which is hardly what was meant;
// the programmer can silence the error only by adding parentheses around the operand:
// (0!=1) is ast_parenthesized_expr{ast_binary_operator}, so an operand whose own type
// is ast_binary_operator is known to be written without surrounding parentheses
static void diagnose_bitwise_precedence(SrcLocation loc, std::string_view operator_name, AnyV lhs, AnyV rhs) {
  // "flags & 0xFF != 0": the right operand is a bare comparison "0xFF != 0"
  if (rhs->type == ast_binary_operator) {
    auto v_rhs = rhs->as<ast_binary_operator>();
    if (is_comparison_binary_op(v_rhs->tok)) {
      fire_error_lower_precedence(loc, operator_name, v_rhs->operator_name);
    }
  }

  // the remaining checks concern only a bare binary operator on the left
  if (lhs->type != ast_binary_operator) {
    return;
  }
  auto v_lhs = lhs->as<ast_binary_operator>();

  // "0 != flags & 0xFF": the left operand is a bare comparison "0 != flags"
  if (is_comparison_binary_op(v_lhs->tok)) {
    fire_error_lower_precedence(loc, operator_name, v_lhs->operator_name);
  }
  // "arg1 & arg2 | arg3": the left operand is a bare bitwise operation with a different operator
  if (is_bitwise_binary_op(v_lhs->tok) && v_lhs->operator_name != operator_name) {
    fire_error_mix_bitwise_and_or(loc, v_lhs->operator_name, operator_name);
  }
}
// checks the right operand of a bit shift: "a << 8 + 1" is grouped as "a << (8 + 1)",
// so a bare + or - there (not wrapped into parentheses) is reported as an error
static void diagnose_addition_in_bitshift(SrcLocation loc, std::string_view bitshift_operator_name, AnyV rhs) {
  if (rhs->type != ast_binary_operator) {
    return;
  }
  auto v_rhs = rhs->as<ast_binary_operator>();
  if (is_add_or_sub_binary_op(v_rhs->tok)) {
    fire_error_lower_precedence(loc, bitshift_operator_name, v_rhs->operator_name);
  }
}
/*
*
* PARSE SOURCE
*
*/
// TE ::= TA | TA -> TE
// TA ::= int | ... | cont | var | _ | () | ( TE { , TE } ) | [ TE { , TE } ]
TypeExpr* parse_type(Lexer& lex, V<ast_forall_list> forall_list);
// Parses a single "atomic" type (TA in the grammar above), without a trailing `-> TE`:
// a built-in type keyword, a hole (`var` or `_`), a type parameter listed in forall_list,
// or a list of types in parentheses / square brackets.
// forall_list may be nullptr; then no identifier is accepted as a type.
TypeExpr* parse_type1(Lexer& lex, V<ast_forall_list> forall_list) {
switch (lex.tok()) {
case tok_int:
lex.next();
return TypeExpr::new_atomic(TypeExpr::_Int);
case tok_cell:
lex.next();
return TypeExpr::new_atomic(TypeExpr::_Cell);
case tok_slice:
lex.next();
return TypeExpr::new_atomic(TypeExpr::_Slice);
case tok_builder:
lex.next();
return TypeExpr::new_atomic(TypeExpr::_Builder);
case tok_cont:
lex.next();
return TypeExpr::new_atomic(TypeExpr::_Cont);
case tok_tuple:
lex.next();
return TypeExpr::new_atomic(TypeExpr::_Tuple);
case tok_var:
case tok_underscore:
// both `var` and `_` produce a type hole
lex.next();
return TypeExpr::new_hole();
case tok_identifier: {
// an identifier is a type only if it names an item of forall_list
if (int idx = forall_list ? forall_list->lookup_idx(lex.cur_str()) : -1; idx != -1) {
lex.next();
return forall_list->get_item(idx)->created_type;
}
// NOTE(review): there is no `break` after this call, so if lex.error() returned, control would
// continue into `default` and below; presumably lex.error() never returns - confirm
lex.error("Is not a type identifier");
}
default:
break;
}
// not a keyword/identifier: the type must start with '[' or '('; `c` is the matching closing token
TokenType c;
if (lex.tok() == tok_opbracket) {
lex.next();
c = tok_clbracket;
} else {
lex.expect(tok_oppar, "<type>");
c = tok_clpar;
}
// empty list: `()` is the unit type, `[]` is an empty tuple
if (lex.tok() == c) {
lex.next();
return c == tok_clpar ? TypeExpr::new_unit() : TypeExpr::new_tuple({});
}
auto t1 = parse_type(lex, forall_list);
// a single type followed by ')' is returned as is, not wrapped into a tensor;
// lex.expect(c, ...) is called with the closing token matching the opener
// (presumably it reports an error when the list was opened with '[' - confirm)
if (lex.tok() == tok_clpar) {
lex.expect(c, c == tok_clpar ? "')'" : "']'");
return t1;
}
// vector of one element t1: (count, value) constructor, since the int 1 is not convertible to TypeExpr*
std::vector<TypeExpr*> tlist{1, t1};
while (lex.tok() == tok_comma) {
lex.next();
tlist.push_back(parse_type(lex, forall_list));
}
lex.expect(c, c == tok_clpar ? "')'" : "']'");
// parentheses make a tensor, square brackets make a tuple (also for a single bracketed type)
return c == tok_clpar ? TypeExpr::new_tensor(std::move(tlist)) : TypeExpr::new_tuple(std::move(tlist));
}
// Parses a full type (TE in the grammar): an atomic type optionally followed by `-> TE`.
// The part after the arrow is parsed recursively, so a chain of arrows groups to the right.
TypeExpr* parse_type(Lexer& lex, V<ast_forall_list> forall_list) {
  TypeExpr* from = parse_type1(lex, forall_list);
  if (lex.tok() != tok_mapsto) {
    return from;
  }
  lex.next();
  TypeExpr* to = parse_type(lex, forall_list);
  return TypeExpr::new_map(from, to);
}
// Parses one formal argument of a function declaration: an optional type followed by an optional name.
// Accepted forms, as handled below:
//   `_`             unnamed argument, type is a hole
//   `_ name`        named argument, type is a hole
//   `<type>`        unnamed argument of a given type (optionally followed by `_`)
//   `<type> name`   named argument of a given type
//   `name`          named argument, type is a hole (an identifier that is not a forall type parameter)
// An unnamed argument is created with an empty name "".
// forall_list may be nullptr; it's used to recognize type parameters written as identifiers.
AnyV parse_argument(Lexer& lex, V<ast_forall_list> forall_list) {
  TypeExpr* arg_type = nullptr;
  SrcLocation loc = lex.cur_location();
  if (lex.tok() == tok_underscore) {
    lex.next();
    if (lex.tok() == tok_comma || lex.tok() == tok_clpar) {
      return createV<ast_argument>(loc, "", TypeExpr::new_hole());
    }
    arg_type = TypeExpr::new_hole();
    loc = lex.cur_location();
  } else if (lex.tok() != tok_identifier) {  // int, cell, [X], etc.
    arg_type = parse_type(lex, forall_list);
  } else if (forall_list && forall_list->lookup_idx(lex.cur_str()) != -1) {
    // an identifier naming a forall type parameter starts a type
    arg_type = parse_type(lex, forall_list);
  } else {
    // any other identifier is the argument name itself, its type is left as a hole
    arg_type = TypeExpr::new_hole();
  }
  // no name follows the type: the argument is unnamed
  if (lex.tok() == tok_underscore || lex.tok() == tok_comma || lex.tok() == tok_clpar) {
    if (lex.tok() == tok_underscore) {
      loc = lex.cur_location();
      lex.next();
    }
    return createV<ast_argument>(loc, "", arg_type);
  }
  lex.check(tok_identifier, "parameter name");
  loc = lex.cur_location();
  std::string_view arg_name = lex.cur_str();
  lex.next();
  return createV<ast_argument>(loc, arg_name, arg_type);
}
// Parses one item of a `global` list: an optional type followed by the variable name.
// `_ name` gives a hole type, a bare `name` leaves declared_type as nullptr.
AnyV parse_global_var_declaration(Lexer& lex) {
  SrcLocation loc = lex.cur_location();
  TypeExpr* declared_type = nullptr;

  switch (lex.tok()) {
    case tok_underscore:
      lex.next();
      declared_type = TypeExpr::new_hole();
      loc = lex.cur_location();
      break;
    case tok_identifier:
      // no type is given, the name goes right away
      break;
    default:
      declared_type = parse_type(lex, nullptr);
      break;
  }

  lex.check(tok_identifier, "global variable name");
  std::string_view var_name = lex.cur_str();
  lex.next();
  return createV<ast_global_var_declaration>(loc, var_name, declared_type);
}
AnyV parse_expr(Lexer& lex);
// Parses one item of a `const` list: `[int | slice] name = <expr>`.
// Only `int` and `slice` are recognized as an explicit type here; without it, declared_type stays nullptr.
AnyV parse_constant_declaration(Lexer& lex) {
  TypeExpr* declared_type = nullptr;
  const TokenType type_tok = lex.tok();
  if (type_tok == tok_int || type_tok == tok_slice) {
    declared_type = TypeExpr::new_atomic(type_tok == tok_int ? TypeExpr::_Int : TypeExpr::_Slice);
    lex.next();
  }

  lex.check(tok_identifier, "constant name");
  SrcLocation loc = lex.cur_location();
  std::string_view const_name = lex.cur_str();
  lex.next();

  lex.expect(tok_assign, "'='");
  AnyV init_value = parse_expr(lex);
  return createV<ast_constant_declaration>(loc, const_name, declared_type, init_value);
}
// Parses a parenthesized, comma-separated list of formal arguments: `( [arg { , arg }] )`.
AnyV parse_argument_list(Lexer& lex, V<ast_forall_list> forall_list) {
  SrcLocation loc = lex.cur_location();
  lex.expect(tok_oppar, "argument list");

  std::vector<AnyV> args;
  if (lex.tok() != tok_clpar) {
    for (;;) {
      args.push_back(parse_argument(lex, forall_list));
      if (lex.tok() != tok_comma) {
        break;
      }
      lex.next();
    }
  }

  lex.expect(tok_clpar, "')'");
  return createV<ast_argument_list>(loc, std::move(args));
}
// Parses `const <decl> { , <decl> } ;` into a single list vertex.
AnyV parse_constant_declaration_list(Lexer& lex) {
  SrcLocation loc = lex.cur_location();
  lex.expect(tok_const, "'const'");

  std::vector<AnyV> consts;
  consts.push_back(parse_constant_declaration(lex));
  while (lex.tok() == tok_comma) {
    lex.expect(tok_comma, "','");
    consts.push_back(parse_constant_declaration(lex));
  }

  lex.expect(tok_semicolon, "';'");
  return createV<ast_constant_declaration_list>(loc, std::move(consts));
}
// Parses `global <decl> { , <decl> } ;` into a single list vertex.
AnyV parse_global_var_declaration_list(Lexer& lex) {
  SrcLocation loc = lex.cur_location();
  lex.expect(tok_global, "'global'");

  std::vector<AnyV> globals;
  globals.push_back(parse_global_var_declaration(lex));
  while (lex.tok() == tok_comma) {
    lex.expect(tok_comma, "','");
    globals.push_back(parse_global_var_declaration(lex));
  }

  lex.expect(tok_semicolon, "';'");
  return createV<ast_global_var_declaration_list>(loc, std::move(globals));
}
// parse ( E { , E } ) | () | [ E { , E } ] | [] | id | num | _
// This is the innermost level of the expression parser: literals, identifiers, built-in type keywords,
// and lists in parentheses / square brackets. A list consisting of type expressions only,
// like `(int, slice)`, is folded into a single ast_type_expression instead of a tensor of vertices.
AnyV parse_expr100(Lexer& lex) {
SrcLocation loc = lex.cur_location();
if (lex.tok() == tok_oppar) {
lex.next();
// `()` is an empty tensor
if (lex.tok() == tok_clpar) {
lex.next();
return createV<ast_tensor>(loc, {});
}
AnyV res = parse_expr(lex);
// `(expr)` with a single item is kept as a dedicated vertex, not as a tensor of one element
// (the precedence diagnostics in this file rely on it to detect explicit parentheses)
if (lex.tok() == tok_clpar) {
lex.next();
return createV<ast_parenthesized_expr>(loc, res);
}
std::vector<AnyV> items;
bool is_type_expression = res->type == ast_type_expression; // to differ `(a,b)` and `(int,slice)`
items.emplace_back(res);
while (lex.tok() == tok_comma) {
lex.next();
AnyV item = parse_expr(lex);
// every item must be of the same kind (type / non-type) as the first one
if (is_type_expression != (item->type == ast_type_expression)) {
lex.error("mixing type and non-type expressions inside the same tuple");
}
items.emplace_back(item);
}
lex.expect(tok_clpar, "')'");
if (is_type_expression) {
// `(int, slice)`: collect declared types of all items into one tensor type
std::vector<TypeExpr*> types;
types.reserve(items.size());
for (AnyV item : items) {
types.emplace_back(item->as<ast_type_expression>()->declared_type);
}
return createV<ast_type_expression>(loc, TypeExpr::new_tensor(std::move(types)));
}
return createV<ast_tensor>(loc, std::move(items));
}
if (lex.tok() == tok_opbracket) {
lex.next();
// `[]` is an empty square-bracket tensor
if (lex.tok() == tok_clbracket) {
lex.next();
return createV<ast_tensor_square>(loc, {});
}
// unlike parentheses, a single item in brackets is not special: `[expr]` is a list of one element
AnyV res = parse_expr(lex);
std::vector<AnyV> items;
bool is_type_expression = res->type == ast_type_expression; // to differ `[a,b]` and `[int,slice]`
items.emplace_back(res);
while (lex.tok() == tok_comma) {
lex.next();
AnyV item = parse_expr(lex);
if (is_type_expression != (item->type == ast_type_expression)) {
lex.error("mixing type and non-type expressions inside the same tuple");
}
items.emplace_back(item);
}
lex.expect(tok_clbracket, "']'");
if (is_type_expression) {
// `[int, slice]`: a tuple type wrapping a tensor of the declared types
std::vector<TypeExpr*> types;
types.reserve(items.size());
for (AnyV item : items) {
types.emplace_back(item->as<ast_type_expression>()->declared_type);
}
return createV<ast_type_expression>(loc, TypeExpr::new_tuple(TypeExpr::new_tensor(std::move(types))));
}
return createV<ast_tensor_square>(loc, std::move(items));
}
// not a list: a single-token expression (a string constant may be followed by a modifier token)
TokenType t = lex.tok();
if (t == tok_int_const) {
std::string_view int_val = lex.cur_str();
lex.next();
return createV<ast_int_const>(loc, int_val);
}
if (t == tok_string_const) {
std::string_view str_val = lex.cur_str();
lex.next();
// modifier is the first character of the optional modifier token, 0 if there is none
char modifier = 0;
if (lex.tok() == tok_string_modifier) {
modifier = lex.cur_str()[0];
lex.next();
}
return createV<ast_string_const>(loc, str_val, modifier);
}
if (t == tok_underscore) {
lex.next();
return createV<ast_underscore>(loc);
}
// `var` in expression position is a type expression with a hole type
if (t == tok_var) {
lex.next();
return createV<ast_type_expression>(loc, TypeExpr::new_hole());
}
// built-in type keywords; the token value itself is passed to TypeExpr::new_atomic
if (t == tok_int || t == tok_cell || t == tok_slice || t == tok_builder || t == tok_cont || t == tok_tuple) {
lex.next();
return createV<ast_type_expression>(loc, TypeExpr::new_atomic(t));
}
if (t == tok_true || t == tok_false) {
lex.next();
return createV<ast_bool_const>(loc, t == tok_true);
}
if (t == tok_nil) {
lex.next();
return createV<ast_nil_tuple>(loc);
}
if (t == tok_identifier) {
std::string_view str_val = lex.cur_str();
lex.next();
return createV<ast_identifier>(loc, str_val);
}
// nothing matched: the current token is not an identifier here, so this call is made
// to report the error (presumably lex.expect() does not return in this case - confirm)
lex.expect(tok_identifier, "identifier");
return nullptr;
}
// parse E { E }
// Handles juxtaposition of primary expressions: while the next token starts another primary
// (an opening parenthesis/bracket, or an identifier not starting with '.' or '~'),
// it is parsed and combined with what has been parsed so far:
// - a type expression followed by E is a variable declaration, like `int x` or `(int, int) (a, b)`
// - anything else followed by E is a function call, like `f(a)` or `f x`
// The result is accumulated in a loop, so chains like `f(a)(b)` are parsed as `(f(a))(b)`.
AnyV parse_expr90(Lexer& lex) {
  AnyV res = parse_expr100(lex);
  while (lex.tok() == tok_oppar || lex.tok() == tok_opbracket || (lex.tok() == tok_identifier && lex.cur_str()[0] != '.' && lex.cur_str()[0] != '~')) {
    if (const auto* v_type_expr = res->try_as<ast_type_expression>()) {
      AnyV dest = parse_expr100(lex);
      res = createV<ast_variable_declaration>(v_type_expr->loc, v_type_expr->declared_type, dest);
    } else {
      AnyV arg = parse_expr100(lex);
      res = createV<ast_function_call>(res->loc, res, arg);
    }
  }
  return res;
}
// parse E { .method E | ~method E }
// A method name comes from the lexer as a single identifier token starting with '.' or '~';
// calls are chained left to right: `a.f(x).g(y)` wraps the vertex of `a.f(x)` as the object of `.g`.
AnyV parse_expr80(Lexer& lex) {
  AnyV result = parse_expr90(lex);
  for (;;) {
    if (lex.tok() != tok_identifier) {
      break;
    }
    const char first_char = lex.cur_str()[0];
    if (first_char != '.' && first_char != '~') {
      break;
    }
    std::string_view method_name = lex.cur_str();
    SrcLocation loc = lex.cur_location();
    lex.next();
    AnyV arg = parse_expr100(lex);
    result = createV<ast_dot_tilde_call>(loc, method_name, result, arg);
  }
  return result;
}
// parse [ ~ | - | + ] E
// Unary operators may be stacked (`- - x`): the operand is parsed by a recursive call.
AnyV parse_expr75(Lexer& lex) {
  const TokenType t = lex.tok();
  const bool is_unary_op = t == tok_bitwise_not || t == tok_minus || t == tok_plus;
  if (!is_unary_op) {
    return parse_expr80(lex);
  }

  SrcLocation loc = lex.cur_location();
  std::string_view operator_name = lex.cur_str();
  lex.next();
  AnyV operand = parse_expr75(lex);
  return createV<ast_unary_operator>(loc, operator_name, t, operand);
}
// parse E { (* | / | % | /% | ^/ | ~/ | ^% | ~% ) E }
// Multiplicative operators, left-associative.
AnyV parse_expr30(Lexer& lex) {
  const auto is_mul_div_op = [](TokenType t) {
    switch (t) {
      case tok_mul:
      case tok_div:
      case tok_mod:
      case tok_divmod:
      case tok_divC:
      case tok_divR:
      case tok_modC:
      case tok_modR:
        return true;
      default:
        return false;
    }
  };

  AnyV result = parse_expr75(lex);
  for (TokenType t = lex.tok(); is_mul_div_op(t); t = lex.tok()) {
    SrcLocation loc = lex.cur_location();
    std::string_view operator_name = lex.cur_str();
    lex.next();
    AnyV rhs = parse_expr75(lex);
    result = createV<ast_binary_operator>(loc, operator_name, t, result, rhs);
  }
  return result;
}
// parse E { (+ | -) E }
// Additive operators, left-associative.
AnyV parse_expr20(Lexer& lex) {
  AnyV result = parse_expr30(lex);
  for (TokenType t = lex.tok(); t == tok_minus || t == tok_plus; t = lex.tok()) {
    SrcLocation loc = lex.cur_location();
    std::string_view operator_name = lex.cur_str();
    lex.next();
    AnyV rhs = parse_expr30(lex);
    result = createV<ast_binary_operator>(loc, operator_name, t, result, rhs);
  }
  return result;
}
// parse E { ( << | >> | ~>> | ^>> ) E }
// Bit shifts, left-associative; a bare + or - in the right operand is reported as an error.
AnyV parse_expr17(Lexer& lex) {
  const auto is_shift_op = [](TokenType t) {
    return t == tok_lshift || t == tok_rshift || t == tok_rshiftC || t == tok_rshiftR;
  };

  AnyV result = parse_expr20(lex);
  for (TokenType t = lex.tok(); is_shift_op(t); t = lex.tok()) {
    SrcLocation loc = lex.cur_location();
    std::string_view operator_name = lex.cur_str();
    lex.next();
    AnyV rhs = parse_expr20(lex);
    diagnose_addition_in_bitshift(loc, operator_name, rhs);
    result = createV<ast_binary_operator>(loc, operator_name, t, result, rhs);
  }
  return result;
}
// parse E [ (== | < | > | <= | >= | != | <=> ) E ]
// At most one comparison is parsed at this level: comparisons are not chained.
AnyV parse_expr15(Lexer& lex) {
  AnyV lhs = parse_expr17(lex);
  const TokenType t = lex.tok();
  const bool is_comparison = t == tok_eq || t == tok_lt || t == tok_gt || t == tok_leq ||
                             t == tok_geq || t == tok_neq || t == tok_spaceship;
  if (!is_comparison) {
    return lhs;
  }

  SrcLocation loc = lex.cur_location();
  std::string_view operator_name = lex.cur_str();
  lex.next();
  AnyV rhs = parse_expr17(lex);
  return createV<ast_binary_operator>(loc, operator_name, t, lhs, rhs);
}
// parse E { ( & | `|` | ^ ) E }
// Bitwise operators, left-associative; every pair of operands is checked for probable precedence mistakes.
AnyV parse_expr14(Lexer& lex) {
  const auto is_bitwise_op = [](TokenType t) {
    return t == tok_bitwise_and || t == tok_bitwise_or || t == tok_bitwise_xor;
  };

  AnyV result = parse_expr15(lex);
  for (TokenType t = lex.tok(); is_bitwise_op(t); t = lex.tok()) {
    SrcLocation loc = lex.cur_location();
    std::string_view operator_name = lex.cur_str();
    lex.next();
    AnyV rhs = parse_expr15(lex);
    diagnose_bitwise_precedence(loc, operator_name, result, rhs);
    result = createV<ast_binary_operator>(loc, operator_name, t, result, rhs);
  }
  return result;
}
// parse E [ ? E : E ]
// The "true" branch is a full expression, the "false" branch is parsed at this same level,
// so `a ? b : c ? d : e` groups as `a ? b : (c ? d : e)`.
AnyV parse_expr13(Lexer& lex) {
  AnyV cond = parse_expr14(lex);
  if (lex.tok() != tok_question) {
    return cond;
  }

  SrcLocation loc = lex.cur_location();
  lex.next();
  AnyV when_true = parse_expr(lex);
  lex.expect(tok_colon, "':'");
  AnyV when_false = parse_expr13(lex);
  return createV<ast_ternary_operator>(loc, cond, when_true, when_false);
}
// parse LE1 (= | += | -= | ... ) E2
// Assignment and compound assignments; the right side is parsed by a recursive call,
// so `a = b = c` groups as `a = (b = c)`.
AnyV parse_expr10(Lexer& lex) {
  const auto is_assignment_op = [](TokenType t) {
    switch (t) {
      case tok_set_plus:
      case tok_set_minus:
      case tok_set_mul:
      case tok_set_div:
      case tok_set_divR:
      case tok_set_divC:
      case tok_set_mod:
      case tok_set_modC:
      case tok_set_modR:
      case tok_set_lshift:
      case tok_set_rshift:
      case tok_set_rshiftC:
      case tok_set_rshiftR:
      case tok_set_bitwise_and:
      case tok_set_bitwise_or:
      case tok_set_bitwise_xor:
      case tok_assign:
        return true;
      default:
        return false;
    }
  };

  AnyV lhs = parse_expr13(lex);
  const TokenType t = lex.tok();
  if (!is_assignment_op(t)) {
    return lhs;
  }

  SrcLocation loc = lex.cur_location();
  std::string_view operator_name = lex.cur_str();
  lex.next();
  AnyV rhs = parse_expr10(lex);
  return createV<ast_binary_operator>(loc, operator_name, t, lhs, rhs);
}
// Entry point for parsing any expression: starts from the outermost level (assignments),
// which descends through parse_expr13 ... parse_expr100.
AnyV parse_expr(Lexer& lex) {
return parse_expr10(lex);
}
// Parses `return <expr> ;` (an expression after `return` is always parsed here).
AnyV parse_return_stmt(Lexer& lex) {
  const SrcLocation loc = lex.cur_location();
  lex.expect(tok_return, "'return'");
  AnyV returned_expr = parse_expr(lex);
  lex.expect(tok_semicolon, "';'");
  return createV<ast_return_statement>(loc, returned_expr);
}
AnyV parse_statement(Lexer& lex);
// Parses a block of statements in curly braces: `{ stmt stmt ... }`.
// Both the location of '{' and the location of '}' are stored in the resulting vertex.
V<ast_sequence> parse_sequence(Lexer& lex) {
  SrcLocation loc = lex.cur_location();
  lex.expect(tok_opbrace, "'{'");
  std::vector<AnyV> items;
  while (lex.tok() != tok_clbrace) {
    items.push_back(parse_statement(lex));
  }
  SrcLocation loc_end = lex.cur_location();
  lex.expect(tok_clbrace, "'}'");
  // the vector is not used afterwards: hand it over instead of copying (as other list parsers here do)
  return createV<ast_sequence>(loc, loc_end, std::move(items));
}
// Parses `repeat <expr> { ... }`.
AnyV parse_repeat_statement(Lexer& lex) {
  const SrcLocation loc = lex.cur_location();
  lex.expect(tok_repeat, "'repeat'");
  AnyV times_expr = parse_expr(lex);
  V<ast_sequence> loop_body = parse_sequence(lex);
  return createV<ast_repeat_statement>(loc, times_expr, loop_body);
}
// Parses `while <expr> { ... }`.
AnyV parse_while_statement(Lexer& lex) {
  const SrcLocation loc = lex.cur_location();
  lex.expect(tok_while, "'while'");
  AnyV loop_cond = parse_expr(lex);
  V<ast_sequence> loop_body = parse_sequence(lex);
  return createV<ast_while_statement>(loc, loop_cond, loop_body);
}
// Parses `do { ... } until <expr>`.
// No ';' is consumed after the condition here.
ASTNodeBase* parse_do_until_statement(Lexer& lex) {
  const SrcLocation loc = lex.cur_location();
  lex.expect(tok_do, "'do'");
  V<ast_sequence> loop_body = parse_sequence(lex);
  lex.expect(tok_until, "'until'");
  AnyV until_cond = parse_expr(lex);
  return createV<ast_do_until_statement>(loc, loop_body, until_cond);
}
// Parses `try { ... } catch <expr> { ... }`; what stands after `catch` is parsed as a regular expression.
AnyV parse_try_catch_statement(Lexer& lex) {
  const SrcLocation loc = lex.cur_location();
  lex.expect(tok_try, "'try'");
  V<ast_sequence> body_of_try = parse_sequence(lex);
  lex.expect(tok_catch, "'catch'");
  AnyV expr_of_catch = parse_expr(lex);
  V<ast_sequence> body_of_catch = parse_sequence(lex);
  return createV<ast_try_catch_statement>(loc, body_of_try, expr_of_catch, body_of_catch);
}
// Parses an `if`-like statement; the current token (if / ifnot / elseif / elseifnot) is just skipped,
// the caller tells via is_ifnot whether the condition is negated.
// The resulting vertex always has an else body:
// - `else { ... }` is parsed as a block
// - `elseif` / `elseifnot` becomes a block containing a single nested if statement
// - when nothing follows, an empty block is created
AnyV parse_if_statement(Lexer& lex, bool is_ifnot) {
  SrcLocation loc = lex.cur_location();
  lex.next();
  AnyV cond = parse_expr(lex);
  V<ast_sequence> if_body = parse_sequence(lex);

  V<ast_sequence> else_body = nullptr;
  const TokenType next_tok = lex.tok();
  if (next_tok == tok_else) {
    lex.next();
    else_body = parse_sequence(lex);
  } else if (next_tok == tok_elseif || next_tok == tok_elseifnot) {
    // the keyword is consumed by the nested call
    AnyV v_inner_if = parse_if_statement(lex, next_tok == tok_elseifnot);
    else_body = createV<ast_sequence>(v_inner_if->loc, lex.cur_location(), {v_inner_if});
  } else {
    else_body = createV<ast_sequence>(lex.cur_location(), lex.cur_location(), {});
  }
  return createV<ast_if_statement>(loc, is_ifnot, cond, if_body, else_body);
}
// Parses one statement inside a block, choosing a dedicated parser by the first token.
// A lone ';' is an empty statement; anything not starting with a statement keyword
// is parsed as `<expr> ;`.
AnyV parse_statement(Lexer& lex) {
  const TokenType t = lex.tok();
  if (t == tok_return) {
    return parse_return_stmt(lex);
  }
  if (t == tok_opbrace) {
    return parse_sequence(lex);
  }
  if (t == tok_repeat) {
    return parse_repeat_statement(lex);
  }
  if (t == tok_if) {
    return parse_if_statement(lex, false);
  }
  if (t == tok_ifnot) {
    return parse_if_statement(lex, true);
  }
  if (t == tok_do) {
    return parse_do_until_statement(lex);
  }
  if (t == tok_while) {
    return parse_while_statement(lex);
  }
  if (t == tok_try) {
    return parse_try_catch_statement(lex);
  }
  if (t == tok_semicolon) {
    lex.next();
    return createV<ast_empty>;
  }

  AnyV expr = parse_expr(lex);
  lex.expect(tok_semicolon, "';'");
  return expr;
}
// Parses a regular (non-asm) function body, which is just a block of statements in curly braces.
AnyV parse_func_body(Lexer& lex) {
return parse_sequence(lex);
}
// Parses a body of an assembler function:
//   asm [ ( arg_name ... [ -> ret_index ... ] ) ] "COMMAND" { "COMMAND" } ;
// The optional parenthesized part lists argument names (resolved to indices via arg_list)
// and, after `->`, integer indices of return values. At least one string with a command is required.
AnyV parse_asm_func_body(Lexer& lex, V<ast_argument_list> arg_list) {
  SrcLocation loc = lex.cur_location();
  lex.expect(tok_asm, "'asm'");
  size_t n_args = arg_list->size();
  if (n_args > 16) {
    throw ParseError{loc, "assembler built-in function can have at most 16 arguments"};
  }

  std::vector<int> arg_order;
  std::vector<int> ret_order;
  if (lex.tok() == tok_oppar) {
    lex.next();
    // names of arguments in the desired order; every name must be present in arg_list
    for (; lex.tok() == tok_identifier || lex.tok() == tok_int_const; lex.next()) {
      int arg_idx = arg_list->lookup_idx(lex.cur_str());
      if (arg_idx == -1) {
        lex.error("argument name expected");
      }
      arg_order.push_back(arg_idx);
    }
    if (lex.tok() == tok_mapsto) {
      lex.next();
      // indices of return values, written as integer constants
      for (; lex.tok() == tok_int_const; lex.next()) {
        const std::string ret_idx_str{lex.cur_str()};
        ret_order.push_back(std::atoi(ret_idx_str.c_str()));
      }
    }
    lex.expect(tok_clpar, "')'");
  }

  lex.check(tok_string_const, "\"ASM COMMAND\"");
  std::vector<AnyV> asm_commands;
  for (; lex.tok() == tok_string_const; lex.next()) {
    std::string_view asm_command = lex.cur_str();
    asm_commands.push_back(createV<ast_string_const>(lex.cur_location(), asm_command, 0));
  }
  lex.expect(tok_semicolon, "';'");
  return createV<ast_asm_body>(loc, std::move(arg_order), std::move(ret_order), std::move(asm_commands));
}
// Parses `forall T1 { , T2 } ->` standing before a function declaration.
// For every name, a type variable is created with an index equal to its position in the list (0, 1, ...).
AnyV parse_forall(Lexer& lex) {
  SrcLocation loc = lex.cur_location();
  lex.expect(tok_forall, "'forall'");

  std::vector<AnyV> forall_items;
  for (int idx = 0; ; ++idx) {
    lex.check(tok_identifier, "T expected");
    std::string_view nameT = lex.cur_str();
    TypeExpr* type = TypeExpr::new_var(idx);
    forall_items.emplace_back(createV<ast_forall_item>(lex.cur_location(), type, static_cast<std::string>(nameT)));
    lex.next();
    if (lex.tok() != tok_comma) {
      break;
    }
    lex.next();
  }

  lex.expect(tok_mapsto, "'->'");
  return createV<ast_forall_list>{loc, std::move(forall_items)};
}
// Parses a top-level function declaration. The order of parts, as handled below:
//   [ forall T... -> | get ] <ret_type> <name> ( <args> ) [ impure | pure ] [ inline | inline_ref ]
//   [ method_id | method_id(N) ] <body>
// where <body> is one of: `builtin ;`, just `;` (a prototype), `{ ... }`, or `asm ... ;`.
// All collected flags are stored into the resulting ast_function_declaration vertex.
AnyV parse_function_declaration(Lexer& lex) {
SrcLocation loc = lex.cur_location();
V<ast_forall_list> forall_list = nullptr;
bool is_get_method = false;
bool is_builtin = false;
bool marked_as_inline = false;
bool marked_as_inline_ref = false;
// `forall` and `get` are alternatives here: only one of them is accepted before the return type
if (lex.tok() == tok_forall) {
forall_list = parse_forall(lex)->as<ast_forall_list>();
} else if (lex.tok() == tok_get) {
is_get_method = true;
lex.next();
}
TypeExpr* ret_type = parse_type(lex, forall_list);
lex.check(tok_identifier, "function name identifier expected");
std::string func_name = static_cast<std::string>(lex.cur_str());
lex.next();
V<ast_argument_list> arg_list = parse_argument_list(lex, forall_list)->as<ast_argument_list>();
bool marked_as_pure = false;
if (lex.tok() == tok_impure) {
// `impure` is accepted but ignored; the deprecation warning is shown only once
// (the flag is a function-local static, so it's shared between all calls)
static bool warning_shown = false;
if (!warning_shown) {
lex.cur_location().show_warning("`impure` specifier is deprecated. All functions are impure by default, use `pure` to mark a function as pure");
warning_shown = true;
}
lex.next();
} else if (lex.tok() == tok_pure) {
marked_as_pure = true;
lex.next();
}
if (lex.tok() == tok_inline) {
marked_as_inline = true;
lex.next();
} else if (lex.tok() == tok_inlineref) {
marked_as_inline_ref = true;
lex.next();
}
V<ast_int_const> method_id = nullptr;
if (lex.tok() == tok_method_id) {
if (is_get_method) {
lex.error("both `get` and `method_id` are not allowed");
}
lex.next();
if (lex.tok() == tok_oppar) { // method_id(N)
lex.next();
lex.check(tok_int_const, "number");
std::string_view int_val = lex.cur_str();
method_id = createV<ast_int_const>(lex.cur_location(), int_val);
lex.next();
lex.expect(tok_clpar, "')'");
} else {
// `method_id` without a number is a deprecated way to declare a get method;
// this warning is also shown only once (a separate function-local static flag)
static bool warning_shown = false;
if (!warning_shown) {
lex.cur_location().show_warning("`method_id` specifier is deprecated, use `get` keyword.\nExample: `get int seqno() { ... }`");
warning_shown = true;
}
is_get_method = true;
}
}
AnyV body = nullptr;
if (lex.tok() == tok_builtin) {
// `builtin ;` - the body is an empty vertex, the function is flagged as builtin
is_builtin = true;
body = createV<ast_empty>;
lex.next();
lex.expect(tok_semicolon, "';'");
} else if (lex.tok() == tok_semicolon) {
// todo this is just a prototype, remove this "feature" in the future
lex.next();
body = createV<ast_empty>;
} else if (lex.tok() == tok_opbrace) {
body = parse_func_body(lex);
} else if (lex.tok() == tok_asm) {
body = parse_asm_func_body(lex, arg_list);
} else {
// none of the body forms matched: the current token is not '{' here, so this call is made
// to report the error (presumably lex.expect() does not return in this case - confirm)
lex.expect(tok_opbrace, "function body block");
}
auto f_declaration = createV<ast_function_declaration>(loc, func_name, arg_list, body);
f_declaration->ret_type = ret_type;
f_declaration->forall_list = forall_list;
f_declaration->marked_as_pure = marked_as_pure;
f_declaration->marked_as_get_method = is_get_method;
f_declaration->marked_as_builtin = is_builtin;
f_declaration->marked_as_inline = marked_as_inline;
f_declaration->marked_as_inline_ref = marked_as_inline_ref;
f_declaration->method_id = method_id;
return f_declaration;
}
// Parses a pragma directive. Two forms are handled:
//   <pragma> version <cmp> <semver> ;   where <cmp> is one of > >= < <= = ^
//   <pragma> <name> ;                   any other pragma, without arguments
// The pragma name and the semver are requested from the lexer as special tokens.
AnyV parse_pragma(Lexer& lex) {
  SrcLocation loc = lex.cur_location();
  lex.next_special(tok_pragma_name, "pragma name");
  std::string_view pragma_name = lex.cur_str();

  if (pragma_name != "version") {
    lex.next();
    lex.expect(tok_semicolon, "';'");
    return createV<ast_pragma_no_arg>(loc, pragma_name);
  }

  lex.next();
  TokenType cmp_tok = lex.tok();
  switch (cmp_tok) {
    case tok_gt:
    case tok_geq:
    case tok_lt:
    case tok_leq:
    case tok_eq:
    case tok_bitwise_xor:
      break;
    default:
      lex.error("invalid comparison operator");
  }
  lex.next_special(tok_semver, "semver");
  std::string_view semver = lex.cur_str();
  lex.next();
  lex.expect(tok_semicolon, "';'");
  return createV<ast_pragma_version>(loc, cmp_tok, semver);
}
// Parses `<include keyword> "file name" ;`.
// Only syntax is handled here: the name is stored as written, an empty name is an error.
AnyV parse_include_statement(Lexer& lex) {
  const SrcLocation loc = lex.cur_location();
  lex.expect(tok_include, "#include");
  lex.check(tok_string_const, "source file name");

  std::string_view rel_filename = lex.cur_str();
  const bool name_is_empty = rel_filename.empty();
  if (name_is_empty) {
    lex.error("imported file name is an empty string");
  }
  lex.next();
  lex.expect(tok_semicolon, "';'");
  return createV<ast_include_statement>(loc, rel_filename);
}
// the main (exported) function
// Marks the file as parsed, then reads top-level declarations until the end of file:
// pragmas, includes, `global` and `const` lists are recognized by their first token,
// everything else is parsed as a function declaration.
AnyV parse_src_file_to_ast(SrcFile* file) {
  file->was_parsed = true;

  std::vector<AnyV> toplevel_declarations;
  Lexer lex(file);
  while (!lex.is_eof()) {
    AnyV declaration = nullptr;
    switch (lex.tok()) {
      case tok_pragma:
        declaration = parse_pragma(lex);
        break;
      case tok_include:
        declaration = parse_include_statement(lex);
        break;
      case tok_global:
        declaration = parse_global_var_declaration_list(lex);
        break;
      case tok_const:
        declaration = parse_constant_declaration_list(lex);
        break;
      default:
        declaration = parse_function_declaration(lex);
        break;
    }
    toplevel_declarations.push_back(declaration);
  }
  return createV<ast_tolk_file>(file, std::move(toplevel_declarations));
}
} // namespace tolk

27
tolk/ast-from-tokens.h Normal file
View file

@ -0,0 +1,27 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "src-file.h"
namespace tolk {
// forward declaration: only a pointer to an AST vertex is needed in this header
struct ASTNodeBase;
// Parses a whole source file into an AST and returns its root vertex.
// Also sets file->was_parsed to true.
const ASTNodeBase* parse_src_file_to_ast(SrcFile* file);
} // namespace tolk

155
tolk/ast-replacer.h Normal file
View file

@ -0,0 +1,155 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "ast.h"
#include "platform-utils.h"
/*
* A module implementing traversal of a vertex tree with the ability to replace any vertex with another one.
* For example, to replace a "beginCell()" call with "begin_cell()" in a function body (in V<ast_function_declaration>),
* regardless of the place where this call is performed, you need to iterate over the whole function AST,
* find ast_function_call(beginCell), create ast_function_call(begin_cell) instead, and replace
* the pointer inside its parent.
* Inheriting from ASTReplacer makes this task quite simple, without any boilerplate.
*
* If you need just to traverse a vertex tree without replacing vertices,
* consider another api: ast-visitor.h.
*/
namespace tolk {
// The base of all "visit and replace" traversals.
// Every replace_children() overload passes each child of a vertex through replace()
// and stores the returned vertex back into the same slot of the parent; the parent itself is returned.
// Vertices are accepted as const, so slots of the parent are patched via const_cast.
class ASTReplacer {
protected:
  // a leaf has no children: nothing to replace, the vertex is returned as is
  GNU_ATTRIBUTE_ALWAYS_INLINE static AnyV replace_children(const ASTNodeLeaf* v) {
    return v;
  }

  // one child: replace it in place
  GNU_ATTRIBUTE_ALWAYS_INLINE AnyV replace_children(const ASTNodeUnary* v) {
    auto* node = const_cast<ASTNodeUnary*>(v);
    node->child = replace(node->child);
    return node;
  }

  // two children: lhs is replaced first, then rhs
  GNU_ATTRIBUTE_ALWAYS_INLINE AnyV replace_children(const ASTNodeBinary* v) {
    auto* node = const_cast<ASTNodeBinary*>(v);
    node->lhs = replace(node->lhs);
    node->rhs = replace(node->rhs);
    return node;
  }

  // arbitrary number of children: every slot is replaced in order
  GNU_ATTRIBUTE_ALWAYS_INLINE AnyV replace_children(const ASTNodeVararg* v) {
    auto* node = const_cast<ASTNodeVararg*>(v);
    for (AnyV& slot : node->children) {
      slot = replace(slot);
    }
    return node;
  }

public:
  virtual ~ASTReplacer() = default;

  // returns a vertex that should be placed instead of v (probably, v itself)
  virtual AnyV replace(AnyV v) = 0;
};
// A replacer for vertices that may occur inside a function body.
// replace(AnyV) dispatches on v->type to a typed virtual overload; by default, every overload
// just descends into children via replace_children() and returns the same vertex.
// A subclass overrides overloads for the node types it is interested in.
// A node type that is not listed in the switch makes replace() throw UnexpectedASTNodeType.
class ASTReplacerInFunctionBody : public ASTReplacer {
protected:
  using parent = ASTReplacerInFunctionBody;

  virtual AnyV replace(V<ast_empty> v)                { return replace_children(v); }
  virtual AnyV replace(V<ast_identifier> v)           { return replace_children(v); }
  virtual AnyV replace(V<ast_int_const> v)            { return replace_children(v); }
  virtual AnyV replace(V<ast_string_const> v)         { return replace_children(v); }
  virtual AnyV replace(V<ast_bool_const> v)           { return replace_children(v); }
  virtual AnyV replace(V<ast_nil_tuple> v)            { return replace_children(v); }
  virtual AnyV replace(V<ast_function_call> v)        { return replace_children(v); }
  virtual AnyV replace(V<ast_parenthesized_expr> v)   { return replace_children(v); }
  virtual AnyV replace(V<ast_underscore> v)           { return replace_children(v); }
  virtual AnyV replace(V<ast_type_expression> v)      { return replace_children(v); }
  virtual AnyV replace(V<ast_variable_declaration> v) { return replace_children(v); }
  virtual AnyV replace(V<ast_tensor> v)               { return replace_children(v); }
  virtual AnyV replace(V<ast_tensor_square> v)        { return replace_children(v); }
  virtual AnyV replace(V<ast_dot_tilde_call> v)       { return replace_children(v); }
  virtual AnyV replace(V<ast_unary_operator> v)       { return replace_children(v); }
  virtual AnyV replace(V<ast_binary_operator> v)      { return replace_children(v); }
  virtual AnyV replace(V<ast_ternary_operator> v)     { return replace_children(v); }
  virtual AnyV replace(V<ast_return_statement> v)     { return replace_children(v); }
  virtual AnyV replace(V<ast_sequence> v)             { return replace_children(v); }
  virtual AnyV replace(V<ast_repeat_statement> v)     { return replace_children(v); }
  virtual AnyV replace(V<ast_while_statement> v)      { return replace_children(v); }
  virtual AnyV replace(V<ast_do_until_statement> v)   { return replace_children(v); }
  virtual AnyV replace(V<ast_try_catch_statement> v)  { return replace_children(v); }
  virtual AnyV replace(V<ast_if_statement> v)         { return replace_children(v); }
  virtual AnyV replace(V<ast_asm_body> v)             { return replace_children(v); }

  // the untyped entry point (called for every child): picks a typed overload by the runtime node type
  AnyV replace(AnyV v) final {
    switch (v->type) {
      case ast_empty:                 return replace(v->as<ast_empty>());
      case ast_identifier:            return replace(v->as<ast_identifier>());
      case ast_int_const:             return replace(v->as<ast_int_const>());
      case ast_string_const:          return replace(v->as<ast_string_const>());
      case ast_bool_const:            return replace(v->as<ast_bool_const>());
      case ast_nil_tuple:             return replace(v->as<ast_nil_tuple>());
      case ast_function_call:         return replace(v->as<ast_function_call>());
      case ast_parenthesized_expr:    return replace(v->as<ast_parenthesized_expr>());
      case ast_underscore:            return replace(v->as<ast_underscore>());
      case ast_type_expression:       return replace(v->as<ast_type_expression>());
      case ast_variable_declaration:  return replace(v->as<ast_variable_declaration>());
      case ast_tensor:                return replace(v->as<ast_tensor>());
      case ast_tensor_square:         return replace(v->as<ast_tensor_square>());
      case ast_dot_tilde_call:        return replace(v->as<ast_dot_tilde_call>());
      case ast_unary_operator:        return replace(v->as<ast_unary_operator>());
      case ast_binary_operator:       return replace(v->as<ast_binary_operator>());
      case ast_ternary_operator:      return replace(v->as<ast_ternary_operator>());
      case ast_return_statement:      return replace(v->as<ast_return_statement>());
      case ast_sequence:              return replace(v->as<ast_sequence>());
      case ast_repeat_statement:      return replace(v->as<ast_repeat_statement>());
      case ast_while_statement:       return replace(v->as<ast_while_statement>());
      case ast_do_until_statement:    return replace(v->as<ast_do_until_statement>());
      case ast_try_catch_statement:   return replace(v->as<ast_try_catch_statement>());
      case ast_if_statement:          return replace(v->as<ast_if_statement>());
      case ast_asm_body:              return replace(v->as<ast_asm_body>());
      default:
        // the label names this very method, so that the exception message points to the right place
        throw UnexpectedASTNodeType(v, "ASTReplacerInFunctionBody::replace");
    }
  }

public:
  // Launches replacing inside the body of a given function.
  // NOTE(review): the vertex returned for the body root is discarded here, so replacing
  // the root itself (not its descendants) has no effect on the function declaration.
  void start_replacing_in_function(V<ast_function_declaration> v) {
    replace(v->get_body());
  }
};
// A replacer that walks over all functions of a file.
// For every top-level function declaration, should_enter_function() decides
// whether replacing is launched for the body of that function; other declarations are skipped.
class ASTReplacerAllFunctionsInFile : public ASTReplacerInFunctionBody {
protected:
  using parent = ASTReplacerAllFunctionsInFile;

  virtual bool should_enter_function(V<ast_function_declaration> v) = 0;

public:
  void start_replacing_in_file(V<ast_tolk_file> v_file) {
    for (AnyV v_decl : v_file->get_toplevel_declarations()) {
      auto v_function = v_decl->try_as<ast_function_declaration>();
      if (!v_function || !should_enter_function(v_function)) {
        continue;   // not a function, or a subclass is not interested in it
      }
      replace(v_function->get_body());
    }
  }
};
} // namespace tolk

233
tolk/ast-stringifier.h Normal file
View file

@ -0,0 +1,233 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#ifdef TOLK_DEBUG
#include "ast.h"
#include "ast-visitor.h"
#include <sstream>
/*
* ASTStringifier is used to print out the whole vertex tree in a human-readable format.
* To stringify any vertex, call v->debug_print(), which uses this class.
*/
namespace tolk {
class ASTStringifier final : public ASTVisitor {
constexpr static std::pair<ASTNodeType, const char*> name_pairs[] = {
{ast_empty, "ast_empty"},
{ast_identifier, "ast_identifier"},
{ast_int_const, "ast_int_const"},
{ast_string_const, "ast_string_const"},
{ast_bool_const, "ast_bool_const"},
{ast_nil_tuple, "ast_nil_tuple"},
{ast_function_call, "ast_function_call"},
{ast_parenthesized_expr, "ast_parenthesized_expr"},
{ast_global_var_declaration, "ast_global_var_declaration"},
{ast_global_var_declaration_list, "ast_global_var_declaration_list"},
{ast_constant_declaration, "ast_constant_declaration"},
{ast_constant_declaration_list, "ast_constant_declaration_list"},
{ast_underscore, "ast_underscore"},
{ast_type_expression, "ast_type_expression"},
{ast_variable_declaration, "ast_variable_declaration"},
{ast_tensor, "ast_tensor"},
{ast_tensor_square, "ast_tensor_square"},
{ast_dot_tilde_call, "ast_dot_tilde_call"},
{ast_unary_operator, "ast_unary_operator"},
{ast_binary_operator, "ast_binary_operator"},
{ast_ternary_operator, "ast_ternary_operator"},
{ast_return_statement, "ast_return_statement"},
{ast_sequence, "ast_sequence"},
{ast_repeat_statement, "ast_repeat_statement"},
{ast_while_statement, "ast_while_statement"},
{ast_do_until_statement, "ast_do_until_statement"},
{ast_try_catch_statement, "ast_try_catch_statement"},
{ast_if_statement, "ast_if_statement"},
{ast_forall_item, "ast_forall_item"},
{ast_forall_list, "ast_forall_list"},
{ast_argument, "ast_argument"},
{ast_argument_list, "ast_argument_list"},
{ast_asm_body, "ast_asm_body"},
{ast_function_declaration, "ast_function_declaration"},
{ast_pragma_no_arg, "ast_pragma_no_arg"},
{ast_pragma_version, "ast_pragma_version"},
{ast_include_statement, "ast_include_statement"},
{ast_tolk_file, "ast_tolk_file"},
};
template<ASTNodeType node_type>
constexpr static const char* ast_node_type_to_string() {
static_assert(std::size(name_pairs) == ast_tolk_file + 1, "name_pairs needs to be updated");
return name_pairs[node_type].second;
}
int depth = 0;
std::string out;
bool colored = false;
template<ASTNodeType node_type>
void handle_vertex(V<node_type> v) {
out += std::string(depth * 2, ' ');
out += ast_node_type_to_string<node_type>();
if (std::string postfix = specific_str(v); !postfix.empty()) {
out += colored ? " \x1b[34m" : " // ";
out += postfix;
out += colored ? "\x1b[0m" : "";
}
out += '\n';
depth++;
visit_children(v);
depth--;
}
static std::string specific_str(AnyV node) {
switch (node->type) {
case ast_identifier:
return static_cast<std::string>(node->as<ast_identifier>()->name);
case ast_int_const:
return static_cast<std::string>(node->as<ast_int_const>()->int_val);
case ast_string_const:
if (char modifier = node->as<ast_string_const>()->modifier) {
return "\"" + static_cast<std::string>(node->as<ast_string_const>()->str_val) + "\"" + std::string(1, modifier);
} else {
return "\"" + static_cast<std::string>(node->as<ast_string_const>()->str_val) + "\"";
}
case ast_global_var_declaration:
return static_cast<std::string>(node->as<ast_global_var_declaration>()->var_name);
case ast_constant_declaration:
return static_cast<std::string>(node->as<ast_constant_declaration>()->const_name);
case ast_type_expression: {
std::ostringstream os;
os << node->as<ast_type_expression>()->declared_type;
return os.str();
}
case ast_variable_declaration: {
std::ostringstream os;
os << node->as<ast_variable_declaration>()->declared_type;
return os.str();
}
case ast_dot_tilde_call:
return static_cast<std::string>(node->as<ast_dot_tilde_call>()->method_name);
case ast_unary_operator:
return static_cast<std::string>(node->as<ast_unary_operator>()->operator_name);
case ast_binary_operator:
return static_cast<std::string>(node->as<ast_binary_operator>()->operator_name);
case ast_sequence:
return "" + std::to_string(node->as<ast_sequence>()->get_items().size());
case ast_if_statement:
return node->as<ast_if_statement>()->is_ifnot ? "ifnot" : "";
case ast_argument: {
std::ostringstream os;
os << node->as<ast_argument>()->arg_type;
return static_cast<std::string>(node->as<ast_argument>()->arg_name) + ": " + os.str();
}
case ast_function_declaration: {
std::string arg_names;
for (int i = 0; i < node->as<ast_function_declaration>()->get_num_args(); i++) {
if (!arg_names.empty())
arg_names += ",";
arg_names += node->as<ast_function_declaration>()->get_arg(i)->arg_name;
}
return "fun " + node->as<ast_function_declaration>()->name + "(" + arg_names + ")";
}
case ast_pragma_no_arg:
return static_cast<std::string>(node->as<ast_pragma_no_arg>()->pragma_name);
case ast_pragma_version:
return static_cast<std::string>(node->as<ast_pragma_version>()->semver);
case ast_include_statement:
return static_cast<std::string>(node->as<ast_include_statement>()->file_name);
case ast_tolk_file:
return node->as<ast_tolk_file>()->file->rel_filename;
default:
return {};
}
}
public:
explicit ASTStringifier(bool colored) : colored(colored) {
}
std::string to_string_with_children(AnyV v) {
out.clear();
visit(v);
return std::move(out);
}
static std::string to_string_without_children(AnyV v) {
std::string result = ast_node_type_to_string(v->type);
if (std::string postfix = specific_str(v); !postfix.empty()) {
result += ' ';
result += specific_str(v);
}
return result;
}
static const char* ast_node_type_to_string(ASTNodeType node_type) {
return name_pairs[node_type].second;
}
void visit(AnyV v) override {
switch (v->type) {
case ast_empty: return handle_vertex(v->as<ast_empty>());
case ast_identifier: return handle_vertex(v->as<ast_identifier>());
case ast_int_const: return handle_vertex(v->as<ast_int_const>());
case ast_string_const: return handle_vertex(v->as<ast_string_const>());
case ast_bool_const: return handle_vertex(v->as<ast_bool_const>());
case ast_nil_tuple: return handle_vertex(v->as<ast_nil_tuple>());
case ast_function_call: return handle_vertex(v->as<ast_function_call>());
case ast_parenthesized_expr: return handle_vertex(v->as<ast_parenthesized_expr>());
case ast_global_var_declaration: return handle_vertex(v->as<ast_global_var_declaration>());
case ast_global_var_declaration_list: return handle_vertex(v->as<ast_global_var_declaration_list>());
case ast_constant_declaration: return handle_vertex(v->as<ast_constant_declaration>());
case ast_constant_declaration_list: return handle_vertex(v->as<ast_constant_declaration_list>());
case ast_underscore: return handle_vertex(v->as<ast_underscore>());
case ast_type_expression: return handle_vertex(v->as<ast_type_expression>());
case ast_variable_declaration: return handle_vertex(v->as<ast_variable_declaration>());
case ast_tensor: return handle_vertex(v->as<ast_tensor>());
case ast_tensor_square: return handle_vertex(v->as<ast_tensor_square>());
case ast_dot_tilde_call: return handle_vertex(v->as<ast_dot_tilde_call>());
case ast_unary_operator: return handle_vertex(v->as<ast_unary_operator>());
case ast_binary_operator: return handle_vertex(v->as<ast_binary_operator>());
case ast_ternary_operator: return handle_vertex(v->as<ast_ternary_operator>());
case ast_return_statement: return handle_vertex(v->as<ast_return_statement>());
case ast_sequence: return handle_vertex(v->as<ast_sequence>());
case ast_repeat_statement: return handle_vertex(v->as<ast_repeat_statement>());
case ast_while_statement: return handle_vertex(v->as<ast_while_statement>());
case ast_do_until_statement: return handle_vertex(v->as<ast_do_until_statement>());
case ast_try_catch_statement: return handle_vertex(v->as<ast_try_catch_statement>());
case ast_if_statement: return handle_vertex(v->as<ast_if_statement>());
case ast_forall_item: return handle_vertex(v->as<ast_forall_item>());
case ast_forall_list: return handle_vertex(v->as<ast_forall_list>());
case ast_argument: return handle_vertex(v->as<ast_argument>());
case ast_argument_list: return handle_vertex(v->as<ast_argument_list>());
case ast_asm_body: return handle_vertex(v->as<ast_asm_body>());
case ast_function_declaration: return handle_vertex(v->as<ast_function_declaration>());
case ast_pragma_no_arg: return handle_vertex(v->as<ast_pragma_no_arg>());
case ast_pragma_version: return handle_vertex(v->as<ast_pragma_version>());
case ast_include_statement: return handle_vertex(v->as<ast_include_statement>());
case ast_tolk_file: return handle_vertex(v->as<ast_tolk_file>());
default:
throw UnexpectedASTNodeType(v, "ASTStringifier::visit");
}
}
};
} // namespace tolk
#endif // TOLK_DEBUG

1438
tolk/ast-to-legacy.cpp Normal file

File diff suppressed because it is too large Load diff

28
tolk/ast-to-legacy.h Normal file
View file

@ -0,0 +1,28 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "ast.h"
namespace tolk {
// NOTE(review): SrcFile is forward-declared, but no declaration visible in this header mentions it — confirm it's needed.
struct SrcFile;
// Processes the vertex tree of one file.
// NOTE(review): presumably the entry point of converting AST to legacy representation (ast-to-legacy.cpp),
// expecting the root vertex returned by parse_src_file_to_ast() — confirm.
void process_file_ast(AnyV file_ast);
} // namespace tolk

199
tolk/ast-visitor.h Normal file
View file

@ -0,0 +1,199 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "ast.h"
#include "platform-utils.h"
/*
* A module implementing base functionality of read-only traversing a vertex tree.
* Since a vertex in general doesn't store a vector of children, iterating is possible only for concrete node_type.
* E.g., for ast_if_statement, visit nodes cond, if-body and else-body. For ast_string_const, nothing. And so on.
* Visitors below are helpers to inherit from and handle specific vertex types.
*
* Note, that absence of "children" in ASTNodeBase is not a drawback. Instead, it encourages you to think
* about types and match the type system.
*
* The visitor is read-only, it does not modify visited nodes (except if you purposely call mutating methods).
* For example, if you want to replace "beginCell()" call with "begin_cell", a visitor isn't enough for you.
* To replace vertices, consider another API: ast-replacer.h.
*/
namespace tolk {
// The base of all read-only traversals.
// Every visit_children() overload calls visit() for each child of a vertex, in order;
// what visit() does with a vertex is up to a subclass.
class ASTVisitor {
protected:
  // a leaf has no children: nothing to visit
  GNU_ATTRIBUTE_ALWAYS_INLINE static void visit_children(const ASTNodeLeaf* v) {
    static_cast<void>(v);
  }

  // one child
  GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTNodeUnary* v) {
    visit(v->child);
  }

  // two children: lhs is visited first, then rhs
  GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTNodeBinary* v) {
    visit(v->lhs);
    visit(v->rhs);
  }

  // arbitrary number of children, visited in the order they are stored
  GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTNodeVararg* v) {
    for (AnyV item : v->children) {
      visit(item);
    }
  }

  virtual void visit(AnyV v) = 0;

public:
  virtual ~ASTVisitor() = default;
};
// A visitor for vertices that may occur inside a function body.
// visit(AnyV) dispatches on v->type to a typed virtual overload; by default, every overload
// just descends into children. A subclass overrides overloads for the node types it is interested in.
// A node type that is not listed in the switch makes visit() throw UnexpectedASTNodeType.
class ASTVisitorFunctionBody : public ASTVisitor {
protected:
  using parent = ASTVisitorFunctionBody;

  virtual void visit(V<ast_empty> v)                { visit_children(v); }
  virtual void visit(V<ast_identifier> v)           { visit_children(v); }
  virtual void visit(V<ast_int_const> v)            { visit_children(v); }
  virtual void visit(V<ast_string_const> v)         { visit_children(v); }
  virtual void visit(V<ast_bool_const> v)           { visit_children(v); }
  virtual void visit(V<ast_nil_tuple> v)            { visit_children(v); }
  virtual void visit(V<ast_function_call> v)        { visit_children(v); }
  virtual void visit(V<ast_parenthesized_expr> v)   { visit_children(v); }
  virtual void visit(V<ast_underscore> v)           { visit_children(v); }
  virtual void visit(V<ast_type_expression> v)      { visit_children(v); }
  virtual void visit(V<ast_variable_declaration> v) { visit_children(v); }
  virtual void visit(V<ast_tensor> v)               { visit_children(v); }
  virtual void visit(V<ast_tensor_square> v)        { visit_children(v); }
  virtual void visit(V<ast_dot_tilde_call> v)       { visit_children(v); }
  virtual void visit(V<ast_unary_operator> v)       { visit_children(v); }
  virtual void visit(V<ast_binary_operator> v)      { visit_children(v); }
  virtual void visit(V<ast_ternary_operator> v)     { visit_children(v); }
  virtual void visit(V<ast_return_statement> v)     { visit_children(v); }
  virtual void visit(V<ast_sequence> v)             { visit_children(v); }
  virtual void visit(V<ast_repeat_statement> v)     { visit_children(v); }
  virtual void visit(V<ast_while_statement> v)      { visit_children(v); }
  virtual void visit(V<ast_do_until_statement> v)   { visit_children(v); }
  virtual void visit(V<ast_try_catch_statement> v)  { visit_children(v); }
  virtual void visit(V<ast_if_statement> v)         { visit_children(v); }
  virtual void visit(V<ast_asm_body> v)             { visit_children(v); }

  // the untyped entry point (called for every child): picks a typed overload by the runtime node type
  void visit(AnyV v) final {
    switch (v->type) {
      case ast_empty:                 visit(v->as<ast_empty>());                break;
      case ast_identifier:            visit(v->as<ast_identifier>());           break;
      case ast_int_const:             visit(v->as<ast_int_const>());            break;
      case ast_string_const:          visit(v->as<ast_string_const>());         break;
      case ast_bool_const:            visit(v->as<ast_bool_const>());           break;
      case ast_nil_tuple:             visit(v->as<ast_nil_tuple>());            break;
      case ast_function_call:         visit(v->as<ast_function_call>());        break;
      case ast_parenthesized_expr:    visit(v->as<ast_parenthesized_expr>());   break;
      case ast_underscore:            visit(v->as<ast_underscore>());           break;
      case ast_type_expression:       visit(v->as<ast_type_expression>());      break;
      case ast_variable_declaration:  visit(v->as<ast_variable_declaration>()); break;
      case ast_tensor:                visit(v->as<ast_tensor>());               break;
      case ast_tensor_square:         visit(v->as<ast_tensor_square>());        break;
      case ast_dot_tilde_call:        visit(v->as<ast_dot_tilde_call>());       break;
      case ast_unary_operator:        visit(v->as<ast_unary_operator>());       break;
      case ast_binary_operator:       visit(v->as<ast_binary_operator>());      break;
      case ast_ternary_operator:      visit(v->as<ast_ternary_operator>());     break;
      case ast_return_statement:      visit(v->as<ast_return_statement>());     break;
      case ast_sequence:              visit(v->as<ast_sequence>());             break;
      case ast_repeat_statement:      visit(v->as<ast_repeat_statement>());     break;
      case ast_while_statement:       visit(v->as<ast_while_statement>());      break;
      case ast_do_until_statement:    visit(v->as<ast_do_until_statement>());   break;
      case ast_try_catch_statement:   visit(v->as<ast_try_catch_statement>());  break;
      case ast_if_statement:          visit(v->as<ast_if_statement>());         break;
      case ast_asm_body:              visit(v->as<ast_asm_body>());             break;
      default:
        throw UnexpectedASTNodeType(v, "ASTVisitorFunctionBody::visit");
    }
  }

public:
  // launches visiting of the body of a given function
  void start_visiting_function(V<ast_function_declaration> v_function) {
    visit(v_function->get_body());
  }
};
// A visitor that walks over all functions of a file.
// For every top-level function declaration, should_enter_function() decides
// whether the body of that function is visited; other declarations are skipped.
class ASTVisitorAllFunctionsInFile : public ASTVisitorFunctionBody {
protected:
  using parent = ASTVisitorAllFunctionsInFile;

  virtual bool should_enter_function(V<ast_function_declaration> v) = 0;

public:
  void start_visiting_file(V<ast_tolk_file> v_file) {
    for (AnyV v_decl : v_file->get_toplevel_declarations()) {
      auto v_func = v_decl->try_as<ast_function_declaration>();
      if (!v_func || !should_enter_function(v_func)) {
        continue;   // not a function, or a subclass is not interested in it
      }
      visit(v_func->get_body());
    }
  }
};
// A visitor of top-level declarations of a file (it does not descend into function bodies).
// For every declaration, a corresponding on_xxx() callback is invoked; declaration lists
// (constants, global variables) are unrolled, so that a callback is invoked for every item of a list.
// Any other node type at the top level makes visit() throw UnexpectedASTNodeType.
class ASTVisitorToplevelDeclarations : public ASTVisitor {
protected:
  using parent = ASTVisitorToplevelDeclarations;

  virtual void on_pragma_no_arg(V<ast_pragma_no_arg> v) = 0;
  virtual void on_pragma_version(V<ast_pragma_version> v) = 0;
  virtual void on_include_statement(V<ast_include_statement> v) = 0;
  virtual void on_constant_declaration(V<ast_constant_declaration> v) = 0;
  virtual void on_global_var_declaration(V<ast_global_var_declaration> v) = 0;
  virtual void on_function_declaration(V<ast_function_declaration> v) = 0;

  void visit(AnyV v) final {
    switch (v->type) {
      case ast_pragma_no_arg:
        return on_pragma_no_arg(v->as<ast_pragma_no_arg>());
      case ast_pragma_version:
        return on_pragma_version(v->as<ast_pragma_version>());
      case ast_include_statement:
        return on_include_statement(v->as<ast_include_statement>());
      case ast_constant_declaration_list: {
        for (const auto& v_decl : v->as<ast_constant_declaration_list>()->get_declarations()) {
          on_constant_declaration(v_decl->as<ast_constant_declaration>());
        }
        return;
      }
      case ast_global_var_declaration_list: {
        for (const auto& v_decl : v->as<ast_global_var_declaration_list>()->get_declarations()) {
          on_global_var_declaration(v_decl->as<ast_global_var_declaration>());
        }
        return;
      }
      case ast_function_declaration:
        return on_function_declaration(v->as<ast_function_declaration>());
      default:
        throw UnexpectedASTNodeType(v, "ASTVisitorToplevelDeclarations::visit");
    }
  }

public:
  // invokes callbacks for all top-level declarations of a file, in the order they are stored
  void start_visiting_file(V<ast_tolk_file> v_file) {
    for (AnyV v_decl : v_file->get_toplevel_declarations()) {
      visit(v_decl);
    }
  }
};
} // namespace tolk

70
tolk/ast.cpp Normal file
View file

@ -0,0 +1,70 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#include "ast.h"
#include "ast-stringifier.h"
#include <iostream>
namespace tolk {
// keeps the common vertex header compact: compilation fails if ASTNodeBase (type + loc) stops being exactly 12 bytes
static_assert(sizeof(ASTNodeBase) == 12);
#ifdef TOLK_DEBUG
// Stringifies this vertex with the whole tree below it (debug builds only).
// If colored, vertex details are highlighted with ANSI escape sequences.
std::string ASTNodeBase::to_debug_string(bool colored) const {
  return ASTStringifier(colored).to_string_with_children(this);
}
// Prints a colored dump of this vertex and its children to stderr (debug builds only).
void ASTNodeBase::debug_print() const {
  const std::string dump = to_debug_string(true);
  std::cerr << dump << std::endl;
}
#endif // TOLK_DEBUG
// Composes an exception message: "Unexpected ASTNodeType [<type name> ]in <place_where>".
// A node type name is available (and included) only in debug builds, since ASTStringifier exists only there.
UnexpectedASTNodeType::UnexpectedASTNodeType(AnyV v_unexpected, const char* place_where)
  : v_unexpected(v_unexpected)
  , message("Unexpected ASTNodeType ") {
#ifdef TOLK_DEBUG
  message.append(ASTStringifier::ast_node_type_to_string(v_unexpected->type));
  message.append(" ");
#endif
  message.append("in ");
  message.append(place_where);
}
// Reports an error attached to this vertex: throws ParseError constructed from the vertex location and err_msg.
// Always throws (the declaration in ast.h is marked with GNU_ATTRIBUTE_NORETURN).
void ASTNodeBase::error(const std::string& err_msg) const {
throw ParseError(loc, err_msg);
}
// Returns a zero-based position of an item named nameT in this forall list, or -1 if there is no such item.
// Null children are skipped (but still counted in positions).
int Vertex<ast_forall_list>::lookup_idx(std::string_view nameT) const {
  int position = 0;
  for (AnyV item : children) {
    if (item != nullptr && item->as<ast_forall_item>()->nameT == nameT) {
      return position;
    }
    ++position;
  }
  return -1;
}
// Returns a zero-based position of an argument named arg_name in this argument list, or -1 if there is no such one.
// Null children are skipped (but still counted in positions).
int Vertex<ast_argument_list>::lookup_idx(std::string_view arg_name) const {
  int position = 0;
  for (AnyV item : children) {
    if (item != nullptr && item->as<ast_argument>()->arg_name == arg_name) {
      return position;
    }
    ++position;
  }
  return -1;
}
} // namespace tolk

567
tolk/ast.h Normal file
View file

@ -0,0 +1,567 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <string>
#include "platform-utils.h"
#include "src-file.h"
#include "type-expr.h"
#include "lexer.h"
/*
* Here we introduce AST representation of Tolk source code.
* Historically, in FunC, there was no AST: while lexing, symbols were registered, types were inferred, and so on.
* There was no way to perform any more or less semantic analysis.
* In Tolk, I've implemented parsing .tolk files into AST at first, and then converting this AST
* into legacy representation (see ast-to-legacy.cpp).
* In the future, more and more code analysis will be moved out of legacy to AST-level.
*
* From the user's point of view, all AST vertices are constant. All API is based on constancy.
* Even though fields of vertex structs are public, they can't be modified, since vertices are accepted by const ref.
* Generally, there are two ways of accepting a vertex:
* * AnyV (= const ASTNodeBase*)
* the only thing you can do with this vertex is to see v->type (ASTNodeType) and to cast via v->as<node_type>()
* * V<node_type> (= const Vertex<node_type>*)
* a specific type of vertex, you can use its fields and methods
* There is one way of creating a vertex:
* * createV<node_type>(...constructor_args) (= new Vertex<node_type>(...))
* vertices are currently created on a heap, without any custom memory arena, just allocated and never deleted
*
* Having AnyV and knowing its node_type, a call
* v->as<node_type>()
* will return a typed vertex.
* There is also a shorthand v->try_as<node_type>() which returns V<node_type> or nullptr if types don't match:
* if (auto v_int = v->try_as<ast_int_const>())
* Note that these casts are NOT DYNAMIC. ASTNodeBase is not a virtual base, it has no vtable.
* So, as<...>() is just a compile-time cast, without any runtime overhead (except for a type check in TOLK_DEBUG builds).
*
* Note, that ASTNodeBase doesn't store any vector of children. That's why there is no way to loop over
* a random (unknown) vertex. Only a concrete Vertex<node_type> stores its children (if any).
* Hence, to iterate over a custom vertex (e.g., a function body), one should inherit some kind of ASTVisitor.
* Besides read-only visiting, there is a "visit and replace" pattern.
* See ast-visitor.h and ast-replacer.h.
*/
namespace tolk {
// A discriminator stored in every vertex (ASTNodeBase::type); as<...>() and try_as<...>() rely on it.
// NOTE: ASTStringifier::name_pairs (ast-stringifier.h) is indexed by these values and statically asserts
// that ast_tolk_file is the last enumerator: when adding a node type, update that table keeping the same order.
enum ASTNodeType {
ast_empty,
ast_identifier,
ast_int_const,
ast_string_const,
ast_bool_const,
ast_nil_tuple,
ast_function_call,
ast_parenthesized_expr,
ast_global_var_declaration,
ast_global_var_declaration_list,
ast_constant_declaration,
ast_constant_declaration_list,
ast_underscore,
ast_type_expression,
ast_variable_declaration,
ast_tensor,
ast_tensor_square,
ast_dot_tilde_call,
ast_unary_operator,
ast_binary_operator,
ast_ternary_operator,
ast_return_statement,
ast_sequence,
ast_repeat_statement,
ast_while_statement,
ast_do_until_statement,
ast_try_catch_statement,
ast_if_statement,
ast_forall_item,
ast_forall_list,
ast_argument,
ast_argument_list,
ast_asm_body,
ast_function_declaration,
ast_pragma_no_arg,
ast_pragma_version,
ast_include_statement,
ast_tolk_file,
};
struct ASTNodeBase;
// a pointer to a vertex of any type (a concrete type is known only at runtime, from v->type)
using AnyV = const ASTNodeBase*;
// the primary template is only declared: concrete vertices are defined as explicit specializations below
template<ASTNodeType node_type>
struct Vertex;
// a pointer to a vertex of a statically known type
template<ASTNodeType node_type>
using V = const Vertex<node_type>*;
// createV<node_type>(args...) expands to `new Vertex<node_type>(args...)`
#define createV new Vertex
// An exception thrown by visitors/replacers when they meet a vertex of a node type they don't handle.
struct UnexpectedASTNodeType final : std::exception {
// the vertex that was not expected
AnyV v_unexpected;
// a text returned by what(); composed in the constructor (see ast.cpp)
std::string message;
// place_where is a label of the code that met the vertex, it's included into the message
explicit UnexpectedASTNodeType(AnyV v_unexpected, const char* place_where);
const char* what() const noexcept override {
return message.c_str();
}
};
// ---------------------------------------------------------
struct ASTNodeBase {
const ASTNodeType type;
const SrcLocation loc;
ASTNodeBase(ASTNodeType type, SrcLocation loc) : type(type), loc(loc) {}
template<ASTNodeType node_type>
V<node_type> as() const {
#ifdef TOLK_DEBUG
if (type != node_type) {
throw Fatal("v->as<...> to wrong node_type");
}
#endif
return static_cast<V<node_type>>(this);
}
template<ASTNodeType node_type>
V<node_type> try_as() const {
return type == node_type ? static_cast<V<node_type>>(this) : nullptr;
}
#ifdef TOLK_DEBUG
std::string to_debug_string() const { return to_debug_string(false); }
std::string to_debug_string(bool colored) const;
void debug_print() const;
#endif
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
void error(const std::string& err_msg) const;
};
struct ASTNodeLeaf : ASTNodeBase {
friend class ASTVisitor;
friend class ASTReplacer;
protected:
ASTNodeLeaf(ASTNodeType type, SrcLocation loc)
: ASTNodeBase(type, loc) {}
};
struct ASTNodeUnary : ASTNodeBase {
friend class ASTVisitor;
friend class ASTReplacer;
protected:
AnyV child;
ASTNodeUnary(ASTNodeType type, SrcLocation loc, AnyV child)
: ASTNodeBase(type, loc), child(child) {}
};
struct ASTNodeBinary : ASTNodeBase {
friend class ASTVisitor;
friend class ASTReplacer;
protected:
AnyV lhs;
AnyV rhs;
ASTNodeBinary(ASTNodeType type, SrcLocation loc, AnyV lhs, AnyV rhs)
: ASTNodeBase(type, loc), lhs(lhs), rhs(rhs) {}
};
struct ASTNodeVararg : ASTNodeBase {
friend class ASTVisitor;
friend class ASTReplacer;
protected:
std::vector<AnyV> children;
ASTNodeVararg(ASTNodeType type, SrcLocation loc, std::vector<AnyV> children)
: ASTNodeBase(type, loc), children(std::move(children)) {}
public:
int size() const { return static_cast<int>(children.size()); }
bool empty() const { return children.empty(); }
};
// ---------------------------------------------------------
// Node without payload; constructed with a default SrcLocation.
template<>
struct Vertex<ast_empty> final : ASTNodeLeaf {
  Vertex() : ASTNodeLeaf(ast_empty, SrcLocation()) {}
};
// Identifier leaf. `name` is a std::string_view, i.e. it does not own the text it refers to.
template<>
struct Vertex<ast_identifier> final : ASTNodeLeaf {
  std::string_view name;

  Vertex(SrcLocation loc, std::string_view ident)
    : ASTNodeLeaf(ast_identifier, loc), name(ident) {}
};
// Integer constant leaf. The value is kept as text (`int_val` is a non-owning
// string_view); no numeric conversion happens in this node.
template<>
struct Vertex<ast_int_const> final : ASTNodeLeaf {
  std::string_view int_val;

  Vertex(SrcLocation loc, std::string_view literal)
    : ASTNodeLeaf(ast_int_const, loc), int_val(literal) {}
};
// String constant leaf: a non-owning view of the text plus a one-char `modifier`.
// NOTE(review): the set of modifier values is not visible here — confirm against the parser.
template<>
struct Vertex<ast_string_const> final : ASTNodeLeaf {
  std::string_view str_val;
  char modifier;

  Vertex(SrcLocation loc, std::string_view text, char mod)
    : ASTNodeLeaf(ast_string_const, loc), str_val(text), modifier(mod) {}
};
// Boolean constant leaf holding its value in `bool_val`.
template<>
struct Vertex<ast_bool_const> final : ASTNodeLeaf {
  bool bool_val;

  Vertex(SrcLocation loc, bool value)
    : ASTNodeLeaf(ast_bool_const, loc), bool_val(value) {}
};
// Leaf of kind ast_nil_tuple; it carries nothing but its location.
template<>
struct Vertex<ast_nil_tuple> final : ASTNodeLeaf {
  explicit Vertex(SrcLocation loc) : ASTNodeLeaf(ast_nil_tuple, loc) {}
};
// Function call: the callee (lhs) applied to exactly one argument (rhs).
// Even f(1,2,3) follows this shape: the single argument is the tensor "(1,2,3)".
template<>
struct Vertex<ast_function_call> final : ASTNodeBinary {
  Vertex(SrcLocation loc, AnyV lhs_f, AnyV arg)
    : ASTNodeBinary(ast_function_call, loc, lhs_f, arg) {}

  AnyV get_called_f() const { return lhs; }
  AnyV get_called_arg() const { return rhs; }
};
// An expression wrapped in parentheses; the inner expression is the only child.
template<>
struct Vertex<ast_parenthesized_expr> final : ASTNodeUnary {
  Vertex(SrcLocation loc, AnyV expr)
    : ASTNodeUnary(ast_parenthesized_expr, loc, expr) {}

  AnyV get_expr() const { return child; }
};
// Declaration of one global variable: its name (non-owning view) and its type.
template<>
struct Vertex<ast_global_var_declaration> final : ASTNodeLeaf {
  std::string_view var_name;
  TypeExpr* declared_type;  // may be nullptr

  Vertex(SrcLocation loc, std::string_view name, TypeExpr* type)
    : ASTNodeLeaf(ast_global_var_declaration, loc), var_name(name), declared_type(type) {}
};
// A list of global variable declarations; the children are the declarations.
template<>
struct Vertex<ast_global_var_declaration_list> final : ASTNodeVararg {
  Vertex(SrcLocation loc, std::vector<AnyV> declarations)
    : ASTNodeVararg(ast_global_var_declaration_list, loc, std::move(declarations)) {}

  // Borrowed reference to the stored children (no copy).
  const std::vector<AnyV>& get_declarations() const { return children; }
};
// Declaration of one constant: name, optional declared type, and the
// initializer expression stored as the only child.
template<>
struct Vertex<ast_constant_declaration> final : ASTNodeUnary {
  std::string_view const_name;
  TypeExpr* declared_type;  // may be nullptr

  Vertex(SrcLocation loc, std::string_view name, TypeExpr* type, AnyV init_value)
    : ASTNodeUnary(ast_constant_declaration, loc, init_value), const_name(name), declared_type(type) {}

  AnyV get_init_value() const { return child; }
};
// A list of constant declarations; the children are the declarations.
template<>
struct Vertex<ast_constant_declaration_list> final : ASTNodeVararg {
  Vertex(SrcLocation loc, std::vector<AnyV> declarations)
    : ASTNodeVararg(ast_constant_declaration_list, loc, std::move(declarations)) {}

  // Borrowed reference to the stored children (no copy).
  const std::vector<AnyV>& get_declarations() const { return children; }
};
// Leaf of kind ast_underscore; it carries nothing but its location.
template<>
struct Vertex<ast_underscore> final : ASTNodeLeaf {
  explicit Vertex(SrcLocation loc) : ASTNodeLeaf(ast_underscore, loc) {}
};
// Leaf wrapping a TypeExpr pointer; the pointer is stored as given.
template<>
struct Vertex<ast_type_expression> final : ASTNodeLeaf {
  TypeExpr* declared_type;

  Vertex(SrcLocation loc, TypeExpr* type)
    : ASTNodeLeaf(ast_type_expression, loc), declared_type(type) {}
};
// Local variable declaration: a declared type plus the declared target,
// which is stored as the only child.
template<>
struct Vertex<ast_variable_declaration> final : ASTNodeUnary {
  TypeExpr* declared_type;

  Vertex(SrcLocation loc, TypeExpr* type, AnyV dest)
    : ASTNodeUnary(ast_variable_declaration, loc, dest), declared_type(type) {}

  // The target is an identifier, a tuple or a tensor.
  AnyV get_variable_or_list() const { return child; }
};
// Tensor node: an ordered list of item nodes.
template<>
struct Vertex<ast_tensor> final : ASTNodeVararg {
  Vertex(SrcLocation loc, std::vector<AnyV> items)
    : ASTNodeVararg(ast_tensor, loc, std::move(items)) {}

  // Borrowed reference to the stored items (no copy).
  const std::vector<AnyV>& get_items() const { return children; }
  // Bounds-checked access (std::vector::at).
  AnyV get_item(int i) const { return children.at(i); }
};
// Node of kind ast_tensor_square: an ordered list of item nodes.
// NOTE(review): presumably the square-bracket counterpart of ast_tensor — confirm against the parser.
template<>
struct Vertex<ast_tensor_square> final : ASTNodeVararg {
  Vertex(SrcLocation loc, std::vector<AnyV> items)
    : ASTNodeVararg(ast_tensor_square, loc, std::move(items)) {}

  // Borrowed reference to the stored items (no copy).
  const std::vector<AnyV>& get_items() const { return children; }
  // Bounds-checked access (std::vector::at).
  AnyV get_item(int i) const { return children.at(i); }
};
// Method-style call: `method_name` applied to an object (lhs) with an argument (rhs).
template<>
struct Vertex<ast_dot_tilde_call> final : ASTNodeBinary {
  std::string_view method_name;  // starts with . or ~

  Vertex(SrcLocation loc, std::string_view method, AnyV lhs, AnyV rhs)
    : ASTNodeBinary(ast_dot_tilde_call, loc, lhs, rhs), method_name(method) {}

  AnyV get_lhs() const { return lhs; }
  AnyV get_arg() const { return rhs; }
};
// Unary operator applied to one operand. Both the operator's text and its
// token type are stored; the operand is the only child.
template<>
struct Vertex<ast_unary_operator> final : ASTNodeUnary {
  std::string_view operator_name;
  TokenType tok;

  Vertex(SrcLocation loc, std::string_view op_name, TokenType op_tok, AnyV rhs)
    : ASTNodeUnary(ast_unary_operator, loc, rhs), operator_name(op_name), tok(op_tok) {}

  AnyV get_rhs() const { return child; }
};
// Binary operator applied to two operands. Both the operator's text and its
// token type are stored alongside lhs and rhs.
template<>
struct Vertex<ast_binary_operator> final : ASTNodeBinary {
  std::string_view operator_name;
  TokenType tok;

  Vertex(SrcLocation loc, std::string_view op_name, TokenType op_tok, AnyV lhs, AnyV rhs)
    : ASTNodeBinary(ast_binary_operator, loc, lhs, rhs), operator_name(op_name), tok(op_tok) {}

  AnyV get_lhs() const { return lhs; }
  AnyV get_rhs() const { return rhs; }
};
// Conditional expression with three children, stored in this fixed order:
// [0] condition, [1] value when true, [2] value when false.
template<>
struct Vertex<ast_ternary_operator> final : ASTNodeVararg {
  Vertex(SrcLocation loc, AnyV cond, AnyV when_true, AnyV when_false)
    : ASTNodeVararg(ast_ternary_operator, loc, {cond, when_true, when_false}) {}

  AnyV get_cond() const { return children.at(0); }
  AnyV get_when_true() const { return children.at(1); }
  AnyV get_when_false() const { return children.at(2); }
};
// Return statement; the returned expression is stored as the only child.
// Marked `final` like every other Vertex specialization: node types are leaf
// classes and are not meant to be derived from.
template<>
struct Vertex<ast_return_statement> final : ASTNodeUnary {
  AnyV get_return_value() const { return child; }

  Vertex(SrcLocation loc, AnyV child)
    : ASTNodeUnary(ast_return_statement, loc, child) {}
};
// Sequence node: an ordered list of items plus a second location, `loc_end`.
// NOTE(review): loc_end presumably marks where the sequence closes — confirm against the parser.
template<>
struct Vertex<ast_sequence> final : ASTNodeVararg {
  SrcLocation loc_end;

  Vertex(SrcLocation loc, SrcLocation end_loc, std::vector<AnyV> items)
    : ASTNodeVararg(ast_sequence, loc, std::move(items)), loc_end(end_loc) {}

  // Borrowed reference to the stored items (no copy).
  const std::vector<AnyV>& get_items() const { return children; }
  // Bounds-checked access (std::vector::at).
  AnyV get_item(int i) const { return children.at(i); }
};
template<>
struct Vertex<ast_repeat_statement> final : ASTNodeBinary {
AnyV get_cond() const { return lhs; }
auto get_body() const { return rhs->as<ast_sequence>(); }
Vertex(SrcLocation loc, AnyV cond, V<ast_sequence> body)
: ASTNodeBinary(ast_repeat_statement, loc, cond, body) {}
};
template<>
struct Vertex<ast_while_statement> final : ASTNodeBinary {
AnyV get_cond() const { return lhs; }
auto get_body() const { return rhs->as<ast_sequence>(); }
Vertex(SrcLocation loc, AnyV cond, V<ast_sequence> body)
: ASTNodeBinary(ast_while_statement, loc, cond, body) {}
};
template<>
struct Vertex<ast_do_until_statement> final : ASTNodeBinary {
auto get_body() const { return lhs->as<ast_sequence>(); }
AnyV get_cond() const { return rhs; }
Vertex(SrcLocation loc, V<ast_sequence> body, AnyV cond)
: ASTNodeBinary(ast_do_until_statement, loc, body, cond) {}
};
template<>
struct Vertex<ast_try_catch_statement> final : ASTNodeVararg {
auto get_try_body() const { return children.at(0)->as<ast_sequence>(); }
AnyV get_catch_expr() const { return children.at(1); } // it's a tensor
auto get_catch_body() const { return children.at(2)->as<ast_sequence>(); }
Vertex(SrcLocation loc, V<ast_sequence> try_body, AnyV catch_expr, V<ast_sequence> catch_body)
: ASTNodeVararg(ast_try_catch_statement, loc, {try_body, catch_expr, catch_body}) {}
};
template<>
struct Vertex<ast_if_statement> final : ASTNodeVararg {
bool is_ifnot;
AnyV get_cond() const { return children.at(0); }
auto get_if_body() const { return children.at(1)->as<ast_sequence>(); }
auto get_else_body() const { return children.at(2)->as<ast_sequence>(); } // always exists (when else omitted, it's empty)
Vertex(SrcLocation loc, bool is_ifnot, AnyV cond, V<ast_sequence> if_body, V<ast_sequence> else_body)
: ASTNodeVararg(ast_if_statement, loc, {cond, if_body, else_body}), is_ifnot(is_ifnot) {}
};
// One item of a forall list: the type variable's name (an owned std::string)
// together with the TypeExpr created for it.
template<>
struct Vertex<ast_forall_item> final : ASTNodeLeaf {
  // Kept so the same pointer is reused, since TypeExpr::new_var(i) always allocates.
  TypeExpr* created_type;
  std::string nameT;

  Vertex(SrcLocation loc, TypeExpr* type_var, std::string name)
    : ASTNodeLeaf(ast_forall_item, loc), created_type(type_var), nameT(std::move(name)) {}
};
// Forall list: the children are ast_forall_item nodes.
template<>
struct Vertex<ast_forall_list> final : ASTNodeVararg {
  // Borrowed reference to the stored items. This used to return the vector by
  // value, copying it on every call; it now matches the other list nodes.
  const std::vector<AnyV>& get_items() const { return children; }
  // Bounds-checked access (std::vector::at), cast to the item node type.
  auto get_item(int i) const { return children.at(i)->as<ast_forall_item>(); }

  Vertex(SrcLocation loc, std::vector<AnyV> forall_items)
    : ASTNodeVararg(ast_forall_list, loc, std::move(forall_items)) {}

  // Declared here, defined elsewhere.
  int lookup_idx(std::string_view nameT) const;
};
// One function argument: its name (non-owning view) and its type.
template<>
struct Vertex<ast_argument> final : ASTNodeLeaf {
  std::string_view arg_name;
  TypeExpr* arg_type;

  Vertex(SrcLocation loc, std::string_view name, TypeExpr* type)
    : ASTNodeLeaf(ast_argument, loc), arg_name(name), arg_type(type) {}
};
// Asm body: two integer orderings (arg_order, ret_order) plus the asm commands
// stored as children.
template<>
struct Vertex<ast_asm_body> final : ASTNodeVararg {
  std::vector<int> arg_order;
  std::vector<int> ret_order;

  Vertex(SrcLocation loc, std::vector<int> args_perm, std::vector<int> rets_perm, std::vector<AnyV> asm_commands)
    : ASTNodeVararg(ast_asm_body, loc, std::move(asm_commands)), arg_order(std::move(args_perm)), ret_order(std::move(rets_perm)) {}

  // Each command is an ast_string_const.
  const std::vector<AnyV>& get_asm_commands() const { return children; }
};
template<>
struct Vertex<ast_argument_list> final : ASTNodeVararg {
const std::vector<AnyV>& get_args() const { return children; }
auto get_arg(int i) const { return children.at(i)->as<ast_argument>(); }
Vertex(SrcLocation loc, std::vector<AnyV> args)
: ASTNodeVararg(ast_argument_list, loc, std::move(args)) {}
int lookup_idx(std::string_view arg_name) const;
};
// Function declaration: lhs is the argument list, rhs is the body.
// The constructor sets only the name, the arguments and the body; the return
// type, forall list, method id and all `marked_as_*` flags keep their defaults
// and are public fields.
template<>
struct Vertex<ast_function_declaration> final : ASTNodeBinary {
  std::string name;
  TypeExpr* ret_type = nullptr;
  V<ast_forall_list> forall_list = nullptr;
  bool marked_as_pure = false;
  bool marked_as_builtin = false;
  bool marked_as_get_method = false;
  bool marked_as_inline = false;
  bool marked_as_inline_ref = false;
  V<ast_int_const> method_id = nullptr;

  Vertex(SrcLocation loc, std::string fun_name, V<ast_argument_list> args, AnyV body)
    : ASTNodeBinary(ast_function_declaration, loc, args, body), name(std::move(fun_name)) {}

  int get_num_args() const { return lhs->as<ast_argument_list>()->size(); }
  V<ast_argument_list> get_arg_list() const { return lhs->as<ast_argument_list>(); }
  V<ast_argument> get_arg(int i) const { return lhs->as<ast_argument_list>()->get_arg(i); }
  // The body is one of: ast_sequence / ast_asm_body / ast_empty.
  AnyV get_body() const { return rhs; }
};
// Pragma that carries only its name (non-owning view).
template<>
struct Vertex<ast_pragma_no_arg> final : ASTNodeLeaf {
  std::string_view pragma_name;

  Vertex(SrcLocation loc, std::string_view name)
    : ASTNodeLeaf(ast_pragma_no_arg, loc), pragma_name(name) {}
};
// Version pragma: a comparison token plus the version text (non-owning view).
template<>
struct Vertex<ast_pragma_version> final : ASTNodeLeaf {
  TokenType cmp_tok;
  std::string_view semver;

  Vertex(SrcLocation loc, TokenType cmp, std::string_view version)
    : ASTNodeLeaf(ast_pragma_version, loc), cmp_tok(cmp), semver(version) {}
};
// Include statement carrying the included file's name (non-owning view).
template<>
struct Vertex<ast_include_statement> final : ASTNodeLeaf {
  std::string_view file_name;

  Vertex(SrcLocation loc, std::string_view included_name)
    : ASTNodeLeaf(ast_include_statement, loc), file_name(included_name) {}
};
// Root node of one source file: remembers the SrcFile and holds the top-level
// declarations as children. Its location is constructed from the file itself.
template<>
struct Vertex<ast_tolk_file> final : ASTNodeVararg {
  const SrcFile* const file;

  Vertex(const SrcFile* src_file, std::vector<AnyV> toplevel_declarations)
    : ASTNodeVararg(ast_tolk_file, SrcLocation(src_file), std::move(toplevel_declarations)), file(src_file) {}

  // Borrowed reference to the stored declarations (no copy).
  const std::vector<AnyV>& get_toplevel_declarations() const { return children; }
};
} // namespace tolk

View file

@ -72,22 +72,6 @@ SymDef* define_builtin_func(const std::string& name, TypeExpr* func_type, const
return define_builtin_func_impl(name, new SymValAsmFunc{func_type, make_simple_compile(macro), arg_order, ret_order, !impure});
}
// Sets `auto_apply` on the symbol's value when that value is a SymVal.
// A null `def`, or a value of another dynamic type, is left untouched.
// Returns `def` unchanged so the call can be chained.
SymDef* force_autoapply(SymDef* def) {
  if (!def) {
    return def;
  }
  if (auto* sym_val = dynamic_cast<SymVal*>(def->value)) {
    sym_val->auto_apply = true;
  }
  return def;
}
// Defines a builtin "constant" as a builtin function of type `() -> const_type`
// and marks the resulting symbol as auto-applied. Extra arguments are forwarded
// to define_builtin_func unchanged.
template <typename... Args>
SymDef* define_builtin_const(std::string name, TypeExpr* const_type, Args&&... args) {
  SymDef* defined = define_builtin_func(name, TypeExpr::new_map(TypeExpr::new_unit(), const_type),
                                        std::forward<Args>(args)...);
  return force_autoapply(defined);
}
bool SymValAsmFunc::compile(AsmOpList& dest, std::vector<VarDescr>& out, std::vector<VarDescr>& in,
SrcLocation where) const {
if (simple_compile) {
@ -1219,11 +1203,10 @@ void define_builtins() {
define_builtin_func("_<=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 6));
define_builtin_func("_>=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 3));
define_builtin_func("_<=>_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 7));
define_builtin_const("true", Int, /* AsmOp::Const("TRUE") */ std::bind(compile_bool_const, _1, _2, true));
define_builtin_const("false", Int, /* AsmOp::Const("FALSE") */ std::bind(compile_bool_const, _1, _2, false));
define_builtin_func("true", TypeExpr::new_map(TypeExpr::new_unit(), Int), /* AsmOp::Const("TRUE") */ std::bind(compile_bool_const, _1, _2, true));
define_builtin_func("false", TypeExpr::new_map(TypeExpr::new_unit(), Int), /* AsmOp::Const("FALSE") */ std::bind(compile_bool_const, _1, _2, false));
// define_builtin_func("null", Null, AsmOp::Const("PUSHNULL"));
define_builtin_const("nil", Tuple, AsmOp::Const("PUSHNULL"));
define_builtin_const("Nil", Tuple, AsmOp::Const("NIL"));
define_builtin_func("nil", TypeExpr::new_map(TypeExpr::new_unit(), Tuple), AsmOp::Const("PUSHNULL"));
define_builtin_func("null?", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Int)), compile_is_null);
define_builtin_func("throw", impure_un_op, compile_throw, true);
define_builtin_func("throw_if", impure_bin_op, std::bind(compile_cond_throw, _1, _2, true), true);

View file

@ -41,19 +41,7 @@ Expr::Expr(ExprCls c, sym_idx_t name_idx, std::initializer_list<Expr*> _arglist)
}
}
// Reports an error at the lexer's current token unless this expression is an rvalue.
void Expr::chk_rvalue(const Lexer& lex) const {
  if (is_rvalue()) {
    return;
  }
  lex.error_at("rvalue expected before `", "`");
}
// Reports an error at the lexer's current token unless this expression is an lvalue.
void Expr::chk_lvalue(const Lexer& lex) const {
  if (is_lvalue()) {
    return;
  }
  lex.error_at("lvalue expected before `", "`");
}
bool Expr::deduce_type(const Lexer& lex) {
bool Expr::deduce_type() {
if (e_type) {
return true;
}
@ -77,7 +65,7 @@ bool Expr::deduce_type(const Lexer& lex) {
std::ostringstream os;
os << "cannot apply function " << sym->name() << " : " << sym_val->get_type() << " to arguments of type "
<< fun_type->args[0] << ": " << ue;
lex.error(os.str());
throw ParseError(here, os.str());
}
e_type = fun_type->args[1];
TypeExpr::remove_indirect(e_type);
@ -92,7 +80,7 @@ bool Expr::deduce_type(const Lexer& lex) {
std::ostringstream os;
os << "cannot apply expression of type " << args[0]->e_type << " to an expression of type " << args[1]->e_type
<< ": " << ue;
lex.error(os.str());
throw ParseError(here, os.str());
}
e_type = fun_type->args[1];
TypeExpr::remove_indirect(e_type);
@ -107,7 +95,7 @@ bool Expr::deduce_type(const Lexer& lex) {
std::ostringstream os;
os << "cannot assign an expression of type " << args[1]->e_type << " to a variable or pattern of type "
<< args[0]->e_type << ": " << ue;
lex.error(os.str());
throw ParseError(here, os.str());
}
e_type = args[0]->e_type;
TypeExpr::remove_indirect(e_type);
@ -124,7 +112,7 @@ bool Expr::deduce_type(const Lexer& lex) {
os << "cannot implicitly assign an expression of type " << args[1]->e_type
<< " to a variable or pattern of type " << rhs_type << " in modifying method `" << G.symbols.get_name(val)
<< "` : " << ue;
lex.error(os.str());
throw ParseError(here, os.str());
}
e_type = rhs_type->args[1];
TypeExpr::remove_indirect(e_type);
@ -139,7 +127,7 @@ bool Expr::deduce_type(const Lexer& lex) {
} catch (UnifyError& ue) {
std::ostringstream os;
os << "condition in a conditional expression has non-integer type " << args[0]->e_type << ": " << ue;
lex.error(os.str());
throw ParseError(here, os.str());
}
try {
unify(args[1]->e_type, args[2]->e_type);
@ -147,7 +135,7 @@ bool Expr::deduce_type(const Lexer& lex) {
std::ostringstream os;
os << "the two variants in a conditional expression have different types " << args[1]->e_type << " and "
<< args[2]->e_type << " : " << ue;
lex.error(os.str());
throw ParseError(here, os.str());
}
e_type = args[1]->e_type;
TypeExpr::remove_indirect(e_type);
@ -170,13 +158,13 @@ int Expr::define_new_vars(CodeBlob& code) {
}
case _Var:
if (val < 0) {
val = code.create_var(TmpVar::_Named, e_type, sym, here);
val = code.create_var(false, e_type, sym, here);
return 1;
}
break;
case _Hole:
if (val < 0) {
val = code.create_var(TmpVar::_Tmp, e_type, nullptr, here);
val = code.create_var(true, e_type, nullptr, here);
}
break;
}
@ -279,7 +267,7 @@ std::vector<var_idx_t> pre_compile_tensor(const std::vector<Expr *>& args, CodeB
res_lists[i] = args[i]->pre_compile(code, lval_globs);
for (size_t j = 0; j < res_lists[i].size(); ++j) {
TmpVar& var = code.vars.at(res_lists[i][j]);
if (!lval_globs && (var.cls & TmpVar::_Named)) {
if (!lval_globs && !var.is_tmp_unnamed) {
var.on_modification.push_back([&modified_vars, i, j, cur_ops = code.cur_ops, done = false](SrcLocation here) mutable {
if (!done) {
done = true;

View file

@ -361,19 +361,21 @@ struct ChunkIdentifierOrKeyword final : ChunkLexerBase {
if (str == "asm") return tok_asm;
if (str == "get") return tok_get;
if (str == "try") return tok_try;
if (str == "nil") return tok_nil;
break;
case 4:
if (str == "else") return tok_else;
if (str == "true") return tok_true;
if (str == "pure") return tok_pure;
if (str == "then") return tok_then;
if (str == "cell") return tok_cell;
if (str == "cont") return tok_cont;
if (str == "type") return tok_type; // todo unused token?
break;
case 5:
if (str == "slice") return tok_slice;
if (str == "tuple") return tok_tuple;
if (str == "const") return tok_const;
if (str == "false") return tok_false;
if (str == "while") return tok_while;
if (str == "until") return tok_until;
if (str == "catch") return tok_catch;
@ -427,7 +429,7 @@ struct ChunkIdentifierOrKeyword final : ChunkLexerBase {
if (TokenType kw_tok = maybe_keyword(str_val)) {
lex->add_token(kw_tok, str_val);
} else {
G.symbols.lookup_add(static_cast<std::string>(str_val));
G.symbols.lookup_add(str_val);
lex->add_token(tok_identifier, str_val);
}
return true;
@ -453,7 +455,7 @@ struct ChunkIdentifierInBackticks final : ChunkLexerBase {
std::string_view str_val(str_begin + 1, lex->c_str() - str_begin - 1);
lex->skip_chars(1);
G.symbols.lookup_add(static_cast<std::string>(str_val));
G.symbols.lookup_add(str_val);
lex->add_token(tok_identifier, str_val);
return true;
}
@ -610,21 +612,12 @@ void Lexer::next_special(TokenType parse_next_as, const char* str_expected) {
cur_token = tokens_circularbuf[++cur_token_idx & 7];
}
// Looks up (adding if needed) the current token's text in the global symbol
// table and returns its index. The current token is asserted to be an identifier.
int Lexer::cur_sym_idx() const {
  assert(tok() == tok_identifier);
  const std::string ident = cur_str_std_string();
  return G.symbols.lookup_add(ident);
}
void Lexer::error(const std::string& err_msg) const {
throw ParseError(cur_location(), err_msg);
}
// Throws a ParseError at the current location whose message is
// `prefix` + text of the current token + `suffix`.
void Lexer::error_at(const std::string& prefix, const std::string& suffix) const {
  const std::string message = prefix + cur_str_std_string() + suffix;
  throw ParseError(cur_location(), message);
}
void Lexer::on_expect_call_failed(const char* str_expected) const {
throw ParseError(cur_location(), std::string(str_expected) + " expected instead of `" + cur_str_std_string() + "`");
throw ParseError(cur_location(), std::string(str_expected) + " expected instead of `" + std::string(cur_str()) + "`");
}
void lexer_init() {

View file

@ -31,6 +31,10 @@ enum TokenType {
tok_identifier,
tok_true,
tok_false,
tok_nil, // todo "null" keyword is still absent, "nil" in FunC is an empty tuple
tok_plus,
tok_minus,
tok_mul,
@ -108,7 +112,6 @@ enum TokenType {
tok_builder,
tok_cont,
tok_tuple,
tok_type,
tok_mapsto,
tok_forall,
@ -206,10 +209,8 @@ public:
TokenType tok() const { return cur_token.type; }
std::string_view cur_str() const { return cur_token.str_val; }
std::string cur_str_std_string() const { return static_cast<std::string>(cur_token.str_val); }
SrcLocation cur_location() const { return location; }
const SrcFile* cur_file() const { return file; }
int cur_sym_idx() const;
void next();
void next_special(TokenType parse_next_as, const char* str_expected);
@ -228,8 +229,6 @@ public:
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
void error(const std::string& err_msg) const;
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
void error_at(const std::string& prefix, const std::string& suffix) const;
};
void lexer_init();

File diff suppressed because it is too large Load diff

View file

@ -22,12 +22,6 @@
namespace tolk {
// Stores the name and index, then classifies the symbol by the first character
// of its name: '.' -> dot_identifier, '~' -> tilde_identifier, anything else -> undef.
Symbol::Symbol(std::string str, sym_idx_t idx) : str(std::move(str)), idx(idx) {
  // `this->str` is the member; the parameter `str` has been moved from.
  switch (this->str[0]) {
    case '.':
      subclass = SymbolSubclass::dot_identifier;
      break;
    case '~':
      subclass = SymbolSubclass::tilde_identifier;
      break;
    default:
      subclass = SymbolSubclass::undef;
      break;
  }
}
std::string Symbol::unknown_symbol_name(sym_idx_t i) {
if (!i) {
return "_";
@ -78,7 +72,7 @@ void open_scope(SrcLocation loc) {
G.scope_opened_at.push_back(loc);
}
void close_scope(SrcLocation loc) {
void close_scope() {
if (!G.scope_level) {
throw Fatal{"cannot close the outer scope"};
}

View file

@ -36,18 +36,11 @@ struct SymValBase {
};
// Classification of a symbol by the first character of its name.
enum class SymbolSubclass {
  undef = 0,
  dot_identifier = 1,   // begins with . (a const method)
  tilde_identifier = 2  // begins with ~ (a non-const method)
};
struct Symbol {
std::string str;
sym_idx_t idx;
SymbolSubclass subclass;
Symbol(std::string str, sym_idx_t idx);
Symbol(std::string str, sym_idx_t idx) : str(std::move(str)), idx(idx) {}
static std::string unknown_symbol_name(sym_idx_t i);
};
@ -64,10 +57,10 @@ private:
public:
static constexpr sym_idx_t not_found = 0;
sym_idx_t lookup(const std::string_view& str, int mode = 0) {
sym_idx_t lookup(std::string_view str, int mode = 0) {
return gen_lookup(str, mode);
}
sym_idx_t lookup_add(const std::string& str) {
sym_idx_t lookup_add(std::string_view str) {
return gen_lookup(str, 1);
}
Symbol* operator[](sym_idx_t i) const {
@ -76,9 +69,6 @@ public:
// Name of the symbol with index i; falls back to Symbol::unknown_symbol_name(i)
// when no symbol is stored in that slot.
std::string get_name(sym_idx_t i) const {
  if (sym[i]) {
    return sym[i]->str;
  }
  return Symbol::unknown_symbol_name(i);
}
// Subclass of the symbol with index i; SymbolSubclass::undef when the slot is empty.
SymbolSubclass get_subclass(sym_idx_t i) const {
  if (sym[i]) {
    return sym[i]->subclass;
  }
  return SymbolSubclass::undef;
}
};
struct SymTableOverflow {
@ -104,7 +94,7 @@ struct SymDef {
void open_scope(SrcLocation loc);
void close_scope(SrcLocation loc);
void close_scope();
SymDef* lookup_symbol(sym_idx_t idx);
SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new = false, SrcLocation loc = {});

View file

@ -27,7 +27,8 @@
#include "compiler-state.h"
#include "lexer.h"
#include <getopt.h>
#include "git.h"
#include "ast-from-tokens.h"
#include "ast-to-legacy.h"
#include <fstream>
#include "td/utils/port/path.h"
#include <sys/stat.h>
@ -269,13 +270,13 @@ int tolk_proceed(const std::string &entrypoint_file_name) {
if (locate_res.is_error()) {
throw Fatal("Failed to locate stdlib: " + locate_res.error().message().str());
}
parse_source_file(locate_res.move_as_ok());
process_file_ast(parse_src_file_to_ast(locate_res.move_as_ok()));
}
td::Result<SrcFile*> locate_res = locate_source_file(entrypoint_file_name);
if (locate_res.is_error()) {
throw Fatal("Failed to locate " + entrypoint_file_name + ": " + locate_res.error().message().str());
}
parse_source_file(locate_res.move_as_ok());
process_file_ast(parse_src_file_to_ast(locate_res.move_as_ok()));
// todo #ifdef TOLK_PROFILING + comment
// lexer_measure_performance(all_src_files.get_all_files());
@ -293,6 +294,10 @@ int tolk_proceed(const std::string &entrypoint_file_name) {
unif_err.print_message(std::cerr);
std::cerr << std::endl;
return 2;
} catch (UnexpectedASTNodeType& error) {
std::cerr << "fatal: " << error.what() << std::endl;
std::cerr << "It's a compiler bug, please report to developers" << std::endl;
return 2;
}
}

View file

@ -17,7 +17,7 @@
#pragma once
#include "src-file.h"
#include "lexer.h"
#include "type-expr.h"
#include "symtable.h"
#include "crypto/common/refint.h"
#include "td/utils/Status.h"
@ -38,136 +38,6 @@ namespace tolk {
*
*/
// Node of a type expression. `constr` selects the kind of node, `value` and
// `args` carry kind-specific data, and [minw, maxw] is a width range.
struct TypeExpr {
enum te_type { te_Unknown, te_Var, te_Indirect, te_Atomic, te_Tensor, te_Tuple, te_Map, te_ForAll } constr;
// Atomic type ids reuse the numeric values of the corresponding lexer tokens.
enum AtomicType {
_Int = tok_int,
_Cell = tok_cell,
_Slice = tok_slice,
_Builder = tok_builder,
_Cont = tok_cont,
_Tuple = tok_tuple,
_Type = tok_type
};
int value;
int minw, maxw;
// Upper bound used for maxw when a node is created without an explicit width.
static constexpr int w_inf = 1023;
std::vector<TypeExpr*> args;
bool was_forall_var = false;
// Node without children and without a known width: the range is [0, w_inf].
TypeExpr(te_type _constr, int _val = 0) : constr(_constr), value(_val), minw(0), maxw(w_inf) {
}
// Node without children and with a fixed width (minw == maxw == width).
TypeExpr(te_type _constr, int _val, int width) : constr(_constr), value(_val), minw(width), maxw(width) {
}
// The constructors below take children: `value` becomes the number of children
// and the width range is obtained from compute_width().
TypeExpr(te_type _constr, std::vector<TypeExpr*> list)
: constr(_constr), value((int)list.size()), args(std::move(list)) {
compute_width();
}
TypeExpr(te_type _constr, std::initializer_list<TypeExpr*> list)
: constr(_constr), value((int)list.size()), args(std::move(list)) {
compute_width();
}
TypeExpr(te_type _constr, TypeExpr* elem0) : constr(_constr), value(1), args{elem0} {
compute_width();
}
// elem0 becomes args[0], followed by the elements of `list`.
TypeExpr(te_type _constr, TypeExpr* elem0, std::vector<TypeExpr*> list)
: constr(_constr), value((int)list.size() + 1), args{elem0} {
args.insert(args.end(), list.begin(), list.end());
compute_width();
}
TypeExpr(te_type _constr, TypeExpr* elem0, std::initializer_list<TypeExpr*> list)
: constr(_constr), value((int)list.size() + 1), args{elem0} {
args.insert(args.end(), list.begin(), list.end());
compute_width();
}
bool is_atomic() const {
return constr == te_Atomic;
}
// True for an atomic node whose `value` equals v (an AtomicType id).
bool is_atomic(int v) const {
return constr == te_Atomic && value == v;
}
bool is_int() const {
return is_atomic(_Int);
}
bool is_var() const {
return constr == te_Var;
}
bool is_map() const {
return constr == te_Map;
}
bool is_tuple() const {
return constr == te_Tuple;
}
bool has_fixed_width() const {
return minw == maxw;
}
// The fixed width, or -1 when minw and maxw differ.
int get_width() const {
return has_fixed_width() ? minw : -1;
}
void compute_width();
bool recompute_width();
void show_width(std::ostream& os);
std::ostream& print(std::ostream& os, int prio = 0) const;
void replace_with(TypeExpr* te2);
int extract_components(std::vector<TypeExpr*>& comp_list);
bool equals_to(const TypeExpr* rhs) const;
bool has_unknown_inside() const;
// Counters used to number new holes (incremented) and new type variables (decremented).
static int holes, type_vars;
// The factory functions below allocate nodes with `new` and return raw pointers.
static TypeExpr* new_hole() {
return new TypeExpr{te_Unknown, ++holes};
}
static TypeExpr* new_hole(int width) {
return new TypeExpr{te_Unknown, ++holes, width};
}
// The unit type: a te_Tensor node with value 0 and width 0.
static TypeExpr* new_unit() {
return new TypeExpr{te_Tensor, 0, 0};
}
static TypeExpr* new_atomic(int value) {
return new TypeExpr{te_Atomic, value, 1};
}
static TypeExpr* new_map(TypeExpr* from, TypeExpr* to);
static TypeExpr* new_func() {
return new_map(new_hole(), new_hole());
}
// With red == true, a one-element list is reduced to that element itself
// (no tensor node is allocated).
static TypeExpr* new_tensor(std::vector<TypeExpr*> list, bool red = true) {
return red && list.size() == 1 ? list[0] : new TypeExpr{te_Tensor, std::move(list)};
}
static TypeExpr* new_tensor(std::initializer_list<TypeExpr*> list) {
return new TypeExpr{te_Tensor, std::move(list)};
}
static TypeExpr* new_tensor(TypeExpr* te1, TypeExpr* te2) {
return new_tensor({te1, te2});
}
static TypeExpr* new_tensor(TypeExpr* te1, TypeExpr* te2, TypeExpr* te3) {
return new_tensor({te1, te2, te3});
}
// A tuple node has exactly one child (arg0); the list overloads wrap the list into a tensor first.
static TypeExpr* new_tuple(TypeExpr* arg0) {
return new TypeExpr{te_Tuple, arg0};
}
static TypeExpr* new_tuple(std::vector<TypeExpr*> list, bool red = false) {
return new_tuple(new_tensor(std::move(list), red));
}
static TypeExpr* new_tuple(std::initializer_list<TypeExpr*> list) {
return new_tuple(new_tensor(std::move(list)));
}
// Fresh type variable numbered by the decremented type_vars counter, width 1.
static TypeExpr* new_var() {
return new TypeExpr{te_Var, --type_vars, 1};
}
// Type variable with an explicitly given index, width 1.
static TypeExpr* new_var(int idx) {
return new TypeExpr{te_Var, idx, 1};
}
// A forall node stores `body` as args[0], followed by the elements of `list`.
static TypeExpr* new_forall(std::vector<TypeExpr*> list, TypeExpr* body) {
return new TypeExpr{te_ForAll, body, std::move(list)};
}
static TypeExpr* new_forall(std::initializer_list<TypeExpr*> list, TypeExpr* body) {
return new TypeExpr{te_ForAll, body, std::move(list)};
}
static bool remove_indirect(TypeExpr*& te, TypeExpr* forbidden = nullptr);
static std::vector<TypeExpr*> remove_forall(TypeExpr*& te);
static bool remove_forall_in(TypeExpr*& te, TypeExpr* te2, const std::vector<TypeExpr*>& new_vars);
};
std::ostream& operator<<(std::ostream& os, TypeExpr* type_expr);
struct UnifyError : std::exception {
TypeExpr* te1;
TypeExpr* te2;
@ -197,14 +67,13 @@ using const_idx_t = int;
struct TmpVar {
TypeExpr* v_type;
var_idx_t idx;
enum { _In = 1, _Named = 2, _Tmp = 4, _UniqueName = 0x20 };
int cls;
bool is_tmp_unnamed;
sym_idx_t name;
int coord;
SrcLocation where;
std::vector<std::function<void(SrcLocation)>> on_modification;
TmpVar(var_idx_t _idx, int _cls, TypeExpr* _type, SymDef* sym, SrcLocation loc);
TmpVar(var_idx_t _idx, bool _is_tmp_unnamed, TypeExpr* _type, SymDef* sym, SrcLocation loc);
void show(std::ostream& os, int omit_idx = 0) const;
void dump(std::ostream& os) const;
void set_location(SrcLocation loc);
@ -586,9 +455,9 @@ struct CodeBlob {
return res;
}
bool import_params(FormalArgList arg_list);
var_idx_t create_var(int cls, TypeExpr* var_type, SymDef* sym, SrcLocation loc);
var_idx_t create_var(bool is_tmp_unnamed, TypeExpr* var_type, SymDef* sym, SrcLocation loc);
var_idx_t create_tmp_var(TypeExpr* var_type, SrcLocation loc) {
return create_var(TmpVar::_Tmp, var_type, nullptr, loc);
return create_var(true, var_type, nullptr, loc);
}
int split_vars(bool strict = false);
bool compute_used_code_vars();
@ -631,7 +500,6 @@ struct CodeBlob {
struct SymVal : SymValBase {
TypeExpr* sym_type;
bool auto_apply{false};
SymVal(SymValKind kind, int idx, TypeExpr* sym_type = nullptr)
: SymValBase(kind, idx), sym_type(sym_type) {
}
@ -702,16 +570,6 @@ struct SymValCodeFunc : SymValFunc {
bool does_need_codegen() const;
};
// Symbol value that carries a type expression; the type defaults to nullptr.
struct SymValType : SymValBase {
  TypeExpr* sym_type;

  SymValType(SymValKind kind, int idx, TypeExpr* _stype = nullptr)
    : SymValBase(kind, idx), sym_type(_stype) {}

  ~SymValType() override = default;

  TypeExpr* get_type() const { return sym_type; }
};
struct SymValGlobVar : SymValBase {
TypeExpr* sym_type;
int out_idx{0};
@ -762,7 +620,6 @@ struct SymValConst : SymValBase {
// defined in parse-tolk.cpp
td::Result<SrcFile*> locate_source_file(const std::string& rel_filename);
void parse_source_file(SrcFile* file);
/*
@ -792,7 +649,7 @@ struct Expr {
};
ExprCls cls;
int val{0};
enum { _IsType = 1, _IsRvalue = 2, _IsLvalue = 4, _IsImpure = 32, _IsInsideParenthesis = 64 };
enum { _IsType = 1, _IsRvalue = 2, _IsLvalue = 4, _IsImpure = 32 };
int flags{0};
SrcLocation here;
td::RefInt256 intval;
@ -834,18 +691,23 @@ struct Expr {
// True when the _IsType bit is set in `flags`.
bool is_type() const {
  return (flags & _IsType) != 0;
}
// True when the _IsInsideParenthesis bit is set in `flags`.
bool is_inside_parenthesis() const {
  return (flags & _IsInsideParenthesis) != 0;
}
// True when this expression's class is _TypeApply.
bool is_type_apply() const {
  const bool matches = (cls == _TypeApply);
  return matches;
}
// True when this expression's class is _MkTuple.
bool is_mktuple() const {
  const bool matches = (cls == _MkTuple);
  return matches;
}
void chk_rvalue(const Lexer& lex) const; // todo here and below: strange to pass Lexer
void chk_lvalue(const Lexer& lex) const;
bool deduce_type(const Lexer& lex);
// Verifies that this expression is an rvalue.
// Throws ParseError located at `here` with the message "rvalue expected" when is_rvalue() is false.
void chk_rvalue() const {
  if (is_rvalue()) {
    return;
  }
  throw ParseError(here, "rvalue expected");
}
// Verifies that this expression is an lvalue.
// Throws ParseError located at `here` with the message "lvalue expected" when is_lvalue() is false.
void chk_lvalue() const {
  if (is_lvalue()) {
    return;
  }
  throw ParseError(here, "lvalue expected");
}
bool deduce_type();
// Records `loc` as this expression's source location (`here`).
void set_location(SrcLocation loc) {
here = loc;
}

140
tolk/type-expr.h Normal file
View file

@ -0,0 +1,140 @@
#pragma once
#include <vector>
#include <iostream>
#include "lexer.h"
namespace tolk {
// A node of a type-expression tree.
//
// `constr` selects the node kind and `args` holds the child nodes. `value` is a kind-dependent
// integer payload, as set by the constructors and factories below:
//   - te_Atomic:            the AtomicType (see is_atomic(int) / new_atomic());
//   - te_Unknown / te_Var:  the hole / variable index (see new_hole() / new_var());
//   - list-built nodes:     the number of children stored in `args`.
// Nodes are allocated with plain `new` by the static new_*() factories and passed around as raw
// pointers; nothing in this header releases them.
struct TypeExpr {
  enum Kind { te_Unknown, te_Var, te_Indirect, te_Atomic, te_Tensor, te_Tuple, te_Map, te_ForAll };
  // Atomic types; every enumerator reuses the numeric value of the corresponding lexer token.
  // todo not _
  enum AtomicType {
    _Int = tok_int,
    _Cell = tok_cell,
    _Slice = tok_slice,
    _Builder = tok_builder,
    _Cont = tok_cont,
    _Tuple = tok_tuple,
  };

  // Upper width bound used when no tighter bound is known.
  static constexpr int w_inf = 1023;

  Kind constr;
  int value;
  // Inclusive bounds [minw, maxw] on the width of this type; equal when the width is fixed.
  // NOTE(review): width presumably counts stack entries — confirm against compute_width().
  // Default-initialized so that the list-taking constructors never leave them indeterminate
  // before compute_width() (defined out of line) assigns the real bounds.
  int minw = 0;
  int maxw = w_inf;
  std::vector<TypeExpr*> args;
  bool was_forall_var = false;

  // Leaf node with unconstrained width [0, w_inf] (taken from the member defaults).
  TypeExpr(Kind _constr, int _val = 0) : constr(_constr), value(_val) {
  }
  // Leaf node with a fixed width.
  TypeExpr(Kind _constr, int _val, int width) : constr(_constr), value(_val), minw(width), maxw(width) {
  }
  // Node whose children are `list`; `value` becomes the child count.
  // `value` is declared before `args`, so list.size() is read before `list` is moved from.
  TypeExpr(Kind _constr, std::vector<TypeExpr*> list)
      : constr(_constr), value(static_cast<int>(list.size())), args(std::move(list)) {
    compute_width();
  }
  TypeExpr(Kind _constr, std::initializer_list<TypeExpr*> list)
      : constr(_constr), value(static_cast<int>(list.size())), args(list) {
    compute_width();
  }
  // Node with exactly one child.
  TypeExpr(Kind _constr, TypeExpr* elem0) : constr(_constr), value(1), args{elem0} {
    compute_width();
  }
  // Node whose children are `elem0` followed by every element of `list`.
  TypeExpr(Kind _constr, TypeExpr* elem0, std::vector<TypeExpr*> list)
      : constr(_constr), value(static_cast<int>(list.size()) + 1), args{elem0} {
    args.insert(args.end(), list.begin(), list.end());
    compute_width();
  }
  TypeExpr(Kind _constr, TypeExpr* elem0, std::initializer_list<TypeExpr*> list)
      : constr(_constr), value(static_cast<int>(list.size()) + 1), args{elem0} {
    args.insert(args.end(), list.begin(), list.end());
    compute_width();
  }

  // ---- kind predicates ----
  bool is_atomic() const {
    return constr == te_Atomic;
  }
  // True for an atomic node whose payload equals `v` (an AtomicType value).
  bool is_atomic(int v) const {
    return constr == te_Atomic && value == v;
  }
  bool is_int() const {
    return is_atomic(_Int);
  }
  bool is_var() const {
    return constr == te_Var;
  }
  bool is_map() const {
    return constr == te_Map;
  }
  bool is_tuple() const {
    return constr == te_Tuple;
  }

  // ---- width ----
  bool has_fixed_width() const {
    return minw == maxw;
  }
  // Returns the width when it is fixed, otherwise -1.
  int get_width() const {
    return has_fixed_width() ? minw : -1;
  }
  void compute_width();
  bool recompute_width();
  void show_width(std::ostream& os);

  std::ostream& print(std::ostream& os, int prio = 0) const;
  void replace_with(TypeExpr* te2);
  int extract_components(std::vector<TypeExpr*>& comp_list);
  bool equals_to(const TypeExpr* rhs) const;
  bool has_unknown_inside() const;

  // Global counters: `holes` is incremented by new_hole(), `type_vars` is decremented by new_var().
  static int holes, type_vars;

  // ---- factories ----
  // New te_Unknown node numbered with the next value of `holes`.
  static TypeExpr* new_hole() {
    return new TypeExpr{te_Unknown, ++holes};
  }
  // Same as new_hole(), but with a fixed width.
  static TypeExpr* new_hole(int width) {
    return new TypeExpr{te_Unknown, ++holes, width};
  }
  // Empty tensor: no children, width fixed at 0.
  static TypeExpr* new_unit() {
    return new TypeExpr{te_Tensor, 0, 0};
  }
  // Atomic node of width 1; `value` is expected to be an AtomicType.
  static TypeExpr* new_atomic(int value) {
    return new TypeExpr{te_Atomic, value, 1};
  }
  static TypeExpr* new_map(TypeExpr* from, TypeExpr* to);
  // Map between two fresh holes.
  static TypeExpr* new_func() {
    return new_map(new_hole(), new_hole());
  }
  // Tensor of `list`. With `red` set, a one-element list is not wrapped:
  // the element itself is returned.
  static TypeExpr* new_tensor(std::vector<TypeExpr*> list, bool red = true) {
    return red && list.size() == 1 ? list[0] : new TypeExpr{te_Tensor, std::move(list)};
  }
  // Tensor of `list`; never reduced, even for a single element.
  static TypeExpr* new_tensor(std::initializer_list<TypeExpr*> list) {
    return new TypeExpr{te_Tensor, list};
  }
  static TypeExpr* new_tensor(TypeExpr* te1, TypeExpr* te2) {
    return new_tensor({te1, te2});
  }
  static TypeExpr* new_tensor(TypeExpr* te1, TypeExpr* te2, TypeExpr* te3) {
    return new_tensor({te1, te2, te3});
  }
  // te_Tuple node with `arg0` as its single child.
  static TypeExpr* new_tuple(TypeExpr* arg0) {
    return new TypeExpr{te_Tuple, arg0};
  }
  // Tuple wrapping the tensor built from `list` (not reduced unless `red` is set).
  static TypeExpr* new_tuple(std::vector<TypeExpr*> list, bool red = false) {
    return new_tuple(new_tensor(std::move(list), red));
  }
  static TypeExpr* new_tuple(std::initializer_list<TypeExpr*> list) {
    return new_tuple(new_tensor(list));
  }
  // New te_Var node of width 1 numbered with the next (decreasing) value of `type_vars`.
  static TypeExpr* new_var() {
    return new TypeExpr{te_Var, --type_vars, 1};
  }
  // te_Var node of width 1 with an explicit index.
  static TypeExpr* new_var(int idx) {
    return new TypeExpr{te_Var, idx, 1};
  }
  // te_ForAll node: `body` is stored as args[0], followed by the elements of `list`.
  static TypeExpr* new_forall(std::vector<TypeExpr*> list, TypeExpr* body) {
    return new TypeExpr{te_ForAll, body, std::move(list)};
  }
  static TypeExpr* new_forall(std::initializer_list<TypeExpr*> list, TypeExpr* body) {
    return new TypeExpr{te_ForAll, body, list};
  }

  static bool remove_indirect(TypeExpr*& te, TypeExpr* forbidden = nullptr);
  static std::vector<TypeExpr*> remove_forall(TypeExpr*& te);
  static bool remove_forall_in(TypeExpr*& te, TypeExpr* te2, const std::vector<TypeExpr*>& new_vars);
};
// Stream-insertion overload taking a TypeExpr pointer; only declared here, defined elsewhere.
std::ostream& operator<<(std::ostream& os, TypeExpr* type_expr);
} // namespace tolk

View file

@ -268,8 +268,6 @@ std::ostream& TypeExpr::print(std::ostream& os, int lex_level) const {
return os << "cont";
case _Tuple:
return os << "tuple";
case _Type:
return os << "type";
default:
return os << "atomic-type-" << value;
}