1
0
Fork 0
mirror of https://github.com/ton-blockchain/ton synced 2025-02-12 19:22:37 +00:00

[Tolk] Compilation pipeline, register global symbols in advance

Since I've implemented the AST, I can now drop forward declarations.
Instead, I traverse the AST of all files and register global symbols
(functions, constants, global vars) as a separate step, in advance.

As a result, by the time the AST is converted to Expr/Op, all available
symbols are already registered.
This greatly simplifies handling the "intermediate state" of not-yet-known
functions and checking them afterward.

Redeclaration of local variables (inside the same scope)
is now also prohibited.
This commit is contained in:
tolk-vm 2024-10-31 11:04:58 +04:00
parent 80001d1756
commit 5a3e3595d6
No known key found for this signature in database
GPG key ID: 7905DD7FE0324B12
28 changed files with 1266 additions and 1134 deletions

View file

@ -676,7 +676,7 @@ int fixed248::pow(int x, int y) inline_ref {
if (sq <= 0) {
return - (sq == 0); ;; underflow
}
int y = expm1_f257(mulrshiftr256(ll, log2_const_f256()));
y = expm1_f257(mulrshiftr256(ll, log2_const_f256()));
return (y ~>> (9 - q)) - (-1 << sq);
}

View file

@ -7,7 +7,12 @@ set(TOLK_SOURCE
compiler-state.cpp
ast.cpp
ast-from-tokens.cpp
ast-to-legacy.cpp
pipe-discover-parse-sources.cpp
pipe-handle-pragmas.cpp
pipe-register-symbols.cpp
pipe-ast-to-legacy.cpp
pipe-find-unused-symbols.cpp
pipe-generate-fif-output.cpp
unify-types.cpp
abscode.cpp
gen-abscode.cpp

View file

@ -103,9 +103,9 @@ static void diagnose_addition_in_bitshift(SrcLocation loc, std::string_view bits
// TE ::= TA | TA -> TE
// TA ::= int | ... | cont | var | _ | () | ( TE { , TE } ) | [ TE { , TE } ]
TypeExpr* parse_type(Lexer& lex, V<ast_forall_list> forall_list);
static TypeExpr* parse_type(Lexer& lex, V<ast_forall_list> forall_list);
TypeExpr* parse_type1(Lexer& lex, V<ast_forall_list> forall_list) {
static TypeExpr* parse_type1(Lexer& lex, V<ast_forall_list> forall_list) {
switch (lex.tok()) {
case tok_int:
lex.next();
@ -165,7 +165,7 @@ TypeExpr* parse_type1(Lexer& lex, V<ast_forall_list> forall_list) {
return c == tok_clpar ? TypeExpr::new_tensor(std::move(tlist)) : TypeExpr::new_tuple(std::move(tlist));
}
TypeExpr* parse_type(Lexer& lex, V<ast_forall_list> forall_list) {
static TypeExpr* parse_type(Lexer& lex, V<ast_forall_list> forall_list) {
TypeExpr* res = parse_type1(lex, forall_list);
if (lex.tok() == tok_mapsto) {
lex.next();
@ -175,13 +175,14 @@ TypeExpr* parse_type(Lexer& lex, V<ast_forall_list> forall_list) {
return res;
}
AnyV parse_argument(Lexer& lex, V<ast_forall_list> forall_list) {
static AnyV parse_argument(Lexer& lex, V<ast_forall_list> forall_list) {
TypeExpr* arg_type = nullptr;
SrcLocation loc = lex.cur_location();
if (lex.tok() == tok_underscore) {
lex.next();
if (lex.tok() == tok_comma || lex.tok() == tok_clpar) {
return createV<ast_argument>(loc, "", TypeExpr::new_hole());
auto v_empty = createV<ast_identifier>(lex.cur_location(), "");
return createV<ast_argument>(loc, v_empty, TypeExpr::new_hole());
}
arg_type = TypeExpr::new_hole();
loc = lex.cur_location();
@ -201,16 +202,17 @@ AnyV parse_argument(Lexer& lex, V<ast_forall_list> forall_list) {
loc = lex.cur_location();
lex.next();
}
return createV<ast_argument>(loc, "", arg_type);
auto v_empty = createV<ast_identifier>(lex.cur_location(), "");
return createV<ast_argument>(loc, v_empty, arg_type);
}
lex.check(tok_identifier, "parameter name");
loc = lex.cur_location();
std::string_view arg_name = lex.cur_str();
auto v_ident = createV<ast_identifier>(lex.cur_location(), lex.cur_str());
lex.next();
return createV<ast_argument>(loc, arg_name, arg_type);
return createV<ast_argument>(loc, v_ident, arg_type);
}
AnyV parse_global_var_declaration(Lexer& lex) {
static AnyV parse_global_var_declaration(Lexer& lex) {
TypeExpr* declared_type = nullptr;
SrcLocation loc = lex.cur_location();
if (lex.tok() == tok_underscore) {
@ -221,14 +223,15 @@ AnyV parse_global_var_declaration(Lexer& lex) {
declared_type = parse_type(lex, nullptr);
}
lex.check(tok_identifier, "global variable name");
std::string_view var_name = lex.cur_str();
auto v_ident = createV<ast_identifier>(lex.cur_location(), lex.cur_str());
lex.next();
return createV<ast_global_var_declaration>(loc, var_name, declared_type);
return createV<ast_global_var_declaration>(loc, v_ident, declared_type);
}
AnyV parse_expr(Lexer& lex);
AnyV parse_constant_declaration(Lexer& lex) {
static AnyV parse_constant_declaration(Lexer& lex) {
SrcLocation loc = lex.cur_location();
TypeExpr *declared_type = nullptr;
if (lex.tok() == tok_int) {
declared_type = TypeExpr::new_atomic(TypeExpr::_Int);
@ -238,15 +241,14 @@ AnyV parse_constant_declaration(Lexer& lex) {
lex.next();
}
lex.check(tok_identifier, "constant name");
SrcLocation loc = lex.cur_location();
std::string_view const_name = lex.cur_str();
auto v_ident = createV<ast_identifier>(lex.cur_location(), lex.cur_str());
lex.next();
lex.expect(tok_assign, "'='");
AnyV init_value = parse_expr(lex);
return createV<ast_constant_declaration>(loc, const_name, declared_type, init_value);
return createV<ast_constant_declaration>(loc, v_ident, declared_type, init_value);
}
AnyV parse_argument_list(Lexer& lex, V<ast_forall_list> forall_list) {
static AnyV parse_argument_list(Lexer& lex, V<ast_forall_list> forall_list) {
SrcLocation loc = lex.cur_location();
std::vector<AnyV> args;
lex.expect(tok_oppar, "argument list");
@ -261,7 +263,7 @@ AnyV parse_argument_list(Lexer& lex, V<ast_forall_list> forall_list) {
return createV<ast_argument_list>(loc, std::move(args));
}
AnyV parse_constant_declaration_list(Lexer& lex) {
static AnyV parse_constant_declaration_list(Lexer& lex) {
std::vector<AnyV> consts;
SrcLocation loc = lex.cur_location();
lex.expect(tok_const, "'const'");
@ -276,7 +278,7 @@ AnyV parse_constant_declaration_list(Lexer& lex) {
return createV<ast_constant_declaration_list>(loc, std::move(consts));
}
AnyV parse_global_var_declaration_list(Lexer& lex) {
static AnyV parse_global_var_declaration_list(Lexer& lex) {
std::vector<AnyV> globals;
SrcLocation loc = lex.cur_location();
lex.expect(tok_global, "'global'");
@ -292,7 +294,7 @@ AnyV parse_global_var_declaration_list(Lexer& lex) {
}
// parse ( E { , E } ) | () | [ E { , E } ] | [] | id | num | _
AnyV parse_expr100(Lexer& lex) {
static AnyV parse_expr100(Lexer& lex) {
SrcLocation loc = lex.cur_location();
if (lex.tok() == tok_oppar) {
lex.next();
@ -402,7 +404,7 @@ AnyV parse_expr100(Lexer& lex) {
}
// parse E { E }
AnyV parse_expr90(Lexer& lex) {
static AnyV parse_expr90(Lexer& lex) {
AnyV res = parse_expr100(lex);
while (lex.tok() == tok_oppar || lex.tok() == tok_opbracket || (lex.tok() == tok_identifier && lex.cur_str()[0] != '.' && lex.cur_str()[0] != '~')) {
if (const auto* v_type_expr = res->try_as<ast_type_expression>()) {
@ -417,7 +419,7 @@ AnyV parse_expr90(Lexer& lex) {
}
// parse E { .method E | ~method E }
AnyV parse_expr80(Lexer& lex) {
static AnyV parse_expr80(Lexer& lex) {
AnyV lhs = parse_expr90(lex);
while (lex.tok() == tok_identifier && (lex.cur_str()[0] == '.' || lex.cur_str()[0] == '~')) {
std::string_view method_name = lex.cur_str();
@ -430,7 +432,7 @@ AnyV parse_expr80(Lexer& lex) {
}
// parse [ ~ | - | + ] E
AnyV parse_expr75(Lexer& lex) {
static AnyV parse_expr75(Lexer& lex) {
TokenType t = lex.tok();
if (t == tok_bitwise_not || t == tok_minus || t == tok_plus) {
SrcLocation loc = lex.cur_location();
@ -444,7 +446,7 @@ AnyV parse_expr75(Lexer& lex) {
}
// parse E { (* | / | % | /% | ^/ | ~/ | ^% | ~% ) E }
AnyV parse_expr30(Lexer& lex) {
static AnyV parse_expr30(Lexer& lex) {
AnyV lhs = parse_expr75(lex);
TokenType t = lex.tok();
while (t == tok_mul || t == tok_div || t == tok_mod || t == tok_divmod || t == tok_divC ||
@ -460,7 +462,7 @@ AnyV parse_expr30(Lexer& lex) {
}
// parse E { (+ | -) E }
AnyV parse_expr20(Lexer& lex) {
static AnyV parse_expr20(Lexer& lex) {
AnyV lhs = parse_expr30(lex);
TokenType t = lex.tok();
while (t == tok_minus || t == tok_plus) {
@ -475,7 +477,7 @@ AnyV parse_expr20(Lexer& lex) {
}
// parse E { ( << | >> | ~>> | ^>> ) E }
AnyV parse_expr17(Lexer& lex) {
static AnyV parse_expr17(Lexer& lex) {
AnyV lhs = parse_expr20(lex);
TokenType t = lex.tok();
while (t == tok_lshift || t == tok_rshift || t == tok_rshiftC || t == tok_rshiftR) {
@ -491,7 +493,7 @@ AnyV parse_expr17(Lexer& lex) {
}
// parse E [ (== | < | > | <= | >= | != | <=> ) E ]
AnyV parse_expr15(Lexer& lex) {
static AnyV parse_expr15(Lexer& lex) {
AnyV lhs = parse_expr17(lex);
TokenType t = lex.tok();
if (t == tok_eq || t == tok_lt || t == tok_gt || t == tok_leq || t == tok_geq || t == tok_neq || t == tok_spaceship) {
@ -505,7 +507,7 @@ AnyV parse_expr15(Lexer& lex) {
}
// parse E { ( & | `|` | ^ ) E }
AnyV parse_expr14(Lexer& lex) {
static AnyV parse_expr14(Lexer& lex) {
AnyV lhs = parse_expr15(lex);
TokenType t = lex.tok();
while (t == tok_bitwise_and || t == tok_bitwise_or || t == tok_bitwise_xor) {
@ -521,7 +523,7 @@ AnyV parse_expr14(Lexer& lex) {
}
// parse E [ ? E : E ]
AnyV parse_expr13(Lexer& lex) {
static AnyV parse_expr13(Lexer& lex) {
AnyV res = parse_expr14(lex);
if (lex.tok() == tok_question) {
SrcLocation loc = lex.cur_location();
@ -535,7 +537,7 @@ AnyV parse_expr13(Lexer& lex) {
}
// parse LE1 (= | += | -= | ... ) E2
AnyV parse_expr10(Lexer& lex) {
static AnyV parse_expr10(Lexer& lex) {
AnyV lhs = parse_expr13(lex);
TokenType t = lex.tok();
if (t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || t == tok_set_divR || t == tok_set_divC ||
@ -555,7 +557,7 @@ AnyV parse_expr(Lexer& lex) {
return parse_expr10(lex);
}
AnyV parse_return_stmt(Lexer& lex) {
static AnyV parse_return_stmt(Lexer& lex) {
SrcLocation loc = lex.cur_location();
lex.expect(tok_return, "'return'");
AnyV child = parse_expr(lex);
@ -565,7 +567,7 @@ AnyV parse_return_stmt(Lexer& lex) {
AnyV parse_statement(Lexer& lex);
V<ast_sequence> parse_sequence(Lexer& lex) {
static V<ast_sequence> parse_sequence(Lexer& lex) {
SrcLocation loc = lex.cur_location();
lex.expect(tok_opbrace, "'{'");
std::vector<AnyV> items;
@ -577,7 +579,7 @@ V<ast_sequence> parse_sequence(Lexer& lex) {
return createV<ast_sequence>(loc, loc_end, items);
}
AnyV parse_repeat_statement(Lexer& lex) {
static AnyV parse_repeat_statement(Lexer& lex) {
SrcLocation loc = lex.cur_location();
lex.expect(tok_repeat, "'repeat'");
AnyV cond = parse_expr(lex);
@ -585,7 +587,7 @@ AnyV parse_repeat_statement(Lexer& lex) {
return createV<ast_repeat_statement>(loc, cond, body);
}
AnyV parse_while_statement(Lexer& lex) {
static AnyV parse_while_statement(Lexer& lex) {
SrcLocation loc = lex.cur_location();
lex.expect(tok_while, "'while'");
AnyV cond = parse_expr(lex);
@ -593,7 +595,7 @@ AnyV parse_while_statement(Lexer& lex) {
return createV<ast_while_statement>(loc, cond, body);
}
ASTNodeBase* parse_do_until_statement(Lexer& lex) {
static AnyV parse_do_until_statement(Lexer& lex) {
SrcLocation loc = lex.cur_location();
lex.expect(tok_do, "'do'");
V<ast_sequence> body = parse_sequence(lex);
@ -602,7 +604,7 @@ ASTNodeBase* parse_do_until_statement(Lexer& lex) {
return createV<ast_do_until_statement>(loc, body, cond);
}
AnyV parse_try_catch_statement(Lexer& lex) {
static AnyV parse_try_catch_statement(Lexer& lex) {
SrcLocation loc = lex.cur_location();
lex.expect(tok_try, "'try'");
V<ast_sequence> try_body = parse_sequence(lex);
@ -612,7 +614,7 @@ AnyV parse_try_catch_statement(Lexer& lex) {
return createV<ast_try_catch_statement>(loc, try_body, catch_expr, catch_body);
}
AnyV parse_if_statement(Lexer& lex, bool is_ifnot) {
static AnyV parse_if_statement(Lexer& lex, bool is_ifnot) {
SrcLocation loc = lex.cur_location();
lex.next();
AnyV cond = parse_expr(lex);
@ -652,8 +654,9 @@ AnyV parse_statement(Lexer& lex) {
case tok_try:
return parse_try_catch_statement(lex);
case tok_semicolon: {
SrcLocation loc = lex.cur_location();
lex.next();
return createV<ast_empty>;
return createV<ast_empty>(loc);
}
default: {
AnyV expr = parse_expr(lex);
@ -663,11 +666,11 @@ AnyV parse_statement(Lexer& lex) {
}
}
AnyV parse_func_body(Lexer& lex) {
static AnyV parse_func_body(Lexer& lex) {
return parse_sequence(lex);
}
AnyV parse_asm_func_body(Lexer& lex, V<ast_argument_list> arg_list) {
static AnyV parse_asm_func_body(Lexer& lex, V<ast_argument_list> arg_list) {
SrcLocation loc = lex.cur_location();
lex.expect(tok_asm, "'asm'");
size_t n_args = arg_list->size();
@ -706,7 +709,7 @@ AnyV parse_asm_func_body(Lexer& lex, V<ast_argument_list> arg_list) {
return createV<ast_asm_body>(loc, std::move(arg_order), std::move(ret_order), std::move(asm_commands));
}
AnyV parse_forall(Lexer& lex) {
static AnyV parse_forall(Lexer& lex) {
SrcLocation loc = lex.cur_location();
std::vector<AnyV> forall_items;
lex.expect(tok_forall, "'forall'");
@ -726,7 +729,7 @@ AnyV parse_forall(Lexer& lex) {
return createV<ast_forall_list>{loc, std::move(forall_items)};
}
AnyV parse_function_declaration(Lexer& lex) {
static AnyV parse_function_declaration(Lexer& lex) {
SrcLocation loc = lex.cur_location();
V<ast_forall_list> forall_list = nullptr;
bool is_get_method = false;
@ -741,7 +744,7 @@ AnyV parse_function_declaration(Lexer& lex) {
}
TypeExpr* ret_type = parse_type(lex, forall_list);
lex.check(tok_identifier, "function name identifier expected");
std::string func_name = static_cast<std::string>(lex.cur_str());
auto v_ident = createV<ast_identifier>(lex.cur_location(), lex.cur_str());
lex.next();
V<ast_argument_list> arg_list = parse_argument_list(lex, forall_list)->as<ast_argument_list>();
bool marked_as_pure = false;
@ -790,13 +793,9 @@ AnyV parse_function_declaration(Lexer& lex) {
if (lex.tok() == tok_builtin) {
is_builtin = true;
body = createV<ast_empty>;
body = createV<ast_empty>(lex.cur_location());
lex.next();
lex.expect(tok_semicolon, "';'");
} else if (lex.tok() == tok_semicolon) {
// todo this is just a prototype, remove this "feature" in the future
lex.next();
body = createV<ast_empty>;
} else if (lex.tok() == tok_opbrace) {
body = parse_func_body(lex);
} else if (lex.tok() == tok_asm) {
@ -805,7 +804,7 @@ AnyV parse_function_declaration(Lexer& lex) {
lex.expect(tok_opbrace, "function body block");
}
auto f_declaration = createV<ast_function_declaration>(loc, func_name, arg_list, body);
auto f_declaration = createV<ast_function_declaration>(loc, v_ident, arg_list, body);
f_declaration->ret_type = ret_type;
f_declaration->forall_list = forall_list;
f_declaration->marked_as_pure = marked_as_pure;
@ -817,7 +816,7 @@ AnyV parse_function_declaration(Lexer& lex) {
return f_declaration;
}
AnyV parse_pragma(Lexer& lex) {
static AnyV parse_pragma(Lexer& lex) {
SrcLocation loc = lex.cur_location();
lex.next_special(tok_pragma_name, "pragma name");
std::string_view pragma_name = lex.cur_str();
@ -839,7 +838,7 @@ AnyV parse_pragma(Lexer& lex) {
return createV<ast_pragma_no_arg>(loc, pragma_name);
}
AnyV parse_include_statement(Lexer& lex) {
static AnyV parse_include_statement(Lexer& lex) {
SrcLocation loc = lex.cur_location();
lex.expect(tok_include, "#include");
lex.check(tok_string_const, "source file name");
@ -847,15 +846,14 @@ AnyV parse_include_statement(Lexer& lex) {
if (rel_filename.empty()) {
lex.error("imported file name is an empty string");
}
auto v_str = createV<ast_string_const>(lex.cur_location(), rel_filename, 0);
lex.next();
lex.expect(tok_semicolon, "';'");
return createV<ast_include_statement>(loc, rel_filename);
return createV<ast_include_statement>(loc, v_str);
}
// the main (exported) function
AnyV parse_src_file_to_ast(SrcFile* file) {
file->was_parsed = true;
AnyV parse_src_file_to_ast(const SrcFile* file) {
std::vector<AnyV> toplevel_declarations;
Lexer lex(file);
while (!lex.is_eof()) {
@ -867,6 +865,8 @@ AnyV parse_src_file_to_ast(SrcFile* file) {
toplevel_declarations.push_back(parse_global_var_declaration_list(lex));
} else if (lex.tok() == tok_const) {
toplevel_declarations.push_back(parse_constant_declaration_list(lex));
} else if (lex.tok() == tok_semicolon) {
lex.next(); // don't add op_empty, no need
} else {
toplevel_declarations.push_back(parse_function_declaration(lex));
}

View file

@ -22,6 +22,6 @@ namespace tolk {
struct ASTNodeBase;
const ASTNodeBase* parse_src_file_to_ast(SrcFile* file);
const ASTNodeBase* parse_src_file_to_ast(const SrcFile* file);
} // namespace tolk

View file

@ -96,64 +96,70 @@ class ASTStringifier final : public ASTVisitor {
depth--;
}
static std::string specific_str(AnyV node) {
switch (node->type) {
static std::string specific_str(AnyV v) {
switch (v->type) {
case ast_identifier:
return static_cast<std::string>(node->as<ast_identifier>()->name);
return static_cast<std::string>(v->as<ast_identifier>()->name);
case ast_int_const:
return static_cast<std::string>(node->as<ast_int_const>()->int_val);
return static_cast<std::string>(v->as<ast_int_const>()->int_val);
case ast_string_const:
if (char modifier = node->as<ast_string_const>()->modifier) {
return "\"" + static_cast<std::string>(node->as<ast_string_const>()->str_val) + "\"" + std::string(1, modifier);
if (char modifier = v->as<ast_string_const>()->modifier) {
return "\"" + static_cast<std::string>(v->as<ast_string_const>()->str_val) + "\"" + std::string(1, modifier);
} else {
return "\"" + static_cast<std::string>(node->as<ast_string_const>()->str_val) + "\"";
return "\"" + static_cast<std::string>(v->as<ast_string_const>()->str_val) + "\"";
}
case ast_function_call: {
if (auto v_lhs = v->as<ast_function_call>()->get_called_f()->try_as<ast_identifier>()) {
return static_cast<std::string>(v_lhs->name) + "()";
}
return {};
}
case ast_global_var_declaration:
return static_cast<std::string>(node->as<ast_global_var_declaration>()->var_name);
return static_cast<std::string>(v->as<ast_global_var_declaration>()->get_identifier()->name);
case ast_constant_declaration:
return static_cast<std::string>(node->as<ast_constant_declaration>()->const_name);
return static_cast<std::string>(v->as<ast_constant_declaration>()->get_identifier()->name);
case ast_type_expression: {
std::ostringstream os;
os << node->as<ast_type_expression>()->declared_type;
os << v->as<ast_type_expression>()->declared_type;
return os.str();
}
case ast_variable_declaration: {
std::ostringstream os;
os << node->as<ast_variable_declaration>()->declared_type;
os << v->as<ast_variable_declaration>()->declared_type;
return os.str();
}
case ast_dot_tilde_call:
return static_cast<std::string>(node->as<ast_dot_tilde_call>()->method_name);
return static_cast<std::string>(v->as<ast_dot_tilde_call>()->method_name);
case ast_unary_operator:
return static_cast<std::string>(node->as<ast_unary_operator>()->operator_name);
return static_cast<std::string>(v->as<ast_unary_operator>()->operator_name);
case ast_binary_operator:
return static_cast<std::string>(node->as<ast_binary_operator>()->operator_name);
return static_cast<std::string>(v->as<ast_binary_operator>()->operator_name);
case ast_sequence:
return "" + std::to_string(node->as<ast_sequence>()->get_items().size());
return "" + std::to_string(v->as<ast_sequence>()->get_items().size());
case ast_if_statement:
return node->as<ast_if_statement>()->is_ifnot ? "ifnot" : "";
return v->as<ast_if_statement>()->is_ifnot ? "ifnot" : "";
case ast_argument: {
std::ostringstream os;
os << node->as<ast_argument>()->arg_type;
return static_cast<std::string>(node->as<ast_argument>()->arg_name) + ": " + os.str();
os << v->as<ast_argument>()->arg_type;
return static_cast<std::string>(v->as<ast_argument>()->get_identifier()->name) + ": " + os.str();
}
case ast_function_declaration: {
std::string arg_names;
for (int i = 0; i < node->as<ast_function_declaration>()->get_num_args(); i++) {
for (int i = 0; i < v->as<ast_function_declaration>()->get_num_args(); i++) {
if (!arg_names.empty())
arg_names += ",";
arg_names += node->as<ast_function_declaration>()->get_arg(i)->arg_name;
arg_names += v->as<ast_function_declaration>()->get_arg(i)->get_identifier()->name;
}
return "fun " + node->as<ast_function_declaration>()->name + "(" + arg_names + ")";
return "fun " + static_cast<std::string>(v->as<ast_function_declaration>()->get_identifier()->name) + "(" + arg_names + ")";
}
case ast_pragma_no_arg:
return static_cast<std::string>(node->as<ast_pragma_no_arg>()->pragma_name);
return static_cast<std::string>(v->as<ast_pragma_no_arg>()->pragma_name);
case ast_pragma_version:
return static_cast<std::string>(node->as<ast_pragma_version>()->semver);
return static_cast<std::string>(v->as<ast_pragma_version>()->semver);
case ast_include_statement:
return static_cast<std::string>(node->as<ast_include_statement>()->file_name);
return static_cast<std::string>(v->as<ast_include_statement>()->get_file_leaf()->str_val);
case ast_tolk_file:
return node->as<ast_tolk_file>()->file->rel_filename;
return v->as<ast_tolk_file>()->file->rel_filename;
default:
return {};
}

View file

@ -1,28 +0,0 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "ast.h"
namespace tolk {
struct SrcFile;
void process_file_ast(AnyV file_ast);
} // namespace tolk

View file

@ -148,52 +148,4 @@ public:
}
};
// Abstract visitor over the top-level declarations of a single file.
// Subclasses implement one on_* callback per declaration kind; visit() performs the dispatch
// based on the vertex type, and start_visiting_file() drives it over every top-level vertex.
class ASTVisitorToplevelDeclarations : public ASTVisitor {
protected:
// lets subclasses refer to this base class as `parent`
using parent = ASTVisitorToplevelDeclarations;
// callbacks to be implemented by subclasses, one per kind of top-level declaration
virtual void on_pragma_no_arg(V<ast_pragma_no_arg> v) = 0;
virtual void on_pragma_version(V<ast_pragma_version> v) = 0;
virtual void on_include_statement(V<ast_include_statement> v) = 0;
virtual void on_constant_declaration(V<ast_constant_declaration> v) = 0;
virtual void on_global_var_declaration(V<ast_global_var_declaration> v) = 0;
virtual void on_function_declaration(V<ast_function_declaration> v) = 0;
// Dispatches a top-level vertex to the matching on_* callback.
// Declaration lists are unwrapped: the callback is invoked once per contained declaration.
// Any other vertex type results in UnexpectedASTNodeType being thrown.
void visit(AnyV v) final {
switch (v->type) {
case ast_pragma_no_arg:
on_pragma_no_arg(v->as<ast_pragma_no_arg>());
break;
case ast_pragma_version:
on_pragma_version(v->as<ast_pragma_version>());
break;
case ast_include_statement:
on_include_statement(v->as<ast_include_statement>());
break;
case ast_constant_declaration_list:
// a list vertex is not reported itself, only its individual declarations
for (const auto& v_decl : v->as<ast_constant_declaration_list>()->get_declarations()) {
on_constant_declaration(v_decl->as<ast_constant_declaration>());
}
break;
case ast_global_var_declaration_list:
// same unwrapping as for constants
for (const auto& v_decl : v->as<ast_global_var_declaration_list>()->get_declarations()) {
on_global_var_declaration(v_decl->as<ast_global_var_declaration>());
}
break;
case ast_function_declaration:
on_function_declaration(v->as<ast_function_declaration>());
break;
default:
throw UnexpectedASTNodeType(v, "ASTVisitorToplevelDeclarations::visit");
}
}
public:
// Entry point: visits every top-level declaration of the given file vertex, in order.
void start_visiting_file(V<ast_tolk_file> v_file) {
for (AnyV v : v_file->get_toplevel_declarations()) {
visit(v);
}
}
};
} // namespace tolk

View file

@ -60,11 +60,15 @@ int Vertex<ast_forall_list>::lookup_idx(std::string_view nameT) const {
int Vertex<ast_argument_list>::lookup_idx(std::string_view arg_name) const {
for (size_t idx = 0; idx < children.size(); ++idx) {
if (children[idx] && children[idx]->as<ast_argument>()->arg_name == arg_name) {
if (children[idx] && children[idx]->as<ast_argument>()->get_identifier()->name == arg_name) {
return static_cast<int>(idx);
}
}
return -1;
}
// Stores the given source file pointer into the `file` field of this include-statement vertex.
// The method is const-qualified, so constness of `this` is cast away to perform the assignment.
void Vertex<ast_include_statement>::mutate_set_src_file(const SrcFile* file) const {
const_cast<Vertex*>(this)->file = file;
}
} // namespace tolk

View file

@ -27,7 +27,7 @@
* Historically, in FunC, there was no AST: while lexing, symbols were registered, types were inferred, and so on.
* There was no way to perform any more or less semantic analysis.
* In Tolk, I've implemented parsing .tolk files into AST at first, and then converting this AST
* into legacy representation (see ast-to-legacy.cpp).
* into legacy representation (see pipe-ast-to-legacy.cpp).
* In the future, more and more code analysis will be moved out of legacy to AST-level.
*
* From the user's point of view, all AST vertices are constant. All API is based on constancy.
@ -206,8 +206,8 @@ public:
template<>
struct Vertex<ast_empty> final : ASTNodeLeaf {
Vertex()
: ASTNodeLeaf(ast_empty, SrcLocation()) {}
explicit Vertex(SrcLocation loc)
: ASTNodeLeaf(ast_empty, loc) {}
};
template<>
@ -268,12 +268,13 @@ struct Vertex<ast_parenthesized_expr> final : ASTNodeUnary {
};
template<>
struct Vertex<ast_global_var_declaration> final : ASTNodeLeaf {
std::string_view var_name;
struct Vertex<ast_global_var_declaration> final : ASTNodeUnary {
TypeExpr* declared_type; // may be nullptr
Vertex(SrcLocation loc, std::string_view var_name, TypeExpr* declared_type)
: ASTNodeLeaf(ast_global_var_declaration, loc), var_name(var_name), declared_type(declared_type) {}
auto get_identifier() const { return child->as<ast_identifier>(); }
Vertex(SrcLocation loc, V<ast_identifier> var_identifier, TypeExpr* declared_type)
: ASTNodeUnary(ast_global_var_declaration, loc, var_identifier), declared_type(declared_type) {}
};
template<>
@ -285,14 +286,14 @@ struct Vertex<ast_global_var_declaration_list> final : ASTNodeVararg {
};
template<>
struct Vertex<ast_constant_declaration> final : ASTNodeUnary {
std::string_view const_name;
struct Vertex<ast_constant_declaration> final : ASTNodeBinary {
TypeExpr* declared_type; // may be nullptr
AnyV get_init_value() const { return child; }
auto get_identifier() const { return lhs->as<ast_identifier>(); }
AnyV get_init_value() const { return rhs; }
Vertex(SrcLocation loc, std::string_view const_name, TypeExpr* declared_type, AnyV init_value)
: ASTNodeUnary(ast_constant_declaration, loc, init_value), const_name(const_name), declared_type(declared_type) {}
Vertex(SrcLocation loc, V<ast_identifier> const_identifier, TypeExpr* declared_type, AnyV init_value)
: ASTNodeBinary(ast_constant_declaration, loc, const_identifier, init_value), declared_type(declared_type) {}
};
template<>
@ -478,12 +479,13 @@ struct Vertex<ast_forall_list> final : ASTNodeVararg {
};
template<>
struct Vertex<ast_argument> final : ASTNodeLeaf {
std::string_view arg_name;
struct Vertex<ast_argument> final : ASTNodeUnary {
TypeExpr* arg_type;
Vertex(SrcLocation loc, std::string_view arg_name, TypeExpr* arg_type)
: ASTNodeLeaf(ast_argument, loc), arg_name(arg_name), arg_type(arg_type) {}
auto get_identifier() const { return child->as<ast_identifier>(); }
Vertex(SrcLocation loc, V<ast_identifier> arg_identifier, TypeExpr* arg_type)
: ASTNodeUnary(ast_argument, loc, arg_identifier), arg_type(arg_type) {}
};
template<>
@ -509,13 +511,13 @@ struct Vertex<ast_argument_list> final : ASTNodeVararg {
};
template<>
struct Vertex<ast_function_declaration> final : ASTNodeBinary {
int get_num_args() const { return lhs->as<ast_argument_list>()->size(); }
auto get_arg_list() const { return lhs->as<ast_argument_list>(); }
auto get_arg(int i) const { return lhs->as<ast_argument_list>()->get_arg(i); }
AnyV get_body() const { return rhs; } // ast_sequence / ast_asm_body / ast_empty
struct Vertex<ast_function_declaration> final : ASTNodeVararg {
auto get_identifier() const { return children.at(0)->as<ast_identifier>(); }
int get_num_args() const { return children.at(1)->as<ast_argument_list>()->size(); }
auto get_arg_list() const { return children.at(1)->as<ast_argument_list>(); }
auto get_arg(int i) const { return children.at(1)->as<ast_argument_list>()->get_arg(i); }
AnyV get_body() const { return children.at(2); } // ast_sequence / ast_asm_body
std::string name;
TypeExpr* ret_type = nullptr;
V<ast_forall_list> forall_list = nullptr;
bool marked_as_pure = false;
@ -525,8 +527,10 @@ struct Vertex<ast_function_declaration> final : ASTNodeBinary {
bool marked_as_inline_ref = false;
V<ast_int_const> method_id = nullptr;
Vertex(SrcLocation loc, std::string name, V<ast_argument_list> args, AnyV body)
: ASTNodeBinary(ast_function_declaration, loc, args, body), name(std::move(name)) {}
bool is_asm_function() const { return children.at(2)->type == ast_asm_body; }
Vertex(SrcLocation loc, V<ast_identifier> name_identifier, V<ast_argument_list> args, AnyV body)
: ASTNodeVararg(ast_function_declaration, loc, {name_identifier, args, body}) {}
};
template<>
@ -547,11 +551,17 @@ struct Vertex<ast_pragma_version> final : ASTNodeLeaf {
};
template<>
struct Vertex<ast_include_statement> final : ASTNodeLeaf {
std::string_view file_name;
struct Vertex<ast_include_statement> final : ASTNodeUnary {
const SrcFile* file = nullptr; // assigned after includes have been resolved
Vertex(SrcLocation loc, std::string_view file_name)
: ASTNodeLeaf(ast_include_statement, loc), file_name(file_name) {}
auto get_file_leaf() const { return child->as<ast_string_const>(); }
std::string get_file_name() const { return static_cast<std::string>(child->as<ast_string_const>()->str_val); }
void mutate_set_src_file(const SrcFile* file) const;
Vertex(SrcLocation loc, V<ast_string_const> file_name)
: ASTNodeUnary(ast_include_statement, loc, file_name) {}
};
template<>

View file

@ -27,11 +27,8 @@ using namespace std::literals::string_literals;
*/
SymDef* define_builtin_func_impl(const std::string& name, SymValAsmFunc* func_val) {
if (name.back() == '_') {
G.prohibited_var_names.insert(name);
}
sym_idx_t name_idx = G.symbols.lookup(name, 1);
SymDef* def = define_global_symbol(name_idx, true);
sym_idx_t name_idx = G.symbols.lookup_add(name);
SymDef* def = define_global_symbol(name_idx);
if (!def) {
std::cerr << "fatal: global function `" << name << "` already defined" << std::endl;
std::exit(1);

View file

@ -74,12 +74,9 @@ struct CompilerState {
std::vector<std::pair<int, SymDef>> symbol_stack;
std::vector<SrcLocation> scope_opened_at;
std::vector<SymDef*> all_code_functions, all_global_vars, all_get_methods, all_constants;
AllRegisteredSrcFiles all_src_files;
int glob_func_cnt = 0, glob_var_cnt = 0, const_cnt = 0;
std::vector<SymDef*> glob_func, glob_vars, glob_get_methods;
std::set<std::string> prohibited_var_names;
std::string generated_from;
GlobalPragma pragma_allow_post_modification{"allow-post-modification"};
GlobalPragma pragma_compute_asm_ltr{"compute-asm-ltr"};

View file

@ -185,10 +185,6 @@ int Expr::predefine_vars() {
case _Var:
if (!sym) {
tolk_assert(val < 0 && here.is_defined());
if (G.prohibited_var_names.count(G.symbols.get_name(~val))) {
throw ParseError{
here, PSTRING() << "symbol `" << G.symbols.get_name(~val) << "` cannot be redefined as a variable"};
}
sym = define_symbol(~val, false, here);
// std::cerr << "predefining variable " << symbols.get_name(~val) << std::endl;
if (!sym) {
@ -319,7 +315,13 @@ std::vector<var_idx_t> Expr::pre_compile(CodeBlob& code, std::vector<std::pair<S
SymDef* applied_sym = sym;
auto func = dynamic_cast<SymValFunc*>(applied_sym->value);
// replace `beginCell()` with `begin_cell()`
// todo it should be done at AST level, see comment above detect_if_function_just_wraps_another()
if (func && func->is_just_wrapper_for_another_f()) {
// todo currently, f is inlined only if anotherF is declared (and processed) before
if (!dynamic_cast<SymValCodeFunc*>(func)->code) { // if anotherF is processed after
func->flags |= SymValFunc::flagUsedAsNonCall;
res = pre_compile_tensor(args, code, lval_globs);
} else {
// body is { Op::_Import; Op::_Call; Op::_Return; }
const std::unique_ptr<Op>& op_call = dynamic_cast<SymValCodeFunc*>(func)->code->ops->next;
applied_sym = op_call->fun_ref;
@ -331,6 +333,7 @@ std::vector<var_idx_t> Expr::pre_compile(CodeBlob& code, std::vector<std::pair<S
for (var_idx_t right_idx : op_call->right) {
res.emplace_back(res_inner[right_idx]);
}
}
} else {
res = pre_compile_tensor(args, code, lval_globs);
}

View file

@ -629,7 +629,7 @@ void lexer_init() {
// Hence, it's difficult to measure Lexer performance separately.
// This function can be called just to tick Lexer performance, it just scans all input files.
// There is no sense to use it in production, but when refactoring and optimizing Lexer, it's useful.
void lexer_measure_performance(const std::vector<SrcFile*>& files_to_just_parse) {
void lexer_measure_performance(const AllSrcFiles& files_to_just_parse) {
for (const SrcFile* file : files_to_just_parse) {
Lexer lex(file);
while (!lex.is_eof()) {

View file

@ -234,6 +234,6 @@ public:
void lexer_init();
// todo #ifdef TOLK_PROFILING
void lexer_measure_performance(const std::vector<SrcFile*>& files_to_just_parse);
void lexer_measure_performance(const AllSrcFiles& files_to_just_parse);
} // namespace tolk

View file

@ -14,18 +14,15 @@
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#include "ast-to-legacy.h"
#include "ast.h"
#include "ast-visitor.h"
#include "ast-from-tokens.h" // todo should be deleted
#include "compiler-state.h"
#include "src-file.h"
#include "tolk.h"
#include "td/utils/crypto.h"
#include "src-file.h"
#include "ast.h"
#include "compiler-state.h"
#include "common/refint.h"
#include "openssl/digest.hpp"
#include "block/block.h"
#include "block-parse.h"
#include "td/utils/crypto.h"
/*
* In this module, we convert modern AST representation to legacy representation
@ -40,35 +37,12 @@ static int calc_sym_idx(std::string_view sym_name) {
return G.symbols.lookup_add(sym_name);
}
Expr* process_expr(AnyV v, CodeBlob& code, bool nv = false);
// Creates the SymValCodeFunc for a global function and attaches it to func_sym.
// The new value gets the current G.glob_func_cnt as its index; afterwards the symbol
// is appended to G.glob_func and the counter is advanced.
// Returns the freshly created value (also reachable via func_sym->value).
static SymValCodeFunc* make_new_glob_func(SymDef* func_sym, TypeExpr* func_type, bool marked_as_pure) {
  auto* code_func = new SymValCodeFunc{G.glob_func_cnt, func_type, marked_as_pure};
#ifdef TOLK_DEBUG
  // keep a readable name on the value itself, debug builds only
  code_func->name = func_sym->name();
#endif
  func_sym->value = code_func;
  G.glob_func.emplace_back(func_sym);
  ++G.glob_func_cnt;
  return code_func;
}
static bool check_global_func(SrcLocation loc, sym_idx_t func_name) {
SymDef* def = lookup_symbol(func_name);
if (!def) {
static void check_global_func(SrcLocation loc, sym_idx_t func_name) {
SymDef* sym_def = lookup_symbol(func_name);
if (!sym_def) {
throw ParseError(loc, "undefined symbol `" + G.symbols.get_name(func_name) + "`");
return false;
}
SymVal* val = dynamic_cast<SymVal*>(def->value);
if (!val) {
throw ParseError(loc, "symbol `" + G.symbols.get_name(func_name) + "` has no value and no type");
return false;
} else if (!val->get_type()) {
throw ParseError(loc, "symbol `" + G.symbols.get_name(func_name) + "` has no type, possibly not a function");
return false;
} else {
return true;
}
}
@ -103,7 +77,7 @@ static void check_import_exists_when_using_sym(AnyV v_usage, const SymDef* used_
}
}
Expr* process_expr(V<ast_binary_operator> v, CodeBlob& code, bool nv) {
static Expr* process_expr(V<ast_binary_operator> v, CodeBlob& code, bool nv) {
TokenType t = v->tok;
std::string operator_name = static_cast<std::string>(v->operator_name);
@ -114,7 +88,6 @@ Expr* process_expr(V<ast_binary_operator> v, CodeBlob& code, bool nv) {
x->chk_lvalue();
x->chk_rvalue();
sym_idx_t name = G.symbols.lookup_add("^_" + operator_name + "_");
check_global_func(v->loc, name);
Expr* y = process_expr(v->get_rhs(), code, false);
y->chk_rvalue();
Expr* z = new Expr{Expr::_Apply, name, {x, y}};
@ -152,7 +125,6 @@ Expr* process_expr(V<ast_binary_operator> v, CodeBlob& code, bool nv) {
Expr* res = process_expr(v->get_lhs(), code, nv);
res->chk_rvalue();
sym_idx_t name = G.symbols.lookup_add("_" + operator_name + "_");
check_global_func(v->loc, name);
Expr* x = process_expr(v->get_rhs(), code, false);
x->chk_rvalue();
res = new Expr{Expr::_Apply, name, {res, x}};
@ -166,10 +138,9 @@ Expr* process_expr(V<ast_binary_operator> v, CodeBlob& code, bool nv) {
v->error("unsupported binary operator");
}
Expr* process_expr(V<ast_unary_operator> v, CodeBlob& code) {
static Expr* process_expr(V<ast_unary_operator> v, CodeBlob& code) {
TokenType t = v->tok;
sym_idx_t name = G.symbols.lookup_add(static_cast<std::string>(v->operator_name) + "_");
check_global_func(v->loc, name);
Expr* x = process_expr(v->get_rhs(), code, false);
x->chk_rvalue();
@ -200,7 +171,7 @@ Expr* process_expr(V<ast_unary_operator> v, CodeBlob& code) {
return res;
}
Expr* process_expr(V<ast_dot_tilde_call> v, CodeBlob& code, bool nv) {
static Expr* process_expr(V<ast_dot_tilde_call> v, CodeBlob& code, bool nv) {
Expr* res = process_expr(v->get_lhs(), code, nv);
bool modify = v->method_name[0] == '~';
Expr* obj = res;
@ -209,23 +180,20 @@ Expr* process_expr(V<ast_dot_tilde_call> v, CodeBlob& code, bool nv) {
} else {
obj->chk_rvalue();
}
sym_idx_t name = calc_sym_idx(v->method_name);
const SymDef* sym = lookup_symbol(name);
sym_idx_t name_idx = calc_sym_idx(v->method_name);
const SymDef* sym = lookup_symbol(name_idx);
if (!sym || !dynamic_cast<SymValFunc*>(sym->value)) {
sym_idx_t name1 = G.symbols.lookup(v->method_name.substr(1));
if (name1) {
const SymDef* sym1 = lookup_symbol(name1);
if (sym1 && dynamic_cast<SymValFunc*>(sym1->value)) {
name = name1;
name_idx = name1;
sym = sym1;
}
}
}
check_global_func(v->loc, name);
if (G.is_verbosity(2)) {
std::cerr << "using symbol `" << G.symbols.get_name(name) << "` for method call of " << v->method_name << std::endl;
}
sym = lookup_symbol(name);
check_global_func(v->loc, name_idx);
sym = lookup_symbol(name_idx);
SymValFunc* val = sym ? dynamic_cast<SymValFunc*>(sym->value) : nullptr;
if (!val) {
v->error("undefined method call");
@ -233,10 +201,10 @@ Expr* process_expr(V<ast_dot_tilde_call> v, CodeBlob& code, bool nv) {
Expr* x = process_expr(v->get_arg(), code, false);
x->chk_rvalue();
if (x->cls == Expr::_Tensor) {
res = new Expr{Expr::_Apply, name, {obj}};
res = new Expr{Expr::_Apply, name_idx, {obj}};
res->args.insert(res->args.end(), x->args.begin(), x->args.end());
} else {
res = new Expr{Expr::_Apply, name, {obj, x}};
res = new Expr{Expr::_Apply, name_idx, {obj, x}};
}
res->here = v->loc;
res->flags = Expr::_IsRvalue | (val->is_marked_as_pure() ? 0 : Expr::_IsImpure);
@ -246,13 +214,13 @@ Expr* process_expr(V<ast_dot_tilde_call> v, CodeBlob& code, bool nv) {
res = new Expr{Expr::_LetFirst, {obj->copy(), tmp}};
res->here = v->loc;
res->flags = tmp->flags;
res->set_val(name);
res->set_val(name_idx);
res->deduce_type();
}
return res;
}
Expr* process_expr(V<ast_ternary_operator> v, CodeBlob& code, bool nv) {
static Expr* process_expr(V<ast_ternary_operator> v, CodeBlob& code, bool nv) {
Expr* cond = process_expr(v->get_cond(), code, nv);
cond->chk_rvalue();
Expr* x = process_expr(v->get_when_true(), code, false);
@ -266,7 +234,7 @@ Expr* process_expr(V<ast_ternary_operator> v, CodeBlob& code, bool nv) {
return res;
}
Expr* process_expr(V<ast_function_call> v, CodeBlob& code, bool nv) {
static Expr* process_expr(V<ast_function_call> v, CodeBlob& code, bool nv) {
Expr* res = process_expr(v->get_called_f(), code, nv);
Expr* x = process_expr(v->get_called_arg(), code, false);
x->chk_rvalue();
@ -276,7 +244,7 @@ Expr* process_expr(V<ast_function_call> v, CodeBlob& code, bool nv) {
return res;
}
Expr* process_expr(V<ast_tensor> v, CodeBlob& code, bool nv) {
static Expr* process_expr(V<ast_tensor> v, CodeBlob& code, bool nv) {
if (v->empty()) {
Expr* res = new Expr{Expr::_Tensor, {}};
res->flags = Expr::_IsRvalue;
@ -302,7 +270,7 @@ Expr* process_expr(V<ast_tensor> v, CodeBlob& code, bool nv) {
return res;
}
Expr* process_expr(V<ast_variable_declaration> v, CodeBlob& code) {
static Expr* process_expr(V<ast_variable_declaration> v, CodeBlob& code) {
Expr* x = process_expr(v->get_variable_or_list(), code, true);
x->chk_lvalue(); // chk_lrvalue() ?
Expr* res = new Expr{Expr::_TypeApply, {x}};
@ -320,7 +288,7 @@ Expr* process_expr(V<ast_variable_declaration> v, CodeBlob& code) {
return res;
}
Expr* process_expr(V<ast_tensor_square> v, CodeBlob& code, bool nv) {
static Expr* process_expr(V<ast_tensor_square> v, CodeBlob& code, bool nv) {
if (v->empty()) {
Expr* res = new Expr{Expr::_Tensor, {}};
res->flags = Expr::_IsRvalue;
@ -354,7 +322,7 @@ Expr* process_expr(V<ast_tensor_square> v, CodeBlob& code, bool nv) {
return res;
}
Expr* process_expr(V<ast_int_const> v) {
static Expr* process_expr(V<ast_int_const> v) {
Expr* res = new Expr{Expr::_Const, v->loc};
res->flags = Expr::_IsRvalue;
res->intval = td::string_to_int256(static_cast<std::string>(v->int_val));
@ -365,7 +333,7 @@ Expr* process_expr(V<ast_int_const> v) {
return res;
}
Expr* process_expr(V<ast_string_const> v) {
static Expr* process_expr(V<ast_string_const> v) {
std::string str = static_cast<std::string>(v->str_val);
Expr* res;
switch (v->modifier) {
@ -432,12 +400,12 @@ Expr* process_expr(V<ast_string_const> v) {
break;
}
default:
__builtin_unreachable();
tolk_assert(false);
}
return res;
}
Expr* process_expr(V<ast_bool_const> v) {
static Expr* process_expr(V<ast_bool_const> v) {
SymDef* sym = lookup_symbol(calc_sym_idx(v->bool_val ? "true" : "false"));
tolk_assert(sym);
Expr* res = new Expr{Expr::_Apply, sym, {}};
@ -446,7 +414,7 @@ Expr* process_expr(V<ast_bool_const> v) {
return res;
}
Expr* process_expr([[maybe_unused]] V<ast_nil_tuple> v) {
static Expr* process_expr([[maybe_unused]] V<ast_nil_tuple> v) {
SymDef* sym = lookup_symbol(calc_sym_idx("nil"));
tolk_assert(sym);
Expr* res = new Expr{Expr::_Apply, sym, {}};
@ -455,8 +423,15 @@ Expr* process_expr([[maybe_unused]] V<ast_nil_tuple> v) {
return res;
}
Expr* process_expr(V<ast_identifier> v, bool nv) {
static Expr* process_expr(V<ast_identifier> v, bool nv) {
SymDef* sym = lookup_symbol(calc_sym_idx(v->name));
if (nv && sym) {
if (sym->level != G.scope_level) {
sym = nullptr; // declaring a new variable with the same name, but in another scope
} else {
v->error("redeclaration of local variable `" + static_cast<std::string>(v->name) + "`");
}
}
if (sym && dynamic_cast<SymValGlobVar*>(sym->value)) {
check_import_exists_when_using_sym(v, sym);
auto val = dynamic_cast<SymValGlobVar*>(sym->value);
@ -587,7 +562,7 @@ void combine_parallel(val& x, const val y) {
}
} // namespace blk_fl
blk_fl::val process_vertex(V<ast_return_statement> v, CodeBlob& code) {
static blk_fl::val process_vertex(V<ast_return_statement> v, CodeBlob& code) {
Expr* expr = process_expr(v->get_return_value(), code);
expr->chk_rvalue();
try {
@ -604,7 +579,7 @@ blk_fl::val process_vertex(V<ast_return_statement> v, CodeBlob& code) {
return blk_fl::ret;
}
void append_implicit_ret_stmt(V<ast_sequence> v, CodeBlob& code) {
static void append_implicit_ret_stmt(V<ast_sequence> v, CodeBlob& code) {
TypeExpr* ret_type = TypeExpr::new_unit();
try {
// std::cerr << "in implicit return: ";
@ -620,7 +595,7 @@ void append_implicit_ret_stmt(V<ast_sequence> v, CodeBlob& code) {
blk_fl::val process_stmt(AnyV v, CodeBlob& code);
blk_fl::val process_vertex(V<ast_sequence> v, CodeBlob& code, bool no_new_scope = false) {
static blk_fl::val process_vertex(V<ast_sequence> v, CodeBlob& code, bool no_new_scope = false) {
if (!no_new_scope) {
open_scope(v->loc);
}
@ -639,7 +614,7 @@ blk_fl::val process_vertex(V<ast_sequence> v, CodeBlob& code, bool no_new_scope
return res;
}
blk_fl::val process_vertex(V<ast_repeat_statement> v, CodeBlob& code) {
static blk_fl::val process_vertex(V<ast_repeat_statement> v, CodeBlob& code) {
Expr* expr = process_expr(v->get_cond(), code);
expr->chk_rvalue();
auto cnt_type = TypeExpr::new_atomic(TypeExpr::_Int);
@ -661,7 +636,7 @@ blk_fl::val process_vertex(V<ast_repeat_statement> v, CodeBlob& code) {
return res | blk_fl::end;
}
blk_fl::val process_vertex(V<ast_while_statement> v, CodeBlob& code) {
static blk_fl::val process_vertex(V<ast_while_statement> v, CodeBlob& code) {
Expr* expr = process_expr(v->get_cond(), code);
expr->chk_rvalue();
auto cnt_type = TypeExpr::new_atomic(TypeExpr::_Int);
@ -685,7 +660,7 @@ blk_fl::val process_vertex(V<ast_while_statement> v, CodeBlob& code) {
return res1 | blk_fl::end;
}
blk_fl::val process_vertex(V<ast_do_until_statement> v, CodeBlob& code) {
static blk_fl::val process_vertex(V<ast_do_until_statement> v, CodeBlob& code) {
Op& until_op = code.emplace_back(v->loc, Op::_Until);
code.push_set_cur(until_op.block0);
open_scope(v->loc);
@ -709,7 +684,7 @@ blk_fl::val process_vertex(V<ast_do_until_statement> v, CodeBlob& code) {
return res & ~blk_fl::empty;
}
blk_fl::val process_vertex(V<ast_try_catch_statement> v, CodeBlob& code) {
static blk_fl::val process_vertex(V<ast_try_catch_statement> v, CodeBlob& code) {
code.require_callxargs = true;
Op& try_catch_op = code.emplace_back(v->loc, Op::_TryCatch);
code.push_set_cur(try_catch_op.block0);
@ -738,7 +713,7 @@ blk_fl::val process_vertex(V<ast_try_catch_statement> v, CodeBlob& code) {
return res0;
}
blk_fl::val process_vertex(V<ast_if_statement> v, CodeBlob& code, TokenType first_lex = tok_if) {
static blk_fl::val process_vertex(V<ast_if_statement> v, CodeBlob& code) {
Expr* expr = process_expr(v->get_cond(), code);
expr->chk_rvalue();
auto flag_type = TypeExpr::new_atomic(TypeExpr::_Int);
@ -795,14 +770,11 @@ blk_fl::val process_stmt(AnyV v, CodeBlob& code) {
}
}
FormalArg process_vertex(V<ast_argument> v, int fa_idx) {
if (v->arg_name.empty()) {
static FormalArg process_vertex(V<ast_argument> v, int fa_idx) {
if (v->get_identifier()->name.empty()) {
return std::make_tuple(v->arg_type, (SymDef*)nullptr, v->loc);
}
if (G.prohibited_var_names.count(static_cast<std::string>(v->arg_name))) {
v->error("symbol `" + static_cast<std::string>(v->arg_name) + "` cannot be redefined as a variable");
}
SymDef* new_sym_def = define_symbol(calc_sym_idx(v->arg_name), true, v->loc);
SymDef* new_sym_def = define_symbol(calc_sym_idx(v->get_identifier()->name), true, v->loc);
if (!new_sym_def) {
v->error("cannot define symbol");
}
@ -813,16 +785,22 @@ FormalArg process_vertex(V<ast_argument> v, int fa_idx) {
return std::make_tuple(v->arg_type, new_sym_def, v->loc);
}
CodeBlob* process_vertex(V<ast_sequence> v_body, V<ast_argument_list> arg_list, TypeExpr* ret_type, bool marked_as_pure) {
CodeBlob* blob = new CodeBlob{ret_type};
if (marked_as_pure) {
static void convert_function_body_to_CodeBlob(V<ast_function_declaration> v, V<ast_sequence> v_body) {
SymDef* sym_def = lookup_symbol(calc_sym_idx(v->get_identifier()->name));
SymValCodeFunc* sym_val = dynamic_cast<SymValCodeFunc*>(sym_def->value);
tolk_assert(sym_val != nullptr);
open_scope(v->loc);
CodeBlob* blob = new CodeBlob{static_cast<std::string>(v->get_identifier()->name), v->loc, v->ret_type};
if (v->marked_as_pure) {
blob->flags |= CodeBlob::_ForbidImpure;
}
FormalArgList legacy_arg_list;
for (int i = 0; i < arg_list->size(); ++i) {
legacy_arg_list.emplace_back(process_vertex(arg_list->get_arg(i), i));
for (int i = 0; i < v->get_num_args(); ++i) {
legacy_arg_list.emplace_back(process_vertex(v->get_arg(i), i));
}
blob->import_params(std::move(legacy_arg_list));
blk_fl::val res = blk_fl::init;
bool warned = false;
for (AnyV item : v_body->get_items()) {
@ -835,69 +813,24 @@ CodeBlob* process_vertex(V<ast_sequence> v_body, V<ast_argument_list> arg_list,
if (res & blk_fl::end) {
append_implicit_ret_stmt(v_body, *blob);
}
blob->close_blk(v_body->loc_end);
return blob;
close_scope();
sym_val->set_code(blob);
}
SymValAsmFunc* process_vertex(V<ast_asm_body> v_body, TypeExpr* func_type, V<ast_argument_list> arg_list, TypeExpr* ret_type,
bool marked_as_pure) {
int cnt = arg_list->size();
int width = ret_type->get_width();
if (width < 0 || width > 16) {
v_body->error("return type of an assembler built-in function must have a well-defined fixed width");
}
if (cnt > 16) {
v_body->error("assembler built-in function must have at most 16 arguments");
}
std::vector<int> cum_arg_width;
cum_arg_width.push_back(0);
int tot_width = 0;
for (int i = 0; i < cnt; ++i) {
V<ast_argument> arg = arg_list->get_arg(i);
int arg_width = arg->arg_type->get_width();
if (arg_width < 0 || arg_width > 16) {
arg->error("parameters of an assembler built-in function must have a well-defined fixed width");
}
cum_arg_width.push_back(tot_width += arg_width);
}
static void convert_asm_body_to_AsmOp(V<ast_function_declaration> v, V<ast_asm_body> v_body) {
SymDef* sym_def = lookup_symbol(calc_sym_idx(v->get_identifier()->name));
SymValAsmFunc* sym_val = dynamic_cast<SymValAsmFunc*>(sym_def->value);
tolk_assert(sym_val != nullptr);
int cnt = v->get_num_args();
int width = v->ret_type->get_width();
std::vector<AsmOp> asm_ops;
std::vector<int> arg_order, ret_order;
if (!v_body->arg_order.empty()) {
if (static_cast<int>(v_body->arg_order.size()) != cnt) {
v_body->error("arg_order of asm function must specify all arguments");
}
std::vector<bool> visited(cnt, false);
for (int i = 0; i < cnt; ++i) {
int j = v_body->arg_order[i];
if (visited[j]) {
v_body->error("arg_order of asm function contains duplicates");
}
visited[j] = true;
int c1 = cum_arg_width[j], c2 = cum_arg_width[j + 1];
while (c1 < c2) {
arg_order.push_back(c1++);
}
}
tolk_assert(arg_order.size() == (unsigned)tot_width);
}
if (!v_body->ret_order.empty()) {
if (static_cast<int>(v_body->ret_order.size()) != width) {
v_body->error("ret_order of this asm function expected to be width = " + std::to_string(width));
}
std::vector<bool> visited(width, false);
for (int i = 0; i < width; ++i) {
int j = v_body->ret_order[i];
if (j < 0 || j >= width || visited[j]) {
v_body->error("ret_order contains invalid integer, not in range 0 .. width-1");
}
visited[j] = true;
}
ret_order = v_body->ret_order;
}
for (AnyV v_child : v_body->get_asm_commands()) {
std::string_view ops = v_child->as<ast_string_const>()->str_val; // <op>\n<op>\n...
std::string op;
for (const char& c : ops) {
for (char c : ops) {
if (c == '\n' || c == '\r') {
if (!op.empty()) {
asm_ops.push_back(AsmOp::Parse(op, cnt, width));
@ -917,522 +850,31 @@ SymValAsmFunc* process_vertex(V<ast_asm_body> v_body, TypeExpr* func_type, V<ast
}
}
}
std::string crc_s;
for (const AsmOp& asm_op : asm_ops) {
crc_s += asm_op.op;
}
crc_s.push_back(!marked_as_pure);
for (const int& x : arg_order) {
crc_s += std::string((const char*) (&x), (const char*) (&x + 1));
}
for (const int& x : ret_order) {
crc_s += std::string((const char*) (&x), (const char*) (&x + 1));
}
auto res = new SymValAsmFunc{func_type, std::move(asm_ops), marked_as_pure};
res->arg_order = std::move(arg_order);
res->ret_order = std::move(ret_order);
res->crc = td::crc64(crc_s);
return res;
sym_val->set_code(std::move(asm_ops));
}
// if a function looks like `T f(...args) { return anotherF(...args); }`,
// set a bit to flags
// then, all calls to `f(...)` will be effectively replaced with `anotherF(...)`
void detect_if_function_just_wraps_another(SymValCodeFunc* v_current, const td::RefInt256 &method_id) {
const std::string& function_name = v_current->code->name;
// in "AST" representation, the first is Op::_Import (input arguments, even if none)
const auto& op_import = v_current->code->ops;
tolk_assert(op_import && op_import->cl == Op::_Import);
void pipeline_convert_ast_to_legacy_Expr_Op(const AllSrcFiles& all_src_files) {
for (const SrcFile* file : all_src_files) {
tolk_assert(file->ast);
// then Op::_Call (anotherF)
const Op* op_call = op_import->next.get();
if (!op_call || op_call->cl != Op::_Call)
return;
tolk_assert(op_call->left.size() == 1);
if (!file->is_stdlib_file()) {
// file->ast->debug_print();
G.generated_from += file->rel_filename;
G.generated_from += ", ";
}
const auto& op_return = op_call->next;
if (!op_return || op_return->cl != Op::_Return || op_return->left.size() != 1)
return;
bool indices_expected = op_import->left.size() == op_call->left[0] && op_call->left[0] == op_return->left[0];
if (!indices_expected)
return;
const SymDef* f_called = op_call->fun_ref;
const SymValFunc* v_called = dynamic_cast<SymValFunc*>(f_called->value);
if (!v_called)
return;
// `return` must use all arguments, e.g. `return (_0,_2,_1)`, not `return (_0,_1,_1)`
int args_used_mask = 0;
for (var_idx_t arg_idx : op_call->right) {
args_used_mask |= 1 << arg_idx;
}
if (args_used_mask != (1 << op_call->right.size()) - 1)
return;
// detect getters (having method_id), they should not be treated as wrappers
// v_current->method_id will be assigned later; todo refactor function parsing completely, it's weird
// moreover, `recv_external()` and others are also exported, but FunC is unaware of method_id
// (it's assigned by Fift later)
// so, for now, just handle "special" function names, the same as in Asm.fif
if (!method_id.is_null())
return;
if (function_name == "main" || function_name == "recv_internal" || function_name == "recv_external" ||
function_name == "run_ticktock" || function_name == "split_prepare" || function_name == "split_install")
return;
// all types must be strictly defined (on mismatch, a compilation error will be triggered anyway)
if (v_called->sym_type->has_unknown_inside() || v_current->sym_type->has_unknown_inside())
return;
// avoid situations like `f(int a, (int,int) b)`, inlining will be cumbersome
if (v_current->get_arg_type()->get_width() != op_call->right.size())
return;
// 'return true;' (false, nil) are (surprisingly) also function calls
if (f_called->name() == "true" || f_called->name() == "false" || f_called->name() == "nil")
return;
// if an original is marked `pure`, and this one doesn't, it's okay; just check for inline_ref storage
if (v_current->is_inline_ref())
return;
// ok, f_current is a wrapper
v_current->flags |= SymValFunc::flagWrapsAnotherF;
if (G.is_verbosity(2)) {
std::cerr << function_name << " -> " << f_called->name() << std::endl;
}
}
// Derives a method id from a function name:
// the low 16 bits of crc16(name), with bit 16 (0x10000) always set.
static td::RefInt256 calculate_method_id_by_func_name(std::string_view func_name) {
  const std::string name_copy(func_name);
  const unsigned int low16 = td::crc16(name_copy) & 0xffff;
  return td::make_refint(low16 | 0x10000);
}
void process_vertex(V<ast_function_declaration> v_function) {
open_scope(v_function->loc);
std::vector<TypeExpr*> type_vars;
if (v_function->forall_list) {
type_vars.reserve(v_function->forall_list->size());
for (int idx = 0; idx < v_function->forall_list->size(); ++idx) {
type_vars.emplace_back(v_function->forall_list->get_item(idx)->created_type);
}
}
std::string func_name = v_function->name;
int func_sym_idx = calc_sym_idx(func_name);
int flags_inline = 0;
if (v_function->marked_as_inline) {
flags_inline = SymValFunc::flagInline;
} else if (v_function->marked_as_inline_ref) {
flags_inline = SymValFunc::flagInlineRef;
}
td::RefInt256 method_id;
if (v_function->method_id) {
method_id = td::string_to_int256(static_cast<std::string>(v_function->method_id->int_val));
if (method_id.is_null()) {
v_function->method_id->error("invalid integer constant");
}
} else if (v_function->marked_as_get_method) {
method_id = calculate_method_id_by_func_name(func_name);
for (const SymDef* other : G.glob_get_methods) {
if (!td::cmp(dynamic_cast<const SymValFunc*>(other->value)->method_id, method_id)) {
v_function->error(PSTRING() << "GET methods hash collision: `" << other->name() << "` and `" + func_name + "` produce the same hash. Consider renaming one of these functions.");
}
}
}
TypeExpr* arg_list_type = nullptr;
if (int n_args = v_function->get_num_args()) {
std::vector<TypeExpr*> arg_types;
arg_types.reserve(n_args);
for (int idx = 0; idx < n_args; ++idx) {
arg_types.emplace_back(v_function->get_arg(idx)->arg_type);
}
arg_list_type = TypeExpr::new_tensor(std::move(arg_types));
} else {
arg_list_type = TypeExpr::new_unit();
}
TypeExpr* func_type = TypeExpr::new_map(arg_list_type, v_function->ret_type);
if (!type_vars.empty()) {
func_type = TypeExpr::new_forall(std::move(type_vars), func_type);
}
if (v_function->marked_as_builtin) {
const SymDef* builtin_func = lookup_symbol(G.symbols.lookup(func_name));
const SymValFunc* func_val = builtin_func ? dynamic_cast<SymValFunc*>(builtin_func->value) : nullptr;
if (!func_val || !func_val->is_builtin()) {
v_function->error("`builtin` used for non-builtin function");
}
#ifdef TOLK_DEBUG
// in release, we don't need this check, since `builtin` is used only in stdlib.tolk, which is our responsibility
if (!func_val->sym_type->equals_to(func_type) || func_val->is_marked_as_pure() != v_function->marked_as_pure) {
v_function->error("declaration for `builtin` function doesn't match an actual one");
}
#endif
close_scope();
return;
}
if (G.is_verbosity(1)) {
std::cerr << "fun " << func_name << " : " << func_type << std::endl;
}
SymDef* func_sym = define_global_symbol(func_sym_idx, 0, v_function->loc);
tolk_assert(func_sym);
SymValFunc* func_sym_val = dynamic_cast<SymValFunc*>(func_sym->value);
if (func_sym->value) {
// todo remove all about pre-declarations and prototypes
if (func_sym->value->kind != SymValKind::_Func || !func_sym_val) {
v_function->error("was not defined as a function before");
}
try {
unify(func_sym_val->sym_type, func_type);
} catch (UnifyError& ue) {
std::ostringstream os;
os << "previous type of function " << func_name << " : " << func_sym_val->sym_type
<< " cannot be unified with new type " << func_type << ": " << ue;
v_function->error(os.str());
}
}
if (v_function->get_body()->type == ast_empty) {
make_new_glob_func(func_sym, func_type, v_function->marked_as_pure);
} else if (const auto* v_seq = v_function->get_body()->try_as<ast_sequence>()) {
if (dynamic_cast<SymValAsmFunc*>(func_sym_val)) {
v_function->error("function `" + func_name + "` has been already defined as an assembler built-in");
}
SymValCodeFunc* func_sym_code;
if (func_sym_val) {
func_sym_code = dynamic_cast<SymValCodeFunc*>(func_sym_val);
if (!func_sym_code) {
v_function->error("function `" + func_name + "` has been already defined in an yet-unknown way");
}
} else {
func_sym_code = make_new_glob_func(func_sym, func_type, v_function->marked_as_pure);
}
if (func_sym_code->code) {
v_function->error("redefinition of function `" + func_name + "`");
}
if (v_function->marked_as_pure && v_function->ret_type->get_width() == 0) {
v_function->error("a pure function should return something, otherwise it will be optimized out anyway");
}
CodeBlob* code = process_vertex(v_seq, v_function->get_arg_list(), v_function->ret_type, v_function->marked_as_pure);
code->name = func_name;
code->loc = v_function->loc;
func_sym_code->code = code;
// todo it should be done not here, it should be on ast level, it should work when functions are declared swapped
detect_if_function_just_wraps_another(func_sym_code, method_id);
} else if (const auto* v_asm = v_function->get_body()->try_as<ast_asm_body>()) {
SymValAsmFunc* asm_func = process_vertex(v_asm, func_type, v_function->get_arg_list(), v_function->ret_type, v_function->marked_as_pure);
#ifdef TOLK_DEBUG
asm_func->name = func_name;
#endif
if (func_sym_val) {
if (dynamic_cast<SymValCodeFunc*>(func_sym_val)) {
v_function->error("function `" + func_name + "` was already declared as an ordinary function");
}
SymValAsmFunc* asm_func_old = dynamic_cast<SymValAsmFunc*>(func_sym_val);
if (asm_func_old) {
if (asm_func->crc != asm_func_old->crc) {
v_function->error("redefinition of built-in assembler function `" + func_name + "`");
for (AnyV v : file->ast->as<ast_tolk_file>()->get_toplevel_declarations()) {
if (auto v_func = v->try_as<ast_function_declaration>()) {
if (v_func->is_asm_function()) {
convert_asm_body_to_AsmOp(v_func, v_func->get_body()->as<ast_asm_body>());
} else if (!v_func->marked_as_builtin) {
convert_function_body_to_CodeBlob(v_func, v_func->get_body()->as<ast_sequence>());
}
} else {
v_function->error("redefinition of previously (somehow) defined function `" + func_name + "`");
}
}
func_sym->value = asm_func;
}
if (method_id.not_null()) {
auto val = dynamic_cast<SymValFunc*>(func_sym->value);
if (!val) {
v_function->error("cannot set method id for unknown function `" + func_name + "`");
}
if (val->method_id.is_null()) {
val->method_id = std::move(method_id);
} else if (td::cmp(val->method_id, method_id) != 0) {
v_function->error("integer method identifier for `" + func_name + "` changed from " +
val->method_id->to_dec_string() + " to a different value " + method_id->to_dec_string());
}
}
if (flags_inline) {
auto val = dynamic_cast<SymValFunc*>(func_sym->value);
if (!val) {
v_function->error("cannot set unknown function `" + func_name + "` as an inline");
}
if (!val->is_inline() && !val->is_inline_ref()) {
val->flags |= flags_inline;
} else if ((val->flags & (SymValFunc::flagInline | SymValFunc::flagInlineRef)) != flags_inline) {
v_function->error("inline mode for `" + func_name + "` changed with respect to a previous declaration");
}
}
if (v_function->marked_as_get_method) {
auto val = dynamic_cast<SymValFunc*>(func_sym->value);
if (!val) {
v_function->error("cannot set unknown function `" + func_name + "` as a get method");
}
val->flags |= SymValFunc::flagGetMethod;
G.glob_get_methods.push_back(func_sym);
}
close_scope();
}
// Resolves rel_filename to a registered SrcFile.
// Steps: ask the read callback for the real path; if a file with that path is already
// registered in G.all_src_files, return it; otherwise read the contents through the same
// callback and register a new file. Any callback error is propagated to the caller.
td::Result<SrcFile*> locate_source_file(const std::string& rel_filename) {
  td::Result<std::string> realpath_res =
      G.settings.read_callback(CompilerSettings::FsReadCallbackKind::Realpath, rel_filename.c_str());
  if (realpath_res.is_error()) {
    return realpath_res.move_as_error();
  }
  std::string abs_filename = realpath_res.move_as_ok();

  // the same file may be reached from several places; reuse the existing entry
  SrcFile* already_registered = G.all_src_files.find_file(abs_filename);
  if (already_registered) {
    return already_registered;
  }

  td::Result<std::string> contents_res =
      G.settings.read_callback(CompilerSettings::FsReadCallbackKind::ReadFile, abs_filename.c_str());
  if (contents_res.is_error()) {
    return contents_res.move_as_error();
  }
  return G.all_src_files.register_file(rel_filename, abs_filename, contents_res.move_as_ok());
}
// Handles a pragma that takes no argument: enables the matching global pragma
// at the location of the vertex, or reports "unknown pragma name".
void process_vertex(V<ast_pragma_no_arg> v) {
  const std::string_view requested = v->pragma_name;

  if (requested == G.pragma_allow_post_modification.name()) {
    G.pragma_allow_post_modification.enable(v->loc);
    return;
  }
  if (requested == G.pragma_compute_asm_ltr.name()) {
    G.pragma_compute_asm_ltr.enable(v->loc);
    return;
  }
  if (requested == G.pragma_remove_unused_functions.name()) {
    G.pragma_remove_unused_functions.enable(v->loc);
    return;
  }
  v->error("unknown pragma name");
}
// Handles a version pragma: checks the compiler's own version (tolk_version)
// against the semver written in the source, using the comparison token of the pragma.
//
// Supported operators: `=`, `>`, `>=`, `<`, `<=` compare all three components
// lexicographically (missing components count as 0).
// `^` (caret) depends on how many components were written:
//   ^a      -> major >= a
//   ^a.b    -> major == a, minor >= b
//   ^a.b.c  -> major == a, minor == b, patch >= c
// On mismatch (or a malformed operator / semver) an error is reported at the vertex.
void process_vertex(V<ast_pragma_version> v) {
  char op = '=';
  bool eq = false;  // for '>' and '<': whether equality also satisfies the condition
  const TokenType cmp_tok = v->cmp_tok;
  if (cmp_tok == tok_gt || cmp_tok == tok_geq) {
    op = '>';
    eq = cmp_tok == tok_geq;
  } else if (cmp_tok == tok_lt || cmp_tok == tok_leq) {
    op = '<';
    eq = cmp_tok == tok_leq;
  } else if (cmp_tok == tok_eq) {
    op = '=';
  } else if (cmp_tok == tok_bitwise_xor) {
    op = '^';
  } else {
    v->error("invalid comparison operator");
  }

  auto stoi = [&](std::string_view s) {
    auto R = td::to_integer_safe<int>(static_cast<std::string>(s));
    if (R.is_error()) {
      v->error("invalid semver format");
    }
    return R.move_as_ok();
  };

  // Read up to three dot-separated components from the pragma; absent ones stay 0.
  // `segs` counts the components that were actually present: the caret operator
  // needs it, and it used to be stuck at 1, which made `^a.b.c` check only the major part.
  std::string_view pragma_value = v->semver;
  int sem_ver[3] = {0, 0, 0};
  int segs = 0;
  std::istringstream iss_value(static_cast<std::string>(pragma_value));
  for (int idx = 0; idx < 3; idx++) {
    if (iss_value.good()) {
      segs = idx + 1;  // there is still input left, so this component was written
    }
    std::string s{"0"};  // kept as-is by getline once the stream is exhausted
    std::getline(iss_value, s, '.');
    sem_ver[idx] = stoi(s);
  }
  // End reading semver from source code

  int tolk_ver[3] = {0, 0, 0};
  std::istringstream iss(tolk_version);
  for (int idx = 0; idx < 3; idx++) {
    std::string s;
    std::getline(iss, s, '.');
    tolk_ver[idx] = stoi(s);
  }
  // End parsing embedded semver

  // Lexicographic comparison of (major, minor, patch): <0, 0, >0 like strcmp.
  int cmp = 0;
  for (int idx = 0; idx < 3; idx++) {
    if (tolk_ver[idx] != sem_ver[idx]) {
      cmp = tolk_ver[idx] < sem_ver[idx] ? -1 : 1;
      break;
    }
  }

  bool match = true;
  switch (op) {
    case '=':
      match = cmp == 0;
      break;
    case '>':
      match = cmp > 0 || (cmp == 0 && eq);
      break;
    case '<':
      match = cmp < 0 || (cmp == 0 && eq);
      break;
    case '^':
      if ( ((segs == 3) && ((tolk_ver[0] != sem_ver[0]) || (tolk_ver[1] != sem_ver[1]) || (tolk_ver[2] < sem_ver[2])))
        || ((segs == 2) && ((tolk_ver[0] != sem_ver[0]) || (tolk_ver[1] < sem_ver[1])))
        || ((segs == 1) && ((tolk_ver[0] < sem_ver[0]))) ) {
        match = false;
      }
      break;
    default:
      // `op` is only ever assigned one of the four characters above
      tolk_assert(false);
  }
  if (!match) {
    v->error("Tolk version " + tolk_version + " does not satisfy this condition");
  }
}
// Handles an include statement met while processing `current_file`.
// The included name is resolved relative to the directory part of current_file->rel_filename,
// the located file is appended to current_file->imports and, if it has not been parsed yet,
// it is parsed and processed immediately.
void process_vertex(V<ast_include_statement> v, SrcFile* current_file) {
  const auto& importer_path = current_file->rel_filename;
  const size_t last_slash = importer_path.rfind('/');

  std::string target_path = static_cast<std::string>(v->file_name);
  if (last_slash != std::string::npos) {
    // prepend the directory of the importing file (the trailing '/' is kept)
    target_path = importer_path.substr(0, last_slash + 1) + target_path;
  }

  td::Result<SrcFile*> located = locate_source_file(target_path);
  if (located.is_error()) {
    v->error("Failed to import: " + located.move_as_error().message().str());
  }

  SrcFile* imported_file = located.move_as_ok();
  current_file->imports.push_back(SrcFile::ImportStatement{imported_file});
  if (imported_file->was_parsed) {
    return;
  }
  // todo it's wrong, but ok for now
  process_file_ast(parse_src_file_to_ast(imported_file));
}
// Registers a global constant described by the declaration `v`.
// The initializer is converted to a legacy Expr; integer and slice literals are stored directly,
// while an applied expression (e.g. `1 + 2`) is compiled through a temporary CodeBlob and is
// accepted only if the generated code consists of a single constant-producing operation.
// Errors are reported via error() on the declaration or on its initializer.
void process_vertex(V<ast_constant_declaration> v) {
AnyV init_value = v->get_init_value();
SymDef* sym_def = define_global_symbol(calc_sym_idx(v->const_name), false, v->loc);
if (!sym_def) {
v->error("cannot define global symbol");
}
// a symbol that already has a value was declared before
if (sym_def->value) {
v->error("symbol already exists");
}
// temporary code container, used only to evaluate the initializer
CodeBlob code;
Expr* x = process_expr(init_value, code, false);
if (!x->is_rvalue()) {
v->get_init_value()->error("expression is not strictly Rvalue");
}
// an explicitly declared type must be equal to the type of the initializer
if (v->declared_type && !v->declared_type->equals_to(x->e_type)) {
v->error("expression type does not match declared type");
}
SymValConst* new_value = nullptr;
if (x->cls == Expr::_Const) { // Integer constant
new_value = new SymValConst{G.const_cnt++, x->intval};
} else if (x->cls == Expr::_SliceConst) { // Slice constant (string)
new_value = new SymValConst{G.const_cnt++, x->strval};
} else if (x->cls == Expr::_Apply) { // even "1 + 2" is Expr::_Apply (it applies `_+_`)
// wrap the expression into a minimal body: import nothing, return the computed value
code.emplace_back(v->loc, Op::_Import, std::vector<var_idx_t>());
auto tmp_vars = x->pre_compile(code);
code.emplace_back(v->loc, Op::_Return, std::move(tmp_vars));
code.emplace_back(v->loc, Op::_Nop);
// It is REQUIRED to execute "optimizations" as in tolk.cpp
code.simplify_var_types();
code.prune_unreachable_code();
code.split_vars(true);
for (int i = 0; i < 16; i++) {
code.compute_used_code_vars();
code.fwd_analyze();
code.prune_unreachable_code();
}
code.mark_noreturn();
AsmOpList out_list(0, &code.vars);
code.generate_code(out_list);
// the whole expression must have been folded into exactly one operation...
if (out_list.list_.size() != 1) {
init_value->error("precompiled expression must result in single operation");
}
auto op = out_list.list_[0];
// ...and that operation must be a constant with a valid integer origin
if (!op.is_const()) {
init_value->error("precompiled expression must result in compilation time constant");
}
if (op.origin.is_null() || !op.origin->is_valid()) {
init_value->error("precompiled expression did not result in a valid integer constant");
}
new_value = new SymValConst{G.const_cnt++, op.origin};
} else {
init_value->error("integer or slice literal or constant expected");
}
sym_def->value = new_value;
}
// Registers a global variable described by the declaration `v`.
// On first declaration a new SymValGlobVar is created and the symbol is appended to G.glob_vars.
// If the symbol already has a value, it must be a global variable as well, and the newly
// declared type is unified with the previously stored one.
void process_vertex(V<ast_global_var_declaration> v) {
  TypeExpr* declared_type = v->declared_type;
  SymDef* sym_def = define_global_symbol(calc_sym_idx(v->var_name), false, v->loc);
  if (!sym_def) {
    v->error("cannot define global symbol");
  }

  if (!sym_def->value) {
    // first declaration: allocate the next global variable index
    auto* created = new SymValGlobVar{G.glob_var_cnt++, declared_type};
    sym_def->value = created;
#ifdef TOLK_DEBUG
    created->name = v->var_name;
#endif
    G.glob_vars.push_back(sym_def);
    return;
  }

  // repeated declaration: only allowed for an existing global variable of a unifiable type
  auto* previous = dynamic_cast<SymValGlobVar*>(sym_def->value);
  if (!previous) {
    v->error("symbol cannot be redefined as a global variable");
  }
  try {
    unify(declared_type, previous->sym_type);
  } catch (UnifyError& ue) {
    std::ostringstream message;
    message << "cannot unify new type " << declared_type << " of global variable `" << sym_def->name()
            << "` with its previous type " << previous->sym_type << ": " << ue;
    v->error(message.str());
  }
}
// Visitor over the top-level declarations of a single file.
// Every callback simply forwards the vertex to the matching process_vertex() overload;
// the include handler additionally receives the file being visited.
class FileToLegacyVisitor final : public ASTVisitorToplevelDeclarations {
public:
  explicit FileToLegacyVisitor(SrcFile* file) : current_file(file) {
  }

private:
  SrcFile* current_file;  // the file whose declarations are being visited

  // todo inline here all these

  void on_pragma_no_arg(V<ast_pragma_no_arg> v) override {
    process_vertex(v);
  }

  void on_pragma_version(V<ast_pragma_version> v) override {
    process_vertex(v);
  }

  void on_include_statement(V<ast_include_statement> v) override {
    process_vertex(v, current_file);
  }

  void on_function_declaration(V<ast_function_declaration> v) override {
    process_vertex(v);
  }

  void on_constant_declaration(V<ast_constant_declaration> v) override {
    process_vertex(v);
  }

  void on_global_var_declaration(V<ast_global_var_declaration> v) override {
    process_vertex(v);
  }
};
// Processes the AST of one source file: checks that the root is an ast_tolk_file
// (throwing UnexpectedASTNodeType otherwise), appends the name of every non-stdlib file
// to G.generated_from, and visits all top-level declarations with FileToLegacyVisitor.
void process_file_ast(AnyV file_ast) {
  auto v_file = file_ast->try_as<ast_tolk_file>();
  if (!v_file) {
    throw UnexpectedASTNodeType(file_ast, "process_file_ast");
  }

  const SrcFile* src_file = v_file->file;
  const bool is_user_file = !src_file->is_stdlib_file();
  if (is_user_file) {
    // v_file->debug_print();
    G.generated_from += src_file->rel_filename;
    G.generated_from += ", ";
  }

  // the visitor needs a mutable file: include statements append to its imports
  FileToLegacyVisitor visitor(const_cast<SrcFile*>(src_file));
  visitor.start_visiting_file(v_file);
}
} // namespace tolk

View file

@ -0,0 +1,62 @@
/*
This file is part of TON Blockchain source code.
TON Blockchain is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
TON Blockchain is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with TON Blockchain. If not, see <http://www.gnu.org/licenses/>.
In addition, as a special exception, the copyright holders give permission
to link the code of portions of this program with the OpenSSL library.
You must obey the GNU General Public License in all respects for all
of the code used other than OpenSSL. If you modify file(s) with this
exception, you may extend this exception to your version of the file(s),
but you are not obligated to do so. If you do not wish to do so, delete this
exception statement from your version. If you delete this exception statement
from all source files in the program, then also delete it here.
*/
#include "tolk.h"
#include "ast.h"
#include "ast-from-tokens.h"
#include "compiler-state.h"
namespace tolk {
// First pipeline step: registers the stdlib file and the entrypoint file, then keeps parsing
// files until no unparsed one is left. Every include statement found at the top level of a parsed
// file registers the included file (resolved relative to the directory of the including file),
// which makes it a candidate for the next iterations.
// Returns all files known to G.all_src_files.
AllSrcFiles pipeline_discover_and_parse_sources(const std::string& stdlib_filename, const std::string& entrypoint_filename) {
  G.all_src_files.locate_and_register_source_file(stdlib_filename, {});
  G.all_src_files.locate_and_register_source_file(entrypoint_filename, {});

  for (SrcFile* file = G.all_src_files.get_next_unparsed_file(); file != nullptr;
       file = G.all_src_files.get_next_unparsed_file()) {
    tolk_assert(!file->ast);
    file->ast = parse_src_file_to_ast(file);

    for (AnyV v_toplevel : file->ast->as<ast_tolk_file>()->get_toplevel_declarations()) {
      auto v_include = v_toplevel->try_as<ast_include_statement>();
      if (!v_include) {
        continue;
      }

      // included names are relative to the directory of the including file
      const size_t last_slash = file->rel_filename.rfind('/');
      std::string target_path = v_include->get_file_name();
      if (last_slash != std::string::npos) {
        target_path = file->rel_filename.substr(0, last_slash + 1) + v_include->get_file_name();
      }

      SrcFile* imported = G.all_src_files.locate_and_register_source_file(target_path, v_include->loc);
      file->imports.push_back(SrcFile::ImportStatement{imported});
      v_include->mutate_set_src_file(imported);
    }
  }

  // todo #ifdef TOLK_PROFILING
  // lexer_measure_performance(G.all_src_files.get_all_files());
  return G.all_src_files.get_all_files();
}
} // namespace tolk

View file

@ -0,0 +1,90 @@
/*
This file is part of TON Blockchain source code.
TON Blockchain is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
TON Blockchain is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with TON Blockchain. If not, see <http://www.gnu.org/licenses/>.
In addition, as a special exception, the copyright holders give permission
to link the code of portions of this program with the OpenSSL library.
You must obey the GNU General Public License in all respects for all
of the code used other than OpenSSL. If you modify file(s) with this
exception, you may extend this exception to your version of the file(s),
but you are not obligated to do so. If you do not wish to do so, delete this
exception statement from your version. If you delete this exception statement
from all source files in the program, then also delete it here.
*/
#include "tolk.h"
#include "src-file.h"
#include "compiler-state.h"
/*
* Here we find unused symbols (global functions and variables) to strip them off codegen.
* Note, that currently it's implemented as a standalone step after AST has been transformed to legacy Expr/Op.
* The reason why it's not done on AST level is that symbol resolving is done too late. For instance,
* having `beginCell()` there is not enough information in AST whether it points to a global function
* or it's a local variable application.
* In the future, this should be done on AST level.
*/
namespace tolk {
static void mark_function_used_dfs(const std::unique_ptr<Op>& op);

// Marks a code function as really used and walks its body to mark everything it references.
// Functions without code and functions already marked are left untouched
// (the latter also stops the traversal on recursive references).
static void mark_function_used(SymValCodeFunc* func_val) {
  const bool has_body = func_val->code != nullptr;
  if (has_body && !func_val->is_really_used) {
    func_val->is_really_used = true;
    mark_function_used_dfs(func_val->code->ops);
  }
}
// Marks a global variable as really used (it is referenced from reachable code).
static void mark_global_var_used(SymValGlobVar* glob_val) {
  (*glob_val).is_really_used = true;
}
static void mark_function_used_dfs(const std::unique_ptr<Op>& op) {
if (!op) {
return;
}
// op->fun_ref, despite its name, may actually ref global var
// note, that for non-calls, e.g. `var a = some_fn` (Op::_Let), some_fn is Op::_GlobVar
// (in other words, fun_ref exists not only for direct Op::_Call, but for non-call references also)
if (op->fun_ref) {
if (auto* func_val = dynamic_cast<SymValCodeFunc*>(op->fun_ref->value)) {
mark_function_used(func_val);
} else if (auto* glob_val = dynamic_cast<SymValGlobVar*>(op->fun_ref->value)) {
mark_global_var_used(glob_val);
} else if (auto* asm_val = dynamic_cast<SymValAsmFunc*>(op->fun_ref->value)) {
} else {
tolk_assert(false);
}
}
mark_function_used_dfs(op->next);
mark_function_used_dfs(op->block0);
mark_function_used_dfs(op->block1);
}
// Pipeline step: finds which functions and global variables are really used.
// Roots of the traversal are code functions that have a method_id and functions with one of the
// well-known entrypoint names; everything reachable from them gets is_really_used set.
void pipeline_find_unused_symbols() {
  for (SymDef* func_sym : G.all_code_functions) {
    auto* func_val = dynamic_cast<SymValCodeFunc*>(func_sym->value);
    // every entry of all_code_functions is expected to be a code function;
    // fail loudly instead of dereferencing a null pointer below
    tolk_assert(func_val);
    std::string name = G.symbols.get_name(func_sym->sym_idx);
    if (func_val->method_id.not_null() ||
        name == "main" || name == "recv_internal" || name == "recv_external" ||
        name == "run_ticktock" || name == "split_prepare" || name == "split_install") {
      mark_function_used(func_val);
    }
  }
}
} // namespace tolk

View file

@ -0,0 +1,186 @@
/*
This file is part of TON Blockchain source code.
TON Blockchain is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
TON Blockchain is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with TON Blockchain. If not, see <http://www.gnu.org/licenses/>.
In addition, as a special exception, the copyright holders give permission
to link the code of portions of this program with the OpenSSL library.
You must obey the GNU General Public License in all respects for all
of the code used other than OpenSSL. If you modify file(s) with this
exception, you may extend this exception to your version of the file(s),
but you are not obligated to do so. If you do not wish to do so, delete this
exception statement from your version. If you delete this exception statement
from all source files in the program, then also delete it here.
*/
#include "tolk.h"
#include "src-file.h"
#include "ast.h"
#include "compiler-state.h"
namespace tolk {
// Tells whether the body of this function has to be emitted into the output.
bool SymValCodeFunc::does_need_codegen() const {
  // a function that is declared, but not referenced from code in any way, gets no body
  // (only when removing unused functions is enabled)
  if (!is_really_used && G.pragma_remove_unused_functions.enabled()) {
    return false;
  }
  // - a function referenced like `var a = some_fn;` (or in some other non-call way)
  //   must exist as a continuation, so it's always generated
  // - a function f() which is just `return anotherF(...args)` doesn't need to be generated at all,
  //   since all its usages are inlined
  // in the future, we may want to implement a true AST inlining for `inline` functions also
  return (flags & flagUsedAsNonCall) || !is_just_wrapper_for_another_f();
}
// Attaches the given code blob to this function (the pointer is stored as is).
void SymValCodeFunc::set_code(CodeBlob* code) {
  (*this).code = code;
}
void SymValAsmFunc::set_code(std::vector<AsmOp> code) {
this->ext_compile = make_ext_compile(std::move(code));
}
// Generates Fift code of one function to std::cout.
// Before codegen, the function body goes through the fixed sequence of transformations
// (simplify_var_types, prune_unreachable_code, split_vars, then 8 rounds of
// compute_used_code_vars / fwd_analyze / prune_unreachable_code, then mark_noreturn);
// intermediate states are dumped to std::cerr depending on the verbosity level.
// Throws ParseError if the function has no code (declared, but not implemented).
static void generate_output_func(SymDef* func_sym) {
  SymValCodeFunc* func_val = dynamic_cast<SymValCodeFunc*>(func_sym->value);
  tolk_assert(func_val);

  std::string name = G.symbols.get_name(func_sym->sym_idx);
  if (G.is_verbosity(2)) {
    std::cerr << "\n\n=========================\nfunction " << name << " : " << func_val->get_type() << std::endl;
  }
  if (!func_val->code) {
    throw ParseError(func_sym->loc, "function `" + name + "` is just declared, not implemented");
  }

  CodeBlob& code = *(func_val->code);

  // prints a titled dump of the current code state when verbosity is high enough
  auto dump_stage = [&code](int min_verbosity, const char* title, int print_flags) {
    if (G.is_verbosity(min_verbosity)) {
      std::cerr << title;
      code.print(std::cerr, print_flags);
    }
  };

  if (G.is_verbosity(3)) {
    code.print(std::cerr, 9);
  }
  code.simplify_var_types();
  dump_stage(5, "after simplify_var_types: \n", 0);
  code.prune_unreachable_code();
  dump_stage(5, "after prune_unreachable: \n", 0);
  code.split_vars(true);
  dump_stage(5, "after split_vars: \n", 0);

  for (int round = 0; round < 8; round++) {
    code.compute_used_code_vars();
    dump_stage(4, "after compute_used_vars: \n", 6);
    code.fwd_analyze();
    dump_stage(5, "after fwd_analyze: \n", 6);
    code.prune_unreachable_code();
    dump_stage(5, "after prune_unreachable: \n", 6);
  }
  code.mark_noreturn();
  if (G.is_verbosity(3)) {
    code.print(std::cerr, 15);
  }
  if (G.is_verbosity(2)) {
    std::cerr << "\n---------- resulting code for " << name << " -------------\n";
  }

  // PROC / PROCINLINE / PROCREF, depending on how the function is marked
  const bool is_inline = func_val->is_inline();
  const bool is_inline_ref = !is_inline && func_val->is_inline_ref();
  const char* modifier = is_inline ? "INLINE" : is_inline_ref ? "REF" : "";
  std::cout << "  " << name << " PROC" << modifier << ":<{\n";

  int mode = 0;
  if (G.settings.stack_layout_comments) {
    mode |= Stack::_StkCmt | Stack::_CptStkCmt;
  }
  if (is_inline && code.ops->noreturn()) {
    mode |= Stack::_InlineFunc;
  }
  if (is_inline || is_inline_ref) {
    mode |= Stack::_InlineAny;
  }
  code.generate_code(std::cout, mode, 2);
  std::cout << "  " << "}>\n";

  if (G.is_verbosity(2)) {
    std::cerr << "--------------\n";
  }
}
void pipeline_generate_fif_output_to_std_cout() {
std::cout << "\"Asm.fif\" include\n";
std::cout << "// automatically generated from " << G.generated_from << std::endl;
std::cout << "PROGRAM{\n";
for (SymDef* func_sym : G.all_code_functions) {
SymValCodeFunc* func_val = dynamic_cast<SymValCodeFunc*>(func_sym->value);
tolk_assert(func_val);
if (!func_val->does_need_codegen()) {
if (G.is_verbosity(2)) {
std::cerr << func_sym->name() << ": code not generated, function does not need codegen\n";
}
continue;
}
std::string name = G.symbols.get_name(func_sym->sym_idx);
std::cout << std::string(2, ' ');
if (func_val->method_id.is_null()) {
std::cout << "DECLPROC " << name << "\n";
} else {
std::cout << func_val->method_id << " DECLMETHOD " << name << "\n";
}
}
for (SymDef* gvar_sym : G.all_global_vars) {
auto* glob_val = dynamic_cast<SymValGlobVar*>(gvar_sym->value);
tolk_assert(glob_val);
if (!glob_val->is_really_used && G.pragma_remove_unused_functions.enabled()) {
if (G.is_verbosity(2)) {
std::cerr << gvar_sym->name() << ": variable not generated, it's unused\n";
}
continue;
}
std::string name = G.symbols.get_name(gvar_sym->sym_idx);
std::cout << std::string(2, ' ') << "DECLGLOBVAR " << name << "\n";
}
for (SymDef* func_sym : G.all_code_functions) {
SymValCodeFunc* func_val = dynamic_cast<SymValCodeFunc*>(func_sym->value);
if (!func_val->does_need_codegen()) {
continue;
}
generate_output_func(func_sym);
}
std::cout << "}END>c\n";
if (!G.settings.boc_output_filename.empty()) {
std::cout << "boc>B \"" << G.settings.boc_output_filename << "\" B>file\n";
}
}
} // namespace tolk

View file

@ -0,0 +1,140 @@
/*
This file is part of TON Blockchain source code.
TON Blockchain is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
TON Blockchain is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with TON Blockchain. If not, see <http://www.gnu.org/licenses/>.
In addition, as a special exception, the copyright holders give permission
to link the code of portions of this program with the OpenSSL library.
You must obey the GNU General Public License in all respects for all
of the code used other than OpenSSL. If you modify file(s) with this
exception, you may extend this exception to your version of the file(s),
but you are not obligated to do so. If you do not wish to do so, delete this
exception statement from your version. If you delete this exception statement
from all source files in the program, then also delete it here.
*/
#include "tolk.h"
#include "src-file.h"
#include "ast.h"
#include "compiler-state.h"
#include "td/utils/misc.h"
namespace tolk {
// Enables the pragma named by `v`; fires an error at `v` if the name is not one of the known pragmas.
static void handle_pragma_no_arg(V<ast_pragma_no_arg> v) {
  const std::string_view requested = v->pragma_name;

  if (requested == G.pragma_allow_post_modification.name()) {
    G.pragma_allow_post_modification.enable(v->loc);
    return;
  }
  if (requested == G.pragma_compute_asm_ltr.name()) {
    G.pragma_compute_asm_ltr.enable(v->loc);
    return;
  }
  if (requested == G.pragma_remove_unused_functions.name()) {
    G.pragma_remove_unused_functions.enable(v->loc);
    return;
  }
  v->error("unknown pragma name");
}
// Checks a version requirement (comparison token + semver string) against tolk_version,
// and fires an error at `v` if the compiler version does not satisfy it.
// Supported comparisons: `=`, `>`, `>=`, `<`, `<=` (all three components are compared, missing
// ones are treated as 0) and `^`, whose meaning depends on how many components were specified:
//   ^A      -> major >= A
//   ^A.B    -> major == A, minor >= B
//   ^A.B.C  -> major == A, minor == B, patch >= C
static void handle_pragma_version(V<ast_pragma_version> v) {
  char op = '=';
  bool eq = false;
  TokenType cmp_tok = v->cmp_tok;
  if (cmp_tok == tok_gt || cmp_tok == tok_geq) {
    op = '>';
    eq = cmp_tok == tok_geq;
  } else if (cmp_tok == tok_lt || cmp_tok == tok_leq) {
    op = '<';
    eq = cmp_tok == tok_leq;
  } else if (cmp_tok == tok_eq) {
    op = '=';
  } else if (cmp_tok == tok_bitwise_xor) {
    op = '^';
  } else {
    v->error("invalid comparison operator");
  }

  auto stoi = [&](std::string_view s) {
    auto R = td::to_integer_safe<int>(static_cast<std::string>(s));
    if (R.is_error()) {
      v->error("invalid semver format");
    }
    return R.move_as_ok();
  };

  // Read up to three dot-separated components of the requested version.
  // `segs` counts how many components were really present in the source: `^` depends on it.
  std::string_view pragma_value = v->semver;
  int sem_ver[3] = {0, 0, 0};
  int segs = 0;
  std::istringstream iss_value(static_cast<std::string>(pragma_value));
  for (int idx = 0; idx < 3; idx++) {
    std::string s{"0"};  // stays "0" when the stream is already exhausted
    if (std::getline(iss_value, s, '.')) {
      segs = idx + 1;
    }
    sem_ver[idx] = stoi(s);
  }
  // End reading semver from source code

  int tolk_ver[3] = {0, 0, 0};
  std::istringstream iss(tolk_version);
  for (int idx = 0; idx < 3; idx++) {
    std::string s;
    std::getline(iss, s, '.');
    tolk_ver[idx] = stoi(s);
  }
  // End parsing embedded semver

  // three-way lexicographic comparison: <0 if compiler is older than requested, >0 if newer
  int cmp = 0;
  for (int idx = 0; idx < 3 && cmp == 0; idx++) {
    if (tolk_ver[idx] != sem_ver[idx]) {
      cmp = tolk_ver[idx] < sem_ver[idx] ? -1 : 1;
    }
  }

  bool match = true;
  switch (op) {
    case '=':
      match = cmp == 0;
      break;
    case '>':
      match = cmp > 0 || (cmp == 0 && eq);
      break;
    case '<':
      match = cmp < 0 || (cmp == 0 && eq);
      break;
    case '^':
      if (segs == 3) {
        match = tolk_ver[0] == sem_ver[0] && tolk_ver[1] == sem_ver[1] && tolk_ver[2] >= sem_ver[2];
      } else if (segs == 2) {
        match = tolk_ver[0] == sem_ver[0] && tolk_ver[1] >= sem_ver[1];
      } else {
        match = tolk_ver[0] >= sem_ver[0];
      }
      break;
    default:
      tolk_assert(false);
  }
  if (!match) {
    v->error("Tolk version " + tolk_version + " does not satisfy this condition");
  }
}
// Pipeline step: walks the top-level declarations of every (already parsed) file
// and handles pragmas; all other declarations are ignored here.
void pipeline_handle_pragmas(const AllSrcFiles& all_src_files) {
  for (const SrcFile* file : all_src_files) {
    tolk_assert(file->ast);

    for (AnyV v : file->ast->as<ast_tolk_file>()->get_toplevel_declarations()) {
      auto v_no_arg = v->try_as<ast_pragma_no_arg>();
      if (v_no_arg) {
        handle_pragma_no_arg(v_no_arg);
        continue;
      }
      auto v_version = v->try_as<ast_pragma_version>();
      if (v_version) {
        handle_pragma_version(v_version);
      }
    }
  }
}
} // namespace tolk

View file

@ -0,0 +1,402 @@
/*
This file is part of TON Blockchain source code.
TON Blockchain is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
TON Blockchain is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with TON Blockchain. If not, see <http://www.gnu.org/licenses/>.
In addition, as a special exception, the copyright holders give permission
to link the code of portions of this program with the OpenSSL library.
You must obey the GNU General Public License in all respects for all
of the code used other than OpenSSL. If you modify file(s) with this
exception, you may extend this exception to your version of the file(s),
but you are not obligated to do so. If you do not wish to do so, delete this
exception statement from your version. If you delete this exception statement
from all source files in the program, then also delete it here.
*/
#include "tolk.h"
#include "platform-utils.h"
#include "src-file.h"
#include "ast.h"
#include "compiler-state.h"
#include "td/utils/crypto.h"
#include <unordered_set>
namespace tolk {
// Forward declaration: converts an AST expression to a legacy Expr within the given code blob.
// It is called below to evaluate initializers of constants.
// NOTE(review): the definition is not in this part of the file — presumably it lives in another source file.
Expr* process_expr(AnyV v, CodeBlob& code, bool nv = false);
// Fires a "redefinition" error at the identifier `v_ident`. The message depends on where the
// existing symbol comes from: stdlib, a known source location, or neither (a built-in).
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
static void fire_error_redefinition_of_symbol(V<ast_identifier> v_ident, SymDef* existing) {
  const auto& previous_loc = existing->loc;

  std::string message;
  if (previous_loc.is_stdlib()) {
    message = "redefinition of a symbol from stdlib";
  } else if (previous_loc.is_defined()) {
    message = "redefinition of symbol, previous was at: " + previous_loc.to_string();
  } else {
    message = "redefinition of built-in symbol";
  }
  v_ident->error(message);
}
// Returns the index of `sym_name` in the global symbols table (via lookup_add).
static int calc_sym_idx(std::string_view sym_name) {
  const int sym_idx = G.symbols.lookup_add(sym_name);
  return sym_idx;
}
// Calculates method_id from a function name: the low 16 bits of crc16(name) with bit 0x10000 set.
static td::RefInt256 calculate_method_id_by_func_name(std::string_view func_name) {
  const unsigned int name_crc = td::crc16(static_cast<std::string>(func_name));
  const unsigned int low_16_bits = name_crc & 0xffff;
  return td::make_refint(low_16_bits | 0x10000);
}
// Checks whether `v_variable` is an identifier that names one of the arguments of `v_func`.
static bool is_argument_of_function(AnyV v_variable, V<ast_function_declaration> v_func) {
  if (v_variable->type != ast_identifier) {
    return false;
  }
  const int arg_idx = v_func->get_arg_list()->lookup_idx(v_variable->as<ast_identifier>()->name);
  return arg_idx != -1;
}
// Detects functions of the form `T f(...args) { return anotherF(...args); }`.
// For such a function a bit is set in its flags, and then all calls to `f(...)`
// are effectively replaced with `anotherF(...)`.
// todo this function (and optimization) was done before implementing AST, but after AST and registering symbols
// in advance, its behavior became a bit wrong: if anotherF is declared before f, then it's detected here,
// but still not inlined, since inlining is done in legacy code, using Expr
// in the future, inlining should be done on AST level, but it's impossible until all names resolving
// (including scopes) is also done on AST level
// in the future, when working on AST level, inlining should become much more powerful
// (for instance, it should inline `return anotherF(constants)`, etc.)
static bool detect_if_function_just_wraps_another(V<ast_function_declaration> v) {
  if (v->method_id || v->marked_as_get_method || v->marked_as_inline_ref || v->ret_type->has_unknown_inside()) {
    return false;
  }

  const int n_args = v->get_num_args();
  for (int arg_idx = 0; arg_idx < n_args; ++arg_idx) {
    if (v->get_arg(arg_idx)->arg_type->get_width() != 1) {
      return false;  // avoid situations like `f(int a, (int,int) b)`, inlining will be cumbersome
    }
  }

  // the body must be exactly one statement: `return <function call>`
  auto v_body = v->get_body()->try_as<ast_sequence>();
  if (!v_body || v_body->size() != 1 || v_body->get_item(0)->type != ast_return_statement) {
    return false;
  }
  auto v_call = v_body->get_item(0)->as<ast_return_statement>()->get_return_value()->try_as<ast_function_call>();
  if (!v_call) {
    return false;
  }

  // todo simplify when removing ability of calling a function without parentheses
  AnyV called_arg = v_call->get_called_arg();
  const bool is_tensor_arg = called_arg->type == ast_tensor;
  const bool is_single_arg = called_arg->type == ast_parenthesized_expr;
  if (!(is_tensor_arg || is_single_arg) || v_call->get_called_f()->type != ast_identifier) {
    return false;
  }

  std::string_view called_name = v_call->get_called_f()->try_as<ast_identifier>()->name;
  std::string_view function_name = v->get_identifier()->name;

  if (is_tensor_arg) {
    // every passed item must be an argument of f, each argument must be passed exactly once
    const std::vector<AnyV>& passed_items = called_arg->as<ast_tensor>()->get_items();
    std::set<std::string_view> distinct_names;
    for (AnyV v_item : passed_items) {
      if (!is_argument_of_function(v_item, v)) {
        return false;
      }
      distinct_names.emplace(v_item->as<ast_identifier>()->name);
    }
    if (distinct_names.size() != v->get_num_args() || distinct_names.size() != passed_items.size()) {
      return false;
    }
  } else {
    AnyV v_single = called_arg->as<ast_parenthesized_expr>()->get_expr();
    if (!is_argument_of_function(v_single, v)) {
      return false;
    }
  }

  // entrypoints are never treated as wrappers
  const bool is_entrypoint =
      function_name == "main" || function_name == "recv_internal" || function_name == "recv_external" ||
      function_name == "run_ticktock" || function_name == "split_prepare" || function_name == "split_install";
  if (is_entrypoint) {
    return false;
  }

  // ok, f_current is a wrapper
  if (G.is_verbosity(2)) {
    std::cerr << function_name << " -> " << called_name << std::endl;
  }
  return true;
}
// Validates an asm function declaration and calculates its argument / return value reordering.
//   v_body    - asm body, provides user-specified arg_order and ret_order (both may be empty)
//   arg_list  - declared arguments of the function
//   ret_type  - declared return type
//   arg_order - output: for a non-empty v_body->arg_order, argument indices expanded to stack slots
//               (an argument of width W contributes W consecutive slot indices)
//   ret_order - output: a copy of v_body->ret_order, if it's non-empty
// Fires an error if widths are not fixed (or exceed 16), if there are more than 16 arguments,
// or if arg_order / ret_order are not valid permutations.
static void calc_arg_ret_order_of_asm_function(V<ast_asm_body> v_body, V<ast_argument_list> arg_list, TypeExpr* ret_type,
                                               std::vector<int>& arg_order, std::vector<int>& ret_order) {
  int cnt = arg_list->size();
  int width = ret_type->get_width();
  if (width < 0 || width > 16) {
    v_body->error("return type of an assembler built-in function must have a well-defined fixed width");
  }
  if (cnt > 16) {
    v_body->error("assembler built-in function must have at most 16 arguments");
  }

  // cum_arg_width[i] is the total width of arguments 0 .. i-1, i.e. the first stack slot of argument i
  std::vector<int> cum_arg_width;
  cum_arg_width.push_back(0);
  int tot_width = 0;
  for (int i = 0; i < cnt; ++i) {
    V<ast_argument> arg = arg_list->get_arg(i);
    int arg_width = arg->arg_type->get_width();
    if (arg_width < 0 || arg_width > 16) {
      arg->error("parameters of an assembler built-in function must have a well-defined fixed width");
    }
    cum_arg_width.push_back(tot_width += arg_width);
  }

  if (!v_body->arg_order.empty()) {
    if (static_cast<int>(v_body->arg_order.size()) != cnt) {
      v_body->error("arg_order of asm function must specify all arguments");
    }
    std::vector<bool> visited(cnt, false);
    for (int i = 0; i < cnt; ++i) {
      int j = v_body->arg_order[i];
      // the index is used to access `visited` and `cum_arg_width`, so it must be range-checked first
      // (the same way as it's done for ret_order below)
      if (j < 0 || j >= cnt) {
        v_body->error("arg_order of asm function contains invalid argument index");
      }
      if (visited[j]) {
        v_body->error("arg_order of asm function contains duplicates");
      }
      visited[j] = true;
      int c1 = cum_arg_width[j], c2 = cum_arg_width[j + 1];
      while (c1 < c2) {
        arg_order.push_back(c1++);
      }
    }
    tolk_assert(arg_order.size() == static_cast<size_t>(tot_width));
  }

  if (!v_body->ret_order.empty()) {
    if (static_cast<int>(v_body->ret_order.size()) != width) {
      v_body->error("ret_order of this asm function expected to be width = " + std::to_string(width));
    }
    std::vector<bool> visited(width, false);
    for (int i = 0; i < width; ++i) {
      int j = v_body->ret_order[i];
      if (j < 0 || j >= width || visited[j]) {
        v_body->error("ret_order contains invalid integer, not in range 0 .. width-1");
      }
      visited[j] = true;
    }
    ret_order = v_body->ret_order;
  }
}
// Registers a global constant: defines a global symbol (firing a redefinition error if it already
// has a value), evaluates the initializer, stores the result as SymValConst and appends
// the symbol to G.all_constants.
static void register_constant(V<ast_constant_declaration> v) {
  AnyV init_value = v->get_init_value();
  SymDef* sym_def = define_global_symbol(calc_sym_idx(v->get_identifier()->name), v->loc);
  if (sym_def->value) {
    fire_error_redefinition_of_symbol(v->get_identifier(), sym_def);
  }

  // todo currently, constant value calculation is dirty and roughly: init_value is evaluated to fif code
  // and waited to be a single expression
  // although it works, of course it should be later rewritten using AST calculations, as well as lots of other parts
  CodeBlob code("tmp", v->loc, nullptr);
  Expr* init_expr = process_expr(init_value, code, false);
  if (!init_expr->is_rvalue()) {
    init_value->error("expression is not strictly Rvalue");
  }
  if (v->declared_type && !v->declared_type->equals_to(init_expr->e_type)) {
    v->error("expression type does not match declared type");
  }

  // index of the constant being registered = number of constants registered so far
  auto next_const_idx = [] {
    return static_cast<int>(G.all_constants.size());
  };

  SymValConst* sym_val = nullptr;
  switch (init_expr->cls) {
    case Expr::_Const:  // Integer constant
      sym_val = new SymValConst{next_const_idx(), init_expr->intval};
      break;
    case Expr::_SliceConst:  // Slice constant (string)
      sym_val = new SymValConst{next_const_idx(), init_expr->strval};
      break;
    case Expr::_Apply: {  // even "1 + 2" is Expr::_Apply (it applies `_+_`)
      code.emplace_back(v->loc, Op::_Import, std::vector<var_idx_t>());
      auto result_vars = init_expr->pre_compile(code);
      code.emplace_back(v->loc, Op::_Return, std::move(result_vars));
      code.emplace_back(v->loc, Op::_Nop);
      // It is REQUIRED to execute "optimizations" as in tolk.cpp
      code.simplify_var_types();
      code.prune_unreachable_code();
      code.split_vars(true);
      for (int round = 0; round < 16; round++) {
        code.compute_used_code_vars();
        code.fwd_analyze();
        code.prune_unreachable_code();
      }
      code.mark_noreturn();
      AsmOpList out_list(0, &code.vars);
      code.generate_code(out_list);
      if (out_list.list_.size() != 1) {
        init_value->error("precompiled expression must result in single operation");
      }
      auto single_op = out_list.list_[0];
      if (!single_op.is_const()) {
        init_value->error("precompiled expression must result in compilation time constant");
      }
      if (single_op.origin.is_null() || !single_op.origin->is_valid()) {
        init_value->error("precompiled expression did not result in a valid integer constant");
      }
      sym_val = new SymValConst{next_const_idx(), single_op.origin};
      break;
    }
    default:
      init_value->error("integer or slice literal or constant expected");
  }

  sym_def->value = sym_val;
#ifdef TOLK_DEBUG
  dynamic_cast<SymValConst*>(sym_def->value)->name = v->get_identifier()->name;
#endif
  G.all_constants.push_back(sym_def);
}
// Registers a global variable: defines a global symbol (firing a redefinition error if it already
// has a value), creates SymValGlobVar of the declared type, whose index is the number of
// global variables registered so far, and appends the symbol to G.all_global_vars.
static void register_global_var(V<ast_global_var_declaration> v) {
  auto v_ident = v->get_identifier();
  SymDef* sym_def = define_global_symbol(calc_sym_idx(v_ident->name), v->loc);
  if (sym_def->value) {
    fire_error_redefinition_of_symbol(v_ident, sym_def);
  }

  const int var_idx = static_cast<int>(G.all_global_vars.size());
  auto* glob_val = new SymValGlobVar{var_idx, v->declared_type};
  sym_def->value = glob_val;
#ifdef TOLK_DEBUG
  glob_val->name = v_ident->name;
#endif
  G.all_global_vars.push_back(sym_def);
}
static void register_function(V<ast_function_declaration> v) {
std::string_view func_name = v->get_identifier()->name;
// calculate TypeExpr of a function: it's a map (args -> ret), probably surrounded by forall
TypeExpr* func_type = nullptr;
if (int n_args = v->get_num_args()) {
std::vector<TypeExpr*> arg_types;
arg_types.reserve(n_args);
for (int idx = 0; idx < n_args; ++idx) {
arg_types.emplace_back(v->get_arg(idx)->arg_type);
}
func_type = TypeExpr::new_map(TypeExpr::new_tensor(std::move(arg_types)), v->ret_type);
} else {
func_type = TypeExpr::new_map(TypeExpr::new_unit(), v->ret_type);
}
if (v->forall_list) {
std::vector<TypeExpr*> type_vars;
type_vars.reserve(v->forall_list->size());
for (int idx = 0; idx < v->forall_list->size(); ++idx) {
type_vars.emplace_back(v->forall_list->get_item(idx)->created_type);
}
func_type = TypeExpr::new_forall(std::move(type_vars), func_type);
}
if (v->marked_as_builtin) {
const SymDef* builtin_func = lookup_symbol(G.symbols.lookup(func_name));
const SymValFunc* func_val = builtin_func ? dynamic_cast<SymValFunc*>(builtin_func->value) : nullptr;
if (!func_val || !func_val->is_builtin()) {
v->error("`builtin` used for non-builtin function");
}
#ifdef TOLK_DEBUG
// in release, we don't need this check, since `builtin` is used only in stdlib.tolk, which is our responsibility
if (!func_val->sym_type->equals_to(func_type) || func_val->is_marked_as_pure() != v->marked_as_pure) {
v->error("declaration for `builtin` function doesn't match an actual one");
}
#endif
return;
}
SymDef* sym_def = define_global_symbol(calc_sym_idx(func_name), v->loc);
if (sym_def->value) {
fire_error_redefinition_of_symbol(v->get_identifier(), sym_def);
}
if (G.is_verbosity(1)) {
std::cerr << "fun " << func_name << " : " << func_type << std::endl;
}
if (v->marked_as_pure && v->ret_type->get_width() == 0) {
v->error("a pure function should return something, otherwise it will be optimized out anyway");
}
SymValFunc* sym_val = nullptr;
if (const auto* v_seq = v->get_body()->try_as<ast_sequence>()) {
sym_val = new SymValCodeFunc{static_cast<int>(G.all_code_functions.size()), func_type, v->marked_as_pure};
} else if (const auto* v_asm = v->get_body()->try_as<ast_asm_body>()) {
std::vector<int> arg_order, ret_order;
calc_arg_ret_order_of_asm_function(v_asm, v->get_arg_list(), v->ret_type, arg_order, ret_order);
sym_val = new SymValAsmFunc{func_type, std::move(arg_order), std::move(ret_order), v->marked_as_pure};
} else {
v->error("Unexpected function body statement");
}
if (v->method_id) {
sym_val->method_id = td::string_to_int256(static_cast<std::string>(v->method_id->int_val));
if (sym_val->method_id.is_null()) {
v->method_id->error("invalid integer constant");
}
} else if (v->marked_as_get_method) {
sym_val->method_id = calculate_method_id_by_func_name(func_name);
for (const SymDef* other : G.all_get_methods) {
if (!td::cmp(dynamic_cast<const SymValFunc*>(other->value)->method_id, sym_val->method_id)) {
v->error(PSTRING() << "GET methods hash collision: `" << other->name() << "` and `" << static_cast<std::string>(func_name) << "` produce the same hash. Consider renaming one of these functions.");
}
}
}
if (v->marked_as_inline) {
sym_val->flags |= SymValFunc::flagInline;
}
if (v->marked_as_inline_ref) {
sym_val->flags |= SymValFunc::flagInlineRef;
}
if (v->marked_as_get_method) {
sym_val->flags |= SymValFunc::flagGetMethod;
}
if (detect_if_function_just_wraps_another(v)) {
sym_val->flags |= SymValFunc::flagWrapsAnotherF;
}
sym_def->value = sym_val;
#ifdef TOLK_DEBUG
dynamic_cast<SymValFunc*>(sym_def->value)->name = func_name;
#endif
if (dynamic_cast<SymValCodeFunc*>(sym_val)) {
G.all_code_functions.push_back(sym_def);
}
if (sym_val->is_get_method()) {
G.all_get_methods.push_back(sym_def);
}
}
static void iterate_through_file_symbols(const SrcFile* file) {
static std::unordered_set<const SrcFile*> seen;
if (!seen.insert(file).second) {
return;
}
tolk_assert(file && file->ast);
for (AnyV v : file->ast->as<ast_tolk_file>()->get_toplevel_declarations()) {
switch (v->type) {
case ast_include_statement:
// on `import "another-file.tolk"`, register symbols from that file at first
// (for instance, it can calculate constants, which are used in init_val of constants in current file below import)
iterate_through_file_symbols(v->as<ast_include_statement>()->file);
break;
case ast_constant_declaration_list:
for (AnyV v_decl : v->as<ast_constant_declaration_list>()->get_declarations()) {
register_constant(v_decl->as<ast_constant_declaration>());
}
break;
case ast_global_var_declaration_list:
for (AnyV v_decl : v->as<ast_global_var_declaration_list>()->get_declarations()) {
register_global_var(v_decl->as<ast_global_var_declaration>());
}
break;
case ast_function_declaration:
register_function(v->as<ast_function_declaration>());
break;
default:
break;
}
}
}
void pipeline_register_global_symbols(const AllSrcFiles& all_src_files) {
for (const SrcFile* file : all_src_files) {
iterate_through_file_symbols(file);
}
}
} // namespace tolk

42
tolk/pipeline.h Normal file
View file

@ -0,0 +1,42 @@
/*
This file is part of TON Blockchain source code.
TON Blockchain is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
TON Blockchain is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with TON Blockchain. If not, see <http://www.gnu.org/licenses/>.
In addition, as a special exception, the copyright holders give permission
to link the code of portions of this program with the OpenSSL library.
You must obey the GNU General Public License in all respects for all
of the code used other than OpenSSL. If you modify file(s) with this
exception, you may extend this exception to your version of the file(s),
but you are not obligated to do so. If you do not wish to do so, delete this
exception statement from your version. If you delete this exception statement
from all source files in the program, then also delete it here.
*/
#pragma once
#include "src-file.h"
#include <string>
namespace tolk {
// Steps of the compilation pipeline.
// tolk_proceed() invokes them one after another, in the order they are declared here.
// Returns all source files; the returned list is then passed to the steps below.
// NOTE(review): presumably parses stdlib, the entrypoint and everything they import — confirm in the implementation.
AllSrcFiles pipeline_discover_and_parse_sources(const std::string& stdlib_filename, const std::string& entrypoint_filename);
// NOTE(review): presumably processes pragma directives found in the files — confirm in the implementation.
void pipeline_handle_pragmas(const AllSrcFiles&);
// Registers global symbols (constants, global vars, functions) of every file, imported files first.
void pipeline_register_global_symbols(const AllSrcFiles&);
// NOTE(review): judging by the name, converts AST into legacy Expr/Op representation — confirm in the implementation.
void pipeline_convert_ast_to_legacy_Expr_Op(const AllSrcFiles&);
// NOTE(review): judging by the name, detects symbols that are not used — confirm in the implementation.
void pipeline_find_unused_symbols();
// NOTE(review): judging by the name, prints resulting Fift code to std::cout — confirm in the implementation.
void pipeline_generate_fif_output_to_std_cout();
} // namespace tolk

View file

@ -17,6 +17,7 @@
#include "src-file.h"
#include "compiler-state.h"
#include <iostream>
#include <sstream>
namespace tolk {
@ -40,12 +41,51 @@ SrcFile* AllRegisteredSrcFiles::find_file(const std::string& abs_filename) const
return nullptr;
}
SrcFile* AllRegisteredSrcFiles::register_file(const std::string& rel_filename, const std::string& abs_filename, std::string&& text) {
SrcFile* created = new SrcFile(++last_file_id, rel_filename, abs_filename, std::move(text));
// Resolves rel_filename to an absolute path and returns a SrcFile for it.
// If a file with the same absolute path was registered earlier, it is returned as is;
// otherwise, file contents are read, and a new SrcFile with the next file_id is registered.
// On failure, throws ParseError (when included_from is defined, i.e. a file is being imported) or Fatal.
SrcFile* AllRegisteredSrcFiles::locate_and_register_source_file(const std::string& rel_filename, SrcLocation included_from) {
  // both failure points (realpath and reading) are reported in the same manner
  const auto fire_error = [&](const char* fatal_prefix, const std::string& reason) {
    if (included_from.is_defined()) {
      throw ParseError(included_from, "Failed to import: " + reason);
    }
    throw Fatal(fatal_prefix + rel_filename + ": " + reason);
  };

  td::Result<std::string> realpath_res = G.settings.read_callback(CompilerSettings::FsReadCallbackKind::Realpath, rel_filename.c_str());
  if (realpath_res.is_error()) {
    fire_error("Failed to locate ", realpath_res.move_as_error().message().str());
  }

  std::string abs_filename = realpath_res.move_as_ok();
  SrcFile* already_registered = find_file(abs_filename);
  if (already_registered != nullptr) {
    return already_registered;
  }

  td::Result<std::string> contents_res = G.settings.read_callback(CompilerSettings::FsReadCallbackKind::ReadFile, abs_filename.c_str());
  if (contents_res.is_error()) {
    fire_error("Failed to read ", contents_res.move_as_error().message().str());
  }

  const int file_id = ++last_registered_file_id;
  SrcFile* created = new SrcFile(file_id, rel_filename, std::move(abs_filename), contents_res.move_as_ok());
  if (G.is_verbosity(1)) {
    std::cerr << "register file_id " << created->file_id << " " << created->abs_filename << std::endl;
  }
  all_src_files.push_back(created);
  return created;
}
// Returns the file following the last one returned by this method, or nullptr if there are no more registered files.
// The returned file is considered parsed from now on.
SrcFile* AllRegisteredSrcFiles::get_next_unparsed_file() {
  const bool has_unparsed = last_parsed_file_id < last_registered_file_id;
  if (!has_unparsed) {
    return nullptr;
  }

  last_parsed_file_id += 1;
  return all_src_files[last_parsed_file_id];
}
// Returns all registered files as a list of pointers to const, in the order of registration.
AllSrcFiles AllRegisteredSrcFiles::get_all_files() const {
  return AllSrcFiles(all_src_files.begin(), all_src_files.end());
}
bool SrcFile::is_offset_valid(int offset) const {
return offset >= 0 && offset < static_cast<int>(text.size());
@ -119,6 +159,12 @@ void SrcLocation::show_context(std::ostream& os) const {
os << '^' << "\n";
}
// Returns the same text that show() prints, as a string.
std::string SrcLocation::to_string() const {
  std::ostringstream buffer;
  show(buffer);
  return buffer.str();
}
std::ostream& operator<<(std::ostream& os, SrcLocation loc) {
loc.show(os);
return os;

View file

@ -21,6 +21,8 @@
namespace tolk {
struct ASTNodeBase;
struct SrcFile {
struct SrcPosition {
int offset;
@ -36,8 +38,8 @@ struct SrcFile {
int file_id; // an incremental counter through all parsed files
std::string rel_filename; // relative to cwd
std::string abs_filename; // absolute from root
std::string text; // file contents loaded into memory, Token::str_val points into it
bool was_parsed = false; // to prevent double parsing when a file is imported multiple times
std::string text; // file contents loaded into memory, every Token::str_val points inside it
const ASTNodeBase* ast = nullptr; // when a file has been parsed, its ast_tolk_file is kept here
std::vector<ImportStatement> imports; // to check strictness (can't use a symbol without importing its file)
SrcFile(int file_id, std::string rel_filename, std::string abs_filename, std::string&& text)
@ -56,16 +58,6 @@ struct SrcFile {
SrcPosition convert_offset(int offset) const;
};
class AllRegisteredSrcFiles {
std::vector<SrcFile*> all_src_files;
int last_file_id = -1;
public:
SrcFile *find_file(int file_id) const;
SrcFile* find_file(const std::string& abs_filename) const;
SrcFile* register_file(const std::string& rel_filename, const std::string& abs_filename, std::string&& text);
const std::vector<SrcFile*>& get_all_files() const { return all_src_files; }
};
// SrcLocation points to a location (line, column) in some loaded .tolk source SrcFile.
// Note, that instead of storing src_file, line_no, etc., only 2 ints are stored.
@ -84,6 +76,7 @@ public:
}
bool is_defined() const { return file_id != -1; }
bool is_stdlib() const { return file_id == 0; }
const SrcFile* get_src_file() const;
// similar to `this->get_src_file() == symbol->get_src_file() || symbol->get_src_file()->is_stdlib()`
@ -94,6 +87,7 @@ public:
void show(std::ostream& os) const;
void show_context(std::ostream& os) const;
std::string to_string() const;
void show_general_error(std::ostream& os, const std::string& message, const std::string& err_type) const;
void show_note(const std::string& err_msg) const;
@ -103,6 +97,23 @@ public:
std::ostream& operator<<(std::ostream& os, SrcLocation loc);
using AllSrcFiles = std::vector<const SrcFile*>;
// Keeps all source files registered during compilation and tracks which of them were already handed out for parsing.
class AllRegisteredSrcFiles {
// registered files, in the order of registration
std::vector<SrcFile*> all_src_files;
// file_id given to the most recently registered file; -1 until the first registration
int last_registered_file_id = -1;
// position of the file most recently returned by get_next_unparsed_file(); -1 until the first call
int last_parsed_file_id = -1;
public:
SrcFile *find_file(int file_id) const;
SrcFile* find_file(const std::string& abs_filename) const;
// resolves and reads a file (or returns an already registered one with the same absolute path);
// throws ParseError if included_from is defined, Fatal otherwise
SrcFile* locate_and_register_source_file(const std::string& rel_filename, SrcLocation included_from);
// returns the next registered file not yet returned by this method, or nullptr if none left
SrcFile* get_next_unparsed_file();
// returns a copy of the list of registered files, as pointers to const
AllSrcFiles get_all_files() const;
};
struct Fatal final : std::exception {
std::string message;

View file

@ -114,19 +114,16 @@ SymDef* lookup_symbol(sym_idx_t idx) {
return nullptr;
}
SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc) {
if (!name_idx) {
return nullptr;
SymDef* define_global_symbol(sym_idx_t name_idx, SrcLocation loc) {
if (SymDef* found = G.global_sym_def[name_idx]) {
return found; // found->value is filled; it means, that a symbol is redefined
}
auto found = G.global_sym_def[name_idx];
if (found) {
return force_new && found->value ? nullptr : found;
}
found = G.global_sym_def[name_idx] = new SymDef(0, name_idx, loc);
SymDef* registered = G.global_sym_def[name_idx] = new SymDef(0, name_idx, loc);
#ifdef TOLK_DEBUG
found->sym_name = found->name();
registered->sym_name = registered->name();
#endif
return found;
return registered; // registered->value is nullptr; it means, it's just created
}
SymDef* define_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc) {
@ -134,7 +131,7 @@ SymDef* define_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc) {
return nullptr;
}
if (!G.scope_level) {
return define_global_symbol(name_idx, force_new, loc);
throw Fatal("unexpected scope_level = 0");
}
auto found = G.sym_def[name_idx];
if (found) {

View file

@ -97,7 +97,7 @@ void open_scope(SrcLocation loc);
void close_scope();
SymDef* lookup_symbol(sym_idx_t idx);
SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new = false, SrcLocation loc = {});
SymDef* define_global_symbol(sym_idx_t name_idx, SrcLocation loc = {});
SymDef* define_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc);
} // namespace tolk

View file

@ -40,18 +40,18 @@ td::Result<std::string> compile_internal(char *config_json) {
TRY_RESULT(opt_level, td::get_json_object_int_field(config, "optimizationLevel", true, 2));
TRY_RESULT(stdlib_tolk, td::get_json_object_string_field(config, "stdlibLocation", false));
TRY_RESULT(stack_comments, td::get_json_object_bool_field(config, "withStackComments", true, false));
TRY_RESULT(entrypoint_file_name, td::get_json_object_string_field(config, "entrypointFileName", false));
TRY_RESULT(entrypoint_filename, td::get_json_object_string_field(config, "entrypointFileName", false));
G.settings.verbosity = 0;
G.settings.optimization_level = std::max(0, opt_level);
G.settings.stdlib_filename = stdlib_tolk;
G.settings.stack_layout_comments = stack_comments;
G.settings.entrypoint_filename = entrypoint_file_name;
G.settings.entrypoint_filename = entrypoint_filename;
std::ostringstream outs, errs;
std::cout.rdbuf(outs.rdbuf());
std::cerr.rdbuf(errs.rdbuf());
int tolk_res = tolk::tolk_proceed(entrypoint_file_name);
int tolk_res = tolk::tolk_proceed(entrypoint_filename);
if (tolk_res != 0) {
return td::Status::Error("Tolk compilation error: " + errs.str());
}

View file

@ -24,264 +24,37 @@
from all source files in the program, then also delete it here.
*/
#include "tolk.h"
#include "pipeline.h"
#include "compiler-state.h"
#include "lexer.h"
#include <getopt.h>
#include "ast-from-tokens.h"
#include "ast-to-legacy.h"
#include <fstream>
#include "td/utils/port/path.h"
#include <sys/stat.h>
#include "ast.h"
namespace tolk {
// returns the argument type of a function (nullptr if the function type is not set)
// note, that a function with multiple arguments has a tensor as its arg type (no arguments — an empty tensor)
// in other words, `f(int a, int b)` and `f((int,int) ab)` are the same when we speak about types
const TypeExpr *SymValFunc::get_arg_type() const {
  if (sym_type == nullptr) {
    return nullptr;
  }

  tolk_assert(sym_type->constr == TypeExpr::te_Map || sym_type->constr == TypeExpr::te_ForAll);
  // for a forall-wrapped function, the map itself is the first argument of forall
  const bool is_forall = sym_type->constr == TypeExpr::te_ForAll;
  const TypeExpr* te_map = is_forall ? sym_type->args[0] : sym_type;

  // skip indirections to reach the real type
  const TypeExpr* arg_type = te_map->args[0];
  for (; arg_type->constr == TypeExpr::te_Indirect; arg_type = arg_type->args[0]) {
  }
  return arg_type;
}
// Tells whether a body of this function should be present in the output.
bool SymValCodeFunc::does_need_codegen() const {
  // a function which is declared, but not referenced from code in any way, has no body generated
  const bool is_unused_and_removable = !is_really_used && G.pragma_remove_unused_functions.enabled();
  if (is_unused_and_removable) {
    return false;
  }

  // a function referenced like `var a = some_fn;` (or in some other non-call way) must have its continuation;
  // a function f() which is just `return anotherF(...args)` needs no codegen otherwise, since all its usages are inlined
  // (in the future, we may want to implement a true AST inlining for `inline` functions also)
  const bool is_referenced_as_non_call = (flags & flagUsedAsNonCall) != 0;
  return is_referenced_as_non_call || !is_just_wrapper_for_another_f();
}
void mark_function_used_dfs(const std::unique_ptr<Op>& op);
// Marks a function as really used and walks through its code to mark everything referenced from it.
// A function without code, or the one already marked, is left untouched (this also stops recursion on cycles).
void mark_function_used(SymValCodeFunc* func_val) {
  const bool nothing_to_do = !func_val->code || func_val->is_really_used;
  if (nothing_to_do) {
    return;
  }

  func_val->is_really_used = true;
  mark_function_used_dfs(func_val->code->ops);
}
// Marks a global variable as really used (sets its is_really_used flag).
void mark_global_var_used(SymValGlobVar* glob_val) {
glob_val->is_really_used = true;
}
void mark_function_used_dfs(const std::unique_ptr<Op>& op) {
if (!op) {
return;
}
// op->fun_ref, despite its name, may actually ref global var
// note, that for non-calls, e.g. `var a = some_fn` (Op::_Let), some_fn is Op::_GlobVar
// (in other words, fun_ref exists not only for direct Op::_Call, but for non-call references also)
if (op->fun_ref) {
if (auto* func_val = dynamic_cast<SymValCodeFunc*>(op->fun_ref->value)) {
mark_function_used(func_val);
} else if (auto* glob_val = dynamic_cast<SymValGlobVar*>(op->fun_ref->value)) {
mark_global_var_used(glob_val);
} else if (auto* asm_val = dynamic_cast<SymValAsmFunc*>(op->fun_ref->value)) {
} else {
tolk_assert(false);
}
}
mark_function_used_dfs(op->next);
mark_function_used_dfs(op->block0);
mark_function_used_dfs(op->block1);
}
// Starts marking of used symbols from the roots:
// functions having a method_id and functions with predefined entrypoint names.
void mark_used_symbols() {
  const auto is_entrypoint_name = [](const std::string& name) {
    return name == "main" || name == "recv_internal" || name == "recv_external" ||
           name == "run_ticktock" || name == "split_prepare" || name == "split_install";
  };

  for (SymDef* func_sym : G.glob_func) {
    auto* func_val = dynamic_cast<SymValCodeFunc*>(func_sym->value);
    std::string name = G.symbols.get_name(func_sym->sym_idx);
    const bool is_root = func_val->method_id.not_null() || is_entrypoint_name(name);
    if (is_root) {
      mark_function_used(func_val);
    }
  }
}
/*
*
* OUTPUT CODE GENERATOR
*
*/
// Runs code transformations for a function and prints its body (PROC / PROCINLINE / PROCREF) to std::cout.
// Debug dumps go to std::cerr, depending on verbosity level.
// Throws ParseError if the function has no code.
void generate_output_func(SymDef* func_sym) {
  SymValCodeFunc* func_val = dynamic_cast<SymValCodeFunc*>(func_sym->value);
  tolk_assert(func_val);
  std::string name = G.symbols.get_name(func_sym->sym_idx);
  if (G.is_verbosity(2)) {
    std::cerr << "\n\n=========================\nfunction " << name << " : " << func_val->get_type() << std::endl;
  }
  if (!func_val->code) {
    throw ParseError(func_sym->loc, "function `" + name + "` is just declared, not implemented");
  }

  CodeBlob& code = *(func_val->code);
  // prints a title and current state of code, if verbosity is high enough
  const auto dump_code = [&code](int min_verbosity, const char* title, int print_flags) {
    if (G.is_verbosity(min_verbosity)) {
      std::cerr << title;
      code.print(std::cerr, print_flags);
    }
  };

  if (G.is_verbosity(3)) {
    code.print(std::cerr, 9);
  }
  code.simplify_var_types();
  dump_code(5, "after simplify_var_types: \n", 0);
  code.prune_unreachable_code();
  dump_code(5, "after prune_unreachable: \n", 0);
  code.split_vars(true);
  dump_code(5, "after split_vars: \n", 0);
  for (int iteration = 0; iteration < 8; iteration++) {
    code.compute_used_code_vars();
    dump_code(4, "after compute_used_vars: \n", 6);
    code.fwd_analyze();
    dump_code(5, "after fwd_analyze: \n", 6);
    code.prune_unreachable_code();
    dump_code(5, "after prune_unreachable: \n", 6);
  }
  code.mark_noreturn();
  if (G.is_verbosity(3)) {
    code.print(std::cerr, 15);
  }
  if (G.is_verbosity(2)) {
    std::cerr << "\n---------- resulting code for " << name << " -------------\n";
  }

  const bool is_inline = func_val->is_inline();
  const bool is_inline_ref = func_val->is_inline_ref();
  const char* modifier = is_inline ? "INLINE" : is_inline_ref ? "REF" : "";
  std::cout << "  " << name << " PROC" << modifier << ":<{\n";

  int mode = 0;
  if (G.settings.stack_layout_comments) {
    mode |= Stack::_StkCmt | Stack::_CptStkCmt;
  }
  if (is_inline && code.ops->noreturn()) {
    mode |= Stack::_InlineFunc;
  }
  if (is_inline || is_inline_ref) {
    mode |= Stack::_InlineAny;
  }
  code.generate_code(std::cout, mode, 2);
  std::cout << "  " << "}>\n";
  if (G.is_verbosity(2)) {
    std::cerr << "--------------\n";
  }
}
// this function either throws or successfully prints the whole program output to std::cout:
// declarations of functions, declarations of global vars, then bodies of functions
// (functions not needing codegen and unused global vars are skipped)
void generate_output() {
  std::cout << "\"Asm.fif\" include\n";
  std::cout << "// automatically generated from " << G.generated_from << std::endl;
  std::cout << "PROGRAM{\n";
  mark_used_symbols();

  // declarations of functions
  for (SymDef* func_sym : G.glob_func) {
    SymValCodeFunc* func_val = dynamic_cast<SymValCodeFunc*>(func_sym->value);
    tolk_assert(func_val);
    if (!func_val->does_need_codegen()) {
      if (G.is_verbosity(2)) {
        std::cerr << func_sym->name() << ": code not generated, function does not need codegen\n";
      }
      continue;
    }

    std::string name = G.symbols.get_name(func_sym->sym_idx);
    std::cout << "  ";
    if (func_val->method_id.is_null()) {
      std::cout << "DECLPROC " << name << "\n";
    } else {
      std::cout << func_val->method_id << " DECLMETHOD " << name << "\n";
    }
  }

  // declarations of global vars
  for (SymDef* gvar_sym : G.glob_vars) {
    auto* glob_val = dynamic_cast<SymValGlobVar*>(gvar_sym->value);
    tolk_assert(glob_val);
    const bool is_removed_as_unused = !glob_val->is_really_used && G.pragma_remove_unused_functions.enabled();
    if (is_removed_as_unused) {
      if (G.is_verbosity(2)) {
        std::cerr << gvar_sym->name() << ": variable not generated, it's unused\n";
      }
      continue;
    }

    std::string name = G.symbols.get_name(gvar_sym->sym_idx);
    std::cout << "  " << "DECLGLOBVAR " << name << "\n";
  }

  // bodies of functions
  for (SymDef* func_sym : G.glob_func) {
    SymValCodeFunc* func_val = dynamic_cast<SymValCodeFunc*>(func_sym->value);
    if (func_val->does_need_codegen()) {
      generate_output_func(func_sym);
    }
  }

  std::cout << "}END>c\n";
  if (!G.settings.boc_output_filename.empty()) {
    std::cout << "boc>B \"" << G.settings.boc_output_filename << "\" B>file\n";
  }
}
int tolk_proceed(const std::string &entrypoint_file_name) {
int tolk_proceed(const std::string &entrypoint_filename) {
define_builtins();
lexer_init();
G.pragma_allow_post_modification.always_on_and_deprecated("0.5.0");
G.pragma_compute_asm_ltr.always_on_and_deprecated("0.5.0");
try {
{
if (G.settings.stdlib_filename.empty()) {
throw Fatal("stdlib filename not specified");
}
td::Result<SrcFile*> locate_res = locate_source_file(G.settings.stdlib_filename);
if (locate_res.is_error()) {
throw Fatal("Failed to locate stdlib: " + locate_res.error().message().str());
}
process_file_ast(parse_src_file_to_ast(locate_res.move_as_ok()));
if (G.settings.stdlib_filename.empty()) {
throw Fatal("stdlib filename not specified");
}
td::Result<SrcFile*> locate_res = locate_source_file(entrypoint_file_name);
if (locate_res.is_error()) {
throw Fatal("Failed to locate " + entrypoint_file_name + ": " + locate_res.error().message().str());
}
process_file_ast(parse_src_file_to_ast(locate_res.move_as_ok()));
// todo #ifdef TOLK_PROFILING + comment
// lexer_measure_performance(all_src_files.get_all_files());
// on any error, an exception is thrown, and the message is printed out below
// (currently, only a single error can be printed)
AllSrcFiles all_files = pipeline_discover_and_parse_sources(G.settings.stdlib_filename, entrypoint_filename);
pipeline_handle_pragmas(all_files);
pipeline_register_global_symbols(all_files);
pipeline_convert_ast_to_legacy_Expr_Op(all_files);
pipeline_find_unused_symbols();
pipeline_generate_fif_output_to_std_cout();
generate_output();
return 0;
} catch (Fatal& fatal) {
std::cerr << "fatal: " << fatal << std::endl;

View file

@ -446,7 +446,8 @@ struct CodeBlob {
std::stack<std::unique_ptr<Op>*> cur_ops_stack;
int flags = 0;
bool require_callxargs = false;
CodeBlob(TypeExpr* ret = nullptr) : var_cnt(0), in_var_cnt(0), op_cnt(0), ret_type(ret), cur_ops(&ops) {
CodeBlob(std::string name, SrcLocation loc, TypeExpr* ret)
: var_cnt(0), in_var_cnt(0), op_cnt(0), ret_type(ret), name(std::move(name)), loc(loc), cur_ops(&ops) {
}
template <typename... Args>
Op& emplace_back(Args&&... args) {
@ -539,7 +540,6 @@ struct SymValFunc : SymVal {
const std::vector<int>* get_ret_order() const {
return ret_order.empty() ? nullptr : &ret_order;
}
const TypeExpr* get_arg_type() const;
bool is_inline() const {
return flags & flagInline;
@ -568,6 +568,7 @@ struct SymValCodeFunc : SymValFunc {
SymValCodeFunc(int val, TypeExpr* _ft, bool marked_as_pure) : SymValFunc(val, _ft, marked_as_pure), code(nullptr) {
}
bool does_need_codegen() const;
void set_code(CodeBlob* code);
};
struct SymValGlobVar : SymValBase {
@ -592,6 +593,9 @@ struct SymValConst : SymValBase {
td::RefInt256 intval;
std::string strval;
ConstKind kind;
#ifdef TOLK_DEBUG
std::string name; // seeing const name in debugger makes it much easier to delve into Tolk sources
#endif
SymValConst(int idx, td::RefInt256 value)
: SymValBase(SymValKind::_Const, idx), intval(value), kind(IntConst) {
}
@ -611,17 +615,6 @@ struct SymValConst : SymValBase {
};
/*
*
* PARSE SOURCE
*
*/
// defined in parse-tolk.cpp
td::Result<SrcFile*> locate_source_file(const std::string& rel_filename);
/*
*
* EXPRESSIONS
@ -1432,10 +1425,11 @@ inline compile_func_t make_ext_compile(AsmOp op) {
struct SymValAsmFunc : SymValFunc {
simple_compile_func_t simple_compile;
compile_func_t ext_compile;
td::uint64 crc;
~SymValAsmFunc() override = default;
SymValAsmFunc(TypeExpr* ft, std::vector<AsmOp>&& _macro, bool marked_as_pure)
: SymValFunc(-1, ft, marked_as_pure), ext_compile(make_ext_compile(std::move(_macro))) {
SymValAsmFunc(TypeExpr* ft, std::vector<int>&& arg_order, std::vector<int>&& ret_order, bool marked_as_pure)
: SymValFunc(-1, ft, marked_as_pure) {
this->arg_order = std::move(arg_order);
this->ret_order = std::move(ret_order);
}
SymValAsmFunc(TypeExpr* ft, simple_compile_func_t _compile, bool marked_as_pure)
: SymValFunc(-1, ft, marked_as_pure), simple_compile(std::move(_compile)) {
@ -1451,6 +1445,7 @@ struct SymValAsmFunc : SymValFunc {
std::initializer_list<int> ret_order = {}, bool marked_as_pure = false)
: SymValFunc(-1, ft, arg_order, ret_order, marked_as_pure), ext_compile(std::move(_compile)) {
}
void set_code(std::vector<AsmOp> code);
bool compile(AsmOpList& dest, std::vector<VarDescr>& out, std::vector<VarDescr>& in, SrcLocation where) const;
};
@ -1472,7 +1467,7 @@ void define_builtins();
*
*/
int tolk_proceed(const std::string &entrypoint_file_name);
int tolk_proceed(const std::string &entrypoint_filename);
} // namespace tolk