mirror of
https://github.com/ton-blockchain/ton
synced 2025-03-09 15:40:10 +00:00
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals (previously forked from FunC). The goal is to convert AST directly to Op (a kind of IR representation), doing all code analysis at AST level. Noteable changes: - AST-based semantic kernel includes: registering global symbols, scope handling and resolving local/global identifiers, lvalue/rvalue calc and check, implicit return detection, mutability analysis, pure/impure validity checks, simple constant folding - values of `const` variables are calculated NOT based on CodeBlob, but via a newly-introduced AST-based constant evaluator - AST vertices are now inherited from expression/statement/other; expression vertices have common properties (TypeExpr, lvalue/rvalue) - symbol table is rewritten completely, SymDef/SymVal no longer exist, lexer now doesn't need to register identifiers - AST vertices have references to symbols, filled at different stages of pipeline - the remaining "FunC legacy part" is almost unchanged besides Expr which was fully dropped; AST is converted to Ops (IR) directly
This commit is contained in:
parent
ea0dc16163
commit
3540424aa1
71 changed files with 4270 additions and 3060 deletions
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
This file is part of TON Blockchain source code.
|
||||
This file is part of TON Blockchain source code->
|
||||
|
||||
TON Blockchain is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
|
@ -30,106 +30,86 @@
|
|||
|
||||
namespace tolk {
|
||||
|
||||
bool SymValCodeFunc::does_need_codegen() const {
|
||||
// when a function is declared, but not referenced from code in any way, don't generate its body
|
||||
if (!is_really_used && G.settings.remove_unused_functions) {
|
||||
return false;
|
||||
}
|
||||
// when a function is referenced like `var a = some_fn;` (or in some other non-call way), its continuation should exist
|
||||
if (flags & flagUsedAsNonCall) {
|
||||
return true;
|
||||
}
|
||||
// currently, there is no inlining, all functions are codegenerated
|
||||
// (but actually, unused ones are later removed by Fift)
|
||||
// in the future, we may want to implement a true AST inlining for "simple" functions
|
||||
return true;
|
||||
}
|
||||
|
||||
void SymValCodeFunc::set_code(CodeBlob* code) {
|
||||
void FunctionBodyCode::set_code(CodeBlob* code) {
|
||||
this->code = code;
|
||||
}
|
||||
|
||||
void SymValAsmFunc::set_code(std::vector<AsmOp> code) {
|
||||
this->ext_compile = make_ext_compile(std::move(code));
|
||||
void FunctionBodyAsm::set_code(std::vector<AsmOp>&& code) {
|
||||
this->ops = std::move(code);
|
||||
}
|
||||
|
||||
|
||||
static void generate_output_func(SymDef* func_sym) {
|
||||
SymValCodeFunc* func_val = dynamic_cast<SymValCodeFunc*>(func_sym->value);
|
||||
tolk_assert(func_val);
|
||||
std::string name = G.symbols.get_name(func_sym->sym_idx);
|
||||
static void generate_output_func(const FunctionData* fun_ref) {
|
||||
tolk_assert(fun_ref->is_regular_function());
|
||||
if (G.is_verbosity(2)) {
|
||||
std::cerr << "\n\n=========================\nfunction " << name << " : " << func_val->get_type() << std::endl;
|
||||
std::cerr << "\n\n=========================\nfunction " << fun_ref->name << " : " << fun_ref->full_type << std::endl;
|
||||
}
|
||||
if (!func_val->code) {
|
||||
throw ParseError(func_sym->loc, "function `" + name + "` is just declared, not implemented");
|
||||
} else {
|
||||
CodeBlob& code = *(func_val->code);
|
||||
if (G.is_verbosity(3)) {
|
||||
code.print(std::cerr, 9);
|
||||
|
||||
CodeBlob* code = std::get<FunctionBodyCode*>(fun_ref->body)->code;
|
||||
if (G.is_verbosity(3)) {
|
||||
code->print(std::cerr, 9);
|
||||
}
|
||||
code->simplify_var_types();
|
||||
if (G.is_verbosity(5)) {
|
||||
std::cerr << "after simplify_var_types: \n";
|
||||
code->print(std::cerr, 0);
|
||||
}
|
||||
code->prune_unreachable_code();
|
||||
if (G.is_verbosity(5)) {
|
||||
std::cerr << "after prune_unreachable: \n";
|
||||
code->print(std::cerr, 0);
|
||||
}
|
||||
code->split_vars(true);
|
||||
if (G.is_verbosity(5)) {
|
||||
std::cerr << "after split_vars: \n";
|
||||
code->print(std::cerr, 0);
|
||||
}
|
||||
for (int i = 0; i < 8; i++) {
|
||||
code->compute_used_code_vars();
|
||||
if (G.is_verbosity(4)) {
|
||||
std::cerr << "after compute_used_vars: \n";
|
||||
code->print(std::cerr, 6);
|
||||
}
|
||||
code.simplify_var_types();
|
||||
code->fwd_analyze();
|
||||
if (G.is_verbosity(5)) {
|
||||
std::cerr << "after simplify_var_types: \n";
|
||||
code.print(std::cerr, 0);
|
||||
std::cerr << "after fwd_analyze: \n";
|
||||
code->print(std::cerr, 6);
|
||||
}
|
||||
code.prune_unreachable_code();
|
||||
code->prune_unreachable_code();
|
||||
if (G.is_verbosity(5)) {
|
||||
std::cerr << "after prune_unreachable: \n";
|
||||
code.print(std::cerr, 0);
|
||||
}
|
||||
code.split_vars(true);
|
||||
if (G.is_verbosity(5)) {
|
||||
std::cerr << "after split_vars: \n";
|
||||
code.print(std::cerr, 0);
|
||||
}
|
||||
for (int i = 0; i < 8; i++) {
|
||||
code.compute_used_code_vars();
|
||||
if (G.is_verbosity(4)) {
|
||||
std::cerr << "after compute_used_vars: \n";
|
||||
code.print(std::cerr, 6);
|
||||
}
|
||||
code.fwd_analyze();
|
||||
if (G.is_verbosity(5)) {
|
||||
std::cerr << "after fwd_analyze: \n";
|
||||
code.print(std::cerr, 6);
|
||||
}
|
||||
code.prune_unreachable_code();
|
||||
if (G.is_verbosity(5)) {
|
||||
std::cerr << "after prune_unreachable: \n";
|
||||
code.print(std::cerr, 6);
|
||||
}
|
||||
}
|
||||
code.mark_noreturn();
|
||||
if (G.is_verbosity(3)) {
|
||||
code.print(std::cerr, 15);
|
||||
}
|
||||
if (G.is_verbosity(2)) {
|
||||
std::cerr << "\n---------- resulting code for " << name << " -------------\n";
|
||||
}
|
||||
const char* modifier = "";
|
||||
if (func_val->is_inline()) {
|
||||
modifier = "INLINE";
|
||||
} else if (func_val->is_inline_ref()) {
|
||||
modifier = "REF";
|
||||
}
|
||||
std::cout << std::string(2, ' ') << name << " PROC" << modifier << ":<{\n";
|
||||
int mode = 0;
|
||||
if (G.settings.stack_layout_comments) {
|
||||
mode |= Stack::_StkCmt | Stack::_CptStkCmt;
|
||||
}
|
||||
if (func_val->is_inline() && code.ops->noreturn()) {
|
||||
mode |= Stack::_InlineFunc;
|
||||
}
|
||||
if (func_val->is_inline() || func_val->is_inline_ref()) {
|
||||
mode |= Stack::_InlineAny;
|
||||
}
|
||||
code.generate_code(std::cout, mode, 2);
|
||||
std::cout << std::string(2, ' ') << "}>\n";
|
||||
if (G.is_verbosity(2)) {
|
||||
std::cerr << "--------------\n";
|
||||
code->print(std::cerr, 6);
|
||||
}
|
||||
}
|
||||
code->mark_noreturn();
|
||||
if (G.is_verbosity(3)) {
|
||||
code->print(std::cerr, 15);
|
||||
}
|
||||
if (G.is_verbosity(2)) {
|
||||
std::cerr << "\n---------- resulting code for " << fun_ref->name << " -------------\n";
|
||||
}
|
||||
const char* modifier = "";
|
||||
if (fun_ref->is_inline()) {
|
||||
modifier = "INLINE";
|
||||
} else if (fun_ref->is_inline_ref()) {
|
||||
modifier = "REF";
|
||||
}
|
||||
std::cout << std::string(2, ' ') << fun_ref->name << " PROC" << modifier << ":<{\n";
|
||||
int mode = 0;
|
||||
if (G.settings.stack_layout_comments) {
|
||||
mode |= Stack::_StkCmt | Stack::_CptStkCmt;
|
||||
}
|
||||
if (fun_ref->is_inline() && code->ops->noreturn()) {
|
||||
mode |= Stack::_InlineFunc;
|
||||
}
|
||||
if (fun_ref->is_inline() || fun_ref->is_inline_ref()) {
|
||||
mode |= Stack::_InlineAny;
|
||||
}
|
||||
code->generate_code(std::cout, mode, 2);
|
||||
std::cout << std::string(2, ' ') << "}>\n";
|
||||
if (G.is_verbosity(2)) {
|
||||
std::cerr << "--------------\n";
|
||||
}
|
||||
}
|
||||
|
||||
void pipeline_generate_fif_output_to_std_cout(const AllSrcFiles& all_src_files) {
|
||||
|
@ -149,26 +129,23 @@ void pipeline_generate_fif_output_to_std_cout(const AllSrcFiles& all_src_files)
|
|||
std::cout << "PROGRAM{\n";
|
||||
|
||||
bool has_main_procedure = false;
|
||||
for (SymDef* func_sym : G.all_code_functions) {
|
||||
SymValCodeFunc* func_val = dynamic_cast<SymValCodeFunc*>(func_sym->value);
|
||||
tolk_assert(func_val);
|
||||
if (!func_val->does_need_codegen()) {
|
||||
for (const FunctionData* fun_ref : G.all_code_functions) {
|
||||
if (!fun_ref->does_need_codegen()) {
|
||||
if (G.is_verbosity(2)) {
|
||||
std::cerr << func_sym->name() << ": code not generated, function does not need codegen\n";
|
||||
std::cerr << fun_ref->name << ": code not generated, function does not need codegen\n";
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
std::string name = G.symbols.get_name(func_sym->sym_idx);
|
||||
if (func_val->is_entrypoint() && (name == "main" || name == "onInternalMessage")) {
|
||||
if (fun_ref->is_entrypoint() && (fun_ref->name == "main" || fun_ref->name == "onInternalMessage")) {
|
||||
has_main_procedure = true;
|
||||
}
|
||||
|
||||
std::cout << std::string(2, ' ');
|
||||
if (func_val->method_id.is_null()) {
|
||||
std::cout << "DECLPROC " << name << "\n";
|
||||
if (fun_ref->is_method_id_not_empty()) {
|
||||
std::cout << fun_ref->method_id << " DECLMETHOD " << fun_ref->name << "\n";
|
||||
} else {
|
||||
std::cout << func_val->method_id << " DECLMETHOD " << name << "\n";
|
||||
std::cout << "DECLPROC " << fun_ref->name << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -176,25 +153,22 @@ void pipeline_generate_fif_output_to_std_cout(const AllSrcFiles& all_src_files)
|
|||
throw Fatal("the contract has no entrypoint; forgot `fun onInternalMessage(...)`?");
|
||||
}
|
||||
|
||||
for (SymDef* gvar_sym : G.all_global_vars) {
|
||||
auto* glob_val = dynamic_cast<SymValGlobVar*>(gvar_sym->value);
|
||||
tolk_assert(glob_val);
|
||||
if (!glob_val->is_really_used && G.settings.remove_unused_functions) {
|
||||
for (const GlobalVarData* var_ref : G.all_global_vars) {
|
||||
if (!var_ref->is_really_used() && G.settings.remove_unused_functions) {
|
||||
if (G.is_verbosity(2)) {
|
||||
std::cerr << gvar_sym->name() << ": variable not generated, it's unused\n";
|
||||
std::cerr << var_ref->name << ": variable not generated, it's unused\n";
|
||||
}
|
||||
continue;
|
||||
}
|
||||
std::string name = G.symbols.get_name(gvar_sym->sym_idx);
|
||||
std::cout << std::string(2, ' ') << "DECLGLOBVAR " << name << "\n";
|
||||
|
||||
std::cout << std::string(2, ' ') << "DECLGLOBVAR " << var_ref->name << "\n";
|
||||
}
|
||||
|
||||
for (SymDef* func_sym : G.all_code_functions) {
|
||||
SymValCodeFunc* func_val = dynamic_cast<SymValCodeFunc*>(func_sym->value);
|
||||
if (!func_val->does_need_codegen()) {
|
||||
for (const FunctionData* fun_ref : G.all_code_functions) {
|
||||
if (!fun_ref->does_need_codegen()) {
|
||||
continue;
|
||||
}
|
||||
generate_output_func(func_sym);
|
||||
generate_output_func(fun_ref);
|
||||
}
|
||||
|
||||
std::cout << "}END>c\n";
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue