mirror of
https://github.com/ton-blockchain/ton
synced 2025-03-09 15:40:10 +00:00
[Tolk] Compilation pipeline, register global symbols in advance
Now that the AST is implemented, forward declarations can be dropped. Instead, the AST of all files is traversed and global symbols (functions, constants, global vars) are registered in advance, as a separate step. As a result, by the time the AST is converted to Expr/Op, all available symbols are already registered. This greatly simplifies handling the "intermediate state" of not-yet-known functions and checking them afterward. Redeclaration of local variables (inside the same scope) is now also prohibited.
This commit is contained in:
parent
80001d1756
commit
5a3e3595d6
28 changed files with 1266 additions and 1134 deletions
259
tolk/tolk.cpp
259
tolk/tolk.cpp
|
@ -24,264 +24,37 @@
|
|||
from all source files in the program, then also delete it here.
|
||||
*/
|
||||
#include "tolk.h"
|
||||
#include "pipeline.h"
|
||||
#include "compiler-state.h"
|
||||
#include "lexer.h"
|
||||
#include <getopt.h>
|
||||
#include "ast-from-tokens.h"
|
||||
#include "ast-to-legacy.h"
|
||||
#include <fstream>
|
||||
#include "td/utils/port/path.h"
|
||||
#include <sys/stat.h>
|
||||
#include "ast.h"
|
||||
|
||||
namespace tolk {
|
||||
|
||||
// returns argument type of a function
|
||||
// note, that when a function has multiple arguments, its arg type is a tensor (no arguments — an empty tensor)
|
||||
// in other words, `f(int a, int b)` and `f((int,int) ab)` is the same when we speak about types
|
||||
const TypeExpr *SymValFunc::get_arg_type() const {
|
||||
if (!sym_type)
|
||||
return nullptr;
|
||||
|
||||
tolk_assert(sym_type->constr == TypeExpr::te_Map || sym_type->constr == TypeExpr::te_ForAll);
|
||||
const TypeExpr *te_map = sym_type->constr == TypeExpr::te_ForAll ? sym_type->args[0] : sym_type;
|
||||
const TypeExpr *arg_type = te_map->args[0];
|
||||
|
||||
while (arg_type->constr == TypeExpr::te_Indirect) {
|
||||
arg_type = arg_type->args[0];
|
||||
}
|
||||
return arg_type;
|
||||
}
|
||||
|
||||
|
||||
bool SymValCodeFunc::does_need_codegen() const {
|
||||
// when a function is declared, but not referenced from code in any way, don't generate its body
|
||||
if (!is_really_used && G.pragma_remove_unused_functions.enabled()) {
|
||||
return false;
|
||||
}
|
||||
// when a function is referenced like `var a = some_fn;` (or in some other non-call way), its continuation should exist
|
||||
if (flags & flagUsedAsNonCall) {
|
||||
return true;
|
||||
}
|
||||
// when a function f() is just `return anotherF(...args)`, it doesn't need to be codegenerated at all,
|
||||
// since all its usages are inlined
|
||||
return !is_just_wrapper_for_another_f();
|
||||
// in the future, we may want to implement a true AST inlining for `inline` functions also
|
||||
}
|
||||
|
||||
void mark_function_used_dfs(const std::unique_ptr<Op>& op);
|
||||
|
||||
void mark_function_used(SymValCodeFunc* func_val) {
|
||||
if (!func_val->code || func_val->is_really_used) { // already handled
|
||||
return;
|
||||
}
|
||||
|
||||
func_val->is_really_used = true;
|
||||
mark_function_used_dfs(func_val->code->ops);
|
||||
}
|
||||
|
||||
// Flags a global variable as really used.
void mark_global_var_used(SymValGlobVar* glob_val) {
  glob_val->is_really_used = true;
}
|
||||
|
||||
void mark_function_used_dfs(const std::unique_ptr<Op>& op) {
|
||||
if (!op) {
|
||||
return;
|
||||
}
|
||||
// op->fun_ref, despite its name, may actually ref global var
|
||||
// note, that for non-calls, e.g. `var a = some_fn` (Op::_Let), some_fn is Op::_GlobVar
|
||||
// (in other words, fun_ref exists not only for direct Op::_Call, but for non-call references also)
|
||||
if (op->fun_ref) {
|
||||
if (auto* func_val = dynamic_cast<SymValCodeFunc*>(op->fun_ref->value)) {
|
||||
mark_function_used(func_val);
|
||||
} else if (auto* glob_val = dynamic_cast<SymValGlobVar*>(op->fun_ref->value)) {
|
||||
mark_global_var_used(glob_val);
|
||||
} else if (auto* asm_val = dynamic_cast<SymValAsmFunc*>(op->fun_ref->value)) {
|
||||
} else {
|
||||
tolk_assert(false);
|
||||
}
|
||||
}
|
||||
mark_function_used_dfs(op->next);
|
||||
mark_function_used_dfs(op->block0);
|
||||
mark_function_used_dfs(op->block1);
|
||||
}
|
||||
|
||||
void mark_used_symbols() {
|
||||
for (SymDef* func_sym : G.glob_func) {
|
||||
auto* func_val = dynamic_cast<SymValCodeFunc*>(func_sym->value);
|
||||
std::string name = G.symbols.get_name(func_sym->sym_idx);
|
||||
if (func_val->method_id.not_null() ||
|
||||
name == "main" || name == "recv_internal" || name == "recv_external" ||
|
||||
name == "run_ticktock" || name == "split_prepare" || name == "split_install") {
|
||||
mark_function_used(func_val);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
*
|
||||
* OUTPUT CODE GENERATOR
|
||||
*
|
||||
*/
|
||||
|
||||
// Runs the CodeBlob passes for one function and prints its Fift procedure to std::cout.
// Intermediate states are dumped to std::cerr depending on verbosity.
// Throws ParseError when the function has no code (declared, but not implemented).
void generate_output_func(SymDef* func_sym) {
  auto* func_val = dynamic_cast<SymValCodeFunc*>(func_sym->value);
  tolk_assert(func_val);

  std::string name = G.symbols.get_name(func_sym->sym_idx);
  if (G.is_verbosity(2)) {
    std::cerr << "\n\n=========================\nfunction " << name << " : " << func_val->get_type() << std::endl;
  }
  if (!func_val->code) {
    throw ParseError(func_sym->loc, "function `" + name + "` is just declared, not implemented");
  }

  CodeBlob& code = *(func_val->code);

  // prints a titled snapshot of the code to std::cerr when the verbosity level is reached
  auto dump_stage = [&code](int level, const char* title, int print_mode) {
    if (G.is_verbosity(level)) {
      std::cerr << title;
      code.print(std::cerr, print_mode);
    }
  };

  if (G.is_verbosity(3)) {
    code.print(std::cerr, 9);
  }

  code.simplify_var_types();
  dump_stage(5, "after simplify_var_types: \n", 0);

  code.prune_unreachable_code();
  dump_stage(5, "after prune_unreachable: \n", 0);

  code.split_vars(true);
  dump_stage(5, "after split_vars: \n", 0);

  // a fixed number of analysis rounds
  for (int round = 0; round < 8; ++round) {
    code.compute_used_code_vars();
    dump_stage(4, "after compute_used_vars: \n", 6);

    code.fwd_analyze();
    dump_stage(5, "after fwd_analyze: \n", 6);

    code.prune_unreachable_code();
    dump_stage(5, "after prune_unreachable: \n", 6);
  }

  code.mark_noreturn();
  if (G.is_verbosity(3)) {
    code.print(std::cerr, 15);
  }
  if (G.is_verbosity(2)) {
    std::cerr << "\n---------- resulting code for " << name << " -------------\n";
  }

  // inline takes precedence over inline_ref, which is queried only for non-inline functions
  const bool inlined = func_val->is_inline();
  const bool inlined_ref = !inlined && func_val->is_inline_ref();
  const char* modifier = inlined ? "INLINE" : (inlined_ref ? "REF" : "");

  std::cout << std::string(2, ' ') << name << " PROC" << modifier << ":<{\n";

  int mode = 0;
  if (G.settings.stack_layout_comments) {
    mode |= Stack::_StkCmt | Stack::_CptStkCmt;
  }
  if (inlined && code.ops->noreturn()) {
    mode |= Stack::_InlineFunc;
  }
  if (inlined || inlined_ref) {
    mode |= Stack::_InlineAny;
  }

  code.generate_code(std::cout, mode, 2);
  std::cout << std::string(2, ' ') << "}>\n";

  if (G.is_verbosity(2)) {
    std::cerr << "--------------\n";
  }
}
|
||||
|
||||
// this function either throws or successfully prints whole program output to std::cout
|
||||
void generate_output() {
|
||||
std::cout << "\"Asm.fif\" include\n";
|
||||
std::cout << "// automatically generated from " << G.generated_from << std::endl;
|
||||
std::cout << "PROGRAM{\n";
|
||||
mark_used_symbols();
|
||||
|
||||
for (SymDef* func_sym : G.glob_func) {
|
||||
SymValCodeFunc* func_val = dynamic_cast<SymValCodeFunc*>(func_sym->value);
|
||||
tolk_assert(func_val);
|
||||
if (!func_val->does_need_codegen()) {
|
||||
if (G.is_verbosity(2)) {
|
||||
std::cerr << func_sym->name() << ": code not generated, function does not need codegen\n";
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
std::string name = G.symbols.get_name(func_sym->sym_idx);
|
||||
std::cout << std::string(2, ' ');
|
||||
if (func_val->method_id.is_null()) {
|
||||
std::cout << "DECLPROC " << name << "\n";
|
||||
} else {
|
||||
std::cout << func_val->method_id << " DECLMETHOD " << name << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
for (SymDef* gvar_sym : G.glob_vars) {
|
||||
auto* glob_val = dynamic_cast<SymValGlobVar*>(gvar_sym->value);
|
||||
tolk_assert(glob_val);
|
||||
if (!glob_val->is_really_used && G.pragma_remove_unused_functions.enabled()) {
|
||||
if (G.is_verbosity(2)) {
|
||||
std::cerr << gvar_sym->name() << ": variable not generated, it's unused\n";
|
||||
}
|
||||
continue;
|
||||
}
|
||||
std::string name = G.symbols.get_name(gvar_sym->sym_idx);
|
||||
std::cout << std::string(2, ' ') << "DECLGLOBVAR " << name << "\n";
|
||||
}
|
||||
|
||||
for (SymDef* func_sym : G.glob_func) {
|
||||
SymValCodeFunc* func_val = dynamic_cast<SymValCodeFunc*>(func_sym->value);
|
||||
if (!func_val->does_need_codegen()) {
|
||||
continue;
|
||||
}
|
||||
generate_output_func(func_sym);
|
||||
}
|
||||
|
||||
std::cout << "}END>c\n";
|
||||
if (!G.settings.boc_output_filename.empty()) {
|
||||
std::cout << "boc>B \"" << G.settings.boc_output_filename << "\" B>file\n";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int tolk_proceed(const std::string &entrypoint_file_name) {
|
||||
int tolk_proceed(const std::string &entrypoint_filename) {
|
||||
define_builtins();
|
||||
lexer_init();
|
||||
G.pragma_allow_post_modification.always_on_and_deprecated("0.5.0");
|
||||
G.pragma_compute_asm_ltr.always_on_and_deprecated("0.5.0");
|
||||
|
||||
try {
|
||||
{
|
||||
if (G.settings.stdlib_filename.empty()) {
|
||||
throw Fatal("stdlib filename not specified");
|
||||
}
|
||||
td::Result<SrcFile*> locate_res = locate_source_file(G.settings.stdlib_filename);
|
||||
if (locate_res.is_error()) {
|
||||
throw Fatal("Failed to locate stdlib: " + locate_res.error().message().str());
|
||||
}
|
||||
process_file_ast(parse_src_file_to_ast(locate_res.move_as_ok()));
|
||||
if (G.settings.stdlib_filename.empty()) {
|
||||
throw Fatal("stdlib filename not specified");
|
||||
}
|
||||
td::Result<SrcFile*> locate_res = locate_source_file(entrypoint_file_name);
|
||||
if (locate_res.is_error()) {
|
||||
throw Fatal("Failed to locate " + entrypoint_file_name + ": " + locate_res.error().message().str());
|
||||
}
|
||||
process_file_ast(parse_src_file_to_ast(locate_res.move_as_ok()));
|
||||
|
||||
// todo #ifdef TOLK_PROFILING + comment
|
||||
// lexer_measure_performance(all_src_files.get_all_files());
|
||||
// on any error, an exception is thrown, and the message is printed out below
|
||||
// (currently, only a single error can be printed)
|
||||
|
||||
AllSrcFiles all_files = pipeline_discover_and_parse_sources(G.settings.stdlib_filename, entrypoint_filename);
|
||||
|
||||
pipeline_handle_pragmas(all_files);
|
||||
pipeline_register_global_symbols(all_files);
|
||||
pipeline_convert_ast_to_legacy_Expr_Op(all_files);
|
||||
|
||||
pipeline_find_unused_symbols();
|
||||
pipeline_generate_fif_output_to_std_cout();
|
||||
|
||||
generate_output();
|
||||
return 0;
|
||||
} catch (Fatal& fatal) {
|
||||
std::cerr << "fatal: " << fatal << std::endl;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue