1
0
Fork 0
mirror of https://github.com/ton-blockchain/ton synced 2025-03-09 15:40:10 +00:00

[Tolk] AST-based semantic analysis, get rid of Expr

This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.

Notable changes:
- AST-based semantic kernel includes: registering global symbols,
  scope handling and resolving local/global identifiers,
  lvalue/rvalue calc and check, implicit return detection,
  mutability analysis, pure/impure validity checks,
  simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
  but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
  expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
  lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
  stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
  which was fully dropped; AST is converted to Ops (IR) directly
This commit is contained in:
tolk-vm 2024-12-16 21:19:45 +03:00
parent ea0dc16163
commit 3540424aa1
No known key found for this signature in database
GPG key ID: 7905DD7FE0324B12
71 changed files with 4270 additions and 3060 deletions

View file

@ -16,154 +16,85 @@
*/
#include "symtable.h"
#include "compiler-state.h"
#include "platform-utils.h"
#include <sstream>
#include <cassert>
namespace tolk {
// Tells whether a body has to be generated for this function.
// Returns false only for a function that is not really used while the
// "remove unused functions" setting is enabled; returns true otherwise.
bool FunctionData::does_need_codegen() const {
  // a function that is declared but never referenced from code gets no body (when the setting allows dropping it)
  const bool must_be_kept = is_really_used() || !G.settings.remove_unused_functions;
  if (!must_be_kept) {
    return false;
  }
  // a function referenced in a non-call way (e.g. `var a = some_fn;`) needs its continuation to exist
  if (is_used_as_noncall()) {
    return true;
  }
  // there is no inlining at the moment, so every remaining function is codegenerated as well
  // (unused ones are actually removed later by Fift);
  // a true AST inlining of "simple" functions may be implemented in the future
  return true;
}
std::string Symbol::unknown_symbol_name(sym_idx_t i) {
if (!i) {
return "_";
} else {
std::ostringstream os;
os << "SYM#" << i;
return os.str();
void FunctionData::assign_is_really_used() {
this->flags |= flagReallyUsed;
}
void FunctionData::assign_is_used_as_noncall() {
this->flags |= flagUsedAsNonCall;
}
void FunctionData::assign_is_implicit_return() {
this->flags |= flagImplicitReturn;
}
void GlobalVarData::assign_is_really_used() {
this->flags |= flagReallyUsed;
}
// Stores the given index into this local variable's `idx` field.
// `this->` is required here because the parameter has the same name as the member.
void LocalVarData::assign_idx(int idx) {
this->idx = idx;
}
// Throws a ParseError at `loc` reporting that a symbol is being redefined; never returns normally.
// The message depends on the location of the previous definition:
//  - previous one comes from stdlib;
//  - previous one has no defined location (reported as a built-in symbol);
//  - otherwise, the location of the previous definition is included into the message.
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
static void fire_error_redefinition_of_symbol(SrcLocation loc, const Symbol* previous) {
  SrcLocation earlier_loc = previous->loc;
  if (earlier_loc.is_stdlib()) {
    throw ParseError(loc, "redefinition of a symbol from stdlib");
  } else if (!earlier_loc.is_defined()) {
    throw ParseError(loc, "redefinition of built-in symbol");
  }
  throw ParseError(loc, "redefinition of symbol, previous was at: " + earlier_loc.to_string());
}
// Registers a function in the global symbol table.
// The entry is stored under key_hash() of the function's name; if that key is already taken,
// a redefinition error is fired at f_sym->loc, mentioning the symbol that occupies the key.
void GlobalSymbolTable::add_function(const FunctionData* f_sym) {
  auto emplaced = entries.emplace(key_hash(f_sym->name), f_sym);
  if (emplaced.second) {
    return;   // freshly inserted, nothing else to do
  }
  fire_error_redefinition_of_symbol(f_sym->loc, emplaced.first->second);
}
sym_idx_t SymTable::gen_lookup(std::string_view str, int mode, sym_idx_t idx) {
unsigned long long h1 = 1, h2 = 1;
for (char c : str) {
h1 = ((h1 * 239) + (unsigned char)(c)) % SIZE_PRIME;
h2 = ((h2 * 17) + (unsigned char)(c)) % (SIZE_PRIME - 1);
}
++h2;
++h1;
while (true) {
if (sym[h1]) {
if (sym[h1]->str == str) {
return (mode & 2) ? not_found : sym_idx_t(h1);
}
h1 += h2;
if (h1 > SIZE_PRIME) {
h1 -= SIZE_PRIME;
}
} else {
if (!(mode & 1)) {
return not_found;
}
if (def_sym >= ((long long)SIZE_PRIME * 3) / 4) {
throw SymTableOverflow{def_sym};
}
sym[h1] = std::make_unique<Symbol>(static_cast<std::string>(str), idx <= 0 ? sym_idx_t(h1) : -idx);
++def_sym;
return sym_idx_t(h1);
}
// Registers a global variable in the global symbol table.
// The entry is stored under key_hash() of the variable's name; if that key is already taken,
// a redefinition error is fired at g_sym->loc, mentioning the symbol that occupies the key.
void GlobalSymbolTable::add_global_var(const GlobalVarData* g_sym) {
  auto emplaced = entries.emplace(key_hash(g_sym->name), g_sym);
  if (emplaced.second) {
    return;   // freshly inserted, nothing else to do
  }
  fire_error_redefinition_of_symbol(g_sym->loc, emplaced.first->second);
}
std::string SymDef::name() const {
return G.symbols.get_name(sym_idx);
// Registers a global constant in the global symbol table.
// The entry is stored under key_hash() of the constant's name; if that key is already taken,
// a redefinition error is fired at c_sym->loc, mentioning the symbol that occupies the key.
void GlobalSymbolTable::add_global_const(const GlobalConstData* c_sym) {
  auto emplaced = entries.emplace(key_hash(c_sym->name), c_sym);
  if (emplaced.second) {
    return;   // freshly inserted, nothing else to do
  }
  fire_error_redefinition_of_symbol(c_sym->loc, emplaced.first->second);
}
// Enters a new nested scope: increases the global scope level
// and records the location at which this scope was opened.
void open_scope(SrcLocation loc) {
  G.scope_level += 1;
  G.scope_opened_at.push_back(loc);
}
// Leaves the innermost scope.
// Every entry of G.symbol_stack recorded for the current scope level is popped, and the
// corresponding definition in G.sym_def is either restored to its saved state or removed.
// After that, the scope level is decreased and the last G.scope_opened_at entry is dropped.
// Throws Fatal if there is no open scope (G.scope_level is 0).
void close_scope() {
if (!G.scope_level) {
throw Fatal{"cannot close the outer scope"};
}
// symbol_stack holds (scope level, saved SymDef) pairs; handle those belonging to the scope being closed
while (!G.symbol_stack.empty() && G.symbol_stack.back().first == G.scope_level) {
// take a copy of the saved definition before the stack entry is popped
SymDef old_def = G.symbol_stack.back().second;
auto idx = old_def.sym_idx;
G.symbol_stack.pop_back();
// the definition currently registered for this symbol must belong to the scope being closed
SymDef* cur_def = G.sym_def[idx];
assert(cur_def);
assert(cur_def->level == G.scope_level && cur_def->sym_idx == idx);
//std::cerr << "restoring local symbol `" << old_def.name << "` of level " << scope_level << " to its previous level " << old_def.level << std::endl;
// the value attached inside the closed scope is released here
if (cur_def->value) {
//std::cerr << "deleting value of symbol " << old_def.name << ":" << old_def.level << " at " << (const void*) it->second.value << std::endl;
delete cur_def->value;
}
// a saved definition with level 0 and no value is what define_symbol() pushes for a symbol
// that had no definition before; in that case the definition is dropped entirely
if (!old_def.level && !old_def.value) {
delete cur_def; // ??? keep the definition always?
G.sym_def[idx] = nullptr;
} else {
// otherwise, bring back the value and the level the symbol had in the outer scope
cur_def->value = old_def.value;
cur_def->level = old_def.level;
}
// NOTE(review): presumably detaches the pointer from the local copy, since it may now be
// referenced by cur_def — confirm against SymDef's destructor/ownership rules
old_def.value = nullptr;
}
--G.scope_level;
G.scope_opened_at.pop_back();
}
// Finds a definition by symbol index.
// The G.sym_def table is consulted first, then G.global_sym_def.
// Returns nullptr for a zero index or when neither table has a definition.
SymDef* lookup_symbol(sym_idx_t idx) {
  if (idx) {
    if (SymDef* scoped_def = G.sym_def[idx]) {
      return scoped_def;
    }
    if (SymDef* global_def = G.global_sym_def[idx]) {
      return global_def;
    }
  }
  return nullptr;
}
// Returns the global definition for name_idx, creating it if it doesn't exist yet.
// The caller tells the two cases apart by ->value:
//  - an already registered definition is returned as is (its value is filled, i.e. the symbol is redefined);
//  - a just created definition has ->value == nullptr.
SymDef* define_global_symbol(sym_idx_t name_idx, SrcLocation loc) {
  SymDef* existing = G.global_sym_def[name_idx];
  if (existing) {
    return existing;
  }
  SymDef* created = new SymDef(0, name_idx, loc);
  G.global_sym_def[name_idx] = created;
#ifdef TOLK_DEBUG
  created->sym_name = created->name();
#endif
  return created;
}
// Creates a SymDef for a parameter of a function declaration.
// Such definitions are NOT inserted into symtable: they are registered
// to be placed into SymValFunc::parameters (->value is filled with SymValVariable later).
SymDef* define_parameter(sym_idx_t name_idx, SrcLocation loc) {
  auto* param_def = new SymDef(0, name_idx, loc);
#ifdef TOLK_DEBUG
  param_def->sym_name = param_def->name();
#endif
  return param_def;
}
SymDef* define_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc) {
if (!name_idx) {
return nullptr;
}
if (!G.scope_level) {
throw Fatal("unexpected scope_level = 0");
}
auto found = G.sym_def[name_idx];
if (found) {
if (found->level < G.scope_level) {
G.symbol_stack.emplace_back(G.scope_level, *found);
found->level = G.scope_level;
} else if (found->value && force_new) {
return nullptr;
}
found->value = nullptr;
found->loc = loc;
return found;
}
found = G.sym_def[name_idx] = new SymDef(G.scope_level, name_idx, loc);
G.symbol_stack.emplace_back(G.scope_level, SymDef{0, name_idx, loc});
#ifdef TOLK_DEBUG
found->sym_name = found->name();
G.symbol_stack.back().second.sym_name = found->name();
#endif
return found;
// Looks up a symbol by name in the global symbol table (G.symtable) and returns what it yields.
const Symbol* lookup_global_symbol(std::string_view name) {
  const Symbol* found = G.symtable.lookup(name);
  return found;
}
} // namespace tolk