1
0
Fork 0
mirror of https://github.com/ton-blockchain/ton synced 2025-03-09 15:40:10 +00:00

[Tolk] AST-based semantic analysis, get rid of Expr

This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.

Noteable changes:
- AST-based semantic kernel includes: registering global symbols,
  scope handling and resolving local/global identifiers,
  lvalue/rvalue calc and check, implicit return detection,
  mutability analysis, pure/impure validity checks,
  simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
  but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
  expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
  lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
  stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
  which was fully dropped; AST is converted to Ops (IR) directly
This commit is contained in:
tolk-vm 2024-12-16 21:19:45 +03:00
parent ea0dc16163
commit 3540424aa1
No known key found for this signature in database
GPG key ID: 7905DD7FE0324B12
71 changed files with 4270 additions and 3060 deletions

View file

@ -18,97 +18,194 @@
#include "src-file.h"
#include "type-expr.h"
#include <functional>
#include <memory>
#include "constant-evaluator.h"
#include "crypto/common/refint.h"
#include <unordered_map>
#include <variant>
#include <vector>
namespace tolk {
typedef int var_idx_t;
typedef int sym_idx_t;
enum class SymValKind { _Var, _Func, _GlobVar, _Const };
struct SymValBase {
SymValKind kind;
int idx;
TypeExpr* sym_type;
#ifdef TOLK_DEBUG
std::string sym_name; // seeing symbol name in debugger makes it much easier to delve into Tolk sources
#endif
SymValBase(SymValKind kind, int idx, TypeExpr* sym_type) : kind(kind), idx(idx), sym_type(sym_type) {
}
virtual ~SymValBase() = default;
TypeExpr* get_type() const {
return sym_type;
}
};
struct Symbol {
std::string str;
sym_idx_t idx;
Symbol(std::string str, sym_idx_t idx) : str(std::move(str)), idx(idx) {}
static std::string unknown_symbol_name(sym_idx_t i);
};
class SymTable {
public:
static constexpr int SIZE_PRIME = 100003;
private:
sym_idx_t def_sym{0};
std::unique_ptr<Symbol> sym[SIZE_PRIME + 1];
sym_idx_t gen_lookup(std::string_view str, int mode = 0, sym_idx_t idx = 0);
public:
static constexpr sym_idx_t not_found = 0;
sym_idx_t lookup(std::string_view str) {
return gen_lookup(str, 0);
}
sym_idx_t lookup_add(std::string_view str) {
return gen_lookup(str, 1);
}
Symbol* operator[](sym_idx_t i) const {
return sym[i].get();
}
std::string get_name(sym_idx_t i) const {
return sym[i] ? sym[i]->str : Symbol::unknown_symbol_name(i);
}
};
struct SymTableOverflow {
int sym_def;
explicit SymTableOverflow(int x) : sym_def(x) {
}
};
struct SymDef {
int level;
sym_idx_t sym_idx;
SymValBase* value;
std::string name;
SrcLocation loc;
#ifdef TOLK_DEBUG
std::string sym_name;
#endif
SymDef(int lvl, sym_idx_t idx, SrcLocation _loc, SymValBase* val = nullptr)
: level(lvl), sym_idx(idx), value(val), loc(_loc) {
Symbol(std::string name, SrcLocation loc)
: name(std::move(name))
, loc(loc) {
}
virtual ~Symbol() = default;
template<class T>
const T* as() const {
#ifdef TOLK_DEBUG
assert(dynamic_cast<const T*>(this) != nullptr);
#endif
return dynamic_cast<const T*>(this);
}
template<class T>
const T* try_as() const {
return dynamic_cast<const T*>(this);
}
std::string name() const;
};
struct LocalVarData final : Symbol {
enum {
flagMutateParameter = 1, // parameter was declared with `mutate` keyword
flagImmutable = 2, // variable was declared via `val` (not `var`)
};
void open_scope(SrcLocation loc);
void close_scope();
SymDef* lookup_symbol(sym_idx_t idx);
TypeExpr* declared_type;
int flags = 0;
int idx;
SymDef* define_global_symbol(sym_idx_t name_idx, SrcLocation loc = {});
SymDef* define_parameter(sym_idx_t name_idx, SrcLocation loc);
SymDef* define_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc);
LocalVarData(std::string name, SrcLocation loc, int idx, TypeExpr* declared_type)
: Symbol(std::move(name), loc)
, declared_type(declared_type)
, idx(idx) {
}
bool is_underscore() const { return name.empty(); }
bool is_immutable() const { return flags & flagImmutable; }
bool is_mutate_parameter() const { return flags & flagMutateParameter; }
LocalVarData* mutate() const { return const_cast<LocalVarData*>(this); }
void assign_idx(int idx);
};
struct FunctionBodyCode;
struct FunctionBodyAsm;
struct FunctionBodyBuiltin;
typedef std::variant<
FunctionBodyCode*,
FunctionBodyAsm*,
FunctionBodyBuiltin*
> FunctionBody;
struct FunctionData final : Symbol {
static constexpr int EMPTY_METHOD_ID = -10;
enum {
flagInline = 1, // marked `@inline`
flagInlineRef = 2, // marked `@inline_ref`
flagReallyUsed = 4, // calculated via dfs from used functions; declared but unused functions are not codegenerated
flagUsedAsNonCall = 8, // used not only as `f()`, but as a 1-st class function (assigned to var, pushed to tuple, etc.)
flagMarkedAsPure = 16, // declared as `pure`, can't call impure and access globals, unused invocations are optimized out
flagImplicitReturn = 32, // control flow reaches end of function, so it needs implicit return at the end
flagGetMethod = 64, // was declared via `get func(): T`, method_id is auto-assigned
flagIsEntrypoint = 128, // it's `main` / `onExternalMessage` / etc.
flagHasMutateParams = 256, // has parameters declared as `mutate`
flagAcceptsSelf = 512, // is a member function (has `self` first parameter)
flagReturnsSelf = 1024, // return type is `self` (returns the mutated 1st argument), calls can be chainable
};
int method_id = EMPTY_METHOD_ID;
int flags;
TypeExpr* full_type; // currently, TypeExpr::_Map, probably wrapped with forall
std::vector<LocalVarData> parameters;
std::vector<int> arg_order, ret_order;
FunctionBody body;
FunctionData(std::string name, SrcLocation loc, TypeExpr* full_type, std::vector<LocalVarData> parameters, int initial_flags, FunctionBody body)
: Symbol(std::move(name), loc)
, flags(initial_flags)
, full_type(full_type)
, parameters(std::move(parameters))
, body(body) {
}
const std::vector<int>* get_arg_order() const {
return arg_order.empty() ? nullptr : &arg_order;
}
const std::vector<int>* get_ret_order() const {
return ret_order.empty() ? nullptr : &ret_order;
}
bool is_regular_function() const { return std::holds_alternative<FunctionBodyCode*>(body); }
bool is_asm_function() const { return std::holds_alternative<FunctionBodyAsm*>(body); }
bool is_builtin_function() const { return std::holds_alternative<FunctionBodyBuiltin*>(body); }
bool is_inline() const { return flags & flagInline; }
bool is_inline_ref() const { return flags & flagInlineRef; }
bool is_really_used() const { return flags & flagReallyUsed; }
bool is_used_as_noncall() const { return flags & flagUsedAsNonCall; }
bool is_marked_as_pure() const { return flags & flagMarkedAsPure; }
bool is_implicit_return() const { return flags & flagImplicitReturn; }
bool is_get_method() const { return flags & flagGetMethod; }
bool is_method_id_not_empty() const { return method_id != EMPTY_METHOD_ID; }
bool is_entrypoint() const { return flags & flagIsEntrypoint; }
bool has_mutate_params() const { return flags & flagHasMutateParams; }
bool does_accept_self() const { return flags & flagAcceptsSelf; }
bool does_return_self() const { return flags & flagReturnsSelf; }
bool does_mutate_self() const { return (flags & flagAcceptsSelf) && parameters[0].is_mutate_parameter(); }
bool does_need_codegen() const;
FunctionData* mutate() const { return const_cast<FunctionData*>(this); }
void assign_is_really_used();
void assign_is_used_as_noncall();
void assign_is_implicit_return();
};
struct GlobalVarData final : Symbol {
enum {
flagReallyUsed = 1, // calculated via dfs from used functions; unused globals are not codegenerated
};
TypeExpr* declared_type;
int flags = 0;
GlobalVarData(std::string name, SrcLocation loc, TypeExpr* declared_type)
: Symbol(std::move(name), loc)
, declared_type(declared_type) {
}
bool is_really_used() const { return flags & flagReallyUsed; }
GlobalVarData* mutate() const { return const_cast<GlobalVarData*>(this); }
void assign_is_really_used();
};
struct GlobalConstData final : Symbol {
ConstantValue value;
TypeExpr* inferred_type;
GlobalConstData(std::string name, SrcLocation loc, ConstantValue&& value)
: Symbol(std::move(name), loc)
, value(std::move(value))
, inferred_type(TypeExpr::new_atomic(this->value.is_int() ? TypeExpr::_Int : TypeExpr::_Slice)) {
}
bool is_int_const() const { return value.is_int(); }
bool is_slice_const() const { return value.is_slice(); }
td::RefInt256 as_int_const() const { return value.as_int(); }
const std::string& as_slice_const() const { return value.as_slice(); }
};
class GlobalSymbolTable {
std::unordered_map<uint64_t, const Symbol*> entries;
static uint64_t key_hash(std::string_view name_key) {
return std::hash<std::string_view>{}(name_key);
}
public:
void add_function(const FunctionData* f_sym);
void add_global_var(const GlobalVarData* g_sym);
void add_global_const(const GlobalConstData* c_sym);
const Symbol* lookup(std::string_view name) const {
const auto it = entries.find(key_hash(name));
return it == entries.end() ? nullptr : it->second;
}
};
const Symbol* lookup_global_symbol(std::string_view name);
} // namespace tolk