mirror of
https://github.com/ton-blockchain/ton
synced 2025-03-09 15:40:10 +00:00
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner. This is a common approach for functional languages, where types are inferred from usage through unification. As a result, type declarations are not necessary:

    () f(a,b) { return a+b; } // a and b are now int, since `+` is (int, int)

While this approach works for now, problems arise with the introduction of new types like bool, where `!x` must handle both int and bool. It will also become incompatible with int32 and other strict integers. It will clash with structure methods, struggle with proper generics, and become entirely impractical for union types.

This PR completely rewrites the type system, targeting the future.
1) the type of any expression is inferred and never changed
2) this is possible because the expressions it depends on have already been inferred
3) `forall` is completely removed; generic functions are introduced (they actually work like template functions, instantiated while inferring)
4) instantiation `<...>` syntax, for example: `t.tupleAt<int>(0)`
5) the `as` keyword, for example: `t.tupleAt(0) as int`
6) method binding is done along with type inferring, not before it ("before", as it worked previously, was always the wrong approach)
This commit is contained in:
parent
3540424aa1
commit
799e2d1265
101 changed files with 5402 additions and 2713 deletions
|
@ -17,7 +17,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "src-file.h"
|
||||
#include "type-expr.h"
|
||||
#include "fwd-declarations.h"
|
||||
#include "constant-evaluator.h"
|
||||
#include "crypto/common/refint.h"
|
||||
#include <unordered_map>
|
||||
|
@ -57,27 +57,31 @@ struct LocalVarData final : Symbol {
|
|||
flagImmutable = 2, // variable was declared via `val` (not `var`)
|
||||
};
|
||||
|
||||
TypeExpr* declared_type;
|
||||
int flags = 0;
|
||||
TypePtr declared_type; // either at declaration `var x:int`, or if omitted, from assigned value `var x=2`
|
||||
int flags;
|
||||
int idx;
|
||||
|
||||
LocalVarData(std::string name, SrcLocation loc, int idx, TypeExpr* declared_type)
|
||||
LocalVarData(std::string name, SrcLocation loc, TypePtr declared_type, int flags, int idx)
|
||||
: Symbol(std::move(name), loc)
|
||||
, declared_type(declared_type)
|
||||
, flags(flags)
|
||||
, idx(idx) {
|
||||
}
|
||||
|
||||
bool is_underscore() const { return name.empty(); }
|
||||
bool is_immutable() const { return flags & flagImmutable; }
|
||||
bool is_mutate_parameter() const { return flags & flagMutateParameter; }
|
||||
|
||||
LocalVarData* mutate() const { return const_cast<LocalVarData*>(this); }
|
||||
void assign_idx(int idx);
|
||||
void assign_resolved_type(TypePtr declared_type);
|
||||
void assign_inferred_type(TypePtr inferred_type);
|
||||
};
|
||||
|
||||
struct FunctionBodyCode;
|
||||
struct FunctionBodyAsm;
|
||||
struct FunctionBodyBuiltin;
|
||||
struct GenericsDeclaration;
|
||||
struct GenericsInstantiation;
|
||||
|
||||
typedef std::variant<
|
||||
FunctionBodyCode*,
|
||||
|
@ -91,7 +95,7 @@ struct FunctionData final : Symbol {
|
|||
enum {
|
||||
flagInline = 1, // marked `@inline`
|
||||
flagInlineRef = 2, // marked `@inline_ref`
|
||||
flagReallyUsed = 4, // calculated via dfs from used functions; declared but unused functions are not codegenerated
|
||||
flagTypeInferringDone = 4, // type inferring step of function's body (all AST nodes assigning v->inferred_type) is done
|
||||
flagUsedAsNonCall = 8, // used not only as `f()`, but as a 1-st class function (assigned to var, pushed to tuple, etc.)
|
||||
flagMarkedAsPure = 16, // declared as `pure`, can't call impure and access globals, unused invocations are optimized out
|
||||
flagImplicitReturn = 32, // control flow reaches end of function, so it needs implicit return at the end
|
||||
|
@ -100,25 +104,36 @@ struct FunctionData final : Symbol {
|
|||
flagHasMutateParams = 256, // has parameters declared as `mutate`
|
||||
flagAcceptsSelf = 512, // is a member function (has `self` first parameter)
|
||||
flagReturnsSelf = 1024, // return type is `self` (returns the mutated 1st argument), calls can be chainable
|
||||
flagReallyUsed = 2048, // calculated via dfs from used functions; declared but unused functions are not codegenerated
|
||||
};
|
||||
|
||||
int method_id = EMPTY_METHOD_ID;
|
||||
int flags;
|
||||
TypeExpr* full_type; // currently, TypeExpr::_Map, probably wrapped with forall
|
||||
|
||||
std::vector<LocalVarData> parameters;
|
||||
std::vector<int> arg_order, ret_order;
|
||||
TypePtr declared_return_type; // may be nullptr, meaning "auto infer"
|
||||
TypePtr inferred_return_type = nullptr; // assigned on type inferring
|
||||
TypePtr inferred_full_type = nullptr; // assigned on type inferring, it's TypeDataFunCallable(params -> return)
|
||||
|
||||
const GenericsDeclaration* genericTs;
|
||||
const GenericsInstantiation* instantiationTs;
|
||||
FunctionBody body;
|
||||
AnyV ast_root; // V<ast_function_declaration> for user-defined (not builtin)
|
||||
|
||||
FunctionData(std::string name, SrcLocation loc, TypeExpr* full_type, std::vector<LocalVarData> parameters, int initial_flags, FunctionBody body)
|
||||
FunctionData(std::string name, SrcLocation loc, TypePtr declared_return_type, std::vector<LocalVarData> parameters, int initial_flags, const GenericsDeclaration* genericTs, const GenericsInstantiation* instantiationTs, FunctionBody body, AnyV ast_root)
|
||||
: Symbol(std::move(name), loc)
|
||||
, flags(initial_flags)
|
||||
, full_type(full_type)
|
||||
, parameters(std::move(parameters))
|
||||
, body(body) {
|
||||
, declared_return_type(declared_return_type)
|
||||
, genericTs(genericTs)
|
||||
, instantiationTs(instantiationTs)
|
||||
, body(body)
|
||||
, ast_root(ast_root) {
|
||||
}
|
||||
|
||||
std::string as_human_readable() const;
|
||||
|
||||
const std::vector<int>* get_arg_order() const {
|
||||
return arg_order.empty() ? nullptr : &arg_order;
|
||||
}
|
||||
|
@ -126,13 +141,19 @@ struct FunctionData final : Symbol {
|
|||
return ret_order.empty() ? nullptr : &ret_order;
|
||||
}
|
||||
|
||||
bool is_regular_function() const { return std::holds_alternative<FunctionBodyCode*>(body); }
|
||||
int get_num_params() const { return static_cast<int>(parameters.size()); }
|
||||
const LocalVarData& get_param(int idx) const { return parameters[idx]; }
|
||||
|
||||
bool is_code_function() const { return std::holds_alternative<FunctionBodyCode*>(body); }
|
||||
bool is_asm_function() const { return std::holds_alternative<FunctionBodyAsm*>(body); }
|
||||
bool is_builtin_function() const { return std::holds_alternative<FunctionBodyBuiltin*>(body); }
|
||||
bool is_builtin_function() const { return ast_root == nullptr; }
|
||||
|
||||
bool is_generic_function() const { return genericTs != nullptr; }
|
||||
bool is_instantiation_of_generic_function() const { return instantiationTs != nullptr; }
|
||||
|
||||
bool is_inline() const { return flags & flagInline; }
|
||||
bool is_inline_ref() const { return flags & flagInlineRef; }
|
||||
bool is_really_used() const { return flags & flagReallyUsed; }
|
||||
bool is_type_inferring_done() const { return flags & flagTypeInferringDone; }
|
||||
bool is_used_as_noncall() const { return flags & flagUsedAsNonCall; }
|
||||
bool is_marked_as_pure() const { return flags & flagMarkedAsPure; }
|
||||
bool is_implicit_return() const { return flags & flagImplicitReturn; }
|
||||
|
@ -143,13 +164,18 @@ struct FunctionData final : Symbol {
|
|||
bool does_accept_self() const { return flags & flagAcceptsSelf; }
|
||||
bool does_return_self() const { return flags & flagReturnsSelf; }
|
||||
bool does_mutate_self() const { return (flags & flagAcceptsSelf) && parameters[0].is_mutate_parameter(); }
|
||||
bool is_really_used() const { return flags & flagReallyUsed; }
|
||||
|
||||
bool does_need_codegen() const;
|
||||
|
||||
FunctionData* mutate() const { return const_cast<FunctionData*>(this); }
|
||||
void assign_is_really_used();
|
||||
void assign_resolved_type(TypePtr declared_return_type);
|
||||
void assign_inferred_type(TypePtr inferred_return_type, TypePtr inferred_full_type);
|
||||
void assign_is_used_as_noncall();
|
||||
void assign_is_implicit_return();
|
||||
void assign_is_type_inferring_done();
|
||||
void assign_is_really_used();
|
||||
void assign_arg_order(std::vector<int>&& arg_order);
|
||||
};
|
||||
|
||||
struct GlobalVarData final : Symbol {
|
||||
|
@ -157,10 +183,10 @@ struct GlobalVarData final : Symbol {
|
|||
flagReallyUsed = 1, // calculated via dfs from used functions; unused globals are not codegenerated
|
||||
};
|
||||
|
||||
TypeExpr* declared_type;
|
||||
TypePtr declared_type; // always exists, declaring globals without type is prohibited
|
||||
int flags = 0;
|
||||
|
||||
GlobalVarData(std::string name, SrcLocation loc, TypeExpr* declared_type)
|
||||
GlobalVarData(std::string name, SrcLocation loc, TypePtr declared_type)
|
||||
: Symbol(std::move(name), loc)
|
||||
, declared_type(declared_type) {
|
||||
}
|
||||
|
@ -168,17 +194,18 @@ struct GlobalVarData final : Symbol {
|
|||
bool is_really_used() const { return flags & flagReallyUsed; }
|
||||
|
||||
GlobalVarData* mutate() const { return const_cast<GlobalVarData*>(this); }
|
||||
void assign_resolved_type(TypePtr declared_type);
|
||||
void assign_is_really_used();
|
||||
};
|
||||
|
||||
struct GlobalConstData final : Symbol {
|
||||
ConstantValue value;
|
||||
TypeExpr* inferred_type;
|
||||
TypePtr declared_type; // may be nullptr
|
||||
|
||||
GlobalConstData(std::string name, SrcLocation loc, ConstantValue&& value)
|
||||
GlobalConstData(std::string name, SrcLocation loc, TypePtr declared_type, ConstantValue&& value)
|
||||
: Symbol(std::move(name), loc)
|
||||
, value(std::move(value))
|
||||
, inferred_type(TypeExpr::new_atomic(this->value.is_int() ? TypeExpr::_Int : TypeExpr::_Slice)) {
|
||||
, declared_type(declared_type) {
|
||||
}
|
||||
|
||||
bool is_int_const() const { return value.is_int(); }
|
||||
|
@ -186,6 +213,9 @@ struct GlobalConstData final : Symbol {
|
|||
|
||||
td::RefInt256 as_int_const() const { return value.as_int(); }
|
||||
const std::string& as_slice_const() const { return value.as_slice(); }
|
||||
|
||||
GlobalConstData* mutate() const { return const_cast<GlobalConstData*>(this); }
|
||||
void assign_resolved_type(TypePtr declared_type);
|
||||
};
|
||||
|
||||
class GlobalSymbolTable {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue