
[Tolk] Support syntax tensorVar.0 and tupleVar.0

It works both for reading and writing:
> var t = (1, 2);
> t.0;      // 1
> t.0 = 5;
> t;        // (5, 2)

It also works for typed/untyped tuples, producing INDEX and SETINDEX.

Global tensors and tuples work. Nesting `t.0.1.2` works. `mutate` works.
Even writing to tuples nested inside tensors inside a global works.
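
For illustration, a small sketch of these cases combined (hypothetical snippet in the spirit of the examples above; `gTensor` and `demo` are invented names, and the exact global declaration syntax is assumed):
> global gTensor: (int, [int, int]);
>
> fun demo() {
>     var t = (1, (2, 3));
>     t.1.1 = 9;         // nested tensor write: t becomes (1, (2, 9))
>     gTensor.1.0 = 5;   // writes into the tuple stored inside the global tensor
> }
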
tolk-vm 2025-01-27 10:29:17 +03:00
parent 565bc59735
commit 7a1602f591
42 changed files with 1119 additions and 338 deletions


@@ -26,22 +26,28 @@ namespace tolk {
*
*/
void TmpVar::dump(std::ostream& os) const {
show(os);
os << " : " << v_type << " (width ";
os << v_type->calc_width_on_stack();
os << ")";
os << std::endl;
void TmpVar::show_as_stack_comment(std::ostream& os) const {
if (!name.empty()) {
os << name;
} else {
os << '\'' << ir_idx;
}
#ifdef TOLK_DEBUG
// uncomment for detailed stack output, like `'15(binary-op) '16(glob-var)`
// if (desc) os << desc;
#endif
}
void TmpVar::show(std::ostream& os, int omit_idx) const {
if (v_sym) {
os << v_sym->name;
if (omit_idx >= 2) {
return;
}
void TmpVar::show(std::ostream& os) const {
os << '\'' << ir_idx; // vars are printed out as `'1 '2` (in stack comments, debug info, etc.)
if (!name.empty()) {
os << '_' << name;
}
os << '_' << ir_idx;
#ifdef TOLK_DEBUG
if (desc) {
os << ' ' << desc; // "origin" of implicitly created tmp var, like `'15 (binary-op) '16 (glob-var)`
}
#endif
}
std::ostream& operator<<(std::ostream& os, const TmpVar& var) {
@@ -95,7 +101,7 @@ void VarDescr::show(std::ostream& os, const char* name) const {
if (name) {
os << name;
}
os << '_' << idx;
os << '\'' << idx;
show_value(os);
}
@@ -333,7 +339,7 @@ void Op::show_var_list(std::ostream& os, const std::vector<var_idx_t>& idx_list,
} else {
os << "(" << vars.at(idx_list[0]);
for (std::size_t i = 1; i < idx_list.size(); i++) {
os << "," << vars.at(idx_list[i]);
os << ", " << vars.at(idx_list[i]);
}
os << ")";
}
@@ -378,11 +384,12 @@ void CodeBlob::print(std::ostream& os, int flags) const {
os << "CODE BLOB: " << var_cnt << " variables, " << in_var_cnt << " input\n";
if ((flags & 8) != 0) {
for (const auto& var : vars) {
var.dump(os);
if (var.where.is_defined() && (flags & 1) != 0) {
var.where.show(os);
var.show(os);
os << " : " << var.v_type << std::endl;
if (var.loc.is_defined() && (flags & 1) != 0) {
var.loc.show(os);
os << " defined here:\n";
var.where.show_context(os);
var.loc.show_context(os);
}
}
}
@@ -393,21 +400,25 @@ void CodeBlob::print(std::ostream& os, int flags) const {
os << "-------- END ---------\n\n";
}
std::vector<var_idx_t> CodeBlob::create_var(TypePtr var_type, const LocalVarData* v_sym, SrcLocation loc) {
std::vector<var_idx_t> CodeBlob::create_var(TypePtr var_type, SrcLocation loc, std::string name) {
std::vector<var_idx_t> ir_idx;
ir_idx.reserve(var_type->calc_width_on_stack());
int stack_w = var_type->calc_width_on_stack();
ir_idx.reserve(stack_w);
if (const TypeDataTensor* t_tensor = var_type->try_as<TypeDataTensor>()) {
for (TypePtr item : t_tensor->items) {
std::vector<var_idx_t> nested = create_var(item, v_sym, loc);
for (int i = 0; i < t_tensor->size(); ++i) {
std::string sub_name = name.empty() ? name : name + "." + std::to_string(i);
std::vector<var_idx_t> nested = create_var(t_tensor->items[i], loc, std::move(sub_name));
ir_idx.insert(ir_idx.end(), nested.begin(), nested.end());
}
} else if (var_type != TypeDataVoid::create()) {
tolk_assert(var_type->calc_width_on_stack() == 1);
vars.emplace_back(var_cnt, var_type, v_sym, loc);
#ifdef TOLK_DEBUG
tolk_assert(stack_w == 1);
#endif
vars.emplace_back(var_cnt, var_type, std::move(name), loc);
ir_idx.emplace_back(var_cnt);
var_cnt++;
}
tolk_assert(static_cast<int>(ir_idx.size()) == var_type->calc_width_on_stack());
tolk_assert(static_cast<int>(ir_idx.size()) == stack_w);
return ir_idx;
}


@@ -302,24 +302,13 @@ Const AsmOpList::get_const(const_idx_t idx) {
}
}
void AsmOpList::show_var(std::ostream& os, var_idx_t idx) const {
if (!var_names_ || (unsigned)idx >= var_names_->size()) {
os << '_' << idx;
} else {
var_names_->at(idx).show(os, 2);
}
}
void AsmOpList::show_var_ext(std::ostream& os, std::pair<var_idx_t, const_idx_t> idx_pair) const {
auto i = idx_pair.first;
auto j = idx_pair.second;
var_idx_t i = idx_pair.first;
const_idx_t j = idx_pair.second;
if (!var_names_ || (unsigned)i >= var_names_->size()) {
os << '_' << i;
os << '\'' << i;
} else {
var_names_->at(i).show(os, 2);
// if (!var_names_->at(i).v_type->is_int()) {
// os << '<'; var_names_->at(i).v_type->print(os); os << '>';
// }
var_names_->at(i).show_as_stack_comment(os);
}
if ((unsigned)j < constants_.size() && constants_[j].not_null()) {
os << '=' << constants_[j];


@@ -405,12 +405,15 @@ static AnyExprV parse_expr80(Lexer& lex) {
lex.next();
V<ast_identifier> v_ident = nullptr;
V<ast_instantiationT_list> v_instantiationTs = nullptr;
if (lex.tok() == tok_identifier) {
if (lex.tok() == tok_identifier) { // obj.field / obj.method
v_ident = createV<ast_identifier>(lex.cur_location(), lex.cur_str());
lex.next();
if (lex.tok() == tok_lt) {
v_instantiationTs = parse_maybe_instantiationTs_after_identifier(lex);
}
} else if (lex.tok() == tok_int_const) { // obj.0 (indexed access)
v_ident = createV<ast_identifier>(lex.cur_location(), lex.cur_str());
lex.next();
} else {
lex.unexpected("method name");
}


@@ -529,8 +529,14 @@ private:
public:
typedef const FunctionData* DotTarget; // for `t.tupleAt` target is `tupleAt` global function
DotTarget target = nullptr; // filled at type inferring
typedef std::variant<
const FunctionData*, // for `t.tupleAt` target is `tupleAt` global function
int // for `t.0` target is "indexed access" 0
> DotTarget;
DotTarget target = static_cast<FunctionData*>(nullptr); // filled at type inferring
bool is_target_fun_ref() const { return std::holds_alternative<const FunctionData*>(target); }
bool is_target_indexed_access() const { return std::holds_alternative<int>(target); }
AnyExprV get_obj() const { return child; }
auto get_identifier() const { return identifier; }


@@ -1060,6 +1060,17 @@ AsmOp compile_tuple_at(std::vector<VarDescr>& res, std::vector<VarDescr>& args,
return exec_op("INDEXVAR", 2, 1);
}
// fun tupleSetAt<X>(mutate self: tuple, value: X, index: int): void asm "SETINDEXVAR";
AsmOp compile_tuple_set_at(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation) {
tolk_assert(args.size() == 3 && res.size() == 1);
auto& y = args[2];
if (y.is_int_const() && y.int_const >= 0 && y.int_const < 16) {
y.unused();
return exec_arg_op("SETINDEX", y.int_const, 1, 1);
}
return exec_op("SETINDEXVAR", 2, 1);
}
// fun __isNull<X>(X arg): bool
AsmOp compile_is_null(std::vector<VarDescr>& res, std::vector<VarDescr>& args, SrcLocation) {
tolk_assert(args.size() == 1 && res.size() == 1);
@@ -1246,6 +1257,9 @@ void define_builtins() {
define_builtin_func("tupleAt", {Tuple, Int}, typeT, declGenericT,
compile_tuple_at,
FunctionData::flagMarkedAsPure | FunctionData::flagAcceptsSelf);
define_builtin_func("tupleSetAt", {Tuple, typeT, Int}, Unit, declGenericT,
compile_tuple_set_at,
FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf);
define_builtin_func("debugPrint", {typeT}, Unit, declGenericT,
AsmOp::Custom("s0 DUMP DROP", 1, 1),
0);


@@ -132,7 +132,7 @@ int Stack::drop_vars_except(const VarDescrList& var_info, int excl_var) {
return dropped;
}
void Stack::show(int flags) {
void Stack::show() {
std::ostringstream os;
for (auto i : s) {
os << ' ';


@@ -21,6 +21,7 @@
#include "type-system.h"
#include "common/refint.h"
#include "constant-evaluator.h"
#include <unordered_set>
/*
* This pipe is the last one operating AST: it transforms AST to IR.
@@ -28,38 +29,218 @@
* kernel (initially forked from FunC) comes into play.
* Up to this point, all types have been inferred, all validity checks have been passed, etc.
* All properties in AST nodes are assigned and can be safely used (fun_ref, etc.).
* So, if execution reaches this pass, the input is correct, and code generation should succeed.
* So, if execution reaches this pass, the input is (almost) correct, and code generation should succeed.
* The only thing additionally checked during this pass is tricky lvalue, like one and the same variable
* assigned/mutated multiple times in same expression, e.g. `(t.0, t.0) = rhs` / `f(mutate x.1.2, mutate x)`.
*/
namespace tolk {
struct LValGlobs {
std::vector<std::pair<const GlobalVarData*, std::vector<var_idx_t>>> globs;
// fire error on cases like `(a, a) = rhs` / `f(mutate t.1.0, mutate t.1.0)`
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
static void fire_error_variable_modified_twice_inside_same_expression(SrcLocation loc) {
throw ParseError(loc, "one variable modified twice inside the same expression");
}
void add_modified_glob(const GlobalVarData* g_sym, std::vector<var_idx_t> local_ir_idx) {
globs.emplace_back(g_sym, std::move(local_ir_idx));
// fire error on cases like `(m.1.0, m.1) = rhs` (m.1 inside m.1.0 is "rval inside lval")
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
static void fire_error_variable_modified_and_read_inside_same_expression(SrcLocation loc) {
throw ParseError(loc, "one variable both modified and read inside the same expression");
}
// Main goal of LValContext is to handle non-primitive lvalues. At IR level, a usual local variable
// exists, but on its change, something non-trivial should happen.
// Example: `globalVar = 9` actually does `Const $5 = 9` + `Let $6 = $5` + `SetGlob "globVar" = $6`
// Example: `tupleVar.0 = 9` actually does `Const $5 = 9` + `Let $6 = $5` + `Const $7 = 0` + `Call tupleSetAt($4, $6, $7)`
// Of course, mixing globals with tuples should also be supported.
// To achieve this, treat tupleObj inside "tupleObj.i" like "rvalue inside lvalue".
// For instance, `globalTuple.0 = 9` reads global (like rvalue), assigns 9 to tmp var, modifies tuple, writes global.
// A challenging thing is handling "unique" parts, to be read/updated only once.
// Example: `f(mutate globalTensor.0, mutate globalTensor.1)`, then globalTensor should be read/written once.
// Example: `(t.0.0, t.0.1) = rhs` (t is [[int, int]]), then t.0 should be read/updated once.
// Solving this by calculating hashes of every lvalue or rvalue inside lvalue automatically gives an ability
// to detect and fire "multiple writes inside expression", like `(a, a) = rhs` / `[t.0, (t.0.1, c)] = rhs`.
// Note, that tensors (not tuples) `tensorVar.0 = 9` do not emit anything special (unless global).
class LValContext {
// every global variable used as lvalue is registered here
// example: `globalInt = 9`, implicit var is created `$tmp = 9`, and `SetGlob "globalInt" $tmp` is done after
// global tensors are stored as tuples (unpacked on reading, packed on writing), then multiple tmp vars are created
struct ModifiedGlob {
const GlobalVarData* glob_ref;
std::vector<var_idx_t> local_ir_idx; // typically 1, generally calc_width_on_stack() of global var (tensors)
void apply(CodeBlob& code, SrcLocation loc) const {
Op& op = code.emplace_back(loc, Op::_SetGlob, std::vector<var_idx_t>{}, local_ir_idx, glob_ref);
op.set_impure_flag();
}
};
// every tuple index used as lvalue is registered here
// example: `t.0 = 9`, implicit var is created `$tmp = 9`, as well as `$tmp_idx = 0` and `tupleSetAt()` is done after
// for `t.0.0` if t is `[[int, ...]]`, `tupleAt()` for it is done since it's rvalue, and `tupleSetAt()` is done 2 times
struct ModifiedTupleIndex {
uint64_t hash;
var_idx_t tuple_ir_idx;
var_idx_t index_ir_idx;
var_idx_t field_ir_idx;
void apply(CodeBlob& code, SrcLocation loc) const {
const FunctionData* builtin_sym = lookup_global_symbol("tupleSetAt")->as<FunctionData>();
code.emplace_back(loc, Op::_Call, std::vector{tuple_ir_idx}, std::vector{tuple_ir_idx, field_ir_idx, index_ir_idx}, builtin_sym);
}
};
int level_rval_inside_lval = 0;
std::vector<std::variant<ModifiedGlob, ModifiedTupleIndex>> modifications;
std::unordered_set<uint64_t> all_modified_hashes;
void fire_if_one_variable_modified_twice(SrcLocation loc, uint64_t modified_hash) {
if (!is_rval_inside_lval()) {
if (!all_modified_hashes.insert(modified_hash).second) {
fire_error_variable_modified_twice_inside_same_expression(loc);
}
if (all_modified_hashes.contains(~modified_hash)) {
fire_error_variable_modified_and_read_inside_same_expression(loc);
}
} else {
all_modified_hashes.insert(~modified_hash);
if (all_modified_hashes.contains(modified_hash)) {
fire_error_variable_modified_and_read_inside_same_expression(loc);
}
}
}
void gen_ops_set_globs(CodeBlob& code, SrcLocation loc) const {
for (const auto& [g_sym, ir_idx] : globs) {
Op& op = code.emplace_back(loc, Op::_SetGlob, std::vector<var_idx_t>{}, ir_idx, g_sym);
op.set_impure_flag();
public:
void enter_rval_inside_lval() { level_rval_inside_lval++; }
void exit_rval_inside_lval() { level_rval_inside_lval--; }
bool is_rval_inside_lval() const { return level_rval_inside_lval > 0; }
uint64_t register_lval(SrcLocation loc, const LocalVarData* var_ref) {
uint64_t hash = reinterpret_cast<uint64_t>(var_ref);
fire_if_one_variable_modified_twice(loc, hash);
return hash;
}
uint64_t register_lval(SrcLocation loc, const GlobalVarData* glob_ref) {
uint64_t hash = reinterpret_cast<uint64_t>(glob_ref);
fire_if_one_variable_modified_twice(loc, hash);
return hash;
}
uint64_t register_lval(SrcLocation loc, V<ast_dot_access> v) {
uint64_t hash = 7;
AnyExprV leftmost_obj = v;
while (auto v_dot = leftmost_obj->try_as<ast_dot_access>()) {
if (!v_dot->is_target_indexed_access()) {
break;
}
hash = hash * 1915239017 + std::get<int>(v_dot->target);
leftmost_obj = v_dot->get_obj();
}
if (auto v_ref = leftmost_obj->try_as<ast_reference>()) {
hash *= reinterpret_cast<uint64_t>(v_ref->sym); // `v.0` and `v.0` in 2 places is the same
} else {
hash *= reinterpret_cast<uint64_t>(leftmost_obj); // unlike `f().0` and `f().0` (pointers to AST nodes differ)
}
fire_if_one_variable_modified_twice(loc, hash);
return hash;
}
const std::vector<var_idx_t>* exists_already_known_global(const GlobalVarData* glob_ref) const {
for (const auto& m : modifications) {
if (const auto* m_glob = std::get_if<ModifiedGlob>(&m); m_glob && m_glob->glob_ref == glob_ref) {
return &m_glob->local_ir_idx;
}
}
return nullptr;
}
const var_idx_t* exists_already_known_tuple_index(uint64_t hash) const {
for (const auto& m : modifications) {
if (const auto* m_tup = std::get_if<ModifiedTupleIndex>(&m); m_tup && m_tup->hash == hash) {
return &m_tup->field_ir_idx;
}
}
return nullptr;
}
void register_modified_global(const GlobalVarData* glob_ref, std::vector<var_idx_t> local_ir_idx) {
modifications.emplace_back(ModifiedGlob{glob_ref, std::move(local_ir_idx)});
}
void register_modified_tuple_index(uint64_t hash, var_idx_t tuple_ir_idx, var_idx_t index_ir_idx, var_idx_t field_ir_idx) {
modifications.emplace_back(ModifiedTupleIndex{hash, tuple_ir_idx, index_ir_idx, field_ir_idx});
}
void gen_ops_if_nonempty(CodeBlob& code, SrcLocation loc) const {
for (auto it = modifications.rbegin(); it != modifications.rend(); ++it) { // reverse, it's important
if (const auto* m_glob = std::get_if<ModifiedGlob>(&*it)) {
m_glob->apply(code, loc);
} else if (const auto* m_tup = std::get_if<ModifiedTupleIndex>(&*it)) {
m_tup->apply(code, loc);
}
}
}
};
std::vector<var_idx_t> pre_compile_expr(AnyExprV v, CodeBlob& code, LValGlobs* lval_globs = nullptr);
// The goal of VarsModificationWatcher is to detect such cases: `return (x, x += y, x)`.
// Without any changes, ops will be { _Call $2 = +($0_x, $1_y); _Return $0_x, $2, $0_x } - incorrect
// Correct will be to introduce tmp var: { _Let $3 = $0_x; _Call $2 = ...; _Return $3, $2, $0_x }
// This "introducing" is done when compiling tensors, whereas this class allows to watch vars for modification.
class VarsModificationWatcher {
struct WatchedVar {
var_idx_t ir_idx;
std::function<void(SrcLocation, var_idx_t)> on_modification_callback;
WatchedVar(var_idx_t ir_idx, std::function<void(SrcLocation, var_idx_t)> on_modification_callback)
: ir_idx(ir_idx), on_modification_callback(std::move(on_modification_callback)) {}
};
std::vector<WatchedVar> all_callbacks;
public:
bool empty() const { return all_callbacks.empty(); }
void push_callback(var_idx_t ir_idx, std::function<void(SrcLocation, var_idx_t)> callback) {
all_callbacks.emplace_back(ir_idx, std::move(callback));
}
void pop_callback(var_idx_t ir_idx) {
for (auto it = all_callbacks.rbegin(); it != all_callbacks.rend(); ++it) {
if (it->ir_idx == ir_idx) {
all_callbacks.erase((it + 1).base());
return;
}
}
tolk_assert(false);
}
void trigger_callbacks(const std::vector<var_idx_t>& left_lval_indices, SrcLocation loc) const {
for (const WatchedVar& w : all_callbacks) {
for (var_idx_t changed_var : left_lval_indices) {
if (w.ir_idx == changed_var) {
w.on_modification_callback(loc, w.ir_idx);
}
}
}
}
};
static VarsModificationWatcher vars_modification_watcher;
std::vector<var_idx_t> pre_compile_expr(AnyExprV v, CodeBlob& code, LValContext* lval_ctx = nullptr);
void process_any_statement(AnyV v, CodeBlob& code);
static std::vector<std::vector<var_idx_t>> pre_compile_tensor_inner(CodeBlob& code, const std::vector<AnyExprV>& args,
LValGlobs* lval_globs) {
LValContext* lval_ctx) {
const int n = static_cast<int>(args.size());
if (n == 0) { // just `()`
return {};
}
if (n == 1) { // just `(x)`: even if x is modified (e.g. `f(x=x+2)`), there are no next arguments
return {pre_compile_expr(args[0], code, lval_globs)};
return {pre_compile_expr(args[0], code, lval_ctx)};
}
// the purpose is to handle such cases: `return (x, x += y, x)`
@@ -81,9 +262,9 @@ static std::vector<std::vector<var_idx_t>> pre_compile_tensor_inner(CodeBlob& co
void add_and_watch_modifications(std::vector<var_idx_t>&& vars_of_ith_arg, CodeBlob& code) {
for (var_idx_t ir_idx : vars_of_ith_arg) {
if (code.vars[ir_idx].v_sym && !is_watched(ir_idx)) {
if (!code.vars[ir_idx].name.empty() && !is_watched(ir_idx)) {
watched_vars.emplace_back(ir_idx);
code.vars[ir_idx].on_modification.emplace_back([this, &code, ir_idx](SrcLocation loc) {
vars_modification_watcher.push_callback(ir_idx, [this, &code](SrcLocation loc, var_idx_t ir_idx) {
on_var_modified(ir_idx, loc, code);
});
}
@@ -93,7 +274,7 @@ static std::vector<std::vector<var_idx_t>> pre_compile_tensor_inner(CodeBlob& co
void on_var_modified(var_idx_t ir_idx, SrcLocation loc, CodeBlob& code) {
tolk_assert(is_watched(ir_idx));
std::vector<var_idx_t> tmp_idx_arr = code.create_tmp_var(code.vars[ir_idx].v_type, loc);
std::vector<var_idx_t> tmp_idx_arr = code.create_tmp_var(code.vars[ir_idx].v_type, loc, "(pre-modified)");
tolk_assert(tmp_idx_arr.size() == 1);
var_idx_t tmp_idx = tmp_idx_arr[0];
code.emplace_back(loc, Op::_Let, std::vector{tmp_idx}, std::vector{ir_idx});
@@ -102,9 +283,9 @@ static std::vector<std::vector<var_idx_t>> pre_compile_tensor_inner(CodeBlob& co
}
}
std::vector<std::vector<var_idx_t>> clear_and_stop_watching(CodeBlob& code) {
std::vector<std::vector<var_idx_t>> clear_and_stop_watching() {
for (var_idx_t ir_idx : watched_vars) {
code.vars[ir_idx].on_modification.pop_back();
vars_modification_watcher.pop_callback(ir_idx);
}
watched_vars.clear();
return std::move(res_lists);
@@ -113,15 +294,15 @@ static std::vector<std::vector<var_idx_t>> pre_compile_tensor_inner(CodeBlob& co
WatchingVarList watched_vars(n);
for (int arg_idx = 0; arg_idx < n; ++arg_idx) {
std::vector<var_idx_t> vars_of_ith_arg = pre_compile_expr(args[arg_idx], code, lval_globs);
std::vector<var_idx_t> vars_of_ith_arg = pre_compile_expr(args[arg_idx], code, lval_ctx);
watched_vars.add_and_watch_modifications(std::move(vars_of_ith_arg), code);
}
return watched_vars.clear_and_stop_watching(code);
return watched_vars.clear_and_stop_watching();
}
static std::vector<var_idx_t> pre_compile_tensor(CodeBlob& code, const std::vector<AnyExprV>& args,
LValGlobs* lval_globs = nullptr) {
std::vector<std::vector<var_idx_t>> res_lists = pre_compile_tensor_inner(code, args, lval_globs);
LValContext* lval_ctx = nullptr) {
std::vector<std::vector<var_idx_t>> res_lists = pre_compile_tensor_inner(code, args, lval_ctx);
std::vector<var_idx_t> res;
for (const std::vector<var_idx_t>& list : res_lists) {
res.insert(res.end(), list.cbegin(), list.cend());
@@ -133,11 +314,11 @@ static std::vector<var_idx_t> pre_compile_let(CodeBlob& code, AnyExprV lhs, AnyE
// [lhs] = [rhs]; since type checking is ok, it's the same as "lhs = rhs"
if (lhs->type == ast_typed_tuple && rhs->type == ast_typed_tuple) {
std::vector<var_idx_t> right = pre_compile_tensor(code, rhs->as<ast_typed_tuple>()->get_items());
LValGlobs globs;
std::vector<var_idx_t> left = pre_compile_tensor(code, lhs->as<ast_typed_tuple>()->get_items(), &globs);
code.on_var_modification(left, loc);
LValContext local_lval;
std::vector<var_idx_t> left = pre_compile_tensor(code, lhs->as<ast_typed_tuple>()->get_items(), &local_lval);
vars_modification_watcher.trigger_callbacks(left, loc);
code.emplace_back(loc, Op::_Let, std::move(left), right);
globs.gen_ops_set_globs(code, loc);
local_lval.gen_ops_if_nonempty(code, loc);
return right;
}
// [lhs] = rhs; it's un-tuple to N left vars
@@ -145,29 +326,37 @@ static std::vector<var_idx_t> pre_compile_let(CodeBlob& code, AnyExprV lhs, AnyE
std::vector<var_idx_t> right = pre_compile_expr(rhs, code);
const TypeDataTypedTuple* inferred_tuple = rhs->inferred_type->try_as<TypeDataTypedTuple>();
std::vector<TypePtr> types_list = inferred_tuple->items;
std::vector<var_idx_t> rvect = code.create_tmp_var(TypeDataTensor::create(std::move(types_list)), rhs->loc);
std::vector<var_idx_t> rvect = code.create_tmp_var(TypeDataTensor::create(std::move(types_list)), rhs->loc, "(unpack-tuple)");
code.emplace_back(lhs->loc, Op::_UnTuple, rvect, std::move(right));
LValGlobs globs;
std::vector<var_idx_t> left = pre_compile_tensor(code, lhs->as<ast_typed_tuple>()->get_items(), &globs);
code.on_var_modification(left, loc);
LValContext local_lval;
std::vector<var_idx_t> left = pre_compile_tensor(code, lhs->as<ast_typed_tuple>()->get_items(), &local_lval);
vars_modification_watcher.trigger_callbacks(left, loc);
code.emplace_back(loc, Op::_Let, std::move(left), rvect);
globs.gen_ops_set_globs(code, loc);
local_lval.gen_ops_if_nonempty(code, loc);
return rvect;
}
// small optimization: `var x = rhs` or `local_var = rhs` (90% cases), LValContext not needed actually
if (lhs->type == ast_local_var_lhs || (lhs->type == ast_reference && lhs->as<ast_reference>()->sym->try_as<LocalVarData>())) {
std::vector<var_idx_t> right = pre_compile_expr(rhs, code);
std::vector<var_idx_t> left = pre_compile_expr(lhs, code); // effectively, local_var->ir_idx
vars_modification_watcher.trigger_callbacks(left, loc);
code.emplace_back(loc, Op::_Let, std::move(left), right);
return right;
}
// lhs = rhs
std::vector<var_idx_t> right = pre_compile_expr(rhs, code);
LValGlobs globs;
std::vector<var_idx_t> left = pre_compile_expr(lhs, code, &globs);
code.on_var_modification(left, loc);
LValContext local_lval;
std::vector<var_idx_t> left = pre_compile_expr(lhs, code, &local_lval);
vars_modification_watcher.trigger_callbacks(left, loc);
code.emplace_back(loc, Op::_Let, std::move(left), right);
globs.gen_ops_set_globs(code, loc);
local_lval.gen_ops_if_nonempty(code, loc);
return right;
}
static std::vector<var_idx_t> gen_op_call(CodeBlob& code, TypePtr ret_type, SrcLocation here,
std::vector<var_idx_t>&& args_vars, const FunctionData* fun_ref) {
std::vector<var_idx_t> rvect = code.create_tmp_var(ret_type, here);
Op& op = code.emplace_back(here, Op::_Call, rvect, std::move(args_vars), fun_ref);
static std::vector<var_idx_t> gen_op_call(CodeBlob& code, TypePtr ret_type, SrcLocation loc,
std::vector<var_idx_t>&& args_vars, const FunctionData* fun_ref, const char* debug_desc) {
std::vector<var_idx_t> rvect = code.create_tmp_var(ret_type, loc, debug_desc);
Op& op = code.emplace_back(loc, Op::_Call, rvect, std::move(args_vars), fun_ref);
if (!fun_ref->is_marked_as_pure()) {
op.set_impure_flag();
}
@@ -175,30 +364,42 @@ static std::vector<var_idx_t> gen_op_call(CodeBlob& code, TypePtr ret_type, SrcL
}
static std::vector<var_idx_t> process_symbol(SrcLocation loc, const Symbol* sym, CodeBlob& code, LValGlobs* lval_globs) {
static std::vector<var_idx_t> pre_compile_symbol(SrcLocation loc, const Symbol* sym, CodeBlob& code, LValContext* lval_ctx) {
if (const auto* glob_ref = sym->try_as<GlobalVarData>()) {
std::vector<var_idx_t> rvect = code.create_tmp_var(glob_ref->declared_type, loc);
if (lval_globs) {
lval_globs->add_modified_glob(glob_ref, rvect);
return rvect;
if (!lval_ctx) {
// `globalVar` is used for reading, just create local IR var to represent its value, Op GlobVar will fill it
// note, that global tensors are stored as a tuple and unpacked to N vars on read, N determined by declared_type
std::vector<var_idx_t> local_ir_idx = code.create_tmp_var(glob_ref->declared_type, loc, "(glob-var)");
code.emplace_back(loc, Op::_GlobVar, local_ir_idx, std::vector<var_idx_t>{}, glob_ref);
return local_ir_idx;
} else {
code.emplace_back(loc, Op::_GlobVar, rvect, std::vector<var_idx_t>{}, glob_ref);
return rvect;
// `globalVar = rhs` / `mutate globalVar` / `globalTuple.0 = rhs`
lval_ctx->register_lval(loc, glob_ref);
if (const std::vector<var_idx_t>* local_ir_idx = lval_ctx->exists_already_known_global(glob_ref)) {
return *local_ir_idx; // `f(mutate g.0, mutate g.1)`, then g will be read only once
}
std::vector<var_idx_t> local_ir_idx = code.create_tmp_var(glob_ref->declared_type, loc, "(glob-var)");
if (lval_ctx->is_rval_inside_lval()) { // for `globalVar.0` "globalVar" is rvalue inside lvalue
// for `globalVar = rhs` don't read a global actually, but for `globalVar.0 = rhs` do
code.emplace_back(loc, Op::_GlobVar, local_ir_idx, std::vector<var_idx_t>{}, glob_ref);
}
lval_ctx->register_modified_global(glob_ref, local_ir_idx);
return local_ir_idx;
}
}
if (const auto* const_ref = sym->try_as<GlobalConstData>()) {
if (const_ref->is_int_const()) {
std::vector<var_idx_t> rvect = code.create_tmp_var(TypeDataInt::create(), loc);
std::vector<var_idx_t> rvect = code.create_tmp_var(TypeDataInt::create(), loc, "(glob-const)");
code.emplace_back(loc, Op::_IntConst, rvect, const_ref->as_int_const());
return rvect;
} else {
std::vector<var_idx_t> rvect = code.create_tmp_var(TypeDataSlice::create(), loc);
std::vector<var_idx_t> rvect = code.create_tmp_var(TypeDataSlice::create(), loc, "(glob-const)");
code.emplace_back(loc, Op::_SliceConst, rvect, const_ref->as_slice_const());
return rvect;
}
}
if (const auto* fun_ref = sym->try_as<FunctionData>()) {
std::vector<var_idx_t> rvect = code.create_tmp_var(fun_ref->inferred_full_type, loc);
std::vector<var_idx_t> rvect = code.create_tmp_var(fun_ref->inferred_full_type, loc, "(glob-var-fun)");
code.emplace_back(loc, Op::_GlobVar, rvect, std::vector<var_idx_t>{}, fun_ref);
return rvect;
}
@@ -206,9 +407,12 @@ static std::vector<var_idx_t> process_symbol(SrcLocation loc, const Symbol* sym,
#ifdef TOLK_DEBUG
tolk_assert(static_cast<int>(var_ref->ir_idx.size()) == var_ref->declared_type->calc_width_on_stack());
#endif
if (lval_ctx) {
lval_ctx->register_lval(loc, var_ref);
}
return var_ref->ir_idx;
}
throw Fatal("process_symbol");
throw Fatal("pre_compile_symbol");
}
static std::vector<var_idx_t> process_assign(V<ast_assign> v, CodeBlob& code) {
@@ -234,7 +438,7 @@ static std::vector<var_idx_t> process_binary_operator(V<ast_binary_operator> v,
if (v->fun_ref) { // almost all operators, fun_ref was assigned at type inferring
std::vector<var_idx_t> args_vars = pre_compile_tensor(code, {v->get_lhs(), v->get_rhs()});
return gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), v->fun_ref);
return gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), v->fun_ref, "(binary-op)");
}
if (t == tok_logical_and || t == tok_logical_or) {
// do the following transformations:
@@ -249,7 +453,7 @@ static std::vector<var_idx_t> process_binary_operator(V<ast_binary_operator> v,
v_b_ne_0->mutate()->assign_fun_ref(lookup_global_symbol("_!=_")->as<FunctionData>());
std::vector<var_idx_t> cond = pre_compile_expr(v->get_lhs(), code);
tolk_assert(cond.size() == 1);
std::vector<var_idx_t> rvect = code.create_tmp_var(v->inferred_type, v->loc);
std::vector<var_idx_t> rvect = code.create_tmp_var(v->inferred_type, v->loc, "(cond)");
Op& if_op = code.emplace_back(v->loc, Op::_If, cond);
code.push_set_cur(if_op.block0);
code.emplace_back(v->loc, Op::_Let, rvect, pre_compile_expr(t == tok_logical_and ? v_b_ne_0 : v_1, code));
@@ -265,13 +469,13 @@ static std::vector<var_idx_t> process_binary_operator(V<ast_binary_operator> v,
static std::vector<var_idx_t> process_unary_operator(V<ast_unary_operator> v, CodeBlob& code) {
std::vector<var_idx_t> args_vars = pre_compile_tensor(code, {v->get_rhs()});
return gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), v->fun_ref);
return gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), v->fun_ref, "(unary-op)");
}
static std::vector<var_idx_t> process_ternary_operator(V<ast_ternary_operator> v, CodeBlob& code) {
std::vector<var_idx_t> cond = pre_compile_expr(v->get_cond(), code);
tolk_assert(cond.size() == 1);
std::vector<var_idx_t> rvect = code.create_tmp_var(v->inferred_type, v->loc);
std::vector<var_idx_t> rvect = code.create_tmp_var(v->inferred_type, v->loc, "(cond)");
Op& if_op = code.emplace_back(v->loc, Op::_If, cond);
code.push_set_cur(if_op.block0);
code.emplace_back(v->get_when_true()->loc, Op::_Let, rvect, pre_compile_expr(v->get_when_true(), code));
@@ -282,13 +486,67 @@ static std::vector<var_idx_t> process_ternary_operator(V<ast_ternary_operator> v
return rvect;
}
static std::vector<var_idx_t> process_dot_access(V<ast_dot_access> v, CodeBlob& code, LValGlobs* lval_globs) {
static std::vector<var_idx_t> process_dot_access(V<ast_dot_access> v, CodeBlob& code, LValContext* lval_ctx) {
// it's NOT a method call `t.tupleSize()` (since such cases are handled by process_function_call)
// it's `t.0`, `getUser().id`, and `t.tupleSize` (as a reference, not as a call)
// currently, nothing except a global function can be a target of dot access
const FunctionData* fun_ref = v->target;
if (!v->is_target_fun_ref()) {
TypePtr obj_type = v->get_obj()->inferred_type;
int index_at = std::get<int>(v->target);
// `tensorVar.0`; since a tensor of N elems are N vars on a stack actually, calculate offset
if (const auto* t_tensor = obj_type->try_as<TypeDataTensor>()) {
if (lval_ctx) lval_ctx->register_lval(v->loc, v);
if (lval_ctx) lval_ctx->enter_rval_inside_lval();
std::vector<var_idx_t> lhs_vars = pre_compile_expr(v->get_obj(), code, lval_ctx);
if (lval_ctx) lval_ctx->exit_rval_inside_lval();
int stack_width = t_tensor->items[index_at]->calc_width_on_stack();
int stack_offset = 0;
for (int i = 0; i < index_at; ++i) {
stack_offset += t_tensor->items[i]->calc_width_on_stack();
}
return {lhs_vars.begin() + stack_offset, lhs_vars.begin() + stack_offset + stack_width};
}
// `tupleVar.0`; not to mess up, separate rvalue and lvalue cases
if (obj_type->try_as<TypeDataTypedTuple>() || obj_type->try_as<TypeDataTuple>()) {
if (!lval_ctx) {
// `tupleVar.0` as rvalue: the same as "tupleAt(tupleVar, 0)" written in terms of IR vars
std::vector<var_idx_t> tuple_ir_idx = pre_compile_expr(v->get_obj(), code);
std::vector<var_idx_t> index_ir_idx = code.create_tmp_var(TypeDataInt::create(), v->get_identifier()->loc, "(tuple-idx)");
code.emplace_back(v->loc, Op::_IntConst, index_ir_idx, td::make_refint(index_at));
std::vector<var_idx_t> field_ir_idx = code.create_tmp_var(v->inferred_type, v->loc, "(tuple-field)");
tolk_assert(tuple_ir_idx.size() == 1 && field_ir_idx.size() == 1); // tuples contain only 1-slot values
const FunctionData* builtin_sym = lookup_global_symbol("tupleAt")->as<FunctionData>();
code.emplace_back(v->loc, Op::_Call, field_ir_idx, std::vector{tuple_ir_idx[0], index_ir_idx[0]}, builtin_sym);
return field_ir_idx;
} else {
// `tupleVar.0 = rhs`: finally "tupleSetAt(tupleVar, rhs, 0)" will be done
uint64_t hash = lval_ctx->register_lval(v->loc, v);
if (const var_idx_t* field_ir_idx = lval_ctx->exists_already_known_tuple_index(hash)) {
return {*field_ir_idx}; // `(t.0.0, t.0.1) = rhs`, then "t.0" will be read (tupleAt) once
}
lval_ctx->enter_rval_inside_lval();
std::vector<var_idx_t> tuple_ir_idx = pre_compile_expr(v->get_obj(), code, lval_ctx);
lval_ctx->exit_rval_inside_lval();
std::vector<var_idx_t> index_ir_idx = code.create_tmp_var(TypeDataInt::create(), v->get_identifier()->loc, "(tuple-idx)");
code.emplace_back(v->loc, Op::_IntConst, index_ir_idx, td::make_refint(index_at));
std::vector<var_idx_t> field_ir_idx = code.create_tmp_var(v->inferred_type, v->loc, "(tuple-field)");
if (lval_ctx->is_rval_inside_lval()) { // for `t.0.1 = rhs` "t.0" is rvalue inside lvalue
// for `t.0 = rhs` don't call tupleAt, but for `t.0.1 = rhs` do for t.0 (still don't for t.0.1)
const FunctionData* builtin_sym = lookup_global_symbol("tupleAt")->as<FunctionData>();
code.emplace_back(v->loc, Op::_Call, field_ir_idx, std::vector{tuple_ir_idx[0], index_ir_idx[0]}, builtin_sym);
}
lval_ctx->register_modified_tuple_index(hash, tuple_ir_idx[0], index_ir_idx[0], field_ir_idx[0]);
vars_modification_watcher.trigger_callbacks(tuple_ir_idx, v->loc);
return field_ir_idx;
}
}
tolk_assert(false);
}
// okay, v->target refs a function, like `obj.method`, filled at type inferring
// (currently, nothing except a global function can be referenced, no object-scope methods exist)
const FunctionData* fun_ref = std::get<const FunctionData*>(v->target);
tolk_assert(fun_ref);
return process_symbol(v->loc, fun_ref, code, lval_globs);
return pre_compile_symbol(v->loc, fun_ref, code, lval_ctx);
}
static std::vector<var_idx_t> process_function_call(V<ast_function_call> v, CodeBlob& code) {
@@ -304,7 +562,7 @@ static std::vector<var_idx_t> process_function_call(V<ast_function_call> v, Code
std::vector<var_idx_t> tfunc = pre_compile_expr(v->get_callee(), code);
tolk_assert(tfunc.size() == 1);
args_vars.push_back(tfunc[0]);
std::vector<var_idx_t> rvect = code.create_tmp_var(v->inferred_type, v->loc);
std::vector<var_idx_t> rvect = code.create_tmp_var(v->inferred_type, v->loc, "(call-ind)");
Op& op = code.emplace_back(v->loc, Op::_CallInd, rvect, std::move(args_vars));
op.set_impure_flag();
return rvect;
@@ -349,28 +607,28 @@ static std::vector<var_idx_t> process_function_call(V<ast_function_call> v, Code
for (const std::vector<var_idx_t>& list : vars_per_arg) {
args_vars.insert(args_vars.end(), list.cbegin(), list.cend());
}
std::vector<var_idx_t> rvect_apply = gen_op_call(code, op_call_type, v->loc, std::move(args_vars), fun_ref);
std::vector<var_idx_t> rvect_apply = gen_op_call(code, op_call_type, v->loc, std::move(args_vars), fun_ref, "(fun-call)");
if (fun_ref->has_mutate_params()) {
LValGlobs local_globs;
LValContext local_lval;
std::vector<var_idx_t> left;
for (int i = 0; i < delta_self + v->get_num_args(); ++i) {
if (fun_ref->parameters[i].is_mutate_parameter()) {
AnyExprV arg_i = obj_leftmost && i == 0 ? obj_leftmost : args[i];
tolk_assert(arg_i->is_lvalue || i == 0);
if (arg_i->is_lvalue) {
std::vector<var_idx_t> ith_var_idx = pre_compile_expr(arg_i, code, &local_globs);
std::vector<var_idx_t> ith_var_idx = pre_compile_expr(arg_i, code, &local_lval);
left.insert(left.end(), ith_var_idx.begin(), ith_var_idx.end());
} else {
left.insert(left.end(), vars_per_arg[0].begin(), vars_per_arg[0].end());
}
}
}
std::vector<var_idx_t> rvect = code.create_tmp_var(real_ret_type, v->loc);
std::vector<var_idx_t> rvect = code.create_tmp_var(real_ret_type, v->loc, "(fun-call)");
left.insert(left.end(), rvect.begin(), rvect.end());
code.on_var_modification(left, v->loc);
vars_modification_watcher.trigger_callbacks(left, v->loc);
code.emplace_back(v->loc, Op::_Let, std::move(left), rvect_apply);
local_globs.gen_ops_set_globs(code, v->loc);
local_lval.gen_ops_if_nonempty(code, v->loc);
rvect_apply = rvect;
}
@@ -385,29 +643,29 @@ static std::vector<var_idx_t> process_function_call(V<ast_function_call> v, Code
return rvect_apply;
}
static std::vector<var_idx_t> process_tensor(V<ast_tensor> v, CodeBlob& code, LValGlobs* lval_globs) {
return pre_compile_tensor(code, v->get_items(), lval_globs);
static std::vector<var_idx_t> process_tensor(V<ast_tensor> v, CodeBlob& code, LValContext* lval_ctx) {
return pre_compile_tensor(code, v->get_items(), lval_ctx);
}
static std::vector<var_idx_t> process_typed_tuple(V<ast_typed_tuple> v, CodeBlob& code, LValGlobs* lval_globs) {
if (lval_globs) { // todo some time, make "var (a, [b,c]) = (1, [2,3])" work
static std::vector<var_idx_t> process_typed_tuple(V<ast_typed_tuple> v, CodeBlob& code, LValContext* lval_ctx) {
if (lval_ctx) { // todo some time, make "var (a, [b,c]) = (1, [2,3])" work
v->error("[...] can not be used as lvalue here");
}
std::vector<var_idx_t> left = code.create_tmp_var(v->inferred_type, v->loc);
std::vector<var_idx_t> right = pre_compile_tensor(code, v->get_items());
std::vector<var_idx_t> left = code.create_tmp_var(v->inferred_type, v->loc, "(pack-tuple)");
std::vector<var_idx_t> right = pre_compile_tensor(code, v->get_items(), lval_ctx);
code.emplace_back(v->loc, Op::_Tuple, left, std::move(right));
return left;
}
static std::vector<var_idx_t> process_int_const(V<ast_int_const> v, CodeBlob& code) {
std::vector<var_idx_t> rvect = code.create_tmp_var(v->inferred_type, v->loc);
std::vector<var_idx_t> rvect = code.create_tmp_var(v->inferred_type, v->loc, "(int-const)");
code.emplace_back(v->loc, Op::_IntConst, rvect, v->intval);
return rvect;
}
static std::vector<var_idx_t> process_string_const(V<ast_string_const> v, CodeBlob& code) {
ConstantValue value = eval_const_init_value(v);
std::vector<var_idx_t> rvect = code.create_tmp_var(v->inferred_type, v->loc);
std::vector<var_idx_t> rvect = code.create_tmp_var(v->inferred_type, v->loc, "(str-const)");
if (value.is_int()) {
code.emplace_back(v->loc, Op::_IntConst, rvect, value.as_int());
} else {
@@ -418,21 +676,21 @@ static std::vector<var_idx_t> process_string_const(V<ast_string_const> v, CodeBl
static std::vector<var_idx_t> process_bool_const(V<ast_bool_const> v, CodeBlob& code) {
const FunctionData* builtin_sym = lookup_global_symbol(v->bool_val ? "__true" : "__false")->as<FunctionData>();
return gen_op_call(code, v->inferred_type, v->loc, {}, builtin_sym);
return gen_op_call(code, v->inferred_type, v->loc, {}, builtin_sym, "(bool-const)");
}
static std::vector<var_idx_t> process_null_keyword(V<ast_null_keyword> v, CodeBlob& code) {
const FunctionData* builtin_sym = lookup_global_symbol("__null")->as<FunctionData>();
return gen_op_call(code, v->inferred_type, v->loc, {}, builtin_sym);
return gen_op_call(code, v->inferred_type, v->loc, {}, builtin_sym, "(null-literal)");
}
static std::vector<var_idx_t> process_local_var(V<ast_local_var_lhs> v, CodeBlob& code) {
if (v->marked_as_redef) {
return process_symbol(v->loc, v->var_ref, code, nullptr);
return pre_compile_symbol(v->loc, v->var_ref, code, nullptr);
}
tolk_assert(v->var_ref->ir_idx.empty());
v->var_ref->mutate()->assign_ir_idx(code.create_var(v->inferred_type, v->var_ref, v->loc));
v->var_ref->mutate()->assign_ir_idx(code.create_var(v->inferred_type, v->loc, v->var_ref->name));
return v->var_ref->ir_idx;
}
@@ -444,13 +702,13 @@ static std::vector<var_idx_t> process_local_vars_declaration(V<ast_local_vars_de
static std::vector<var_idx_t> process_underscore(V<ast_underscore> v, CodeBlob& code) {
// when _ is used as left side of assignment, like `(cs, _) = cs.loadAndReturn()`
return code.create_tmp_var(v->inferred_type, v->loc);
return code.create_tmp_var(v->inferred_type, v->loc, "(underscore)");
}
std::vector<var_idx_t> pre_compile_expr(AnyExprV v, CodeBlob& code, LValGlobs* lval_globs) {
std::vector<var_idx_t> pre_compile_expr(AnyExprV v, CodeBlob& code, LValContext* lval_ctx) {
switch (v->type) {
case ast_reference:
return process_symbol(v->loc, v->as<ast_reference>()->sym, code, lval_globs);
return pre_compile_symbol(v->loc, v->as<ast_reference>()->sym, code, lval_ctx);
case ast_assign:
return process_assign(v->as<ast_assign>(), code);
case ast_set_assign:
@@ -462,17 +720,17 @@ std::vector<var_idx_t> pre_compile_expr(AnyExprV v, CodeBlob& code, LValGlobs* l
case ast_ternary_operator:
return process_ternary_operator(v->as<ast_ternary_operator>(), code);
case ast_cast_as_operator:
return pre_compile_expr(v->as<ast_cast_as_operator>()->get_expr(), code, lval_globs);
return pre_compile_expr(v->as<ast_cast_as_operator>()->get_expr(), code, lval_ctx);
case ast_dot_access:
return process_dot_access(v->as<ast_dot_access>(), code, lval_globs);
return process_dot_access(v->as<ast_dot_access>(), code, lval_ctx);
case ast_function_call:
return process_function_call(v->as<ast_function_call>(), code);
case ast_parenthesized_expression:
return pre_compile_expr(v->as<ast_parenthesized_expression>()->get_expr(), code, lval_globs);
return pre_compile_expr(v->as<ast_parenthesized_expression>()->get_expr(), code, lval_ctx);
case ast_tensor:
return process_tensor(v->as<ast_tensor>(), code, lval_globs);
return process_tensor(v->as<ast_tensor>(), code, lval_ctx);
case ast_typed_tuple:
return process_typed_tuple(v->as<ast_typed_tuple>(), code, lval_globs);
return process_typed_tuple(v->as<ast_typed_tuple>(), code, lval_ctx);
case ast_int_const:
return process_int_const(v->as<ast_int_const>(), code);
case ast_string_const:
@@ -515,14 +773,14 @@ static void process_assert_statement(V<ast_assert_statement> v, CodeBlob& code)
const FunctionData* builtin_sym = lookup_global_symbol("__throw_if_unless")->as<FunctionData>();
std::vector<var_idx_t> args_vars = pre_compile_tensor(code, args);
gen_op_call(code, TypeDataVoid::create(), v->loc, std::move(args_vars), builtin_sym);
gen_op_call(code, TypeDataVoid::create(), v->loc, std::move(args_vars), builtin_sym, "(throw-call)");
}
static void process_catch_variable(AnyExprV v_catch_var, CodeBlob& code) {
if (auto v_ref = v_catch_var->try_as<ast_reference>(); v_ref && v_ref->sym) { // not underscore
const LocalVarData* var_ref = v_ref->sym->as<LocalVarData>();
tolk_assert(var_ref->ir_idx.empty());
var_ref->mutate()->assign_ir_idx(code.create_var(v_catch_var->inferred_type, var_ref, v_catch_var->loc));
var_ref->mutate()->assign_ir_idx(code.create_var(v_catch_var->inferred_type, v_catch_var->loc, var_ref->name));
}
}
@@ -621,11 +879,11 @@ static void process_throw_statement(V<ast_throw_statement> v, CodeBlob& code) {
if (v->has_thrown_arg()) {
const FunctionData* builtin_sym = lookup_global_symbol("__throw_arg")->as<FunctionData>();
std::vector<var_idx_t> args_vars = pre_compile_tensor(code, {v->get_thrown_arg(), v->get_thrown_code()});
gen_op_call(code, TypeDataVoid::create(), v->loc, std::move(args_vars), builtin_sym);
gen_op_call(code, TypeDataVoid::create(), v->loc, std::move(args_vars), builtin_sym, "(throw-call)");
} else {
const FunctionData* builtin_sym = lookup_global_symbol("__throw")->as<FunctionData>();
std::vector<var_idx_t> args_vars = pre_compile_tensor(code, {v->get_thrown_code()});
gen_op_call(code, TypeDataVoid::create(), v->loc, std::move(args_vars), builtin_sym);
gen_op_call(code, TypeDataVoid::create(), v->loc, std::move(args_vars), builtin_sym, "(throw-call)");
}
}
@@ -699,7 +957,7 @@ static void convert_function_body_to_CodeBlob(const FunctionData* fun_ref, Funct
for (int i = 0; i < fun_ref->get_num_params(); ++i) {
const LocalVarData& param_i = fun_ref->parameters[i];
std::vector<var_idx_t> ir_idx = blob->create_var(param_i.declared_type, &param_i, param_i.loc);
std::vector<var_idx_t> ir_idx = blob->create_var(param_i.declared_type, param_i.loc, param_i.name);
rvect_import.insert(rvect_import.end(), ir_idx.begin(), ir_idx.end());
param_i.mutate()->assign_ir_idx(std::move(ir_idx));
}
@@ -716,6 +974,7 @@ static void convert_function_body_to_CodeBlob(const FunctionData* fun_ref, Funct
blob->close_blk(v_body->loc_end);
code_body->set_code(blob);
tolk_assert(vars_modification_watcher.empty());
}
static void convert_asm_body_to_AsmOp(const FunctionData* fun_ref, FunctionBodyAsm* asm_body) {


@@ -123,8 +123,8 @@ class CheckRValueLvalueVisitor final : public ASTVisitorFunctionBody {
void visit(V<ast_dot_access> v) override {
// a reference to a method used as rvalue, like `var v = t.tupleAt`
if (const FunctionData* fun_ref = v->target; v->is_rvalue) {
validate_function_used_as_noncall(v, fun_ref);
if (v->is_rvalue && v->is_target_fun_ref()) {
validate_function_used_as_noncall(v, std::get<const FunctionData*>(v->target));
}
}


@@ -124,6 +124,19 @@ static void fire_error_cannot_apply_operator(SrcLocation loc, std::string_view o
throw ParseError(loc, "can not apply operator `" + op + "` to " + to_string(lhs->inferred_type) + " and " + to_string(rhs->inferred_type));
}
// fire an error on `untypedTupleVar.0` when used without a hint
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
static void fire_error_cannot_deduce_untyped_tuple_access(SrcLocation loc, int index) {
std::string idx_access = "<tuple>." + std::to_string(index);
throw ParseError(loc, "can not deduce type of `" + idx_access + "`; either assign it to variable like `var c: int = " + idx_access + "` or cast the result like `" + idx_access + " as int`");
}
// fire an error on `untypedTupleVar.0` when inferred as (int,int), or `[int, (int,int)]`, or other non-1 width in a tuple
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
static void fire_error_cannot_put_non1_stack_width_arg_to_tuple(SrcLocation loc, TypePtr inferred_type) {
throw ParseError(loc, "can not put " + to_string(inferred_type) + " into a tuple, because it occupies " + std::to_string(inferred_type->calc_width_on_stack()) + " stack slots in TVM, not 1");
}
// check correctness of called arguments counts and their type matching
static void check_function_arguments(const FunctionData* fun_ref, V<ast_argument_list> v, AnyExprV lhs_of_dot_call) {
int delta_self = lhs_of_dot_call ? 1 : 0;
@@ -466,6 +479,22 @@ class InferCheckTypesAndCallsAndFieldsVisitor final {
return TypeDataTypedTuple::create(std::move(sub_hints));
}
// `a.0 = rhs` / `b.1.0 = rhs` (remember, its target is not assigned yet)
if (auto lhs_dot = lhs->try_as<ast_dot_access>()) {
TypePtr obj_hint = calc_hint_from_assignment_lhs(lhs_dot->get_obj());
std::string_view field_name = lhs_dot->get_field_name();
if (field_name[0] >= '0' && field_name[0] <= '9') {
int index_at = std::stoi(std::string(field_name));
if (const auto* t_tensor = obj_hint->try_as<TypeDataTensor>(); t_tensor && index_at < t_tensor->size()) {
return t_tensor->items[index_at];
}
if (const auto* t_tuple = obj_hint->try_as<TypeDataTypedTuple>(); t_tuple && index_at < t_tuple->size()) {
return t_tuple->items[index_at];
}
}
return TypeDataUnknown::create();
}
return TypeDataUnknown::create();
}
@@ -562,8 +591,8 @@ class InferCheckTypesAndCallsAndFieldsVisitor final {
return;
}
// here is something strange and unhandled, like `f() = rhs`
// it will fail on later compilation steps (like rvalue/lvalue checks), but type inferring should pass
// here is something unhandled like `a.0 = rhs`, run regular inferring on rhs
// for something strange like `f() = rhs` type inferring will pass, but will fail later
infer_any_expr(lhs, rhs_type);
if (!lhs->inferred_type->can_rhs_be_assigned(rhs_type)) {
err_loc->error("can not assign " + to_string(rhs_type) + " to " + to_string(lhs));
@@ -839,25 +868,56 @@ class InferCheckTypesAndCallsAndFieldsVisitor final {
// it's NOT a method call `t.tupleSize()` (since such cases are handled by infer_function_call)
// it's `t.0`, `getUser().id`, and `t.tupleSize` (as a reference, not as a call)
infer_any_expr(v->get_obj());
TypePtr obj_type = v->get_obj()->inferred_type;
// our goal is to fill v->target knowing type of obj
V<ast_identifier> v_ident = v->get_identifier(); // field/method name vertex
V<ast_instantiationT_list> v_instantiationTs = v->get_instantiationTs();
std::string_view field_name = v_ident->name;
// for now, Tolk doesn't have structures, properties, and object-scoped methods
// so, only `t.tupleSize` is allowed, look up a global function
const Symbol* sym = lookup_global_symbol(field_name);
if (!sym) {
v_ident->error("undefined symbol `" + static_cast<std::string>(field_name) + "`");
// it can be indexed access (`tensorVar.0`, `tupleVar.1`) or a method (`t.tupleSize`)
// at first, check for indexed access
if (field_name[0] >= '0' && field_name[0] <= '9') {
int index_at = std::stoi(std::string(field_name));
if (const auto* t_tensor = obj_type->try_as<TypeDataTensor>()) {
if (index_at >= t_tensor->size()) {
v_ident->error("invalid tensor index, expected 0.." + std::to_string(t_tensor->items.size() - 1));
}
v->mutate()->assign_target(index_at);
assign_inferred_type(v, t_tensor->items[index_at]);
return;
}
if (const auto* t_tuple = obj_type->try_as<TypeDataTypedTuple>()) {
if (index_at >= t_tuple->size()) {
v_ident->error("invalid tuple index, expected 0.." + std::to_string(t_tuple->items.size() - 1));
}
v->mutate()->assign_target(index_at);
assign_inferred_type(v, t_tuple->items[index_at]);
return;
}
if (obj_type->try_as<TypeDataTuple>()) {
if (hint == nullptr) {
fire_error_cannot_deduce_untyped_tuple_access(v->loc, index_at);
}
if (hint->calc_width_on_stack() != 1) {
fire_error_cannot_put_non1_stack_width_arg_to_tuple(v->loc, hint);
}
v->mutate()->assign_target(index_at);
assign_inferred_type(v, hint);
return;
}
v_ident->error("type " + to_string(obj_type) + " is not indexable");
}
const FunctionData* fun_ref = sym->try_as<FunctionData>();
// for now, Tolk doesn't have fields and object-scoped methods; `t.tupleSize` is a global function `tupleSize`
const Symbol* sym = lookup_global_symbol(field_name);
const FunctionData* fun_ref = sym ? sym->try_as<FunctionData>() : nullptr;
if (!fun_ref) {
v_ident->error("referencing a non-function");
v_ident->error("non-existing field `" + static_cast<std::string>(field_name) + "` of type " + to_string(obj_type));
}
// `t.tupleSize` is ok, `cs.tupleSize` not
if (!fun_ref->parameters[0].declared_type->can_rhs_be_assigned(v->get_obj()->inferred_type)) {
v_ident->error("referencing a method for " + to_string(fun_ref->parameters[0]) + " with an object of type " + to_string(v->get_obj()));
if (!fun_ref->parameters[0].declared_type->can_rhs_be_assigned(obj_type)) {
v_ident->error("referencing a method for " + to_string(fun_ref->parameters[0]) + " with object of type " + to_string(obj_type));
}
if (fun_ref->is_generic_function() && !v_instantiationTs) {
@@ -896,21 +956,24 @@ class InferCheckTypesAndCallsAndFieldsVisitor final {
} else if (auto v_dot = callee->try_as<ast_dot_access>()) {
// `obj.someMethod()` / `obj.someMethod<int>()` / `getF().someMethod()` / `obj.SOME_CONST()`
// note, that dot_obj->target is not filled yet, since callee was not inferred yet
delta_self = 1;
dot_obj = v_dot->get_obj();
v_instantiationTs = v_dot->get_instantiationTs(); // present for `obj.someMethod<int>()`
infer_any_expr(dot_obj);
// for now, Tolk doesn't have object-scoped methods, so method resolving doesn't depend on obj type
// (in other words, `globalFunction(a)` = `a.globalFunction()`)
std::string_view method_name = v_dot->get_field_name();
const Symbol* sym = lookup_global_symbol(method_name);
if (!sym) {
v_dot->get_identifier()->error("undefined symbol `" + static_cast<std::string>(method_name) + "`");
}
fun_ref = sym->try_as<FunctionData>();
if (!fun_ref) {
v_dot->get_identifier()->error("calling a non-function");
// it can be indexed access (`tensorVar.0()`, `tupleVar.1()`) or a method (`t.tupleSize()`)
std::string_view field_name = v_dot->get_field_name();
if (field_name[0] >= '0' && field_name[0] <= '9') {
// indexed access `ab.2()`, then treat `ab.2` just like an expression, fun_ref remains nullptr
// infer_dot_access() will be called for a callee, it will check type, index correctness, etc.
} else {
// for now, Tolk doesn't have fields and object-scoped methods; `t.tupleSize` is a global function `tupleSize`
const Symbol* sym = lookup_global_symbol(field_name);
fun_ref = sym ? sym->try_as<FunctionData>() : nullptr;
if (!fun_ref) {
v_dot->get_identifier()->error("non-existing method `" + static_cast<std::string>(field_name) + "` of type " + to_string(dot_obj));
}
}
} else {
@@ -926,7 +989,7 @@ class InferCheckTypesAndCallsAndFieldsVisitor final {
assign_inferred_type(arg_i, arg_i->get_expr());
}
// handle `local_var()` / `getF()()` / `5()` / `SOME_CONST()` / `obj.method()()()`
// handle `local_var()` / `getF()()` / `5()` / `SOME_CONST()` / `obj.method()()()` / `tensorVar.0()`
if (!fun_ref) {
// treat callee like a usual expression, which must have "callable" inferred type
infer_any_expr(callee);
@@ -1017,6 +1080,9 @@ class InferCheckTypesAndCallsAndFieldsVisitor final {
for (int i = 0; i < v->size(); ++i) {
AnyExprV item = v->get_item(i);
infer_any_expr(item, tuple_hint && i < tuple_hint->size() ? tuple_hint->items[i] : nullptr);
if (item->inferred_type->calc_width_on_stack() != 1) {
fire_error_cannot_put_non1_stack_width_arg_to_tuple(v->get_item(i)->loc, item->inferred_type);
}
types_list.emplace_back(item->inferred_type);
}
assign_inferred_type(v, TypeDataTypedTuple::create(std::move(types_list)));


@@ -44,21 +44,23 @@ typedef int var_idx_t;
typedef int const_idx_t;
struct TmpVar {
TypePtr v_type;
var_idx_t ir_idx;
const LocalVarData* v_sym; // points to var defined in code; nullptr for implicitly created tmp vars
SrcLocation where;
std::vector<std::function<void(SrcLocation)>> on_modification;
var_idx_t ir_idx; // every var in IR represents 1 stack slot
TypePtr v_type; // calc_width_on_stack() is 1
std::string name; // "x" for vars originated from user sources; "x.0" for tensor components; empty for implicitly created tmp vars
SrcLocation loc; // location of var declaration in sources or where a tmp var was originated
#ifdef TOLK_DEBUG
const char* desc = nullptr; // "origin" of tmp var, for debug output like `'15 (binary-op) '16 (glob-var)`
#endif
TmpVar(var_idx_t ir_idx, TypePtr type, const LocalVarData* v_sym, SrcLocation loc)
: v_type(type)
, ir_idx(ir_idx)
, v_sym(v_sym)
, where(loc) {
TmpVar(var_idx_t ir_idx, TypePtr v_type, std::string name, SrcLocation loc)
: ir_idx(ir_idx)
, v_type(v_type)
, name(std::move(name))
, loc(loc) {
}
void show(std::ostream& os, int omit_idx = 0) const;
void dump(std::ostream& os) const;
void show_as_stack_comment(std::ostream& os) const;
void show(std::ostream& os) const;
};
struct VarDescr {
@@ -602,7 +604,6 @@ struct AsmOpList {
}
const_idx_t register_const(Const new_const);
Const get_const(const_idx_t idx);
void show_var(std::ostream& os, var_idx_t idx) const;
void show_var_ext(std::ostream& os, std::pair<var_idx_t, const_idx_t> idx_pair) const;
void adjust_last() {
if (list_.back().is_nop()) {
@@ -1018,13 +1019,10 @@ struct Stack {
void rearrange_top(var_idx_t top, bool last);
void merge_const(const Stack& req_stack);
void merge_state(const Stack& req_stack);
void show(int _mode);
void show() {
show(mode);
}
void show();
void opt_show() {
if ((mode & (_StkCmt | _Shown)) == _StkCmt) {
show(mode);
show();
}
}
bool operator==(const Stack& y) const & {
@@ -1108,9 +1106,15 @@ struct CodeBlob {
#endif
return res;
}
std::vector<var_idx_t> create_var(TypePtr var_type, const LocalVarData* v_sym, SrcLocation loc);
std::vector<var_idx_t> create_tmp_var(TypePtr var_type, SrcLocation loc) {
return create_var(var_type, nullptr, loc);
std::vector<var_idx_t> create_var(TypePtr var_type, SrcLocation loc, std::string name);
std::vector<var_idx_t> create_tmp_var(TypePtr var_type, SrcLocation loc, const char* desc) {
std::vector<var_idx_t> ir_idx = create_var(var_type, loc, {});
#ifdef TOLK_DEBUG
for (var_idx_t v : ir_idx) {
vars[v].desc = desc;
}
#endif
return ir_idx;
}
bool compute_used_code_vars();
bool compute_used_code_vars(std::unique_ptr<Op>& ops, const VarDescrList& var_info, bool edit) const;
@@ -1135,14 +1139,6 @@ struct CodeBlob {
void mark_noreturn();
void generate_code(AsmOpList& out_list, int mode = 0);
void generate_code(std::ostream& os, int mode = 0, int indent = 0);
void on_var_modification(const std::vector<var_idx_t>& left_lval_indices, SrcLocation here) const {
for (var_idx_t ir_idx : left_lval_indices) {
for (auto& f : vars.at(ir_idx).on_modification) {
f(here);
}
}
}
};
// defined in builtins.cpp