1
0
Fork 0
mirror of https://github.com/ton-blockchain/ton synced 2025-03-09 15:40:10 +00:00

change order of assignment lhs=rhs evaluation

This commit is contained in:
tolk-vm 2025-02-10 12:57:25 +04:00
parent 3c245c6146
commit 01fc473801
No known key found for this signature in database
GPG key ID: 7905DD7FE0324B12
20 changed files with 577 additions and 429 deletions

View file

@ -144,15 +144,16 @@ fun test95() {
"""
test95 PROC:<{
...
next GETGLOB // '10
3 PUSHINT // '10 '12=3
4 PUSHINT // '10 '12=3 '13=4
5 PUSHINT // '10 '12=3 '13=4 '14=5
TRIPLE // '15 '16
next SETGLOB
next GETGLOB // g_next
3 PUSHINT // g_next '14=3
4 PUSHINT // g_next '14=3 '15=4
5 PUSHINT // g_next '14=3 '15=4 '16=5
TRIPLE // '10 '11
SWAP
cur SETGLOB
cur GETGLOB // '17
next GETGLOB // '17 '18
next SETGLOB
cur GETGLOB // g_cur
next GETGLOB // g_cur g_next
}>
"""
*/

View file

@ -147,5 +147,5 @@ fun main() {
// x.0 x.1
"""
@code_hash 7627024945492125068389905298530400936797031708759561372406088054030801992712
@code_hash 61280273714870328160131559159866470128402169974050439159015534193532598351244
*/

View file

@ -26,6 +26,176 @@ fun typesAsIdentifiers(builder: builder) {
return int;
}
global callOrder: tuple;
// Call-order probe: appends marker 100 to the global `callOrder`, then returns the tensor (1, 2).
fun getTensor_12() {
callOrder.tuplePush(100);
return (1, 2);
}
// Call-order probe: appends marker 101 to the global `callOrder`, then returns the tensor (1, x).
fun getTensor_1X(x: int) {
callOrder.tuplePush(101);
return (1, x);
}
// Call-order probe: appends marker 110 to the global `callOrder`, then returns the typed tuple [1, 2].
fun getTuple_12() {
callOrder.tuplePush(110);
return [1, 2];
}
// Call-order probe: appends marker 111 to the global `callOrder`, then returns the typed tuple [1, x].
fun getTuple_1X(x: int) {
callOrder.tuplePush(111);
return [1, x];
}
// Call-order probe: appends marker 120 to the global `callOrder`, then returns a tuple
// built at runtime (createEmptyTuple + tuplePush) holding 1 and 2.
fun getUntypedTuple_12() {
callOrder.tuplePush(120);
var t = createEmptyTuple(); t.tuplePush(1); t.tuplePush(2);
return t;
}
// Call-order probe: appends marker 121 to the global `callOrder`, then returns a tuple
// built at runtime (createEmptyTuple + tuplePush) holding 1 and x.
fun getUntypedTuple_1X(x: int) {
callOrder.tuplePush(121);
var t = createEmptyTuple(); t.tuplePush(1); t.tuplePush(x);
return t;
}
// Call-order probe: appends marker 10 to the global `callOrder`, then returns the constant 5.
fun getIntValue5() {
callOrder.tuplePush(10);
return 5;
}
// Call-order probe: appends marker 11 to the global `callOrder`, then returns its argument unchanged.
fun getIntValueX(x: int) {
callOrder.tuplePush(11);
return x;
}
// Evaluation order of `lhs = rhs` when lhs is a field of a tensor returned by a call.
// The expected callOrder is [100 10 101 10 101 11]: in every statement the lhs call is logged
// before the rhs call, and `x = 10` inside the lhs argument leaves x equal to 10.
@method_id(102)
fun test102() {
callOrder = createEmptyTuple();
var x = 0;
getTensor_12().0 = getIntValue5();
getTensor_1X(5).1 = getIntValue5();
getTensor_1X(x = 10).0 = getIntValueX(x);
return (callOrder, x);
}
// Same scenario as test102, but the lhs is an element of a typed tuple returned by a call.
// The expected callOrder is [110 10 111 10 111 11] and x is 10: lhs calls are logged before rhs calls.
@method_id(103)
fun test103() {
callOrder = createEmptyTuple();
var x = 0;
getTuple_12().0 = getIntValue5();
getTuple_1X(5).1 = getIntValue5();
getTuple_1X(x = 10).0 = getIntValueX(x);
return (callOrder, x);
}
// Same scenario as test102, but the lhs is an element of a runtime-built (untyped) tuple.
// The expected callOrder is [120 10 121 10 121 11] and x is 10: lhs calls are logged before rhs calls.
@method_id(104)
fun test104() {
callOrder = createEmptyTuple();
var x = 0;
getUntypedTuple_12().0 = getIntValue5();
getUntypedTuple_1X(5).1 = getIntValue5();
getUntypedTuple_1X(x = 10).0 = getIntValueX(x);
return (callOrder, x);
}
// Chained assignment `a.0 = b.1 = c` over tensors returned by calls.
// The expected callOrder is [100 10 101 100 11], i.e. the calls are logged strictly left to right.
@method_id(105)
fun test105() {
callOrder = createEmptyTuple();
getTensor_12().0 = getTensor_1X(getIntValue5()).1 = getIntValueX(getTensor_12().1);
return callOrder;
}
// Chained assignment `a.0 = b.1 = c` over typed tuples returned by calls.
// The expected callOrder is [110 10 111 110 11], i.e. the calls are logged strictly left to right.
@method_id(106)
fun test106() {
callOrder = createEmptyTuple();
getTuple_12().0 = getTuple_1X(getIntValue5()).1 = getIntValueX(getTuple_12().1);
return callOrder;
}
global t107: (int, int);
// Both lhs components are fields of assignment expressions that write the global t107.
// The expected result is `3 4`: t107 keeps the value of the last nested assignment,
// and the outer assignment of (5, 6) does not end up in the global.
@method_id(107)
fun test107() {
((t107 = (1, 2)).0, (t107 = (3, 4)).1) = (5, 6);
return t107;
}
global g108: int;
// Asserts that a equals b (error code 10 otherwise) and returns b.
fun assertEq(a: int, b: int) {
assert(a == b, 10);
return b;
}
// A global assigned inside an lhs argument must already be visible to the rhs:
// assertEq(g108, 8) on the rhs would fail with code 10 if `g108 = 8` had not been applied yet.
// The expected result is callOrder [101] and g108 equal to 8.
@method_id(108)
fun test108() {
callOrder = createEmptyTuple();
g108 = 0;
getTensor_1X(g108 = 8).1 = assertEq(g108, 8);
return (callOrder, g108);
}
// `[lhs...] = [rhs...]` with calls and side effects on both sides.
// The expected callOrder is [110 10 111 111 10 10 11]: every lhs item is evaluated
// (including `x = getIntValue5()` and `x += 10`) before any rhs item; x ends up as 15.
@method_id(109)
fun test109() {
callOrder = createEmptyTuple();
var x = 0;
[getTuple_12().0, getTuple_1X(x = getIntValue5()).1, getTuple_1X(x += 10).0] = [getIntValue5(), getIntValue5(), getIntValueX(x)];
return (callOrder, x);
}
global g110: int;
global t110: (int, int);
// Globals (an int and a tensor) are assigned inside the lhs and then modified inside the rhs.
// The expected result is xy = [13 13], callOrder = [111 11 111 11], g110 = 23, t110 = (13, 19):
// the lhs writes (g110 = 8, t110 = (8, 9)) happen first, then the rhs `+=` operations apply on top.
@method_id(110)
fun test110() {
callOrder = createEmptyTuple();
var xy = [0, 0];
[xy.0, getTuple_1X(g110 = 8).0] = [g110 += 5, getIntValueX(g110 += 10)];
[xy.1, getTuple_1X((t110 = (8, 9)).0).1] = [t110.0 += 5, getIntValueX(t110.1 += 10)];
return (xy, callOrder, g110, t110);
}
// The rhs is computed into a local variable first, then unpacked into a mixed lhs
// (tuple elements, an element of a call result, a plain local).
// The expected result is xy = [10 0], g110 = 18, callOrder = [11 11 111], z = 50.
@method_id(111)
fun test111() {
callOrder = createEmptyTuple();
var z = -1;
var xy = [0, z = 0];
var rhs = [getIntValueX(xy.1 += 10), xy.1, xy.0, z += 50];
[xy.0, getTuple_1X(g110 = 8 + getIntValueX(xy.1)).0, xy.1, z] = rhs;
return (xy, g110, callOrder, z);
}
// Swaps two elements of a tuple through heavily parenthesized lhs/rhs expressions.
// The expected result is [2 1].
@method_id(112)
fun test112() {
var xy = [1, 2];
((((xy))).0, ((xy.1))) = ((xy).1, ((xy.0)));
return xy;
}
// The same local targets (a, t / t.1, z / z.1) appear several times in one lhs.
// The expected result is a = 13, t = [1 14], z = (1, 3): the rhs items see the values from before
// the assignment, and for a target written more than once the later write wins.
@method_id(113)
fun test113() {
var (a, t, z) = (1, [2,3], (-1,-1));
(a, t, a, z, t.1, z.1) = (10, [a,12], 13, (a, t.1), 14, t.1);
return (a, t, z);
}
global g114: int;
global t114: [int, int];
global z114: (int, int);
// Same scenario as test113, but all targets are globals (int, typed tuple, tensor).
// The expected result is identical: g114 = 13, t114 = [1 14], z114 = (1, 3).
@method_id(114)
fun test114() {
g114 = 1;
t114 = [2, 3];
(g114, t114, g114, z114, t114.1, z114.1) = (10, [g114,12], 13, (g114, t114.1), 14, t114.1);
return (g114, t114, z114);
}
// `[lhs...] = rhs` where the rhs is a single call returning a tuple to be unpacked.
// The expected result is callOrder = [101 111], x = 9, y = 9: the lhs call (with `x = 5`)
// is logged before the rhs call (with `x = 9`), so the rhs assignment to x is the final one.
@method_id(115)
fun test115() {
callOrder = createEmptyTuple();
var x = 0;
var y = 0;
[getTensor_1X(x = 5).0, y] = getTuple_1X(x = 9);
return (callOrder, x, y);
}
fun main(value: int) {
var (x: int, y) = (autoInferIntNull(value), autoInferIntNull(value * 2));
if (x == null && y == null) { return null; }
@ -37,4 +207,18 @@ fun main(value: int) {
@testcase | 0 | 6 | -1
@testcase | 0 | 11 | (null)
@testcase | 101 | 78 | 88
@testcase | 102 | | [ 100 10 101 10 101 11 ] 10
@testcase | 103 | | [ 110 10 111 10 111 11 ] 10
@testcase | 104 | | [ 120 10 121 10 121 11 ] 10
@testcase | 105 | | [ 100 10 101 100 11 ]
@testcase | 106 | | [ 110 10 111 110 11 ]
@testcase | 107 | | 3 4
@testcase | 108 | | [ 101 ] 8
@testcase | 109 | | [ 110 10 111 111 10 10 11 ] 15
@testcase | 110 | | [ 13 13 ] [ 111 11 111 11 ] 23 13 19
@testcase | 111 | | [ 10 0 ] 18 [ 11 11 111 ] 50
@testcase | 112 | | [ 2 1 ]
@testcase | 113 | | 13 [ 1 14 ] 1 3
@testcase | 114 | | 13 [ 1 14 ] 1 3
@testcase | 115 | | [ 101 111 ] 9 9
*/

View file

@ -35,7 +35,7 @@ Below, I just give examples of @fif_codegen tag:
"""
main PROC:<{
// s
17 PUSHINT // s '1=17
17 PUSHINT // s '3=17
OVER // s z=17 t
WHILE:<{
...

View file

@ -21,6 +21,26 @@ fun plus(mutate self: int, y: int): int {
fun eq<X>(v: X): X { return v; }
global gTup: [int];
global gTens: (int, int);
// Codegen fixture for indexed assignment: a local tuple, a nested local tuple,
// a global tuple and a global tensor, each assigned by index once.
// The exact Fift output for this function is pinned in the @fif_codegen section of this file.
// NOTE(review): the debugPrintString("") calls presumably just separate the four cases
// in the generated code - confirm before removing them.
@method_id(100)
fun testCodegenSimple() {
var t1 = [1];
t1.0 = 2;
debugPrintString("");
var t2 = [[1]];
t2.0.0 = 2;
debugPrintString("");
gTup = [1];
gTup.0 = 2;
debugPrintString("");
gTens = (1,2);
gTens.1 = 4;
debugPrintString("");
return (t1, t2, gTup, gTens);
}
@method_id(101)
fun test101() {
var t = (1, (2, 3), [4, 5, [6, 7]], 8);
@ -241,30 +261,60 @@ fun main(){}
@fif_codegen
"""
testCodegenNoPureIndexedAccess PROC:<{
testCodegenSimple PROC:<{
//
0 PUSHINT // '8=0
1 PUSHINT // '2=1
SINGLE // t1
2 PUSHINT // t1 '3=2
0 SETINDEX // t1
x{} PUSHSLICE // t1 '6
STRDUMP DROP
1 PUSHINT // t1 '10=1
SINGLE // t1 '9
SINGLE // t1 t2
2 PUSHINT // t1 t2 '11=2
OVER // t1 t2 '11=2 t2
0 INDEX // t1 t2 '11=2 '14
SWAP // t1 t2 '14 '11=2
0 SETINDEX // t1 t2 '14
0 SETINDEX // t1 t2
x{} PUSHSLICE // t1 t2 '17
STRDUMP DROP
1 PUSHINT // t1 t2 '20=1
SINGLE // t1 t2 '18
gTup SETGLOB
2 PUSHINT // t1 t2 '21=2
gTup GETGLOB // t1 t2 '21=2 g_gTup
SWAP // t1 t2 g_gTup '21=2
0 SETINDEX // t1 t2 g_gTup
gTup SETGLOB
x{} PUSHSLICE // t1 t2 '25
STRDUMP DROP
1 PUSHINT // t1 t2 '28=1
2 PUSHINT // t1 t2 '26=1 '27=2
PAIR
gTens SETGLOB
4 PUSHINT // t1 t2 g_gTens.1=4
gTens GETGLOB
UNPAIR // t1 t2 g_gTens.1=4 g_gTens.0 g_gTens.1
DROP // t1 t2 g_gTens.1=4 g_gTens.0
SWAP // t1 t2 g_gTens.0 g_gTens.1=4
PAIR
gTens SETGLOB
x{} PUSHSLICE // t1 t2 '36
STRDUMP DROP
gTup GETGLOB // t1 t2 g_gTup
gTens GETGLOB
UNPAIR // t1 t2 g_gTup g_gTens.0 g_gTens.1
}>
"""
@fif_codegen
"""
test104 PROC:<{
testCodegenNoPureIndexedAccess PROC:<{
//
5 PUSHINT // '2=5
DUP // '2=5 '3=5
PAIR // '1
SINGLE // m
10 PUSHINT // m '5=10
20 PUSHINT // m '5=10 '6=20
s2 PUSH // m '5=10 '6=20 m
0 INDEX // m '10=10 '12=20 '8
SWAP // m '10=10 '8 '12=20
1 SETINDEX // m '10=10 '8
SWAP // m '8 '10=10
0 SETINDEX // m '8
0 SETINDEX // m
...
0 PUSHINT // '8=0
}>
"""
@fif_codegen

View file

@ -1,9 +1,9 @@
fun main() {
var c = 1;
(c, c) = (2, 3);
var t = createEmptyTuple();
t.0 = (1, 2);
}
/**
@compilation_should_fail
@stderr one variable modified twice inside the same expression
@stderr a tuple can not have `(int, int)` inside, because it occupies 2 stack slots in TVM, not 1
*/

View file

@ -1,11 +1,8 @@
fun incThree(mutate a: int, mutate b: int, mutate c: int) {}
fun main() {
var c = [[[1, 2]]];
incThree(mutate c.0.0.0, mutate c.0.0.1, mutate c.0.0.0);
fun main(cs: slice) {
var cb = cs.tupleSize;
}
/**
@compilation_should_fail
@stderr one variable modified twice inside the same expression
@stderr referencing a method for `tuple` with object of type `slice`
*/

View file

@ -1,10 +1,9 @@
global gg: (int, int);
fun main() {
[gg.0, gg.1, gg.0] = [0, 1, 0];
var t = createEmptyTuple();
var xy = t.0 as (int, int);
}
/**
@compilation_should_fail
@stderr one variable modified twice inside the same expression
@stderr a tuple can not have `(int, int)` inside, because it occupies 2 stack slots in TVM, not 1
*/

View file

@ -1,10 +0,0 @@
global gg: (int, [int, int]);
fun main() {
(gg.1.0, gg.1, gg.1.1) = (0, [1, 2], 3);
}
/**
@compilation_should_fail
@stderr one variable both modified and read inside the same expression
*/

View file

@ -1,9 +0,0 @@
fun main() {
var ab = (1, 2);
(ab, ab.1) = ((2, 3), 4);
}
/**
@compilation_should_fail
@stderr one variable both modified and read inside the same expression
*/

View file

@ -1,9 +0,0 @@
fun main() {
var t = createEmptyTuple();
t.0 = (1, 2);
}
/**
@compilation_should_fail
@stderr can not put `(int, int)` into a tuple, because it occupies 2 stack slots in TVM, not 1
*/

View file

@ -1,8 +0,0 @@
fun main(cs: slice) {
var cb = cs.tupleSize;
}
/**
@compilation_should_fail
@stderr referencing a method for `tuple` with object of type `slice`
*/

View file

@ -7,5 +7,5 @@ fun main() {
/**
@compilation_should_fail
@stderr can not put `(int, builder)` into a tuple, because it occupies 2 stack slots in TVM, not 1
@stderr a tuple can not have `(int, builder)` inside, because it occupies 2 stack slots in TVM, not 1
*/

View file

@ -307,7 +307,7 @@ fun main(){}
...
incrementTwoInPlace CALLDICT // x y sum1
-ROT
10 PUSHINT // sum1 x y '10=10
10 PUSHINT // sum1 x y '11=10
incrementTwoInPlace CALLDICT // sum1 x y sum2
s1 s3 s0 XCHG3 // x y sum1 sum2
}>

View file

@ -133,7 +133,7 @@ fun main() {
"""
test7 PROC:<{
...
LDOPTREF // b '8 '7
LDOPTREF // b '9 '8
DROP // b c
ISNULL // b '11
10 MULCONST // b '13

View file

@ -193,7 +193,7 @@ class ASTStringifier final : public ASTVisitor {
}
case ast_local_var_lhs: {
std::ostringstream os;
os << (v->as<ast_local_var_lhs>()->inferred_type ? v->as<ast_local_var_lhs>()->inferred_type : v->as<ast_local_var_lhs>()->declared_type);
os << (v->as<ast_local_var_lhs>()->inferred_type ? v->as<ast_local_var_lhs>()->inferred_type->as_human_readable() : v->as<ast_local_var_lhs>()->declared_type->as_human_readable());
if (v->as<ast_local_var_lhs>()->get_name().empty()) {
return "_: " + os.str();
}

View file

@ -30,158 +30,17 @@
* Up to this point, all types have been inferred, all validity checks have been passed, etc.
* All properties in AST nodes are assigned and can be safely used (fun_ref, etc.).
* So, if execution reaches this pass, the input is (almost) correct, and code generation should succeed.
* The only thing additionally checked during this pass is tricky lvalue, like one and the same variable
* assigned/mutated multiple times in same expression, e.g. `(t.0, t.0) = rhs` / `f(mutate x.1.2, mutate x)`.
* (previously, there was a check for one variable modified twice like `(t.0, t.0) = rhs`, but after changing
* execution order of assignment to "first lhs, then rhs", it was removed for several reasons)
*/
namespace tolk {
// fire error on cases like `(a, a) = rhs` / `f(mutate t.1.0, mutate t.1.0)`
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
static void fire_error_variable_modified_twice_inside_same_expression(SrcLocation loc) {
throw ParseError(loc, "one variable modified twice inside the same expression");
}
class LValContext;
std::vector<var_idx_t> pre_compile_expr(AnyExprV v, CodeBlob& code, LValContext* lval_ctx = nullptr);
std::vector<var_idx_t> pre_compile_symbol(SrcLocation loc, const Symbol* sym, CodeBlob& code, LValContext* lval_ctx);
void process_any_statement(AnyV v, CodeBlob& code);
// fire error on cases like `(m.1.0, m.1) = rhs` (m.1 inside m.1.0 is "rval inside lval")
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
static void fire_error_variable_modified_and_read_inside_same_expression(SrcLocation loc) {
throw ParseError(loc, "one variable both modified and read inside the same expression");
}
// Main goal of LValContext is to handle non-primitive lvalues. At IR level, a usual local variable
// exists, but on its change, something non-trivial should happen.
// Example: `globalVar = 9` actually does `Const $5 = 9` + `Let $6 = $5` + `SetGlob "globVar" = $6`
// Example: `tupleVar.0 = 9` actually does `Const $5 = 9` + `Let $6 = $5` + `Const $7 = 0` + `Call tupleSetAt($4, $6, $7)`
// Of course, mixing globals with tuples should also be supported.
// To achieve this, treat tupleObj inside "tupleObj.i" like "rvalue inside lvalue".
// For instance, `globalTuple.0 = 9` reads global (like rvalue), assigns 9 to tmp var, modifies tuple, writes global.
// A challenging thing is handling "unique" parts, to be read/updated only once.
// Example: `f(mutate globalTensor.0, mutate globalTensor.1)`, then globalTensor should be read/written once.
// Example: `(t.0.0, t.0.1) = rhs` (m is [[int, int]]), then t.0 should be read/updated once.
// Solving this by calculating hashes of every lvalue or rvalue inside lvalue automatically gives an ability
// to detect and fire "multiple writes inside expression", like `(a, a) = rhs` / `[t.0, (t.0.1, c)] = rhs`.
// Note, that tensors (not tuples) `tensorVar.0 = 9` do not emit anything special (unless global).
class LValContext {
// every global variable used as lvalue is registered here
// example: `globalInt = 9`, implicit var is created `$tmp = 9`, and `SetGlob "globalInt" $tmp` is done after
// global tensors are stored as tuples (unpacked on reading, packed on writing), then multiple tmp vars are created
struct ModifiedGlob {
const GlobalVarData* glob_ref;
std::vector<var_idx_t> local_ir_idx; // typically 1, generally calc_width_on_stack() of global var (tensors)
void apply(CodeBlob& code, SrcLocation loc) const {
Op& op = code.emplace_back(loc, Op::_SetGlob, std::vector<var_idx_t>{}, local_ir_idx, glob_ref);
op.set_impure_flag();
}
};
// every tuple index used as lvalue is registered here
// example: `t.0 = 9`, implicit var is created `$tmp = 9`, as well as `$tmp_idx = 0` and `tupleSetAt()` is done after
// for `t.0.0` if t is `[[int, ...]]`, `tupleAt()` for it is done since it's rvalue, and `tupleSetAt()` is done 2 times
struct ModifiedTupleIndex {
uint64_t hash;
var_idx_t tuple_ir_idx;
var_idx_t index_ir_idx;
var_idx_t field_ir_idx;
void apply(CodeBlob& code, SrcLocation loc) const {
const FunctionData* builtin_sym = lookup_global_symbol("tupleSetAt")->as<FunctionData>();
code.emplace_back(loc, Op::_Call, std::vector{tuple_ir_idx}, std::vector{tuple_ir_idx, field_ir_idx, index_ir_idx}, builtin_sym);
}
};
int level_rval_inside_lval = 0;
std::vector<std::variant<ModifiedGlob, ModifiedTupleIndex>> modifications;
std::unordered_set<uint64_t> all_modified_hashes;
void fire_if_one_variable_modified_twice(SrcLocation loc, uint64_t modified_hash) {
if (!is_rval_inside_lval()) {
if (!all_modified_hashes.insert(modified_hash).second) {
fire_error_variable_modified_twice_inside_same_expression(loc);
}
if (all_modified_hashes.contains(~modified_hash)) {
fire_error_variable_modified_and_read_inside_same_expression(loc);
}
} else {
all_modified_hashes.insert(~modified_hash);
if (all_modified_hashes.contains(modified_hash)) {
fire_error_variable_modified_and_read_inside_same_expression(loc);
}
}
}
public:
void enter_rval_inside_lval() { level_rval_inside_lval++; }
void exit_rval_inside_lval() { level_rval_inside_lval--; }
bool is_rval_inside_lval() const { return level_rval_inside_lval > 0; }
uint64_t register_lval(SrcLocation loc, const LocalVarData* var_ref) {
uint64_t hash = reinterpret_cast<uint64_t>(var_ref);
fire_if_one_variable_modified_twice(loc, hash);
return hash;
}
uint64_t register_lval(SrcLocation loc, const GlobalVarData* glob_ref) {
uint64_t hash = reinterpret_cast<uint64_t>(glob_ref);
fire_if_one_variable_modified_twice(loc, hash);
return hash;
}
uint64_t register_lval(SrcLocation loc, V<ast_dot_access> v) {
uint64_t hash = 7;
AnyExprV leftmost_obj = v;
while (auto v_dot = leftmost_obj->try_as<ast_dot_access>()) {
if (!v_dot->is_target_indexed_access()) {
break;
}
hash = hash * 1915239017 + std::get<int>(v_dot->target);
leftmost_obj = v_dot->get_obj();
}
if (auto v_ref = leftmost_obj->try_as<ast_reference>()) {
hash *= reinterpret_cast<uint64_t>(v_ref->sym); // `v.0` and `v.0` in 2 places is the same
} else {
hash *= reinterpret_cast<uint64_t>(leftmost_obj); // unlike `f().0` and `f().0` (pointers to AST nodes differ)
}
fire_if_one_variable_modified_twice(loc, hash);
return hash;
}
const std::vector<var_idx_t>* exists_already_known_global(const GlobalVarData* glob_ref) const {
for (const auto& m : modifications) {
if (const auto* m_glob = std::get_if<ModifiedGlob>(&m); m_glob && m_glob->glob_ref == glob_ref) {
return &m_glob->local_ir_idx;
}
}
return nullptr;
}
const var_idx_t* exists_already_known_tuple_index(uint64_t hash) const {
for (const auto& m : modifications) {
if (const auto* m_tup = std::get_if<ModifiedTupleIndex>(&m); m_tup && m_tup->hash == hash) {
return &m_tup->field_ir_idx;
}
}
return nullptr;
}
void register_modified_global(const GlobalVarData* glob_ref, std::vector<var_idx_t> local_ir_idx) {
modifications.emplace_back(ModifiedGlob{glob_ref, std::move(local_ir_idx)});
}
void register_modified_tuple_index(uint64_t hash, var_idx_t tuple_ir_idx, var_idx_t index_ir_idx, var_idx_t field_ir_idx) {
modifications.emplace_back(ModifiedTupleIndex{hash, tuple_ir_idx, index_ir_idx, field_ir_idx});
}
void gen_ops_if_nonempty(CodeBlob& code, SrcLocation loc) const {
for (auto it = modifications.rbegin(); it != modifications.rend(); ++it) { // reverse, it's important
if (const auto* m_glob = std::get_if<ModifiedGlob>(&*it)) {
m_glob->apply(code, loc);
} else if (const auto* m_tup = std::get_if<ModifiedTupleIndex>(&*it)) {
m_tup->apply(code, loc);
}
}
}
};
// The goal of VarsModificationWatcher is to detect such cases: `return (x, x += y, x)`.
// Without any changes, ops will be { _Call $2 = +($0_x, $1_y); _Return $0_x, $2, $0_x } - incorrect
@ -229,8 +88,176 @@ public:
static VarsModificationWatcher vars_modification_watcher;
std::vector<var_idx_t> pre_compile_expr(AnyExprV v, CodeBlob& code, LValContext* lval_ctx = nullptr);
void process_any_statement(AnyV v, CodeBlob& code);
// Main goal of LValContext is to handle non-primitive lvalues. At IR level, a usual local variable
// exists, but on its change, something non-trivial should happen.
// Example: `globalVar = 9` actually does `Const $5 = 9` + `Let $6 = $5` + `SetGlob "globVar" = $6`
// Example: `tupleVar.0 = 9` actually does `Const $5 = 9` + `Let $6 = $5` + `Const $7 = 0` + `Call tupleSetAt($4, $6, $7)`
// Of course, mixing globals with tuples should also be supported.
// To achieve this, treat tupleObj inside "tupleObj.i" like "rvalue inside lvalue".
// For instance, `globalTuple.0 = 9` reads global (like rvalue), assigns 9 to tmp var, modifies tuple, writes global.
// Note, that tensors (not tuples) `tensorVar.0 = 9` do not emit anything special (unless global).
// Usage pattern: while the lhs is pre-compiled, modifications are captured via capture_*() methods;
// once the `Let lhs = rhs` op has been emitted, after_let() emits the follow-up ops for the captured items.
class LValContext {
// every global variable used as lvalue is registered here
// example: `globalInt = 9`, implicit var is created `$tmp = 9`, and `SetGlob "globalInt" $tmp` is done after
struct ModifiedGlobal {
const GlobalVarData* glob_ref;
std::vector<var_idx_t> lval_ir_idx; // typically 1, generally calc_width_on_stack() of global var (tensors)
// for 1-slot globals int/cell/slice, assigning to them is just SETGLOB
// same for tensors, if they are fully rewritten in an expression: `gTensor = (5,6)`
void apply_fully_rewrite(CodeBlob& code, SrcLocation loc) const {
Op& op = code.emplace_back(loc, Op::_SetGlob, std::vector<var_idx_t>{}, lval_ir_idx, glob_ref);
op.set_impure_flag();
}
// for N-slot globals tensor/struct/union, assigning to their parts, like `gTensor.1 = 6`
// we need to read gTensor as a whole (0-th and 1-th component), rewrite 1-th component, and SETGLOB a whole back
// was_modified_by_let[i] tells whether the i-th slot of the global was assigned by the Let
void apply_partially_rewrite(CodeBlob& code, SrcLocation loc, std::vector<bool>&& was_modified_by_let) const {
LValContext local_lval;
// "rval inside lval" mode makes pre_compile_symbol() read the current value of the global
local_lval.enter_rval_inside_lval();
std::vector<var_idx_t> local_ir_idx = pre_compile_symbol(loc, glob_ref, code, &local_lval);
// overwrite only the slots that were really assigned, the rest keep the values just read
for (size_t i = 0; i < local_ir_idx.size(); ++i) {
if (was_modified_by_let[i]) {
code.emplace_back(loc, Op::_Let, std::vector{local_ir_idx[i]}, std::vector{lval_ir_idx[i]});
}
}
Op& op = code.emplace_back(loc, Op::_SetGlob, std::vector<var_idx_t>{}, local_ir_idx, glob_ref);
op.set_impure_flag();
}
};
// every tensor index, when a tensor is a global, is registered here (same for structs and fields)
// example: `global v: (int, int); v.1 = 5`, implicit var is created `$tmp = 5`, and when it's modified,
// we need to partially update v; essentially, apply_partially_rewrite() above will be called
struct ModifiedFieldOfGlobal {
AnyExprV tensor_obj;
int index_at;
std::vector<var_idx_t> lval_ir_idx;
void apply(CodeBlob& code, SrcLocation loc) const {
LValContext local_lval;
local_lval.enter_rval_inside_lval();
// pre-compile the object as "rval inside lval"; modifications captured here live in local_lval
std::vector<var_idx_t> obj_ir_idx = pre_compile_expr(tensor_obj, code, &local_lval);
const TypeDataTensor* t_tensor = tensor_obj->inferred_type->try_as<TypeDataTensor>();
tolk_assert(t_tensor);
// locate the stack slots of field `index_at` inside the flattened tensor
int stack_width = t_tensor->items[index_at]->calc_width_on_stack();
int stack_offset = 0;
for (int i = 0; i < index_at; ++i) {
stack_offset += t_tensor->items[i]->calc_width_on_stack();
}
std::vector<var_idx_t> field_ir_idx = {obj_ir_idx.begin() + stack_offset, obj_ir_idx.begin() + stack_offset + stack_width};
tolk_assert(field_ir_idx.size() == lval_ir_idx.size());
vars_modification_watcher.trigger_callbacks(field_ir_idx, loc);
// copy the assigned value into the field slots, then let local_lval emit what depends on them
code.emplace_back(loc, Op::_Let, field_ir_idx, lval_ir_idx);
local_lval.after_let(std::move(field_ir_idx), code, loc);
}
};
// every tuple index used as lvalue is registered here
// example: `t.0 = 9`, implicit var is created `$tmp = 9`, as well as `$tmp_idx = 0` and `tupleSetAt()` is done after
// for `t.0.0` if t is `[[int, ...]]`, `tupleAt()` for it is done since it's rvalue, and `tupleSetAt()` is done 2 times
struct ModifiedTupleIndex {
AnyExprV tuple_obj;
int index_at;
std::vector<var_idx_t> lval_ir_idx;
void apply(CodeBlob& code, SrcLocation loc) const {
LValContext local_lval;
local_lval.enter_rval_inside_lval();
// pre-compile the tuple object as "rval inside lval"; modifications captured here live in local_lval
std::vector<var_idx_t> tuple_ir_idx = pre_compile_expr(tuple_obj, code, &local_lval);
std::vector<var_idx_t> index_ir_idx = code.create_tmp_var(TypeDataInt::create(), loc, "(tuple-idx)");
code.emplace_back(loc, Op::_IntConst, index_ir_idx, td::make_refint(index_at));
vars_modification_watcher.trigger_callbacks(tuple_ir_idx, loc);
const FunctionData* builtin_sym = lookup_global_symbol("tupleSetAt")->as<FunctionData>();
// only the first IR var of the tuple, of the assigned value and of the index is passed to tupleSetAt
code.emplace_back(loc, Op::_Call, std::vector{tuple_ir_idx}, std::vector{tuple_ir_idx[0], lval_ir_idx[0], index_ir_idx[0]}, builtin_sym);
// the tuple object itself was modified, so let local_lval emit what depends on it
local_lval.after_let(std::move(tuple_ir_idx), code, loc);
}
};
int level_rval_inside_lval = 0;
// captured modifications, in the order they were registered
std::vector<std::variant<ModifiedGlobal, ModifiedTupleIndex, ModifiedFieldOfGlobal>> modifications;
// linear search: whether ir_idx is present in ir_vars
static bool vector_contains(const std::vector<var_idx_t>& ir_vars, var_idx_t ir_idx) {
for (var_idx_t var_in_vector : ir_vars) {
if (var_in_vector == ir_idx) {
return true;
}
}
return false;
}
public:
void enter_rval_inside_lval() { level_rval_inside_lval++; }
void exit_rval_inside_lval() { level_rval_inside_lval--; }
bool is_rval_inside_lval() const { return level_rval_inside_lval > 0; }
// remember that IR vars `lval_ir_idx` stand for global `glob_ref` used as lvalue
void capture_global_modification(const GlobalVarData* glob_ref, std::vector<var_idx_t> lval_ir_idx) {
modifications.emplace_back(ModifiedGlobal{glob_ref, std::move(lval_ir_idx)});
}
// remember that IR vars `lval_ir_idx` stand for `tensor_obj.index_at` used as lvalue
void capture_field_of_global_modification(AnyExprV tensor_obj, int index_at, std::vector<var_idx_t> lval_ir_idx) {
modifications.emplace_back(ModifiedFieldOfGlobal{tensor_obj, index_at, std::move(lval_ir_idx)});
}
// remember that IR vars `lval_ir_idx` stand for `tuple_obj.index_at` used as lvalue
void capture_tuple_index_modification(AnyExprV tuple_obj, int index_at, std::vector<var_idx_t> lval_ir_idx) {
modifications.emplace_back(ModifiedTupleIndex{tuple_obj, index_at, std::move(lval_ir_idx)});
}
// to be called after `Let let_left_vars = ...` has been emitted:
// for every captured modification whose IR vars are among let_left_vars, emit its follow-up ops
void after_let(std::vector<var_idx_t>&& let_left_vars, CodeBlob& code, SrcLocation loc) const {
for (const auto &modification : modifications) {
if (const auto* m_glob = std::get_if<ModifiedGlobal>(&modification)) {
// count how many slots of the global were assigned by this Let
int n_modified_by_let = 0;
std::vector<bool> was_modified_by_let;
was_modified_by_let.resize(m_glob->lval_ir_idx.size());
for (size_t i = 0; i < m_glob->lval_ir_idx.size(); ++i) {
if (vector_contains(let_left_vars, m_glob->lval_ir_idx[i])) {
was_modified_by_let[i] = true;
n_modified_by_let++;
}
}
// all slots assigned: plain SetGlob; some slots: read + patch + SetGlob; none: nothing to do
if (n_modified_by_let == static_cast<int>(m_glob->lval_ir_idx.size())) {
m_glob->apply_fully_rewrite(code, loc);
} else if (n_modified_by_let > 0) {
m_glob->apply_partially_rewrite(code, loc, std::move(was_modified_by_let));
}
} else if (const auto* m_tup = std::get_if<ModifiedTupleIndex>(&modification)) {
bool was_tuple_index_modified = false;
for (var_idx_t field_ir_idx : m_tup->lval_ir_idx) {
was_tuple_index_modified |= vector_contains(let_left_vars, field_ir_idx);
}
if (was_tuple_index_modified) {
m_tup->apply(code, loc);
}
} else if (const auto* m_tens = std::get_if<ModifiedFieldOfGlobal>(&modification)) {
bool was_tensor_index_modified = false;
for (var_idx_t field_ir_idx : m_tens->lval_ir_idx) {
was_tensor_index_modified |= vector_contains(let_left_vars, field_ir_idx);
}
if (was_tensor_index_modified) {
m_tens->apply(code, loc);
}
}
}
}
};
// Walks down a chain of indexed accesses `{some_expr}.{i}.{j}...` to the expression it is rooted at.
// If that root is a plain reference (`some_var.0` / `some_var.0.1`; a local or a global, a tensor or a tuple),
// the reference is returned; for any other root (`f().0` / `(v = rhs).0` / `some_var.method().0`) nullptr is returned.
static V<ast_reference> calc_sink_leftmost_obj(V<ast_dot_access> v) {
  AnyExprV root = v->get_obj();
  for (;;) {
    auto inner_dot = root->try_as<ast_dot_access>();
    // stop at the first node that is not an indexed access `obj.{i}`
    if (!inner_dot || !inner_dot->is_target_indexed_access()) {
      break;
    }
    root = inner_dot->get_obj();
  }
  if (root->type != ast_reference) {
    return nullptr;
  }
  return root->as<ast_reference>();
}
static std::vector<std::vector<var_idx_t>> pre_compile_tensor_inner(CodeBlob& code, const std::vector<AnyExprV>& args,
@ -313,43 +340,43 @@ static std::vector<var_idx_t> pre_compile_tensor(CodeBlob& code, const std::vect
static std::vector<var_idx_t> pre_compile_let(CodeBlob& code, AnyExprV lhs, AnyExprV rhs, SrcLocation loc) {
// [lhs] = [rhs]; since type checking is ok, it's the same as "lhs = rhs"
if (lhs->type == ast_typed_tuple && rhs->type == ast_typed_tuple) {
std::vector<var_idx_t> right = pre_compile_tensor(code, rhs->as<ast_typed_tuple>()->get_items());
LValContext local_lval;
std::vector<var_idx_t> left = pre_compile_tensor(code, lhs->as<ast_typed_tuple>()->get_items(), &local_lval);
vars_modification_watcher.trigger_callbacks(left, loc);
code.emplace_back(loc, Op::_Let, std::move(left), right);
local_lval.gen_ops_if_nonempty(code, loc);
std::vector<var_idx_t> right = pre_compile_tensor(code, rhs->as<ast_typed_tuple>()->get_items());
code.emplace_back(loc, Op::_Let, left, right);
local_lval.after_let(std::move(left), code, loc);
return right;
}
// [lhs] = rhs; it's un-tuple to N left vars
if (lhs->type == ast_typed_tuple) {
LValContext local_lval;
std::vector<var_idx_t> left = pre_compile_tensor(code, lhs->as<ast_typed_tuple>()->get_items(), &local_lval);
vars_modification_watcher.trigger_callbacks(left, loc);
std::vector<var_idx_t> right = pre_compile_expr(rhs, code);
const TypeDataTypedTuple* inferred_tuple = rhs->inferred_type->try_as<TypeDataTypedTuple>();
std::vector<TypePtr> types_list = inferred_tuple->items;
std::vector<var_idx_t> rvect = code.create_tmp_var(TypeDataTensor::create(std::move(types_list)), rhs->loc, "(unpack-tuple)");
code.emplace_back(lhs->loc, Op::_UnTuple, rvect, std::move(right));
LValContext local_lval;
std::vector<var_idx_t> left = pre_compile_tensor(code, lhs->as<ast_typed_tuple>()->get_items(), &local_lval);
vars_modification_watcher.trigger_callbacks(left, loc);
code.emplace_back(loc, Op::_Let, std::move(left), rvect);
local_lval.gen_ops_if_nonempty(code, loc);
code.emplace_back(loc, Op::_Let, left, rvect);
local_lval.after_let(std::move(left), code, loc);
return rvect;
}
// small optimization: `var x = rhs` or `local_var = rhs` (90% cases), LValContext not needed actually
if (lhs->type == ast_local_var_lhs || (lhs->type == ast_reference && lhs->as<ast_reference>()->sym->try_as<LocalVarData>())) {
std::vector<var_idx_t> right = pre_compile_expr(rhs, code);
std::vector<var_idx_t> left = pre_compile_expr(lhs, code); // effectively, local_var->ir_idx
vars_modification_watcher.trigger_callbacks(left, loc);
std::vector<var_idx_t> right = pre_compile_expr(rhs, code);
code.emplace_back(loc, Op::_Let, std::move(left), right);
return right;
}
// lhs = rhs
std::vector<var_idx_t> right = pre_compile_expr(rhs, code);
LValContext local_lval;
std::vector<var_idx_t> left = pre_compile_expr(lhs, code, &local_lval);
vars_modification_watcher.trigger_callbacks(left, loc);
code.emplace_back(loc, Op::_Let, std::move(left), right);
local_lval.gen_ops_if_nonempty(code, loc);
std::vector<var_idx_t> right = pre_compile_expr(rhs, code);
code.emplace_back(loc, Op::_Let, left, right);
local_lval.after_let(std::move(left), code, loc);
return right;
}
@ -364,28 +391,22 @@ static std::vector<var_idx_t> gen_op_call(CodeBlob& code, TypePtr ret_type, SrcL
}
static std::vector<var_idx_t> pre_compile_symbol(SrcLocation loc, const Symbol* sym, CodeBlob& code, LValContext* lval_ctx) {
std::vector<var_idx_t> pre_compile_symbol(SrcLocation loc, const Symbol* sym, CodeBlob& code, LValContext* lval_ctx) {
if (const auto* glob_ref = sym->try_as<GlobalVarData>()) {
if (!lval_ctx) {
// `globalVar` is used for reading, just create local IR var to represent its value, Op GlobVar will fill it
// note, that global tensors are stored as a tuple and unpacked to N vars on read, N determined by declared_type
std::vector<var_idx_t> local_ir_idx = code.create_tmp_var(glob_ref->declared_type, loc, "(glob-var)");
code.emplace_back(loc, Op::_GlobVar, local_ir_idx, std::vector<var_idx_t>{}, glob_ref);
return local_ir_idx;
} else {
// `globalVar = rhs` / `mutate globalVar` / `globalTuple.0 = rhs`
lval_ctx->register_lval(loc, glob_ref);
if (const std::vector<var_idx_t>* local_ir_idx = lval_ctx->exists_already_known_global(glob_ref)) {
return *local_ir_idx; // `f(mutate g.0, mutate g.1)`, then g will be read only once
}
std::vector<var_idx_t> local_ir_idx = code.create_tmp_var(glob_ref->declared_type, loc, "(glob-var)");
if (lval_ctx->is_rval_inside_lval()) { // for `globalVar.0` "globalVar" is rvalue inside lvalue
// for `globalVar = rhs` don't read a global actually, but for `globalVar.0 = rhs` do
code.emplace_back(loc, Op::_GlobVar, local_ir_idx, std::vector<var_idx_t>{}, glob_ref);
}
lval_ctx->register_modified_global(glob_ref, local_ir_idx);
return local_ir_idx;
// handle `globalVar = rhs` / `mutate globalVar`
if (lval_ctx && !lval_ctx->is_rval_inside_lval()) {
std::vector<var_idx_t> lval_ir_idx = code.create_tmp_var(glob_ref->declared_type, loc, "(lval-glob)");
lval_ctx->capture_global_modification(glob_ref, lval_ir_idx);
return lval_ir_idx;
}
// `globalVar` is used for reading, just create local IR var to represent its value, Op GlobVar will fill it
// note, that global tensors are stored as a tuple and unpacked to N vars on read, N determined by declared_type
std::vector<var_idx_t> local_ir_idx = code.create_var(glob_ref->declared_type, loc, "g_" + glob_ref->name);
code.emplace_back(loc, Op::_GlobVar, local_ir_idx, std::vector<var_idx_t>{}, glob_ref);
if (lval_ctx) { // `globalVar.0 = rhs`, globalVar is rval inside lval
lval_ctx->capture_global_modification(glob_ref, local_ir_idx);
}
return local_ir_idx;
}
if (const auto* const_ref = sym->try_as<GlobalConstData>()) {
if (const_ref->is_int_const()) {
@ -407,15 +428,12 @@ static std::vector<var_idx_t> pre_compile_symbol(SrcLocation loc, const Symbol*
#ifdef TOLK_DEBUG
tolk_assert(static_cast<int>(var_ref->ir_idx.size()) == var_ref->declared_type->calc_width_on_stack());
#endif
if (lval_ctx) {
lval_ctx->register_lval(loc, var_ref);
}
return var_ref->ir_idx;
}
throw Fatal("pre_compile_symbol");
}
static std::vector<var_idx_t> process_assign(V<ast_assign> v, CodeBlob& code) {
static std::vector<var_idx_t> process_assignment(V<ast_assign> v, CodeBlob& code) {
if (auto lhs_decl = v->get_lhs()->try_as<ast_local_vars_declaration>()) {
return pre_compile_let(code, lhs_decl->get_expr(), v->get_rhs(), v->loc);
} else {
@ -492,12 +510,18 @@ static std::vector<var_idx_t> process_dot_access(V<ast_dot_access> v, CodeBlob&
if (!v->is_target_fun_ref()) {
TypePtr obj_type = v->get_obj()->inferred_type;
int index_at = std::get<int>(v->target);
// `tensorVar.0`; since a tensor of N elems are N vars on a stack actually, calculate offset
// `tensorVar.0`
if (const auto* t_tensor = obj_type->try_as<TypeDataTensor>()) {
if (lval_ctx) lval_ctx->register_lval(v->loc, v);
if (lval_ctx) lval_ctx->enter_rval_inside_lval();
// handle `tensorVar.0 = rhs` if the tensor is a global: a special case, the global will then be read on demand
if (lval_ctx && !lval_ctx->is_rval_inside_lval()) {
if (auto sink = calc_sink_leftmost_obj(v); sink && sink->sym->try_as<GlobalVarData>()) {
std::vector<var_idx_t> lval_ir_idx = code.create_tmp_var(v->inferred_type, v->loc, "(lval-global-tensor)");
lval_ctx->capture_field_of_global_modification(v->get_obj(), index_at, lval_ir_idx);
return lval_ir_idx;
}
}
// since a tensor of N elems are N vars on a stack actually, calculate offset
std::vector<var_idx_t> lhs_vars = pre_compile_expr(v->get_obj(), code, lval_ctx);
if (lval_ctx) lval_ctx->exit_rval_inside_lval();
int stack_width = t_tensor->items[index_at]->calc_width_on_stack();
int stack_offset = 0;
for (int i = 0; i < index_at; ++i) {
@ -505,39 +529,26 @@ static std::vector<var_idx_t> process_dot_access(V<ast_dot_access> v, CodeBlob&
}
return {lhs_vars.begin() + stack_offset, lhs_vars.begin() + stack_offset + stack_width};
}
// `tupleVar.0`; not to mess up, separate rvalue and lvalue cases
// `tupleVar.0`
if (obj_type->try_as<TypeDataTypedTuple>() || obj_type->try_as<TypeDataTuple>()) {
if (!lval_ctx) {
// `tupleVar.0` as rvalue: the same as "tupleAt(tupleVar, 0)" written in terms of IR vars
std::vector<var_idx_t> tuple_ir_idx = pre_compile_expr(v->get_obj(), code);
std::vector<var_idx_t> index_ir_idx = code.create_tmp_var(TypeDataInt::create(), v->get_identifier()->loc, "(tuple-idx)");
code.emplace_back(v->loc, Op::_IntConst, index_ir_idx, td::make_refint(index_at));
std::vector<var_idx_t> field_ir_idx = code.create_tmp_var(v->inferred_type, v->loc, "(tuple-field)");
tolk_assert(tuple_ir_idx.size() == 1 && field_ir_idx.size() == 1); // tuples contain only 1-slot values
const FunctionData* builtin_sym = lookup_global_symbol("tupleAt")->as<FunctionData>();
code.emplace_back(v->loc, Op::_Call, field_ir_idx, std::vector{tuple_ir_idx[0], index_ir_idx[0]}, builtin_sym);
return field_ir_idx;
} else {
// `tupleVar.0 = rhs`: finally "tupleSetAt(tupleVar, rhs, 0)" will be done
uint64_t hash = lval_ctx->register_lval(v->loc, v);
if (const var_idx_t* field_ir_idx = lval_ctx->exists_already_known_tuple_index(hash)) {
return {*field_ir_idx}; // `(t.0.0, t.0.1) = rhs`, then "t.0" will be read (tupleAt) once
}
lval_ctx->enter_rval_inside_lval();
std::vector<var_idx_t> tuple_ir_idx = pre_compile_expr(v->get_obj(), code, lval_ctx);
lval_ctx->exit_rval_inside_lval();
std::vector<var_idx_t> index_ir_idx = code.create_tmp_var(TypeDataInt::create(), v->get_identifier()->loc, "(tuple-idx)");
code.emplace_back(v->loc, Op::_IntConst, index_ir_idx, td::make_refint(index_at));
std::vector<var_idx_t> field_ir_idx = code.create_tmp_var(v->inferred_type, v->loc, "(tuple-field)");
if (lval_ctx->is_rval_inside_lval()) { // for `t.0.1 = rhs` "t.0" is rvalue inside lvalue
// for `t.0 = rhs` don't call tupleAt, but for `t.0.1 = rhs` do for t.0 (still don't for t.0.1)
const FunctionData* builtin_sym = lookup_global_symbol("tupleAt")->as<FunctionData>();
code.emplace_back(v->loc, Op::_Call, field_ir_idx, std::vector{tuple_ir_idx[0], index_ir_idx[0]}, builtin_sym);
}
lval_ctx->register_modified_tuple_index(hash, tuple_ir_idx[0], index_ir_idx[0], field_ir_idx[0]);
vars_modification_watcher.trigger_callbacks(tuple_ir_idx, v->loc);
return field_ir_idx;
// handle `tupleVar.0 = rhs`, "0 SETINDEX" will be called when this var is modified
if (lval_ctx && !lval_ctx->is_rval_inside_lval() && calc_sink_leftmost_obj(v)) {
std::vector<var_idx_t> lval_ir_idx = code.create_tmp_var(v->inferred_type, v->loc, "(lval-tuple-field)");
lval_ctx->capture_tuple_index_modification(v->get_obj(), index_at, lval_ir_idx);
return lval_ir_idx;
}
// `tupleVar.0` as rvalue: the same as "tupleAt(tupleVar, 0)" written in terms of IR vars
std::vector<var_idx_t> tuple_ir_idx = pre_compile_expr(v->get_obj(), code);
std::vector<var_idx_t> index_ir_idx = code.create_tmp_var(TypeDataInt::create(), v->get_identifier()->loc, "(tuple-idx)");
code.emplace_back(v->loc, Op::_IntConst, index_ir_idx, td::make_refint(index_at));
std::vector<var_idx_t> field_ir_idx = code.create_tmp_var(v->inferred_type, v->loc, "(tuple-field)");
tolk_assert(tuple_ir_idx.size() == 1 && field_ir_idx.size() == 1); // tuples contain only 1-slot values
const FunctionData* builtin_sym = lookup_global_symbol("tupleAt")->as<FunctionData>();
code.emplace_back(v->loc, Op::_Call, field_ir_idx, std::vector{tuple_ir_idx[0], index_ir_idx[0]}, builtin_sym);
if (lval_ctx && calc_sink_leftmost_obj(v)) { // `tupleVar.0.1 = rhs`, then `tupleVar.0` is rval inside lval
lval_ctx->capture_tuple_index_modification(v->get_obj(), index_at, field_ir_idx);
}
return field_ir_idx;
}
tolk_assert(false);
}
@ -627,8 +638,8 @@ static std::vector<var_idx_t> process_function_call(V<ast_function_call> v, Code
std::vector<var_idx_t> rvect = code.create_tmp_var(real_ret_type, v->loc, "(fun-call)");
left.insert(left.end(), rvect.begin(), rvect.end());
vars_modification_watcher.trigger_callbacks(left, v->loc);
code.emplace_back(v->loc, Op::_Let, std::move(left), rvect_apply);
local_lval.gen_ops_if_nonempty(code, v->loc);
code.emplace_back(v->loc, Op::_Let, left, rvect_apply);
local_lval.after_let(std::move(left), code, v->loc);
rvect_apply = rvect;
}
@ -710,7 +721,7 @@ std::vector<var_idx_t> pre_compile_expr(AnyExprV v, CodeBlob& code, LValContext*
case ast_reference:
return pre_compile_symbol(v->loc, v->as<ast_reference>()->sym, code, lval_ctx);
case ast_assign:
return process_assign(v->as<ast_assign>(), code);
return process_assignment(v->as<ast_assign>(), code);
case ast_set_assign:
return process_set_assign(v->as<ast_set_assign>(), code);
case ast_binary_operator:

View file

@ -25,6 +25,8 @@
*
* Currently, it just replaces `-1` (ast_unary_operator ast_int_const) with a number -1
* and `!true` with false.
* Also, all parenthesized `((expr))` are replaced with `expr`, it's a constant transformation.
* (not to handle parenthesized in optimization passes, like `((x)) == true`)
* Richer constant folding should be done some day, but even without it, IR optimizations
* (operating on low-level stack variables) manage all related optimizations pretty well.
* Constant folding in the future, done at AST level, just would slightly reduce amount of work for optimizer.
@ -47,6 +49,14 @@ class ConstantFoldingReplacer final : public ASTReplacerInFunctionBody {
return v_bool;
}
// `((expr))` -> `expr`: the parenthesized node is dropped, its inner expression is returned instead
// (the inner expression is passed through parent::replace() first)
// if the parenthesized node was marked as lvalue, the mark is carried over to the inner expression
AnyExprV replace(V<ast_parenthesized_expression> v) override {
  AnyExprV unwrapped = parent::replace(v->get_expr());
  if (!v->is_lvalue) {
    return unwrapped;
  }
  unwrapped->mutate()->assign_lvalue_true();
  return unwrapped;
}
AnyExprV replace(V<ast_unary_operator> v) override {
parent::replace(v);

View file

@ -133,8 +133,8 @@ static void fire_error_cannot_deduce_untyped_tuple_access(SrcLocation loc, int i
// fire an error on `untypedTupleVar.0` when inferred as (int,int), or `[int, (int,int)]`, or other non-1 width in a tuple
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
static void fire_error_cannot_put_non1_stack_width_arg_to_tuple(SrcLocation loc, TypePtr inferred_type) {
throw ParseError(loc, "can not put " + to_string(inferred_type) + " into a tuple, because it occupies " + std::to_string(inferred_type->calc_width_on_stack()) + " stack slots in TVM, not 1");
static void fire_error_tuple_cannot_have_non1_stack_width_elem(SrcLocation loc, TypePtr inferred_type) {
throw ParseError(loc, "a tuple can not have " + to_string(inferred_type) + " inside, because it occupies " + std::to_string(inferred_type->calc_width_on_stack()) + " stack slots in TVM, not 1");
}
// check correctness of called arguments counts and their type matching
@ -351,6 +351,8 @@ class InferCheckTypesAndCallsAndFieldsVisitor final {
return infer_bool_const(v->as<ast_bool_const>());
case ast_local_vars_declaration:
return infer_local_vars_declaration(v->as<ast_local_vars_declaration>());
case ast_local_var_lhs:
return infer_local_var_lhs(v->as<ast_local_var_lhs>());
case ast_assign:
return infer_assignment(v->as<ast_assign>());
case ast_set_assign:
@ -410,133 +412,71 @@ class InferCheckTypesAndCallsAndFieldsVisitor final {
assign_inferred_type(v, TypeDataBool::create());
}
static void infer_local_vars_declaration(V<ast_local_vars_declaration>) {
// it can not appear as a standalone expression
// `var ... = rhs` is handled by ast_assign
tolk_assert(false);
void infer_local_vars_declaration(V<ast_local_vars_declaration> v) {
infer_any_expr(v->get_expr());
assign_inferred_type(v, v->get_expr());
}
// infer `v` of `var v = rhs` (left side of an assignment)
// lhs is inferred before rhs, so the type of rhs is not known at this point:
// - `var v redef`: take the declared type of the already existing variable
// - `var v: T`: take the declared type T
// - `var v`: mark as unknown for now
// later, v's inferred_type will be reassigned; see process_assignment_lhs_after_infer_rhs()
static void infer_local_var_lhs(V<ast_local_var_lhs> v) {
  if (v->marked_as_redef) {
    assign_inferred_type(v, v->var_ref->declared_type);
    return;
  }
  TypePtr lhs_type = v->declared_type;
  if (!lhs_type) {
    lhs_type = TypeDataUnknown::create();
  }
  assign_inferred_type(v, lhs_type);
}
void infer_assignment(V<ast_assign> v) {
// v is assignment: `x = 5` / `var x = 5` / `var x: slice = 5` / `(cs,_) = f()` / `val (a,[b],_) = (a,t,0)`
// it's a tricky node to handle, because to infer rhs, at first we need to create hint from lhs
// and then to apply/check inferred rhs onto lhs
// about a hint: `var i: int = t.tupleAt(0)` is ok, but `var i = t.tupleAt(0)` not, since `tupleAt<T>(t,i): T`
// execution flow is: lhs first, rhs second (at IR generation, also lhs is evaluated first, unlike FunC)
// after inferring lhs, use it for hint when inferring rhs
// example: `var i: int = t.tupleAt(0)` is ok (hint=int, T=int), but `var i = t.tupleAt(0)` not, since `tupleAt<T>(t,i): T`
AnyExprV lhs = v->get_lhs();
AnyExprV rhs = v->get_rhs();
infer_any_expr(rhs, calc_hint_from_assignment_lhs(lhs));
infer_any_expr(lhs);
infer_any_expr(rhs, lhs->inferred_type);
process_assignment_lhs_after_infer_rhs(lhs, rhs->inferred_type, rhs);
assign_inferred_type(v, lhs);
}
// having assignment like `var (i: int, s) = rhs` (its lhs is local vars declaration),
// create a contextual infer hint for rhs, `(int, unknown)` in this case
// this hint helps to deduce generics and to resolve unknown types while inferring rhs
static TypePtr calc_hint_from_assignment_lhs(AnyExprV lhs) {
// `var ... = rhs` - dig into left part
if (auto lhs_decl = lhs->try_as<ast_local_vars_declaration>()) {
return calc_hint_from_assignment_lhs(lhs_decl->get_expr());
}
// inside `var v: int = rhs` / `var _ = rhs` / `var v redef = rhs` (lhs is "v" / "_" / "v")
if (auto lhs_var = lhs->try_as<ast_local_var_lhs>()) {
if (lhs_var->marked_as_redef) {
return lhs_var->var_ref->declared_type;
}
if (lhs_var->declared_type) {
return lhs_var->declared_type;
}
return TypeDataUnknown::create();
}
// `v = rhs` / `(c1, c2) = rhs` (lhs is "v" / "_" / "c1" / "c2" after recursion)
if (auto lhs_ref = lhs->try_as<ast_reference>()) {
if (const auto* var_ref = lhs_ref->sym->try_as<LocalVarData>()) {
return var_ref->declared_type;
}
if (const auto* glob_ref = lhs_ref->sym->try_as<GlobalVarData>()) {
return glob_ref->declared_type;
}
return TypeDataUnknown::create();
}
// `(v1, v2) = rhs` / `var (v1, v2) = rhs`
if (auto lhs_tensor = lhs->try_as<ast_tensor>()) {
std::vector<TypePtr> sub_hints;
sub_hints.reserve(lhs_tensor->size());
for (AnyExprV item : lhs_tensor->get_items()) {
sub_hints.push_back(calc_hint_from_assignment_lhs(item));
}
return TypeDataTensor::create(std::move(sub_hints));
}
// `[v1, v2] = rhs` / `var [v1, v2] = rhs`
if (auto lhs_tuple = lhs->try_as<ast_typed_tuple>()) {
std::vector<TypePtr> sub_hints;
sub_hints.reserve(lhs_tuple->size());
for (AnyExprV item : lhs_tuple->get_items()) {
sub_hints.push_back(calc_hint_from_assignment_lhs(item));
}
return TypeDataTypedTuple::create(std::move(sub_hints));
}
// `a.0 = rhs` / `b.1.0 = rhs` (remember, its target is not assigned yet)
if (auto lhs_dot = lhs->try_as<ast_dot_access>()) {
TypePtr obj_hint = calc_hint_from_assignment_lhs(lhs_dot->get_obj());
std::string_view field_name = lhs_dot->get_field_name();
if (field_name[0] >= '0' && field_name[0] <= '9') {
int index_at = std::stoi(std::string(field_name));
if (const auto* t_tensor = obj_hint->try_as<TypeDataTensor>(); t_tensor && index_at < t_tensor->size()) {
return t_tensor->items[index_at];
}
if (const auto* t_tuple = obj_hint->try_as<TypeDataTypedTuple>(); t_tuple && index_at < t_tuple->size()) {
return t_tuple->items[index_at];
}
}
return TypeDataUnknown::create();
}
return TypeDataUnknown::create();
assign_inferred_type(v, rhs); // note, that the resulting type is rhs, not lhs
}
// handle (and dig recursively) into `var lhs = rhs`
// at this point, both lhs and rhs are already inferred, but lhs newly-declared vars are unknown (unless have declared_type)
// examples: `var z = 5`, `var (x, [y]) = (2, [3])`, `var (x, [y]) = xy`
// the purpose is to update inferred_type of lhs vars (z, x, y)
// while recursing, keep track of rhs if lhs and rhs have common shape (5 for z, 2 for x, [3] for [y], 3 for y)
// (so that on type mismatch, point to corresponding rhs, example: `var (x, y:slice) = (1, 2)` point to 2)
void process_assignment_lhs_after_infer_rhs(AnyExprV lhs, TypePtr rhs_type, AnyExprV corresponding_maybe_rhs) {
static void process_assignment_lhs_after_infer_rhs(AnyExprV lhs, TypePtr rhs_type, AnyExprV corresponding_maybe_rhs) {
tolk_assert(lhs->inferred_type != nullptr);
AnyExprV err_loc = corresponding_maybe_rhs ? corresponding_maybe_rhs : lhs;
// `var ... = rhs` - dig into left part
if (auto lhs_decl = lhs->try_as<ast_local_vars_declaration>()) {
process_assignment_lhs_after_infer_rhs(lhs_decl->get_expr(), rhs_type, corresponding_maybe_rhs);
assign_inferred_type(lhs, lhs_decl->get_expr()->inferred_type);
return;
}
// inside `var v: int = rhs` / `var _ = rhs` / `var v redef = rhs` (lhs is "v" / "_" / "v")
if (auto lhs_var = lhs->try_as<ast_local_var_lhs>()) {
TypePtr declared_type = lhs_var->declared_type; // `var v: int = rhs` (otherwise, nullptr)
if (lhs_var->marked_as_redef) {
tolk_assert(lhs_var->var_ref && lhs_var->var_ref->declared_type);
declared_type = lhs_var->var_ref->declared_type;
}
if (declared_type) {
if (lhs_var->inferred_type != TypeDataUnknown::create()) { // it's `var v: int` or redef
TypePtr declared_type = lhs_var->inferred_type;
if (!declared_type->can_rhs_be_assigned(rhs_type)) {
err_loc->error("can not assign " + to_string(rhs_type) + " to variable of type " + to_string(declared_type));
}
assign_inferred_type(lhs, declared_type);
} else {
if (rhs_type == TypeDataNullLiteral::create()) {
fire_error_assign_always_null_to_variable(err_loc->loc, lhs_var->var_ref->try_as<LocalVarData>(), corresponding_maybe_rhs && corresponding_maybe_rhs->type == ast_null_keyword);
}
assign_inferred_type(lhs, rhs_type);
assign_inferred_type(lhs_var->var_ref, lhs_var->inferred_type);
assign_inferred_type(lhs_var, rhs_type);
assign_inferred_type(lhs_var->var_ref, rhs_type);
}
return;
}
// `v = rhs` / `(c1, c2) = rhs` (lhs is "v" / "_" / "c1" / "c2" after recursion)
if (lhs->try_as<ast_reference>()) {
infer_any_expr(lhs);
if (!lhs->inferred_type->can_rhs_be_assigned(rhs_type)) {
err_loc->error("can not assign " + to_string(rhs_type) + " to variable of type " + to_string(lhs));
}
@ -554,13 +494,9 @@ class InferCheckTypesAndCallsAndFieldsVisitor final {
err_loc->error("can not assign " + to_string(rhs_type) + ", sizes mismatch");
}
V<ast_tensor> rhs_tensor_maybe = corresponding_maybe_rhs ? corresponding_maybe_rhs->try_as<ast_tensor>() : nullptr;
std::vector<TypePtr> types_list;
types_list.reserve(lhs_tensor->size());
for (int i = 0; i < lhs_tensor->size(); ++i) {
process_assignment_lhs_after_infer_rhs(lhs_tensor->get_item(i), rhs_type_tensor->items[i], rhs_tensor_maybe ? rhs_tensor_maybe->get_item(i) : nullptr);
types_list.push_back(lhs_tensor->get_item(i)->inferred_type);
}
assign_inferred_type(lhs, TypeDataTensor::create(std::move(types_list)));
return;
}
@ -575,25 +511,23 @@ class InferCheckTypesAndCallsAndFieldsVisitor final {
err_loc->error("can not assign " + to_string(rhs_type) + ", sizes mismatch");
}
V<ast_typed_tuple> rhs_tuple_maybe = corresponding_maybe_rhs ? corresponding_maybe_rhs->try_as<ast_typed_tuple>() : nullptr;
std::vector<TypePtr> types_list;
types_list.reserve(lhs_tuple->size());
for (int i = 0; i < lhs_tuple->size(); ++i) {
process_assignment_lhs_after_infer_rhs(lhs_tuple->get_item(i), rhs_type_tuple->items[i], rhs_tuple_maybe ? rhs_tuple_maybe->get_item(i) : nullptr);
types_list.push_back(lhs_tuple->get_item(i)->inferred_type);
}
assign_inferred_type(lhs, TypeDataTypedTuple::create(std::move(types_list)));
return;
}
// `_ = rhs`
if (lhs->type == ast_underscore) {
assign_inferred_type(lhs, TypeDataUnknown::create());
return;
// check `untypedTuple.0 = rhs_tensor` and other non-1 width elements
if (auto lhs_dot = lhs->try_as<ast_dot_access>()) {
if (lhs_dot->is_target_indexed_access() && lhs_dot->get_obj()->inferred_type == TypeDataTuple::create()) {
if (rhs_type->calc_width_on_stack() != 1) {
fire_error_tuple_cannot_have_non1_stack_width_elem(err_loc->loc, rhs_type);
}
}
}
// here is something unhandled like `a.0 = rhs`, run regular inferring on rhs
// here is something unhandled like `a.0 = rhs`, just check type matching
// for something strange like `f() = rhs` type inferring will pass, but will fail later
infer_any_expr(lhs, rhs_type);
if (!lhs->inferred_type->can_rhs_be_assigned(rhs_type)) {
err_loc->error("can not assign " + to_string(rhs_type) + " to " + to_string(lhs));
}
@ -895,14 +829,20 @@ class InferCheckTypesAndCallsAndFieldsVisitor final {
return;
}
if (obj_type->try_as<TypeDataTuple>()) {
if (hint == nullptr) {
fire_error_cannot_deduce_untyped_tuple_access(v->loc, index_at);
}
if (hint->calc_width_on_stack() != 1) {
fire_error_cannot_put_non1_stack_width_arg_to_tuple(v->loc, hint);
TypePtr item_type = nullptr;
if (v->is_lvalue && !hint) { // left side of assignment
item_type = TypeDataUnknown::create();
} else {
if (hint == nullptr) {
fire_error_cannot_deduce_untyped_tuple_access(v->loc, index_at);
}
if (hint->calc_width_on_stack() != 1) {
fire_error_tuple_cannot_have_non1_stack_width_elem(v->loc, hint);
}
item_type = hint;
}
v->mutate()->assign_target(index_at);
assign_inferred_type(v, hint);
assign_inferred_type(v, item_type);
return;
}
v_ident->error("type " + to_string(obj_type) + " is not indexable");
@ -1081,7 +1021,7 @@ class InferCheckTypesAndCallsAndFieldsVisitor final {
AnyExprV item = v->get_item(i);
infer_any_expr(item, tuple_hint && i < tuple_hint->size() ? tuple_hint->items[i] : nullptr);
if (item->inferred_type->calc_width_on_stack() != 1) {
fire_error_cannot_put_non1_stack_width_arg_to_tuple(v->get_item(i)->loc, item->inferred_type);
fire_error_tuple_cannot_have_non1_stack_width_elem(v->get_item(i)->loc, item->inferred_type);
}
types_list.emplace_back(item->inferred_type);
}

View file

@ -25,7 +25,6 @@
*
* Example: `boolVar == true` -> `boolVar`.
* Example: `!!boolVar` -> `boolVar`.
* Also in unwraps parenthesis inside if condition and similar: `assert(((x)), 404)` -> `assert(x, 404)`
*
* todo some day, replace && || with & | when it's safe (currently, && always produces IFs in Fift)
* It's tricky to implement whether replacing is safe.
@ -35,13 +34,6 @@
namespace tolk {
static AnyExprV unwrap_parenthesis(AnyExprV v) {
while (v->type == ast_parenthesized_expression) {
v = v->as<ast_parenthesized_expression>()->get_expr();
}
return v;
}
struct OptimizerBooleanExpressionsReplacer final : ASTReplacerInFunctionBody {
static V<ast_int_const> create_int_const(SrcLocation loc, td::RefInt256&& intval) {
auto v_int = createV<ast_int_const>(loc, std::move(intval), {});
@ -118,7 +110,7 @@ protected:
AnyV replace(V<ast_if_statement> v) override {
parent::replace(v);
if (v->get_cond()->type == ast_parenthesized_expression) {
v = createV<ast_if_statement>(v->loc, v->is_ifnot, unwrap_parenthesis(v->get_cond()), v->get_if_body(), v->get_else_body());
v = createV<ast_if_statement>(v->loc, v->is_ifnot, v->get_cond(), v->get_if_body(), v->get_else_body());
}
// `if (!x)` -> ifnot(x)
@ -136,7 +128,7 @@ protected:
parent::replace(v);
if (v->get_cond()->type == ast_parenthesized_expression) {
v = createV<ast_while_statement>(v->loc, unwrap_parenthesis(v->get_cond()), v->get_body());
v = createV<ast_while_statement>(v->loc, v->get_cond(), v->get_body());
}
return v;
}
@ -145,7 +137,7 @@ protected:
parent::replace(v);
if (v->get_cond()->type == ast_parenthesized_expression) {
v = createV<ast_do_while_statement>(v->loc, v->get_body(), unwrap_parenthesis(v->get_cond()));
v = createV<ast_do_while_statement>(v->loc, v->get_body(), v->get_cond());
}
return v;
}
@ -154,7 +146,7 @@ protected:
parent::replace(v);
if (v->get_cond()->type == ast_parenthesized_expression) {
v = createV<ast_assert_statement>(v->loc, unwrap_parenthesis(v->get_cond()), v->get_thrown_code());
v = createV<ast_assert_statement>(v->loc, v->get_cond(), v->get_thrown_code());
}
return v;
}