From 1389ff678910f737e0fcd1ed55152d90620de66a Mon Sep 17 00:00:00 2001 From: tolk-vm Date: Mon, 10 Feb 2025 12:57:25 +0400 Subject: [PATCH] [Tolk] Change order of assignment evaluation, lhs first In FunC (and in Tolk before), the assignment > lhs = rhs evaluation order (at IR level) was "rhs first, lhs second". In practice, this did not matter, because lhs could only be a primitive: > (v1, v2) = getValue() Left side of assignment actually has no "evaluation". Since Tolk implemented indexed access, there could be > getTensor().0 = getValue() or (in the future) > getObject().field = getValue() where evaluation order becomes significant. Now evaluation order will be to "lhs first, rhs second" (more expected from user's point of view), which will become significant when building control flow graph. --- tolk-tester/tests/a10.tolk | 17 +- .../tests/allow_post_modification.tolk | 2 +- tolk-tester/tests/assignment-tests.tolk | 210 ++++++++ tolk-tester/tests/codegen_check_demo.tolk | 2 +- tolk-tester/tests/indexed-access.tolk | 84 +++- tolk-tester/tests/invalid-assign-1.tolk | 6 +- tolk-tester/tests/invalid-assign-2.tolk | 9 +- tolk-tester/tests/invalid-assign-3.tolk | 7 +- tolk-tester/tests/invalid-assign-4.tolk | 10 - tolk-tester/tests/invalid-assign-5.tolk | 9 - tolk-tester/tests/invalid-assign-6.tolk | 9 - tolk-tester/tests/invalid-assign-7.tolk | 8 - tolk-tester/tests/invalid-call-10.tolk | 2 +- tolk-tester/tests/mutate-methods.tolk | 2 +- tolk-tester/tests/null-keyword.tolk | 2 +- tolk/ast-stringifier.h | 2 +- tolk/pipe-ast-to-legacy.cpp | 461 +++++++++--------- tolk/pipe-constant-folding.cpp | 10 + tolk/pipe-infer-types-and-calls.cpp | 168 ++----- tolk/pipe-optimize-boolean-expr.cpp | 38 -- 20 files changed, 602 insertions(+), 456 deletions(-) delete mode 100644 tolk-tester/tests/invalid-assign-4.tolk delete mode 100644 tolk-tester/tests/invalid-assign-5.tolk delete mode 100644 tolk-tester/tests/invalid-assign-6.tolk delete mode 100644 
tolk-tester/tests/invalid-assign-7.tolk diff --git a/tolk-tester/tests/a10.tolk b/tolk-tester/tests/a10.tolk index 755a3bfb..031e29c9 100644 --- a/tolk-tester/tests/a10.tolk +++ b/tolk-tester/tests/a10.tolk @@ -144,15 +144,16 @@ fun test95() { """ test95 PROC:<{ ... - next GETGLOB // '10 - 3 PUSHINT // '10 '12=3 - 4 PUSHINT // '10 '12=3 '13=4 - 5 PUSHINT // '10 '12=3 '13=4 '14=5 - TRIPLE // '15 '16 - next SETGLOB + next GETGLOB // g_next + 3 PUSHINT // g_next '14=3 + 4 PUSHINT // g_next '14=3 '15=4 + 5 PUSHINT // g_next '14=3 '15=4 '16=5 + TRIPLE // '10 '11 + SWAP cur SETGLOB - cur GETGLOB // '17 - next GETGLOB // '17 '18 + next SETGLOB + cur GETGLOB // g_cur + next GETGLOB // g_cur g_next }> """ */ diff --git a/tolk-tester/tests/allow_post_modification.tolk b/tolk-tester/tests/allow_post_modification.tolk index df758a1e..e20e8218 100644 --- a/tolk-tester/tests/allow_post_modification.tolk +++ b/tolk-tester/tests/allow_post_modification.tolk @@ -147,5 +147,5 @@ fun main() { // x.0 x.1 """ -@code_hash 7627024945492125068389905298530400936797031708759561372406088054030801992712 +@code_hash 61280273714870328160131559159866470128402169974050439159015534193532598351244 */ diff --git a/tolk-tester/tests/assignment-tests.tolk b/tolk-tester/tests/assignment-tests.tolk index 40761939..34dd3e84 100644 --- a/tolk-tester/tests/assignment-tests.tolk +++ b/tolk-tester/tests/assignment-tests.tolk @@ -26,6 +26,185 @@ fun typesAsIdentifiers(builder: builder) { return int; } +global callOrder: tuple; + +fun getTensor_12() { + callOrder.tuplePush(100); + return (1, 2); +} +fun getTensor_1X(x: int) { + callOrder.tuplePush(101); + return (1, x); +} +fun getTuple_12() { + callOrder.tuplePush(110); + return [1, 2]; +} +fun getTuple_1X(x: int) { + callOrder.tuplePush(111); + return [1, x]; +} +fun getUntypedTuple_12() { + callOrder.tuplePush(120); + var t = createEmptyTuple(); t.tuplePush(1); t.tuplePush(2); + return t; +} +fun getUntypedTuple_1X(x: int) { + callOrder.tuplePush(121); + 
var t = createEmptyTuple(); t.tuplePush(1); t.tuplePush(x); + return t; +} +fun getIntValue5() { + callOrder.tuplePush(10); + return 5; +} +fun getIntValueX(x: int) { + callOrder.tuplePush(11); + return x; +} + +@method_id(102) +fun test102() { + callOrder = createEmptyTuple(); + var x = 0; + getTensor_12().0 = getIntValue5(); + getTensor_1X(5).1 = getIntValue5(); + getTensor_1X(x = 10).0 = getIntValueX(x); + return (callOrder, x); +} + +@method_id(103) +fun test103() { + callOrder = createEmptyTuple(); + var x = 0; + getTuple_12().0 = getIntValue5(); + getTuple_1X(5).1 = getIntValue5(); + getTuple_1X(x = 10).0 = getIntValueX(x); + return (callOrder, x); +} + +@method_id(104) +fun test104() { + callOrder = createEmptyTuple(); + var x = 0; + getUntypedTuple_12().0 = getIntValue5(); + getUntypedTuple_1X(5).1 = getIntValue5(); + getUntypedTuple_1X(x = 10).0 = getIntValueX(x); + return (callOrder, x); +} + +@method_id(105) +fun test105() { + callOrder = createEmptyTuple(); + getTensor_12().0 = getTensor_1X(getIntValue5()).1 = getIntValueX(getTensor_12().1); + return callOrder; +} + +@method_id(106) +fun test106() { + callOrder = createEmptyTuple(); + getTuple_12().0 = getTuple_1X(getIntValue5()).1 = getIntValueX(getTuple_12().1); + return callOrder; +} + +global t107: (int, int); + +@method_id(107) +fun test107() { + ((t107 = (1, 2)).0, (t107 = (3, 4)).1) = (5, 6); + return t107; +} + +global g108: int; +fun assertEq(a: int, b: int) { + assert(a == b, 10); + return b; +} + +@method_id(108) +fun test108() { + callOrder = createEmptyTuple(); + g108 = 0; + getTensor_1X(g108 = 8).1 = assertEq(g108, 8); + return (callOrder, g108); +} + +@method_id(109) +fun test109() { + callOrder = createEmptyTuple(); + var x = 0; + [getTuple_12().0, getTuple_1X(x = getIntValue5()).1, getTuple_1X(x += 10).0] = [getIntValue5(), getIntValue5(), getIntValueX(x)]; + return (callOrder, x); +} + +global g110: int; +global t110: (int, int); + +@method_id(110) +fun test110() { + callOrder = 
createEmptyTuple(); + var xy = [0, 0]; + [xy.0, getTuple_1X(g110 = 8).0] = [g110 += 5, getIntValueX(g110 += 10)]; + [xy.1, getTuple_1X((t110 = (8, 9)).0).1] = [t110.0 += 5, getIntValueX(t110.1 += 10)]; + return (xy, callOrder, g110, t110); +} + +@method_id(111) +fun test111() { + callOrder = createEmptyTuple(); + var z = -1; + var xy = [0, z = 0]; + var rhs = [getIntValueX(xy.1 += 10), xy.1, xy.0, z += 50]; + [xy.0, getTuple_1X(g110 = 8 + getIntValueX(xy.1)).0, xy.1, z] = rhs; + return (xy, g110, callOrder, z); +} + +@method_id(112) +fun test112() { + var xy = [1, 2]; + ((((xy))).0, ((xy.1))) = ((xy).1, ((xy.0))); + return xy; +} + +@method_id(113) +fun test113() { + var (a, t, z) = (1, [2,3], (-1,-1)); + (a, t, a, z, t.1, z.1) = (10, [a,12], 13, (a, t.1), 14, t.1); + return (a, t, z); +} + +global g114: int; +global t114: [int, int]; +global z114: (int, int); + +@method_id(114) +fun test114() { + g114 = 1; + t114 = [2, 3]; + (g114, t114, g114, z114, t114.1, z114.1) = (10, [g114,12], 13, (g114, t114.1), 14, t114.1); + return (g114, t114, z114); +} + +@method_id(115) +fun test115() { + callOrder = createEmptyTuple(); + var x = 0; + var y = 0; + [getTensor_1X(x = 5).0, y] = getTuple_1X(x = 9); + return (callOrder, x, y); +} + +@method_id(116) +fun test116() { + var (a,b,c,d) = (0,0,0,0); + var rhs = [1, 2, 3, 4]; + var rhs2 = ([a,b,c,d] = rhs); + __expect_type(rhs2, "[int, int, int, int]"); + return (a, b, c, d, rhs2); +} + + + fun main(value: int) { var (x: int, y) = (autoInferIntNull(value), autoInferIntNull(value * 2)); if (x == null && y == null) { return null; } @@ -37,4 +216,35 @@ fun main(value: int) { @testcase | 0 | 6 | -1 @testcase | 0 | 11 | (null) @testcase | 101 | 78 | 88 +@testcase | 102 | | [ 100 10 101 10 101 11 ] 10 +@testcase | 103 | | [ 110 10 111 10 111 11 ] 10 +@testcase | 104 | | [ 120 10 121 10 121 11 ] 10 +@testcase | 105 | | [ 100 10 101 100 11 ] +@testcase | 106 | | [ 110 10 111 110 11 ] +@testcase | 107 | | 3 4 +@testcase | 108 | | [ 101 ] 
8 +@testcase | 109 | | [ 110 10 111 111 10 10 11 ] 15 +@testcase | 110 | | [ 13 13 ] [ 111 11 111 11 ] 23 13 19 +@testcase | 111 | | [ 10 0 ] 18 [ 11 11 111 ] 50 +@testcase | 112 | | [ 2 1 ] +@testcase | 113 | | 13 [ 1 14 ] 1 3 +@testcase | 114 | | 13 [ 1 14 ] 1 3 +@testcase | 115 | | [ 101 111 ] 9 9 +@testcase | 116 | | 1 2 3 4 [ 1 2 3 4 ] + + +@fif_codegen +""" + test116 PROC:<{ + // + 1 PUSHINT // '10=1 + 2 PUSHINT // '10=1 '11=2 + 3 PUSHINT // '10=1 '11=2 '12=3 + 4 PUSHINT // '10=1 '11=2 '12=3 '13=4 + 4 TUPLE // rhs + DUP // rhs rhs + 4 UNTUPLE // rhs2 a b c d + 4 ROLL // a b c d rhs2 + }> +""" */ diff --git a/tolk-tester/tests/codegen_check_demo.tolk b/tolk-tester/tests/codegen_check_demo.tolk index b355a9b7..5b46c093 100644 --- a/tolk-tester/tests/codegen_check_demo.tolk +++ b/tolk-tester/tests/codegen_check_demo.tolk @@ -35,7 +35,7 @@ Below, I just give examples of @fif_codegen tag: """ main PROC:<{ // s - 17 PUSHINT // s '1=17 + 17 PUSHINT // s '3=17 OVER // s z=17 t WHILE:<{ ... 
diff --git a/tolk-tester/tests/indexed-access.tolk b/tolk-tester/tests/indexed-access.tolk index 38094fa5..ab7995cf 100644 --- a/tolk-tester/tests/indexed-access.tolk +++ b/tolk-tester/tests/indexed-access.tolk @@ -21,6 +21,26 @@ fun plus(mutate self: int, y: int): int { fun eq(v: X): X { return v; } +global gTup: [int]; +global gTens: (int, int); + +@method_id(100) +fun testCodegenSimple() { + var t1 = [1]; + t1.0 = 2; + debugPrintString(""); + var t2 = [[1]]; + t2.0.0 = 2; + debugPrintString(""); + gTup = [1]; + gTup.0 = 2; + debugPrintString(""); + gTens = (1,2); + gTens.1 = 4; + debugPrintString(""); + return (t1, t2, gTup, gTens); +} + @method_id(101) fun test101() { var t = (1, (2, 3), [4, 5, [6, 7]], 8); @@ -241,30 +261,60 @@ fun main(){} @fif_codegen """ - testCodegenNoPureIndexedAccess PROC:<{ + testCodegenSimple PROC:<{ // - 0 PUSHINT // '8=0 + 1 PUSHINT // '2=1 + SINGLE // t1 + 2 PUSHINT // t1 '3=2 + 0 SETINDEX // t1 + x{} PUSHSLICE // t1 '6 + STRDUMP DROP + 1 PUSHINT // t1 '10=1 + SINGLE // t1 '9 + SINGLE // t1 t2 + 2 PUSHINT // t1 t2 '11=2 + OVER // t1 t2 '11=2 t2 + 0 INDEX // t1 t2 '11=2 '14 + SWAP // t1 t2 '14 '11=2 + 0 SETINDEX // t1 t2 '14 + 0 SETINDEX // t1 t2 + x{} PUSHSLICE // t1 t2 '17 + STRDUMP DROP + 1 PUSHINT // t1 t2 '20=1 + SINGLE // t1 t2 '18 + gTup SETGLOB + 2 PUSHINT // t1 t2 '21=2 + gTup GETGLOB // t1 t2 '21=2 g_gTup + SWAP // t1 t2 g_gTup '21=2 + 0 SETINDEX // t1 t2 g_gTup + gTup SETGLOB + x{} PUSHSLICE // t1 t2 '25 + STRDUMP DROP + 1 PUSHINT // t1 t2 '28=1 + 2 PUSHINT // t1 t2 '26=1 '27=2 + PAIR + gTens SETGLOB + 4 PUSHINT // t1 t2 g_gTens.1=4 + gTens GETGLOB + UNPAIR // t1 t2 g_gTens.1=4 g_gTens.0 g_gTens.1 + DROP // t1 t2 g_gTens.1=4 g_gTens.0 + SWAP // t1 t2 g_gTens.0 g_gTens.1=4 + PAIR + gTens SETGLOB + x{} PUSHSLICE // t1 t2 '36 + STRDUMP DROP + gTup GETGLOB // t1 t2 g_gTup + gTens GETGLOB + UNPAIR // t1 t2 g_gTup g_gTens.0 g_gTens.1 }> """ @fif_codegen """ - test104 PROC:<{ + testCodegenNoPureIndexedAccess PROC:<{ // - 5 
PUSHINT // '2=5 - DUP // '2=5 '3=5 - PAIR // '1 - SINGLE // m - 10 PUSHINT // m '5=10 - 20 PUSHINT // m '5=10 '6=20 - s2 PUSH // m '5=10 '6=20 m - 0 INDEX // m '10=10 '12=20 '8 - SWAP // m '10=10 '8 '12=20 - 1 SETINDEX // m '10=10 '8 - SWAP // m '8 '10=10 - 0 SETINDEX // m '8 - 0 SETINDEX // m - ... + 0 PUSHINT // '8=0 + }> """ @fif_codegen diff --git a/tolk-tester/tests/invalid-assign-1.tolk b/tolk-tester/tests/invalid-assign-1.tolk index f605056e..799176df 100644 --- a/tolk-tester/tests/invalid-assign-1.tolk +++ b/tolk-tester/tests/invalid-assign-1.tolk @@ -1,9 +1,9 @@ fun main() { - var c = 1; - (c, c) = (2, 3); + var t = createEmptyTuple(); + t.0 = (1, 2); } /** @compilation_should_fail -@stderr one variable modified twice inside the same expression +@stderr a tuple can not have `(int, int)` inside, because it occupies 2 stack slots in TVM, not 1 */ diff --git a/tolk-tester/tests/invalid-assign-2.tolk b/tolk-tester/tests/invalid-assign-2.tolk index 2838ed9a..6a33e696 100644 --- a/tolk-tester/tests/invalid-assign-2.tolk +++ b/tolk-tester/tests/invalid-assign-2.tolk @@ -1,11 +1,8 @@ -fun incThree(mutate a: int, mutate b: int, mutate c: int) {} - -fun main() { - var c = [[[1, 2]]]; - incThree(mutate c.0.0.0, mutate c.0.0.1, mutate c.0.0.0); +fun main(cs: slice) { + var cb = cs.tupleSize; } /** @compilation_should_fail -@stderr one variable modified twice inside the same expression +@stderr referencing a method for `tuple` with object of type `slice` */ diff --git a/tolk-tester/tests/invalid-assign-3.tolk b/tolk-tester/tests/invalid-assign-3.tolk index d3f5d1f1..567ace33 100644 --- a/tolk-tester/tests/invalid-assign-3.tolk +++ b/tolk-tester/tests/invalid-assign-3.tolk @@ -1,10 +1,9 @@ -global gg: (int, int); - fun main() { - [gg.0, gg.1, gg.0] = [0, 1, 0]; + var t = createEmptyTuple(); + var xy = t.0 as (int, int); } /** @compilation_should_fail -@stderr one variable modified twice inside the same expression +@stderr a tuple can not have `(int, int)` inside, 
because it occupies 2 stack slots in TVM, not 1 */ diff --git a/tolk-tester/tests/invalid-assign-4.tolk b/tolk-tester/tests/invalid-assign-4.tolk deleted file mode 100644 index 67340b20..00000000 --- a/tolk-tester/tests/invalid-assign-4.tolk +++ /dev/null @@ -1,10 +0,0 @@ -global gg: (int, [int, int]); - -fun main() { - (gg.1.0, gg.1, gg.1.1) = (0, [1, 2], 3); -} - -/** -@compilation_should_fail -@stderr one variable both modified and read inside the same expression -*/ diff --git a/tolk-tester/tests/invalid-assign-5.tolk b/tolk-tester/tests/invalid-assign-5.tolk deleted file mode 100644 index f3fe59f7..00000000 --- a/tolk-tester/tests/invalid-assign-5.tolk +++ /dev/null @@ -1,9 +0,0 @@ -fun main() { - var ab = (1, 2); - (ab, ab.1) = ((2, 3), 4); -} - -/** -@compilation_should_fail -@stderr one variable both modified and read inside the same expression -*/ diff --git a/tolk-tester/tests/invalid-assign-6.tolk b/tolk-tester/tests/invalid-assign-6.tolk deleted file mode 100644 index 59d769e9..00000000 --- a/tolk-tester/tests/invalid-assign-6.tolk +++ /dev/null @@ -1,9 +0,0 @@ -fun main() { - var t = createEmptyTuple(); - t.0 = (1, 2); -} - -/** -@compilation_should_fail -@stderr can not put `(int, int)` into a tuple, because it occupies 2 stack slots in TVM, not 1 -*/ diff --git a/tolk-tester/tests/invalid-assign-7.tolk b/tolk-tester/tests/invalid-assign-7.tolk deleted file mode 100644 index 6a33e696..00000000 --- a/tolk-tester/tests/invalid-assign-7.tolk +++ /dev/null @@ -1,8 +0,0 @@ -fun main(cs: slice) { - var cb = cs.tupleSize; -} - -/** -@compilation_should_fail -@stderr referencing a method for `tuple` with object of type `slice` -*/ diff --git a/tolk-tester/tests/invalid-call-10.tolk b/tolk-tester/tests/invalid-call-10.tolk index 9a28c004..4da85f4f 100644 --- a/tolk-tester/tests/invalid-call-10.tolk +++ b/tolk-tester/tests/invalid-call-10.tolk @@ -7,5 +7,5 @@ fun main() { /** @compilation_should_fail -@stderr can not put `(int, builder)` into a tuple, because 
it occupies 2 stack slots in TVM, not 1 +@stderr a tuple can not have `(int, builder)` inside, because it occupies 2 stack slots in TVM, not 1 */ diff --git a/tolk-tester/tests/mutate-methods.tolk b/tolk-tester/tests/mutate-methods.tolk index ebd07aca..9ebf8b1d 100644 --- a/tolk-tester/tests/mutate-methods.tolk +++ b/tolk-tester/tests/mutate-methods.tolk @@ -307,7 +307,7 @@ fun main(){} ... incrementTwoInPlace CALLDICT // x y sum1 -ROT - 10 PUSHINT // sum1 x y '10=10 + 10 PUSHINT // sum1 x y '11=10 incrementTwoInPlace CALLDICT // sum1 x y sum2 s1 s3 s0 XCHG3 // x y sum1 sum2 }> diff --git a/tolk-tester/tests/null-keyword.tolk b/tolk-tester/tests/null-keyword.tolk index 69678434..eb02b624 100644 --- a/tolk-tester/tests/null-keyword.tolk +++ b/tolk-tester/tests/null-keyword.tolk @@ -133,7 +133,7 @@ fun main() { """ test7 PROC:<{ ... - LDOPTREF // b '8 '7 + LDOPTREF // b '9 '8 DROP // b c ISNULL // b '11 10 MULCONST // b '13 diff --git a/tolk/ast-stringifier.h b/tolk/ast-stringifier.h index 4ec72cdd..1211d63f 100644 --- a/tolk/ast-stringifier.h +++ b/tolk/ast-stringifier.h @@ -193,7 +193,7 @@ class ASTStringifier final : public ASTVisitor { } case ast_local_var_lhs: { std::ostringstream os; - os << (v->as()->inferred_type ? v->as()->inferred_type : v->as()->declared_type); + os << (v->as()->inferred_type ? v->as()->inferred_type->as_human_readable() : v->as()->declared_type->as_human_readable()); if (v->as()->get_name().empty()) { return "_: " + os.str(); } diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index f5eca22c..867c05ec 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -30,158 +30,17 @@ * Up to this point, all types have been inferred, all validity checks have been passed, etc. * All properties in AST nodes are assigned and can be safely used (fun_ref, etc.). * So, if execution reaches this pass, the input is (almost) correct, and code generation should succeed. 
- * The only thing additionally checked during this pass is tricky lvalue, like one and the same variable - * assigned/mutated multiple times in same expression, e.g. `(t.0, t.0) = rhs` / `f(mutate x.1.2, mutate x)`. + * (previously, there was a check for one variable modified twice like `(t.0, t.0) = rhs`, but after changing + * execution order of assignment to "first lhs, then rhs", it was removed for several reasons) */ namespace tolk { -// fire error on cases like `(a, a) = rhs` / `f(mutate t.1.0, mutate t.1.0)` -GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_variable_modified_twice_inside_same_expression(SrcLocation loc) { - throw ParseError(loc, "one variable modified twice inside the same expression"); -} +class LValContext; +std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, LValContext* lval_ctx = nullptr); +std::vector pre_compile_symbol(SrcLocation loc, const Symbol* sym, CodeBlob& code, LValContext* lval_ctx); +void process_any_statement(AnyV v, CodeBlob& code); -// fire error on cases like `(m.1.0, m.1) = rhs` (m.1 inside m.1.0 is "rval inside lval") -GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_variable_modified_and_read_inside_same_expression(SrcLocation loc) { - throw ParseError(loc, "one variable both modified and read inside the same expression"); -} - -// Main goal of LValContext is to handle non-primitive lvalues. At IR level, a usual local variable -// exists, but on its change, something non-trivial should happen. -// Example: `globalVar = 9` actually does `Const $5 = 9` + `Let $6 = $5` + `SetGlob "globVar" = $6` -// Example: `tupleVar.0 = 9` actually does `Const $5 = 9` + `Let $6 = $5` + `Const $7 = 0` + `Call tupleSetAt($4, $6, $7)` -// Of course, mixing globals with tuples should also be supported. -// To achieve this, treat tupleObj inside "tupleObj.i" like "rvalue inside lvalue".
-// For instance, `globalTuple.0 = 9` reads global (like rvalue), assigns 9 to tmp var, modifies tuple, writes global. -// A challenging thing is handling "unique" parts, to be read/updated only once. -// Example: `f(mutate globalTensor.0, mutate globalTensor.1)`, then globalTensor should be read/written once. -// Example: `(t.0.0, t.0.1) = rhs` (m is [[int, int]]), then t.0 should be read/updated once. -// Solving this by calculating hashes of every lvalue or rvalue inside lvalue automatically gives an ability -// to detect and fire "multiple writes inside expression", like `(a, a) = rhs` / `[t.0, (t.0.1, c)] = rhs`. -// Note, that tensors (not tuples) `tensorVar.0 = 9` do not emit anything special (unless global). -class LValContext { - // every global variable used as lvalue is registered here - // example: `globalInt = 9`, implicit var is created `$tmp = 9`, and `SetGlob "globalInt" $tmp` is done after - // global tensors are stored as tuples (unpacked on reading, packed on writing), then multiple tmp vars are created - struct ModifiedGlob { - const GlobalVarData* glob_ref; - std::vector local_ir_idx; // typically 1, generally calc_width_on_stack() of global var (tensors) - - void apply(CodeBlob& code, SrcLocation loc) const { - Op& op = code.emplace_back(loc, Op::_SetGlob, std::vector{}, local_ir_idx, glob_ref); - op.set_impure_flag(); - } - }; - - // every tuple index used as lvalue is registered here - // example: `t.0 = 9`, implicit var is created `$tmp = 9`, as well as `$tmp_idx = 0` and `tupleSetAt()` is done after - // for `t.0.0` if t is `[[int, ...]]`, `tupleAt()` for it is done since it's rvalue, and `tupleSetAt()` is done 2 times - struct ModifiedTupleIndex { - uint64_t hash; - var_idx_t tuple_ir_idx; - var_idx_t index_ir_idx; - var_idx_t field_ir_idx; - - void apply(CodeBlob& code, SrcLocation loc) const { - const FunctionData* builtin_sym = lookup_global_symbol("tupleSetAt")->as(); - code.emplace_back(loc, Op::_Call, std::vector{tuple_ir_idx}, 
std::vector{tuple_ir_idx, field_ir_idx, index_ir_idx}, builtin_sym); - } - }; - - int level_rval_inside_lval = 0; - std::vector> modifications; - std::unordered_set all_modified_hashes; - - void fire_if_one_variable_modified_twice(SrcLocation loc, uint64_t modified_hash) { - if (!is_rval_inside_lval()) { - if (!all_modified_hashes.insert(modified_hash).second) { - fire_error_variable_modified_twice_inside_same_expression(loc); - } - if (all_modified_hashes.contains(~modified_hash)) { - fire_error_variable_modified_and_read_inside_same_expression(loc); - } - } else { - all_modified_hashes.insert(~modified_hash); - if (all_modified_hashes.contains(modified_hash)) { - fire_error_variable_modified_and_read_inside_same_expression(loc); - } - } - } - -public: - void enter_rval_inside_lval() { level_rval_inside_lval++; } - void exit_rval_inside_lval() { level_rval_inside_lval--; } - bool is_rval_inside_lval() const { return level_rval_inside_lval > 0; } - - uint64_t register_lval(SrcLocation loc, const LocalVarData* var_ref) { - uint64_t hash = reinterpret_cast(var_ref); - fire_if_one_variable_modified_twice(loc, hash); - return hash; - } - - uint64_t register_lval(SrcLocation loc, const GlobalVarData* glob_ref) { - uint64_t hash = reinterpret_cast(glob_ref); - fire_if_one_variable_modified_twice(loc, hash); - return hash; - } - - uint64_t register_lval(SrcLocation loc, V v) { - uint64_t hash = 7; - AnyExprV leftmost_obj = v; - while (auto v_dot = leftmost_obj->try_as()) { - if (!v_dot->is_target_indexed_access()) { - break; - } - hash = hash * 1915239017 + std::get(v_dot->target); - leftmost_obj = v_dot->get_obj(); - } - if (auto v_ref = leftmost_obj->try_as()) { - hash *= reinterpret_cast(v_ref->sym); // `v.0` and `v.0` in 2 places is the same - } else { - hash *= reinterpret_cast(leftmost_obj); // unlike `f().0` and `f().0` (pointers to AST nodes differ) - } - fire_if_one_variable_modified_twice(loc, hash); - return hash; - } - - const std::vector* 
exists_already_known_global(const GlobalVarData* glob_ref) const { - for (const auto& m : modifications) { - if (const auto* m_glob = std::get_if(&m); m_glob && m_glob->glob_ref == glob_ref) { - return &m_glob->local_ir_idx; - } - } - return nullptr; - } - - const var_idx_t* exists_already_known_tuple_index(uint64_t hash) const { - for (const auto& m : modifications) { - if (const auto* m_tup = std::get_if(&m); m_tup && m_tup->hash == hash) { - return &m_tup->field_ir_idx; - } - } - return nullptr; - } - - void register_modified_global(const GlobalVarData* glob_ref, std::vector local_ir_idx) { - modifications.emplace_back(ModifiedGlob{glob_ref, std::move(local_ir_idx)}); - } - - void register_modified_tuple_index(uint64_t hash, var_idx_t tuple_ir_idx, var_idx_t index_ir_idx, var_idx_t field_ir_idx) { - modifications.emplace_back(ModifiedTupleIndex{hash, tuple_ir_idx, index_ir_idx, field_ir_idx}); - } - - void gen_ops_if_nonempty(CodeBlob& code, SrcLocation loc) const { - for (auto it = modifications.rbegin(); it != modifications.rend(); ++it) { // reverse, it's important - if (const auto* m_glob = std::get_if(&*it)) { - m_glob->apply(code, loc); - } else if (const auto* m_tup = std::get_if(&*it)) { - m_tup->apply(code, loc); - } - } - } -}; // The goal of VarsModificationWatcher is to detect such cases: `return (x, x += y, x)`. // Without any changes, ops will be { _Call $2 = +($0_x, $1_y); _Return $0_x, $2, $0_x } - incorrect @@ -229,8 +88,176 @@ public: static VarsModificationWatcher vars_modification_watcher; -std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, LValContext* lval_ctx = nullptr); -void process_any_statement(AnyV v, CodeBlob& code); + +// Main goal of LValContext is to handle non-primitive lvalues. At IR level, a usual local variable +// exists, but on its change, something non-trivial should happen. 
+// Example: `globalVar = 9` actually does `Const $5 = 9` + `Let $6 = $5` + `SetGlob "globVar" = $6` +// Example: `tupleVar.0 = 9` actually does `Const $5 = 9` + `Let $6 = $5` + `Const $7 = 0` + `Call tupleSetAt($4, $6, $7)` +// Of course, mixing globals with tuples should also be supported. +// To achieve this, treat tupleObj inside "tupleObj.i" like "rvalue inside lvalue". +// For instance, `globalTuple.0 = 9` reads global (like rvalue), assigns 9 to tmp var, modifies tuple, writes global. +// Note, that tensors (not tuples) `tensorVar.0 = 9` do not emit anything special (unless global). +class LValContext { + // every global variable used as lvalue is registered here + // example: `globalInt = 9`, implicit var is created `$tmp = 9`, and `SetGlob "globalInt" $tmp` is done after + struct ModifiedGlobal { + const GlobalVarData* glob_ref; + std::vector lval_ir_idx; // typically 1, generally calc_width_on_stack() of global var (tensors) + + // for 1-slot globals int/cell/slice, assigning to them is just SETGLOB + // same for tensors, if they are fully rewritten in an expression: `gTensor = (5,6)` + void apply_fully_rewrite(CodeBlob& code, SrcLocation loc) const { + Op& op = code.emplace_back(loc, Op::_SetGlob, std::vector{}, lval_ir_idx, glob_ref); + op.set_impure_flag(); + } + + // for N-slot globals tensor/struct/union, assigning to their parts, like `gTensor.1 = 6` + // we need to read gTensor as a whole (0-th and 1-th component), rewrite 1-th component, and SETGLOB a whole back + void apply_partially_rewrite(CodeBlob& code, SrcLocation loc, std::vector&& was_modified_by_let) const { + LValContext local_lval; + local_lval.enter_rval_inside_lval(); + std::vector local_ir_idx = pre_compile_symbol(loc, glob_ref, code, &local_lval); + for (size_t i = 0; i < local_ir_idx.size(); ++i) { + if (was_modified_by_let[i]) { + code.emplace_back(loc, Op::_Let, std::vector{local_ir_idx[i]}, std::vector{lval_ir_idx[i]}); + } + } + + Op& op = code.emplace_back(loc, Op::_SetGlob, 
std::vector{}, local_ir_idx, glob_ref); + op.set_impure_flag(); + } + }; + + // every tensor index, when a tensor is a global, is registered here (same for structs and fields) + // example: `global v: (int, int); v.1 = 5`, implicit var is created `$tmp = 5`, and when it's modified, + // we need to partially update v; essentially, apply_partially_rewrite() above will be called + struct ModifiedFieldOfGlobal { + AnyExprV tensor_obj; + int index_at; + std::vector lval_ir_idx; + + void apply(CodeBlob& code, SrcLocation loc) const { + LValContext local_lval; + local_lval.enter_rval_inside_lval(); + std::vector obj_ir_idx = pre_compile_expr(tensor_obj, code, &local_lval); + const TypeDataTensor* t_tensor = tensor_obj->inferred_type->try_as(); + tolk_assert(t_tensor); + int stack_width = t_tensor->items[index_at]->calc_width_on_stack(); + int stack_offset = 0; + for (int i = 0; i < index_at; ++i) { + stack_offset += t_tensor->items[i]->calc_width_on_stack(); + } + std::vector field_ir_idx = {obj_ir_idx.begin() + stack_offset, obj_ir_idx.begin() + stack_offset + stack_width}; + tolk_assert(field_ir_idx.size() == lval_ir_idx.size()); + + vars_modification_watcher.trigger_callbacks(field_ir_idx, loc); + code.emplace_back(loc, Op::_Let, field_ir_idx, lval_ir_idx); + local_lval.after_let(std::move(field_ir_idx), code, loc); + } + }; + + // every tuple index used as lvalue is registered here + // example: `t.0 = 9`, implicit var is created `$tmp = 9`, as well as `$tmp_idx = 0` and `tupleSetAt()` is done after + // for `t.0.0` if t is `[[int, ...]]`, `tupleAt()` for it is done since it's rvalue, and `tupleSetAt()` is done 2 times + struct ModifiedTupleIndex { + AnyExprV tuple_obj; + int index_at; + std::vector lval_ir_idx; + + void apply(CodeBlob& code, SrcLocation loc) const { + LValContext local_lval; + local_lval.enter_rval_inside_lval(); + std::vector tuple_ir_idx = pre_compile_expr(tuple_obj, code, &local_lval); + std::vector index_ir_idx =
code.create_tmp_var(TypeDataInt::create(), loc, "(tuple-idx)"); + code.emplace_back(loc, Op::_IntConst, index_ir_idx, td::make_refint(index_at)); + + vars_modification_watcher.trigger_callbacks(tuple_ir_idx, loc); + const FunctionData* builtin_sym = lookup_global_symbol("tupleSetAt")->as(); + code.emplace_back(loc, Op::_Call, std::vector{tuple_ir_idx}, std::vector{tuple_ir_idx[0], lval_ir_idx[0], index_ir_idx[0]}, builtin_sym); + local_lval.after_let(std::move(tuple_ir_idx), code, loc); + } + }; + + int level_rval_inside_lval = 0; + std::vector> modifications; + + static bool vector_contains(const std::vector& ir_vars, var_idx_t ir_idx) { + for (var_idx_t var_in_vector : ir_vars) { + if (var_in_vector == ir_idx) { + return true; + } + } + return false; + } + +public: + void enter_rval_inside_lval() { level_rval_inside_lval++; } + void exit_rval_inside_lval() { level_rval_inside_lval--; } + bool is_rval_inside_lval() const { return level_rval_inside_lval > 0; } + + void capture_global_modification(const GlobalVarData* glob_ref, std::vector lval_ir_idx) { + modifications.emplace_back(ModifiedGlobal{glob_ref, std::move(lval_ir_idx)}); + } + + void capture_field_of_global_modification(AnyExprV tensor_obj, int index_at, std::vector lval_ir_idx) { + modifications.emplace_back(ModifiedFieldOfGlobal{tensor_obj, index_at, std::move(lval_ir_idx)}); + } + + void capture_tuple_index_modification(AnyExprV tuple_obj, int index_at, std::vector lval_ir_idx) { + modifications.emplace_back(ModifiedTupleIndex{tuple_obj, index_at, std::move(lval_ir_idx)}); + } + + void after_let(std::vector&& let_left_vars, CodeBlob& code, SrcLocation loc) const { + for (const auto& modification : modifications) { + if (const auto* m_glob = std::get_if(&modification)) { + int n_modified_by_let = 0; + std::vector was_modified_by_let; + was_modified_by_let.resize(m_glob->lval_ir_idx.size()); + for (size_t i = 0; i < m_glob->lval_ir_idx.size(); ++i) { + if (vector_contains(let_left_vars, 
m_glob->lval_ir_idx[i])) { + was_modified_by_let[i] = true; + n_modified_by_let++; + } + } + if (n_modified_by_let == static_cast(m_glob->lval_ir_idx.size())) { + m_glob->apply_fully_rewrite(code, loc); + } else if (n_modified_by_let > 0) { + m_glob->apply_partially_rewrite(code, loc, std::move(was_modified_by_let)); + } + } else if (const auto* m_tup = std::get_if(&modification)) { + bool was_tuple_index_modified = false; + for (var_idx_t field_ir_idx : m_tup->lval_ir_idx) { + was_tuple_index_modified |= vector_contains(let_left_vars, field_ir_idx); + } + if (was_tuple_index_modified) { + m_tup->apply(code, loc); + } + } else if (const auto* m_tens = std::get_if(&modification)) { + bool was_tensor_index_modified = false; + for (var_idx_t field_ir_idx : m_tens->lval_ir_idx) { + was_tensor_index_modified |= vector_contains(let_left_vars, field_ir_idx); + } + if (was_tensor_index_modified) { + m_tens->apply(code, loc); + } + } + } + } +}; + +// given `{some_expr}.{i}`, check it for pattern `some_var.0` / `some_var.0.1` / etc. +// return some_var if satisfies (it may be a local or a global var, a tensor or a tuple) +// return nullptr otherwise: `f().0` / `(v = rhs).0` / `some_var.method().0` / etc. +static V calc_sink_leftmost_obj(V v) { + AnyExprV leftmost_obj = v->get_obj(); + while (auto v_dot = leftmost_obj->try_as()) { + if (!v_dot->is_target_indexed_access()) { + break; + } + leftmost_obj = v_dot->get_obj(); + } + return leftmost_obj->type == ast_reference ? 
leftmost_obj->as() : nullptr; +} static std::vector> pre_compile_tensor_inner(CodeBlob& code, const std::vector& args, @@ -313,43 +340,45 @@ static std::vector pre_compile_tensor(CodeBlob& code, const std::vect static std::vector pre_compile_let(CodeBlob& code, AnyExprV lhs, AnyExprV rhs, SrcLocation loc) { // [lhs] = [rhs]; since type checking is ok, it's the same as "lhs = rhs" if (lhs->type == ast_typed_tuple && rhs->type == ast_typed_tuple) { - std::vector right = pre_compile_tensor(code, rhs->as()->get_items()); LValContext local_lval; std::vector left = pre_compile_tensor(code, lhs->as()->get_items(), &local_lval); vars_modification_watcher.trigger_callbacks(left, loc); - code.emplace_back(loc, Op::_Let, std::move(left), right); - local_lval.gen_ops_if_nonempty(code, loc); + std::vector rvect = pre_compile_tensor(code, rhs->as()->get_items()); + code.emplace_back(loc, Op::_Let, left, rvect); + local_lval.after_let(std::move(left), code, loc); + std::vector right = code.create_tmp_var(TypeDataTuple::create(), loc, "(tuple)"); + code.emplace_back(lhs->loc, Op::_Tuple, right, std::move(rvect)); return right; } // [lhs] = rhs; it's un-tuple to N left vars if (lhs->type == ast_typed_tuple) { + LValContext local_lval; + std::vector left = pre_compile_tensor(code, lhs->as()->get_items(), &local_lval); + vars_modification_watcher.trigger_callbacks(left, loc); std::vector right = pre_compile_expr(rhs, code); const TypeDataTypedTuple* inferred_tuple = rhs->inferred_type->try_as(); std::vector types_list = inferred_tuple->items; std::vector rvect = code.create_tmp_var(TypeDataTensor::create(std::move(types_list)), rhs->loc, "(unpack-tuple)"); code.emplace_back(lhs->loc, Op::_UnTuple, rvect, std::move(right)); - LValContext local_lval; - std::vector left = pre_compile_tensor(code, lhs->as()->get_items(), &local_lval); - vars_modification_watcher.trigger_callbacks(left, loc); - code.emplace_back(loc, Op::_Let, std::move(left), rvect); - 
local_lval.gen_ops_if_nonempty(code, loc); - return rvect; + code.emplace_back(loc, Op::_Let, left, rvect); + local_lval.after_let(std::move(left), code, loc); + return right; } // small optimization: `var x = rhs` or `local_var = rhs` (90% cases), LValContext not needed actually if (lhs->type == ast_local_var_lhs || (lhs->type == ast_reference && lhs->as()->sym->try_as())) { - std::vector right = pre_compile_expr(rhs, code); std::vector left = pre_compile_expr(lhs, code); // effectively, local_var->ir_idx vars_modification_watcher.trigger_callbacks(left, loc); + std::vector right = pre_compile_expr(rhs, code); code.emplace_back(loc, Op::_Let, std::move(left), right); return right; } // lhs = rhs - std::vector right = pre_compile_expr(rhs, code); LValContext local_lval; std::vector left = pre_compile_expr(lhs, code, &local_lval); vars_modification_watcher.trigger_callbacks(left, loc); - code.emplace_back(loc, Op::_Let, std::move(left), right); - local_lval.gen_ops_if_nonempty(code, loc); + std::vector right = pre_compile_expr(rhs, code); + code.emplace_back(loc, Op::_Let, left, right); + local_lval.after_let(std::move(left), code, loc); return right; } @@ -364,28 +393,22 @@ static std::vector gen_op_call(CodeBlob& code, TypePtr ret_type, SrcL } -static std::vector pre_compile_symbol(SrcLocation loc, const Symbol* sym, CodeBlob& code, LValContext* lval_ctx) { +std::vector pre_compile_symbol(SrcLocation loc, const Symbol* sym, CodeBlob& code, LValContext* lval_ctx) { if (const auto* glob_ref = sym->try_as()) { - if (!lval_ctx) { - // `globalVar` is used for reading, just create local IR var to represent its value, Op GlobVar will fill it - // note, that global tensors are stored as a tuple an unpacked to N vars on read, N determined by declared_type - std::vector local_ir_idx = code.create_tmp_var(glob_ref->declared_type, loc, "(glob-var)"); - code.emplace_back(loc, Op::_GlobVar, local_ir_idx, std::vector{}, glob_ref); - return local_ir_idx; - } else { - // 
`globalVar = rhs` / `mutate globalVar` / `globalTuple.0 = rhs` - lval_ctx->register_lval(loc, glob_ref); - if (const std::vector* local_ir_idx = lval_ctx->exists_already_known_global(glob_ref)) { - return *local_ir_idx; // `f(mutate g.0, mutate g.1)`, then g will be read only once - } - std::vector local_ir_idx = code.create_tmp_var(glob_ref->declared_type, loc, "(glob-var)"); - if (lval_ctx->is_rval_inside_lval()) { // for `globalVar.0` "globalVar" is rvalue inside lvalue - // for `globalVar = rhs` don't read a global actually, but for `globalVar.0 = rhs` do - code.emplace_back(loc, Op::_GlobVar, local_ir_idx, std::vector{}, glob_ref); - } - lval_ctx->register_modified_global(glob_ref, local_ir_idx); - return local_ir_idx; + // handle `globalVar = rhs` / `mutate globalVar` + if (lval_ctx && !lval_ctx->is_rval_inside_lval()) { + std::vector lval_ir_idx = code.create_tmp_var(glob_ref->declared_type, loc, "(lval-glob)"); + lval_ctx->capture_global_modification(glob_ref, lval_ir_idx); + return lval_ir_idx; } + // `globalVar` is used for reading, just create local IR var to represent its value, Op GlobVar will fill it + // note, that global tensors are stored as a tuple an unpacked to N vars on read, N determined by declared_type + std::vector local_ir_idx = code.create_var(glob_ref->declared_type, loc, "g_" + glob_ref->name); + code.emplace_back(loc, Op::_GlobVar, local_ir_idx, std::vector{}, glob_ref); + if (lval_ctx) { // `globalVar.0 = rhs`, globalVar is rval inside lval + lval_ctx->capture_global_modification(glob_ref, local_ir_idx); + } + return local_ir_idx; } if (const auto* const_ref = sym->try_as()) { if (const_ref->is_int_const()) { @@ -407,15 +430,12 @@ static std::vector pre_compile_symbol(SrcLocation loc, const Symbol* #ifdef TOLK_DEBUG tolk_assert(static_cast(var_ref->ir_idx.size()) == var_ref->declared_type->calc_width_on_stack()); #endif - if (lval_ctx) { - lval_ctx->register_lval(loc, var_ref); - } return var_ref->ir_idx; } throw 
Fatal("pre_compile_symbol"); } -static std::vector process_assign(V v, CodeBlob& code) { +static std::vector process_assignment(V v, CodeBlob& code) { if (auto lhs_decl = v->get_lhs()->try_as()) { return pre_compile_let(code, lhs_decl->get_expr(), v->get_rhs(), v->loc); } else { @@ -492,12 +512,18 @@ static std::vector process_dot_access(V v, CodeBlob& if (!v->is_target_fun_ref()) { TypePtr obj_type = v->get_obj()->inferred_type; int index_at = std::get(v->target); - // `tensorVar.0`; since a tensor of N elems are N vars on a stack actually, calculate offset + // `tensorVar.0` if (const auto* t_tensor = obj_type->try_as()) { - if (lval_ctx) lval_ctx->register_lval(v->loc, v); - if (lval_ctx) lval_ctx->enter_rval_inside_lval(); + // handle `tensorVar.0 = rhs` if tensors is a global, special case, then the global will be read on demand + if (lval_ctx && !lval_ctx->is_rval_inside_lval()) { + if (auto sink = calc_sink_leftmost_obj(v); sink && sink->sym->try_as()) { + std::vector lval_ir_idx = code.create_tmp_var(v->inferred_type, v->loc, "(lval-global-tensor)"); + lval_ctx->capture_field_of_global_modification(v->get_obj(), index_at, lval_ir_idx); + return lval_ir_idx; + } + } + // since a tensor of N elems are N vars on a stack actually, calculate offset std::vector lhs_vars = pre_compile_expr(v->get_obj(), code, lval_ctx); - if (lval_ctx) lval_ctx->exit_rval_inside_lval(); int stack_width = t_tensor->items[index_at]->calc_width_on_stack(); int stack_offset = 0; for (int i = 0; i < index_at; ++i) { @@ -505,39 +531,26 @@ static std::vector process_dot_access(V v, CodeBlob& } return {lhs_vars.begin() + stack_offset, lhs_vars.begin() + stack_offset + stack_width}; } - // `tupleVar.0`; not to mess up, separate rvalue and lvalue cases + // `tupleVar.0` if (obj_type->try_as() || obj_type->try_as()) { - if (!lval_ctx) { - // `tupleVar.0` as rvalue: the same as "tupleAt(tupleVar, 0)" written in terms of IR vars - std::vector tuple_ir_idx = pre_compile_expr(v->get_obj(), 
code); - std::vector index_ir_idx = code.create_tmp_var(TypeDataInt::create(), v->get_identifier()->loc, "(tuple-idx)"); - code.emplace_back(v->loc, Op::_IntConst, index_ir_idx, td::make_refint(index_at)); - std::vector field_ir_idx = code.create_tmp_var(v->inferred_type, v->loc, "(tuple-field)"); - tolk_assert(tuple_ir_idx.size() == 1 && field_ir_idx.size() == 1); // tuples contain only 1-slot values - const FunctionData* builtin_sym = lookup_global_symbol("tupleAt")->as(); - code.emplace_back(v->loc, Op::_Call, field_ir_idx, std::vector{tuple_ir_idx[0], index_ir_idx[0]}, builtin_sym); - return field_ir_idx; - } else { - // `tupleVar.0 = rhs`: finally "tupleSetAt(tupleVar, rhs, 0)" will be done - uint64_t hash = lval_ctx->register_lval(v->loc, v); - if (const var_idx_t* field_ir_idx = lval_ctx->exists_already_known_tuple_index(hash)) { - return {*field_ir_idx}; // `(t.0.0, t.0.1) = rhs`, then "t.0" will be read (tupleAt) once - } - lval_ctx->enter_rval_inside_lval(); - std::vector tuple_ir_idx = pre_compile_expr(v->get_obj(), code, lval_ctx); - lval_ctx->exit_rval_inside_lval(); - std::vector index_ir_idx = code.create_tmp_var(TypeDataInt::create(), v->get_identifier()->loc, "(tuple-idx)"); - code.emplace_back(v->loc, Op::_IntConst, index_ir_idx, td::make_refint(index_at)); - std::vector field_ir_idx = code.create_tmp_var(v->inferred_type, v->loc, "(tuple-field)"); - if (lval_ctx->is_rval_inside_lval()) { // for `t.0.1 = rhs` "t.0" is rvalue inside lvalue - // for `t.0 = rhs` don't call tupleAt, but for `t.0.1 = rhs` do for t.0 (still don't for t.0.1) - const FunctionData* builtin_sym = lookup_global_symbol("tupleAt")->as(); - code.emplace_back(v->loc, Op::_Call, field_ir_idx, std::vector{tuple_ir_idx[0], index_ir_idx[0]}, builtin_sym); - } - lval_ctx->register_modified_tuple_index(hash, tuple_ir_idx[0], index_ir_idx[0], field_ir_idx[0]); - vars_modification_watcher.trigger_callbacks(tuple_ir_idx, v->loc); - return field_ir_idx; + // handle `tupleVar.0 = rhs`, "0 
SETINDEX" will be called when this was is modified + if (lval_ctx && !lval_ctx->is_rval_inside_lval() && calc_sink_leftmost_obj(v)) { + std::vector lval_ir_idx = code.create_tmp_var(v->inferred_type, v->loc, "(lval-tuple-field)"); + lval_ctx->capture_tuple_index_modification(v->get_obj(), index_at, lval_ir_idx); + return lval_ir_idx; } + // `tupleVar.0` as rvalue: the same as "tupleAt(tupleVar, 0)" written in terms of IR vars + std::vector tuple_ir_idx = pre_compile_expr(v->get_obj(), code); + std::vector index_ir_idx = code.create_tmp_var(TypeDataInt::create(), v->get_identifier()->loc, "(tuple-idx)"); + code.emplace_back(v->loc, Op::_IntConst, index_ir_idx, td::make_refint(index_at)); + std::vector field_ir_idx = code.create_tmp_var(v->inferred_type, v->loc, "(tuple-field)"); + tolk_assert(tuple_ir_idx.size() == 1 && field_ir_idx.size() == 1); // tuples contain only 1-slot values + const FunctionData* builtin_sym = lookup_global_symbol("tupleAt")->as(); + code.emplace_back(v->loc, Op::_Call, field_ir_idx, std::vector{tuple_ir_idx[0], index_ir_idx[0]}, builtin_sym); + if (lval_ctx && calc_sink_leftmost_obj(v)) { // `tupleVar.0.1 = rhs`, then `tupleVar.0` is rval inside lval + lval_ctx->capture_tuple_index_modification(v->get_obj(), index_at, field_ir_idx); + } + return field_ir_idx; } tolk_assert(false); } @@ -627,8 +640,8 @@ static std::vector process_function_call(V v, Code std::vector rvect = code.create_tmp_var(real_ret_type, v->loc, "(fun-call)"); left.insert(left.end(), rvect.begin(), rvect.end()); vars_modification_watcher.trigger_callbacks(left, v->loc); - code.emplace_back(v->loc, Op::_Let, std::move(left), rvect_apply); - local_lval.gen_ops_if_nonempty(code, v->loc); + code.emplace_back(v->loc, Op::_Let, left, rvect_apply); + local_lval.after_let(std::move(left), code, v->loc); rvect_apply = rvect; } @@ -710,7 +723,7 @@ std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, LValContext* case ast_reference: return pre_compile_symbol(v->loc, 
v->as()->sym, code, lval_ctx); case ast_assign: - return process_assign(v->as(), code); + return process_assignment(v->as(), code); case ast_set_assign: return process_set_assign(v->as(), code); case ast_binary_operator: diff --git a/tolk/pipe-constant-folding.cpp b/tolk/pipe-constant-folding.cpp index 98996c28..05d543b3 100644 --- a/tolk/pipe-constant-folding.cpp +++ b/tolk/pipe-constant-folding.cpp @@ -25,6 +25,8 @@ * * Currently, it just replaces `-1` (ast_unary_operator ast_int_const) with a number -1 * and `!true` with false. + * Also, all parenthesized `((expr))` are replaced with `expr`, it's a constant transformation. + * (not to handle parenthesized in optimization passes, like `((x)) == true`) * More rich constant folding should be done some day, but even without this, IR optimizations * (operating low-level stack variables) pretty manage to do all related optimizations. * Constant folding in the future, done at AST level, just would slightly reduce amount of work for optimizer. 
@@ -47,6 +49,14 @@ class ConstantFoldingReplacer final : public ASTReplacerInFunctionBody { return v_bool; } + AnyExprV replace(V v) override { + AnyExprV inner = parent::replace(v->get_expr()); + if (v->is_lvalue) { + inner->mutate()->assign_lvalue_true(); + } + return inner; + } + AnyExprV replace(V v) override { parent::replace(v); diff --git a/tolk/pipe-infer-types-and-calls.cpp b/tolk/pipe-infer-types-and-calls.cpp index ba5f77a7..abb060a2 100644 --- a/tolk/pipe-infer-types-and-calls.cpp +++ b/tolk/pipe-infer-types-and-calls.cpp @@ -133,8 +133,8 @@ static void fire_error_cannot_deduce_untyped_tuple_access(SrcLocation loc, int i // fire an error on `untypedTupleVar.0` when inferred as (int,int), or `[int, (int,int)]`, or other non-1 width in a tuple GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_cannot_put_non1_stack_width_arg_to_tuple(SrcLocation loc, TypePtr inferred_type) { - throw ParseError(loc, "can not put " + to_string(inferred_type) + " into a tuple, because it occupies " + std::to_string(inferred_type->calc_width_on_stack()) + " stack slots in TVM, not 1"); +static void fire_error_tuple_cannot_have_non1_stack_width_elem(SrcLocation loc, TypePtr inferred_type) { + throw ParseError(loc, "a tuple can not have " + to_string(inferred_type) + " inside, because it occupies " + std::to_string(inferred_type->calc_width_on_stack()) + " stack slots in TVM, not 1"); } // check correctness of called arguments counts and their type matching @@ -351,6 +351,8 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { return infer_bool_const(v->as()); case ast_local_vars_declaration: return infer_local_vars_declaration(v->as()); + case ast_local_var_lhs: + return infer_local_var_lhs(v->as()); case ast_assign: return infer_assignment(v->as()); case ast_set_assign: @@ -410,133 +412,71 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { assign_inferred_type(v, TypeDataBool::create()); } - static void infer_local_vars_declaration(V) { - // it can not 
appear as a standalone expression - // `var ... = rhs` is handled by ast_assign - tolk_assert(false); + void infer_local_vars_declaration(V v) { + infer_any_expr(v->get_expr()); + assign_inferred_type(v, v->get_expr()); + } + + static void infer_local_var_lhs(V v) { + // `var v = rhs`, inferring is called for `v` + // at the moment of inferring left side of assignment, we don't know type of rhs (since lhs is executed first) + // so, mark `v` as unknown + // later, v's inferred_type will be reassigned; see process_assignment_lhs_after_infer_rhs() + if (v->marked_as_redef) { + assign_inferred_type(v, v->var_ref->declared_type); + } else { + assign_inferred_type(v, v->declared_type ? v->declared_type : TypeDataUnknown::create()); + } } void infer_assignment(V v) { // v is assignment: `x = 5` / `var x = 5` / `var x: slice = 5` / `(cs,_) = f()` / `val (a,[b],_) = (a,t,0)` - // it's a tricky node to handle, because to infer rhs, at first we need to create hint from lhs - // and then to apply/check inferred rhs onto lhs - // about a hint: `var i: int = t.tupleAt(0)` is ok, but `var i = t.tupleAt(0)` not, since `tupleAt(t,i): T` + // execution flow is: lhs first, rhs second (at IR generation, also lhs is evaluated first, unlike FunC) + // after inferring lhs, use it for hint when inferring rhs + // example: `var i: int = t.tupleAt(0)` is ok (hint=int, T=int), but `var i = t.tupleAt(0)` not, since `tupleAt(t,i): T` AnyExprV lhs = v->get_lhs(); AnyExprV rhs = v->get_rhs(); - infer_any_expr(rhs, calc_hint_from_assignment_lhs(lhs)); + infer_any_expr(lhs); + infer_any_expr(rhs, lhs->inferred_type); process_assignment_lhs_after_infer_rhs(lhs, rhs->inferred_type, rhs); - assign_inferred_type(v, lhs); - } - - // having assignment like `var (i: int, s) = rhs` (its lhs is local vars declaration), - // create a contextual infer hint for rhs, `(int, unknown)` in this case - // this hint helps to deduce generics and to resolve unknown types while inferring rhs - static TypePtr 
calc_hint_from_assignment_lhs(AnyExprV lhs) { - // `var ... = rhs` - dig into left part - if (auto lhs_decl = lhs->try_as()) { - return calc_hint_from_assignment_lhs(lhs_decl->get_expr()); - } - - // inside `var v: int = rhs` / `var _ = rhs` / `var v redef = rhs` (lhs is "v" / "_" / "v") - if (auto lhs_var = lhs->try_as()) { - if (lhs_var->marked_as_redef) { - return lhs_var->var_ref->declared_type; - } - if (lhs_var->declared_type) { - return lhs_var->declared_type; - } - return TypeDataUnknown::create(); - } - - // `v = rhs` / `(c1, c2) = rhs` (lhs is "v" / "_" / "c1" / "c2" after recursion) - if (auto lhs_ref = lhs->try_as()) { - if (const auto* var_ref = lhs_ref->sym->try_as()) { - return var_ref->declared_type; - } - if (const auto* glob_ref = lhs_ref->sym->try_as()) { - return glob_ref->declared_type; - } - return TypeDataUnknown::create(); - } - - // `(v1, v2) = rhs` / `var (v1, v2) = rhs` - if (auto lhs_tensor = lhs->try_as()) { - std::vector sub_hints; - sub_hints.reserve(lhs_tensor->size()); - for (AnyExprV item : lhs_tensor->get_items()) { - sub_hints.push_back(calc_hint_from_assignment_lhs(item)); - } - return TypeDataTensor::create(std::move(sub_hints)); - } - - // `[v1, v2] = rhs` / `var [v1, v2] = rhs` - if (auto lhs_tuple = lhs->try_as()) { - std::vector sub_hints; - sub_hints.reserve(lhs_tuple->size()); - for (AnyExprV item : lhs_tuple->get_items()) { - sub_hints.push_back(calc_hint_from_assignment_lhs(item)); - } - return TypeDataTypedTuple::create(std::move(sub_hints)); - } - - // `a.0 = rhs` / `b.1.0 = rhs` (remember, its target is not assigned yet) - if (auto lhs_dot = lhs->try_as()) { - TypePtr obj_hint = calc_hint_from_assignment_lhs(lhs_dot->get_obj()); - std::string_view field_name = lhs_dot->get_field_name(); - if (field_name[0] >= '0' && field_name[0] <= '9') { - int index_at = std::stoi(std::string(field_name)); - if (const auto* t_tensor = obj_hint->try_as(); t_tensor && index_at < t_tensor->size()) { - return t_tensor->items[index_at]; 
- } - if (const auto* t_tuple = obj_hint->try_as(); t_tuple && index_at < t_tuple->size()) { - return t_tuple->items[index_at]; - } - } - return TypeDataUnknown::create(); - } - - return TypeDataUnknown::create(); + assign_inferred_type(v, rhs); // note, that the resulting type is rhs, not lhs } // handle (and dig recursively) into `var lhs = rhs` + // at this point, both lhs and rhs are already inferred, but lhs newly-declared vars are unknown (unless have declared_type) // examples: `var z = 5`, `var (x, [y]) = (2, [3])`, `var (x, [y]) = xy` + // the purpose is to update inferred_type of lhs vars (z, x, y) // while recursing, keep track of rhs if lhs and rhs have common shape (5 for z, 2 for x, [3] for [y], 3 for y) // (so that on type mismatch, point to corresponding rhs, example: `var (x, y:slice) = (1, 2)` point to 2 - void process_assignment_lhs_after_infer_rhs(AnyExprV lhs, TypePtr rhs_type, AnyExprV corresponding_maybe_rhs) { + static void process_assignment_lhs_after_infer_rhs(AnyExprV lhs, TypePtr rhs_type, AnyExprV corresponding_maybe_rhs) { + tolk_assert(lhs->inferred_type != nullptr); AnyExprV err_loc = corresponding_maybe_rhs ? corresponding_maybe_rhs : lhs; // `var ... 
= rhs` - dig into left part if (auto lhs_decl = lhs->try_as()) { process_assignment_lhs_after_infer_rhs(lhs_decl->get_expr(), rhs_type, corresponding_maybe_rhs); - assign_inferred_type(lhs, lhs_decl->get_expr()->inferred_type); return; } // inside `var v: int = rhs` / `var _ = rhs` / `var v redef = rhs` (lhs is "v" / "_" / "v") if (auto lhs_var = lhs->try_as()) { - TypePtr declared_type = lhs_var->declared_type; // `var v: int = rhs` (otherwise, nullptr) - if (lhs_var->marked_as_redef) { - tolk_assert(lhs_var->var_ref && lhs_var->var_ref->declared_type); - declared_type = lhs_var->var_ref->declared_type; - } - if (declared_type) { + if (lhs_var->inferred_type != TypeDataUnknown::create()) { // it's `var v: int` or redef + TypePtr declared_type = lhs_var->inferred_type; if (!declared_type->can_rhs_be_assigned(rhs_type)) { err_loc->error("can not assign " + to_string(rhs_type) + " to variable of type " + to_string(declared_type)); } - assign_inferred_type(lhs, declared_type); } else { if (rhs_type == TypeDataNullLiteral::create()) { fire_error_assign_always_null_to_variable(err_loc->loc, lhs_var->var_ref->try_as(), corresponding_maybe_rhs && corresponding_maybe_rhs->type == ast_null_keyword); } - assign_inferred_type(lhs, rhs_type); - assign_inferred_type(lhs_var->var_ref, lhs_var->inferred_type); + assign_inferred_type(lhs_var, rhs_type); + assign_inferred_type(lhs_var->var_ref, rhs_type); } return; } // `v = rhs` / `(c1, c2) = rhs` (lhs is "v" / "_" / "c1" / "c2" after recursion) if (lhs->try_as()) { - infer_any_expr(lhs); if (!lhs->inferred_type->can_rhs_be_assigned(rhs_type)) { err_loc->error("can not assign " + to_string(rhs_type) + " to variable of type " + to_string(lhs)); } @@ -554,13 +494,9 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { err_loc->error("can not assign " + to_string(rhs_type) + ", sizes mismatch"); } V rhs_tensor_maybe = corresponding_maybe_rhs ? 
corresponding_maybe_rhs->try_as() : nullptr; - std::vector types_list; - types_list.reserve(lhs_tensor->size()); for (int i = 0; i < lhs_tensor->size(); ++i) { process_assignment_lhs_after_infer_rhs(lhs_tensor->get_item(i), rhs_type_tensor->items[i], rhs_tensor_maybe ? rhs_tensor_maybe->get_item(i) : nullptr); - types_list.push_back(lhs_tensor->get_item(i)->inferred_type); } - assign_inferred_type(lhs, TypeDataTensor::create(std::move(types_list))); return; } @@ -575,25 +511,23 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { err_loc->error("can not assign " + to_string(rhs_type) + ", sizes mismatch"); } V rhs_tuple_maybe = corresponding_maybe_rhs ? corresponding_maybe_rhs->try_as() : nullptr; - std::vector types_list; - types_list.reserve(lhs_tuple->size()); for (int i = 0; i < lhs_tuple->size(); ++i) { process_assignment_lhs_after_infer_rhs(lhs_tuple->get_item(i), rhs_type_tuple->items[i], rhs_tuple_maybe ? rhs_tuple_maybe->get_item(i) : nullptr); - types_list.push_back(lhs_tuple->get_item(i)->inferred_type); } - assign_inferred_type(lhs, TypeDataTypedTuple::create(std::move(types_list))); return; } - // `_ = rhs` - if (lhs->type == ast_underscore) { - assign_inferred_type(lhs, TypeDataUnknown::create()); - return; + // check `untypedTuple.0 = rhs_tensor` and other non-1 width elements + if (auto lhs_dot = lhs->try_as()) { + if (lhs_dot->is_target_indexed_access() && lhs_dot->get_obj()->inferred_type == TypeDataTuple::create()) { + if (rhs_type->calc_width_on_stack() != 1) { + fire_error_tuple_cannot_have_non1_stack_width_elem(err_loc->loc, rhs_type); + } + } } - // here is something unhandled like `a.0 = rhs`, run regular inferring on rhs + // here is something unhandled like `a.0 = rhs`, just check type matching // for something strange like `f() = rhs` type inferring will pass, but will fail later - infer_any_expr(lhs, rhs_type); if (!lhs->inferred_type->can_rhs_be_assigned(rhs_type)) { err_loc->error("can not assign " + to_string(rhs_type) + " to " + 
to_string(lhs)); } @@ -895,14 +829,20 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { return; } if (obj_type->try_as()) { - if (hint == nullptr) { - fire_error_cannot_deduce_untyped_tuple_access(v->loc, index_at); - } - if (hint->calc_width_on_stack() != 1) { - fire_error_cannot_put_non1_stack_width_arg_to_tuple(v->loc, hint); + TypePtr item_type = nullptr; + if (v->is_lvalue && !hint) { // left side of assignment + item_type = TypeDataUnknown::create(); + } else { + if (hint == nullptr) { + fire_error_cannot_deduce_untyped_tuple_access(v->loc, index_at); + } + if (hint->calc_width_on_stack() != 1) { + fire_error_tuple_cannot_have_non1_stack_width_elem(v->loc, hint); + } + item_type = hint; } v->mutate()->assign_target(index_at); - assign_inferred_type(v, hint); + assign_inferred_type(v, item_type); return; } v_ident->error("type " + to_string(obj_type) + " is not indexable"); @@ -1081,7 +1021,7 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { AnyExprV item = v->get_item(i); infer_any_expr(item, tuple_hint && i < tuple_hint->size() ? tuple_hint->items[i] : nullptr); if (item->inferred_type->calc_width_on_stack() != 1) { - fire_error_cannot_put_non1_stack_width_arg_to_tuple(v->get_item(i)->loc, item->inferred_type); + fire_error_tuple_cannot_have_non1_stack_width_elem(v->get_item(i)->loc, item->inferred_type); } types_list.emplace_back(item->inferred_type); } diff --git a/tolk/pipe-optimize-boolean-expr.cpp b/tolk/pipe-optimize-boolean-expr.cpp index 03750256..a2e67047 100644 --- a/tolk/pipe-optimize-boolean-expr.cpp +++ b/tolk/pipe-optimize-boolean-expr.cpp @@ -25,7 +25,6 @@ * * Example: `boolVar == true` -> `boolVar`. * Example: `!!boolVar` -> `boolVar`. - * Also in unwraps parenthesis inside if condition and similar: `assert(((x)), 404)` -> `assert(x, 404)` * * todo some day, replace && || with & | when it's safe (currently, && always produces IFs in Fift) * It's tricky to implement whether replacing is safe. 
@@ -35,13 +34,6 @@ namespace tolk { -static AnyExprV unwrap_parenthesis(AnyExprV v) { - while (v->type == ast_parenthesized_expression) { - v = v->as()->get_expr(); - } - return v; -} - struct OptimizerBooleanExpressionsReplacer final : ASTReplacerInFunctionBody { static V create_int_const(SrcLocation loc, td::RefInt256&& intval) { auto v_int = createV(loc, std::move(intval), {}); @@ -117,9 +109,6 @@ protected: AnyV replace(V v) override { parent::replace(v); - if (v->get_cond()->type == ast_parenthesized_expression) { - v = createV(v->loc, v->is_ifnot, unwrap_parenthesis(v->get_cond()), v->get_if_body(), v->get_else_body()); - } // `if (!x)` -> ifnot(x) while (auto v_cond_unary = v->get_cond()->try_as()) { @@ -132,33 +121,6 @@ protected: return v; } - AnyV replace(V v) override { - parent::replace(v); - - if (v->get_cond()->type == ast_parenthesized_expression) { - v = createV(v->loc, unwrap_parenthesis(v->get_cond()), v->get_body()); - } - return v; - } - - AnyV replace(V v) override { - parent::replace(v); - - if (v->get_cond()->type == ast_parenthesized_expression) { - v = createV(v->loc, v->get_body(), unwrap_parenthesis(v->get_cond())); - } - return v; - } - - AnyV replace(V v) override { - parent::replace(v); - - if (v->get_cond()->type == ast_parenthesized_expression) { - v = createV(v->loc, unwrap_parenthesis(v->get_cond()), v->get_thrown_code()); - } - return v; - } - public: bool should_visit_function(const FunctionData* fun_ref) override { return fun_ref->is_code_function() && !fun_ref->is_generic_function();