diff --git a/tolk-tester/tests/allow_post_modification.tolk b/tolk-tester/tests/allow_post_modification.tolk index 5cfa2f3d..5e0ce6b9 100644 --- a/tolk-tester/tests/allow_post_modification.tolk +++ b/tolk-tester/tests/allow_post_modification.tolk @@ -2,85 +2,112 @@ fun unsafe_tuple(x: X): tuple asm "NOP"; fun inc(x: int, y: int): (int, int) { - return (x + y, y * 10); + return (x + y, y * 10); } fun `~inc`(mutate self: int, y: int): int { - val (newX, newY) = inc(self, y); - self = newX; - return newY; + val (newX, newY) = inc(self, y); + self = newX; + return newY; } +fun eq(v: X): X { return v; } +fun eq2(v: (int, int)) { return v; } +fun mul2(mutate dest: int, v: int): int { dest = v*2; return dest; } +fun multens(mutate self: (int, int), v: (int, int)): (int, int) { var (f, s) = self; var (m1, m2) = v; self = (f*m1, s*m2); return self; } + @method_id(11) fun test_return(x: int): (int, int, int, int, int, int, int) { - return (x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x); + return (x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x); } @method_id(12) fun test_assign(x: int): (int, int, int, int, int, int, int) { - var (x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int) = (x, x.`~inc`(x / 20), x, x=x*2, x, x+=1, x); - return (x1, x2, x3, x4, x5, x6, x7); + var (x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int) = (x, x.`~inc`(x / 20), x, x=x*2, x, x+=1, x); + return (x1, x2, x3, x4, x5, x6, x7); } @method_id(13) fun test_tuple(x: int): tuple { - var t: tuple = unsafe_tuple([x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x]); - return t; + var t: tuple = unsafe_tuple([x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x]); + return t; } @method_id(14) fun test_tuple_assign(x: int): (int, int, int, int, int, int, int) { - var [x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int] = [x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x]; - return (x1, x2, x3, x4, x5, x6, x7); + var [x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: 
int] = [x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x]; + return (x1, x2, x3, x4, x5, x6, x7); } fun foo1(x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int): (int, int, int, int, int, int, int) { - return (x1, x2, x3, x4, x5, x6, x7); + return (x1, x2, x3, x4, x5, x6, x7); } @method_id(15) fun test_call_1(x: int): (int, int, int, int, int, int, int) { - return foo1(x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x); + return foo1(x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x); } fun foo2(x1: int, x2: int, x3456: (int, int, int, int), x7: int): (int, int, int, int, int, int, int) { - var (x3: int, x4: int, x5: int, x6: int) = x3456; - return (x1, x2, x3, x4, x5, x6, x7); + var (x3: int, x4: int, x5: int, x6: int) = x3456; + return (x1, x2, x3, x4, x5, x6, x7); } @method_id(16) fun test_call_2(x: int): (int, int, int, int, int, int, int) { - return foo2(x, x.`~inc`(x / 20), (x, x = x * 2, x, x += 1), x); + return foo2(x, x.`~inc`(x / 20), (x, x = x * 2, x, x += 1), x); } fun asm_func(x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int): (int, int, int, int, int, int, int) -asm - (x4 x5 x6 x7 x1 x2 x3->0 1 2 3 4 5 6) "NOP"; + asm (x4 x5 x6 x7 x1 x2 x3->0 1 2 3 4 5 6) "NOP"; @method_id(17) fun test_call_asm_old(x: int): (int, int, int, int, int, int, int) { - return asm_func(x, x += 1, x, x, x.`~inc`(x / 20), x, x = x * 2); + return asm_func(x, x += 1, x, x, x.`~inc`(x / 20), x, x = x * 2); } @method_id(18) fun test_call_asm_new(x: int): (int, int, int, int, int, int, int) { - return asm_func(x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x); + return asm_func(x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x); } global xx: int; @method_id(19) -fun test_global(x: int): (int, int, int, int, int, int, int) { - xx = x; - return (xx, xx.`~inc`(xx / 20), xx, xx = xx * 2, xx, xx += 1, xx); +fun test_global(x: int) { + xx = x; + return (x, xx, xx.`~inc`(xx / 20), eq(xx += (x *= 0)), xx = xx * 2, xx, xx += 1, xx, x); } @method_id(20) fun test_if_else(x: 
int): (int, int, int, int, int) { - if (x > 10) { - return (x.`~inc`(8), x + 1, x = 1, x <<= 3, x); - } else { - xx = 9; - return (x, x.`~inc`(-4), x.`~inc`(-1), x >= 1, x = x + xx); - } + if (x > 10) { + return (x.`~inc`(8), x + 1, x = 1, x <<= 3, x); + } else { + xx = 9; + return (x, x.`~inc`(-4), x.`~inc`(-1), x >= 1, x = x + xx); + } +} + +@method_id(21) +fun test_assign_with_inner(x: int) { + return (x, x += 10, [(x, x += 20, eq(x -= 50), x)], eq2((x, x *= eq(x /= 2)))); +} + +@method_id(22) +fun test_assign_with_mutate(x: int) { + return (x, mul2(mutate x, x += 5), x.`~inc`(mul2(mutate x, x)), x); +} + +@method_id(23) +fun test_assign_tensor(x: (int, int)) { + var fs = (0, 0); + return (x, x = (20, 30), fs = x.multens((1, 2)), fs.multens(multens(mutate x, (-1, -1))), x, fs); +} + +global fs: (int, int); +@method_id(24) +fun test_assign_tensor_global(x: (int, int)) { + fs = (0, 0); + return (x, x = (20, 30), fs = x.multens((1, 2)), fs.multens(multens(mutate x, (-1, -1))), x, fs); } fun main() { @@ -96,9 +123,13 @@ fun main() { @testcase | 16 | 100 | 100 50 105 210 210 211 211 @testcase | 17 | 100 | 101 50 106 212 100 101 101 @testcase | 18 | 100 | 210 210 211 211 100 50 105 -@testcase | 19 | 100 | 100 50 105 210 210 211 211 +@testcase | 19 | 100 | 100 100 50 105 210 210 211 211 0 @testcase | 20 | 80 | 80 89 1 8 8 @testcase | 20 | 9 | 9 -40 -10 -1 13 +@testcase | 21 | 100 | 100 110 [ 110 130 80 80 ] 80 3200 +@testcase | 22 | 100 | 100 210 4200 630 +@testcase | 23 | 1 1 | 1 1 20 30 20 60 -400 -3600 -20 -60 -400 -3600 +@testcase | 24 | 1 1 | 1 1 20 30 20 60 -400 -3600 -20 -60 -400 -3600 @fif_codegen """ @@ -107,5 +138,5 @@ fun main() { inc CALLDICT // self newY }> """ -@code_hash 97139400653362069936987769894397430077752335662822462908581556703209313861576 +@code_hash 33262590582878205026101577472505372101182291690814957175155528952950621243206 */ diff --git a/tolk-tester/tests/cells-slices.tolk b/tolk-tester/tests/cells-slices.tolk index e1d28b8b..adb5ad22 
100644 --- a/tolk-tester/tests/cells-slices.tolk +++ b/tolk-tester/tests/cells-slices.tolk @@ -216,15 +216,15 @@ Note, that since 'compute-asm-ltr' became on be default, chaining methods codege """ test6 PROC:<{ // - NEWC // _1 - 1 PUSHINT // _1 _2=1 - SWAP // _2=1 _1 + NEWC // _0 + 1 PUSHINT // _0 _1=1 + SWAP // _1=1 _0 32 STU // _0 - 2 PUSHINT // _0 _6=2 - SWAP // _6=2 _0 + 2 PUSHINT // _0 _5=2 + SWAP // _5=2 _0 32 STU // _0 - 3 PUSHINT // _0 _10=3 - SWAP // _10=3 _0 + 3 PUSHINT // _0 _9=3 + SWAP // _9=3 _0 32 STU // _0 }> """ diff --git a/tolk-tester/tests/codegen_check_demo.tolk b/tolk-tester/tests/codegen_check_demo.tolk index 02379540..dc78abf8 100644 --- a/tolk-tester/tests/codegen_check_demo.tolk +++ b/tolk-tester/tests/codegen_check_demo.tolk @@ -35,7 +35,7 @@ Below, I just give examples of @fif_codegen tag: """ main PROC:<{ // s - 17 PUSHINT // s _3=17 + 17 PUSHINT // s _1=17 OVER // s z=17 t WHILE:<{ ... diff --git a/tolk-tester/tests/invalid-call-1.tolk b/tolk-tester/tests/invalid-call-1.tolk index 1c32422e..57a33c4b 100644 --- a/tolk-tester/tests/invalid-call-1.tolk +++ b/tolk-tester/tests/invalid-call-1.tolk @@ -5,5 +5,5 @@ fun main() { /** @compilation_should_fail The message is weird now, but later I'll rework error messages anyway. 
-@stderr cannot apply expression of type int to an expression of type (): cannot unify type () -> ??3 with int +@stderr cannot apply expression of type int to an expression of type (): cannot unify type () -> ??2 with int */ diff --git a/tolk-tester/tests/invalid-call-5.tolk b/tolk-tester/tests/invalid-call-5.tolk index 89ab026a..32905cd7 100644 --- a/tolk-tester/tests/invalid-call-5.tolk +++ b/tolk-tester/tests/invalid-call-5.tolk @@ -8,6 +8,6 @@ fun main() { /** @compilation_should_fail -@stderr rvalue expected +@stderr `_` can't be used as a value; it's a placeholder for a left side of assignment @stderr inc(_) */ diff --git a/tolk-tester/tests/invalid-const-1.tolk b/tolk-tester/tests/invalid-const-1.tolk new file mode 100644 index 00000000..10e8303a --- /dev/null +++ b/tolk-tester/tests/invalid-const-1.tolk @@ -0,0 +1,8 @@ +fun main() { + return 9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999; +} + +/** +@compilation_should_fail +@stderr invalid integer constant + */ diff --git a/tolk-tester/tests/invalid-mutate-1.tolk b/tolk-tester/tests/invalid-mutate-1.tolk index 237940fc..280d1e99 100644 --- a/tolk-tester/tests/invalid-mutate-1.tolk +++ b/tolk-tester/tests/invalid-mutate-1.tolk @@ -7,5 +7,5 @@ fun cantAssignToVal() { /** @compilation_should_fail -@stderr modifying an immutable variable `x` +@stderr modifying immutable variable `x` */ diff --git a/tolk-tester/tests/invalid-mutate-11.tolk b/tolk-tester/tests/invalid-mutate-11.tolk index 9f2c2601..dfc69851 100644 --- a/tolk-tester/tests/invalid-mutate-11.tolk +++ b/tolk-tester/tests/invalid-mutate-11.tolk @@ -4,5 +4,5 @@ fun load32(self: slice): int { /** @compilation_should_fail -@stderr modifying `self` (call a mutating method), which is immutable by default +@stderr modifying `self`, which is immutable by default */ diff --git a/tolk-tester/tests/invalid-mutate-2.tolk b/tolk-tester/tests/invalid-mutate-2.tolk index 7501fdaf..71afe730 100644 
--- a/tolk-tester/tests/invalid-mutate-2.tolk +++ b/tolk-tester/tests/invalid-mutate-2.tolk @@ -6,5 +6,5 @@ fun cantAssignToVal() { /** @compilation_should_fail -@stderr modifying an immutable variable `x` +@stderr modifying immutable variable `x` */ diff --git a/tolk-tester/tests/invalid-mutate-3.tolk b/tolk-tester/tests/invalid-mutate-3.tolk index c49973f7..d556c9ed 100644 --- a/tolk-tester/tests/invalid-mutate-3.tolk +++ b/tolk-tester/tests/invalid-mutate-3.tolk @@ -7,5 +7,5 @@ fun cantAssignToConst() { /** @compilation_should_fail -@stderr modifying an immutable variable `op_increase` +@stderr modifying immutable constant */ diff --git a/tolk-tester/tests/invalid-mutate-4.tolk b/tolk-tester/tests/invalid-mutate-4.tolk index f25a707c..5f2c111d 100644 --- a/tolk-tester/tests/invalid-mutate-4.tolk +++ b/tolk-tester/tests/invalid-mutate-4.tolk @@ -10,5 +10,5 @@ fun cantPassToMutatingFunction() { /** @compilation_should_fail -@stderr modifying an immutable variable `myVal` +@stderr modifying immutable variable `myVal` */ diff --git a/tolk-tester/tests/invalid-mutate-5.tolk b/tolk-tester/tests/invalid-mutate-5.tolk index fd8d1192..2b282cf0 100644 --- a/tolk-tester/tests/invalid-mutate-5.tolk +++ b/tolk-tester/tests/invalid-mutate-5.tolk @@ -9,6 +9,6 @@ fun cantCallMutatingMethod(c: cell) { /** @compilation_should_fail -@stderr modifying an immutable variable `s` (call a mutating method) +@stderr modifying immutable variable `s` @stderr s.loadUint */ diff --git a/tolk-tester/tests/invalid-mutate-6.tolk b/tolk-tester/tests/invalid-mutate-6.tolk index bb577ae4..749d9cab 100644 --- a/tolk-tester/tests/invalid-mutate-6.tolk +++ b/tolk-tester/tests/invalid-mutate-6.tolk @@ -11,6 +11,6 @@ fun cantCallMutatingFunctionWithImmutable() { /** @compilation_should_fail -@stderr modifying an immutable variable `op_increase` (call a mutating function) +@stderr modifying immutable constant @stderr inc(mutate op_increase) */ diff --git a/tolk-tester/tests/invalid-mutate-7.tolk 
b/tolk-tester/tests/invalid-mutate-7.tolk index 5b6b6afe..de3bce45 100644 --- a/tolk-tester/tests/invalid-mutate-7.tolk +++ b/tolk-tester/tests/invalid-mutate-7.tolk @@ -10,6 +10,6 @@ fun cantCallMutatingFunctionWithRvalue() { /** @compilation_should_fail -@stderr lvalue expected (call a mutating function) +@stderr literal can not be used as lvalue @stderr incBoth(mutate x, mutate 30) */ diff --git a/tolk-tester/tests/invalid-mutate-8.tolk b/tolk-tester/tests/invalid-mutate-8.tolk index 0dd7c568..9b14e28f 100644 --- a/tolk-tester/tests/invalid-mutate-8.tolk +++ b/tolk-tester/tests/invalid-mutate-8.tolk @@ -6,5 +6,5 @@ fun cantRedefImmutable() { /** @compilation_should_fail -@stderr modifying an immutable variable `x` (left side of assignment) +@stderr `redef` for immutable variable */ diff --git a/tolk-tester/tests/invalid-mutate-9.tolk b/tolk-tester/tests/invalid-mutate-9.tolk index 7e79052e..3489a288 100644 --- a/tolk-tester/tests/invalid-mutate-9.tolk +++ b/tolk-tester/tests/invalid-mutate-9.tolk @@ -4,6 +4,6 @@ fun increment(self: int) { /** @compilation_should_fail -@stderr modifying `self` (left side of assignment), which is immutable by default +@stderr modifying `self`, which is immutable by default @stderr probably, you want to declare `mutate self` */ diff --git a/tolk-tester/tests/invalid-pure-1.tolk b/tolk-tester/tests/invalid-pure-1.tolk index 5baa3292..4f0e9142 100644 --- a/tolk-tester/tests/invalid-pure-1.tolk +++ b/tolk-tester/tests/invalid-pure-1.tolk @@ -4,7 +4,7 @@ fun f_pure(): int { return f_impure(); } -fun f_impure(): int {} +fun f_impure(): int { return 0; } fun main(): int { return f_pure(); diff --git a/tolk-tester/tests/invalid-pure-3.tolk b/tolk-tester/tests/invalid-pure-3.tolk index f64b81ce..31d4f021 100644 --- a/tolk-tester/tests/invalid-pure-3.tolk +++ b/tolk-tester/tests/invalid-pure-3.tolk @@ -2,6 +2,7 @@ fun validate_input(input: cell): (int, int) { var (x, y, z, correct) = calculateCellSize(input, 10); assert(correct) throw 102; 
+ return (x, y); } @pure diff --git a/tolk-tester/tests/invalid-pure-4.tolk b/tolk-tester/tests/invalid-pure-4.tolk new file mode 100644 index 00000000..868498f6 --- /dev/null +++ b/tolk-tester/tests/invalid-pure-4.tolk @@ -0,0 +1,16 @@ +global set: int; + +@pure +fun someF(): int { + var set redef = 0; + return set; +} + +/** +@compilation_should_fail +@stderr +""" +an impure operation in a pure function +var set +""" +*/ diff --git a/tolk-tester/tests/invalid-redefinition-6.tolk b/tolk-tester/tests/invalid-redefinition-6.tolk new file mode 100644 index 00000000..e6b087c6 --- /dev/null +++ b/tolk-tester/tests/invalid-redefinition-6.tolk @@ -0,0 +1,10 @@ +const s1 = "asdf"; + +fun main() { + var s1 redef = "d"; +} + +/** +@compilation_should_fail +@stderr `redef` for unknown variable + */ diff --git a/tolk-tester/tests/invalid-typing-6.tolk b/tolk-tester/tests/invalid-typing-6.tolk new file mode 100644 index 00000000..dcdab5f1 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-6.tolk @@ -0,0 +1,8 @@ +fun failWhenTernaryConditionNotInt(cs: slice) { + return cs ? 
1 : 0; +} + +/** +@compilation_should_fail +@stderr condition of ternary ?: operator must be an integer + */ diff --git a/tolk-tester/tests/mutate-methods.tolk b/tolk-tester/tests/mutate-methods.tolk index b9184ca9..73a6591b 100644 --- a/tolk-tester/tests/mutate-methods.tolk +++ b/tolk-tester/tests/mutate-methods.tolk @@ -118,12 +118,19 @@ fun updateTwoItems(mutate self: (int, int), byValue: int) { self = (first + byValue, second + byValue); } +global t107_1: int; +global t107_2: int; + @method_id(107) fun testMutableTensor() { var t = (40, 50); t.updateTwoItems(10); updateTwoItems(mutate t, 10); - return t; + t107_1 = 1; + t107_2 = 2; + (t107_1, t107_2).updateTwoItems(10); + updateTwoItems(mutate (t107_1, t107_2), 10); + return (t, t107_1, t107_2); } @pure @@ -278,7 +285,7 @@ fun main(){} @testcase | 104 | | 1 2 3 @testcase | 105 | | 5 5 110 @testcase | 106 | | 160 110 -@testcase | 107 | | 60 70 +@testcase | 107 | | 60 70 21 22 @testcase | 110 | | 320 @testcase | 111 | | 55 55 @testcase | 112 | | [ 1 13 3 23 33 ] @@ -300,7 +307,7 @@ fun main(){} ... incrementTwoInPlace CALLDICT // x y sum1 -ROT - 10 PUSHINT // sum1 x y _9=10 + 10 PUSHINT // sum1 x y _8=10 incrementTwoInPlace CALLDICT // sum1 x y sum2 s1 s3 s0 XCHG3 // x y sum1 sum2 }> @@ -310,8 +317,8 @@ fun main(){} """ load_next PROC:<{ // cs - 32 LDI // _1 cs - SWAP // cs _1 + 32 LDI // _3 cs + SWAP // cs _3 }> """ @@ -319,7 +326,7 @@ fun main(){} """ testStoreUintPureUnusedResult PROC:<{ // - 0 PUSHINT // _12=0 + 0 PUSHINT // _11=0 }> """ @@ -330,7 +337,7 @@ fun main(){} NEWC // b STIX // _2 DROP // - 0 PUSHINT // _12=0 + 0 PUSHINT // _11=0 }> """ diff --git a/tolk-tester/tests/null-keyword.tolk b/tolk-tester/tests/null-keyword.tolk index cdfe5acf..8fcf2584 100644 --- a/tolk-tester/tests/null-keyword.tolk +++ b/tolk-tester/tests/null-keyword.tolk @@ -145,14 +145,14 @@ fun main() { """ test7 PROC:<{ ... 
- LDOPTREF // b _20 _19 + LDOPTREF // b _18 _17 DROP // b c - ISNULL // b _13 - 10 MULCONST // b _15 - SWAP // _15 b - ISNULL // _15 _16 - 0 EQINT // _15 _17 - ADD // _18 + ISNULL // b _11 + 10 MULCONST // b _13 + SWAP // _13 b + ISNULL // _13 _14 + 0 EQINT // _13 _15 + ADD // _16 }> """ */ diff --git a/tolk-tester/tests/self-keyword.tolk b/tolk-tester/tests/self-keyword.tolk index a339e7d0..ba779454 100644 --- a/tolk-tester/tests/self-keyword.tolk +++ b/tolk-tester/tests/self-keyword.tolk @@ -158,6 +158,44 @@ fun testNotMutatingChainableSelfMutateAnother(initial: int) { return (arg, c108, c109, x); } +fun pickG110(mutate self: int, mutate pushTo: tuple): self { + self += 10; + pushTo.tuplePush(c110); + return self; +} + +global tup110: tuple; +global c110: int; + +@method_id(110) +fun testMutateGlobalsLValue(init: int) { + c110 = init; + tup110 = createEmptyTuple(); + c110.incChained().incChained().pickG110(mutate tup110).incChained().pickG110(mutate tup110).incChained(); + return (c110, tup110); +} + +fun myTuplePush(mutate self: tuple, value: T): self { + self.tuplePush(value); + return self; +} + +fun myTupleAt(self: tuple, idx: int): T { + return self.tupleAt(idx); +} + +global tup111: tuple; + +@method_id(111) +fun testForallFunctionsWithSelf() { + var t = createEmptyTuple(); + tup111 = createEmptyTuple(); + t.myTuplePush(10); + tup111.myTuplePush(1).myTuplePush(2).myTuplePush(3); + return (t.myTupleAt(0), tup111.myTupleAt(tup111.tupleSize() - 1), tup111); +} + + fun main() { } @@ -179,6 +217,8 @@ fun main() { } @testcase | 109 | 200 | 200 3 1 2 @testcase | 109 | 100 | 100 0 0 1 @testcase | 109 | 102 | 102 2 1 2 +@testcase | 110 | 0 | 24 [ 2 13 ] +@testcase | 111 | | 10 3 [ 1 2 3 ] @fif_codegen """ diff --git a/tolk-tester/tests/try-func.tolk b/tolk-tester/tests/try-func.tolk index 7963a850..5ce03ff1 100644 --- a/tolk-tester/tests/try-func.tolk +++ b/tolk-tester/tests/try-func.tolk @@ -1,7 +1,6 @@ fun unsafeGetInt(any: X): int asm "NOP"; -@method_id(11) fun 
foo(x: int): int { try { if (x == 7) { @@ -14,7 +13,6 @@ fun foo(x: int): int { } @inline -@method_id(12) fun foo_inline(x: int): int { try { assert(!(x == 7)) throw 44; @@ -25,7 +23,6 @@ fun foo_inline(x: int): int { } @inline_ref -@method_id(13) fun foo_inlineref(x: int): int { try { if (x == 7) { throw (44, 2); } @@ -35,26 +32,25 @@ fun foo_inlineref(x: int): int { } } -@method_id(1) +@method_id(101) fun test(x: int, y: int, z: int): int { y = foo(y); return x * 100 + y * 10 + z; } -@method_id(2) +@method_id(102) fun test_inline(x: int, y: int, z: int): int { y = foo_inline(y); return x * 100 + y * 10 + z; } -@method_id(3) +@method_id(103) fun test_inlineref(x: int, y: int, z: int): int { y = foo_inlineref(y); return x * 100 + y * 10 + z; } @inline -@method_id(14) fun foo_inline_big( x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int, x8: int, x9: int, x10: int, x11: int, x12: int, x13: int, x14: int, x15: int, x16: int, x17: int, x18: int, x19: int, x20: int @@ -69,7 +65,7 @@ fun foo_inline_big( } } -@method_id(4) +@method_id(104) fun test_inline_big(x: int, y: int, z: int): int { y = foo_inline_big( y, y + 1, y + 2, y + 3, y + 4, y + 5, y + 6, y + 7, y + 8, y + 9, @@ -77,7 +73,6 @@ fun test_inline_big(x: int, y: int, z: int): int { return x * 1000000 + y * 1000 + z; } -@method_id(15) fun foo_big( x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int, x8: int, x9: int, x10: int, x11: int, x12: int, x13: int, x14: int, x15: int, x16: int, x17: int, x18: int, x19: int, x20: int @@ -92,7 +87,7 @@ fun foo_big( } } -@method_id(5) +@method_id(105) fun test_big(x: int, y: int, z: int): int { y = foo_big( y, y + 1, y + 2, y + 3, y + 4, y + 5, y + 6, y + 7, y + 8, y + 9, @@ -100,7 +95,7 @@ fun test_big(x: int, y: int, z: int): int { return x * 1000000 + y * 1000 + z; } -@method_id(16) +@method_id(106) fun test_catch_into_same(x: int): int { var code = x; try { @@ -112,7 +107,7 @@ fun test_catch_into_same(x: int): int { } -@method_id(17) 
+@method_id(107) fun test_catch_into_same_2(x: int): int { var code = x; try { @@ -124,28 +119,77 @@ fun test_catch_into_same_2(x: int): int { return code; } +global after046: int; + +// this bug existed in FunC and is fixed in v0.4.6 +fun bug_046_internal(op: int) { + if (op == 1) { + return; + } else if (op == 2) { + return; + } else { + throw 1; + } +} + +fun bug_046_called() { + after046 = 0; + try { + bug_046_internal(1337); + after046 = 1; // shouldn't be called + } catch(n) { + return; + } + return; +} + +@method_id(108) +fun bug_046_entrypoint() { + bug_046_called(); + return after046; +} + +global g_reg: int; + +@method_id(109) +fun test109(): (int, int) { + var l_reg = 10; + g_reg = 10; + try { + // note, that regardless of assignment, an exception RESTORES them to previous (to 10) + // it's very unexpected, but is considered to be a TVM feature, not a bug + g_reg = 999; + l_reg = 999; + bug_046_internal(999); // throws + } catch { + } + // returns (10,10) because of an exception, see a comment above + return (g_reg, l_reg); +} + fun main() { } /** - method_id | in | out -@testcase | 1 | 1 2 3 | 123 -@testcase | 1 | 3 8 9 | 389 -@testcase | 1 | 3 7 9 | 329 -@testcase | 2 | 1 2 3 | 123 -@testcase | 2 | 3 8 9 | 389 -@testcase | 2 | 3 7 9 | 329 -@testcase | 3 | 1 2 3 | 123 -@testcase | 3 | 3 8 9 | 389 -@testcase | 3 | 3 7 9 | 329 -@testcase | 4 | 4 8 9 | 4350009 -@testcase | 4 | 4 7 9 | 4001009 -@testcase | 5 | 4 8 9 | 4350009 -@testcase | 5 | 4 7 9 | 4001009 -@testcase | 16 | 5 | 5 -@testcase | 16 | 20 | 44 -@testcase | 17 | 5 | 5 -@testcase | 17 | 20 | 20 + method_id | in | out +@testcase | 101 | 1 2 3 | 123 +@testcase | 101 | 3 8 9 | 389 +@testcase | 101 | 3 7 9 | 329 +@testcase | 102 | 1 2 3 | 123 +@testcase | 102 | 3 8 9 | 389 +@testcase | 102 | 3 7 9 | 329 +@testcase | 103 | 1 2 3 | 123 +@testcase | 103 | 3 8 9 | 389 +@testcase | 103 | 3 7 9 | 329 +@testcase | 104 | 4 8 9 | 4350009 +@testcase | 104 | 4 7 9 | 4001009 +@testcase | 105 | 4 8 9 | 4350009 
+@testcase | 105 | 4 7 9 | 4001009 +@testcase | 106 | 5 | 5 +@testcase | 106 | 20 | 44 +@testcase | 107 | 5 | 5 +@testcase | 107 | 20 | 20 +@testcase | 108 | | 0 -@code_hash 73240939343624734070640372352271282883450660826541545137654364443860257436623 +@code_hash 39307974281105539319288356721945232226028429128341177951717392648324358675585 */ diff --git a/tolk-tester/tests/unreachable-1.tolk b/tolk-tester/tests/unreachable-1.tolk new file mode 100644 index 00000000..5b3cb1b0 --- /dev/null +++ b/tolk-tester/tests/unreachable-1.tolk @@ -0,0 +1,14 @@ +fun main(x: int) { + if (x) { + x = 10;;;;; + return x;;; + x = 20; + } + return -1; +} + +/** +@testcase | 0 | 1 | 10 +@stderr warning: unreachable code +@stderr x = 20; + */ diff --git a/tolk-tester/tests/unreachable-2.tolk b/tolk-tester/tests/unreachable-2.tolk new file mode 100644 index 00000000..aeadd8c6 --- /dev/null +++ b/tolk-tester/tests/unreachable-2.tolk @@ -0,0 +1,22 @@ +fun main(x: int) { + if (x) { + if (x > 10) { + return 1; // throw 1; + } else if (true) { + return -1; + } else { + return 2; // throw 2; + } + } else { + {{return 1;} + x = 30;} + } + assert(false, 10); +} + +/** +@testcase | 0 | 1 | -1 +@stderr warning: unreachable code +@stderr assert(false, 10) +@stderr x = 30 + */ diff --git a/tolk-tester/tests/var-apply.tolk b/tolk-tester/tests/var-apply.tolk index 9bee862a..6a84a4fa 100644 --- a/tolk-tester/tests/var-apply.tolk +++ b/tolk-tester/tests/var-apply.tolk @@ -15,8 +15,101 @@ fun testVarApply1() { return (s.loadInt(32), s.loadInt(32)); } +@inline +fun my_throw_always() { + throw 1000; +} + +@inline +fun get_raiser() { + return my_throw_always; +} + +@method_id(102) +fun testVarApplyWithoutSavingResult() { + try { + var raiser = get_raiser(); + raiser(); // `some_var()` is always impure, the compiler has no considerations about its runtime value + return 0; + } catch (code) { + return code; + } +} + +@inline +fun sum(a: int, b: int) { + assert(a + b < 24, 1000); + return a + b; +} + +@inline 
+fun mul(a: int, b: int) { + assert(a * b < 24, 1001); + return a * b; +} + +fun demo_handler(op: int, query_id: int, a: int, b: int): int { + if (op == 0xF2) { + val func = query_id % 2 == 0 ? sum : mul; + val result = func(a, b); + return 0; // result not used, we test that func is nevertheless called + } + if (op == 0xF4) { + val func = query_id % 2 == 0 ? sum : mul; + val result = func(a, b); + return result; + } + return -1; +} + +@method_id(103) +fun testVarApplyInTernary() { + var t: tuple = createEmptyTuple(); + try { + t.tuplePush(demo_handler(0xF2, 122, 100, 200)); + } catch(code) { + t.tuplePush(code); + } + try { + t.tuplePush(demo_handler(0xF4, 122, 100, 200)); + } catch(code) { + t.tuplePush(code); + } + try { + t.tuplePush(demo_handler(0xF2, 122, 10, 10)); + } catch(code) { + t.tuplePush(code); + } + try { + t.tuplePush(demo_handler(0xF2, 123, 10, 10)); + } catch(code) { + t.tuplePush(code); + } + return t; +} + +fun always_throw2(x: int) { + throw 239 + x; +} + +global global_f: int -> (); + +@method_id(104) +fun testGlobalVarApply() { + try { + global_f = always_throw2; + global_f(1); + return 0; + } catch (code) { + return code; + } +} + fun main() {} /** @testcase | 101 | | 1 2 +@testcase | 102 | | 1000 +@testcase | 103 | | [ 1000 1000 0 1001 ] +@testcase | 104 | | 240 */ diff --git a/tolk-tester/tolk-tester.js b/tolk-tester/tolk-tester.js index 2a3eb776..c7e71021 100644 --- a/tolk-tester/tolk-tester.js +++ b/tolk-tester/tolk-tester.js @@ -347,11 +347,11 @@ class TolkTestFile { if (exit_code === 0 && this.compilation_should_fail) throw new TolkCompilationSucceededError("compilation succeeded, but it should have failed") - if (exit_code !== 0 && this.compilation_should_fail) { - for (let should_include of this.stderr_includes) - should_include.check(stderr) + for (let should_include of this.stderr_includes) // @stderr is used to check errors and warnings + should_include.check(stderr) + + if (exit_code !== 0 && this.compilation_should_fail) return 
- } if (exit_code !== 0 && !this.compilation_should_fail) throw new TolkCompilationFailedError(`tolk exit_code = ${exit_code}`, stderr) diff --git a/tolk-tester/tolk-tester.py b/tolk-tester/tolk-tester.py index 261ab496..0b3c774c 100644 --- a/tolk-tester/tolk-tester.py +++ b/tolk-tester/tolk-tester.py @@ -327,9 +327,10 @@ class TolkTestFile: if exit_code == 0 and self.compilation_should_fail: raise TolkCompilationSucceededError("compilation succeeded, but it should have failed") + for should_include in self.stderr_includes: # @stderr is used to check errors and warnings + should_include.check(stderr) + if exit_code != 0 and self.compilation_should_fail: - for should_include in self.stderr_includes: - should_include.check(stderr) return if exit_code != 0 and not self.compilation_should_fail: diff --git a/tolk/CMakeLists.txt b/tolk/CMakeLists.txt index d2decea7..0c3e7c63 100644 --- a/tolk/CMakeLists.txt +++ b/tolk/CMakeLists.txt @@ -7,14 +7,22 @@ set(TOLK_SOURCE compiler-state.cpp ast.cpp ast-from-tokens.cpp + constant-evaluator.cpp pipe-discover-parse-sources.cpp pipe-register-symbols.cpp + pipe-resolve-symbols.cpp + pipe-calc-rvalue-lvalue.cpp + pipe-detect-unreachable.cpp + pipe-infer-check-types.cpp + pipe-refine-lvalue-for-mutate.cpp + pipe-check-rvalue-lvalue.cpp + pipe-check-pure-impure.cpp + pipe-constant-folding.cpp pipe-ast-to-legacy.cpp pipe-find-unused-symbols.cpp pipe-generate-fif-output.cpp unify-types.cpp abscode.cpp - gen-abscode.cpp analyzer.cpp asmops.cpp builtins.cpp diff --git a/tolk/abscode.cpp b/tolk/abscode.cpp index c1add683..253e8012 100644 --- a/tolk/abscode.cpp +++ b/tolk/abscode.cpp @@ -25,17 +25,6 @@ namespace tolk { * */ -TmpVar::TmpVar(var_idx_t _idx, TypeExpr* _type, sym_idx_t sym_idx, SrcLocation loc) - : v_type(_type), idx(_idx), sym_idx(sym_idx), coord(0), where(loc) { - if (!_type) { - v_type = TypeExpr::new_hole(); - } -} - -void TmpVar::set_location(SrcLocation loc) { - where = loc; -} - void TmpVar::dump(std::ostream& os) const 
{ show(os); os << " : " << v_type << " (width "; @@ -55,8 +44,8 @@ void TmpVar::dump(std::ostream& os) const { } void TmpVar::show(std::ostream& os, int omit_idx) const { - if (!is_unnamed()) { - os << G.symbols.get_name(sym_idx); + if (v_sym) { + os << v_sym->name; if (omit_idx >= 2) { return; } @@ -149,10 +138,6 @@ void VarDescr::set_const(std::string value) { val = _Const; } -void VarDescr::set_const_nan() { - set_const(td::make_refint()); -} - void VarDescr::operator|=(const VarDescr& y) { val &= y.val; if (is_int_const() && y.is_int_const() && cmp(int_const, y.int_const) != 0) { @@ -273,7 +258,7 @@ void Op::show(std::ostream& os, const std::vector& vars, std::string pfx case _Call: os << pfx << dis << "CALL: "; show_var_list(os, left, vars); - os << " := " << (fun_ref ? fun_ref->name() : "(null)") << " "; + os << " := " << (f_sym ? f_sym->name : "(null)") << " "; if ((mode & 4) && args.size() == right.size()) { show_var_list(os, args, vars); } else { @@ -332,11 +317,11 @@ void Op::show(std::ostream& os, const std::vector& vars, std::string pfx case _GlobVar: os << pfx << dis << "GLOBVAR "; show_var_list(os, left, vars); - os << " := " << (fun_ref ? fun_ref->name() : "(null)") << std::endl; + os << " := " << (g_sym ? g_sym->name : "(null)") << std::endl; break; case _SetGlob: os << pfx << dis << "SETGLOB "; - os << (fun_ref ? fun_ref->name() : "(null)") << " := "; + os << (g_sym ? 
g_sym->name : "(null)") << " := "; show_var_list(os, right, vars); os << std::endl; break; @@ -458,22 +443,22 @@ void CodeBlob::print(std::ostream& os, int flags) const { os << "-------- END ---------\n\n"; } -var_idx_t CodeBlob::create_var(TypeExpr* var_type, var_idx_t sym_idx, SrcLocation location) { - vars.emplace_back(var_cnt, var_type, sym_idx, location); +var_idx_t CodeBlob::create_var(TypeExpr* var_type, const LocalVarData* v_sym, SrcLocation location) { + vars.emplace_back(var_cnt, var_type, v_sym, location); return var_cnt++; } -bool CodeBlob::import_params(FormalArgList arg_list) { +bool CodeBlob::import_params(FormalArgList&& arg_list) { if (var_cnt || in_var_cnt) { return false; } std::vector list; for (const auto& par : arg_list) { TypeExpr* arg_type; - SymDef* arg_sym; + const LocalVarData* arg_sym; SrcLocation arg_loc; std::tie(arg_type, arg_sym, arg_loc) = par; - list.push_back(create_var(arg_type, arg_sym ? arg_sym->sym_idx : 0, arg_loc)); + list.push_back(create_var(arg_type, arg_sym, arg_loc)); } emplace_back(loc, Op::_Import, list); in_var_cnt = var_cnt; diff --git a/tolk/analyzer.cpp b/tolk/analyzer.cpp index 719df9b7..495ae03b 100644 --- a/tolk/analyzer.cpp +++ b/tolk/analyzer.cpp @@ -46,7 +46,7 @@ int CodeBlob::split_vars(bool strict) { if (k != 1) { var.coord = ~((n << 8) + k); for (int i = 0; i < k; i++) { - auto v = create_var(comp_types[i], vars[j].sym_idx, vars[j].where); + auto v = create_var(comp_types[i], vars[j].v_sym, vars[j].where); tolk_assert(v == n + i); tolk_assert(vars[v].idx == v); vars[v].coord = ((int)j << 8) + i + 1; @@ -732,15 +732,18 @@ VarDescrList Op::fwd_analyze(VarDescrList values) { } case _Call: { prepare_args(values); - auto func = dynamic_cast(fun_ref->value); - if (func) { + if (!f_sym->is_regular_function()) { std::vector res; res.reserve(left.size()); for (var_idx_t i : left) { res.emplace_back(i); } AsmOpList tmp; - func->compile(tmp, res, args, where); // abstract interpretation of res := f (args) + if 
(f_sym->is_asm_function()) { + std::get(f_sym->body)->compile(tmp); // abstract interpretation of res := f (args) + } else { + std::get(f_sym->body)->compile(tmp, res, args, where); + } int j = 0; for (var_idx_t i : left) { values.add_newval(i).set_value(res[j++]); @@ -878,27 +881,10 @@ bool Op::set_noreturn(bool flag) { return flag; } -void Op::set_impure(const CodeBlob &code) { - // todo calling this function with `code` is a bad design (flags are assigned after Op is constructed) - // later it's better to check this somewhere in code.emplace_back() - if (code.flags & CodeBlob::_ForbidImpure) { - throw ParseError(where, "an impure operation in a pure function"); - } +void Op::set_impure_flag() { flags |= _Impure; } -void Op::set_impure(const CodeBlob &code, bool flag) { - if (flag) { - if (code.flags & CodeBlob::_ForbidImpure) { - throw ParseError(where, "an impure operation in a pure function"); - } - flags |= _Impure; - } else { - flags &= ~_Impure; - } -} - - bool Op::mark_noreturn() { switch (cl) { case _Nop: diff --git a/tolk/asmops.cpp b/tolk/asmops.cpp index 8db75091..547922da 100644 --- a/tolk/asmops.cpp +++ b/tolk/asmops.cpp @@ -52,10 +52,10 @@ std::ostream& operator<<(std::ostream& os, AsmOp::SReg stack_reg) { } } -AsmOp AsmOp::Const(int arg, std::string push_op, td::RefInt256 origin) { +AsmOp AsmOp::Const(int arg, const std::string& push_op) { std::ostringstream os; os << arg << ' ' << push_op; - return AsmOp::Const(os.str(), origin); + return AsmOp::Const(os.str()); } AsmOp AsmOp::make_stk2(int a, int b, const char* str, int delta) { @@ -161,36 +161,36 @@ AsmOp AsmOp::UnTuple(int a) { return AsmOp::Custom(os.str(), 1, a); } -AsmOp AsmOp::IntConst(td::RefInt256 x) { +AsmOp AsmOp::IntConst(const td::RefInt256& x) { if (x->signed_fits_bits(8)) { - return AsmOp::Const(dec_string(x) + " PUSHINT", x); + return AsmOp::Const(dec_string(x) + " PUSHINT"); } if (!x->is_valid()) { - return AsmOp::Const("PUSHNAN", x); + return AsmOp::Const("PUSHNAN"); } int k = 
is_pos_pow2(x); if (k >= 0) { - return AsmOp::Const(k, "PUSHPOW2", x); + return AsmOp::Const(k, "PUSHPOW2"); } k = is_pos_pow2(x + 1); if (k >= 0) { - return AsmOp::Const(k, "PUSHPOW2DEC", x); + return AsmOp::Const(k, "PUSHPOW2DEC"); } k = is_pos_pow2(-x); if (k >= 0) { - return AsmOp::Const(k, "PUSHNEGPOW2", x); + return AsmOp::Const(k, "PUSHNEGPOW2"); } if (!x->mod_pow2_short(23)) { - return AsmOp::Const(dec_string(x) + " PUSHINTX", x); + return AsmOp::Const(dec_string(x) + " PUSHINTX"); } - return AsmOp::Const(dec_string(x) + " PUSHINT", x); + return AsmOp::Const(dec_string(x) + " PUSHINT"); } AsmOp AsmOp::BoolConst(bool f) { return AsmOp::Const(f ? "TRUE" : "FALSE"); } -AsmOp AsmOp::Parse(std::string custom_op) { +AsmOp AsmOp::Parse(const std::string& custom_op) { if (custom_op == "NOP") { return AsmOp::Nop(); } else if (custom_op == "SWAP") { diff --git a/tolk/ast-from-tokens.cpp b/tolk/ast-from-tokens.cpp index 1a1d199e..22d64442 100644 --- a/tolk/ast-from-tokens.cpp +++ b/tolk/ast-from-tokens.cpp @@ -75,7 +75,7 @@ static void fire_error_mix_and_or_no_parenthesis(SrcLocation loc, std::string_vi // the only way to suppress this error for the programmer is to use parenthesis // (how do we detect presence of parenthesis? 
simple: (0!=1) is ast_parenthesized_expr{ast_binary_operator}, // that's why if rhs->type == ast_binary_operator, it's not surrounded by parenthesis) -static void diagnose_bitwise_precedence(SrcLocation loc, std::string_view operator_name, AnyV lhs, AnyV rhs) { +static void diagnose_bitwise_precedence(SrcLocation loc, std::string_view operator_name, AnyExprV lhs, AnyExprV rhs) { // handle "flags & 0xFF != 0" (rhs = "0xFF != 0") if (rhs->type == ast_binary_operator && is_comparison_binary_op(rhs->as()->tok)) { fire_error_lower_precedence(loc, operator_name, rhs->as()->operator_name); @@ -90,7 +90,7 @@ static void diagnose_bitwise_precedence(SrcLocation loc, std::string_view operat // similar to above, but detect potentially invalid usage of && and || // since anyway, using parenthesis when both && and || occur in the same expression, // && and || have equal operator precedence in Tolk -static void diagnose_and_or_precedence(SrcLocation loc, AnyV lhs, TokenType rhs_tok, std::string_view rhs_operator_name) { +static void diagnose_and_or_precedence(SrcLocation loc, AnyExprV lhs, TokenType rhs_tok, std::string_view rhs_operator_name) { if (auto lhs_op = lhs->try_as()) { // handle "arg1 & arg2 | arg3" (lhs = "arg1 & arg2") if (is_bitwise_binary_op(lhs_op->tok) && is_bitwise_binary_op(rhs_tok) && lhs_op->tok != rhs_tok) { @@ -105,7 +105,7 @@ static void diagnose_and_or_precedence(SrcLocation loc, AnyV lhs, TokenType rhs_ } // diagnose "a << 8 + 1" (equivalent to "a << 9", probably unexpected) -static void diagnose_addition_in_bitshift(SrcLocation loc, std::string_view bitshift_operator_name, AnyV rhs) { +static void diagnose_addition_in_bitshift(SrcLocation loc, std::string_view bitshift_operator_name, AnyExprV rhs) { if (rhs->type == ast_binary_operator && is_add_or_sub_binary_op(rhs->as()->tok)) { fire_error_lower_precedence(loc, bitshift_operator_name, rhs->as()->operator_name); } @@ -122,7 +122,7 @@ static void fire_error_FunC_style_var_declaration(Lexer& lex) { } // 
replace (a == null) and similar to isNull(a) (call of a built-in function) -static AnyV maybe_replace_eq_null_with_isNull_call(V v) { +static AnyExprV maybe_replace_eq_null_with_isNull_call(V v) { bool has_null = v->get_lhs()->type == ast_null_keyword || v->get_rhs()->type == ast_null_keyword; bool replace = has_null && (v->tok == tok_eq || v->tok == tok_neq); if (!replace) { @@ -130,9 +130,9 @@ static AnyV maybe_replace_eq_null_with_isNull_call(V v) { } auto v_ident = createV(v->loc, "__isNull"); // built-in function - AnyV v_null = v->get_lhs()->type == ast_null_keyword ? v->get_rhs() : v->get_lhs(); - AnyV v_arg = createV(v->loc, v_null, false); - AnyV v_isNull = createV(v->loc, v_ident, createV(v->loc, {v_arg})); + AnyExprV v_null = v->get_lhs()->type == ast_null_keyword ? v->get_rhs() : v->get_lhs(); + AnyExprV v_arg = createV(v->loc, v_null, false); + AnyExprV v_isNull = createV(v->loc, v_ident, createV(v->loc, {v_arg})); if (v->tok == tok_neq) { v_isNull = createV(v->loc, "!", tok_logical_not, v_isNull); } @@ -230,7 +230,7 @@ static TypeExpr* parse_type(Lexer& lex, V genericsT_list) { return res; } -AnyV parse_expr(Lexer& lex); +AnyExprV parse_expr(Lexer& lex); static AnyV parse_parameter(Lexer& lex, V genericsT_list, bool is_first) { SrcLocation loc = lex.cur_location(); @@ -256,7 +256,6 @@ static AnyV parse_parameter(Lexer& lex, V genericsT_list, bo } else if (lex.tok() != tok_underscore) { lex.unexpected("parameter name"); } - auto v_ident = createV(lex.cur_location(), param_name); lex.next(); // parameter type after colon, also mandatory (even explicit ":auto") @@ -269,7 +268,7 @@ static AnyV parse_parameter(Lexer& lex, V genericsT_list, bo throw ParseError(loc, "`self` parameter must be strictly typed"); } - return createV(loc, v_ident, param_type, declared_as_mutate); + return createV(loc, param_name, param_type, declared_as_mutate); } static AnyV parse_global_var_declaration(Lexer& lex, const std::vector>& annotations) { @@ -316,7 +315,7 @@ static 
AnyV parse_constant_declaration(Lexer& lex, const std::vector parse_parameter_list(Lexer& lex, V(loc, expr, passed_as_mutate); } static V parse_argument_list(Lexer& lex) { SrcLocation loc = lex.cur_location(); - std::vector args; + std::vector args; lex.expect(tok_oppar, "`(`"); if (lex.tok() != tok_clpar) { args.push_back(parse_argument(lex)); @@ -371,7 +370,7 @@ static V parse_argument_list(Lexer& lex) { } // parse (expr) / [expr] / identifier / number -static AnyV parse_expr100(Lexer& lex) { +static AnyExprV parse_expr100(Lexer& lex) { SrcLocation loc = lex.cur_location(); switch (lex.tok()) { case tok_oppar: { @@ -380,12 +379,12 @@ static AnyV parse_expr100(Lexer& lex) { lex.next(); return createV(loc, {}); } - AnyV first = parse_expr(lex); + AnyExprV first = parse_expr(lex); if (lex.tok() == tok_clpar) { lex.next(); - return createV(loc, first); + return createV(loc, first); } - std::vector items(1, first); + std::vector items(1, first); while (lex.tok() == tok_comma) { lex.next(); items.emplace_back(parse_expr(lex)); @@ -399,7 +398,7 @@ static AnyV parse_expr100(Lexer& lex) { lex.next(); return createV(loc, {}); } - std::vector items(1, parse_expr(lex)); + std::vector items(1, parse_expr(lex)); while (lex.tok() == tok_comma) { lex.next(); items.emplace_back(parse_expr(lex)); @@ -408,9 +407,13 @@ static AnyV parse_expr100(Lexer& lex) { return createV(loc, std::move(items)); } case tok_int_const: { - std::string_view int_val = lex.cur_str(); + std::string_view orig_str = lex.cur_str(); + td::RefInt256 intval = td::string_to_int256(static_cast(orig_str)); + if (intval.is_null() || !intval->signed_fits_bits(257)) { + lex.error("invalid integer constant"); + } lex.next(); - return createV(loc, int_val); + return createV(loc, std::move(intval), orig_str); } case tok_string_const: { std::string_view str_val = lex.cur_str(); @@ -459,8 +462,8 @@ static AnyV parse_expr100(Lexer& lex) { } // parse E(args) -static AnyV parse_expr90(Lexer& lex) { - AnyV res = 
parse_expr100(lex); +static AnyExprV parse_expr90(Lexer& lex) { + AnyExprV res = parse_expr100(lex); if (lex.tok() == tok_oppar) { return createV(res->loc, res, parse_argument_list(lex)); } @@ -468,8 +471,8 @@ static AnyV parse_expr90(Lexer& lex) { } // parse E.method(...) (left-to-right) -static AnyV parse_expr80(Lexer& lex) { - AnyV lhs = parse_expr90(lex); +static AnyExprV parse_expr80(Lexer& lex) { + AnyExprV lhs = parse_expr90(lex); while (lex.tok() == tok_dot) { SrcLocation loc = lex.cur_location(); lex.next(); @@ -482,27 +485,27 @@ static AnyV parse_expr80(Lexer& lex) { } // parse ! ~ - + E (unary) -static AnyV parse_expr75(Lexer& lex) { +static AnyExprV parse_expr75(Lexer& lex) { TokenType t = lex.tok(); if (t == tok_logical_not || t == tok_bitwise_not || t == tok_minus || t == tok_plus) { SrcLocation loc = lex.cur_location(); std::string_view operator_name = lex.cur_str(); lex.next(); - AnyV rhs = parse_expr75(lex); + AnyExprV rhs = parse_expr75(lex); return createV(loc, operator_name, t, rhs); } return parse_expr80(lex); } // parse E * / % ^/ ~/ E (left-to-right) -static AnyV parse_expr30(Lexer& lex) { - AnyV lhs = parse_expr75(lex); +static AnyExprV parse_expr30(Lexer& lex) { + AnyExprV lhs = parse_expr75(lex); TokenType t = lex.tok(); while (t == tok_mul || t == tok_div || t == tok_mod || t == tok_divC || t == tok_divR) { SrcLocation loc = lex.cur_location(); std::string_view operator_name = lex.cur_str(); lex.next(); - AnyV rhs = parse_expr75(lex); + AnyExprV rhs = parse_expr75(lex); lhs = createV(loc, operator_name, t, lhs, rhs); t = lex.tok(); } @@ -510,14 +513,14 @@ static AnyV parse_expr30(Lexer& lex) { } // parse E + - E (left-to-right) -static AnyV parse_expr20(Lexer& lex) { - AnyV lhs = parse_expr30(lex); +static AnyExprV parse_expr20(Lexer& lex) { + AnyExprV lhs = parse_expr30(lex); TokenType t = lex.tok(); while (t == tok_minus || t == tok_plus) { SrcLocation loc = lex.cur_location(); std::string_view operator_name = lex.cur_str(); lex.next(); 
- AnyV rhs = parse_expr30(lex); + AnyExprV rhs = parse_expr30(lex); lhs = createV(loc, operator_name, t, lhs, rhs); t = lex.tok(); } @@ -525,14 +528,14 @@ static AnyV parse_expr20(Lexer& lex) { } // parse E << >> ~>> ^>> E (left-to-right) -static AnyV parse_expr17(Lexer& lex) { - AnyV lhs = parse_expr20(lex); +static AnyExprV parse_expr17(Lexer& lex) { + AnyExprV lhs = parse_expr20(lex); TokenType t = lex.tok(); while (t == tok_lshift || t == tok_rshift || t == tok_rshiftC || t == tok_rshiftR) { SrcLocation loc = lex.cur_location(); std::string_view operator_name = lex.cur_str(); lex.next(); - AnyV rhs = parse_expr20(lex); + AnyExprV rhs = parse_expr20(lex); diagnose_addition_in_bitshift(loc, operator_name, rhs); lhs = createV(loc, operator_name, t, lhs, rhs); t = lex.tok(); @@ -541,14 +544,14 @@ static AnyV parse_expr17(Lexer& lex) { } // parse E == < > <= >= != <=> E (left-to-right) -static AnyV parse_expr15(Lexer& lex) { - AnyV lhs = parse_expr17(lex); +static AnyExprV parse_expr15(Lexer& lex) { + AnyExprV lhs = parse_expr17(lex); TokenType t = lex.tok(); if (t == tok_eq || t == tok_lt || t == tok_gt || t == tok_leq || t == tok_geq || t == tok_neq || t == tok_spaceship) { SrcLocation loc = lex.cur_location(); std::string_view operator_name = lex.cur_str(); lex.next(); - AnyV rhs = parse_expr17(lex); + AnyExprV rhs = parse_expr17(lex); lhs = createV(loc, operator_name, t, lhs, rhs); if (t == tok_eq || t == tok_neq) { lhs = maybe_replace_eq_null_with_isNull_call(lhs->as()); @@ -558,14 +561,14 @@ static AnyV parse_expr15(Lexer& lex) { } // parse E & | ^ E (left-to-right) -static AnyV parse_expr14(Lexer& lex) { - AnyV lhs = parse_expr15(lex); +static AnyExprV parse_expr14(Lexer& lex) { + AnyExprV lhs = parse_expr15(lex); TokenType t = lex.tok(); while (t == tok_bitwise_and || t == tok_bitwise_or || t == tok_bitwise_xor) { SrcLocation loc = lex.cur_location(); std::string_view operator_name = lex.cur_str(); lex.next(); - AnyV rhs = parse_expr15(lex); + AnyExprV rhs = 
parse_expr15(lex); diagnose_bitwise_precedence(loc, operator_name, lhs, rhs); diagnose_and_or_precedence(loc, lhs, t, operator_name); lhs = createV(loc, operator_name, t, lhs, rhs); @@ -575,14 +578,14 @@ static AnyV parse_expr14(Lexer& lex) { } // parse E && || E (left-to-right) -static AnyV parse_expr13(Lexer& lex) { - AnyV lhs = parse_expr14(lex); +static AnyExprV parse_expr13(Lexer& lex) { + AnyExprV lhs = parse_expr14(lex); TokenType t = lex.tok(); while (t == tok_logical_and || t == tok_logical_or) { SrcLocation loc = lex.cur_location(); std::string_view operator_name = lex.cur_str(); lex.next(); - AnyV rhs = parse_expr14(lex); + AnyExprV rhs = parse_expr14(lex); diagnose_and_or_precedence(loc, lhs, t, operator_name); lhs = createV(loc, operator_name, t, lhs, rhs); t = lex.tok(); @@ -591,8 +594,8 @@ static AnyV parse_expr13(Lexer& lex) { } // parse E = += -= E and E ? E : E (right-to-left) -static AnyV parse_expr10(Lexer& lex) { - AnyV lhs = parse_expr13(lex); +static AnyExprV parse_expr10(Lexer& lex) { + AnyExprV lhs = parse_expr13(lex); TokenType t = lex.tok(); if (t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || t == tok_set_mod || t == tok_set_lshift || t == tok_set_rshift || @@ -601,36 +604,36 @@ static AnyV parse_expr10(Lexer& lex) { SrcLocation loc = lex.cur_location(); std::string_view operator_name = lex.cur_str(); lex.next(); - AnyV rhs = parse_expr10(lex); + AnyExprV rhs = parse_expr10(lex); return createV(loc, operator_name, t, lhs, rhs); } if (t == tok_question) { SrcLocation loc = lex.cur_location(); lex.next(); - AnyV when_true = parse_expr10(lex); + AnyExprV when_true = parse_expr10(lex); lex.expect(tok_colon, "`:`"); - AnyV when_false = parse_expr10(lex); + AnyExprV when_false = parse_expr10(lex); return createV(loc, lhs, when_true, when_false); } return lhs; } -AnyV parse_expr(Lexer& lex) { +AnyExprV parse_expr(Lexer& lex) { return parse_expr10(lex); } AnyV parse_statement(Lexer& lex); -static AnyV 
parse_var_declaration_lhs(Lexer& lex, bool is_immutable) { +static AnyExprV parse_var_declaration_lhs(Lexer& lex, bool is_immutable) { SrcLocation loc = lex.cur_location(); if (lex.tok() == tok_oppar) { lex.next(); - AnyV first = parse_var_declaration_lhs(lex, is_immutable); + AnyExprV first = parse_var_declaration_lhs(lex, is_immutable); if (lex.tok() == tok_clpar) { lex.next(); - return createV(loc, first); + return createV(loc, first); } - std::vector args(1, first); + std::vector args(1, first); while (lex.tok() == tok_comma) { lex.next(); args.push_back(parse_var_declaration_lhs(lex, is_immutable)); @@ -640,7 +643,7 @@ static AnyV parse_var_declaration_lhs(Lexer& lex, bool is_immutable) { } if (lex.tok() == tok_opbracket) { lex.next(); - std::vector args(1, parse_var_declaration_lhs(lex, is_immutable)); + std::vector args(1, parse_var_declaration_lhs(lex, is_immutable)); while (lex.tok() == tok_comma) { lex.next(); args.push_back(parse_var_declaration_lhs(lex, is_immutable)); @@ -679,12 +682,12 @@ static AnyV parse_local_vars_declaration(Lexer& lex) { bool is_immutable = lex.tok() == tok_val; lex.next(); - AnyV lhs = parse_var_declaration_lhs(lex, is_immutable); + AnyExprV lhs = parse_var_declaration_lhs(lex, is_immutable); if (lex.tok() != tok_assign) { lex.error("variables declaration must be followed by assignment: `var xxx = ...`"); } lex.next(); - AnyV assigned_val = parse_expr(lex); + AnyExprV assigned_val = parse_expr(lex); if (lex.tok() == tok_comma) { lex.error("multiple declarations are not allowed, split variables on separate lines"); @@ -708,7 +711,7 @@ static V parse_sequence(Lexer& lex) { static AnyV parse_return_statement(Lexer& lex) { SrcLocation loc = lex.cur_location(); lex.expect(tok_return, "`return`"); - AnyV child = lex.tok() == tok_semicolon // `return;` actually means `return ();` (which is void) + AnyExprV child = lex.tok() == tok_semicolon // `return;` actually means `return ();` (which is void) ? 
createV(lex.cur_location(), {}) : parse_expr(lex); lex.expect(tok_semicolon, "`;`"); @@ -720,7 +723,7 @@ static AnyV parse_if_statement(Lexer& lex, bool is_ifnot) { lex.expect(tok_if, "`if`"); lex.expect(tok_oppar, "`(`"); - AnyV cond = parse_expr(lex); + AnyExprV cond = parse_expr(lex); lex.expect(tok_clpar, "`)`"); // replace if(!expr) with ifnot(expr) (this should be done later, but for now, let this be right at parsing time) if (auto v_not = cond->try_as(); v_not && v_not->tok == tok_logical_not) { @@ -748,7 +751,7 @@ static AnyV parse_repeat_statement(Lexer& lex) { SrcLocation loc = lex.cur_location(); lex.expect(tok_repeat, "`repeat`"); lex.expect(tok_oppar, "`(`"); - AnyV cond = parse_expr(lex); + AnyExprV cond = parse_expr(lex); lex.expect(tok_clpar, "`)`"); V body = parse_sequence(lex); return createV(loc, cond, body); @@ -758,7 +761,7 @@ static AnyV parse_while_statement(Lexer& lex) { SrcLocation loc = lex.cur_location(); lex.expect(tok_while, "`while`"); lex.expect(tok_oppar, "`(`"); - AnyV cond = parse_expr(lex); + AnyExprV cond = parse_expr(lex); lex.expect(tok_clpar, "`)`"); V body = parse_sequence(lex); return createV(loc, cond, body); @@ -770,13 +773,13 @@ static AnyV parse_do_while_statement(Lexer& lex) { V body = parse_sequence(lex); lex.expect(tok_while, "`while`"); lex.expect(tok_oppar, "`(`"); - AnyV cond = parse_expr(lex); + AnyExprV cond = parse_expr(lex); lex.expect(tok_clpar, "`)`"); lex.expect(tok_semicolon, "`;`"); return createV(loc, body, cond); } -static AnyV parse_catch_variable(Lexer& lex) { +static AnyExprV parse_catch_variable(Lexer& lex) { SrcLocation loc = lex.cur_location(); if (lex.tok() == tok_identifier) { std::string_view var_name = lex.cur_str(); @@ -794,7 +797,7 @@ static AnyV parse_throw_statement(Lexer& lex) { SrcLocation loc = lex.cur_location(); lex.expect(tok_throw, "`throw`"); - AnyV thrown_code, thrown_arg; + AnyExprV thrown_code, thrown_arg; if (lex.tok() == tok_oppar) { // throw (code) or throw (code, arg) 
lex.next(); thrown_code = parse_expr(lex); @@ -802,12 +805,12 @@ static AnyV parse_throw_statement(Lexer& lex) { lex.next(); thrown_arg = parse_expr(lex); } else { - thrown_arg = createV(loc); + thrown_arg = createV(loc); } lex.expect(tok_clpar, "`)`"); } else { // throw code thrown_code = parse_expr(lex); - thrown_arg = createV(loc); + thrown_arg = createV(loc); } lex.expect(tok_semicolon, "`;`"); @@ -819,8 +822,8 @@ static AnyV parse_assert_statement(Lexer& lex) { lex.expect(tok_assert, "`assert`"); lex.expect(tok_oppar, "`(`"); - AnyV cond = parse_expr(lex); - AnyV thrown_code; + AnyExprV cond = parse_expr(lex); + AnyExprV thrown_code; if (lex.tok() == tok_comma) { // assert(cond, code) lex.next(); thrown_code = parse_expr(lex); @@ -840,7 +843,7 @@ static AnyV parse_try_catch_statement(Lexer& lex) { lex.expect(tok_try, "`try`"); V try_body = parse_sequence(lex); - std::vector catch_args; + std::vector catch_args; lex.expect(tok_catch, "`catch`"); SrcLocation catch_loc = lex.cur_location(); if (lex.tok() == tok_oppar) { @@ -889,13 +892,13 @@ AnyV parse_statement(Lexer& lex) { case tok_semicolon: { SrcLocation loc = lex.cur_location(); lex.next(); - return createV(loc); + return createV(loc); } case tok_break: case tok_continue: lex.error("break/continue from loops are not supported yet"); default: { - AnyV expr = parse_expr(lex); + AnyExprV expr = parse_expr(lex); lex.expect(tok_semicolon, "`;`"); return expr; } @@ -976,7 +979,7 @@ static V parse_annotation(Lexer& lex) { if (lex.tok() == tok_oppar) { SrcLocation loc_args = lex.cur_location(); lex.next(); - std::vector args; + std::vector args; args.push_back(parse_expr(lex)); while (lex.tok() == tok_comma) { lex.next(); @@ -1038,7 +1041,7 @@ static AnyV parse_function_declaration(Lexer& lex, const std::vector v_param_list = parse_parameter_list(lex, genericsT_list)->as(); - bool accepts_self = !v_param_list->empty() && v_param_list->get_param(0)->get_identifier()->name == "self"; + bool accepts_self = 
!v_param_list->empty() && v_param_list->get_param(0)->param_name == "self"; int n_mutate_params = v_param_list->get_mutate_params_count(); TypeExpr* ret_type = nullptr; @@ -1069,7 +1072,7 @@ static AnyV parse_function_declaration(Lexer& lex, const std::vectorget_params()) { if (v_param->as()->declared_as_mutate) { - ret_tensor_items.emplace_back(v_param->as()->param_type); + ret_tensor_items.emplace_back(v_param->as()->declared_type); } } ret_tensor_items.emplace_back(ret_type ? ret_type : TypeExpr::new_hole()); @@ -1079,7 +1082,7 @@ static AnyV parse_function_declaration(Lexer& lex, const std::vector(lex.cur_location()); + v_body = createV(lex.cur_location()); lex.next(); lex.expect(tok_semicolon, "`;`"); } else if (lex.tok() == tok_opbrace) { @@ -1098,7 +1101,7 @@ static AnyV parse_function_declaration(Lexer& lex, const std::vectoris_entrypoint = is_entrypoint; f_declaration->genericsT_list = genericsT_list; f_declaration->marked_as_get_method = is_get_method; - f_declaration->marked_as_builtin = v_body->type == ast_empty; + f_declaration->marked_as_builtin = v_body->type == ast_empty_statement; f_declaration->accepts_self = accepts_self; f_declaration->returns_self = returns_self; @@ -1142,7 +1145,7 @@ static AnyV parse_tolk_required_version(Lexer& lex) { loc.show_warning("the contract is written in Tolk v" + semver + ", but you use Tolk compiler v" + TOLK_VERSION + "; probably, it will lead to compilation errors or hash changes"); } - return createV(loc, tok_eq, semver); // semicolon is not necessary + return createV(loc, semver); // semicolon is not necessary } static AnyV parse_import_statement(Lexer& lex) { diff --git a/tolk/ast-from-tokens.h b/tolk/ast-from-tokens.h index 5f380c56..39574f9c 100644 --- a/tolk/ast-from-tokens.h +++ b/tolk/ast-from-tokens.h @@ -16,12 +16,10 @@ */ #pragma once -#include "src-file.h" +#include "fwd-declarations.h" namespace tolk { -struct ASTNodeBase; - -const ASTNodeBase* parse_src_file_to_ast(const SrcFile* file); +AnyV 
parse_src_file_to_ast(const SrcFile* file); } // namespace tolk diff --git a/tolk/ast-replacer.h b/tolk/ast-replacer.h index 478994e8..45f4c638 100644 --- a/tolk/ast-replacer.h +++ b/tolk/ast-replacer.h @@ -35,25 +35,39 @@ namespace tolk { class ASTReplacer { protected: - GNU_ATTRIBUTE_ALWAYS_INLINE static AnyV replace_children(const ASTNodeLeaf* v) { + GNU_ATTRIBUTE_ALWAYS_INLINE static AnyExprV replace_children(const ASTExprLeaf* v) { return v; } - GNU_ATTRIBUTE_ALWAYS_INLINE AnyV replace_children(const ASTNodeUnary* v) { - auto* v_mutable = const_cast(v); + GNU_ATTRIBUTE_ALWAYS_INLINE AnyExprV replace_children(const ASTExprUnary* v) { + auto* v_mutable = const_cast(v); v_mutable->child = replace(v_mutable->child); return v_mutable; } - GNU_ATTRIBUTE_ALWAYS_INLINE AnyV replace_children(const ASTNodeBinary* v) { - auto* v_mutable = const_cast(v); + GNU_ATTRIBUTE_ALWAYS_INLINE AnyExprV replace_children(const ASTExprBinary* v) { + auto* v_mutable = const_cast(v); v_mutable->lhs = replace(v->lhs); v_mutable->rhs = replace(v->rhs); return v_mutable; } - GNU_ATTRIBUTE_ALWAYS_INLINE AnyV replace_children(const ASTNodeVararg* v) { - auto* v_mutable = const_cast(v); + GNU_ATTRIBUTE_ALWAYS_INLINE AnyExprV replace_children(const ASTExprVararg* v) { + auto* v_mutable = const_cast(v); + for (AnyExprV& child : v_mutable->children) { + child = replace(child); + } + return v_mutable; + } + + GNU_ATTRIBUTE_ALWAYS_INLINE AnyV replace_children(const ASTStatementUnary* v) { + auto* v_mutable = const_cast(v); + v_mutable->child = replace(v_mutable->child); + return v_mutable; + } + + GNU_ATTRIBUTE_ALWAYS_INLINE AnyV replace_children(const ASTStatementVararg* v) { + auto* v_mutable = const_cast(v); for (AnyV& child : v_mutable->children) { child = replace(child); } @@ -64,44 +78,50 @@ public: virtual ~ASTReplacer() = default; virtual AnyV replace(AnyV v) = 0; + virtual AnyExprV replace(AnyExprV v) = 0; }; class ASTReplacerInFunctionBody : public ASTReplacer { protected: using parent = 
ASTReplacerInFunctionBody; - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return 
replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } - AnyV replace(AnyV v) final { + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + + AnyExprV replace(AnyExprV v) final { switch (v->type) { - case ast_empty: return replace(v->as()); - case ast_parenthesized_expr: return replace(v->as()); + case ast_empty_expression: return replace(v->as()); + case ast_parenthesized_expression: return replace(v->as()); case ast_tensor: return replace(v->as()); case ast_tensor_square: return replace(v->as()); 
case ast_identifier: return replace(v->as()); @@ -110,12 +130,23 @@ protected: case ast_bool_const: return replace(v->as()); case ast_null_keyword: return replace(v->as()); case ast_self_keyword: return replace(v->as()); + case ast_argument: return replace(v->as()); + case ast_argument_list: return replace(v->as()); case ast_function_call: return replace(v->as()); case ast_dot_method_call: return replace(v->as()); case ast_underscore: return replace(v->as()); case ast_unary_operator: return replace(v->as()); case ast_binary_operator: return replace(v->as()); case ast_ternary_operator: return replace(v->as()); + case ast_local_var: return replace(v->as()); + default: + throw UnexpectedASTNodeType(v, "ASTReplacerInFunctionBody::replace"); + } + } + + AnyV replace(AnyV v) final { + switch (v->type) { + case ast_empty_statement: return replace(v->as()); case ast_return_statement: return replace(v->as()); case ast_sequence: return replace(v->as()); case ast_repeat_statement: return replace(v->as()); @@ -125,11 +156,13 @@ protected: case ast_assert_statement: return replace(v->as()); case ast_try_catch_statement: return replace(v->as()); case ast_if_statement: return replace(v->as()); - case ast_local_var: return replace(v->as()); case ast_local_vars_declaration: return replace(v->as()); case ast_asm_body: return replace(v->as()); - default: - throw UnexpectedASTNodeType(v, "ASTReplacerInFunctionBody::visit"); + default: { + // be very careful, don't forget to handle all statements (not expressions) above! 
+ AnyExprV as_expr = reinterpret_cast(v); + return replace(as_expr); + } } } @@ -139,22 +172,18 @@ public: } }; -class ASTReplacerAllFunctionsInFile : public ASTReplacerInFunctionBody { -protected: - using parent = ASTReplacerAllFunctionsInFile; - - virtual bool should_enter_function(V v) = 0; - -public: - void start_replacing_in_file(V v_file) { - for (AnyV v : v_file->get_toplevel_declarations()) { - if (auto v_function = v->try_as()) { - if (should_enter_function(v_function)) { - replace(v_function->get_body()); +template +void replace_ast_of_all_functions(const AllSrcFiles& all_files) { + for (const SrcFile* file : all_files) { + for (AnyV v : file->ast->as()->get_toplevel_declarations()) { + if (auto v_func = v->try_as()) { + if (v_func->is_regular_function()) { + BodyReplacerT visitor; + visitor.start_replacing_in_function(v_func); } } } } -}; +} } // namespace tolk diff --git a/tolk/ast-stringifier.h b/tolk/ast-stringifier.h index 759873b0..cc91371c 100644 --- a/tolk/ast-stringifier.h +++ b/tolk/ast-stringifier.h @@ -31,8 +31,9 @@ namespace tolk { class ASTStringifier final : public ASTVisitor { constexpr static std::pair name_pairs[] = { - {ast_empty, "ast_empty"}, - {ast_parenthesized_expr, "ast_parenthesized_expr"}, + {ast_empty_statement, "ast_empty_statement"}, + {ast_empty_expression, "ast_empty_expression"}, + {ast_parenthesized_expression, "ast_parenthesized_expression"}, {ast_tensor, "ast_tensor"}, {ast_tensor_square, "ast_tensor_square"}, {ast_identifier, "ast_identifier"}, @@ -115,7 +116,7 @@ class ASTStringifier final : public ASTVisitor { case ast_identifier: return static_cast(v->as()->name); case ast_int_const: - return static_cast(v->as()->int_val); + return static_cast(v->as()->orig_str); case ast_string_const: if (char modifier = v->as()->modifier) { return "\"" + static_cast(v->as()->str_val) + "\"" + std::string(1, modifier); @@ -146,21 +147,21 @@ class ASTStringifier final : public ASTVisitor { return 
annotation_kinds[static_cast(v->as()->kind)].second; case ast_parameter: { std::ostringstream os; - os << v->as()->param_type; - return static_cast(v->as()->get_identifier()->name) + ": " + os.str(); + os << v->as()->declared_type; + return static_cast(v->as()->param_name) + ": " + os.str(); } case ast_function_declaration: { std::string param_names; for (int i = 0; i < v->as()->get_num_params(); i++) { if (!param_names.empty()) param_names += ","; - param_names += v->as()->get_param(i)->get_identifier()->name; + param_names += v->as()->get_param(i)->param_name; } return "fun " + static_cast(v->as()->get_identifier()->name) + "(" + param_names + ")"; } case ast_local_var: { std::ostringstream os; - os << v->as()->declared_type; + os << (v->as()->inferred_type ? v->as()->inferred_type : v->as()->declared_type); if (auto v_ident = v->as()->get_identifier()->try_as()) { return static_cast(v_ident->name) + ":" + os.str(); } @@ -202,8 +203,9 @@ public: void visit(AnyV v) override { switch (v->type) { - case ast_empty: return handle_vertex(v->as()); - case ast_parenthesized_expr: return handle_vertex(v->as()); + case ast_empty_statement: return handle_vertex(v->as()); + case ast_empty_expression: return handle_vertex(v->as()); + case ast_parenthesized_expression: return handle_vertex(v->as()); case ast_tensor: return handle_vertex(v->as()); case ast_tensor_square: return handle_vertex(v->as()); case ast_identifier: return handle_vertex(v->as()); diff --git a/tolk/ast-visitor.h b/tolk/ast-visitor.h index d0a7bfaf..a67f6800 100644 --- a/tolk/ast-visitor.h +++ b/tolk/ast-visitor.h @@ -37,20 +37,40 @@ namespace tolk { class ASTVisitor { protected: - GNU_ATTRIBUTE_ALWAYS_INLINE static void visit_children(const ASTNodeLeaf* v) { + GNU_ATTRIBUTE_ALWAYS_INLINE static void visit_children(const ASTExprLeaf* v) { static_cast(v); } - GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTNodeUnary* v) { + GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTExprUnary* v) { 
visit(v->child); } - GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTNodeBinary* v) { + GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTExprBinary* v) { visit(v->lhs); visit(v->rhs); } - GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTNodeVararg* v) { + GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTExprVararg* v) { + for (AnyExprV child : v->children) { + visit(child); + } + } + + GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTStatementUnary* v) { + visit(v->child); + } + + GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTStatementVararg* v) { + for (AnyV child : v->children) { + visit(child); + } + } + + GNU_ATTRIBUTE_ALWAYS_INLINE static void visit_children(const ASTOtherLeaf* v) { + static_cast(v); + } + + GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTOtherVararg* v) { for (AnyV child : v->children) { visit(child); } @@ -66,8 +86,9 @@ class ASTVisitorFunctionBody : public ASTVisitor { protected: using parent = ASTVisitorFunctionBody; - virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } @@ -76,8 +97,10 @@ protected: virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void 
visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } @@ -87,6 +110,8 @@ protected: virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } @@ -95,8 +120,9 @@ protected: void visit(AnyV v) final { switch (v->type) { - case ast_empty: return visit(v->as()); - case ast_parenthesized_expr: return visit(v->as()); + case ast_empty_statement: return visit(v->as()); + case ast_empty_expression: return visit(v->as()); + case ast_parenthesized_expression: return visit(v->as()); case ast_tensor: return visit(v->as()); case ast_tensor_square: return visit(v->as()); case ast_identifier: return visit(v->as()); @@ -105,6 +131,8 @@ protected: case ast_bool_const: return visit(v->as()); case ast_null_keyword: return visit(v->as()); case ast_self_keyword: return visit(v->as()); + case ast_argument: return visit(v->as()); + case ast_argument_list: return visit(v->as()); case ast_function_call: return visit(v->as()); case ast_dot_method_call: return visit(v->as()); case ast_underscore: return visit(v->as()); @@ -129,27 +157,23 @@ protected: } public: - void start_visiting_function(V v_function) { + virtual void start_visiting_function(V v_function) { visit(v_function->get_body()); } }; -class ASTVisitorAllFunctionsInFile : public ASTVisitorFunctionBody { -protected: - using parent = ASTVisitorAllFunctionsInFile; - - virtual bool should_enter_function(V v) = 0; - -public: - void start_visiting_file(V v_file) { - for (AnyV v : v_file->get_toplevel_declarations()) { +template +void visit_ast_of_all_functions(const AllSrcFiles& all_files) { + for (const SrcFile* file : 
all_files) { + for (AnyV v : file->ast->as()->get_toplevel_declarations()) { if (auto v_func = v->try_as()) { - if (should_enter_function(v_func)) { - visit(v_func->get_body()); + if (v_func->is_regular_function()) { + BodyVisitorT visitor; + visitor.start_visiting_function(v_func); } } } } -}; +} } // namespace tolk diff --git a/tolk/ast.cpp b/tolk/ast.cpp index b1af5100..4e78b013 100644 --- a/tolk/ast.cpp +++ b/tolk/ast.cpp @@ -79,7 +79,7 @@ int Vertex::lookup_idx(std::string_view nameT) const { int Vertex::lookup_idx(std::string_view param_name) const { for (size_t idx = 0; idx < children.size(); ++idx) { - if (children[idx] && children[idx]->as()->get_identifier()->name == param_name) { + if (children[idx] && children[idx]->as()->param_name == param_name) { return static_cast(idx); } } @@ -96,8 +96,64 @@ int Vertex::get_mutate_params_count() const { return n; } -void Vertex::mutate_set_src_file(const SrcFile* file) const { - const_cast(this)->file = file; +// --------------------------------------------------------- +// "assign" methods +// +// From the user's point of view, all AST vertices are constant, fields are public, but can't be modified. +// The only way to modify a field is to call "mutate()" and then use these "assign_*" methods. +// Therefore, there is a guarantee, that all AST mutations are done via these methods, +// easily searched by usages, and there is no another way to modify any other field. 
+ +void ASTNodeExpressionBase::assign_inferred_type(TypeExpr* type) { + this->inferred_type = type; +} + +void ASTNodeExpressionBase::assign_rvalue_true() { + this->is_rvalue = true; +} + +void ASTNodeExpressionBase::assign_lvalue_true() { + this->is_lvalue = true; +} + +void Vertex::assign_sym(const Symbol* sym) { + this->sym = sym; +} + +void Vertex::assign_param_ref(const LocalVarData* self_param) { + this->param_ref = self_param; +} + +void Vertex::assign_fun_ref(const FunctionData* fun_ref) { + this->fun_maybe = fun_ref; +} + +void Vertex::assign_fun_ref(const FunctionData* fun_ref) { + this->fun_ref = fun_ref; +} + +void Vertex::assign_var_ref(const GlobalVarData* var_ref) { + this->var_ref = var_ref; +} + +void Vertex::assign_const_ref(const GlobalConstData* const_ref) { + this->const_ref = const_ref; +} + +void Vertex::assign_param_ref(const LocalVarData* param_ref) { + this->param_ref = param_ref; +} + +void Vertex::assign_fun_ref(const FunctionData* fun_ref) { + this->fun_ref = fun_ref; +} + +void Vertex::assign_var_ref(const Symbol* var_ref) { + this->var_maybe = var_ref; +} + +void Vertex::assign_src_file(const SrcFile* file) { + this->file = file; } } // namespace tolk diff --git a/tolk/ast.h b/tolk/ast.h index fd2b27cb..ccc4ac58 100644 --- a/tolk/ast.h +++ b/tolk/ast.h @@ -17,10 +17,12 @@ #pragma once #include +#include "fwd-declarations.h" #include "platform-utils.h" #include "src-file.h" #include "type-expr.h" #include "lexer.h" +#include "symtable.h" /* * Here we introduce AST representation of Tolk source code. @@ -32,14 +34,18 @@ * * From the user's point of view, all AST vertices are constant. All API is based on constancy. * Even though fields of vertex structs are public, they can't be modified, since vertices are accepted by const ref. 
- * Generally, there are two ways of accepting a vertex: + * Generally, there are three ways of accepting a vertex: * * AnyV (= const ASTNodeBase*) * the only you can do with this vertex is to see v->type (ASTNodeType) and to cast via v->as() + * * AnyExprV (= const ASTNodeExpressionBase*) + * in contains expression-specific properties (lvalue/rvalue, inferred type) * * V (= const Vertex*) * a specific type of vertex, you can use its fields and methods * There is one way of creating a vertex: * * createV(...constructor_args) (= new Vertex(...)) * vertices are currently created on a heap, without any custom memory arena, just allocated and never deleted + * The only way to modify a field is to use "mutate()" method (drops constancy, the only point of mutation) + * and then to call "assign_*" method, like "assign_sym", "assign_src_file", etc. * * Having AnyV and knowing its node_type, a call * v->as() @@ -59,8 +65,9 @@ namespace tolk { enum ASTNodeType { - ast_empty, - ast_parenthesized_expr, + ast_empty_statement, + ast_empty_expression, + ast_parenthesized_expression, ast_tensor, ast_tensor_square, ast_identifier, @@ -111,10 +118,6 @@ enum class AnnotationKind { unknown, }; -struct ASTNodeBase; - -using AnyV = const ASTNodeBase*; - template struct Vertex; @@ -157,7 +160,7 @@ struct ASTNodeBase { return type == node_type ? 
static_cast>(this) : nullptr; } - #ifdef TOLK_DEBUG +#ifdef TOLK_DEBUG std::string to_debug_string() const { return to_debug_string(false); } std::string to_debug_string(bool colored) const; void debug_print() const; @@ -167,46 +170,120 @@ struct ASTNodeBase { void error(const std::string& err_msg) const; }; -struct ASTNodeLeaf : ASTNodeBase { +struct ASTNodeExpressionBase : ASTNodeBase { + TypeExpr* inferred_type = nullptr; // todo make it const + bool is_rvalue: 1 = false; + bool is_lvalue: 1 = false; + + ASTNodeExpressionBase* mutate() const { return const_cast(this); } + void assign_inferred_type(TypeExpr* type); + void assign_rvalue_true(); + void assign_lvalue_true(); + + ASTNodeExpressionBase(ASTNodeType type, SrcLocation loc) : ASTNodeBase(type, loc) {} +}; + +struct ASTNodeStatementBase : ASTNodeBase { + ASTNodeStatementBase(ASTNodeType type, SrcLocation loc) : ASTNodeBase(type, loc) {} +}; + +struct ASTExprLeaf : ASTNodeExpressionBase { friend class ASTVisitor; friend class ASTReplacer; protected: - ASTNodeLeaf(ASTNodeType type, SrcLocation loc) - : ASTNodeBase(type, loc) {} + ASTExprLeaf(ASTNodeType type, SrcLocation loc) + : ASTNodeExpressionBase(type, loc) {} }; -struct ASTNodeUnary : ASTNodeBase { +struct ASTExprUnary : ASTNodeExpressionBase { + friend class ASTVisitor; + friend class ASTReplacer; + +protected: + AnyExprV child; + + ASTExprUnary(ASTNodeType type, SrcLocation loc, AnyExprV child) + : ASTNodeExpressionBase(type, loc), child(child) {} +}; + +struct ASTExprBinary : ASTNodeExpressionBase { + friend class ASTVisitor; + friend class ASTReplacer; + +protected: + AnyExprV lhs; + AnyExprV rhs; + + ASTExprBinary(ASTNodeType type, SrcLocation loc, AnyExprV lhs, AnyExprV rhs) + : ASTNodeExpressionBase(type, loc), lhs(lhs), rhs(rhs) {} +}; + +struct ASTExprVararg : ASTNodeExpressionBase { + friend class ASTVisitor; + friend class ASTReplacer; + +protected: + std::vector children; + + ASTExprVararg(ASTNodeType type, SrcLocation loc, std::vector 
children) + : ASTNodeExpressionBase(type, loc), children(std::move(children)) {} + +public: + int size() const { return static_cast(children.size()); } + bool empty() const { return children.empty(); } +}; + +struct ASTStatementUnary : ASTNodeStatementBase { friend class ASTVisitor; friend class ASTReplacer; protected: AnyV child; - ASTNodeUnary(ASTNodeType type, SrcLocation loc, AnyV child) - : ASTNodeBase(type, loc), child(child) {} + AnyExprV child_as_expr() const { return reinterpret_cast(child); } + + ASTStatementUnary(ASTNodeType type, SrcLocation loc, AnyV child) + : ASTNodeStatementBase(type, loc), child(child) {} }; -struct ASTNodeBinary : ASTNodeBase { - friend class ASTVisitor; - friend class ASTReplacer; - -protected: - AnyV lhs; - AnyV rhs; - - ASTNodeBinary(ASTNodeType type, SrcLocation loc, AnyV lhs, AnyV rhs) - : ASTNodeBase(type, loc), lhs(lhs), rhs(rhs) {} -}; - -struct ASTNodeVararg : ASTNodeBase { +struct ASTStatementVararg : ASTNodeStatementBase { friend class ASTVisitor; friend class ASTReplacer; protected: std::vector children; - ASTNodeVararg(ASTNodeType type, SrcLocation loc, std::vector children) + AnyV child(int i) const { return children.at(i); } + AnyExprV child_as_expr(int i) const { return reinterpret_cast(children.at(i)); } + + ASTStatementVararg(ASTNodeType type, SrcLocation loc, std::vector children) + : ASTNodeStatementBase(type, loc), children(std::move(children)) {} + +public: + int size() const { return static_cast(children.size()); } + bool empty() const { return children.empty(); } +}; + +struct ASTOtherLeaf : ASTNodeBase { + friend class ASTVisitor; + friend class ASTReplacer; + +protected: + ASTOtherLeaf(ASTNodeType type, SrcLocation loc) + : ASTNodeBase(type, loc) {} +}; + +struct ASTOtherVararg : ASTNodeBase { + friend class ASTVisitor; + friend class ASTReplacer; + +protected: + std::vector children; + + AnyV child(int i) const { return children.at(i); } + + ASTOtherVararg(ASTNodeType type, SrcLocation loc, std::vector 
children) : ASTNodeBase(type, loc), children(std::move(children)) {} public: @@ -217,309 +294,383 @@ public: // --------------------------------------------------------- template<> -struct Vertex final : ASTNodeLeaf { +struct Vertex final : ASTStatementVararg { explicit Vertex(SrcLocation loc) - : ASTNodeLeaf(ast_empty, loc) {} + : ASTStatementVararg(ast_empty_statement, loc, {}) {} }; template<> -struct Vertex final : ASTNodeUnary { - AnyV get_expr() const { return child; } - - Vertex(SrcLocation loc, AnyV expr) - : ASTNodeUnary(ast_parenthesized_expr, loc, expr) {} +struct Vertex final : ASTExprLeaf { + explicit Vertex(SrcLocation loc) + : ASTExprLeaf(ast_empty_expression, loc) {} }; template<> -struct Vertex final : ASTNodeVararg { - const std::vector& get_items() const { return children; } - AnyV get_item(int i) const { return children.at(i); } +struct Vertex final : ASTExprUnary { + AnyExprV get_expr() const { return child; } - Vertex(SrcLocation loc, std::vector items) - : ASTNodeVararg(ast_tensor, loc, std::move(items)) {} + Vertex(SrcLocation loc, AnyExprV expr) + : ASTExprUnary(ast_parenthesized_expression, loc, expr) {} }; template<> -struct Vertex final : ASTNodeVararg { - const std::vector& get_items() const { return children; } - AnyV get_item(int i) const { return children.at(i); } +struct Vertex final : ASTExprVararg { + const std::vector& get_items() const { return children; } + AnyExprV get_item(int i) const { return children.at(i); } - Vertex(SrcLocation loc, std::vector items) - : ASTNodeVararg(ast_tensor_square, loc, std::move(items)) {} + Vertex(SrcLocation loc, std::vector items) + : ASTExprVararg(ast_tensor, loc, std::move(items)) {} }; template<> -struct Vertex final : ASTNodeLeaf { +struct Vertex final : ASTExprVararg { + const std::vector& get_items() const { return children; } + AnyExprV get_item(int i) const { return children.at(i); } + + Vertex(SrcLocation loc, std::vector items) + : ASTExprVararg(ast_tensor_square, loc, 
std::move(items)) {} +}; + +template<> +struct Vertex final : ASTExprLeaf { + const Symbol* sym = nullptr; // always filled (after resolved); points to local / global / function / constant std::string_view name; + Vertex* mutate() const { return const_cast(this); } + void assign_sym(const Symbol* sym); + Vertex(SrcLocation loc, std::string_view name) - : ASTNodeLeaf(ast_identifier, loc), name(name) {} + : ASTExprLeaf(ast_identifier, loc) + , name(name) {} }; template<> -struct Vertex final : ASTNodeLeaf { - std::string_view int_val; +struct Vertex final : ASTExprLeaf { + td::RefInt256 intval; // parsed value, 255 for "0xFF" + std::string_view orig_str; // original "0xFF"; empty for nodes generated by compiler (e.g. in constant folding) - Vertex(SrcLocation loc, std::string_view int_val) - : ASTNodeLeaf(ast_int_const, loc), int_val(int_val) {} + Vertex(SrcLocation loc, td::RefInt256 intval, std::string_view orig_str) + : ASTExprLeaf(ast_int_const, loc) + , intval(std::move(intval)) + , orig_str(orig_str) {} }; template<> -struct Vertex final : ASTNodeLeaf { +struct Vertex final : ASTExprLeaf { std::string_view str_val; char modifier; + bool is_bitslice() const { + char m = modifier; + return m == 0 || m == 's' || m == 'a'; + } + bool is_intval() const { + char m = modifier; + return m == 'u' || m == 'h' || m == 'H' || m == 'c'; + } + Vertex(SrcLocation loc, std::string_view str_val, char modifier) - : ASTNodeLeaf(ast_string_const, loc), str_val(str_val), modifier(modifier) {} + : ASTExprLeaf(ast_string_const, loc) + , str_val(str_val), modifier(modifier) {} }; template<> -struct Vertex final : ASTNodeLeaf { +struct Vertex final : ASTExprLeaf { bool bool_val; Vertex(SrcLocation loc, bool bool_val) - : ASTNodeLeaf(ast_bool_const, loc), bool_val(bool_val) {} + : ASTExprLeaf(ast_bool_const, loc) + , bool_val(bool_val) {} }; template<> -struct Vertex final : ASTNodeLeaf { +struct Vertex final : ASTExprLeaf { explicit Vertex(SrcLocation loc) - : 
ASTNodeLeaf(ast_null_keyword, loc) {} + : ASTExprLeaf(ast_null_keyword, loc) {} }; template<> -struct Vertex final : ASTNodeLeaf { +struct Vertex final : ASTExprLeaf { + const LocalVarData* param_ref = nullptr; // filled after resolve identifiers, points to `self` parameter + + Vertex* mutate() const { return const_cast(this); } + void assign_param_ref(const LocalVarData* self_param); + explicit Vertex(SrcLocation loc) - : ASTNodeLeaf(ast_self_keyword, loc) {} + : ASTExprLeaf(ast_self_keyword, loc) {} }; template<> -struct Vertex final : ASTNodeUnary { +struct Vertex final : ASTExprUnary { bool passed_as_mutate; // when called `f(mutate arg)`, not `f(arg)` - AnyV get_expr() const { return child; } + AnyExprV get_expr() const { return child; } - explicit Vertex(SrcLocation loc, AnyV expr, bool passed_as_mutate) - : ASTNodeUnary(ast_argument, loc, expr), passed_as_mutate(passed_as_mutate) {} + Vertex(SrcLocation loc, AnyExprV expr, bool passed_as_mutate) + : ASTExprUnary(ast_argument, loc, expr) + , passed_as_mutate(passed_as_mutate) {} }; template<> -struct Vertex final : ASTNodeVararg { - const std::vector& get_arguments() const { return children; } +struct Vertex final : ASTExprVararg { + const std::vector& get_arguments() const { return children; } auto get_arg(int i) const { return children.at(i)->as(); } - explicit Vertex(SrcLocation loc, std::vector arguments) - : ASTNodeVararg(ast_argument_list, loc, std::move(arguments)) {} + Vertex(SrcLocation loc, std::vector arguments) + : ASTExprVararg(ast_argument_list, loc, std::move(arguments)) {} }; template<> -struct Vertex final : ASTNodeBinary { - AnyV get_called_f() const { return lhs; } +struct Vertex final : ASTExprBinary { + const FunctionData* fun_maybe = nullptr; // filled after resolve; remains nullptr for `localVar()` / `getF()()` + + AnyExprV get_called_f() const { return lhs; } auto get_arg_list() const { return rhs->as(); } int get_num_args() const { return rhs->as()->size(); } auto get_arg(int i) const 
{ return rhs->as()->get_arg(i); } - Vertex(SrcLocation loc, AnyV lhs_f, V arguments) - : ASTNodeBinary(ast_function_call, loc, lhs_f, arguments) {} + Vertex* mutate() const { return const_cast(this); } + void assign_fun_ref(const FunctionData* fun_ref); + + Vertex(SrcLocation loc, AnyExprV lhs_f, V arguments) + : ASTExprBinary(ast_function_call, loc, lhs_f, arguments) {} }; template<> -struct Vertex final : ASTNodeBinary { +struct Vertex final : ASTExprBinary { + const FunctionData* fun_ref = nullptr; // points to global function (after resolve) std::string_view method_name; - AnyV get_obj() const { return lhs; } + AnyExprV get_obj() const { return lhs; } auto get_arg_list() const { return rhs->as(); } + int get_num_args() const { return rhs->as()->size(); } + auto get_arg(int i) const { return rhs->as()->get_arg(i); } - Vertex(SrcLocation loc, std::string_view method_name, AnyV lhs, V arguments) - : ASTNodeBinary(ast_dot_method_call, loc, lhs, arguments), method_name(method_name) {} + Vertex* mutate() const { return const_cast(this); } + void assign_fun_ref(const FunctionData* fun_ref); + + Vertex(SrcLocation loc, std::string_view method_name, AnyExprV lhs, V arguments) + : ASTExprBinary(ast_dot_method_call, loc, lhs, arguments) + , method_name(method_name) {} }; template<> -struct Vertex final : ASTNodeUnary { - TypeExpr* declared_type; // may be nullptr +struct Vertex final : ASTStatementUnary { + const GlobalVarData* var_ref = nullptr; // filled after register + TypeExpr* declared_type; auto get_identifier() const { return child->as(); } + Vertex* mutate() const { return const_cast(this); } + void assign_var_ref(const GlobalVarData* var_ref); + Vertex(SrcLocation loc, V name_identifier, TypeExpr* declared_type) - : ASTNodeUnary(ast_global_var_declaration, loc, name_identifier), declared_type(declared_type) {} + : ASTStatementUnary(ast_global_var_declaration, loc, name_identifier) + , declared_type(declared_type) {} }; template<> -struct Vertex final : 
ASTNodeBinary { +struct Vertex final : ASTStatementVararg { + const GlobalConstData* const_ref = nullptr; // filled after register TypeExpr* declared_type; // may be nullptr - auto get_identifier() const { return lhs->as(); } - AnyV get_init_value() const { return rhs; } + auto get_identifier() const { return child(0)->as(); } + AnyExprV get_init_value() const { return child_as_expr(1); } - Vertex(SrcLocation loc, V name_identifier, TypeExpr* declared_type, AnyV init_value) - : ASTNodeBinary(ast_constant_declaration, loc, name_identifier, init_value), declared_type(declared_type) {} + Vertex* mutate() const { return const_cast(this); } + void assign_const_ref(const GlobalConstData* const_ref); + + Vertex(SrcLocation loc, V name_identifier, TypeExpr* declared_type, AnyExprV init_value) + : ASTStatementVararg(ast_constant_declaration, loc, {name_identifier, init_value}) + , declared_type(declared_type) {} }; template<> -struct Vertex final : ASTNodeLeaf { +struct Vertex final : ASTExprLeaf { explicit Vertex(SrcLocation loc) - : ASTNodeLeaf(ast_underscore, loc) {} + : ASTExprLeaf(ast_underscore, loc) {} }; template<> -struct Vertex final : ASTNodeUnary { +struct Vertex final : ASTExprUnary { std::string_view operator_name; TokenType tok; - AnyV get_rhs() const { return child; } + AnyExprV get_rhs() const { return child; } - Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyV rhs) - : ASTNodeUnary(ast_unary_operator, loc, rhs), operator_name(operator_name), tok(tok) {} + Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyExprV rhs) + : ASTExprUnary(ast_unary_operator, loc, rhs) + , operator_name(operator_name), tok(tok) {} }; template<> -struct Vertex final : ASTNodeBinary { +struct Vertex final : ASTExprBinary { std::string_view operator_name; TokenType tok; - AnyV get_lhs() const { return lhs; } - AnyV get_rhs() const { return rhs; } + AnyExprV get_lhs() const { return lhs; } + AnyExprV get_rhs() const { return rhs; } - 
Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyV lhs, AnyV rhs) - : ASTNodeBinary(ast_binary_operator, loc, lhs, rhs), operator_name(operator_name), tok(tok) {} + bool is_set_assign() const { + TokenType t = tok; + return t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || + t == tok_set_mod || t == tok_set_lshift || t == tok_set_rshift || + t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor; + } + + bool is_assign() const { + return tok == tok_assign; + } + + Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyExprV lhs, AnyExprV rhs) + : ASTExprBinary(ast_binary_operator, loc, lhs, rhs) + , operator_name(operator_name), tok(tok) {} }; template<> -struct Vertex final : ASTNodeVararg { - AnyV get_cond() const { return children.at(0); } - AnyV get_when_true() const { return children.at(1); } - AnyV get_when_false() const { return children.at(2); } +struct Vertex final : ASTExprVararg { + AnyExprV get_cond() const { return children.at(0); } + AnyExprV get_when_true() const { return children.at(1); } + AnyExprV get_when_false() const { return children.at(2); } - Vertex(SrcLocation loc, AnyV cond, AnyV when_true, AnyV when_false) - : ASTNodeVararg(ast_ternary_operator, loc, {cond, when_true, when_false}) {} + Vertex(SrcLocation loc, AnyExprV cond, AnyExprV when_true, AnyExprV when_false) + : ASTExprVararg(ast_ternary_operator, loc, {cond, when_true, when_false}) {} }; template<> -struct Vertex : ASTNodeUnary { - AnyV get_return_value() const { return child; } +struct Vertex : ASTStatementUnary { + AnyExprV get_return_value() const { return child_as_expr(); } - Vertex(SrcLocation loc, AnyV child) - : ASTNodeUnary(ast_return_statement, loc, child) {} + Vertex(SrcLocation loc, AnyExprV child) + : ASTStatementUnary(ast_return_statement, loc, child) {} }; template<> -struct Vertex final : ASTNodeVararg { +struct Vertex final : ASTStatementVararg { SrcLocation 
loc_end; const std::vector& get_items() const { return children; } AnyV get_item(int i) const { return children.at(i); } Vertex(SrcLocation loc, SrcLocation loc_end, std::vector items) - : ASTNodeVararg(ast_sequence, loc, std::move(items)), loc_end(loc_end) {} + : ASTStatementVararg(ast_sequence, loc, std::move(items)) + , loc_end(loc_end) {} }; template<> -struct Vertex final : ASTNodeBinary { - AnyV get_cond() const { return lhs; } - auto get_body() const { return rhs->as(); } +struct Vertex final : ASTStatementVararg { + AnyExprV get_cond() const { return child_as_expr(0); } + auto get_body() const { return child(1)->as(); } - Vertex(SrcLocation loc, AnyV cond, V body) - : ASTNodeBinary(ast_repeat_statement, loc, cond, body) {} + Vertex(SrcLocation loc, AnyExprV cond, V body) + : ASTStatementVararg(ast_repeat_statement, loc, {cond, body}) {} }; template<> -struct Vertex final : ASTNodeBinary { - AnyV get_cond() const { return lhs; } - auto get_body() const { return rhs->as(); } +struct Vertex final : ASTStatementVararg { + AnyExprV get_cond() const { return child_as_expr(0); } + auto get_body() const { return child(1)->as(); } - Vertex(SrcLocation loc, AnyV cond, V body) - : ASTNodeBinary(ast_while_statement, loc, cond, body) {} + Vertex(SrcLocation loc, AnyExprV cond, V body) + : ASTStatementVararg(ast_while_statement, loc, {cond, body}) {} }; template<> -struct Vertex final : ASTNodeBinary { - auto get_body() const { return lhs->as(); } - AnyV get_cond() const { return rhs; } +struct Vertex final : ASTStatementVararg { + auto get_body() const { return child(0)->as(); } + AnyExprV get_cond() const { return child_as_expr(1); } - Vertex(SrcLocation loc, V body, AnyV cond) - : ASTNodeBinary(ast_do_while_statement, loc, body, cond) {} + Vertex(SrcLocation loc, V body, AnyExprV cond) + : ASTStatementVararg(ast_do_while_statement, loc, {body, cond}) {} }; template<> -struct Vertex final : ASTNodeBinary { - AnyV get_thrown_code() const { return lhs; } - AnyV 
get_thrown_arg() const { return rhs; } // may be ast_empty - bool has_thrown_arg() const { return rhs->type != ast_empty; } +struct Vertex final : ASTStatementVararg { + AnyExprV get_thrown_code() const { return child_as_expr(0); } + AnyExprV get_thrown_arg() const { return child_as_expr(1); } // may be ast_empty + bool has_thrown_arg() const { return child_as_expr(1)->type != ast_empty_expression; } - Vertex(SrcLocation loc, AnyV thrown_code, AnyV thrown_arg) - : ASTNodeBinary(ast_throw_statement, loc, thrown_code, thrown_arg) {} + Vertex(SrcLocation loc, AnyExprV thrown_code, AnyExprV thrown_arg) + : ASTStatementVararg(ast_throw_statement, loc, {thrown_code, thrown_arg}) {} }; template<> -struct Vertex final : ASTNodeBinary { - AnyV get_cond() const { return lhs; } - AnyV get_thrown_code() const { return rhs; } +struct Vertex final : ASTStatementVararg { + AnyExprV get_cond() const { return child_as_expr(0); } + AnyExprV get_thrown_code() const { return child_as_expr(1); } - Vertex(SrcLocation loc, AnyV cond, AnyV thrown_code) - : ASTNodeBinary(ast_assert_statement, loc, cond, thrown_code) {} + Vertex(SrcLocation loc, AnyExprV cond, AnyExprV thrown_code) + : ASTStatementVararg(ast_assert_statement, loc, {cond, thrown_code}) {} }; template<> -struct Vertex final : ASTNodeVararg { +struct Vertex final : ASTStatementVararg { auto get_try_body() const { return children.at(0)->as(); } auto get_catch_expr() const { return children.at(1)->as(); } // (excNo, arg), always len 2 auto get_catch_body() const { return children.at(2)->as(); } Vertex(SrcLocation loc, V try_body, V catch_expr, V catch_body) - : ASTNodeVararg(ast_try_catch_statement, loc, {try_body, catch_expr, catch_body}) {} + : ASTStatementVararg(ast_try_catch_statement, loc, {try_body, catch_expr, catch_body}) {} }; template<> -struct Vertex final : ASTNodeVararg { +struct Vertex final : ASTStatementVararg { bool is_ifnot; // if(!cond), to generate more optimal fift code - AnyV get_cond() const { return 
children.at(0); } - auto get_if_body() const { return children.at(1)->as(); } - auto get_else_body() const { return children.at(2)->as(); } // always exists (when else omitted, it's empty) + AnyExprV get_cond() const { return child_as_expr(0); } + auto get_if_body() const { return child(1)->as(); } + auto get_else_body() const { return child(2)->as(); } // always exists (when else omitted, it's empty) - Vertex(SrcLocation loc, bool is_ifnot, AnyV cond, V if_body, V else_body) - : ASTNodeVararg(ast_if_statement, loc, {cond, if_body, else_body}), is_ifnot(is_ifnot) {} + Vertex(SrcLocation loc, bool is_ifnot, AnyExprV cond, V if_body, V else_body) + : ASTStatementVararg(ast_if_statement, loc, {cond, if_body, else_body}) + , is_ifnot(is_ifnot) {} }; template<> -struct Vertex final : ASTNodeLeaf { +struct Vertex final : ASTOtherLeaf { TypeExpr* created_type; // used to keep same pointer, since TypeExpr::new_var(i) always allocates std::string_view nameT; Vertex(SrcLocation loc, TypeExpr* created_type, std::string_view nameT) - : ASTNodeLeaf(ast_genericsT_item, loc), created_type(created_type), nameT(nameT) {} + : ASTOtherLeaf(ast_genericsT_item, loc) + , created_type(created_type), nameT(nameT) {} }; template<> -struct Vertex final : ASTNodeVararg { +struct Vertex final : ASTOtherVararg { std::vector get_items() const { return children; } auto get_item(int i) const { return children.at(i)->as(); } Vertex(SrcLocation loc, std::vector genericsT_items) - : ASTNodeVararg(ast_genericsT_list, loc, std::move(genericsT_items)) {} + : ASTOtherVararg(ast_genericsT_list, loc, std::move(genericsT_items)) {} int lookup_idx(std::string_view nameT) const; }; template<> -struct Vertex final : ASTNodeUnary { - TypeExpr* param_type; +struct Vertex final : ASTOtherLeaf { + const LocalVarData* param_ref = nullptr; // filled after resolved + std::string_view param_name; + TypeExpr* declared_type; bool declared_as_mutate; // declared as `mutate param_name` - auto get_identifier() const { 
return child->as(); } // for underscore, name is empty - bool is_underscore() const { return child->as()->name.empty(); } + bool is_underscore() const { return param_name.empty(); } - Vertex(SrcLocation loc, V name_identifier, TypeExpr* param_type, bool declared_as_mutate) - : ASTNodeUnary(ast_parameter, loc, name_identifier), param_type(param_type), declared_as_mutate(declared_as_mutate) {} + Vertex* mutate() const { return const_cast(this); } + void assign_param_ref(const LocalVarData* param_ref); + + Vertex(SrcLocation loc, std::string_view param_name, TypeExpr* declared_type, bool declared_as_mutate) + : ASTOtherLeaf(ast_parameter, loc) + , param_name(param_name), declared_type(declared_type), declared_as_mutate(declared_as_mutate) {} }; template<> -struct Vertex final : ASTNodeVararg { +struct Vertex final : ASTOtherVararg { const std::vector& get_params() const { return children; } auto get_param(int i) const { return children.at(i)->as(); } Vertex(SrcLocation loc, std::vector params) - : ASTNodeVararg(ast_parameter_list, loc, std::move(params)) {} + : ASTOtherVararg(ast_parameter_list, loc, std::move(params)) {} int lookup_idx(std::string_view param_name) const; int get_mutate_params_count() const; @@ -527,57 +678,64 @@ struct Vertex final : ASTNodeVararg { }; template<> -struct Vertex final : ASTNodeVararg { +struct Vertex final : ASTStatementVararg { std::vector arg_order; std::vector ret_order; const std::vector& get_asm_commands() const { return children; } // ast_string_const[] Vertex(SrcLocation loc, std::vector arg_order, std::vector ret_order, std::vector asm_commands) - : ASTNodeVararg(ast_asm_body, loc, std::move(asm_commands)), arg_order(std::move(arg_order)), ret_order(std::move(ret_order)) {} + : ASTStatementVararg(ast_asm_body, loc, std::move(asm_commands)) + , arg_order(std::move(arg_order)), ret_order(std::move(ret_order)) {} }; template<> -struct Vertex final : ASTNodeUnary { +struct Vertex final : ASTOtherVararg { AnnotationKind kind; - 
auto get_arg() const { return child->as(); } + auto get_arg() const { return child(0)->as(); } static AnnotationKind parse_kind(std::string_view name); Vertex(SrcLocation loc, AnnotationKind kind, V arg_probably_empty) - : ASTNodeUnary(ast_annotation, loc, arg_probably_empty), kind(kind) {} + : ASTOtherVararg(ast_annotation, loc, {arg_probably_empty}) + , kind(kind) {} }; template<> -struct Vertex final : ASTNodeUnary { +struct Vertex final : ASTExprUnary { + const Symbol* var_maybe = nullptr; // typically local var; can be global var if `var g_v redef`; remains nullptr for underscore TypeExpr* declared_type; bool is_immutable; // declared via 'val', not 'var' bool marked_as_redef; // var (existing_var redef, new_var: int) = ... - AnyV get_identifier() const { return child; } // ast_identifier / ast_underscore + AnyExprV get_identifier() const { return child; } // ast_identifier / ast_underscore - Vertex(SrcLocation loc, AnyV name_identifier, TypeExpr* declared_type, bool is_immutable, bool marked_as_redef) - : ASTNodeUnary(ast_local_var, loc, name_identifier), declared_type(declared_type), is_immutable(is_immutable), marked_as_redef(marked_as_redef) {} + Vertex* mutate() const { return const_cast(this); } + void assign_var_ref(const Symbol* var_ref); + + Vertex(SrcLocation loc, AnyExprV name_identifier, TypeExpr* declared_type, bool is_immutable, bool marked_as_redef) + : ASTExprUnary(ast_local_var, loc, name_identifier), declared_type(declared_type), is_immutable(is_immutable), marked_as_redef(marked_as_redef) {} }; template<> -struct Vertex final : ASTNodeBinary { - AnyV get_lhs() const { return lhs; } // ast_local_var / ast_tensor / ast_tensor_square - AnyV get_assigned_val() const { return rhs; } +struct Vertex final : ASTStatementVararg { + AnyExprV get_lhs() const { return child_as_expr(0); } // ast_local_var / ast_tensor / ast_tensor_square + AnyExprV get_assigned_val() const { return child_as_expr(1); } - Vertex(SrcLocation loc, AnyV lhs, AnyV 
assigned_val) - : ASTNodeBinary(ast_local_vars_declaration, loc, lhs, assigned_val) {} + Vertex(SrcLocation loc, AnyExprV lhs, AnyExprV assigned_val) + : ASTStatementVararg(ast_local_vars_declaration, loc, {lhs, assigned_val}) {} }; template<> -struct Vertex final : ASTNodeVararg { - auto get_identifier() const { return children.at(0)->as(); } - int get_num_params() const { return children.at(1)->as()->size(); } - auto get_param_list() const { return children.at(1)->as(); } - auto get_param(int i) const { return children.at(1)->as()->get_param(i); } - AnyV get_body() const { return children.at(2); } // ast_sequence / ast_asm_body +struct Vertex final : ASTOtherVararg { + auto get_identifier() const { return child(0)->as(); } + int get_num_params() const { return child(1)->as()->size(); } + auto get_param_list() const { return child(1)->as(); } + auto get_param(int i) const { return child(1)->as()->get_param(i); } + AnyV get_body() const { return child(2); } // ast_sequence / ast_asm_body + const FunctionData* fun_ref = nullptr; // filled after register TypeExpr* ret_type = nullptr; V genericsT_list = nullptr; bool is_entrypoint = false; @@ -591,42 +749,49 @@ struct Vertex final : ASTNodeVararg { V method_id = nullptr; bool is_asm_function() const { return children.at(2)->type == ast_asm_body; } + bool is_regular_function() const { return children.at(2)->type == ast_sequence; } + bool is_builtin_function() const { return marked_as_builtin; } + + Vertex* mutate() const { return const_cast(this); } + void assign_fun_ref(const FunctionData* fun_ref); Vertex(SrcLocation loc, V name_identifier, V parameters, AnyV body) - : ASTNodeVararg(ast_function_declaration, loc, {name_identifier, parameters, body}) {} + : ASTOtherVararg(ast_function_declaration, loc, {name_identifier, parameters, body}) {} }; template<> -struct Vertex final : ASTNodeLeaf { - TokenType cmp_tok; +struct Vertex final : ASTOtherLeaf { std::string_view semver; - Vertex(SrcLocation loc, TokenType cmp_tok, 
std::string_view semver) - : ASTNodeLeaf(ast_tolk_required_version, loc), cmp_tok(cmp_tok), semver(semver) {} + Vertex(SrcLocation loc, std::string_view semver) + : ASTOtherLeaf(ast_tolk_required_version, loc) + , semver(semver) {} }; template<> -struct Vertex final : ASTNodeUnary { - const SrcFile* file = nullptr; // assigned after includes have been resolved +struct Vertex final : ASTOtherVararg { + const SrcFile* file = nullptr; // assigned after imports have been resolved - auto get_file_leaf() const { return child->as(); } + auto get_file_leaf() const { return child(0)->as(); } - std::string get_file_name() const { return static_cast(child->as()->str_val); } + std::string get_file_name() const { return static_cast(child(0)->as()->str_val); } - void mutate_set_src_file(const SrcFile* file) const; + Vertex* mutate() const { return const_cast(this); } + void assign_src_file(const SrcFile* file); Vertex(SrcLocation loc, V file_name) - : ASTNodeUnary(ast_import_statement, loc, file_name) {} + : ASTOtherVararg(ast_import_statement, loc, {file_name}) {} }; template<> -struct Vertex final : ASTNodeVararg { +struct Vertex final : ASTOtherVararg { const SrcFile* const file; const std::vector& get_toplevel_declarations() const { return children; } Vertex(const SrcFile* file, std::vector toplevel_declarations) - : ASTNodeVararg(ast_tolk_file, SrcLocation(file), std::move(toplevel_declarations)), file(file) {} + : ASTOtherVararg(ast_tolk_file, SrcLocation(file), std::move(toplevel_declarations)) + , file(file) {} }; } // namespace tolk diff --git a/tolk/builtins.cpp b/tolk/builtins.cpp index d18cfa64..01b096da 100644 --- a/tolk/builtins.cpp +++ b/tolk/builtins.cpp @@ -20,82 +20,64 @@ namespace tolk { using namespace std::literals::string_literals; -/* - * - * SYMBOL VALUES - * - */ - -SymDef* define_builtin_func_impl(const std::string& name, SymValAsmFunc* func_val) { - sym_idx_t name_idx = G.symbols.lookup_add(name); - SymDef* def = define_global_symbol(name_idx); - 
tolk_assert(!def->value); - - def->value = func_val; -#ifdef TOLK_DEBUG - def->value->sym_name = name; -#endif - return def; -} - -// given func_type = `(slice, int) -> slice` and func flags, create SymDef for parameters +// given func_type = `(slice, int) -> slice` and func flags, create SymLocalVarOrParameter // currently (see at the bottom) parameters of built-in functions are unnamed: // built-in functions are created using a resulting type -static std::vector define_builtin_parameters(const TypeExpr* func_type, int func_flags) { +static std::vector define_builtin_parameters(const TypeExpr* func_type, int func_flags) { // `loadInt()`, `storeInt()`: they accept `self` and mutate it; no other options available in built-ins for now - bool is_mutate_self = func_flags & SymValFunc::flagHasMutateParams; + bool is_mutate_self = func_flags & FunctionData::flagHasMutateParams; // func_type a map (params_type -> ret_type), probably surrounded by forall (internal representation of ) TypeExpr* params_type = func_type->constr == TypeExpr::te_ForAll ? 
func_type->args[0]->args[0] : func_type->args[0]; - std::vector parameters; + std::vector parameters; if (params_type->constr == TypeExpr::te_Tensor) { // multiple parameters: it's a tensor parameters.reserve(params_type->args.size()); for (int i = 0; i < static_cast(params_type->args.size()); ++i) { - SymDef* sym_def = define_parameter(i, {}); - SymValVariable* sym_val = new SymValVariable(i, params_type->args[i]); + LocalVarData p_sym("", {}, i, params_type->args[i]); if (i == 0 && is_mutate_self) { - sym_val->flags |= SymValVariable::flagMutateParameter; + p_sym.flags |= LocalVarData::flagMutateParameter; } - sym_def->value = sym_val; - parameters.emplace_back(sym_def); + parameters.push_back(std::move(p_sym)); } } else { // single parameter - SymDef* sym_def = define_parameter(0, {}); - SymValVariable* sym_val = new SymValVariable(0, params_type); + LocalVarData p_sym("", {}, 0, params_type); if (is_mutate_self) { - sym_val->flags |= SymValVariable::flagMutateParameter; + p_sym.flags |= LocalVarData::flagMutateParameter; } - sym_def->value = sym_val; - parameters.emplace_back(sym_def); + parameters.push_back(std::move(p_sym)); } return parameters; } -static SymDef* define_builtin_func(const std::string& name, TypeExpr* func_type, const simple_compile_func_t& func, int flags) { - return define_builtin_func_impl(name, new SymValAsmFunc(define_builtin_parameters(func_type, flags), func_type, func, flags | SymValFunc::flagBuiltinFunction)); +static void define_builtin_func(const std::string& name, TypeExpr* func_type, const simple_compile_func_t& func, int flags) { + auto* f_sym = new FunctionData(name, {}, func_type, define_builtin_parameters(func_type, flags), flags, new FunctionBodyBuiltin(func)); + G.symtable.add_function(f_sym); } -static SymDef* define_builtin_func(const std::string& name, TypeExpr* func_type, const AsmOp& macro, int flags) { - return define_builtin_func_impl(name, new SymValAsmFunc(define_builtin_parameters(func_type, flags), func_type, 
make_simple_compile(macro), flags | SymValFunc::flagBuiltinFunction)); +static void define_builtin_func(const std::string& name, TypeExpr* func_type, const AsmOp& macro, int flags) { + auto* f_sym = new FunctionData(name, {}, func_type, define_builtin_parameters(func_type, flags), flags, new FunctionBodyBuiltin(make_simple_compile(macro))); + G.symtable.add_function(f_sym); } -static SymDef* define_builtin_func(const std::string& name, TypeExpr* func_type, const simple_compile_func_t& func, int flags, - std::initializer_list arg_order, std::initializer_list ret_order) { - return define_builtin_func_impl(name, new SymValAsmFunc(define_builtin_parameters(func_type, flags), func_type, func, flags | SymValFunc::flagBuiltinFunction, arg_order, ret_order)); +static void define_builtin_func(const std::string& name, TypeExpr* func_type, const simple_compile_func_t& func, int flags, + std::initializer_list arg_order, std::initializer_list ret_order) { + auto* f_sym = new FunctionData(name, {}, func_type, define_builtin_parameters(func_type, flags), flags, new FunctionBodyBuiltin(func)); + f_sym->arg_order = arg_order; + f_sym->ret_order = ret_order; + G.symtable.add_function(f_sym); } -bool SymValAsmFunc::compile(AsmOpList& dest, std::vector& out, std::vector& in, - SrcLocation where) const { - if (simple_compile) { - return dest.append(simple_compile(out, in, where)); - } else if (ext_compile) { - return ext_compile(dest, out, in); - } else { - return false; - } +void FunctionBodyBuiltin::compile(AsmOpList& dest, std::vector& out, std::vector& in, + SrcLocation where) const { + dest.append(simple_compile(out, in, where)); } +void FunctionBodyAsm::compile(AsmOpList& dest) const { + dest.append(ops); +} + + /* * * DEFINE BUILT-IN FUNCTIONS @@ -1119,91 +1101,71 @@ void define_builtins() { TypeExpr* throw_arg_op = TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_tensor({X, Int}), Unit)); define_builtin_func("_+_", arith_bin_op, compile_add, - 
SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_-_", arith_bin_op, compile_sub, - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("-_", arith_un_op, compile_unary_minus, - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("+_", arith_un_op, compile_unary_plus, - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_*_", arith_bin_op, compile_mul, - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, -1), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_~/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 0), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_^/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 1), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_%_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, -1), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_<<_", arith_bin_op, compile_lshift, - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_~>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 0), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_^>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 1), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("!_", arith_un_op, compile_logical_not, - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("~_", arith_un_op, compile_bitwise_not, - SymValFunc::flagMarkedAsPure); + 
FunctionData::flagMarkedAsPure); define_builtin_func("_&_", arith_bin_op, compile_bitwise_and, - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_|_", arith_bin_op, compile_bitwise_or, - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_^_", arith_bin_op, compile_bitwise_xor, - SymValFunc::flagMarkedAsPure); - define_builtin_func("^_+=_", arith_bin_op, compile_add, - SymValFunc::flagMarkedAsPure); - define_builtin_func("^_-=_", arith_bin_op, compile_sub, - SymValFunc::flagMarkedAsPure); - define_builtin_func("^_*=_", arith_bin_op, compile_mul, - SymValFunc::flagMarkedAsPure); - define_builtin_func("^_/=_", arith_bin_op, std::bind(compile_div, _1, _2, _3, -1), - SymValFunc::flagMarkedAsPure); - define_builtin_func("^_%=_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, -1), - SymValFunc::flagMarkedAsPure); - define_builtin_func("^_<<=_", arith_bin_op, compile_lshift, - SymValFunc::flagMarkedAsPure); - define_builtin_func("^_>>=_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1), - SymValFunc::flagMarkedAsPure); - define_builtin_func("^_&=_", arith_bin_op, compile_bitwise_and, - SymValFunc::flagMarkedAsPure); - define_builtin_func("^_|=_", arith_bin_op, compile_bitwise_or, - SymValFunc::flagMarkedAsPure); - define_builtin_func("^_^=_", arith_bin_op, compile_bitwise_xor, - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_==_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 2), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_!=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 5), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_<_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 4), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_>_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 1), - 
SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_<=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 6), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_>=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 3), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_<=>_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 7), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("mulDivFloor", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, -1), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("mulDivRound", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 0), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("mulDivCeil", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 1), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("mulDivMod", TypeExpr::new_map(Int3, Int2), AsmOp::Custom("MULDIVMOD", 3, 2), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("__true", TypeExpr::new_map(TypeExpr::new_unit(), Int), /* AsmOp::Const("TRUE") */ std::bind(compile_bool_const, _1, _2, true), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("__false", TypeExpr::new_map(TypeExpr::new_unit(), Int), /* AsmOp::Const("FALSE") */ std::bind(compile_bool_const, _1, _2, false), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("__null", TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_unit(), X)), AsmOp::Const("PUSHNULL"), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("__isNull", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Int)), compile_is_null, - SymValFunc::flagMarkedAsPure); + 
FunctionData::flagMarkedAsPure); define_builtin_func("__throw", impure_un_op, compile_throw, 0); define_builtin_func("__throw_arg", throw_arg_op, compile_throw_arg, @@ -1211,23 +1173,28 @@ void define_builtins() { define_builtin_func("__throw_if_unless", TypeExpr::new_map(Int3, Unit), compile_throw_if_unless, 0); define_builtin_func("loadInt", fetch_int_op_mutate, std::bind(compile_fetch_int, _1, _2, true, true), - SymValFunc::flagMarkedAsPure | SymValFunc::flagHasMutateParams | SymValFunc::flagAcceptsSelf, {}, {1, 0}); + FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf, + {}, {1, 0}); define_builtin_func("loadUint", fetch_int_op_mutate, std::bind(compile_fetch_int, _1, _2, true, false), - SymValFunc::flagMarkedAsPure | SymValFunc::flagHasMutateParams | SymValFunc::flagAcceptsSelf, {}, {1, 0}); + FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf, + {}, {1, 0}); define_builtin_func("loadBits", fetch_slice_op_mutate, std::bind(compile_fetch_slice, _1, _2, true), - SymValFunc::flagMarkedAsPure | SymValFunc::flagHasMutateParams | SymValFunc::flagAcceptsSelf, {}, {1, 0}); + FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf, + {}, {1, 0}); define_builtin_func("preloadInt", prefetch_int_op, std::bind(compile_fetch_int, _1, _2, false, true), - SymValFunc::flagMarkedAsPure | SymValFunc::flagAcceptsSelf); + FunctionData::flagMarkedAsPure | FunctionData::flagAcceptsSelf); define_builtin_func("preloadUint", prefetch_int_op, std::bind(compile_fetch_int, _1, _2, false, false), - SymValFunc::flagMarkedAsPure | SymValFunc::flagAcceptsSelf); + FunctionData::flagMarkedAsPure | FunctionData::flagAcceptsSelf); define_builtin_func("preloadBits", prefetch_slice_op, std::bind(compile_fetch_slice, _1, _2, false), - SymValFunc::flagMarkedAsPure | SymValFunc::flagAcceptsSelf); + FunctionData::flagMarkedAsPure | FunctionData::flagAcceptsSelf); 
define_builtin_func("storeInt", store_int_mutate, std::bind(compile_store_int, _1, _2, true), - SymValFunc::flagMarkedAsPure | SymValFunc::flagHasMutateParams | SymValFunc::flagAcceptsSelf | SymValFunc::flagReturnsSelf, {1, 0, 2}, {}); + FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf | FunctionData::flagReturnsSelf, + {1, 0, 2}, {}); define_builtin_func("storeUint", store_int_mutate, std::bind(compile_store_int, _1, _2, false), - SymValFunc::flagMarkedAsPure | SymValFunc::flagHasMutateParams | SymValFunc::flagAcceptsSelf | SymValFunc::flagReturnsSelf, {1, 0, 2}, {}); + FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf | FunctionData::flagReturnsSelf, + {1, 0, 2}, {}); define_builtin_func("tupleAt", TypeExpr::new_forall({X}, TypeExpr::new_map(TupleInt, X)), compile_tuple_at, - SymValFunc::flagMarkedAsPure | SymValFunc::flagAcceptsSelf); + FunctionData::flagMarkedAsPure | FunctionData::flagAcceptsSelf); define_builtin_func("debugPrint", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Unit)), AsmOp::Custom("s0 DUMP DROP", 1, 1), 0); diff --git a/tolk/codegen.cpp b/tolk/codegen.cpp index 9a90a3ed..0529696f 100644 --- a/tolk/codegen.cpp +++ b/tolk/codegen.cpp @@ -314,7 +314,7 @@ bool Op::generate_code_step(Stack& stack) { return true; } case _GlobVar: - if (dynamic_cast(fun_ref->value)) { + if (g_sym) { bool used = false; for (auto i : left) { auto p = next->var_info[i]; @@ -325,8 +325,7 @@ bool Op::generate_code_step(Stack& stack) { if (!used || disabled()) { return true; } - std::string name = G.symbols.get_name(fun_ref->sym_idx); - stack.o << AsmOp::Custom(name + " GETGLOB", 0, 1); + stack.o << AsmOp::Custom(g_sym->name + " GETGLOB", 0, 1); if (left.size() != 1) { tolk_assert(left.size() <= 15); stack.o << AsmOp::UnTuple((int)left.size()); @@ -343,14 +342,14 @@ bool Op::generate_code_step(Stack& stack) { } stack.o << "CONT:<{"; stack.o.indent(); - auto func = 
dynamic_cast(fun_ref->value); - if (func) { + if (f_sym->is_asm_function() || f_sym->is_builtin_function()) { // TODO: create and compile a true lambda instead of this (so that arg_order and ret_order would work correctly) std::vector args0, res; - TypeExpr::remove_indirect(func->sym_type); - tolk_assert(func->get_type()->is_map()); - auto wr = func->get_type()->args.at(0)->get_width(); - auto wl = func->get_type()->args.at(1)->get_width(); + TypeExpr* func_type = f_sym->full_type; + TypeExpr::remove_indirect(func_type); + tolk_assert(func_type->is_map()); + auto wr = func_type->args.at(0)->get_width(); + auto wl = func_type->args.at(1)->get_width(); tolk_assert(wl >= 0 && wr >= 0); for (int i = 0; i < wl; i++) { res.emplace_back(0); @@ -358,10 +357,13 @@ bool Op::generate_code_step(Stack& stack) { for (int i = 0; i < wr; i++) { args0.emplace_back(0); } - func->compile(stack.o, res, args0, where); // compile res := f (args0) + if (f_sym->is_asm_function()) { + std::get(f_sym->body)->compile(stack.o); // compile res := f (args0) + } else { + std::get(f_sym->body)->compile(stack.o, res, args0, where); // compile res := f (args0) + } } else { - std::string name = G.symbols.get_name(fun_ref->sym_idx); - stack.o << AsmOp::Custom(name + " CALLDICT", (int)right.size(), (int)left.size()); + stack.o << AsmOp::Custom(f_sym->name + " CALLDICT", (int)right.size(), (int)left.size()); } stack.o.undent(); stack.o << "}>"; @@ -438,10 +440,9 @@ bool Op::generate_code_step(Stack& stack) { if (disabled()) { return true; } - // fun_ref can be nullptr for Op::_CallInd (invoke a variable, not a function) - SymValFunc* func = (fun_ref ? dynamic_cast(fun_ref->value) : nullptr); - auto arg_order = (func ? func->get_arg_order() : nullptr); - auto ret_order = (func ? func->get_ret_order() : nullptr); + // f_sym can be nullptr for Op::_CallInd (invoke a variable, not a function) + const std::vector* arg_order = f_sym ? f_sym->get_arg_order() : nullptr; + const std::vector* ret_order = f_sym ? 
f_sym->get_ret_order() : nullptr; tolk_assert(!arg_order || arg_order->size() == right.size()); tolk_assert(!ret_order || ret_order->size() == left.size()); std::vector right1; @@ -488,23 +489,25 @@ bool Op::generate_code_step(Stack& stack) { }; if (cl == _CallInd) { exec_callxargs((int)right.size() - 1, (int)left.size()); - } else if (auto asm_fv = dynamic_cast(fun_ref->value)) { + } else if (!f_sym->is_regular_function()) { std::vector res; res.reserve(left.size()); for (var_idx_t i : left) { res.emplace_back(i); } - asm_fv->compile(stack.o, res, args, where); // compile res := f (args) + if (f_sym->is_asm_function()) { + std::get(f_sym->body)->compile(stack.o); // compile res := f (args) + } else { + std::get(f_sym->body)->compile(stack.o, res, args, where); // compile res := f (args) + } } else { - auto fv = dynamic_cast(fun_ref->value); - std::string name = G.symbols.get_name(fun_ref->sym_idx); - if (fv->is_inline() || fv->is_inline_ref()) { - stack.o << AsmOp::Custom(name + " INLINECALLDICT", (int)right.size(), (int)left.size()); - } else if (fv->code && fv->code->require_callxargs) { - stack.o << AsmOp::Custom(name + (" PREPAREDICT"), 0, 2); + if (f_sym->is_inline() || f_sym->is_inline_ref()) { + stack.o << AsmOp::Custom(f_sym->name + " INLINECALLDICT", (int)right.size(), (int)left.size()); + } else if (f_sym->is_regular_function() && std::get(f_sym->body)->code->require_callxargs) { + stack.o << AsmOp::Custom(f_sym->name + (" PREPAREDICT"), 0, 2); exec_callxargs((int)right.size() + 1, (int)left.size()); } else { - stack.o << AsmOp::Custom(name + " CALLDICT", (int)right.size(), (int)left.size()); + stack.o << AsmOp::Custom(f_sym->name + " CALLDICT", (int)right.size(), (int)left.size()); } } stack.s.resize(k); @@ -515,7 +518,7 @@ bool Op::generate_code_step(Stack& stack) { return true; } case _SetGlob: { - tolk_assert(fun_ref && dynamic_cast(fun_ref->value)); + tolk_assert(g_sym); std::vector last; for (var_idx_t x : right) { last.push_back(var_info[x] && 
var_info[x]->is_last()); @@ -534,8 +537,7 @@ bool Op::generate_code_step(Stack& stack) { stack.o << AsmOp::Tuple((int)right.size()); } if (!right.empty()) { - std::string name = G.symbols.get_name(fun_ref->sym_idx); - stack.o << AsmOp::Custom(name + " SETGLOB", 1, 0); + stack.o << AsmOp::Custom(g_sym->name + " SETGLOB", 1, 0); } stack.s.resize(k); return true; @@ -826,6 +828,8 @@ bool Op::generate_code_step(Stack& stack) { catch_stack.push_new_var(left[1]); stack.rearrange_top(catch_vars, catch_last); stack.opt_show(); + stack.o << "c1 PUSH"; + stack.o << "c3 PUSH"; stack.o << "c4 PUSH"; stack.o << "c5 PUSH"; stack.o << "c7 PUSH"; @@ -842,6 +846,8 @@ bool Op::generate_code_step(Stack& stack) { stack.o << "c7 SETCONT"; stack.o << "c5 SETCONT"; stack.o << "c4 SETCONT"; + stack.o << "c3 SETCONT"; + stack.o << "c1 SETCONT"; for (size_t begin = catch_vars.size(), end = begin; end > 0; end = begin) { begin = end >= block_size ? end - block_size : 0; stack.o << std::to_string(end - begin) + " PUSHINT"; diff --git a/tolk/compiler-state.cpp b/tolk/compiler-state.cpp index fb70022f..14d064d9 100644 --- a/tolk/compiler-state.cpp +++ b/tolk/compiler-state.cpp @@ -27,6 +27,19 @@ void ExperimentalOption::mark_deprecated(const char* deprecated_from_v, const ch this->deprecated_reason = deprecated_reason; } +std::string_view PersistentHeapAllocator::copy_string_to_persistent_memory(std::string_view str_in_tmp_memory) { + size_t len = str_in_tmp_memory.size(); + char* allocated = new char[len]; + memcpy(allocated, str_in_tmp_memory.data(), str_in_tmp_memory.size()); + auto new_chunk = std::make_unique(allocated, std::move(head)); + head = std::move(new_chunk); + return {head->allocated, len}; +} + +void PersistentHeapAllocator::clear() { + head = nullptr; +} + void CompilerSettings::enable_experimental_option(std::string_view name) { ExperimentalOption* to_enable = nullptr; diff --git a/tolk/compiler-state.h b/tolk/compiler-state.h index aec1945e..56158876 100644 --- 
a/tolk/compiler-state.h +++ b/tolk/compiler-state.h @@ -19,6 +19,7 @@ #include "src-file.h" #include "symtable.h" #include "td/utils/Status.h" +#include #include #include @@ -64,6 +65,26 @@ struct CompilerSettings { void parse_experimental_options_cmd_arg(const std::string& cmd_arg); }; +// AST nodes contain std::string_view referencing to contents of .tolk files (kept in memory after reading). +// It's more than enough, except a situation when we create new AST nodes inside the compiler +// and want some "persistent place" for std::string_view to point to. +// This class copies strings to heap, so that they remain valid after closing scope. +class PersistentHeapAllocator { + struct ChunkInHeap { + const char* allocated; + std::unique_ptr next; + + ChunkInHeap(const char* allocated, std::unique_ptr&& next) + : allocated(allocated), next(std::move(next)) {} + }; + + std::unique_ptr head = nullptr; + +public: + std::string_view copy_string_to_persistent_memory(std::string_view str_in_tmp_memory); + void clear(); +}; + // CompilerState contains a mutable state that is changed while the compilation is going on. // It's a "global state" of all compilation. // Historically, in FunC, this global state was spread along many global C++ variables. 
@@ -71,14 +92,13 @@ struct CompilerSettings { struct CompilerState { CompilerSettings settings; - SymTable symbols; - int scope_level = 0; - SymDef* sym_def[SymTable::SIZE_PRIME + 1]{}; - SymDef* global_sym_def[SymTable::SIZE_PRIME + 1]{}; - std::vector> symbol_stack; - std::vector scope_opened_at; + GlobalSymbolTable symtable; + PersistentHeapAllocator persistent_mem; - std::vector all_code_functions, all_global_vars, all_get_methods, all_constants; + std::vector all_code_functions; + std::vector all_get_methods; + std::vector all_global_vars; + std::vector all_constants; AllRegisteredSrcFiles all_src_files; bool is_verbosity(int gt_eq) const { return settings.verbosity >= gt_eq; } diff --git a/tolk/constant-evaluator.cpp b/tolk/constant-evaluator.cpp new file mode 100644 index 00000000..73c80b9a --- /dev/null +++ b/tolk/constant-evaluator.cpp @@ -0,0 +1,313 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#include "constant-evaluator.h" +#include "ast.h" +#include "tolk.h" +#include "openssl/digest.hpp" +#include "crypto/common/util.h" +#include "td/utils/crypto.h" +#include "ton/ton-types.h" + +namespace tolk { + +// parse address like "EQCRDM9h4k3UJdOePPuyX40mCgA4vxge5Dc5vjBR8djbEKC5" +// based on unpack_std_smc_addr() from block.cpp +// (which is not included to avoid linking with ton_crypto) +static bool parse_friendly_address(const char packed[48], ton::WorkchainId& workchain, ton::StdSmcAddress& addr) { + unsigned char buffer[36]; + if (!td::buff_base64_decode(td::MutableSlice{buffer, 36}, td::Slice{packed, 48}, true)) { + return false; + } + td::uint16 crc = td::crc16(td::Slice{buffer, 34}); + if (buffer[34] != (crc >> 8) || buffer[35] != (crc & 0xff) || (buffer[0] & 0x3f) != 0x11) { + return false; + } + workchain = static_cast(buffer[1]); + std::memcpy(addr.data(), buffer + 2, 32); + return true; +} + +// parse address like "0:527964d55cfa6eb731f4bfc07e9d025098097ef8505519e853986279bd8400d8" +// based on StdAddress::parse_addr() from block.cpp +// (which is not included to avoid linking with ton_crypto) +static bool parse_raw_address(const std::string& acc_string, int& workchain, ton::StdSmcAddress& addr) { + size_t pos = acc_string.find(':'); + if (pos != std::string::npos) { + td::Result r_wc = td::to_integer_safe(acc_string.substr(0, pos)); + if (r_wc.is_error()) { + return false; + } + workchain = r_wc.move_as_ok(); + pos++; + } else { + return false; // no "workchain:" prefix: `workchain` would stay unset, and the caller reads it afterwards + } + if (acc_string.size() != pos + 64) { + return false; + } + + for (int i = 0; i < 64; ++i) { // loop through each hex digit; only [0-9a-fA-F] is a valid digit + char c = acc_string[pos + i]; + int x; + if (c >= '0' && c <= '9') { + x = c - '0'; + } else if (c >= 'a' && c <= 'f') { + x = c - 'a' + 10; + } else if (c >= 'A' && c <= 'F') { + x = c - 'A' + 10; + } else { + return false; + } + + if ((i & 1) == 0) { + addr.data()[i >> 1] = static_cast((addr.data()[i >> 1] & 0x0F) | (x << 4)); + } else { + addr.data()[i >> 1] = 
static_cast((addr.data()[i >> 1] & 0xF0) | x); + } + } + return true; +} + + +static std::string parse_vertex_string_const_as_slice(V v) { + std::string str = static_cast(v->str_val); + switch (v->modifier) { + case 0: { + return td::hex_encode(str); + } + case 's': { + unsigned char buff[128]; + long bits = td::bitstring::parse_bitstring_hex_literal(buff, sizeof(buff), str.data(), str.data() + str.size()); + if (bits < 0) { + v->error("invalid hex bitstring constant '" + str + "'"); + } + return str; + } + case 'a': { // MsgAddress + ton::WorkchainId workchain; + ton::StdSmcAddress addr; + bool correct = (str.size() == 48 && parse_friendly_address(str.data(), workchain, addr)) || + (str.size() != 48 && parse_raw_address(str, workchain, addr)); + if (!correct) { + v->error("invalid standard address '" + str + "'"); + } + if (workchain < -128 || workchain >= 128) { + v->error("anycast addresses not supported"); + } + + unsigned char data[3 + 8 + 256]; // addr_std$10 anycast:(Maybe Anycast) workchain_id:int8 address:bits256 = MsgAddressInt; + td::bitstring::bits_store_long_top(data, 0, static_cast(4) << (64 - 3), 3); + td::bitstring::bits_store_long_top(data, 3, static_cast(workchain) << (64 - 8), 8); + td::bitstring::bits_memcpy(data, 3 + 8, addr.bits().ptr, 0, ton::StdSmcAddress::size()); + return td::BitSlice{data, sizeof(data)}.to_hex(); + } + default: + tolk_assert(false); + } +} + +static td::RefInt256 parse_vertex_string_const_as_int(V v) { + std::string str = static_cast(v->str_val); + switch (v->modifier) { + case 'u': { + td::RefInt256 intval = td::hex_string_to_int256(td::hex_encode(str)); + if (str.empty()) { + v->error("empty integer ascii-constant"); + } + if (intval.is_null()) { + v->error("too long integer ascii-constant"); + } + return intval; + } + case 'h': + case 'H': { + unsigned char hash[32]; + digest::hash_str(hash, str.data(), str.size()); + return td::bits_to_refint(hash, (v->modifier == 'h') ? 
32 : 256, false); + } + case 'c': { + return td::make_refint(td::crc32(td::Slice{str})); + } + default: + tolk_assert(false); + } +} + + +struct ConstantEvaluator { + static bool is_overflow(const td::RefInt256& intval) { + return intval.is_null() || !intval->signed_fits_bits(257); + } + + static ConstantValue handle_unary_operator(V v, const ConstantValue& rhs) { + if (!rhs.is_int()) { + v->error("invalid operator, expecting integer"); + } + td::RefInt256 intval = std::get(rhs.value); + + switch (v->tok) { + case tok_minus: + intval = -intval; + break; + case tok_plus: + break; + case tok_bitwise_not: + intval = ~intval; + break; + case tok_logical_not: + intval = td::make_refint(intval == 0 ? -1 : 0); + break; + default: + v->error("not a constant expression"); + } + + if (is_overflow(intval)) { + v->error("integer overflow"); + } + return ConstantValue::from_int(std::move(intval)); + } + + static ConstantValue handle_binary_operator(V v, const ConstantValue& lhs, const ConstantValue& rhs) { + if (!lhs.is_int() || !rhs.is_int()) { + v->error("invalid operator, expecting integer"); + } + td::RefInt256 lhs_intval = std::get(lhs.value); + td::RefInt256 rhs_intval = std::get(rhs.value); + td::RefInt256 intval; + + switch (v->tok) { + case tok_minus: + intval = lhs_intval - rhs_intval; + break; + case tok_plus: + intval = lhs_intval + rhs_intval; + break; + case tok_mul: + intval = lhs_intval * rhs_intval; + break; + case tok_div: + intval = lhs_intval / rhs_intval; + break; + case tok_mod: + intval = lhs_intval % rhs_intval; + break; + case tok_lshift: + intval = lhs_intval << static_cast(rhs_intval->to_long()); + break; + case tok_rshift: + intval = lhs_intval >> static_cast(rhs_intval->to_long()); + break; + case tok_bitwise_and: + intval = lhs_intval & rhs_intval; + break; + case tok_bitwise_or: + intval = lhs_intval | rhs_intval; + break; + case tok_bitwise_xor: + intval = lhs_intval ^ rhs_intval; + break; + case tok_eq: + intval = td::make_refint(lhs_intval == 
rhs_intval ? -1 : 0); + break; + case tok_lt: + intval = td::make_refint(lhs_intval < rhs_intval ? -1 : 0); + break; + case tok_gt: + intval = td::make_refint(lhs_intval > rhs_intval ? -1 : 0); + break; + case tok_leq: + intval = td::make_refint(lhs_intval <= rhs_intval ? -1 : 0); + break; + case tok_geq: + intval = td::make_refint(lhs_intval >= rhs_intval ? -1 : 0); + break; + case tok_neq: + intval = td::make_refint(lhs_intval != rhs_intval ? -1 : 0); + break; + default: + v->error("unsupported binary operator in constant expression"); + } + + if (is_overflow(intval)) { + v->error("integer overflow"); + } + return ConstantValue::from_int(std::move(intval)); + } + + static ConstantValue handle_identifier(V v) { + // todo better handle "appears, directly or indirectly, in its own initializer" + const Symbol* sym = lookup_global_symbol(v->name); + if (!sym) { + v->error("undefined symbol `" + static_cast(v->name) + "`"); + } + const GlobalConstData* const_ref = sym->try_as(); + if (!const_ref) { + v->error("symbol `" + static_cast(v->name) + "` is not a constant"); + } + return {const_ref->value}; + } + + static ConstantValue visit(AnyExprV v) { + if (auto v_int = v->try_as()) { + return ConstantValue::from_int(v_int->intval); + } + if (auto v_bool = v->try_as()) { + return ConstantValue::from_int(v_bool->bool_val ? 
-1 : 0); + } + if (auto v_unop = v->try_as()) { + return handle_unary_operator(v_unop, visit(v_unop->get_rhs())); + } + if (auto v_binop = v->try_as()) { + return handle_binary_operator(v_binop, visit(v_binop->get_lhs()), visit(v_binop->get_rhs())); + } + if (auto v_ident = v->try_as()) { + return handle_identifier(v_ident); + } + if (auto v_par = v->try_as()) { + return visit(v_par->get_expr()); + } + if (v->try_as()) { + return tolk::eval_const_init_value(v); // qualified on purpose: the unqualified name resolves to the static member below, which calls visit() again (endless recursion) + } + v->error("not a constant expression"); + } + + static ConstantValue eval_const_init_value(AnyExprV init_value) { + // if init_value is incorrect, an exception is thrown + return visit(init_value); + } +}; + +ConstantValue eval_const_init_value(AnyExprV init_value) { + // at first, handle most simple cases, not to launch heavy computation algorithm: just a number, just a string + // just `c = 1` or `c = 0xFF` + if (auto v_int = init_value->try_as()) { + return {v_int->intval}; + } + // just `c = "strval"`, probably with modifier (address, etc.) + if (auto v_string = init_value->try_as()) { + if (v_string->is_bitslice()) { + return {parse_vertex_string_const_as_slice(v_string)}; + } else { + return {parse_vertex_string_const_as_int(v_string)}; + } + } + // something more complex, like `c = anotherC` or `c = 1 << 8` + return ConstantEvaluator::eval_const_init_value(init_value); +} + +} // namespace tolk diff --git a/tolk/constant-evaluator.h b/tolk/constant-evaluator.h new file mode 100644 index 00000000..0f99867d --- /dev/null +++ b/tolk/constant-evaluator.h @@ -0,0 +1,45 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. 
+ + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +#include "fwd-declarations.h" +#include "crypto/common/refint.h" +#include + +namespace tolk { + +struct ConstantValue { + std::variant value; + + bool is_int() const { return std::holds_alternative(value); } + bool is_slice() const { return std::holds_alternative(value); } + + td::RefInt256 as_int() const { return std::get(value); } + const std::string& as_slice() const { return std::get(value); } + + static ConstantValue from_int(int value) { + return {td::make_refint(value)}; + } + + static ConstantValue from_int(td::RefInt256 value) { + return {std::move(value)}; + } +}; + +ConstantValue eval_const_init_value(AnyExprV init_value); + +} // namespace tolk diff --git a/tolk/fwd-declarations.h b/tolk/fwd-declarations.h new file mode 100644 index 00000000..d2197e66 --- /dev/null +++ b/tolk/fwd-declarations.h @@ -0,0 +1,39 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#pragma once + +namespace tolk { + +struct ASTNodeBase; +struct ASTNodeExpressionBase; +struct ASTNodeStatementBase; + +using AnyV = const ASTNodeBase*; +using AnyExprV = const ASTNodeExpressionBase*; +using AnyStatementV = const ASTNodeStatementBase*; + +struct Symbol; +struct LocalVarData; +struct FunctionData; +struct GlobalVarData; +struct GlobalConstData; + +struct TypeExpr; + +struct SrcFile; + +} // namespace tolk diff --git a/tolk/gen-abscode.cpp b/tolk/gen-abscode.cpp deleted file mode 100644 index fb085ae9..00000000 --- a/tolk/gen-abscode.cpp +++ /dev/null @@ -1,429 +0,0 @@ -/* - This file is part of TON Blockchain Library. - - TON Blockchain Library is free software: you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - TON Blockchain Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with TON Blockchain Library. If not, see . 
-*/ -#include "tolk.h" -#include "compiler-state.h" - -using namespace std::literals::string_literals; - -namespace tolk { - -/* - * - * EXPRESSIONS - * - */ - -Expr* Expr::copy() const { - auto res = new Expr{*this}; - for (auto& arg : res->args) { - arg = arg->copy(); - } - return res; -} - -Expr::Expr(ExprCls c, sym_idx_t name_idx, std::initializer_list _arglist) : cls(c), args(std::move(_arglist)) { - sym = lookup_symbol(name_idx); - if (!sym) { - } -} - -void Expr::deduce_type() { - if (e_type) { - return; - } - switch (cls) { - case _Apply: { - if (!sym) { - return; - } - SymValFunc* sym_val = dynamic_cast(sym->value); - if (!sym_val || !sym_val->get_type()) { - return; - } - std::vector arg_types; - arg_types.reserve(args.size()); - for (const Expr* arg : args) { - arg_types.push_back(arg->e_type); - } - TypeExpr* fun_type = TypeExpr::new_map(TypeExpr::new_tensor(arg_types), TypeExpr::new_hole()); - try { - unify(fun_type, sym_val->sym_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "cannot apply function " << sym->name() << " : " << sym_val->get_type() << " to arguments of type " - << fun_type->args[0] << ": " << ue; - throw ParseError(here, os.str()); - } - e_type = fun_type->args[1]; - TypeExpr::remove_indirect(e_type); - return; - } - case _VarApply: { - tolk_assert(args.size() == 2); - TypeExpr* fun_type = TypeExpr::new_map(args[1]->e_type, TypeExpr::new_hole()); - try { - unify(fun_type, args[0]->e_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "cannot apply expression of type " << args[0]->e_type << " to an expression of type " << args[1]->e_type - << ": " << ue; - throw ParseError(here, os.str()); - } - e_type = fun_type->args[1]; - TypeExpr::remove_indirect(e_type); - return; - } - case _GrabMutatedVars: { - tolk_assert(args.size() == 2 && args[0]->cls == _Apply && sym); - SymValFunc* called_f = dynamic_cast(sym->value); - tolk_assert(called_f->has_mutate_params()); - TypeExpr* sym_type = 
called_f->get_type(); - if (sym_type->constr == TypeExpr::te_ForAll) { - TypeExpr::remove_forall(sym_type); - } - tolk_assert(sym_type->args[1]->constr == TypeExpr::te_Tensor); - e_type = sym_type->args[1]->args[sym_type->args[1]->args.size() - 1]; - TypeExpr::remove_indirect(e_type); - return; - } - case _ReturnSelf: { - tolk_assert(args.size() == 2 && sym); - Expr* this_arg = args[1]; - e_type = this_arg->e_type; - TypeExpr::remove_indirect(e_type); - return; - } - case _Letop: { - tolk_assert(args.size() == 2); - try { - // std::cerr << "in assignment: " << args[0]->e_type << " from " << args[1]->e_type << std::endl; - unify(args[0]->e_type, args[1]->e_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "cannot assign an expression of type " << args[1]->e_type << " to a variable or pattern of type " - << args[0]->e_type << ": " << ue; - throw ParseError(here, os.str()); - } - e_type = args[0]->e_type; - TypeExpr::remove_indirect(e_type); - return; - } - case _CondExpr: { - tolk_assert(args.size() == 3); - auto flag_type = TypeExpr::new_atomic(TypeExpr::_Int); - try { - unify(args[0]->e_type, flag_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "condition in a conditional expression has non-integer type " << args[0]->e_type << ": " << ue; - throw ParseError(here, os.str()); - } - try { - unify(args[1]->e_type, args[2]->e_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "the two variants in a conditional expression have different types " << args[1]->e_type << " and " - << args[2]->e_type << " : " << ue; - throw ParseError(here, os.str()); - } - e_type = args[1]->e_type; - TypeExpr::remove_indirect(e_type); - return; - } - default: - throw Fatal("unexpected cls=" + std::to_string(cls) + " in Expr::deduce_type()"); - } -} - -void Expr::define_new_vars(CodeBlob& code) { - switch (cls) { - case _Tensor: - case _MkTuple: { - for (Expr* item : args) { - item->define_new_vars(code); - } - break; - } - case _Var: - 
if (val < 0) { - val = code.create_var(e_type, sym->sym_idx, here); - sym->value->idx = val; - } - break; - case _Hole: - if (val < 0) { - val = code.create_tmp_var(e_type, here); - } - break; - default: - break; - } -} - -void Expr::predefine_vars() { - switch (cls) { - case _Tensor: - case _MkTuple: { - for (Expr* item : args) { - item->predefine_vars(); - } - break; - } - case _Var: - if (!sym) { - tolk_assert(val < 0 && here.is_defined()); - sym = define_symbol(~val, false, here); - // std::cerr << "predefining variable " << symbols.get_name(~val) << std::endl; - if (!sym) { - throw ParseError{here, std::string{"redefined variable `"} + G.symbols.get_name(~val) + "`"}; - } - sym->value = new SymValVariable(-1, e_type); - if (is_immutable()) { - dynamic_cast(sym->value)->flags |= SymValVariable::flagImmutable; - } - } - break; - default: - break; - } -} - -var_idx_t Expr::new_tmp(CodeBlob& code) const { - return code.create_tmp_var(e_type, here); -} - -void add_set_globs(CodeBlob& code, std::vector>& globs, SrcLocation here) { - for (const auto& p : globs) { - auto& op = code.emplace_back(here, Op::_SetGlob, std::vector{}, std::vector{ p.second }, p.first); - op.set_impure(code); - } -} - -std::vector pre_compile_let(CodeBlob& code, Expr* lhs, Expr* rhs, SrcLocation here) { - if (lhs->is_mktuple()) { - if (rhs->is_mktuple()) { - return pre_compile_let(code, lhs->args.at(0), rhs->args.at(0), here); - } - auto right = rhs->pre_compile(code); - TypeExpr::remove_indirect(rhs->e_type); - auto unpacked_type = rhs->e_type->args.at(0); - std::vector tmp{code.create_tmp_var(unpacked_type, rhs->here)}; - code.emplace_back(lhs->here, Op::_UnTuple, tmp, std::move(right)); - auto tvar = new Expr{Expr::_Var, lhs->here}; - tvar->set_val(tmp[0]); - tvar->set_location(rhs->here); - tvar->e_type = unpacked_type; - pre_compile_let(code, lhs->args.at(0), tvar, here); - return tmp; - } - auto right = rhs->pre_compile(code); - std::vector> globs; - auto left = lhs->pre_compile(code, 
&globs); - for (var_idx_t v : left) { - code.on_var_modification(v, here); - } - code.emplace_back(here, Op::_Let, std::move(left), right); - add_set_globs(code, globs, here); - return right; -} - -std::vector pre_compile_tensor(const std::vector& args, CodeBlob &code, - std::vector> *lval_globs) { - const size_t n = args.size(); - if (n == 0) { // just `()` - return {}; - } - if (n == 1) { // just `(x)`: even if x is modified (e.g. `f(x=x+2)`), there are no next arguments - return args[0]->pre_compile(code, lval_globs); - } - std::vector> res_lists(n); - - struct ModifiedVar { - size_t i, j; - std::unique_ptr* cur_ops; // `LET tmp = v_ij` will be inserted before this - }; - std::vector modified_vars; - for (size_t i = 0; i < n; ++i) { - res_lists[i] = args[i]->pre_compile(code, lval_globs); - for (size_t j = 0; j < res_lists[i].size(); ++j) { - TmpVar& var = code.vars.at(res_lists[i][j]); - if (!lval_globs && !var.is_unnamed()) { - var.on_modification.push_back([&modified_vars, i, j, cur_ops = code.cur_ops, done = false](SrcLocation here) mutable { - if (!done) { - done = true; - modified_vars.push_back({i, j, cur_ops}); - } - }); - } else { - var.on_modification.push_back([](SrcLocation) { - }); - } - } - } - for (const auto& list : res_lists) { - for (var_idx_t v : list) { - tolk_assert(!code.vars.at(v).on_modification.empty()); - code.vars.at(v).on_modification.pop_back(); - } - } - for (size_t idx = modified_vars.size(); idx--; ) { - const ModifiedVar &m = modified_vars[idx]; - var_idx_t orig_v = res_lists[m.i][m.j]; - var_idx_t tmp_v = code.create_tmp_var(code.vars[orig_v].v_type, code.vars[orig_v].where); - std::unique_ptr op = std::make_unique(code.vars[orig_v].where, Op::_Let); - op->left = {tmp_v}; - op->right = {orig_v}; - op->next = std::move((*m.cur_ops)); - *m.cur_ops = std::move(op); - res_lists[m.i][m.j] = tmp_v; - } - std::vector res; - for (const auto& list : res_lists) { - res.insert(res.end(), list.cbegin(), list.cend()); - } - return res; -} - 
-std::vector Expr::pre_compile(CodeBlob& code, std::vector>* lval_globs) const { - if (lval_globs && !(cls == _Tensor || cls == _Var || cls == _Hole || cls == _GlobVar)) { - std::cerr << "lvalue expression constructor is " << cls << std::endl; - throw Fatal{"cannot compile lvalue expression with unknown constructor"}; - } - switch (cls) { - case _Tensor: { - return pre_compile_tensor(args, code, lval_globs); - } - case _Apply: { - tolk_assert(sym); - std::vector res = pre_compile_tensor(args, code, lval_globs);; - auto rvect = new_tmp_vect(code); - auto& op = code.emplace_back(here, Op::_Call, rvect, res, sym); - if (flags & _IsImpure) { - op.set_impure(code); - } - return rvect; - } - case _GrabMutatedVars: { - SymValFunc* func_val = dynamic_cast(sym->value); - tolk_assert(func_val && func_val->has_mutate_params()); - tolk_assert(args.size() == 2 && args[0]->cls == _Apply && args[1]->cls == _Tensor); - auto right = args[0]->pre_compile(code); // apply (returning function result and mutated) - std::vector> local_globs; - if (!lval_globs) { - lval_globs = &local_globs; - } - auto left = args[1]->pre_compile(code, lval_globs); // mutated (lvalue) - auto rvect = new_tmp_vect(code); - left.push_back(rvect[0]); - for (var_idx_t v : left) { - code.on_var_modification(v, here); - } - code.emplace_back(here, Op::_Let, std::move(left), std::move(right)); - add_set_globs(code, local_globs, here); - return rvect; - } - case _ReturnSelf: { - tolk_assert(args.size() == 2 && sym); - Expr* this_arg = args[1]; - auto right = args[0]->pre_compile(code); - return this_arg->pre_compile(code); - } - case _Var: - case _Hole: - if (val < 0) { - throw ParseError{here, "unexpected variable definition"}; - } - return {val}; - case _VarApply: - if (args[0]->cls == _GlobFunc) { - auto res = args[1]->pre_compile(code); - auto rvect = new_tmp_vect(code); - auto& op = code.emplace_back(here, Op::_Call, rvect, std::move(res), args[0]->sym); - if (args[0]->flags & _IsImpure) { - 
op.set_impure(code); - } - return rvect; - } else { - auto res = args[1]->pre_compile(code); - auto tfunc = args[0]->pre_compile(code); - if (tfunc.size() != 1) { - throw Fatal{"stack tuple used as a function"}; - } - res.push_back(tfunc[0]); - auto rvect = new_tmp_vect(code); - code.emplace_back(here, Op::_CallInd, rvect, std::move(res)); - return rvect; - } - case _Const: { - auto rvect = new_tmp_vect(code); - code.emplace_back(here, Op::_IntConst, rvect, intval); - return rvect; - } - case _GlobFunc: - case _GlobVar: { - if (auto fun_ref = dynamic_cast(sym->value)) { - fun_ref->flags |= SymValFunc::flagUsedAsNonCall; - if (!fun_ref->arg_order.empty() || !fun_ref->ret_order.empty()) { - throw ParseError(here, "saving `" + sym->name() + "` into a variable will most likely lead to invalid usage, since it changes the order of variables on the stack"); - } - if (fun_ref->has_mutate_params()) { - throw ParseError(here, "saving `" + sym->name() + "` into a variable is impossible, since it has `mutate` parameters and thus can only be called directly"); - } - } - auto rvect = new_tmp_vect(code); - if (lval_globs) { - lval_globs->push_back({ sym, rvect[0] }); - return rvect; - } else { - code.emplace_back(here, Op::_GlobVar, rvect, std::vector{}, sym); - return rvect; - } - } - case _Letop: { - return pre_compile_let(code, args.at(0), args.at(1), here); - } - case _MkTuple: { - auto left = new_tmp_vect(code); - auto right = args[0]->pre_compile(code); - code.emplace_back(here, Op::_Tuple, left, std::move(right)); - return left; - } - case _CondExpr: { - auto cond = args[0]->pre_compile(code); - tolk_assert(cond.size() == 1); - auto rvect = new_tmp_vect(code); - Op& if_op = code.emplace_back(here, Op::_If, cond); - code.push_set_cur(if_op.block0); - code.emplace_back(here, Op::_Let, rvect, args[1]->pre_compile(code)); - code.close_pop_cur(args[1]->here); - code.push_set_cur(if_op.block1); - code.emplace_back(here, Op::_Let, rvect, args[2]->pre_compile(code)); - 
code.close_pop_cur(args[2]->here); - return rvect; - } - case _SliceConst: { - auto rvect = new_tmp_vect(code); - code.emplace_back(here, Op::_SliceConst, rvect, strval); - return rvect; - } - default: - std::cerr << "expression constructor is " << cls << std::endl; - throw Fatal{"cannot compile expression with unknown constructor"}; - } -} - -} // namespace tolk diff --git a/tolk/lexer.cpp b/tolk/lexer.cpp index 17eb4544..4278f040 100644 --- a/tolk/lexer.cpp +++ b/tolk/lexer.cpp @@ -15,9 +15,9 @@ along with TON Blockchain Library. If not, see . */ #include "lexer.h" -#include "compiler-state.h" -#include "symtable.h" #include +#include +#include namespace tolk { @@ -406,7 +406,6 @@ struct ChunkIdentifierOrKeyword final : ChunkLexerBase { if (TokenType kw_tok = maybe_keyword(str_val)) { lex->add_token(kw_tok, str_val); } else { - G.symbols.lookup_add(str_val); lex->add_token(tok_identifier, str_val); } return true; @@ -421,7 +420,7 @@ struct ChunkIdentifierInBackticks final : ChunkLexerBase { const char* str_begin = lex->c_str(); lex->skip_chars(1); while (!lex->is_eof() && lex->char_at() != '`' && lex->char_at() != '\n') { - if (std::isspace(lex->char_at())) { // probably, I'll remove this restriction after rewriting symtable and cur_sym_idx + if (std::isspace(lex->char_at())) { lex->error("an identifier can't have a space in its name (even inside backticks)"); } lex->skip_chars(1); @@ -432,7 +431,6 @@ struct ChunkIdentifierInBackticks final : ChunkLexerBase { std::string_view str_val(str_begin + 1, lex->c_str() - str_begin - 1); lex->skip_chars(1); - G.symbols.lookup_add(str_val); lex->add_token(tok_identifier, str_val); return true; } diff --git a/tolk/lexer.h b/tolk/lexer.h index 8a25f952..bf116188 100644 --- a/tolk/lexer.h +++ b/tolk/lexer.h @@ -57,10 +57,29 @@ enum TokenType { tok_dot, tok_plus, + tok_set_plus, tok_minus, + tok_set_minus, tok_mul, + tok_set_mul, tok_div, + tok_set_div, tok_mod, + tok_set_mod, + tok_lshift, + tok_set_lshift, + tok_rshift, + 
tok_set_rshift, + tok_rshiftR, + tok_rshiftC, + tok_bitwise_and, + tok_set_bitwise_and, + tok_bitwise_or, + tok_set_bitwise_or, + tok_bitwise_xor, + tok_set_bitwise_xor, + tok_bitwise_not, + tok_question, tok_comma, tok_semicolon, @@ -77,32 +96,14 @@ enum TokenType { tok_logical_not, tok_logical_and, tok_logical_or, - tok_bitwise_and, - tok_bitwise_or, - tok_bitwise_xor, - tok_bitwise_not, tok_eq, tok_neq, tok_leq, tok_geq, tok_spaceship, - tok_lshift, - tok_rshift, - tok_rshiftR, - tok_rshiftC, tok_divR, tok_divC, - tok_set_plus, - tok_set_minus, - tok_set_mul, - tok_set_div, - tok_set_mod, - tok_set_lshift, - tok_set_rshift, - tok_set_bitwise_and, - tok_set_bitwise_or, - tok_set_bitwise_xor, tok_return, tok_repeat, diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index 7257bfb0..3c02c7d1 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -19,1066 +19,591 @@ #include "ast.h" #include "compiler-state.h" #include "common/refint.h" -#include "openssl/digest.hpp" -#include "crypto/common/util.h" -#include "td/utils/crypto.h" -#include "ton/ton-types.h" +#include "constant-evaluator.h" /* - * In this module, we convert modern AST representation to legacy representation - * (global state, Expr, CodeBlob, etc.) to make the rest of compiling process remain unchanged for now. - * Since time goes, I'll gradually get rid of legacy, since most of the code analysis - * should be done at AST level. + * This pipe is the last one operating AST: it transforms AST to IR. + * IR is described as "Op" struct. So, here AST is transformed to Ops, and then all the rest "legacy" + * kernel (initially forked from FunC) comes into play. + * Up to this point, all types have been inferred, all validity checks have been passed, etc. + * All properties in AST nodes are assigned and can be safely used (fun_ref, etc.). + * So, if execution reaches this pass, the input is correct, and code generation should succeed. 
*/ namespace tolk { -static int calc_sym_idx(std::string_view sym_name) { - return G.symbols.lookup(sym_name); -} +struct LValGlobs { + std::vector> globs; -void Expr::fire_error_rvalue_expected() const { - // generally, almost all vertices are rvalue, that's why code leading to "not rvalue" - // should be very strange, like `var x = _` - throw ParseError(here, "rvalue expected"); -} + void add_modified_glob(const GlobalVarData* g_sym, var_idx_t local_ir_idx) { + globs.emplace_back(g_sym, local_ir_idx); + } -void Expr::fire_error_lvalue_expected(const std::string& details) const { - // "lvalue expected" is when a user modifies something unmodifiable - // example: `f() = 32` - // example: `loadUint(c.beginParse(), 32)` (since `loadUint()` mutates the first argument) - throw ParseError(here, "lvalue expected (" + details + ")"); -} + void gen_ops_set_globs(CodeBlob& code, SrcLocation loc) const { + for (const auto& [g_sym, ir_idx] : globs) { + Op& op = code.emplace_back(loc, Op::_SetGlob, std::vector{}, std::vector{ ir_idx }, g_sym); + op.set_impure_flag(); + } + } +}; -void Expr::fire_error_modifying_immutable(const std::string& details) const { - // "modifying immutable variable" is when a user assigns to a variable declared `val` - // example: `immutable_val = 32` - // example: `(regular_var, immutable_val) = f()` - // for better error message, try to print out variable name if possible - std::string variable_name; - if (cls == _Var || cls == _Const) { - variable_name = sym->name(); - } else if (cls == _Tensor || cls == _MkTuple) { - for (const Expr* arg : (cls == _Tensor ? 
args : args[0]->args)) { - if (arg->is_immutable() && (arg->cls == _Var || arg->cls == _Const)) { - variable_name = arg->sym->name(); - break; +std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, LValGlobs* lval_globs = nullptr); +void process_statement(AnyV v, CodeBlob& code); + + +static std::vector> pre_compile_tensor_inner(CodeBlob& code, const std::vector& args, + LValGlobs* lval_globs) { + const int n = static_cast(args.size()); + if (n == 0) { // just `()` + return {}; + } + if (n == 1) { // just `(x)`: even if x is modified (e.g. `f(x=x+2)`), there are no next arguments + return {pre_compile_expr(args[0], code, lval_globs)}; + } + + // the purpose is to handle such cases: `return (x, x += y, x)` + // without this, ops will be { _Call $2 = +($0_x, $1_y); _Return $0_x, $2, $0_x } - invalid + // with this, ops will be { _Let $3 = $0_x; _Call $2 = ...; _Return $3, $2, $0_x } - valid, tmp var for x + // how it works: for every arg, after transforming to ops, start tracking ir_idx inside it + // on modification attempt, create Op::_Let to a tmp var and replace old ir_idx with tmp_idx in result + struct WatchingVarList { + std::vector watched_vars; + std::vector> res_lists; + + explicit WatchingVarList(int n_args) { + res_lists.reserve(n_args); + } + + bool is_watched(var_idx_t ir_idx) const { + return std::find(watched_vars.begin(), watched_vars.end(), ir_idx) != watched_vars.end(); + } + + void add_and_watch_modifications(std::vector&& vars_of_ith_arg, CodeBlob& code) { + for (var_idx_t ir_idx : vars_of_ith_arg) { + if (code.vars[ir_idx].v_sym && !is_watched(ir_idx)) { + watched_vars.emplace_back(ir_idx); + code.vars[ir_idx].on_modification.emplace_back([this, &code, ir_idx](SrcLocation loc) { + on_var_modified(ir_idx, loc, code); + }); + } + } + res_lists.emplace_back(std::move(vars_of_ith_arg)); + } + + void on_var_modified(var_idx_t ir_idx, SrcLocation loc, CodeBlob& code) { + tolk_assert(is_watched(ir_idx)); + var_idx_t tmp_idx = 
code.create_tmp_var(code.vars[ir_idx].v_type, loc); + code.emplace_back(loc, Op::_Let, std::vector{tmp_idx}, std::vector{ir_idx}); + for (std::vector& prev_vars : res_lists) { + std::replace(prev_vars.begin(), prev_vars.end(), ir_idx, tmp_idx); } } - } - if (variable_name == "self") { - throw ParseError(here, "modifying `self` (" + details + "), which is immutable by default; probably, you want to declare `mutate self`"); - } else if (!variable_name.empty()) { - throw ParseError(here, "modifying an immutable variable `" + variable_name + "` (" + details + ")"); - } else { - throw ParseError(here, "modifying an immutable variable (" + details + ")"); - } -} - -GNU_ATTRIBUTE_COLD GNU_ATTRIBUTE_NORETURN -static void fire_error_invalid_mutate_arg_passed(SrcLocation loc, const SymDef* func_sym, const SymDef* param_sym, bool called_as_method, bool arg_passed_as_mutate, AnyV arg_expr) { - std::string func_name = func_sym->name(); - std::string arg_str(arg_expr->type == ast_identifier ? arg_expr->as()->name : "obj"); - const SymValFunc* func_val = dynamic_cast(func_sym->value); - const SymValVariable* param_val = dynamic_cast(param_sym->value); - - // case: `loadInt(cs, 32)`; suggest: `cs.loadInt(32)` - if (param_val->is_mutate_parameter() && !arg_passed_as_mutate && !called_as_method && param_val->idx == 0 && func_val->does_accept_self()) { - throw ParseError(loc, "`" + func_name + "` is a mutating method; consider calling `" + arg_str + "." + func_name + "()`, not `" + func_name + "(" + arg_str + ")`"); - } - // case: `cs.mutating_function()`; suggest: `mutating_function(mutate cs)` or make it a method - if (param_val->is_mutate_parameter() && called_as_method && param_val->idx == 0 && !func_val->does_accept_self()) { - throw ParseError(loc, "function `" + func_name + "` mutates parameter `" + param_sym->name() + "`; consider calling `" + func_name + "(mutate " + arg_str + ")`, not `" + arg_str + "." 
+ func_name + "`(); alternatively, rename parameter to `self` to make it a method"); - } - // case: `mutating_function(arg)`; suggest: `mutate arg` - if (param_val->is_mutate_parameter() && !arg_passed_as_mutate) { - throw ParseError(loc, "function `" + func_name + "` mutates parameter `" + param_sym->name() + "`; you need to specify `mutate` when passing an argument, like `mutate " + arg_str + "`"); - } - // case: `usual_function(mutate arg)` - if (!param_val->is_mutate_parameter() && arg_passed_as_mutate) { - throw ParseError(loc, "incorrect `mutate`, since `" + func_name + "` does not mutate this parameter"); - } - throw Fatal("unreachable"); -} - -// parse address like "EQCRDM9h4k3UJdOePPuyX40mCgA4vxge5Dc5vjBR8djbEKC5" -// based on unpack_std_smc_addr() from block.cpp -// (which is not included to avoid linking with ton_crypto) -static bool parse_friendly_address(const char packed[48], ton::WorkchainId& workchain, ton::StdSmcAddress& addr) { - unsigned char buffer[36]; - if (!td::buff_base64_decode(td::MutableSlice{buffer, 36}, td::Slice{packed, 48}, true)) { - return false; - } - td::uint16 crc = td::crc16(td::Slice{buffer, 34}); - if (buffer[34] != (crc >> 8) || buffer[35] != (crc & 0xff) || (buffer[0] & 0x3f) != 0x11) { - return false; - } - workchain = (td::int8)buffer[1]; - std::memcpy(addr.data(), buffer + 2, 32); - return true; -} - -// parse address like "0:527964d55cfa6eb731f4bfc07e9d025098097ef8505519e853986279bd8400d8" -// based on StdAddress::parse_addr() from block.cpp -// (which is not included to avoid linking with ton_crypto) -static bool parse_raw_address(const std::string& acc_string, int& workchain, ton::StdSmcAddress& addr) { - size_t pos = acc_string.find(':'); - if (pos != std::string::npos) { - td::Result r_wc = td::to_integer_safe(acc_string.substr(0, pos)); - if (r_wc.is_error()) { - return false; - } - workchain = r_wc.move_as_ok(); - pos++; - } else { - pos = 0; - } - if (acc_string.size() != pos + 64) { - return false; - } - - for 
(int i = 0; i < 64; ++i) { // loop through each hex digit - char c = acc_string[pos + i]; - int x; - if (c >= '0' && c <= '9') { - x = c - '0'; - } else if (c >= 'a' && c <= 'z') { - x = c - 'a' + 10; - } else if (c >= 'A' && c <= 'Z') { - x = c - 'A' + 10; - } else { - return false; - } - - if ((i & 1) == 0) { - addr.data()[i >> 1] = static_cast((addr.data()[i >> 1] & 0x0F) | (x << 4)); - } else { - addr.data()[i >> 1] = static_cast((addr.data()[i >> 1] & 0xF0) | x); - } - } - return true; -} - -static Expr* create_expr_apply(SrcLocation loc, SymDef* sym, std::vector&& args) { - Expr* apply = new Expr(Expr::_Apply, sym, std::move(args)); - apply->here = loc; - apply->flags = Expr::_IsRvalue; - apply->deduce_type(); - return apply; -} - -static Expr* create_expr_int_const(SrcLocation loc, int int_val) { - Expr* int_const = new Expr(Expr::_Const, loc); - int_const->intval = td::make_refint(int_val); - int_const->flags = Expr::_IsRvalue; - int_const->e_type = TypeExpr::new_atomic(TypeExpr::_Int); - return int_const; -} - -namespace blk_fl { -enum { end = 1, ret = 2, empty = 4 }; -typedef int val; -constexpr val init = end | empty; -void combine(val& x, const val y) { - x |= y & ret; - x &= y | ~(end | empty); -} -void combine_parallel(val& x, const val y) { - x &= y | ~(ret | empty); - x |= y & end; -} -} // namespace blk_fl - -Expr* process_expr(AnyV v, CodeBlob& code); -blk_fl::val process_statement(AnyV v, CodeBlob& code); - -static void check_global_func(SrcLocation loc, sym_idx_t func_name) { - SymDef* sym_def = lookup_symbol(func_name); - if (!sym_def) { - throw ParseError(loc, "undefined symbol `" + G.symbols.get_name(func_name) + "`"); - } -} - -static void check_import_exists_when_using_sym(AnyV v_usage, const SymDef* used_sym) { - if (!v_usage->loc.is_symbol_from_same_or_builtin_file(used_sym->loc)) { - const SrcFile* declared_in = used_sym->loc.get_src_file(); - bool has_import = false; - for (const SrcFile::ImportStatement& import_stmt : 
v_usage->loc.get_src_file()->imports) { - if (import_stmt.imported_file == declared_in) { - has_import = true; + std::vector> clear_and_stop_watching(CodeBlob& code) { + for (var_idx_t ir_idx : watched_vars) { + code.vars[ir_idx].on_modification.pop_back(); } + watched_vars.clear(); + return std::move(res_lists); } - if (!has_import) { - v_usage->error("Using a non-imported symbol `" + used_sym->name() + "`. Forgot to import \"" + declared_in->rel_filename + "\"?"); - } + }; + + WatchingVarList watched_vars(n); + for (int arg_idx = 0; arg_idx < n; ++arg_idx) { + std::vector vars_of_ith_arg = pre_compile_expr(args[arg_idx], code, lval_globs); + watched_vars.add_and_watch_modifications(std::move(vars_of_ith_arg), code); } + return watched_vars.clear_and_stop_watching(code); } -static Expr* create_new_local_variable(SrcLocation loc, std::string_view var_name, TypeExpr* var_type, bool is_immutable) { - SymDef* sym = lookup_symbol(calc_sym_idx(var_name)); - if (sym) { // creating a new variable, but something found in symtable - if (sym->level != G.scope_level) { - sym = nullptr; // declaring a new variable with the same name, but in another scope - } else { - throw ParseError(loc, "redeclaration of local variable `" + static_cast(var_name) + "`"); - } +static std::vector pre_compile_tensor(CodeBlob& code, const std::vector& args, + LValGlobs* lval_globs = nullptr) { + std::vector> res_lists = pre_compile_tensor_inner(code, args, lval_globs); + std::vector res; + for (const std::vector& list : res_lists) { + res.insert(res.end(), list.cbegin(), list.cend()); } - Expr* x = new Expr{Expr::_Var, loc}; - x->val = ~calc_sym_idx(var_name); - x->e_type = var_type; - x->flags = Expr::_IsLvalue | (is_immutable ? 
Expr::_IsImmutable : 0); - return x; + return res; } -static Expr* create_new_underscore_variable(SrcLocation loc, TypeExpr* var_type) { - Expr* x = new Expr{Expr::_Hole, loc}; - x->val = -1; - x->flags = Expr::_IsLvalue; - x->e_type = var_type; - return x; +static std::vector pre_compile_let(CodeBlob& code, AnyExprV lhs, AnyExprV rhs, SrcLocation loc) { + // [lhs] = [rhs]; since type checking is ok, it's the same as "lhs = rhs" + if (lhs->type == ast_tensor_square && rhs->type == ast_tensor_square) { + std::vector right = pre_compile_tensor(code, rhs->as()->get_items()); + LValGlobs globs; + std::vector left = pre_compile_tensor(code, lhs->as()->get_items(), &globs); + code.on_var_modification(left, loc); + code.emplace_back(loc, Op::_Let, std::move(left), right); + globs.gen_ops_set_globs(code, loc); + return right; + } + // [lhs] = rhs; it's un-tuple to N left vars + if (lhs->type == ast_tensor_square) { + std::vector right = pre_compile_expr(rhs, code); + TypeExpr* rhs_type = rhs->inferred_type; + TypeExpr::remove_indirect(rhs_type); + TypeExpr* unpacked_type = rhs_type->args.at(0); // rhs->inferred_type is tuple> + std::vector rvect = {code.create_tmp_var(unpacked_type, rhs->loc)}; + code.emplace_back(lhs->loc, Op::_UnTuple, rvect, std::move(right)); + LValGlobs globs; + std::vector left = pre_compile_tensor(code, lhs->as()->get_items(), &globs); + code.on_var_modification(left, loc); + code.emplace_back(loc, Op::_Let, std::move(left), rvect); + globs.gen_ops_set_globs(code, loc); + return rvect; + } + // lhs = rhs + std::vector right = pre_compile_expr(rhs, code); + LValGlobs globs; + std::vector left = pre_compile_expr(lhs, code, &globs); + code.on_var_modification(left, loc); + code.emplace_back(loc, Op::_Let, std::move(left), right); + globs.gen_ops_set_globs(code, loc); + return right; } -static Expr* process_expr(V v, CodeBlob& code) { +static std::vector gen_op_call(CodeBlob& code, TypeExpr* ret_type, SrcLocation here, + std::vector&& args_vars, const 
FunctionData* fun_ref) { + std::vector rvect = {code.create_tmp_var(ret_type, here)}; + Op& op = code.emplace_back(here, Op::_Call, rvect, std::move(args_vars), fun_ref); + if (!fun_ref->is_marked_as_pure()) { + op.set_impure_flag(); + } + return rvect; +} + + +static std::vector process_binary_operator(V v, CodeBlob& code) { TokenType t = v->tok; std::string operator_name = static_cast(v->operator_name); - if (t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || - t == tok_set_mod || t == tok_set_lshift || t == tok_set_rshift || - t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor) { - Expr* x = process_expr(v->get_lhs(), code); - x->chk_rvalue(); - if (!x->is_lvalue()) { - x->fire_error_lvalue_expected("left side of assignment"); - } - if (x->is_immutable()) { - x->fire_error_modifying_immutable("left side of assignment"); - } - SymDef* sym = lookup_symbol(calc_sym_idx("^_" + operator_name + "_")); - Expr* y = process_expr(v->get_rhs(), code); - y->chk_rvalue(); - Expr* z = create_expr_apply(v->loc, sym, {x, y}); - Expr* res = new Expr{Expr::_Letop, {x->copy(), z}}; - res->here = v->loc; - res->flags = x->flags | Expr::_IsRvalue; - res->deduce_type(); - return res; + if (v->is_set_assign()) { + std::string_view calc_operator = std::string_view{operator_name}.substr(0, operator_name.size() - 1); + auto v_apply = createV(v->loc, calc_operator, static_cast(t - 1), v->get_lhs(), v->get_rhs()); + v_apply->assign_inferred_type(v->inferred_type); + return pre_compile_let(code, v->get_lhs(), v_apply, v->loc); } - if (t == tok_assign) { - Expr* x = process_expr(v->get_lhs(), code); - if (!x->is_lvalue()) { - x->fire_error_lvalue_expected("left side of assignment"); - } - if (x->is_immutable()) { - x->fire_error_modifying_immutable("left side of assignment"); - } - Expr* y = process_expr(v->get_rhs(), code); - y->chk_rvalue(); - x->predefine_vars(); - x->define_new_vars(code); - Expr* res = new Expr{Expr::_Letop, 
{x, y}}; - res->here = v->loc; - res->flags = x->flags | Expr::_IsRvalue; - res->deduce_type(); - return res; + if (v->is_assign()) { + return pre_compile_let(code, v->get_lhs(), v->get_rhs(), v->loc); } if (t == tok_minus || t == tok_plus || t == tok_bitwise_and || t == tok_bitwise_or || t == tok_bitwise_xor || t == tok_eq || t == tok_lt || t == tok_gt || t == tok_leq || t == tok_geq || t == tok_neq || t == tok_spaceship || t == tok_lshift || t == tok_rshift || t == tok_rshiftC || t == tok_rshiftR || t == tok_mul || t == tok_div || t == tok_mod || t == tok_divC || t == tok_divR) { - Expr* res = process_expr(v->get_lhs(), code); - res->chk_rvalue(); - SymDef* sym = lookup_symbol(calc_sym_idx("_" + operator_name + "_")); - Expr* x = process_expr(v->get_rhs(), code); - x->chk_rvalue(); - res = create_expr_apply(v->loc, sym, {res, x}); - return res; + const FunctionData* fun_ref = lookup_global_symbol("_" + operator_name + "_")->as(); + std::vector args_vars = pre_compile_tensor(code, {v->get_lhs(), v->get_rhs()}); + return gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), fun_ref); } if (t == tok_logical_and || t == tok_logical_or) { // do the following transformations: // a && b -> a ? (b != 0) : 0 // a || b -> a ? 1 : (b != 0) - SymDef* sym_neq = lookup_symbol(calc_sym_idx("_!=_")); - Expr* lhs = process_expr(v->get_lhs(), code); - Expr* rhs = process_expr(v->get_rhs(), code); - Expr* e_neq0 = create_expr_apply(v->loc, sym_neq, {rhs, create_expr_int_const(v->loc, 0)}); - Expr* e_when_true = t == tok_logical_and ? e_neq0 : create_expr_int_const(v->loc, -1); - Expr* e_when_false = t == tok_logical_and ? 
create_expr_int_const(v->loc, 0) : e_neq0; - Expr* e_ternary = new Expr(Expr::_CondExpr, {lhs, e_when_true, e_when_false}); - e_ternary->here = v->loc; - e_ternary->flags = Expr::_IsRvalue; - e_ternary->deduce_type(); - return e_ternary; + AnyExprV v_0 = createV(v->loc, td::make_refint(0), "0"); + v_0->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + AnyExprV v_1 = createV(v->loc, td::make_refint(-1), "-1"); + v_1->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + AnyExprV v_b_ne_0 = createV(v->loc, "!=", tok_neq, v->get_rhs(), v_0); + v_b_ne_0->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + std::vector cond = pre_compile_expr(v->get_lhs(), code); + tolk_assert(cond.size() == 1); + std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; + Op& if_op = code.emplace_back(v->loc, Op::_If, cond); + code.push_set_cur(if_op.block0); + code.emplace_back(v->loc, Op::_Let, rvect, pre_compile_expr(t == tok_logical_and ? v_b_ne_0 : v_1, code)); + code.close_pop_cur(v->loc); + code.push_set_cur(if_op.block1); + code.emplace_back(v->loc, Op::_Let, rvect, pre_compile_expr(t == tok_logical_and ? 
v_0 : v_b_ne_0, code)); + code.close_pop_cur(v->loc); + return rvect; } - v->error("unsupported binary operator"); + throw UnexpectedASTNodeType(v, "process_binary_operator"); } -static Expr* process_expr(V v, CodeBlob& code) { - TokenType t = v->tok; - SymDef* sym = lookup_symbol(calc_sym_idx(static_cast(v->operator_name) + "_")); - Expr* x = process_expr(v->get_rhs(), code); - x->chk_rvalue(); - - // here's an optimization to convert "-1" (tok_minus tok_int_const) to a const -1, not to Expr::Apply(-,1) - // without this, everything still works, but Tolk looses some vars/stack knowledge for now (to be fixed later) - // in FunC, it was: - // `var fst = -1;` // is constantly 1 - // `var snd = - 1;` // is Expr::Apply(-), a comment "snd=1" is lost in stack layout comments, and so on - // hence, when after grammar modification tok_minus became a true unary operator (not a part of a number), - // and thus to preserve existing behavior until compiler parts are completely rewritten, handle this case here - if (t == tok_minus && x->cls == Expr::_Const) { - x->intval = -x->intval; - if (!x->intval->signed_fits_bits(257)) { - v->error("integer overflow"); - } - return x; - } - if (t == tok_plus && x->cls == Expr::_Const) { - return x; - } - - return create_expr_apply(v->loc, sym, {x}); +static std::vector process_unary_operator(V v, CodeBlob& code) { + const FunctionData* fun_ref = lookup_global_symbol(static_cast(v->operator_name) + "_")->as(); + std::vector args_vars = pre_compile_tensor(code, {v->get_rhs()}); + return gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), fun_ref); } -static Expr* process_expr(V v, CodeBlob& code) { - Expr* cond = process_expr(v->get_cond(), code); - cond->chk_rvalue(); - Expr* x = process_expr(v->get_when_true(), code); - x->chk_rvalue(); - Expr* y = process_expr(v->get_when_false(), code); - y->chk_rvalue(); - Expr* res = new Expr{Expr::_CondExpr, {cond, x, y}}; - res->here = v->loc; - res->flags = Expr::_IsRvalue; - 
res->deduce_type(); - return res; +static std::vector process_ternary_operator(V v, CodeBlob& code) { + std::vector cond = pre_compile_expr(v->get_cond(), code); + tolk_assert(cond.size() == 1); + std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; + Op& if_op = code.emplace_back(v->loc, Op::_If, cond); + code.push_set_cur(if_op.block0); + code.emplace_back(v->get_when_true()->loc, Op::_Let, rvect, pre_compile_expr(v->get_when_true(), code)); + code.close_pop_cur(v->get_when_true()->loc); + code.push_set_cur(if_op.block1); + code.emplace_back(v->get_when_false()->loc, Op::_Let, rvect, pre_compile_expr(v->get_when_false(), code)); + code.close_pop_cur(v->get_when_false()->loc); + return rvect; } -static Expr* process_function_arguments(SymDef* func_sym, V v, Expr* lhs_of_dot_call, CodeBlob& code) { - SymValFunc* func_val = dynamic_cast(func_sym->value); - int delta_self = lhs_of_dot_call ? 1 : 0; - int n_arguments = static_cast(v->get_arguments().size()) + delta_self; - int n_parameters = static_cast(func_val->parameters.size()); - - // Tolk doesn't have optional parameters currently, so just compare counts - if (n_parameters < n_arguments) { - v->error("too many arguments in call to `" + func_sym->name() + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); - } - if (n_arguments < n_parameters) { - v->error("too few arguments in call to `" + func_sym->name() + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); - } - - std::vector apply_args; - apply_args.reserve(n_arguments); - if (lhs_of_dot_call) { - apply_args.push_back(lhs_of_dot_call); - } - for (int i = delta_self; i < n_arguments; ++i) { - auto v_arg = v->get_arg(i - delta_self); - if (SymDef* param_sym = func_val->parameters[i]) { // can be null (for underscore parameter) - SymValVariable* param_val = dynamic_cast(param_sym->value); - if 
(param_val->is_mutate_parameter() != v_arg->passed_as_mutate) { - fire_error_invalid_mutate_arg_passed(v_arg->loc, func_sym, param_sym, false, v_arg->passed_as_mutate, v_arg->get_expr()); - } - } - - Expr* arg = process_expr(v_arg->get_expr(), code); - arg->chk_rvalue(); - apply_args.push_back(arg); - } - - Expr* apply = new Expr{Expr::_Apply, func_sym, std::move(apply_args)}; - apply->flags = Expr::_IsRvalue | (!func_val->is_marked_as_pure() * Expr::_IsImpure); - apply->here = v->loc; - apply->deduce_type(); - - return apply; -} - -static Expr* process_function_call(V v, CodeBlob& code) { - // special error for "null()" which is a FunC syntax - if (v->get_called_f()->type == ast_null_keyword) { - v->error("null is not a function: use `null`, not `null()`"); - } - +static std::vector process_function_call(V v, CodeBlob& code) { // most likely it's a global function, but also may be `some_var(args)` or even `getF()(args)` - Expr* lhs = process_expr(v->get_called_f(), code); - if (lhs->cls != Expr::_GlobFunc) { - Expr* tensor_arg = new Expr(Expr::_Tensor, v->loc); - std::vector type_list; - type_list.reserve(v->get_num_args()); + const FunctionData* fun_ref = v->fun_maybe; + if (!fun_ref) { + std::vector args; + args.reserve(v->get_num_args()); for (int i = 0; i < v->get_num_args(); ++i) { - auto v_arg = v->get_arg(i); - if (v_arg->passed_as_mutate) { - v_arg->error("`mutate` used for non-mutate argument"); - } - Expr* arg = process_expr(v_arg->get_expr(), code); - arg->chk_rvalue(); - tensor_arg->pb_arg(arg); - type_list.push_back(arg->e_type); + args.push_back(v->get_arg(i)->get_expr()); } - tensor_arg->flags = Expr::_IsRvalue; - tensor_arg->e_type = TypeExpr::new_tensor(std::move(type_list)); - - Expr* var_apply = new Expr{Expr::_VarApply, {lhs, tensor_arg}}; - var_apply->here = v->loc; - var_apply->flags = Expr::_IsRvalue; - var_apply->deduce_type(); - return var_apply; + std::vector args_vars = pre_compile_tensor(code, args); + std::vector tfunc = 
pre_compile_expr(v->get_called_f(), code); + tolk_assert(tfunc.size() == 1); + args_vars.push_back(tfunc[0]); + std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; + Op& op = code.emplace_back(v->loc, Op::_CallInd, rvect, std::move(args_vars)); + op.set_impure_flag(); + return rvect; } - Expr* apply = process_function_arguments(lhs->sym, v->get_arg_list(), nullptr, code); + std::vector args; + args.reserve(v->get_num_args()); + for (int i = 0; i < v->get_num_args(); ++i) { + args.push_back(v->get_arg(i)->get_expr()); + } + std::vector args_vars = pre_compile_tensor(code, args); - if (dynamic_cast(apply->sym->value)->has_mutate_params()) { - const std::vector& args = apply->args; - SymValFunc* func_val = dynamic_cast(apply->sym->value); - tolk_assert(func_val->parameters.size() == args.size()); - Expr* grabbed_vars = new Expr(Expr::_Tensor, v->loc); - std::vector type_list; - for (int i = 0; i < static_cast(args.size()); ++i) { - SymDef* param_def = func_val->parameters[i]; - if (param_def && dynamic_cast(param_def->value)->is_mutate_parameter()) { - if (!args[i]->is_lvalue()) { - args[i]->fire_error_lvalue_expected("call a mutating function"); - } - if (args[i]->is_immutable()) { - args[i]->fire_error_modifying_immutable("call a mutating function"); - } - grabbed_vars->pb_arg(args[i]->copy()); - type_list.emplace_back(args[i]->e_type); + TypeExpr* op_call_type = v->inferred_type; + if (fun_ref->has_mutate_params()) { + std::vector types_list; + for (int i = 0; i < v->get_num_args(); ++i) { + if (fun_ref->parameters[i].is_mutate_parameter()) { + types_list.push_back(args[i]->inferred_type); } } - grabbed_vars->flags = Expr::_IsRvalue; - Expr* grab_mutate = new Expr(Expr::_GrabMutatedVars, apply->sym, {apply, grabbed_vars}); - grab_mutate->here = v->loc; - grab_mutate->flags = apply->flags; - grab_mutate->deduce_type(); - return grab_mutate; + types_list.push_back(v->inferred_type); + op_call_type = TypeExpr::new_tensor(std::move(types_list)); } - 
return apply; + std::vector rvect_apply = gen_op_call(code, op_call_type, v->loc, std::move(args_vars), fun_ref); + + if (fun_ref->has_mutate_params()) { + LValGlobs local_globs; + std::vector left; + for (int i = 0; i < v->get_num_args(); ++i) { + if (fun_ref->parameters[i].is_mutate_parameter()) { + AnyExprV arg_i = v->get_arg(i)->get_expr(); + tolk_assert(arg_i->is_lvalue); + std::vector ith_var_idx = pre_compile_expr(arg_i, code, &local_globs); + left.insert(left.end(), ith_var_idx.begin(), ith_var_idx.end()); + } + } + std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; + left.push_back(rvect[0]); + code.on_var_modification(left, v->loc); + code.emplace_back(v->loc, Op::_Let, std::move(left), rvect_apply); + local_globs.gen_ops_set_globs(code, v->loc); + return rvect; + } + + return rvect_apply; } -static Expr* process_dot_method_call(V v, CodeBlob& code) { - sym_idx_t name_idx = calc_sym_idx(v->method_name); - check_global_func(v->loc, name_idx); - SymDef* func_sym = lookup_symbol(name_idx); - SymValFunc* func_val = dynamic_cast(func_sym->value); - tolk_assert(func_val != nullptr); - - Expr* obj = process_expr(v->get_obj(), code); - obj->chk_rvalue(); - - if (func_val->parameters.empty()) { - v->error("`" + func_sym->name() + "` has no parameters and can not be called as method"); - } - if (!func_val->does_accept_self() && func_val->parameters[0] && dynamic_cast(func_val->parameters[0]->value)->is_mutate_parameter()) { - fire_error_invalid_mutate_arg_passed(v->loc, func_sym, func_val->parameters[0], true, false, v->get_obj()); +static std::vector process_dot_method_call(V v, CodeBlob& code) { + std::vector args; + args.reserve(1 + v->get_num_args()); + args.push_back(v->get_obj()); + for (int i = 0; i < v->get_num_args(); ++i) { + args.push_back(v->get_arg(i)->get_expr()); } + std::vector> vars_per_arg = pre_compile_tensor_inner(code, args, nullptr); - Expr* apply = process_function_arguments(func_sym, v->get_arg_list(), obj, code); - - Expr* 
obj_lval = apply->args[0]; - if (!obj_lval->is_lvalue()) { - if (obj_lval->cls == Expr::_ReturnSelf) { - obj_lval = obj_lval->args[1]; - } else { - Expr* tmp_var = create_new_underscore_variable(v->loc, obj_lval->e_type); - tmp_var->define_new_vars(code); - Expr* assign_to_tmp_var = new Expr(Expr::_Letop, {tmp_var, obj_lval}); - assign_to_tmp_var->here = v->loc; - assign_to_tmp_var->flags = Expr::_IsRvalue; - assign_to_tmp_var->deduce_type(); - apply->args[0] = assign_to_tmp_var; - obj_lval = tmp_var; + TypeExpr* op_call_type = v->inferred_type; + TypeExpr* real_ret_type = v->inferred_type; + if (v->fun_ref->does_return_self()) { + real_ret_type = TypeExpr::new_unit(); + if (!v->fun_ref->parameters[0].is_mutate_parameter()) { + op_call_type = TypeExpr::new_unit(); } } + if (v->fun_ref->has_mutate_params()) { + std::vector types_list; + for (int i = 0; i < 1 + v->get_num_args(); ++i) { + if (v->fun_ref->parameters[i].is_mutate_parameter()) { + types_list.push_back(args[i]->inferred_type); + } + } + types_list.push_back(real_ret_type); + op_call_type = TypeExpr::new_tensor(std::move(types_list)); + } - if (func_val->has_mutate_params()) { - tolk_assert(func_val->parameters.size() == apply->args.size()); - Expr* grabbed_vars = new Expr(Expr::_Tensor, v->loc); - std::vector type_list; - for (int i = 0; i < static_cast(apply->args.size()); ++i) { - SymDef* param_sym = func_val->parameters[i]; - if (param_sym && dynamic_cast(param_sym->value)->is_mutate_parameter()) { - Expr* ith_arg = apply->args[i]; - if (ith_arg->is_immutable()) { - ith_arg->fire_error_modifying_immutable("call a mutating method"); - } + std::vector args_vars; + for (const std::vector& list : vars_per_arg) { + args_vars.insert(args_vars.end(), list.cbegin(), list.cend()); + } + std::vector rvect_apply = gen_op_call(code, op_call_type, v->loc, std::move(args_vars), v->fun_ref); - Expr* var_to_mutate = nullptr; - if (ith_arg->is_lvalue()) { - var_to_mutate = ith_arg->copy(); - } else if (i == 0) { - 
var_to_mutate = obj_lval; + AnyExprV obj_leftmost = args[0]; + while (obj_leftmost->type == ast_dot_method_call && obj_leftmost->as()->fun_ref->does_return_self()) { + obj_leftmost = obj_leftmost->as()->get_obj(); + } + + if (v->fun_ref->has_mutate_params()) { + LValGlobs local_globs; + std::vector left; + for (int i = 0; i < 1 + v->get_num_args(); ++i) { + if (v->fun_ref->parameters[i].is_mutate_parameter()) { + AnyExprV arg_i = i == 0 ? obj_leftmost : args[i]; + tolk_assert (arg_i->is_lvalue || i == 0); + if (arg_i->is_lvalue) { + std::vector ith_var_idx = pre_compile_expr(arg_i, code, &local_globs); + left.insert(left.end(), ith_var_idx.begin(), ith_var_idx.end()); } else { - ith_arg->fire_error_lvalue_expected("call a mutating method"); + left.insert(left.end(), vars_per_arg[0].begin(), vars_per_arg[0].end()); } - tolk_assert(var_to_mutate->is_lvalue() && !var_to_mutate->is_immutable()); - grabbed_vars->pb_arg(var_to_mutate); - type_list.emplace_back(var_to_mutate->e_type); } } - grabbed_vars->flags = Expr::_IsRvalue; - - Expr* grab_mutate = new Expr(Expr::_GrabMutatedVars, func_sym, {apply, grabbed_vars}); - grab_mutate->here = v->loc; - grab_mutate->flags = apply->flags; - grab_mutate->deduce_type(); - - apply = grab_mutate; + std::vector rvect = {code.create_tmp_var(real_ret_type, v->loc)}; + left.push_back(rvect[0]); + code.on_var_modification(left, v->loc); + code.emplace_back(v->loc, Op::_Let, std::move(left), rvect_apply); + local_globs.gen_ops_set_globs(code, v->loc); + rvect_apply = rvect; } - if (func_val->does_return_self()) { - Expr* self_arg = obj_lval; - tolk_assert(self_arg->is_lvalue()); - - Expr* return_self = new Expr(Expr::_ReturnSelf, func_sym, {apply, self_arg}); - return_self->here = v->loc; - return_self->flags = Expr::_IsRvalue; - return_self->deduce_type(); - - apply = return_self; - } - - return apply; -} - -static Expr* process_expr(V v, CodeBlob& code) { - if (v->empty()) { - Expr* res = new Expr{Expr::_Tensor, {}}; - res->flags = 
Expr::_IsRvalue; - res->here = v->loc; - res->e_type = TypeExpr::new_unit(); - return res; - } - - Expr* res = process_expr(v->get_item(0), code); - std::vector type_list; - type_list.push_back(res->e_type); - int f = res->flags; - res = new Expr{Expr::_Tensor, {res}}; - for (int i = 1; i < v->size(); ++i) { - Expr* x = process_expr(v->get_item(i), code); - res->pb_arg(x); - f &= (x->flags | Expr::_IsImmutable); - f |= (x->flags & Expr::_IsImmutable); - type_list.push_back(x->e_type); - } - res->here = v->loc; - res->flags = f; - res->e_type = TypeExpr::new_tensor(std::move(type_list)); - return res; -} - -static Expr* process_expr(V v, CodeBlob& code) { - if (v->empty()) { - Expr* res = new Expr{Expr::_Tensor, {}}; - res->flags = Expr::_IsRvalue; - res->here = v->loc; - res->e_type = TypeExpr::new_unit(); - res = new Expr{Expr::_MkTuple, {res}}; - res->flags = Expr::_IsRvalue; - res->here = v->loc; - res->e_type = TypeExpr::new_tuple(res->args.at(0)->e_type); - return res; - } - - Expr* res = process_expr(v->get_item(0), code); - std::vector type_list; - type_list.push_back(res->e_type); - int f = res->flags; - res = new Expr{Expr::_Tensor, {res}}; - for (int i = 1; i < v->size(); ++i) { - Expr* x = process_expr(v->get_item(i), code); - res->pb_arg(x); - f &= (x->flags | Expr::_IsImmutable); - f |= (x->flags & Expr::_IsImmutable); - type_list.push_back(x->e_type); - } - res->here = v->loc; - res->flags = f; - res->e_type = TypeExpr::new_tensor(std::move(type_list), false); - res = new Expr{Expr::_MkTuple, {res}}; - res->flags = f; - res->here = v->loc; - res->e_type = TypeExpr::new_tuple(res->args.at(0)->e_type); - return res; -} - -static Expr* process_expr(V v) { - Expr* res = new Expr{Expr::_Const, v->loc}; - res->flags = Expr::_IsRvalue; - res->intval = td::string_to_int256(static_cast(v->int_val)); - if (res->intval.is_null() || !res->intval->signed_fits_bits(257)) { - v->error("invalid integer constant"); - } - res->e_type = 
TypeExpr::new_atomic(TypeExpr::_Int); - return res; -} - -static Expr* process_expr(V v) { - std::string str = static_cast(v->str_val); - Expr* res; - switch (v->modifier) { - case 0: - case 's': - case 'a': - res = new Expr{Expr::_SliceConst, v->loc}; - res->e_type = TypeExpr::new_atomic(TypeExpr::_Slice); - break; - case 'u': - case 'h': - case 'H': - case 'c': - res = new Expr{Expr::_Const, v->loc}; - res->e_type = TypeExpr::new_atomic(TypeExpr::_Int); - break; - default: - v->error("invalid string modifier '" + std::string(1, v->modifier) + "'"); - } - res->flags = Expr::_IsRvalue; - switch (v->modifier) { - case 0: { - res->strval = td::hex_encode(str); - break; + if (v->fun_ref->does_return_self()) { + if (obj_leftmost->is_lvalue) { // to handle if obj is global var, potentially re-assigned inside a chain + rvect_apply = pre_compile_expr(obj_leftmost, code); + } else { // temporary object, not lvalue, pre_compile_expr + rvect_apply = vars_per_arg[0]; } - case 's': { - res->strval = str; - unsigned char buff[128]; - int bits = (int)td::bitstring::parse_bitstring_hex_literal(buff, sizeof(buff), str.data(), str.data() + str.size()); - if (bits < 0) { - v->error("invalid hex bitstring constant '" + str + "'"); - } - break; - } - case 'a': { // MsgAddress - int workchain; - ton::StdSmcAddress addr; - bool correct = (str.size() == 48 && parse_friendly_address(str.data(), workchain, addr)) || - (str.size() != 48 && parse_raw_address(str, workchain, addr)); - if (!correct) { - v->error("invalid standard address '" + str + "'"); - } - if (workchain < -128 || workchain >= 128) { - v->error("anycast addresses not supported"); - } - - unsigned char data[3 + 8 + 256]; // addr_std$10 anycast:(Maybe Anycast) workchain_id:int8 address:bits256 = MsgAddressInt; - td::bitstring::bits_store_long_top(data, 0, static_cast(4) << (64 - 3), 3); - td::bitstring::bits_store_long_top(data, 3, static_cast(workchain) << (64 - 8), 8); - td::bitstring::bits_memcpy(data, 3 + 8, 
addr.bits().ptr, 0, addr.size()); - res->strval = td::BitSlice{data, sizeof(data)}.to_hex(); - break; - } - case 'u': { - res->intval = td::hex_string_to_int256(td::hex_encode(str)); - if (str.empty()) { - v->error("empty integer ascii-constant"); - } - if (res->intval.is_null()) { - v->error("too long integer ascii-constant"); - } - break; - } - case 'h': - case 'H': { - unsigned char hash[32]; - digest::hash_str(hash, str.data(), str.size()); - res->intval = td::bits_to_refint(hash, (v->modifier == 'h') ? 32 : 256, false); - break; - } - case 'c': { - res->intval = td::make_refint(td::crc32(td::Slice{str})); - break; - } - default: - tolk_assert(false); } - return res; + + return rvect_apply; } -static Expr* process_expr(V v) { - SymDef* builtin_sym = lookup_symbol(calc_sym_idx(v->bool_val ? "__true" : "__false")); - return create_expr_apply(v->loc, builtin_sym, {}); +static std::vector process_tensor(V v, CodeBlob& code, LValGlobs* lval_globs) { + return pre_compile_tensor(code, v->get_items(), lval_globs); } -static Expr* process_expr(V v) { - SymDef* builtin_sym = lookup_symbol(calc_sym_idx("__null")); - return create_expr_apply(v->loc, builtin_sym, {}); +static std::vector process_tensor_square(V v, CodeBlob& code, LValGlobs* lval_globs) { + if (lval_globs) { // todo some time, make "var (a, [b,c]) = (1, [2,3])" work + v->error("[...] 
can not be used as lvalue here"); + } + std::vector left = std::vector{code.create_tmp_var(v->inferred_type, v->loc)}; + std::vector right = pre_compile_tensor(code, v->get_items()); + code.emplace_back(v->loc, Op::_Tuple, left, std::move(right)); + return left; } -static Expr* process_expr(V v, CodeBlob& code) { - if (!code.func_val->does_accept_self()) { - v->error("using `self` in a non-member function (it does not accept the first `self` parameter)"); - } - SymDef* sym = lookup_symbol(calc_sym_idx("self")); - tolk_assert(sym); - SymValVariable* sym_val = dynamic_cast(sym->value); - Expr* res = new Expr(Expr::_Var, v->loc); - res->sym = sym; - res->val = sym_val->idx; - res->flags = Expr::_IsLvalue | Expr::_IsRvalue | (sym_val->is_immutable() ? Expr::_IsImmutable : 0); - res->e_type = sym_val->get_type(); - return res; +static std::vector process_int_const(V v, CodeBlob& code) { + std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; + code.emplace_back(v->loc, Op::_IntConst, rvect, v->intval); + return rvect; } -static Expr* process_identifier(V v) { - SymDef* sym = lookup_symbol(calc_sym_idx(v->name)); - if (sym && dynamic_cast(sym->value)) { - check_import_exists_when_using_sym(v, sym); - Expr* res = new Expr{Expr::_GlobVar, v->loc}; - res->e_type = sym->value->get_type(); - res->sym = sym; - res->flags = Expr::_IsLvalue | Expr::_IsRvalue | Expr::_IsImpure; - return res; - } - if (sym && dynamic_cast(sym->value)) { - check_import_exists_when_using_sym(v, sym); - auto val = dynamic_cast(sym->value); - Expr* res = nullptr; - if (val->get_kind() == SymValConst::IntConst) { - res = new Expr{Expr::_Const, v->loc}; - res->intval = val->get_int_value(); - res->e_type = TypeExpr::new_atomic(TypeExpr::_Int); - } else if (val->get_kind() == SymValConst::SliceConst) { - res = new Expr{Expr::_SliceConst, v->loc}; - res->strval = val->get_str_value(); - res->e_type = TypeExpr::new_atomic(TypeExpr::_Slice); - } else { - v->error("invalid symbolic constant 
type"); - } - res->flags = Expr::_IsLvalue | Expr::_IsRvalue | Expr::_IsImmutable; - res->sym = sym; - return res; - } - if (sym && dynamic_cast(sym->value)) { - check_import_exists_when_using_sym(v, sym); - } - Expr* res = new Expr{Expr::_Var, v->loc}; - if (!sym) { - check_global_func(v->loc, calc_sym_idx(v->name)); - sym = lookup_symbol(calc_sym_idx(v->name)); - tolk_assert(sym); - } - res->sym = sym; - bool impure = false; - bool immutable = false; - if (const SymValFunc* func_val = dynamic_cast(sym->value)) { - res->e_type = func_val->get_type(); - res->cls = Expr::_GlobFunc; - impure = !func_val->is_marked_as_pure(); - } else if (const SymValVariable* var_val = dynamic_cast(sym->value)) { - tolk_assert(var_val->idx >= 0) - res->val = var_val->idx; - res->e_type = var_val->get_type(); - immutable = var_val->is_immutable(); - // std::cerr << "accessing variable " << lex.cur().str << " : " << res->e_type << std::endl; +static std::vector process_string_const(V v, CodeBlob& code) { + ConstantValue value = eval_const_init_value(v); + std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; + if (value.is_int()) { + code.emplace_back(v->loc, Op::_IntConst, rvect, value.as_int()); } else { - v->error("undefined identifier '" + static_cast(v->name) + "'"); + code.emplace_back(v->loc, Op::_SliceConst, rvect, value.as_slice()); } - // std::cerr << "accessing symbol " << lex.cur().str << " : " << res->e_type << (val->impure ? " (impure)" : " (pure)") << std::endl; - res->flags = Expr::_IsLvalue | Expr::_IsRvalue | (impure ? Expr::_IsImpure : 0) | (immutable ? Expr::_IsImmutable : 0); - res->deduce_type(); - return res; + return rvect; } -Expr* process_expr(AnyV v, CodeBlob& code) { +static std::vector process_bool_const(V v, CodeBlob& code) { + const FunctionData* builtin_sym = lookup_global_symbol(v->bool_val ? 
"__true" : "__false")->as(); + return gen_op_call(code, v->inferred_type, v->loc, {}, builtin_sym); +} + +static std::vector process_null_keyword(V v, CodeBlob& code) { + const FunctionData* builtin_sym = lookup_global_symbol("__null")->as(); + return gen_op_call(code, v->inferred_type, v->loc, {}, builtin_sym); +} + +static std::vector process_self_keyword(V v, CodeBlob& code) { + tolk_assert(code.fun_ref->does_accept_self() && v->param_ref); + tolk_assert(v->param_ref->idx == 0); + return {0}; +} + +static std::vector process_identifier(V v, CodeBlob& code, LValGlobs* lval_globs) { + const Symbol* sym = v->sym; + if (const auto* glob_ref = sym->try_as()) { + std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; + if (lval_globs) { + lval_globs->add_modified_glob(glob_ref, rvect[0]); + return rvect; + } else { + code.emplace_back(v->loc, Op::_GlobVar, rvect, std::vector{}, glob_ref); + return rvect; + } + } + if (const auto* const_ref = sym->try_as()) { + std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; + if (const_ref->is_int_const()) { + code.emplace_back(v->loc, Op::_IntConst, rvect, const_ref->as_int_const()); + } else { + code.emplace_back(v->loc, Op::_SliceConst, rvect, const_ref->as_slice_const()); + } + return rvect; + } + if (const auto* fun_ref = sym->try_as()) { + std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; + code.emplace_back(v->loc, Op::_GlobVar, rvect, std::vector{}, fun_ref); + return rvect; + } + if (const auto* var_ref = sym->try_as()) { +#ifdef TOLK_DEBUG + tolk_assert(var_ref->idx != -1); +#endif + return {var_ref->idx}; + } + throw UnexpectedASTNodeType(v, "process_identifier"); +} + +static std::vector process_local_var(V v, CodeBlob& code, LValGlobs* lval_globs) { + if (v->marked_as_redef) { + return process_identifier(v->get_identifier()->as(), code, lval_globs); + } + if (v->get_identifier()->try_as()) { + const LocalVarData* var_ref = v->var_maybe->as(); + tolk_assert(var_ref->idx 
== -1); + var_ref->mutate()->assign_idx(code.create_var(v->inferred_type, var_ref, v->loc)); + return {var_ref->idx}; + } + return {code.create_tmp_var(v->inferred_type, v->loc)}; // underscore +} + +static std::vector process_underscore(V v, CodeBlob& code) { + return {code.create_tmp_var(v->inferred_type, v->loc)}; +} + +std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, LValGlobs* lval_globs) { switch (v->type) { case ast_binary_operator: - return process_expr(v->as(), code); + return process_binary_operator(v->as(), code); case ast_unary_operator: - return process_expr(v->as(), code); + return process_unary_operator(v->as(), code); case ast_ternary_operator: - return process_expr(v->as(), code); + return process_ternary_operator(v->as(), code); case ast_function_call: return process_function_call(v->as(), code); case ast_dot_method_call: return process_dot_method_call(v->as(), code); - case ast_parenthesized_expr: - return process_expr(v->as()->get_expr(), code); + case ast_parenthesized_expression: + return pre_compile_expr(v->as()->get_expr(), code, lval_globs); case ast_tensor: - return process_expr(v->as(), code); + return process_tensor(v->as(), code, lval_globs); case ast_tensor_square: - return process_expr(v->as(), code); + return process_tensor_square(v->as(), code, lval_globs); case ast_int_const: - return process_expr(v->as()); + return process_int_const(v->as(), code); case ast_string_const: - return process_expr(v->as()); + return process_string_const(v->as(), code); case ast_bool_const: - return process_expr(v->as()); + return process_bool_const(v->as(), code); case ast_null_keyword: - return process_expr(v->as()); + return process_null_keyword(v->as(), code); case ast_self_keyword: - return process_expr(v->as(), code); + return process_self_keyword(v->as(), code); case ast_identifier: - return process_identifier(v->as()); + return process_identifier(v->as(), code, lval_globs); + case ast_local_var: + return process_local_var(v->as(), code, 
lval_globs); case ast_underscore: - return create_new_underscore_variable(v->loc, TypeExpr::new_hole()); + return process_underscore(v->as(), code); default: - throw UnexpectedASTNodeType(v, "process_expr"); + throw UnexpectedASTNodeType(v, "pre_compile_expr"); } } -static Expr* process_local_vars_lhs(AnyV v, CodeBlob& code) { - switch (v->type) { - case ast_local_var: { - auto v_var = v->as(); - if (v_var->marked_as_redef) { - Expr* redef_var = process_identifier(v_var->get_identifier()->as()); - if (redef_var->is_immutable()) { - redef_var->fire_error_modifying_immutable("left side of assignment"); - } - return redef_var; - } - TypeExpr* var_type = v_var->declared_type ? v_var->declared_type : TypeExpr::new_hole(); - if (auto v_ident = v->as()->get_identifier()->try_as()) { - return create_new_local_variable(v->loc, v_ident->name, var_type, v_var->is_immutable); - } else { - return create_new_underscore_variable(v->loc, var_type); - } - } - case ast_parenthesized_expr: - return process_local_vars_lhs(v->as()->get_expr(), code); - case ast_tensor: { - std::vector type_list; - Expr* res = new Expr{Expr::_Tensor, v->loc}; - for (AnyV item : v->as()->get_items()) { - Expr* x = process_local_vars_lhs(item, code); - res->pb_arg(x); - res->flags |= x->flags; - type_list.push_back(x->e_type); - } - res->e_type = TypeExpr::new_tensor(std::move(type_list)); - return res; - } - case ast_tensor_square: { - std::vector type_list; - Expr* res = new Expr{Expr::_Tensor, v->loc}; - for (AnyV item : v->as()->get_items()) { - Expr* x = process_local_vars_lhs(item, code); - res->pb_arg(x); - res->flags |= x->flags; - type_list.push_back(x->e_type); - } - res->e_type = TypeExpr::new_tensor(std::move(type_list)); - res = new Expr{Expr::_MkTuple, {res}}; - res->flags = res->args.at(0)->flags; - res->here = v->loc; - res->e_type = TypeExpr::new_tuple(res->args.at(0)->e_type); - return res; - } - default: - throw UnexpectedASTNodeType(v, "process_local_vars_lhs"); - } + +static void 
process_local_vars_declaration(V v, CodeBlob& code) { + pre_compile_let(code, v->get_lhs(), v->get_assigned_val(), v->loc); } -static blk_fl::val process_vertex(V v, CodeBlob& code) { - Expr* x = process_local_vars_lhs(v->get_lhs(), code); - Expr* y = process_expr(v->get_assigned_val(), code); - y->chk_rvalue(); - x->predefine_vars(); - x->define_new_vars(code); - Expr* res = new Expr{Expr::_Letop, {x, y}}; - res->here = v->loc; - res->flags = x->flags | Expr::_IsRvalue; - res->deduce_type(); - res->chk_rvalue(); - res->pre_compile(code); - return blk_fl::end; -} - -static bool is_expr_valid_as_return_self(Expr* return_expr) { - // `return self` - if (return_expr->cls == Expr::_Var && return_expr->val == 0) { - return true; - } - if (return_expr->cls == Expr::_ReturnSelf) { - return is_expr_valid_as_return_self(return_expr->args[1]); - } - if (return_expr->cls == Expr::_CondExpr) { - return is_expr_valid_as_return_self(return_expr->args[1]) && is_expr_valid_as_return_self(return_expr->args[2]); - } - return false; -} - -// for mutating functions, having `return expr`, transform it to `return (modify_var1, ..., expr)` -static Expr* wrap_return_value_with_mutate_params(SrcLocation loc, CodeBlob& code, Expr* return_expr) { - Expr* tmp_var; - if (return_expr->cls != Expr::_Var) { - // `return complex_expr` - extract this into temporary variable (eval it before return) - // this is mandatory if it assigns to one of modified vars - tmp_var = create_new_underscore_variable(loc, return_expr->e_type); - tmp_var->predefine_vars(); - tmp_var->define_new_vars(code); - Expr* assign_to_tmp_var = new Expr(Expr::_Letop, {tmp_var, return_expr}); - assign_to_tmp_var->here = loc; - assign_to_tmp_var->flags = tmp_var->flags | Expr::_IsRvalue; - assign_to_tmp_var->deduce_type(); - assign_to_tmp_var->pre_compile(code); - } else { - tmp_var = return_expr; - } - - Expr* ret_tensor = new Expr(Expr::_Tensor, loc); - std::vector type_list; - for (SymDef* p_sym: code.func_val->parameters) { - 
if (p_sym && dynamic_cast(p_sym->value)->is_mutate_parameter()) { - Expr* p_expr = new Expr{Expr::_Var, p_sym->loc}; - p_expr->sym = p_sym; - p_expr->val = p_sym->value->idx; - p_expr->flags = Expr::_IsRvalue; - p_expr->e_type = p_sym->value->get_type(); - ret_tensor->pb_arg(p_expr); - type_list.emplace_back(p_expr->e_type); - } - } - ret_tensor->pb_arg(tmp_var); - type_list.emplace_back(tmp_var->e_type); - ret_tensor->flags = Expr::_IsRvalue; - ret_tensor->e_type = TypeExpr::new_tensor(std::move(type_list)); - return ret_tensor; -} - -static blk_fl::val process_vertex(V v, CodeBlob& code) { - Expr* expr = process_expr(v->get_return_value(), code); - if (code.func_val->does_return_self()) { - if (!is_expr_valid_as_return_self(expr)) { - v->error("invalid return from `self` function"); - } - Expr* var_self = new Expr(Expr::_Var, v->loc); - var_self->flags = Expr::_IsRvalue | Expr::_IsLvalue; - var_self->e_type = code.func_val->parameters[0]->value->get_type(); - Expr* assign_to_self = new Expr(Expr::_Letop, {var_self, expr}); - assign_to_self->here = v->loc; - assign_to_self->flags = Expr::_IsRvalue; - assign_to_self->deduce_type(); - assign_to_self->pre_compile(code); - Expr* empty_tensor = new Expr(Expr::_Tensor, {}); - empty_tensor->here = v->loc; - empty_tensor->flags = Expr::_IsRvalue; - empty_tensor->e_type = TypeExpr::new_tensor({}); - expr = empty_tensor; - } - if (code.func_val->has_mutate_params()) { - expr = wrap_return_value_with_mutate_params(v->loc, code, expr); - } - expr->chk_rvalue(); - try { - unify(expr->e_type, code.ret_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "previous function return type " << code.ret_type - << " cannot be unified with return statement expression type " << expr->e_type << ": " << ue; - v->error(os.str()); - } - std::vector tmp_vars = expr->pre_compile(code); - code.emplace_back(v->loc, Op::_Return, std::move(tmp_vars)); - return blk_fl::ret; -} - -static void append_implicit_ret_stmt(SrcLocation 
loc_end, CodeBlob& code) { - Expr* expr = new Expr{Expr::_Tensor, {}}; - expr->flags = Expr::_IsRvalue; - expr->here = loc_end; - expr->e_type = TypeExpr::new_unit(); - if (code.func_val->does_return_self()) { - throw ParseError(loc_end, "missing return; forgot `return self`?"); - } - if (code.func_val->has_mutate_params()) { - expr = wrap_return_value_with_mutate_params(loc_end, code, expr); - } - try { - unify(expr->e_type, code.ret_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "previous function return type " << code.ret_type - << " cannot be unified with implicit end-of-block return type " << expr->e_type << ": " << ue; - throw ParseError(loc_end, os.str()); - } - std::vector tmp_vars = expr->pre_compile(code); - code.emplace_back(loc_end, Op::_Return, std::move(tmp_vars)); -} - -static blk_fl::val process_vertex(V v, CodeBlob& code, bool no_new_scope = false) { - if (!no_new_scope) { - open_scope(v->loc); - } - blk_fl::val res = blk_fl::init; - bool warned = false; +static void process_sequence(V v, CodeBlob& code) { for (AnyV item : v->get_items()) { - if (!(res & blk_fl::end) && !warned) { - item->loc.show_warning("unreachable code"); - warned = true; - } - blk_fl::combine(res, process_statement(item, code)); + process_statement(item, code); } - if (!no_new_scope) { - close_scope(); - } - return res; } -static blk_fl::val process_vertex(V v, CodeBlob& code) { - Expr* expr = process_expr(v->get_cond(), code); - expr->chk_rvalue(); - auto cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); - try { - unify(expr->e_type, cnt_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "repeat count value of type " << expr->e_type << " is not an integer: " << ue; - v->get_cond()->error(os.str()); + +static void process_assert_statement(V v, CodeBlob& code) { + std::vector args(3); + if (auto v_not = v->get_cond()->try_as(); v_not && v_not->tok == tok_logical_not) { + args[0] = v->get_thrown_code(); + args[1] = 
v->get_cond()->as()->get_rhs(); + args[2] = createV(v->loc, true); + args[2]->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + } else { + args[0] = v->get_thrown_code(); + args[1] = v->get_cond(); + args[2] = createV(v->loc, false); + args[2]->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); } - std::vector tmp_vars = expr->pre_compile(code); - if (tmp_vars.size() != 1) { - v->get_cond()->error("repeat count value is not a singleton"); + + const FunctionData* builtin_sym = lookup_global_symbol("__throw_if_unless")->as(); + std::vector args_vars = pre_compile_tensor(code, args); + gen_op_call(code, TypeExpr::new_unit(), v->loc, std::move(args_vars), builtin_sym); +} + +static void process_catch_variable(AnyExprV v_catch_var, CodeBlob& code) { + if (auto v_ident = v_catch_var->try_as()) { + const LocalVarData* var_ref = v_ident->sym->as(); + tolk_assert(var_ref->idx == -1); + var_ref->mutate()->assign_idx(code.create_var(v_catch_var->inferred_type, var_ref, v_catch_var->loc)); } +} + +static void process_try_catch_statement(V v, CodeBlob& code) { + code.require_callxargs = true; + Op& try_catch_op = code.emplace_back(v->loc, Op::_TryCatch); + code.push_set_cur(try_catch_op.block0); + process_statement(v->get_try_body(), code); + code.close_pop_cur(v->get_try_body()->loc_end); + code.push_set_cur(try_catch_op.block1); + + // transform catch (excNo, arg) into TVM-catch (arg, excNo), where arg is untyped and thus almost useless now + const std::vector& catch_vars = v->get_catch_expr()->get_items(); + tolk_assert(catch_vars.size() == 2); + process_catch_variable(catch_vars[0], code); + process_catch_variable(catch_vars[1], code); + try_catch_op.left = pre_compile_tensor(code, {catch_vars[1], catch_vars[0]}); + process_statement(v->get_catch_body(), code); + code.close_pop_cur(v->get_catch_body()->loc_end); +} + +static void process_repeat_statement(V v, CodeBlob& code) { + std::vector tmp_vars = pre_compile_expr(v->get_cond(), 
code); Op& repeat_op = code.emplace_back(v->loc, Op::_Repeat, tmp_vars); code.push_set_cur(repeat_op.block0); - blk_fl::val res = process_vertex(v->get_body(), code); + process_statement(v->get_body(), code); code.close_pop_cur(v->get_body()->loc_end); - return res | blk_fl::end; } -static blk_fl::val process_vertex(V v, CodeBlob& code) { - Expr* expr = process_expr(v->get_cond(), code); - expr->chk_rvalue(); - auto cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); - try { - unify(expr->e_type, cnt_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "while condition value of type " << expr->e_type << " is not an integer: " << ue; - v->get_cond()->error(os.str()); +static void process_if_statement(V v, CodeBlob& code) { + std::vector tmp_vars = pre_compile_expr(v->get_cond(), code); + Op& if_op = code.emplace_back(v->loc, Op::_If, std::move(tmp_vars)); + code.push_set_cur(if_op.block0); + process_statement(v->get_if_body(), code); + code.close_pop_cur(v->get_if_body()->loc_end); + code.push_set_cur(if_op.block1); + process_statement(v->get_else_body(), code); + code.close_pop_cur(v->get_else_body()->loc_end); + if (v->is_ifnot) { + std::swap(if_op.block0, if_op.block1); } - Op& while_op = code.emplace_back(v->loc, Op::_While); - code.push_set_cur(while_op.block0); - while_op.left = expr->pre_compile(code); - code.close_pop_cur(v->get_body()->loc); - if (while_op.left.size() != 1) { - v->get_cond()->error("while condition value is not a singleton"); - } - code.push_set_cur(while_op.block1); - blk_fl::val res1 = process_vertex(v->get_body(), code); - code.close_pop_cur(v->get_body()->loc_end); - return res1 | blk_fl::end; } -static blk_fl::val process_vertex(V v, CodeBlob& code) { +static void process_do_while_statement(V v, CodeBlob& code) { Op& until_op = code.emplace_back(v->loc, Op::_Until); code.push_set_cur(until_op.block0); - open_scope(v->loc); - blk_fl::val res = process_vertex(v->get_body(), code, true); + process_statement(v->get_body(), 
code); // in TVM, there is only "do until", but in Tolk, we want "do while" // here we negate condition to pass it forward to legacy to Op::_Until // also, handle common situations as a hardcoded "optimization": replace (a<0) with (a>=0) and so on // todo these hardcoded conditions should be removed from this place in the future - AnyV cond = v->get_cond(); - AnyV until_cond; + AnyExprV cond = v->get_cond(); + AnyExprV until_cond; if (auto v_not = cond->try_as(); v_not && v_not->tok == tok_logical_not) { until_cond = v_not->get_rhs(); } else if (auto v_eq = cond->try_as(); v_eq && v_eq->tok == tok_eq) { @@ -1096,215 +621,114 @@ static blk_fl::val process_vertex(V v, CodeBlob& code) { } else { until_cond = createV(cond->loc, "!", tok_logical_not, cond); } + until_cond->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); - Expr* expr = process_expr(until_cond, code); - expr->chk_rvalue(); - close_scope(); - auto cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); - try { - unify(expr->e_type, cnt_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "`while` condition value of type " << expr->e_type << " is not an integer: " << ue; - v->get_cond()->error(os.str()); - } - until_op.left = expr->pre_compile(code); + until_op.left = pre_compile_expr(until_cond, code); code.close_pop_cur(v->get_body()->loc_end); - if (until_op.left.size() != 1) { - v->get_cond()->error("`while` condition value is not a singleton"); - } - return res & ~blk_fl::empty; } -static blk_fl::val process_vertex(V v, CodeBlob& code) { - std::vector args; - SymDef* builtin_sym; +static void process_while_statement(V v, CodeBlob& code) { + Op& while_op = code.emplace_back(v->loc, Op::_While); + code.push_set_cur(while_op.block0); + while_op.left = pre_compile_expr(v->get_cond(), code); + code.close_pop_cur(v->get_body()->loc); + code.push_set_cur(while_op.block1); + process_statement(v->get_body(), code); + code.close_pop_cur(v->get_body()->loc_end); +} + +static void 
process_throw_statement(V v, CodeBlob& code) { if (v->has_thrown_arg()) { - builtin_sym = lookup_symbol(calc_sym_idx("__throw_arg")); - args.push_back(process_expr(v->get_thrown_arg(), code)); - args.push_back(process_expr(v->get_thrown_code(), code)); + const FunctionData* builtin_sym = lookup_global_symbol("__throw_arg")->as(); + std::vector args_vars = pre_compile_tensor(code, {v->get_thrown_arg(), v->get_thrown_code()}); + gen_op_call(code, TypeExpr::new_unit(), v->loc, std::move(args_vars), builtin_sym); } else { - builtin_sym = lookup_symbol(calc_sym_idx("__throw")); - args.push_back(process_expr(v->get_thrown_code(), code)); + const FunctionData* builtin_sym = lookup_global_symbol("__throw")->as(); + std::vector args_vars = pre_compile_tensor(code, {v->get_thrown_code()}); + gen_op_call(code, TypeExpr::new_unit(), v->loc, std::move(args_vars), builtin_sym); } - - Expr* apply = create_expr_apply(v->loc, builtin_sym, std::move(args)); - apply->flags |= Expr::_IsImpure; - apply->pre_compile(code); - return blk_fl::end; } -static blk_fl::val process_vertex(V v, CodeBlob& code) { - std::vector args(3); - if (auto v_not = v->get_cond()->try_as(); v_not && v_not->tok == tok_logical_not) { - args[0] = process_expr(v->get_thrown_code(), code); - args[1] = process_expr(v->get_cond()->as()->get_rhs(), code); - args[2] = process_expr(createV(v->loc, true), code); - } else { - args[0] = process_expr(v->get_thrown_code(), code); - args[1] = process_expr(v->get_cond(), code); - args[2] = process_expr(createV(v->loc, false), code); +static void process_return_statement(V v, CodeBlob& code) { + std::vector return_vars = pre_compile_expr(v->get_return_value(), code); + if (code.fun_ref->does_return_self()) { + tolk_assert(return_vars.size() == 1); + return_vars = {}; } - - SymDef* builtin_sym = lookup_symbol(calc_sym_idx("__throw_if_unless")); - Expr* apply = create_expr_apply(v->loc, builtin_sym, std::move(args)); - apply->flags |= Expr::_IsImpure; - 
apply->pre_compile(code); - return blk_fl::end; + if (code.fun_ref->has_mutate_params()) { + std::vector mutated_vars; + for (const LocalVarData& p_sym: code.fun_ref->parameters) { + if (p_sym.is_mutate_parameter()) { + mutated_vars.push_back(p_sym.idx); + } + } + return_vars.insert(return_vars.begin(), mutated_vars.begin(), mutated_vars.end()); + } + code.emplace_back(v->loc, Op::_Return, std::move(return_vars)); } -static Expr* process_catch_variable(AnyV catch_var, TypeExpr* var_type) { - if (auto v_ident = catch_var->try_as()) { - return create_new_local_variable(catch_var->loc, v_ident->name, var_type, true); - } - return create_new_underscore_variable(catch_var->loc, var_type); -} - -static blk_fl::val process_vertex(V v, CodeBlob& code) { - code.require_callxargs = true; - Op& try_catch_op = code.emplace_back(v->loc, Op::_TryCatch); - code.push_set_cur(try_catch_op.block0); - blk_fl::val res0 = process_vertex(v->get_try_body(), code); - code.close_pop_cur(v->get_try_body()->loc_end); - code.push_set_cur(try_catch_op.block1); - open_scope(v->get_catch_expr()->loc); - - // transform catch (excNo, arg) into TVM-catch (arg, excNo), where arg is untyped and thus almost useless now - TypeExpr* tvm_error_type = TypeExpr::new_tensor(TypeExpr::new_var(), TypeExpr::new_atomic(TypeExpr::_Int)); - const std::vector& catch_items = v->get_catch_expr()->get_items(); - tolk_assert(catch_items.size() == 2); - Expr* e_catch = new Expr{Expr::_Tensor, v->get_catch_expr()->loc}; - e_catch->pb_arg(process_catch_variable(catch_items[1], tvm_error_type->args[0])); - e_catch->pb_arg(process_catch_variable(catch_items[0], tvm_error_type->args[1])); - e_catch->flags = Expr::_IsLvalue; - e_catch->e_type = tvm_error_type; - e_catch->predefine_vars(); - e_catch->define_new_vars(code); - try_catch_op.left = e_catch->pre_compile(code); - tolk_assert(try_catch_op.left.size() == 2); - - blk_fl::val res1 = process_vertex(v->get_catch_body(), code); - close_scope(); - 
code.close_pop_cur(v->get_catch_body()->loc_end); - blk_fl::combine_parallel(res0, res1); - return res0; -} - -static blk_fl::val process_vertex(V v, CodeBlob& code) { - Expr* expr = process_expr(v->get_cond(), code); - expr->chk_rvalue(); - TypeExpr* flag_type = TypeExpr::new_atomic(TypeExpr::_Int); - try { - unify(expr->e_type, flag_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "`if` condition value of type " << expr->e_type << " is not an integer: " << ue; - v->get_cond()->error(os.str()); - } - std::vector tmp_vars = expr->pre_compile(code); - if (tmp_vars.size() != 1) { - v->get_cond()->error("condition value is not a singleton"); - } - Op& if_op = code.emplace_back(v->loc, Op::_If, tmp_vars); - code.push_set_cur(if_op.block0); - blk_fl::val res1 = process_vertex(v->get_if_body(), code); - blk_fl::val res2 = blk_fl::init; - code.close_pop_cur(v->get_if_body()->loc_end); - code.push_set_cur(if_op.block1); - res2 = process_vertex(v->get_else_body(), code); - code.close_pop_cur(v->get_else_body()->loc_end); - if (v->is_ifnot) { - std::swap(if_op.block0, if_op.block1); - } - blk_fl::combine_parallel(res1, res2); - return res1; -} - -blk_fl::val process_statement(AnyV v, CodeBlob& code) { - switch (v->type) { - case ast_local_vars_declaration: - return process_vertex(v->as(), code); - case ast_return_statement: - return process_vertex(v->as(), code); - case ast_sequence: - return process_vertex(v->as(), code); - case ast_empty: - return blk_fl::init; - case ast_repeat_statement: - return process_vertex(v->as(), code); - case ast_if_statement: - return process_vertex(v->as(), code); - case ast_do_while_statement: - return process_vertex(v->as(), code); - case ast_while_statement: - return process_vertex(v->as(), code); - case ast_throw_statement: - return process_vertex(v->as(), code); - case ast_assert_statement: - return process_vertex(v->as(), code); - case ast_try_catch_statement: - return process_vertex(v->as(), code); - default: { - 
Expr* expr = process_expr(v, code); - expr->chk_rvalue(); - expr->pre_compile(code); - return blk_fl::end; +static void append_implicit_return_statement(SrcLocation loc_end, CodeBlob& code) { + std::vector mutated_vars; + if (code.fun_ref->has_mutate_params()) { + for (const LocalVarData& p_sym: code.fun_ref->parameters) { + if (p_sym.is_mutate_parameter()) { + mutated_vars.push_back(p_sym.idx); + } } } + code.emplace_back(loc_end, Op::_Return, std::move(mutated_vars)); } -static FormalArg process_vertex(V v, SymDef* param_sym) { - if (!param_sym) { - return std::make_tuple(v->param_type, nullptr, v->loc); + +void process_statement(AnyV v, CodeBlob& code) { + switch (v->type) { + case ast_local_vars_declaration: + return process_local_vars_declaration(v->as(), code); + case ast_sequence: + return process_sequence(v->as(), code); + case ast_return_statement: + return process_return_statement(v->as(), code); + case ast_repeat_statement: + return process_repeat_statement(v->as(), code); + case ast_if_statement: + return process_if_statement(v->as(), code); + case ast_do_while_statement: + return process_do_while_statement(v->as(), code); + case ast_while_statement: + return process_while_statement(v->as(), code); + case ast_throw_statement: + return process_throw_statement(v->as(), code); + case ast_assert_statement: + return process_assert_statement(v->as(), code); + case ast_try_catch_statement: + return process_try_catch_statement(v->as(), code); + case ast_empty_statement: + return; + default: + pre_compile_expr(reinterpret_cast(v), code); } - SymDef* new_sym_def = define_symbol(calc_sym_idx(v->get_identifier()->name), true, v->loc); - if (!new_sym_def || new_sym_def->value) { - v->error("redefined parameter"); - } - const SymValVariable* param_val = dynamic_cast(param_sym->value); - new_sym_def->value = new SymValVariable(*param_val); - return std::make_tuple(v->param_type, new_sym_def, v->loc); } static void convert_function_body_to_CodeBlob(V v, V v_body) { - 
SymDef* sym_def = lookup_symbol(calc_sym_idx(v->get_identifier()->name)); - SymValCodeFunc* sym_val = dynamic_cast(sym_def->value); - tolk_assert(sym_val != nullptr); - - open_scope(v->loc); - CodeBlob* blob = new CodeBlob{static_cast(v->get_identifier()->name), v->loc, sym_val, v->ret_type}; - if (v->marked_as_pure) { - blob->flags |= CodeBlob::_ForbidImpure; - } + CodeBlob* blob = new CodeBlob{static_cast(v->get_identifier()->name), v->loc, v->fun_ref, v->ret_type}; FormalArgList legacy_arg_list; for (int i = 0; i < v->get_num_params(); ++i) { - legacy_arg_list.emplace_back(process_vertex(v->get_param(i), sym_val->parameters[i])); + legacy_arg_list.emplace_back(v->get_param(i)->declared_type, &v->fun_ref->parameters[i], v->loc); } blob->import_params(std::move(legacy_arg_list)); - blk_fl::val res = blk_fl::init; - bool warned = false; for (AnyV item : v_body->get_items()) { - if (!(res & blk_fl::end) && !warned) { - item->loc.show_warning("unreachable code"); - warned = true; - } - blk_fl::combine(res, process_statement(item, *blob)); + process_statement(item, *blob); } - if (res & blk_fl::end) { - append_implicit_ret_stmt(v_body->loc_end, *blob); + if (v->fun_ref->is_implicit_return()) { + append_implicit_return_statement(v_body->loc_end, *blob); } blob->close_blk(v_body->loc_end); - close_scope(); - sym_val->set_code(blob); + std::get(v->fun_ref->body)->set_code(blob); } static void convert_asm_body_to_AsmOp(V v, V v_body) { - SymDef* sym_def = lookup_symbol(calc_sym_idx(v->get_identifier()->name)); - SymValAsmFunc* sym_val = dynamic_cast(sym_def->value); - tolk_assert(sym_val != nullptr); - int cnt = v->get_num_params(); int width = v->ret_type->get_width(); std::vector asm_ops; @@ -1332,14 +756,11 @@ static void convert_asm_body_to_AsmOp(V v, Vset_code(std::move(asm_ops)); + std::get(v->fun_ref->body)->set_code(std::move(asm_ops)); } - void pipeline_convert_ast_to_legacy_Expr_Op(const AllSrcFiles& all_src_files) { for (const SrcFile* file : all_src_files) { - 
tolk_assert(file->ast); - for (AnyV v : file->ast->as()->get_toplevel_declarations()) { if (auto v_func = v->try_as()) { if (v_func->is_asm_function()) { diff --git a/tolk/pipe-calc-rvalue-lvalue.cpp b/tolk/pipe-calc-rvalue-lvalue.cpp new file mode 100644 index 00000000..1738226b --- /dev/null +++ b/tolk/pipe-calc-rvalue-lvalue.cpp @@ -0,0 +1,192 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . +*/ +#include "tolk.h" +#include "ast.h" +#include "ast-visitor.h" + +/* + * This pipe assigns lvalue/rvalue flags for AST expressions. + * It happens after identifiers have been resolved, but before type inferring (before methods binding). + * + * Example: `a = b`, `a` is lvalue, `b` is rvalue. + * Example: `a + b`, both are rvalue. + * + * Note, that this pass only assigns, not checks. So, for `f() = 4`, expr `f()` is lvalue. + * Checking (firing this as incorrect later) is performed after type inferring, see pipe-check-rvalue-lvalue. 
+ */ + +namespace tolk { + +enum class MarkingState { + None, + LValue, + RValue, + LValueAndRValue +}; + +class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody { + MarkingState cur_state = MarkingState::None; + + MarkingState enter_state(MarkingState activated) { + MarkingState saved = cur_state; + cur_state = activated; + return saved; + } + + void restore_state(MarkingState saved) { + cur_state = saved; + } + + void mark_vertex_cur_or_rvalue(AnyExprV v) const { + if (cur_state == MarkingState::LValue || cur_state == MarkingState::LValueAndRValue) { + v->mutate()->assign_lvalue_true(); + } + if (cur_state == MarkingState::RValue || cur_state == MarkingState::LValueAndRValue || cur_state == MarkingState::None) { + v->mutate()->assign_rvalue_true(); + } + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + parent::visit(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + parent::visit(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + parent::visit(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + MarkingState saved = enter_state(v->passed_as_mutate ? 
MarkingState::LValueAndRValue : MarkingState::RValue); + parent::visit(v); + restore_state(saved); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + parent::visit(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + MarkingState saved = enter_state(MarkingState::RValue); + parent::visit(v); + restore_state(saved); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + MarkingState saved = enter_state(MarkingState::RValue); + parent::visit(v->get_obj()); + enter_state(MarkingState::RValue); + parent::visit(v->get_arg_list()); + restore_state(saved); + } + + void visit(V v) override { + // underscore is a placeholder to ignore left side of assignment: `(a, _) = get2params()` + // so, if current state is "lvalue", `_` will be marked as lvalue, and ok + // but if used incorrectly, like `f(_)` or just `_;`, it will be marked rvalue + // and will fire an error later, in pipe lvalue/rvalue check + mark_vertex_cur_or_rvalue(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + MarkingState saved = enter_state(MarkingState::RValue); + parent::visit(v); + restore_state(saved); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + MarkingState saved = enter_state(v->is_set_assign() ? MarkingState::LValueAndRValue : v->is_assign() ? MarkingState::LValue : MarkingState::RValue); + parent::visit(v->get_lhs()); + enter_state(MarkingState::RValue); + parent::visit(v->get_rhs()); + restore_state(saved); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + MarkingState saved = enter_state(MarkingState::RValue); + parent::visit(v); // both cond, when_true and when_false are rvalue, `(cond ? 
a : b) = 5` prohibited + restore_state(saved); + } + + void visit(V v) override { + MarkingState saved = enter_state(MarkingState::LValue); + parent::visit(v->get_lhs()); + enter_state(MarkingState::RValue); + parent::visit(v->get_assigned_val()); + restore_state(saved); + } + + void visit(V v) override { + tolk_assert(cur_state == MarkingState::LValue); + mark_vertex_cur_or_rvalue(v); + parent::visit(v); + } + + void visit(V v) override { + parent::visit(v->get_try_body()); + MarkingState saved = enter_state(MarkingState::LValue); + parent::visit(v->get_catch_expr()); + restore_state(saved); + parent::visit(v->get_catch_body()); + } +}; + +void pipeline_calculate_rvalue_lvalue(const AllSrcFiles& all_src_files) { + visit_ast_of_all_functions(all_src_files); +} + +} // namespace tolk diff --git a/tolk/pipe-check-pure-impure.cpp b/tolk/pipe-check-pure-impure.cpp new file mode 100644 index 00000000..6cef9f15 --- /dev/null +++ b/tolk/pipe-check-pure-impure.cpp @@ -0,0 +1,107 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . +*/ +#include "tolk.h" +#include "ast.h" +#include "ast-visitor.h" +#include "platform-utils.h" + +/* + * This pipe checks for impure operations inside pure functions. + * It happens after type inferring (after methods binding) since it operates fun_ref of calls. 
+ */ + +namespace tolk { + +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_impure_operation_inside_pure_function(AnyV v) { + v->error("an impure operation in a pure function"); +} + +class CheckImpureOperationsInPureFunctionVisitor final : public ASTVisitorFunctionBody { + static void fire_if_global_var(AnyExprV v) { + if (auto v_ident = v->try_as()) { + if (v_ident->sym->try_as()) { + fire_error_impure_operation_inside_pure_function(v); + } + } + } + + void visit(V v) override { + if (v->marked_as_redef) { + fire_if_global_var(v->get_identifier()); + } + } + + void visit(V v) override { + if (v->is_set_assign() || v->is_assign()) { + fire_if_global_var(v->get_lhs()); + } + + parent::visit(v); + } + + void visit(V v) override { + // most likely it's a global function, but also may be `some_var(args)` or even `getF()(args)` + if (!v->fun_maybe) { + // calling variables is always impure, no considerations about what's there at runtime + fire_error_impure_operation_inside_pure_function(v); + } + + if (!v->fun_maybe->is_marked_as_pure()) { + fire_error_impure_operation_inside_pure_function(v); + } + + parent::visit(v); + } + + void visit(V v) override { + if (!v->fun_ref->is_marked_as_pure()) { + fire_error_impure_operation_inside_pure_function(v); + } + + parent::visit(v); + } + + void visit(V v) override { + if (v->passed_as_mutate) { + fire_if_global_var(v->get_expr()); + } + + parent::visit(v); + } + + void visit(V v) override { + fire_error_impure_operation_inside_pure_function(v); + } + + void visit(V v) override { + fire_error_impure_operation_inside_pure_function(v); + } + +public: + void start_visiting_function(V v_function) override { + if (v_function->marked_as_pure) { + parent::visit(v_function->get_body()); + } + } +}; + +void pipeline_check_pure_impure_operations(const AllSrcFiles& all_src_files) { + visit_ast_of_all_functions(all_src_files); +} + +} // namespace tolk diff --git a/tolk/pipe-check-rvalue-lvalue.cpp 
b/tolk/pipe-check-rvalue-lvalue.cpp new file mode 100644 index 00000000..f5bf8526 --- /dev/null +++ b/tolk/pipe-check-rvalue-lvalue.cpp @@ -0,0 +1,172 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . +*/ +#include "tolk.h" +#include "ast.h" +#include "ast-visitor.h" +#include "platform-utils.h" + +/* + * This pipe checks lvalue/rvalue for validity. + * It happens after type inferring (after methods binding) and after lvalue/rvalue are refined based on fun_ref. + * + * Example: `f() = 4`, `f()` was earlier marked as lvalue, it's incorrect. + * Example: `f(mutate 5)`, `5` was marked also, it's incorrect. 
+ */ + +namespace tolk { + +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_cannot_be_used_as_lvalue(AnyV v, const std::string& details) { + // example: `f() = 32` + // example: `loadUint(c.beginParse(), 32)` (since `loadUint()` mutates the first argument) + v->error(details + " can not be used as lvalue"); +} + +// handle when a function used as rvalue, like `var cb = f` +static void handle_function_used_as_noncall(AnyExprV v, const FunctionData* fun_ref) { + fun_ref->mutate()->assign_is_used_as_noncall(); + if (!fun_ref->arg_order.empty() || !fun_ref->ret_order.empty()) { + v->error("saving `" + fun_ref->name + "` into a variable will most likely lead to invalid usage, since it changes the order of variables on the stack"); + } + if (fun_ref->has_mutate_params()) { + v->error("saving `" + fun_ref->name + "` into a variable is impossible, since it has `mutate` parameters and thus can only be called directly"); + } +} + +class CheckRValueLvalueVisitor final : public ASTVisitorFunctionBody { + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "operator `" + static_cast(v->operator_name)); + } + parent::visit(v); + } + + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "operator `" + static_cast(v->operator_name)); + } + parent::visit(v); + } + + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "operator ?:"); + } + parent::visit(v); + } + + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "literal"); + } + } + + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "literal"); + } + } + + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "literal"); + } + } + + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "literal"); + } + } + + void visit(V v) override { + if 
(v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "function call"); + } + if (!v->fun_maybe) { + parent::visit(v->get_called_f()); + } + // for `f(...)` don't visit identifier `f`, to detect `f` usage as non-call, like `var cb = f` + + for (int i = 0; i < v->get_num_args(); ++i) { + parent::visit(v->get_arg(i)); + } + } + + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "method call"); + } + + parent::visit(v->get_obj()); + + for (int i = 0; i < v->get_num_args(); ++i) { + parent::visit(v->get_arg(i)); + } + } + + void visit(V v) override { + if (v->marked_as_redef) { + tolk_assert(v->var_maybe); // always filled, but for `var g_var redef` might point not to a local + if (const LocalVarData* var_ref = v->var_maybe->try_as(); var_ref && var_ref->is_immutable()) { + v->error("`redef` for immutable variable"); + } + } + } + + void visit(V v) override { + if (v->is_lvalue) { + tolk_assert(v->sym); + if (const auto* var_ref = v->sym->try_as(); var_ref && var_ref->is_immutable()) { + v->error("modifying immutable variable `" + var_ref->name + "`"); + } else if (v->sym->try_as()) { + v->error("modifying immutable constant"); + } else if (v->sym->try_as()) { + v->error("function can't be used as lvalue"); + } + } + + // a reference to a function used as rvalue, like `var v = someFunction` + if (const FunctionData* fun_ref = v->sym->try_as(); fun_ref && v->is_rvalue) { + handle_function_used_as_noncall(v, fun_ref); + } + } + + void visit(V v) override { + if (v->is_lvalue && v->param_ref->is_immutable()) { + v->error("modifying `self`, which is immutable by default; probably, you want to declare `mutate self`"); + } + } + + void visit(V v) override { + if (v->is_rvalue) { + v->error("`_` can't be used as a value; it's a placeholder for a left side of assignment"); + } + } + + void visit(V v) override { + parent::visit(v->get_try_body()); + // skip catch(_,excNo), there are always vars due to grammar, lvalue/rvalue aren't 
set to them + parent::visit(v->get_catch_body()); + } +}; + +void pipeline_check_rvalue_lvalue(const AllSrcFiles& all_src_files) { + visit_ast_of_all_functions(all_src_files); +} + +} // namespace tolk diff --git a/tolk/pipe-constant-folding.cpp b/tolk/pipe-constant-folding.cpp new file mode 100644 index 00000000..9e266e6d --- /dev/null +++ b/tolk/pipe-constant-folding.cpp @@ -0,0 +1,68 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . +*/ +#include "tolk.h" +#include "ast.h" +#include "ast-replacer.h" + +/* + * This pipe is supposed to do constant folding, like replacing `2 + 3` with `5`. + * It happens after type inferring and validity checks, one of the last ones. + * + * Currently, it just replaces `-1` (ast_unary_operator ast_int_const) with a number -1. + * More rich constant folding should be done some day, but even without this, IR optimizations + * (operating low-level stack variables) pretty manage to do all related optimizations. + * Constant folding in the future, done at AST level, just would slightly reduce amount of work for optimizer. 
+ */ + +namespace tolk { + +class ConstantFoldingReplacer final : public ASTReplacerInFunctionBody { + static V create_int_const(SrcLocation loc, td::RefInt256&& intval) { + auto v_int = createV(loc, std::move(intval), {}); + v_int->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + v_int->assign_rvalue_true(); + return v_int; + } + + AnyExprV replace(V v) override { + parent::replace(v); + + TokenType t = v->tok; + // convert "-1" (tok_minus tok_int_const) to a const -1 + if (t == tok_minus && v->get_rhs()->type == ast_int_const) { + td::RefInt256 intval = v->get_rhs()->as()->intval; + tolk_assert(!intval.is_null()); + intval = -intval; + if (intval.is_null() || !intval->signed_fits_bits(257)) { + v->error("integer overflow"); + } + return create_int_const(v->loc, std::move(intval)); + } + // same for "+1" + if (t == tok_plus && v->get_rhs()->type == ast_int_const) { + return v->get_rhs(); + } + + return v; + } +}; + +void pipeline_constant_folding(const AllSrcFiles& all_src_files) { + replace_ast_of_all_functions(all_src_files); +} + +} // namespace tolk diff --git a/tolk/pipe-detect-unreachable.cpp b/tolk/pipe-detect-unreachable.cpp new file mode 100644 index 00000000..96de2eb0 --- /dev/null +++ b/tolk/pipe-detect-unreachable.cpp @@ -0,0 +1,127 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . 
+*/ +#include "tolk.h" +#include "ast.h" +#include "ast-visitor.h" + +/* + * This pipe does two things: + * 1) detects unreachable code and prints warnings about it + * example: `fun main() { if(1){return;}else{return;} var x = 0; }` — var is unreachable + * 2) if control flow reaches end of function, store a flag to insert an implicit return + * example: `fun main() { assert(...); }` — has an implicit `return ()` statement before a brace + * + * Note, that it does not delete unreachable code, only prints warnings. + * Actual deleting is done much later (in "legacy" part), after AST is converted to Op. + * + * Note, that it's not CFG, it's just a shallow reachability detection. + * In the future, a true CFG should be introduced. For instance, in order to have nullable types, + * I'll need to implement smart casts. Then I'll think of a complicated granular control flow graph, + * considering data flow and exceptions (built before type inferring, of course), + * and detecting unreachable code will be a part of it. 
+ */ + +namespace tolk { + +class UnreachableStatementsDetectVisitor final { + bool always_returns(AnyV v) { + switch (v->type) { + case ast_sequence: return always_returns(v->as()); + case ast_return_statement: return always_returns(v->as()); + case ast_throw_statement: return always_returns(v->as()); + case ast_function_call: return always_returns(v->as()); + case ast_repeat_statement: return always_returns(v->as()); + case ast_while_statement: return always_returns(v->as()); + case ast_do_while_statement: return always_returns(v->as()); + case ast_try_catch_statement: return always_returns(v->as()); + case ast_if_statement: return always_returns(v->as()); + default: + // unhandled statements (like assert) and statement expressions + return false; + } + } + + bool always_returns(V v) { + bool always = false; + for (AnyV item : v->get_items()) { + if (always && item->type != ast_empty_statement) { + item->loc.show_warning("unreachable code"); + break; + } + always |= always_returns(item); + } + return always; + } + + static bool always_returns([[maybe_unused]] V v) { + // quite obvious: `return expr` interrupts control flow + return true; + } + + static bool always_returns([[maybe_unused]] V v) { + // todo `throw excNo` currently does not interrupt control flow + // (in other words, `throw 1; something` - something is reachable) + // the reason is that internally it's transformed to a call of built-in function __throw(), + // which is a regular function, like __throw_if() or loadInt() + // to fix this later on, it should be deeper, introducing Op::_Throw for example, + // to make intermediate representations and stack optimizer also be aware that after it there is unreachable + return false; + } + + static bool always_returns([[maybe_unused]] V v) { + // neither annotations like @noreturn nor auto-detection of always-throwing functions also doesn't exist + // in order to do this in the future, it should be handled not only at AST/CFG level, + // but inside Op and 
low-level optimizer (at least if reachability detection is not moved out of there) + // see comments for `throw` above, similar to this case + return false; + } + + bool always_returns(V v) { + return always_returns(v->get_body()); + } + + bool always_returns(V v) { + return always_returns(v->get_body()); + } + + bool always_returns(V v) { + return always_returns(v->get_body()); + } + + bool always_returns(V v) { + return always_returns(v->get_try_body()) && always_returns(v->get_catch_body()); + } + + bool always_returns(V v) { + return always_returns(v->get_if_body()) && always_returns(v->get_else_body()); + } + +public: + void start_visiting_function(V v_function) { + bool control_flow_reaches_end = !always_returns(v_function->get_body()->as()); + if (control_flow_reaches_end) { + v_function->fun_ref->mutate()->assign_is_implicit_return(); + } + } +}; + + +void pipeline_detect_unreachable_statements(const AllSrcFiles& all_src_files) { + visit_ast_of_all_functions(all_src_files); +} + +} // namespace tolk diff --git a/tolk/pipe-discover-parse-sources.cpp b/tolk/pipe-discover-parse-sources.cpp index a8445ae9..92cc2807 100644 --- a/tolk/pipe-discover-parse-sources.cpp +++ b/tolk/pipe-discover-parse-sources.cpp @@ -28,6 +28,14 @@ #include "ast-from-tokens.h" #include "compiler-state.h" +/* + * This is the starting point of compilation pipeline. + * It parses Tolk files to AST, analyzes `import` statements and loads/parses imported files. + * + * When it finishes, all files have been parsed to AST, and no more files will later be added. + * If a parsing error happens (invalid syntax), an exception is thrown immediately from ast-from-tokens.cpp. 
+ */ + namespace tolk { AllSrcFiles pipeline_discover_and_parse_sources(const std::string& stdlib_filename, const std::string& entrypoint_filename) { @@ -50,7 +58,7 @@ AllSrcFiles pipeline_discover_and_parse_sources(const std::string& stdlib_filena SrcFile* imported = G.all_src_files.locate_and_register_source_file(rel_filename, v_import->loc); file->imports.push_back(SrcFile::ImportStatement{imported}); - v_import->mutate_set_src_file(imported); + v_import->mutate()->assign_src_file(imported); } } } diff --git a/tolk/pipe-find-unused-symbols.cpp b/tolk/pipe-find-unused-symbols.cpp index f83579f4..815905e6 100644 --- a/tolk/pipe-find-unused-symbols.cpp +++ b/tolk/pipe-find-unused-symbols.cpp @@ -24,51 +24,41 @@ from all source files in the program, then also delete it here. */ #include "tolk.h" -#include "src-file.h" #include "compiler-state.h" /* - * Here we find unused symbols (global functions and variables) to strip them off codegen. - * Note, that currently it's implemented as a standalone step after AST has been transformed to legacy Expr/Op. - * The reason why it's not done on AST level is that symbol resolving is done too late. For instance, - * having `beginCell()` there is not enough information in AST whether if points to a global function - * or it's a local variable application. - * In the future, this should be done on AST level. + * This pipe finds unused symbols (global functions and variables) to strip them off codegen. + * It happens after converting AST to Op, so it does not traverse AST. + * In the future, when control flow graph is introduced, this should be done at AST level. 
*/ namespace tolk { static void mark_function_used_dfs(const std::unique_ptr& op); -static void mark_function_used(SymValCodeFunc* func_val) { - if (!func_val->code || func_val->is_really_used) { // already handled +static void mark_function_used(const FunctionData* fun_ref) { + if (!fun_ref->is_regular_function() || fun_ref->is_really_used()) { // already handled return; } - func_val->is_really_used = true; - mark_function_used_dfs(func_val->code->ops); + fun_ref->mutate()->assign_is_really_used(); + mark_function_used_dfs(std::get(fun_ref->body)->code->ops); } -static void mark_global_var_used(SymValGlobVar* glob_val) { - glob_val->is_really_used = true; +static void mark_global_var_used(const GlobalVarData* glob_ref) { + glob_ref->mutate()->assign_is_really_used(); } static void mark_function_used_dfs(const std::unique_ptr& op) { if (!op) { return; } - // op->fun_ref, despite its name, may actually ref global var - // note, that for non-calls, e.g. `var a = some_fn` (Op::_Let), some_fn is Op::_GlobVar - // (in other words, fun_ref exists not only for direct Op::_Call, but for non-call references also) - if (op->fun_ref) { - if (auto* func_val = dynamic_cast(op->fun_ref->value)) { - mark_function_used(func_val); - } else if (auto* glob_val = dynamic_cast(op->fun_ref->value)) { - mark_global_var_used(glob_val); - } else if (auto* asm_val = dynamic_cast(op->fun_ref->value)) { - } else { - tolk_assert(false); - } + + if (op->f_sym) { // for Op::_Call + mark_function_used(op->f_sym); + } + if (op->g_sym) { // for Op::_GlobVar + mark_global_var_used(op->g_sym); } mark_function_used_dfs(op->next); mark_function_used_dfs(op->block0); @@ -76,11 +66,9 @@ static void mark_function_used_dfs(const std::unique_ptr& op) { } void pipeline_find_unused_symbols() { - for (SymDef* func_sym : G.all_code_functions) { - auto* func_val = dynamic_cast(func_sym->value); - std::string name = G.symbols.get_name(func_sym->sym_idx); - if (func_val->method_id.not_null() || 
func_val->is_entrypoint()) { - mark_function_used(func_val); + for (const FunctionData* fun_ref : G.all_code_functions) { + if (fun_ref->is_method_id_not_empty()) { // get methods, main and other entrypoints, regular functions with @method_id + mark_function_used(fun_ref); } } } diff --git a/tolk/pipe-generate-fif-output.cpp b/tolk/pipe-generate-fif-output.cpp index 91a99f96..5c0f1647 100644 --- a/tolk/pipe-generate-fif-output.cpp +++ b/tolk/pipe-generate-fif-output.cpp @@ -1,5 +1,5 @@ /* - This file is part of TON Blockchain source code. + This file is part of TON Blockchain source code-> TON Blockchain is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License @@ -30,106 +30,86 @@ namespace tolk { -bool SymValCodeFunc::does_need_codegen() const { - // when a function is declared, but not referenced from code in any way, don't generate its body - if (!is_really_used && G.settings.remove_unused_functions) { - return false; - } - // when a function is referenced like `var a = some_fn;` (or in some other non-call way), its continuation should exist - if (flags & flagUsedAsNonCall) { - return true; - } - // currently, there is no inlining, all functions are codegenerated - // (but actually, unused ones are later removed by Fift) - // in the future, we may want to implement a true AST inlining for "simple" functions - return true; -} - -void SymValCodeFunc::set_code(CodeBlob* code) { +void FunctionBodyCode::set_code(CodeBlob* code) { this->code = code; } -void SymValAsmFunc::set_code(std::vector code) { - this->ext_compile = make_ext_compile(std::move(code)); +void FunctionBodyAsm::set_code(std::vector&& code) { + this->ops = std::move(code); } -static void generate_output_func(SymDef* func_sym) { - SymValCodeFunc* func_val = dynamic_cast(func_sym->value); - tolk_assert(func_val); - std::string name = G.symbols.get_name(func_sym->sym_idx); +static void generate_output_func(const FunctionData* fun_ref) { + 
tolk_assert(fun_ref->is_regular_function()); if (G.is_verbosity(2)) { - std::cerr << "\n\n=========================\nfunction " << name << " : " << func_val->get_type() << std::endl; + std::cerr << "\n\n=========================\nfunction " << fun_ref->name << " : " << fun_ref->full_type << std::endl; } - if (!func_val->code) { - throw ParseError(func_sym->loc, "function `" + name + "` is just declared, not implemented"); - } else { - CodeBlob& code = *(func_val->code); - if (G.is_verbosity(3)) { - code.print(std::cerr, 9); + + CodeBlob* code = std::get(fun_ref->body)->code; + if (G.is_verbosity(3)) { + code->print(std::cerr, 9); + } + code->simplify_var_types(); + if (G.is_verbosity(5)) { + std::cerr << "after simplify_var_types: \n"; + code->print(std::cerr, 0); + } + code->prune_unreachable_code(); + if (G.is_verbosity(5)) { + std::cerr << "after prune_unreachable: \n"; + code->print(std::cerr, 0); + } + code->split_vars(true); + if (G.is_verbosity(5)) { + std::cerr << "after split_vars: \n"; + code->print(std::cerr, 0); + } + for (int i = 0; i < 8; i++) { + code->compute_used_code_vars(); + if (G.is_verbosity(4)) { + std::cerr << "after compute_used_vars: \n"; + code->print(std::cerr, 6); } - code.simplify_var_types(); + code->fwd_analyze(); if (G.is_verbosity(5)) { - std::cerr << "after simplify_var_types: \n"; - code.print(std::cerr, 0); + std::cerr << "after fwd_analyze: \n"; + code->print(std::cerr, 6); } - code.prune_unreachable_code(); + code->prune_unreachable_code(); if (G.is_verbosity(5)) { std::cerr << "after prune_unreachable: \n"; - code.print(std::cerr, 0); - } - code.split_vars(true); - if (G.is_verbosity(5)) { - std::cerr << "after split_vars: \n"; - code.print(std::cerr, 0); - } - for (int i = 0; i < 8; i++) { - code.compute_used_code_vars(); - if (G.is_verbosity(4)) { - std::cerr << "after compute_used_vars: \n"; - code.print(std::cerr, 6); - } - code.fwd_analyze(); - if (G.is_verbosity(5)) { - std::cerr << "after fwd_analyze: \n"; - 
code.print(std::cerr, 6); - } - code.prune_unreachable_code(); - if (G.is_verbosity(5)) { - std::cerr << "after prune_unreachable: \n"; - code.print(std::cerr, 6); - } - } - code.mark_noreturn(); - if (G.is_verbosity(3)) { - code.print(std::cerr, 15); - } - if (G.is_verbosity(2)) { - std::cerr << "\n---------- resulting code for " << name << " -------------\n"; - } - const char* modifier = ""; - if (func_val->is_inline()) { - modifier = "INLINE"; - } else if (func_val->is_inline_ref()) { - modifier = "REF"; - } - std::cout << std::string(2, ' ') << name << " PROC" << modifier << ":<{\n"; - int mode = 0; - if (G.settings.stack_layout_comments) { - mode |= Stack::_StkCmt | Stack::_CptStkCmt; - } - if (func_val->is_inline() && code.ops->noreturn()) { - mode |= Stack::_InlineFunc; - } - if (func_val->is_inline() || func_val->is_inline_ref()) { - mode |= Stack::_InlineAny; - } - code.generate_code(std::cout, mode, 2); - std::cout << std::string(2, ' ') << "}>\n"; - if (G.is_verbosity(2)) { - std::cerr << "--------------\n"; + code->print(std::cerr, 6); } } + code->mark_noreturn(); + if (G.is_verbosity(3)) { + code->print(std::cerr, 15); + } + if (G.is_verbosity(2)) { + std::cerr << "\n---------- resulting code for " << fun_ref->name << " -------------\n"; + } + const char* modifier = ""; + if (fun_ref->is_inline()) { + modifier = "INLINE"; + } else if (fun_ref->is_inline_ref()) { + modifier = "REF"; + } + std::cout << std::string(2, ' ') << fun_ref->name << " PROC" << modifier << ":<{\n"; + int mode = 0; + if (G.settings.stack_layout_comments) { + mode |= Stack::_StkCmt | Stack::_CptStkCmt; + } + if (fun_ref->is_inline() && code->ops->noreturn()) { + mode |= Stack::_InlineFunc; + } + if (fun_ref->is_inline() || fun_ref->is_inline_ref()) { + mode |= Stack::_InlineAny; + } + code->generate_code(std::cout, mode, 2); + std::cout << std::string(2, ' ') << "}>\n"; + if (G.is_verbosity(2)) { + std::cerr << "--------------\n"; + } } void 
pipeline_generate_fif_output_to_std_cout(const AllSrcFiles& all_src_files) { @@ -149,26 +129,23 @@ void pipeline_generate_fif_output_to_std_cout(const AllSrcFiles& all_src_files) std::cout << "PROGRAM{\n"; bool has_main_procedure = false; - for (SymDef* func_sym : G.all_code_functions) { - SymValCodeFunc* func_val = dynamic_cast(func_sym->value); - tolk_assert(func_val); - if (!func_val->does_need_codegen()) { + for (const FunctionData* fun_ref : G.all_code_functions) { + if (!fun_ref->does_need_codegen()) { if (G.is_verbosity(2)) { - std::cerr << func_sym->name() << ": code not generated, function does not need codegen\n"; + std::cerr << fun_ref->name << ": code not generated, function does not need codegen\n"; } continue; } - std::string name = G.symbols.get_name(func_sym->sym_idx); - if (func_val->is_entrypoint() && (name == "main" || name == "onInternalMessage")) { + if (fun_ref->is_entrypoint() && (fun_ref->name == "main" || fun_ref->name == "onInternalMessage")) { has_main_procedure = true; } std::cout << std::string(2, ' '); - if (func_val->method_id.is_null()) { - std::cout << "DECLPROC " << name << "\n"; + if (fun_ref->is_method_id_not_empty()) { + std::cout << fun_ref->method_id << " DECLMETHOD " << fun_ref->name << "\n"; } else { - std::cout << func_val->method_id << " DECLMETHOD " << name << "\n"; + std::cout << "DECLPROC " << fun_ref->name << "\n"; } } @@ -176,25 +153,22 @@ void pipeline_generate_fif_output_to_std_cout(const AllSrcFiles& all_src_files) throw Fatal("the contract has no entrypoint; forgot `fun onInternalMessage(...)`?"); } - for (SymDef* gvar_sym : G.all_global_vars) { - auto* glob_val = dynamic_cast(gvar_sym->value); - tolk_assert(glob_val); - if (!glob_val->is_really_used && G.settings.remove_unused_functions) { + for (const GlobalVarData* var_ref : G.all_global_vars) { + if (!var_ref->is_really_used() && G.settings.remove_unused_functions) { if (G.is_verbosity(2)) { - std::cerr << gvar_sym->name() << ": variable not generated, it's 
unused\n"; + std::cerr << var_ref->name << ": variable not generated, it's unused\n"; } continue; } - std::string name = G.symbols.get_name(gvar_sym->sym_idx); - std::cout << std::string(2, ' ') << "DECLGLOBVAR " << name << "\n"; + + std::cout << std::string(2, ' ') << "DECLGLOBVAR " << var_ref->name << "\n"; } - for (SymDef* func_sym : G.all_code_functions) { - SymValCodeFunc* func_val = dynamic_cast(func_sym->value); - if (!func_val->does_need_codegen()) { + for (const FunctionData* fun_ref : G.all_code_functions) { + if (!fun_ref->does_need_codegen()) { continue; } - generate_output_func(func_sym); + generate_output_func(fun_ref); } std::cout << "}END>c\n"; diff --git a/tolk/pipe-infer-check-types.cpp b/tolk/pipe-infer-check-types.cpp new file mode 100644 index 00000000..8c18bae9 --- /dev/null +++ b/tolk/pipe-infer-check-types.cpp @@ -0,0 +1,524 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "tolk.h" +#include "src-file.h" +#include "ast.h" +#include "ast-visitor.h" + +/* + * This pipe does type inferring. + * It will be fully rewritten, because current type system is based on Hindley-Milner (unifying usages), + * and I am going to introduce a static type system, drop TypeExpr completely, etc. 
+ * Currently, after this inferring, lots of `te_Indirect` and partially complete types still exist, + * they are partially refined during converting AST to legacy. + */ + +namespace tolk { + +class InferAndCheckTypesInsideFunctionVisitor final : public ASTVisitorFunctionBody { + const FunctionData* current_function = nullptr; + + static bool expect_integer(TypeExpr* inferred) { + try { + TypeExpr* t_int = TypeExpr::new_atomic(TypeExpr::_Int); + unify(inferred, t_int); + return true; + } catch (UnifyError&) { + return false; + } + } + + static bool expect_integer(AnyExprV v_inferred) { + return expect_integer(v_inferred->inferred_type); + } + + static bool is_expr_valid_as_return_self(AnyExprV return_expr) { + // `return self` + if (return_expr->type == ast_self_keyword) { + return true; + } + // `return self.someMethod()` + if (auto v_call = return_expr->try_as()) { + return v_call->fun_ref->does_return_self() && is_expr_valid_as_return_self(v_call->get_obj()); + } + // `return cond ? ...
: ...` + if (auto v_ternary = return_expr->try_as()) { + return is_expr_valid_as_return_self(v_ternary->get_when_true()) && is_expr_valid_as_return_self(v_ternary->get_when_false()); + } + return false; + } + + void visit(V v) override { + parent::visit(v->get_expr()); + v->mutate()->assign_inferred_type(v->get_expr()->inferred_type); + } + + void visit(V v) override { + if (v->empty()) { + v->mutate()->assign_inferred_type(TypeExpr::new_unit()); + return; + } + std::vector types_list; + types_list.reserve(v->get_items().size()); + for (AnyExprV item : v->get_items()) { + parent::visit(item); + types_list.emplace_back(item->inferred_type); + } + v->mutate()->assign_inferred_type(TypeExpr::new_tensor(std::move(types_list))); + } + + void visit(V v) override { + if (v->empty()) { + v->mutate()->assign_inferred_type(TypeExpr::new_tuple(TypeExpr::new_unit())); + return; + } + std::vector types_list; + types_list.reserve(v->get_items().size()); + for (AnyExprV item : v->get_items()) { + parent::visit(item); + types_list.emplace_back(item->inferred_type); + } + v->mutate()->assign_inferred_type(TypeExpr::new_tuple(TypeExpr::new_tensor(std::move(types_list), false))); + } + + void visit(V v) override { + if (const auto* glob_ref = v->sym->try_as()) { + v->mutate()->assign_inferred_type(glob_ref->declared_type); + } else if (const auto* const_ref = v->sym->try_as()) { + v->mutate()->assign_inferred_type(const_ref->inferred_type); + } else if (const auto* fun_ref = v->sym->try_as()) { + v->mutate()->assign_inferred_type(fun_ref->full_type); + } else if (const auto* var_ref = v->sym->try_as()) { + v->mutate()->assign_inferred_type(var_ref->declared_type); + } + } + + void visit(V v) override { + v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + } + + void visit(V v) override { + switch (v->modifier) { + case 0: + case 's': + case 'a': + v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Slice)); + break; + case 'u': + case 'h': + case 
'H': + case 'c': + v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + break; + default: + break; + } + } + + void visit(V v) override { + v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + } + + void visit(V v) override { + const FunctionData* fun_ref = lookup_global_symbol("__null")->as(); + TypeExpr* fun_type = TypeExpr::new_map(TypeExpr::new_unit(), TypeExpr::new_hole()); + TypeExpr* sym_type = fun_ref->full_type; + try { + unify(fun_type, sym_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot apply function " << fun_ref->name << " : " << fun_ref->full_type << " to arguments of type " + << fun_type->args[0] << ": " << ue; + v->error(os.str()); + } + TypeExpr* e_type = fun_type->args[1]; + TypeExpr::remove_indirect(e_type); + v->mutate()->assign_inferred_type(e_type); + } + + void visit(V v) override { + v->mutate()->assign_inferred_type(v->param_ref->declared_type); + } + + void visit(V v) override { + parent::visit(v->get_expr()); + v->mutate()->assign_inferred_type(v->get_expr()->inferred_type); + } + + void visit(V v) override { + if (v->empty()) { + v->mutate()->assign_inferred_type(TypeExpr::new_unit()); + return; + } + std::vector types_list; + types_list.reserve(v->size()); + for (AnyExprV item : v->get_arguments()) { + parent::visit(item); + types_list.emplace_back(item->inferred_type); + } + v->mutate()->assign_inferred_type(TypeExpr::new_tensor(std::move(types_list))); + } + + void visit(V v) override { + // special error for "null()" which is a FunC syntax + if (v->get_called_f()->type == ast_null_keyword) { + v->error("null is not a function: use `null`, not `null()`"); + } + + parent::visit(v->get_called_f()); + visit(v->get_arg_list()); + + // most likely it's a global function, but also may be `some_var(args)` or even `getF()(args)` + const FunctionData* fun_ref = v->fun_maybe; + if (!fun_ref) { + TypeExpr* arg_tensor = v->get_arg_list()->inferred_type; + TypeExpr* lhs_type 
= v->get_called_f()->inferred_type; + TypeExpr* fun_type = TypeExpr::new_map(arg_tensor, TypeExpr::new_hole()); + try { + unify(fun_type, lhs_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot apply expression of type " << lhs_type << " to an expression of type " << arg_tensor + << ": " << ue; + v->error(os.str()); + } + TypeExpr* e_type = fun_type->args[1]; + TypeExpr::remove_indirect(e_type); + v->mutate()->assign_inferred_type(e_type); + return; + } + + TypeExpr* arg_tensor = v->get_arg_list()->inferred_type; + TypeExpr* fun_type = TypeExpr::new_map(arg_tensor, TypeExpr::new_hole()); + TypeExpr* sym_type = fun_ref->full_type; + try { + unify(fun_type, sym_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot apply function " << fun_ref->name << " : " << fun_ref->full_type << " to arguments of type " + << fun_type->args[0] << ": " << ue; + v->error(os.str()); + } + TypeExpr* e_type = fun_type->args[1]; + TypeExpr::remove_indirect(e_type); + + if (fun_ref->has_mutate_params()) { + tolk_assert(e_type->constr == TypeExpr::te_Tensor); + e_type = e_type->args[e_type->args.size() - 1]; + } + + v->mutate()->assign_inferred_type(e_type); + } + + void visit(V v) override { + parent::visit(v->get_obj()); + visit(v->get_arg_list()); + std::vector arg_types; + arg_types.reserve(1 + v->get_num_args()); + arg_types.push_back(v->get_obj()->inferred_type); + for (int i = 0; i < v->get_num_args(); ++i) { + arg_types.push_back(v->get_arg(i)->inferred_type); + } + + TypeExpr* arg_tensor = TypeExpr::new_tensor(std::move(arg_types)); + TypeExpr* fun_type = TypeExpr::new_map(arg_tensor, TypeExpr::new_hole()); + TypeExpr* sym_type = v->fun_ref->full_type; + try { + unify(fun_type, sym_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot apply function " << v->fun_ref->name << " : " << v->fun_ref->full_type << " to arguments of type " + << fun_type->args[0] << ": " << ue; + v->error(os.str()); + } + TypeExpr* 
e_type = fun_type->args[1]; + TypeExpr::remove_indirect(e_type); + + if (v->fun_ref->has_mutate_params()) { + tolk_assert(e_type->constr == TypeExpr::te_Tensor); + e_type = e_type->args[e_type->args.size() - 1]; + } + if (v->fun_ref->does_return_self()) { + e_type = v->get_obj()->inferred_type; + TypeExpr::remove_indirect(e_type); + } + + v->mutate()->assign_inferred_type(e_type); + } + + void visit(V v) override { + v->mutate()->assign_inferred_type(TypeExpr::new_hole()); + } + + void visit(V v) override { + parent::visit(v->get_rhs()); + if (!expect_integer(v->get_rhs())) { + v->error("operator `" + static_cast(v->operator_name) + "` expects integer operand"); + } + v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + } + + void visit(V v) override { + parent::visit(v->get_lhs()); + parent::visit(v->get_rhs()); + switch (v->tok) { + case tok_assign: { + TypeExpr* lhs_type = v->get_lhs()->inferred_type; + TypeExpr* rhs_type = v->get_rhs()->inferred_type; + try { + unify(lhs_type, rhs_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot assign an expression of type " << rhs_type << " to a variable or pattern of type " + << lhs_type << ": " << ue; + v->error(os.str()); + } + TypeExpr* e_type = lhs_type; + TypeExpr::remove_indirect(e_type); + v->mutate()->assign_inferred_type(e_type); + break; + } + case tok_eq: + case tok_neq: + case tok_spaceship: { + if (!expect_integer(v->get_lhs()) || !expect_integer(v->get_rhs())) { + v->error("comparison operators `== !=` can compare only integers"); + } + v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + break; + } + case tok_logical_and: + case tok_logical_or: { + if (!expect_integer(v->get_lhs()) || !expect_integer(v->get_rhs())) { + v->error("logical operators `&& ||` expect integer operands"); + } + v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + break; + } + default: + if (!expect_integer(v->get_lhs()) || 
!expect_integer(v->get_rhs())) { + v->error("operator `" + static_cast(v->operator_name) + "` expects integer operands"); + } + v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + } + } + + void visit(V v) override { + parent::visit(v->get_cond()); + if (!expect_integer(v->get_cond())) { + v->get_cond()->error("condition of ternary ?: operator must be an integer"); + } + parent::visit(v->get_when_true()); + parent::visit(v->get_when_false()); + + TypeExpr* res = TypeExpr::new_hole(); + TypeExpr *ttrue = v->get_when_true()->inferred_type; + TypeExpr *tfals = v->get_when_false()->inferred_type; + unify(res, ttrue); + unify(res, tfals); + v->mutate()->assign_inferred_type(res); + } + + void visit(V v) override { + parent::visit(v->get_cond()); + parent::visit(v->get_if_body()); + parent::visit(v->get_else_body()); + TypeExpr* flag_type = TypeExpr::new_atomic(TypeExpr::_Int); + TypeExpr* cond_type = v->get_cond()->inferred_type; + try { + + unify(cond_type, flag_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "`if` condition value of type " << cond_type << " is not an integer: " << ue; + v->get_cond()->error(os.str()); + } + v->get_cond()->mutate()->assign_inferred_type(cond_type); + } + + void visit(V v) override { + parent::visit(v->get_cond()); + parent::visit(v->get_body()); + TypeExpr* cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); + TypeExpr* cond_type = v->get_cond()->inferred_type; + try { + unify(cond_type, cnt_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "repeat count value of type " << cond_type << " is not an integer: " << ue; + v->get_cond()->error(os.str()); + } + v->get_cond()->mutate()->assign_inferred_type(cond_type); + } + + void visit(V v) override { + parent::visit(v->get_cond()); + parent::visit(v->get_body()); + TypeExpr* cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); + TypeExpr* cond_type = v->get_cond()->inferred_type; + try { + unify(cond_type, cnt_type); + } catch 
(UnifyError& ue) { + std::ostringstream os; + os << "`while` condition value of type " << cond_type << " is not an integer: " << ue; + v->get_cond()->error(os.str()); + } + v->get_cond()->mutate()->assign_inferred_type(cond_type); + } + + void visit(V v) override { + parent::visit(v->get_body()); + parent::visit(v->get_cond()); + TypeExpr* cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); + TypeExpr* cond_type = v->get_cond()->inferred_type; + try { + unify(cond_type, cnt_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "`while` condition value of type " << cond_type << " is not an integer: " << ue; + v->get_cond()->error(os.str()); + } + v->get_cond()->mutate()->assign_inferred_type(cond_type); + } + + void visit(V v) override { + parent::visit(v->get_return_value()); + if (current_function->does_return_self()) { + if (!is_expr_valid_as_return_self(v->get_return_value())) { + v->error("invalid return from `self` function"); + } + return; + } + TypeExpr* expr_type = v->get_return_value()->inferred_type; + TypeExpr* ret_type = current_function->full_type; + if (ret_type->constr == TypeExpr::te_ForAll) { + ret_type = ret_type->args[0]; + } + tolk_assert(ret_type->constr == TypeExpr::te_Map); + ret_type = ret_type->args[1]; + if (current_function->has_mutate_params()) { + tolk_assert(ret_type->constr == TypeExpr::te_Tensor); + ret_type = ret_type->args[ret_type->args.size() - 1]; + } + try { + unify(expr_type, ret_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "previous function return type " << ret_type + << " cannot be unified with return statement expression type " << expr_type << ": " << ue; + v->error(os.str()); + } + } + + void visit(V v) override { + if (v->var_maybe) { // not underscore + if (const auto* var_ref = v->var_maybe->try_as()) { + v->mutate()->assign_inferred_type(var_ref->declared_type); + } else if (const auto* glob_ref = v->var_maybe->try_as()) { + 
v->mutate()->assign_inferred_type(glob_ref->declared_type); + } else { + tolk_assert(0); + } + } else if (v->declared_type) { // underscore with type + v->mutate()->assign_inferred_type(v->declared_type); + } else { // just underscore + v->mutate()->assign_inferred_type(TypeExpr::new_hole()); + } + v->get_identifier()->mutate()->assign_inferred_type(v->inferred_type); + } + + void visit(V v) override { + parent::visit(v->get_lhs()); + parent::visit(v->get_assigned_val()); + TypeExpr* lhs = v->get_lhs()->inferred_type; + TypeExpr* rhs = v->get_assigned_val()->inferred_type; + try { + unify(lhs, rhs); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot assign an expression of type " << rhs << " to a variable or pattern of type " << lhs << ": " << ue; + v->error(os.str()); + } + } + + void visit(V v) override { + parent::visit(v->get_try_body()); + parent::visit(v->get_catch_expr()); + + TypeExpr* tvm_error_type = TypeExpr::new_tensor(TypeExpr::new_var(), TypeExpr::new_atomic(TypeExpr::_Int)); + tolk_assert(v->get_catch_expr()->size() == 2); + TypeExpr* type1 = v->get_catch_expr()->get_item(0)->inferred_type; + unify(type1, tvm_error_type->args[1]); + TypeExpr* type2 = v->get_catch_expr()->get_item(1)->inferred_type; + unify(type2, tvm_error_type->args[0]); + + parent::visit(v->get_catch_body()); + } + + void visit(V v) override { + parent::visit(v->get_thrown_code()); + if (!expect_integer(v->get_thrown_code())) { + v->get_thrown_code()->error("excNo of `throw` must be an integer"); + } + if (v->has_thrown_arg()) { + parent::visit(v->get_thrown_arg()); + } + } + + void visit(V v) override { + parent::visit(v->get_cond()); + if (!expect_integer(v->get_cond())) { + v->get_cond()->error("condition of `assert` must be an integer"); + } + parent::visit(v->get_thrown_code()); + } + +public: + void start_visiting_function(V v_function) override { + current_function = v_function->fun_ref; + parent::visit(v_function->get_body()); + if 
(current_function->is_implicit_return()) { + if (current_function->does_return_self()) { + throw ParseError(v_function->get_body()->as()->loc_end, "missing return; forgot `return self`?"); + } + TypeExpr* expr_type = TypeExpr::new_unit(); + TypeExpr* ret_type = current_function->full_type; + if (ret_type->constr == TypeExpr::te_ForAll) { + ret_type = ret_type->args[0]; + } + tolk_assert(ret_type->constr == TypeExpr::te_Map); + ret_type = ret_type->args[1]; + if (current_function->has_mutate_params()) { + ret_type = ret_type->args[ret_type->args.size() - 1]; + } + try { + unify(expr_type, ret_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "implicit function return type " << expr_type + << " cannot be unified with inferred return type " << ret_type << ": " << ue; + v_function->error(os.str()); + } + } + } +}; + +void pipeline_infer_and_check_types(const AllSrcFiles& all_src_files) { + visit_ast_of_all_functions(all_src_files); +} + +} // namespace tolk diff --git a/tolk/pipe-refine-lvalue-for-mutate.cpp b/tolk/pipe-refine-lvalue-for-mutate.cpp new file mode 100644 index 00000000..c4c31b51 --- /dev/null +++ b/tolk/pipe-refine-lvalue-for-mutate.cpp @@ -0,0 +1,118 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . 
+*/ +#include "tolk.h" +#include "ast.h" +#include "ast-visitor.h" + +/* + * This pipe refines rvalue/lvalue and checks `mutate` arguments validity. + * It happens after type inferring (after methods binding), because it uses fun_ref of calls. + * + * Example: `a.increment().increment()`, the first `a.increment()` becomes lvalue (assume that increment mutates self). + * Example: `increment(a)` is invalid, should be `increment(mutate a)`. + * + * Note, that explicitly specifying `mutate` for arguments, like `increment(mutate a)` is on purpose. + * If we wished `increment(a)` to be valid (to work and mutate `a`, like passing by ref), it would also be done here, + * refining `a` to be lvalue. But to avoid unexpected mutations, `mutate` keyword for an argument is required. + * So, for mutated arguments, instead of setting lvalue, we check its presence. + */ + +namespace tolk { + +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_invalid_mutate_arg_passed(AnyV v, const FunctionData* fun_ref, const LocalVarData& p_sym, bool called_as_method, bool arg_passed_as_mutate, AnyV arg_expr) { + std::string arg_str(arg_expr->type == ast_identifier ? arg_expr->as()->name : "obj"); + + // case: `loadInt(cs, 32)`; suggest: `cs.loadInt(32)` + if (p_sym.is_mutate_parameter() && !arg_passed_as_mutate && !called_as_method && p_sym.idx == 0 && fun_ref->does_accept_self()) { + v->error("`" + fun_ref->name + "` is a mutating method; consider calling `" + arg_str + "." + fun_ref->name + "()`, not `" + fun_ref->name + "(" + arg_str + ")`"); + } + // case: `cs.mutating_function()`; suggest: `mutating_function(mutate cs)` or make it a method + if (p_sym.is_mutate_parameter() && called_as_method && p_sym.idx == 0 && !fun_ref->does_accept_self()) { + v->error("function `" + fun_ref->name + "` mutates parameter `" + p_sym.name + "`; consider calling `" + fun_ref->name + "(mutate " + arg_str + ")`, not `" + arg_str + "." 
+ fun_ref->name + "`(); alternatively, rename parameter to `self` to make it a method"); + } + // case: `mutating_function(arg)`; suggest: `mutate arg` + if (p_sym.is_mutate_parameter() && !arg_passed_as_mutate) { + v->error("function `" + fun_ref->name + "` mutates parameter `" + p_sym.name + "`; you need to specify `mutate` when passing an argument, like `mutate " + arg_str + "`"); + } + // case: `usual_function(mutate arg)` + if (!p_sym.is_mutate_parameter() && arg_passed_as_mutate) { + v->error("incorrect `mutate`, since `" + fun_ref->name + "` does not mutate this parameter"); + } + throw Fatal("unreachable"); +} + + +class RefineLvalueForMutateArgumentsVisitor final : public ASTVisitorFunctionBody { + void visit(V v) override { + // most likely it's a global function, but also may be `some_var(args)` or even `getF()(args)` + const FunctionData* fun_ref = v->fun_maybe; + if (!fun_ref) { + parent::visit(v); + for (int i = 0; i < v->get_num_args(); ++i) { + auto v_arg = v->get_arg(i); + if (v_arg->passed_as_mutate) { + v_arg->error("`mutate` used for non-mutate argument"); + } + } + return; + } + + tolk_assert(static_cast(fun_ref->parameters.size()) == v->get_num_args()); + + for (int i = 0; i < v->get_num_args(); ++i) { + const LocalVarData& p_sym = fun_ref->parameters[i]; + auto arg_i = v->get_arg(i); + if (p_sym.is_mutate_parameter() != arg_i->passed_as_mutate) { + fire_error_invalid_mutate_arg_passed(arg_i, fun_ref, p_sym, false, arg_i->passed_as_mutate, arg_i->get_expr()); + } + parent::visit(arg_i); + } + } + + void visit(V v) override { + parent::visit(v); + + const FunctionData* fun_ref = v->fun_ref; + tolk_assert(static_cast(fun_ref->parameters.size()) == 1 + v->get_num_args()); + + if (fun_ref->does_mutate_self()) { + bool will_be_extracted_as_tmp_var = v->get_obj()->type == ast_function_call || v->get_obj()->type == ast_dot_method_call; + if (!will_be_extracted_as_tmp_var) { + v->get_obj()->mutate()->assign_lvalue_true(); + } + } + + if 
(!fun_ref->does_accept_self() && fun_ref->parameters[0].is_mutate_parameter()) { + fire_error_invalid_mutate_arg_passed(v, fun_ref, fun_ref->parameters[0], true, false, v->get_obj()); + } + + for (int i = 0; i < v->get_num_args(); ++i) { + const LocalVarData& p_sym = fun_ref->parameters[1 + i]; + auto arg_i = v->get_arg(i); + if (p_sym.is_mutate_parameter() != arg_i->passed_as_mutate) { + fire_error_invalid_mutate_arg_passed(arg_i, fun_ref, p_sym, false, arg_i->passed_as_mutate, arg_i->get_expr()); + } + } + } +}; + +void pipeline_refine_lvalue_for_mutate_arguments(const AllSrcFiles& all_src_files) { + visit_ast_of_all_functions(all_src_files); +} + +} // namespace tolk diff --git a/tolk/pipe-register-symbols.cpp b/tolk/pipe-register-symbols.cpp index 569d434a..478bc727 100644 --- a/tolk/pipe-register-symbols.cpp +++ b/tolk/pipe-register-symbols.cpp @@ -13,65 +13,50 @@ You should have received a copy of the GNU General Public License along with TON Blockchain. If not, see . - - In addition, as a special exception, the copyright holders give permission - to link the code of portions of this program with the OpenSSL library. - You must obey the GNU General Public License in all respects for all - of the code used other than OpenSSL. If you modify file(s) with this - exception, you may extend this exception to your version of the file(s), - but you are not obligated to do so. If you do not wish to do so, delete this - exception statement from your version. If you delete this exception statement - from all source files in the program, then also delete it here. */ #include "tolk.h" #include "platform-utils.h" #include "src-file.h" #include "ast.h" #include "compiler-state.h" +#include "constant-evaluator.h" #include "td/utils/crypto.h" #include +/* + * This pipe registers global symbols: functions, constants, global vars, etc. + * It happens just after all files have been parsed to AST. + * + * "Registering" means adding symbols to a global symbol table. 
+ * After this pass, any global symbol can be looked up. + * Note, that local variables are not analyzed here, it's a later step. + * Before digging into locals, we need a global symtable to be filled, exactly done here. + */ + namespace tolk { -Expr* process_expr(AnyV v, CodeBlob& code); - -GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_redefinition_of_symbol(V v_ident, SymDef* existing) { - if (existing->loc.is_stdlib()) { - v_ident->error("redefinition of a symbol from stdlib"); - } else if (existing->loc.is_defined()) { - v_ident->error("redefinition of symbol, previous was at: " + existing->loc.to_string()); - } else { - v_ident->error("redefinition of built-in symbol"); - } -} - -static int calc_sym_idx(std::string_view sym_name) { - return G.symbols.lookup_add(sym_name); -} - -static td::RefInt256 calculate_method_id_for_entrypoint(std::string_view func_name) { +static int calculate_method_id_for_entrypoint(std::string_view func_name) { if (func_name == "main" || func_name == "onInternalMessage") { - return td::make_refint(0); + return 0; } if (func_name == "onExternalMessage") { - return td::make_refint(-1); + return -1; } if (func_name == "onRunTickTock") { - return td::make_refint(-2); + return -2; } if (func_name == "onSplitPrepare") { - return td::make_refint(-3); + return -3; } if (func_name == "onSplitInstall") { - return td::make_refint(-4); + return -4; } tolk_assert(false); } -static td::RefInt256 calculate_method_id_by_func_name(std::string_view func_name) { +static int calculate_method_id_by_func_name(std::string_view func_name) { unsigned int crc = td::crc16(static_cast(func_name)); - return td::make_refint((crc & 0xffff) | 0x10000); + return static_cast(crc & 0xffff) | 0x10000; } static void calc_arg_ret_order_of_asm_function(V v_body, V param_list, TypeExpr* ret_type, @@ -89,7 +74,7 @@ static void calc_arg_ret_order_of_asm_function(V v_body, V v_param = param_list->get_param(i); - int arg_width = 
v_param->param_type->get_width(); + int arg_width = v_param->declared_type->get_width(); if (arg_width < 0 || arg_width > 16) { v_param->error("parameters of an assembler built-in function must have a well-defined fixed width"); } @@ -130,102 +115,39 @@ static void calc_arg_ret_order_of_asm_function(V v_body, V v) { - AnyV init_value = v->get_init_value(); - SymDef* sym_def = define_global_symbol(calc_sym_idx(v->get_identifier()->name), v->loc); - if (sym_def->value) { - fire_error_redefinition_of_symbol(v->get_identifier(), sym_def); - } + ConstantValue init_value = eval_const_init_value(v->get_init_value()); + GlobalConstData* c_sym = new GlobalConstData(static_cast(v->get_identifier()->name), v->loc, std::move(init_value)); - // todo currently, constant value calculation is dirty and roughly: init_value is evaluated to fif code - // and waited to be a single expression - // although it works, of course it should be later rewritten using AST calculations, as well as lots of other parts - CodeBlob code("tmp", v->loc, nullptr, nullptr); - Expr* x = process_expr(init_value, code); - if (!x->is_rvalue()) { - v->get_init_value()->error("expression is not strictly Rvalue"); - } - if (v->declared_type && !v->declared_type->equals_to(x->e_type)) { + if (v->declared_type && !v->declared_type->equals_to(c_sym->inferred_type)) { v->error("expression type does not match declared type"); } - SymValConst* sym_val = nullptr; - if (x->cls == Expr::_Const) { // Integer constant - sym_val = new SymValConst(static_cast(G.all_constants.size()), x->intval); - } else if (x->cls == Expr::_SliceConst) { // Slice constant (string) - sym_val = new SymValConst(static_cast(G.all_constants.size()), x->strval); - } else if (x->cls == Expr::_Apply) { // even "1 + 2" is Expr::_Apply (it applies `_+_`) - code.emplace_back(v->loc, Op::_Import, std::vector()); - auto tmp_vars = x->pre_compile(code); - code.emplace_back(v->loc, Op::_Return, std::move(tmp_vars)); - code.emplace_back(v->loc, 
Op::_Nop); - // It is REQUIRED to execute "optimizations" as in tolk.cpp - code.simplify_var_types(); - code.prune_unreachable_code(); - code.split_vars(true); - for (int i = 0; i < 16; i++) { - code.compute_used_code_vars(); - code.fwd_analyze(); - code.prune_unreachable_code(); - } - code.mark_noreturn(); - AsmOpList out_list(0, &code.vars); - code.generate_code(out_list); - if (out_list.list_.size() != 1) { - init_value->error("precompiled expression must result in single operation"); - } - auto op = out_list.list_[0]; - if (!op.is_const()) { - init_value->error("precompiled expression must result in compilation time constant"); - } - if (op.origin.is_null() || !op.origin->is_valid()) { - init_value->error("precompiled expression did not result in a valid integer constant"); - } - sym_val = new SymValConst(static_cast(G.all_constants.size()), op.origin); - } else { - init_value->error("integer or slice literal or constant expected"); - } - sym_def->value = sym_val; -#ifdef TOLK_DEBUG - sym_def->value->sym_name = v->get_identifier()->name; -#endif - G.all_constants.push_back(sym_def); + G.symtable.add_global_const(c_sym); + G.all_constants.push_back(c_sym); + v->mutate()->assign_const_ref(c_sym); } static void register_global_var(V v) { - SymDef* sym_def = define_global_symbol(calc_sym_idx(v->get_identifier()->name), v->loc); - if (sym_def->value) { - fire_error_redefinition_of_symbol(v->get_identifier(), sym_def); - } + GlobalVarData* g_sym = new GlobalVarData(static_cast(v->get_identifier()->name), v->loc, v->declared_type); - sym_def->value = new SymValGlobVar(static_cast(G.all_global_vars.size()), v->declared_type); -#ifdef TOLK_DEBUG - sym_def->value->sym_name = v->get_identifier()->name; -#endif - G.all_global_vars.push_back(sym_def); + G.symtable.add_global_var(g_sym); + G.all_global_vars.push_back(g_sym); + v->mutate()->assign_var_ref(g_sym); } -static SymDef* register_parameter(V v, int idx) { +static LocalVarData register_parameter(V v, int idx) { if 
(v->is_underscore()) { - return nullptr; - } - SymDef* sym_def = define_parameter(calc_sym_idx(v->get_identifier()->name), v->loc); - if (sym_def->value) { - // todo always false now, how to detect similar parameter names? (remember about underscore) - v->error("redefined parameter"); + return {"", v->loc, idx, v->declared_type}; } - SymValVariable* sym_val = new SymValVariable(idx, v->param_type); + LocalVarData p_sym(static_cast(v->param_name), v->loc, idx, v->declared_type); if (v->declared_as_mutate) { - sym_val->flags |= SymValVariable::flagMutateParameter; + p_sym.flags |= LocalVarData::flagMutateParameter; } - if (!v->declared_as_mutate && idx == 0 && v->get_identifier()->name == "self") { - sym_val->flags |= SymValVariable::flagImmutable; + if (!v->declared_as_mutate && idx == 0 && v->param_name == "self") { + p_sym.flags |= LocalVarData::flagImmutable; } - sym_def->value = sym_val; -#ifdef TOLK_DEBUG - sym_def->value->sym_name = v->get_identifier()->name; -#endif - return sym_def; + return p_sym; } static void register_function(V v) { @@ -235,16 +157,16 @@ static void register_function(V v) { TypeExpr* params_tensor_type = nullptr; int n_params = v->get_num_params(); int n_mutate_params = 0; - std::vector parameters_syms; + std::vector parameters; if (n_params) { std::vector param_tensor_items; param_tensor_items.reserve(n_params); - parameters_syms.reserve(n_params); + parameters.reserve(n_params); for (int i = 0; i < n_params; ++i) { auto v_param = v->get_param(i); n_mutate_params += static_cast(v_param->declared_as_mutate); - param_tensor_items.emplace_back(v_param->param_type); - parameters_syms.emplace_back(register_parameter(v_param, i)); + param_tensor_items.emplace_back(v_param->declared_type); + parameters.emplace_back(register_parameter(v_param, i)); } params_tensor_type = TypeExpr::new_tensor(std::move(param_tensor_items)); } else { @@ -261,24 +183,20 @@ static void register_function(V v) { function_type = 
TypeExpr::new_forall(std::move(type_vars), function_type); } if (v->marked_as_builtin) { - const SymDef* builtin_func = lookup_symbol(G.symbols.lookup(func_name)); - const SymValFunc* func_val = builtin_func ? dynamic_cast(builtin_func->value) : nullptr; - if (!func_val || !func_val->is_builtin()) { + const Symbol* builtin_func = lookup_global_symbol(func_name); + const FunctionData* func_val = builtin_func ? builtin_func->as() : nullptr; + if (!func_val || !func_val->is_builtin_function()) { v->error("`builtin` used for non-builtin function"); } #ifdef TOLK_DEBUG // in release, we don't need this check, since `builtin` is used only in stdlib, which is our responsibility - if (!func_val->sym_type->equals_to(function_type) || func_val->is_marked_as_pure() != v->marked_as_pure) { + if (!func_val->full_type->equals_to(function_type) || func_val->is_marked_as_pure() != v->marked_as_pure) { v->error("declaration for `builtin` function doesn't match an actual one"); } #endif return; } - SymDef* sym_def = define_global_symbol(calc_sym_idx(func_name), v->loc); - if (sym_def->value) { - fire_error_redefinition_of_symbol(v->get_identifier(), sym_def); - } if (G.is_verbosity(1)) { std::cerr << "fun " << func_name << " : " << function_type << std::endl; } @@ -286,67 +204,61 @@ static void register_function(V v) { v->error("a pure function should return something, otherwise it will be optimized out anyway"); } - SymValFunc* sym_val = nullptr; - if (const auto* v_seq = v->get_body()->try_as()) { - sym_val = new SymValCodeFunc(std::move(parameters_syms), static_cast(G.all_code_functions.size()), function_type); - } else if (const auto* v_asm = v->get_body()->try_as()) { - std::vector arg_order, ret_order; - calc_arg_ret_order_of_asm_function(v_asm, v->get_param_list(), v->ret_type, arg_order, ret_order); - sym_val = new SymValAsmFunc(std::move(parameters_syms), function_type, std::move(arg_order), std::move(ret_order), 0); - } else { - v->error("Unexpected function body 
statement"); + FunctionBody f_body = v->get_body()->type == ast_sequence ? static_cast(new FunctionBodyCode) : static_cast(new FunctionBodyAsm); + FunctionData* f_sym = new FunctionData(static_cast(func_name), v->loc, function_type, std::move(parameters), 0, f_body); + + if (const auto* v_asm = v->get_body()->try_as()) { + calc_arg_ret_order_of_asm_function(v_asm, v->get_param_list(), v->ret_type, f_sym->arg_order, f_sym->ret_order); } if (v->method_id) { - sym_val->method_id = td::string_to_int256(static_cast(v->method_id->int_val)); - if (sym_val->method_id.is_null()) { + if (v->method_id->intval.is_null() || !v->method_id->intval->signed_fits_bits(32)) { v->method_id->error("invalid integer constant"); } + f_sym->method_id = static_cast(v->method_id->intval->to_long()); } else if (v->marked_as_get_method) { - sym_val->method_id = calculate_method_id_by_func_name(func_name); - for (const SymDef* other : G.all_get_methods) { - if (!td::cmp(dynamic_cast(other->value)->method_id, sym_val->method_id)) { - v->error(PSTRING() << "GET methods hash collision: `" << other->name() << "` and `" << static_cast(func_name) << "` produce the same hash. Consider renaming one of these functions."); + f_sym->method_id = calculate_method_id_by_func_name(func_name); + for (const FunctionData* other : G.all_get_methods) { + if (other->method_id == f_sym->method_id) { + v->error(PSTRING() << "GET methods hash collision: `" << other->name << "` and `" << f_sym->name << "` produce the same hash. 
Consider renaming one of these functions."); } } } else if (v->is_entrypoint) { - sym_val->method_id = calculate_method_id_for_entrypoint(func_name); + f_sym->method_id = calculate_method_id_for_entrypoint(func_name); } if (v->marked_as_pure) { - sym_val->flags |= SymValFunc::flagMarkedAsPure; + f_sym->flags |= FunctionData::flagMarkedAsPure; } if (v->marked_as_inline) { - sym_val->flags |= SymValFunc::flagInline; + f_sym->flags |= FunctionData::flagInline; } if (v->marked_as_inline_ref) { - sym_val->flags |= SymValFunc::flagInlineRef; + f_sym->flags |= FunctionData::flagInlineRef; } if (v->marked_as_get_method) { - sym_val->flags |= SymValFunc::flagGetMethod; + f_sym->flags |= FunctionData::flagGetMethod; } if (v->is_entrypoint) { - sym_val->flags |= SymValFunc::flagIsEntrypoint; + f_sym->flags |= FunctionData::flagIsEntrypoint; } if (n_mutate_params) { - sym_val->flags |= SymValFunc::flagHasMutateParams; + f_sym->flags |= FunctionData::flagHasMutateParams; } if (v->accepts_self) { - sym_val->flags |= SymValFunc::flagAcceptsSelf; + f_sym->flags |= FunctionData::flagAcceptsSelf; } if (v->returns_self) { - sym_val->flags |= SymValFunc::flagReturnsSelf; + f_sym->flags |= FunctionData::flagReturnsSelf; } - sym_def->value = sym_val; -#ifdef TOLK_DEBUG - sym_def->value->sym_name = func_name; -#endif - if (dynamic_cast(sym_val)) { - G.all_code_functions.push_back(sym_def); + G.symtable.add_function(f_sym); + if (f_sym->is_regular_function()) { + G.all_code_functions.push_back(f_sym); } - if (sym_val->is_get_method()) { - G.all_get_methods.push_back(sym_def); + if (f_sym->is_get_method()) { + G.all_get_methods.push_back(f_sym); } + v->mutate()->assign_fun_ref(f_sym); } static void iterate_through_file_symbols(const SrcFile* file) { diff --git a/tolk/pipe-resolve-symbols.cpp b/tolk/pipe-resolve-symbols.cpp new file mode 100644 index 00000000..31d25229 --- /dev/null +++ b/tolk/pipe-resolve-symbols.cpp @@ -0,0 +1,272 @@ +/* + This file is part of TON Blockchain Library. 
+ + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "tolk.h" +#include "platform-utils.h" +#include "src-file.h" +#include "ast.h" +#include "ast-visitor.h" +#include "compiler-state.h" +#include + +/* + * This pipe resolves identifiers (local variables) in all functions bodies. + * It happens before type inferring, but after all global symbols are registered. + * It means, that for any symbol `x` we can look up whether it's a global name or not. + * + * Example: `var x = 10; x = 20;` both `x` point to one LocalVarData. + * Example: `x = 20` undefined symbol `x` is also here (unless it's a global) + * Variables scoping and redeclaration are also here. + * + * As a result of this step, every V::sym is filled, pointing either to a local var/parameter, + * or to a global var / constant / function. + */ + +namespace tolk { + +static void check_import_exists_when_using_sym(AnyV v_usage, const Symbol* used_sym) { + SrcLocation sym_loc = used_sym->loc; + if (!v_usage->loc.is_symbol_from_same_or_builtin_file(sym_loc)) { + const SrcFile* declared_in = sym_loc.get_src_file(); + bool has_import = false; + for (const SrcFile::ImportStatement& import_stmt : v_usage->loc.get_src_file()->imports) { + if (import_stmt.imported_file == declared_in) { + has_import = true; + } + } + if (!has_import) { + v_usage->error("Using a non-imported symbol `" + used_sym->name + "`. 
Forgot to import \"" + declared_in->rel_filename + "\"?"); + } + } +} + +struct NameAndScopeResolver { + std::vector> scopes; + + static uint64_t key_hash(std::string_view name_key) { + return std::hash{}(name_key); + } + + void open_scope([[maybe_unused]] SrcLocation loc) { + // std::cerr << "open_scope " << scopes.size() + 1 << " at " << loc << std::endl; + scopes.emplace_back(); + } + + void close_scope([[maybe_unused]] SrcLocation loc) { + // std::cerr << "close_scope " << scopes.size() << " at " << loc << std::endl; + if (UNLIKELY(scopes.empty())) { + throw Fatal{"cannot close the outer scope"}; + } + scopes.pop_back(); + } + + const Symbol* lookup_symbol(std::string_view name) const { + uint64_t key = key_hash(name); + for (auto it = scopes.rbegin(); it != scopes.rend(); ++it) { // NOLINT(*-loop-convert) + const auto& scope = *it; + if (auto it_sym = scope.find(key); it_sym != scope.end()) { + return it_sym->second; + } + } + return G.symtable.lookup(name); + } + + const Symbol* add_local_var(const LocalVarData* v_sym) { + if (UNLIKELY(scopes.empty())) { + throw Fatal("unexpected scope_level = 0"); + } + if (v_sym->name.empty()) { // underscore + return v_sym; + } + + uint64_t key = key_hash(v_sym->name); + const auto& [_, inserted] = scopes.rbegin()->emplace(key, v_sym); + if (UNLIKELY(!inserted)) { + throw ParseError(v_sym->loc, "redeclaration of local variable `" + v_sym->name + "`"); + } + return v_sym; + } +}; + + +class AssignSymInsideFunctionVisitor final : public ASTVisitorFunctionBody { + // more correctly this field shouldn't be static, but currently there is no need to make it a part of state + static NameAndScopeResolver current_scope; + + static const Symbol* create_local_var_sym(std::string_view name, SrcLocation loc, TypeExpr* var_type, bool immutable) { + LocalVarData* v_sym = new LocalVarData(static_cast(name), loc, -1, var_type); + if (immutable) { + v_sym->flags |= LocalVarData::flagImmutable; + } + return 
current_scope.add_local_var(v_sym); + } + + static void process_catch_variable(AnyV catch_var) { + if (auto v_ident = catch_var->try_as()) { + const Symbol* sym = create_local_var_sym(v_ident->name, catch_var->loc, TypeExpr::new_hole(), true); + v_ident->mutate()->assign_sym(sym); + } + } + + static void process_function_arguments(const FunctionData* fun_ref, V v, AnyExprV lhs_of_dot_call) { + int delta_self = lhs_of_dot_call ? 1 : 0; + int n_arguments = static_cast(v->get_arguments().size()) + delta_self; + int n_parameters = static_cast(fun_ref->parameters.size()); + + // Tolk doesn't have optional parameters currently, so just compare counts + if (n_parameters < n_arguments) { + v->error("too many arguments in call to `" + fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); + } + if (n_arguments < n_parameters) { + v->error("too few arguments in call to `" + fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); + } + } + + void visit(V v) override { + if (v->marked_as_redef) { + auto v_ident = v->get_identifier()->as(); + const Symbol* sym = current_scope.lookup_symbol(v_ident->name); + if (sym == nullptr) { + v->error("`redef` for unknown variable"); + } + if (!sym->try_as() && !sym->try_as()) { + v->error("`redef` for unknown variable"); + } + v->mutate()->assign_var_ref(sym); + v_ident->mutate()->assign_sym(sym); + } else if (auto v_ident = v->get_identifier()->try_as()) { + TypeExpr* var_type = v->declared_type ? 
v->declared_type : TypeExpr::new_hole(); + const Symbol* sym = create_local_var_sym(v_ident->name, v->loc, var_type, v->is_immutable); + v->mutate()->assign_var_ref(sym); + v_ident->mutate()->assign_sym(sym); + } else { + // underscore, do nothing, v->sym remains nullptr + } + } + + void visit(V v) override { + parent::visit(v->get_assigned_val()); + parent::visit(v->get_lhs()); + } + + void visit(V v) override { + const Symbol* sym = current_scope.lookup_symbol(v->name); + if (!sym) { + v->error("undefined symbol `" + static_cast(v->name) + "`"); + } + v->mutate()->assign_sym(sym); + + // for global functions, global vars and constants, `import` must exist + if (!sym->try_as()) { + check_import_exists_when_using_sym(v, sym); + } + } + + void visit(V v) override { + parent::visit(v->get_called_f()); + parent::visit(v->get_arg_list()); + + // most likely it's a global function, but also may be `some_var(args)` or even `getF()(args)` + // for such corner cases, sym remains nullptr + if (auto v_ident = v->get_called_f()->try_as()) { + if (const auto* fun_ref = v_ident->sym->try_as()) { + v->mutate()->assign_fun_ref(fun_ref); + process_function_arguments(fun_ref, v->get_arg_list(), nullptr); + } + } + // for `some_var(args)`, if it's called with wrong arguments count, the error is not here + // it will be fired later, it's a type checking error + } + + void visit(V v) override { + const Symbol* sym = lookup_global_symbol(v->method_name); + if (!sym) { + v->error("undefined symbol `" + static_cast(v->method_name) + "`"); + } + const auto* fun_ref = sym->try_as(); + if (!fun_ref) { + v->error("`" + static_cast(v->method_name) + "` is not a method"); + } + + if (fun_ref->parameters.empty()) { + v->error("`" + static_cast(v->method_name) + "` has no parameters and can not be called as method"); + } + + v->mutate()->assign_fun_ref(fun_ref); + parent::visit(v); + process_function_arguments(fun_ref, v->get_arg_list(), v->get_obj()); + } + + void visit(V v) override { + const 
Symbol* sym = current_scope.lookup_symbol("self"); + if (!sym) { + v->error("using `self` in a non-member function (it does not accept the first `self` parameter)"); + } + v->mutate()->assign_param_ref(sym->as()); + } + + void visit(V v) override { + if (v->empty()) { + return; + } + current_scope.open_scope(v->loc); + parent::visit(v); + current_scope.close_scope(v->loc_end); + } + + void visit(V v) override { + current_scope.open_scope(v->loc); + parent::visit(v->get_body()); + parent::visit(v->get_cond()); // in 'while' condition it's ok to use variables declared inside do + current_scope.close_scope(v->get_body()->loc_end); + } + + void visit(V v) override { + visit(v->get_try_body()); + current_scope.open_scope(v->get_catch_body()->loc); + const std::vector& catch_items = v->get_catch_expr()->get_items(); + tolk_assert(catch_items.size() == 2); + process_catch_variable(catch_items[1]); + process_catch_variable(catch_items[0]); + parent::visit(v->get_catch_body()); + current_scope.close_scope(v->get_catch_body()->loc_end); + } + +public: + void start_visiting_function(V v_function) override { + auto v_seq = v_function->get_body()->try_as(); + tolk_assert(v_seq != nullptr); + + current_scope.open_scope(v_function->loc); + + for (int i = 0; i < v_function->get_num_params(); ++i) { + current_scope.add_local_var(&v_function->fun_ref->parameters[i]); + v_function->get_param(i)->mutate()->assign_param_ref(&v_function->fun_ref->parameters[i]); + } + parent::visit(v_seq); + + current_scope.close_scope(v_seq->loc_end); + tolk_assert(current_scope.scopes.empty()); + } +}; + +NameAndScopeResolver AssignSymInsideFunctionVisitor::current_scope; + +void pipeline_resolve_identifiers_and_assign_symbols(const AllSrcFiles& all_src_files) { + visit_ast_of_all_functions(all_src_files); +} + +} // namespace tolk diff --git a/tolk/pipeline.h b/tolk/pipeline.h index fdfd2b99..5c0fe656 100644 --- a/tolk/pipeline.h +++ b/tolk/pipeline.h @@ -33,6 +33,14 @@ namespace tolk { AllSrcFiles 
pipeline_discover_and_parse_sources(const std::string& stdlib_filename, const std::string& entrypoint_filename); void pipeline_register_global_symbols(const AllSrcFiles&); +void pipeline_resolve_identifiers_and_assign_symbols(const AllSrcFiles&); +void pipeline_calculate_rvalue_lvalue(const AllSrcFiles&); +void pipeline_detect_unreachable_statements(const AllSrcFiles&); +void pipeline_infer_and_check_types(const AllSrcFiles&); +void pipeline_refine_lvalue_for_mutate_arguments(const AllSrcFiles&); +void pipeline_check_rvalue_lvalue(const AllSrcFiles&); +void pipeline_check_pure_impure_operations(const AllSrcFiles&); +void pipeline_constant_folding(const AllSrcFiles&); void pipeline_convert_ast_to_legacy_Expr_Op(const AllSrcFiles&); void pipeline_find_unused_symbols(); diff --git a/tolk/src-file.h b/tolk/src-file.h index 815dccbe..9fbbfbb4 100644 --- a/tolk/src-file.h +++ b/tolk/src-file.h @@ -18,11 +18,10 @@ #include #include +#include "fwd-declarations.h" namespace tolk { -struct ASTNodeBase; - struct SrcFile { struct SrcPosition { int offset; @@ -39,7 +38,7 @@ struct SrcFile { std::string rel_filename; // relative to cwd std::string abs_filename; // absolute from root std::string text; // file contents loaded into memory, every Token::str_val points inside it - const ASTNodeBase* ast = nullptr; // when a file has been parsed, its ast_tolk_file is kept here + AnyV ast = nullptr; // when a file has been parsed, its ast_tolk_file is kept here std::vector imports; // to check strictness (can't use a symbol without importing its file) SrcFile(int file_id, std::string rel_filename, std::string abs_filename, std::string&& text) diff --git a/tolk/symtable.cpp b/tolk/symtable.cpp index abaeb084..dc715031 100644 --- a/tolk/symtable.cpp +++ b/tolk/symtable.cpp @@ -16,154 +16,85 @@ */ #include "symtable.h" #include "compiler-state.h" +#include "platform-utils.h" #include #include namespace tolk { +bool FunctionData::does_need_codegen() const { + // when a function is 
declared, but not referenced from code in any way, don't generate its body + if (!is_really_used() && G.settings.remove_unused_functions) { + return false; + } + // when a function is referenced like `var a = some_fn;` (or in some other non-call way), its continuation should exist + if (is_used_as_noncall()) { + return true; + } + // currently, there is no inlining, all functions are codegenerated + // (but actually, unused ones are later removed by Fift) + // in the future, we may want to implement a true AST inlining for "simple" functions + return true; +} -std::string Symbol::unknown_symbol_name(sym_idx_t i) { - if (!i) { - return "_"; - } else { - std::ostringstream os; - os << "SYM#" << i; - return os.str(); +void FunctionData::assign_is_really_used() { + this->flags |= flagReallyUsed; +} + +void FunctionData::assign_is_used_as_noncall() { + this->flags |= flagUsedAsNonCall; +} + +void FunctionData::assign_is_implicit_return() { + this->flags |= flagImplicitReturn; +} + +void GlobalVarData::assign_is_really_used() { + this->flags |= flagReallyUsed; +} + +void LocalVarData::assign_idx(int idx) { + this->idx = idx; +} + +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_redefinition_of_symbol(SrcLocation loc, const Symbol* previous) { + SrcLocation prev_loc = previous->loc; + if (prev_loc.is_stdlib()) { + throw ParseError(loc, "redefinition of a symbol from stdlib"); + } + if (prev_loc.is_defined()) { + throw ParseError(loc, "redefinition of symbol, previous was at: " + prev_loc.to_string()); + } + throw ParseError(loc, "redefinition of built-in symbol"); +} + +void GlobalSymbolTable::add_function(const FunctionData* f_sym) { + auto key = key_hash(f_sym->name); + auto [it, inserted] = entries.emplace(key, f_sym); + if (!inserted) { + fire_error_redefinition_of_symbol(f_sym->loc, it->second); } } -sym_idx_t SymTable::gen_lookup(std::string_view str, int mode, sym_idx_t idx) { - unsigned long long h1 = 1, h2 = 1; - for (char c : str) { - h1 = ((h1 
* 239) + (unsigned char)(c)) % SIZE_PRIME; - h2 = ((h2 * 17) + (unsigned char)(c)) % (SIZE_PRIME - 1); - } - ++h2; - ++h1; - while (true) { - if (sym[h1]) { - if (sym[h1]->str == str) { - return (mode & 2) ? not_found : sym_idx_t(h1); - } - h1 += h2; - if (h1 > SIZE_PRIME) { - h1 -= SIZE_PRIME; - } - } else { - if (!(mode & 1)) { - return not_found; - } - if (def_sym >= ((long long)SIZE_PRIME * 3) / 4) { - throw SymTableOverflow{def_sym}; - } - sym[h1] = std::make_unique(static_cast(str), idx <= 0 ? sym_idx_t(h1) : -idx); - ++def_sym; - return sym_idx_t(h1); - } +void GlobalSymbolTable::add_global_var(const GlobalVarData* g_sym) { + auto key = key_hash(g_sym->name); + auto [it, inserted] = entries.emplace(key, g_sym); + if (!inserted) { + fire_error_redefinition_of_symbol(g_sym->loc, it->second); } } -std::string SymDef::name() const { - return G.symbols.get_name(sym_idx); +void GlobalSymbolTable::add_global_const(const GlobalConstData* c_sym) { + auto key = key_hash(c_sym->name); + auto [it, inserted] = entries.emplace(key, c_sym); + if (!inserted) { + fire_error_redefinition_of_symbol(c_sym->loc, it->second); + } } -void open_scope(SrcLocation loc) { - ++G.scope_level; - G.scope_opened_at.push_back(loc); -} - -void close_scope() { - if (!G.scope_level) { - throw Fatal{"cannot close the outer scope"}; - } - while (!G.symbol_stack.empty() && G.symbol_stack.back().first == G.scope_level) { - SymDef old_def = G.symbol_stack.back().second; - auto idx = old_def.sym_idx; - G.symbol_stack.pop_back(); - SymDef* cur_def = G.sym_def[idx]; - assert(cur_def); - assert(cur_def->level == G.scope_level && cur_def->sym_idx == idx); - //std::cerr << "restoring local symbol `" << old_def.name << "` of level " << scope_level << " to its previous level " << old_def.level << std::endl; - if (cur_def->value) { - //std::cerr << "deleting value of symbol " << old_def.name << ":" << old_def.level << " at " << (const void*) it->second.value << std::endl; - delete cur_def->value; - } - if 
(!old_def.level && !old_def.value) { - delete cur_def; // ??? keep the definition always? - G.sym_def[idx] = nullptr; - } else { - cur_def->value = old_def.value; - cur_def->level = old_def.level; - } - old_def.value = nullptr; - } - --G.scope_level; - G.scope_opened_at.pop_back(); -} - -SymDef* lookup_symbol(sym_idx_t idx) { - if (!idx) { - return nullptr; - } - if (G.sym_def[idx]) { - return G.sym_def[idx]; - } - if (G.global_sym_def[idx]) { - return G.global_sym_def[idx]; - } - return nullptr; -} - -SymDef* define_global_symbol(sym_idx_t name_idx, SrcLocation loc) { - if (SymDef* found = G.global_sym_def[name_idx]) { - return found; // found->value is filled; it means, that a symbol is redefined - } - - SymDef* registered = G.global_sym_def[name_idx] = new SymDef(0, name_idx, loc); -#ifdef TOLK_DEBUG - registered->sym_name = registered->name(); -#endif - return registered; // registered->value is nullptr; it means, it's just created -} - -SymDef* define_parameter(sym_idx_t name_idx, SrcLocation loc) { - // note, that parameters (defined at function declaration) are not inserted into symtable - // their SymDef is registered to be inserted into SymValFunc::parameters - // (and later ->value is filled with SymValVariable) - - SymDef* registered = new SymDef(0, name_idx, loc); -#ifdef TOLK_DEBUG - registered->sym_name = registered->name(); -#endif - return registered; -} - -SymDef* define_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc) { - if (!name_idx) { - return nullptr; - } - if (!G.scope_level) { - throw Fatal("unexpected scope_level = 0"); - } - auto found = G.sym_def[name_idx]; - if (found) { - if (found->level < G.scope_level) { - G.symbol_stack.emplace_back(G.scope_level, *found); - found->level = G.scope_level; - } else if (found->value && force_new) { - return nullptr; - } - found->value = nullptr; - found->loc = loc; - return found; - } - found = G.sym_def[name_idx] = new SymDef(G.scope_level, name_idx, loc); - 
G.symbol_stack.emplace_back(G.scope_level, SymDef{0, name_idx, loc}); -#ifdef TOLK_DEBUG - found->sym_name = found->name(); - G.symbol_stack.back().second.sym_name = found->name(); -#endif - return found; +const Symbol* lookup_global_symbol(std::string_view name) { + return G.symtable.lookup(name); } } // namespace tolk diff --git a/tolk/symtable.h b/tolk/symtable.h index 69e2eaa8..53a5e52e 100644 --- a/tolk/symtable.h +++ b/tolk/symtable.h @@ -18,97 +18,194 @@ #include "src-file.h" #include "type-expr.h" -#include -#include +#include "constant-evaluator.h" +#include "crypto/common/refint.h" +#include +#include +#include namespace tolk { -typedef int var_idx_t; -typedef int sym_idx_t; - -enum class SymValKind { _Var, _Func, _GlobVar, _Const }; - -struct SymValBase { - SymValKind kind; - int idx; - TypeExpr* sym_type; -#ifdef TOLK_DEBUG - std::string sym_name; // seeing symbol name in debugger makes it much easier to delve into Tolk sources -#endif - - SymValBase(SymValKind kind, int idx, TypeExpr* sym_type) : kind(kind), idx(idx), sym_type(sym_type) { - } - virtual ~SymValBase() = default; - - TypeExpr* get_type() const { - return sym_type; - } -}; - - struct Symbol { - std::string str; - sym_idx_t idx; - - Symbol(std::string str, sym_idx_t idx) : str(std::move(str)), idx(idx) {} - - static std::string unknown_symbol_name(sym_idx_t i); -}; - -class SymTable { -public: - static constexpr int SIZE_PRIME = 100003; - -private: - sym_idx_t def_sym{0}; - std::unique_ptr sym[SIZE_PRIME + 1]; - sym_idx_t gen_lookup(std::string_view str, int mode = 0, sym_idx_t idx = 0); - -public: - - static constexpr sym_idx_t not_found = 0; - sym_idx_t lookup(std::string_view str) { - return gen_lookup(str, 0); - } - sym_idx_t lookup_add(std::string_view str) { - return gen_lookup(str, 1); - } - Symbol* operator[](sym_idx_t i) const { - return sym[i].get(); - } - std::string get_name(sym_idx_t i) const { - return sym[i] ? 
sym[i]->str : Symbol::unknown_symbol_name(i); - } -}; - -struct SymTableOverflow { - int sym_def; - explicit SymTableOverflow(int x) : sym_def(x) { - } -}; - - -struct SymDef { - int level; - sym_idx_t sym_idx; - SymValBase* value; + std::string name; SrcLocation loc; -#ifdef TOLK_DEBUG - std::string sym_name; -#endif - SymDef(int lvl, sym_idx_t idx, SrcLocation _loc, SymValBase* val = nullptr) - : level(lvl), sym_idx(idx), value(val), loc(_loc) { + + Symbol(std::string name, SrcLocation loc) + : name(std::move(name)) + , loc(loc) { + } + + virtual ~Symbol() = default; + + template + const T* as() const { +#ifdef TOLK_DEBUG + assert(dynamic_cast(this) != nullptr); +#endif + return dynamic_cast(this); + } + + template + const T* try_as() const { + return dynamic_cast(this); } - std::string name() const; }; +struct LocalVarData final : Symbol { + enum { + flagMutateParameter = 1, // parameter was declared with `mutate` keyword + flagImmutable = 2, // variable was declared via `val` (not `var`) + }; -void open_scope(SrcLocation loc); -void close_scope(); -SymDef* lookup_symbol(sym_idx_t idx); + TypeExpr* declared_type; + int flags = 0; + int idx; -SymDef* define_global_symbol(sym_idx_t name_idx, SrcLocation loc = {}); -SymDef* define_parameter(sym_idx_t name_idx, SrcLocation loc); -SymDef* define_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc); + LocalVarData(std::string name, SrcLocation loc, int idx, TypeExpr* declared_type) + : Symbol(std::move(name), loc) + , declared_type(declared_type) + , idx(idx) { + } + + bool is_underscore() const { return name.empty(); } + bool is_immutable() const { return flags & flagImmutable; } + bool is_mutate_parameter() const { return flags & flagMutateParameter; } + + LocalVarData* mutate() const { return const_cast(this); } + void assign_idx(int idx); +}; + +struct FunctionBodyCode; +struct FunctionBodyAsm; +struct FunctionBodyBuiltin; + +typedef std::variant< + FunctionBodyCode*, + FunctionBodyAsm*, + 
FunctionBodyBuiltin* +> FunctionBody; + +struct FunctionData final : Symbol { + static constexpr int EMPTY_METHOD_ID = -10; + + enum { + flagInline = 1, // marked `@inline` + flagInlineRef = 2, // marked `@inline_ref` + flagReallyUsed = 4, // calculated via dfs from used functions; declared but unused functions are not codegenerated + flagUsedAsNonCall = 8, // used not only as `f()`, but as a 1-st class function (assigned to var, pushed to tuple, etc.) + flagMarkedAsPure = 16, // declared as `pure`, can't call impure and access globals, unused invocations are optimized out + flagImplicitReturn = 32, // control flow reaches end of function, so it needs implicit return at the end + flagGetMethod = 64, // was declared via `get func(): T`, method_id is auto-assigned + flagIsEntrypoint = 128, // it's `main` / `onExternalMessage` / etc. + flagHasMutateParams = 256, // has parameters declared as `mutate` + flagAcceptsSelf = 512, // is a member function (has `self` first parameter) + flagReturnsSelf = 1024, // return type is `self` (returns the mutated 1st argument), calls can be chainable + }; + + int method_id = EMPTY_METHOD_ID; + int flags; + TypeExpr* full_type; // currently, TypeExpr::_Map, probably wrapped with forall + + std::vector parameters; + std::vector arg_order, ret_order; + + FunctionBody body; + + FunctionData(std::string name, SrcLocation loc, TypeExpr* full_type, std::vector parameters, int initial_flags, FunctionBody body) + : Symbol(std::move(name), loc) + , flags(initial_flags) + , full_type(full_type) + , parameters(std::move(parameters)) + , body(body) { + } + + const std::vector* get_arg_order() const { + return arg_order.empty() ? nullptr : &arg_order; + } + const std::vector* get_ret_order() const { + return ret_order.empty() ? 
nullptr : &ret_order; + } + + bool is_regular_function() const { return std::holds_alternative(body); } + bool is_asm_function() const { return std::holds_alternative(body); } + bool is_builtin_function() const { return std::holds_alternative(body); } + + bool is_inline() const { return flags & flagInline; } + bool is_inline_ref() const { return flags & flagInlineRef; } + bool is_really_used() const { return flags & flagReallyUsed; } + bool is_used_as_noncall() const { return flags & flagUsedAsNonCall; } + bool is_marked_as_pure() const { return flags & flagMarkedAsPure; } + bool is_implicit_return() const { return flags & flagImplicitReturn; } + bool is_get_method() const { return flags & flagGetMethod; } + bool is_method_id_not_empty() const { return method_id != EMPTY_METHOD_ID; } + bool is_entrypoint() const { return flags & flagIsEntrypoint; } + bool has_mutate_params() const { return flags & flagHasMutateParams; } + bool does_accept_self() const { return flags & flagAcceptsSelf; } + bool does_return_self() const { return flags & flagReturnsSelf; } + bool does_mutate_self() const { return (flags & flagAcceptsSelf) && parameters[0].is_mutate_parameter(); } + + bool does_need_codegen() const; + + FunctionData* mutate() const { return const_cast(this); } + void assign_is_really_used(); + void assign_is_used_as_noncall(); + void assign_is_implicit_return(); +}; + +struct GlobalVarData final : Symbol { + enum { + flagReallyUsed = 1, // calculated via dfs from used functions; unused globals are not codegenerated + }; + + TypeExpr* declared_type; + int flags = 0; + + GlobalVarData(std::string name, SrcLocation loc, TypeExpr* declared_type) + : Symbol(std::move(name), loc) + , declared_type(declared_type) { + } + + bool is_really_used() const { return flags & flagReallyUsed; } + + GlobalVarData* mutate() const { return const_cast(this); } + void assign_is_really_used(); +}; + +struct GlobalConstData final : Symbol { + ConstantValue value; + TypeExpr* inferred_type; + 
+ GlobalConstData(std::string name, SrcLocation loc, ConstantValue&& value) + : Symbol(std::move(name), loc) + , value(std::move(value)) + , inferred_type(TypeExpr::new_atomic(this->value.is_int() ? TypeExpr::_Int : TypeExpr::_Slice)) { + } + + bool is_int_const() const { return value.is_int(); } + bool is_slice_const() const { return value.is_slice(); } + + td::RefInt256 as_int_const() const { return value.as_int(); } + const std::string& as_slice_const() const { return value.as_slice(); } +}; + +class GlobalSymbolTable { + std::unordered_map entries; + + static uint64_t key_hash(std::string_view name_key) { + return std::hash{}(name_key); + } + +public: + void add_function(const FunctionData* f_sym); + void add_global_var(const GlobalVarData* g_sym); + void add_global_const(const GlobalConstData* c_sym); + + const Symbol* lookup(std::string_view name) const { + const auto it = entries.find(key_hash(name)); + return it == entries.end() ? nullptr : it->second; + } +}; + +const Symbol* lookup_global_symbol(std::string_view name); } // namespace tolk diff --git a/tolk/tolk.cpp b/tolk/tolk.cpp index 9268cc62..f2255ce3 100644 --- a/tolk/tolk.cpp +++ b/tolk/tolk.cpp @@ -54,6 +54,14 @@ int tolk_proceed(const std::string &entrypoint_filename) { AllSrcFiles all_files = pipeline_discover_and_parse_sources("@stdlib/common.tolk", entrypoint_filename); pipeline_register_global_symbols(all_files); + pipeline_resolve_identifiers_and_assign_symbols(all_files); + pipeline_calculate_rvalue_lvalue(all_files); + pipeline_detect_unreachable_statements(all_files); + pipeline_infer_and_check_types(all_files); + pipeline_refine_lvalue_for_mutate_arguments(all_files); + pipeline_check_rvalue_lvalue(all_files); + pipeline_check_pure_impure_operations(all_files); + pipeline_constant_folding(all_files); pipeline_convert_ast_to_legacy_Expr_Op(all_files); pipeline_find_unused_symbols(); diff --git a/tolk/tolk.h b/tolk/tolk.h index 971ca35d..0408648f 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h 
@@ -22,6 +22,7 @@ #include "symtable.h" #include "crypto/common/refint.h" #include "td/utils/Status.h" +#include #include #include #include @@ -64,22 +65,27 @@ void unify(TypeExpr*& te1, TypeExpr*& te2); * */ -using const_idx_t = int; +typedef int var_idx_t; +typedef int const_idx_t; struct TmpVar { TypeExpr* v_type; var_idx_t idx; - sym_idx_t sym_idx; + const LocalVarData* v_sym; // points to var defined in code; nullptr for implicitly created tmp vars int coord; SrcLocation where; std::vector> on_modification; - TmpVar(var_idx_t _idx, TypeExpr* _type, sym_idx_t sym_idx, SrcLocation loc); - bool is_unnamed() const { return sym_idx == 0; } + TmpVar(var_idx_t _idx, TypeExpr* _type, const LocalVarData* v_sym, SrcLocation loc) + : v_type(_type) + , idx(_idx) + , v_sym(v_sym) + , coord(0) + , where(loc) { + } void show(std::ostream& os, int omit_idx = 0) const; void dump(std::ostream& os) const; - void set_location(SrcLocation loc); }; struct VarDescr { @@ -171,7 +177,6 @@ struct VarDescr { void set_const(long long value); void set_const(td::RefInt256 value); void set_const(std::string value); - void set_const_nan(); void operator+=(const VarDescr& y) { flags &= y.flags; } @@ -303,7 +308,8 @@ struct Op { enum { _Disabled = 1, _NoReturn = 4, _Impure = 24 }; int flags; std::unique_ptr next; - SymDef* fun_ref; // despite its name, it may actually ref global var; applicable not only to Op::_Call, but for other kinds also + const FunctionData* f_sym = nullptr; + const GlobalVarData* g_sym = nullptr; SrcLocation where; VarDescrList var_info; std::vector args; @@ -311,27 +317,41 @@ struct Op { std::unique_ptr block0, block1; td::RefInt256 int_const; std::string str_const; - Op(SrcLocation _where = {}, OpKind _cl = _Undef) : cl(_cl), flags(0), fun_ref(nullptr), where(_where) { + Op(SrcLocation _where = {}, OpKind _cl = _Undef) : cl(_cl), flags(0), f_sym(nullptr), where(_where) { } Op(SrcLocation _where, OpKind _cl, const std::vector& _left) - : cl(_cl), flags(0), 
fun_ref(nullptr), where(_where), left(_left) { + : cl(_cl), flags(0), f_sym(nullptr), where(_where), left(_left) { } Op(SrcLocation _where, OpKind _cl, std::vector&& _left) - : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(std::move(_left)) { + : cl(_cl), flags(0), f_sym(nullptr), where(_where), left(std::move(_left)) { } Op(SrcLocation _where, OpKind _cl, const std::vector& _left, td::RefInt256 _const) - : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left), int_const(_const) { + : cl(_cl), flags(0), f_sym(nullptr), where(_where), left(_left), int_const(_const) { } Op(SrcLocation _where, OpKind _cl, const std::vector& _left, std::string _const) - : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left), str_const(_const) { + : cl(_cl), flags(0), f_sym(nullptr), where(_where), left(_left), str_const(_const) { + } + Op(SrcLocation _where, OpKind _cl, const std::vector& _left, const std::vector& _right) + : cl(_cl), flags(0), f_sym(nullptr), where(_where), left(_left), right(_right) { + } + Op(SrcLocation _where, OpKind _cl, std::vector&& _left, std::vector&& _right) + : cl(_cl), flags(0), f_sym(nullptr), where(_where), left(std::move(_left)), right(std::move(_right)) { } Op(SrcLocation _where, OpKind _cl, const std::vector& _left, const std::vector& _right, - SymDef* _fun = nullptr) - : cl(_cl), flags(0), fun_ref(_fun), where(_where), left(_left), right(_right) { + const FunctionData* _fun) + : cl(_cl), flags(0), f_sym(_fun), where(_where), left(_left), right(_right) { } Op(SrcLocation _where, OpKind _cl, std::vector&& _left, std::vector&& _right, - SymDef* _fun = nullptr) - : cl(_cl), flags(0), fun_ref(_fun), where(_where), left(std::move(_left)), right(std::move(_right)) { + const FunctionData* _fun) + : cl(_cl), flags(0), f_sym(_fun), where(_where), left(std::move(_left)), right(std::move(_right)) { + } + Op(SrcLocation _where, OpKind _cl, const std::vector& _left, const std::vector& _right, + const GlobalVarData* _gvar) + : 
cl(_cl), flags(0), g_sym(_gvar), where(_where), left(_left), right(_right) { + } + Op(SrcLocation _where, OpKind _cl, std::vector&& _left, std::vector&& _right, + const GlobalVarData* _gvar) + : cl(_cl), flags(0), g_sym(_gvar), where(_where), left(std::move(_left)), right(std::move(_right)) { } bool disabled() const { return flags & _Disabled; } @@ -343,8 +363,7 @@ struct Op { bool set_noreturn(bool flag); bool impure() const { return flags & _Impure; } - void set_impure(const CodeBlob &code); - void set_impure(const CodeBlob &code, bool flag); + void set_impure_flag(); void show(std::ostream& os, const std::vector& vars, std::string pfx = "", int mode = 0) const; void show_var_list(std::ostream& os, const std::vector& idx_list, const std::vector& vars) const; @@ -391,247 +410,16 @@ inline ListIterator end(const Op* op_list) { return ListIterator{}; } -typedef std::tuple FormalArg; +typedef std::tuple FormalArg; typedef std::vector FormalArgList; struct AsmOpList; -/* - * - * SYMBOL VALUES - * - */ - -struct SymValVariable : SymValBase { - enum SymValFlag { - flagMutateParameter = 1, // parameter was declared with `mutate` keyword - flagImmutable = 2, // variable was declared via `val` (not `var`) - }; - int flags{0}; - - ~SymValVariable() override = default; - SymValVariable(int val, TypeExpr* sym_type) - : SymValBase(SymValKind::_Var, val, sym_type) {} - - bool is_function_parameter() const { - return idx >= 0; - } - bool is_mutate_parameter() const { - return flags & flagMutateParameter; - } - bool is_local_var() const { - return idx == -1; - } - bool is_immutable() const { - return flags & flagImmutable; - } -}; - -struct SymValFunc : SymValBase { - enum SymValFlag { - flagInline = 1, // marked `@inline` - flagInlineRef = 2, // marked `@inline_ref` - flagUsedAsNonCall = 8, // used not only as `f()`, but as a 1-st class function (assigned to var, pushed to tuple, etc.) 
- flagMarkedAsPure = 16, // declared as `pure`, can't call impure and access globals, unused invocations are optimized out - flagBuiltinFunction = 32, // was created via `define_builtin_func()`, not from source code - flagGetMethod = 64, // was declared via `get func(): T`, method_id is auto-assigned - flagIsEntrypoint = 128, // it's `main` / `onExternalMessage` / etc. - flagHasMutateParams = 256, // has parameters declared as `mutate` - flagAcceptsSelf = 512, // is a member function (has `self` first parameter) - flagReturnsSelf = 1024, // return type is `self` (returns the mutated 1st argument), calls can be chainable - }; - - td::RefInt256 method_id; // todo why int256? it's small - int flags{0}; - std::vector parameters; // [i]-th may be nullptr for underscore; if not, its val is SymValVariable - std::vector arg_order, ret_order; - - ~SymValFunc() override = default; - SymValFunc(std::vector parameters, int val, TypeExpr* sym_type, int flags) - : SymValBase(SymValKind::_Func, val, sym_type), flags(flags), parameters(std::move(parameters)) { - } - SymValFunc(std::vector parameters, int val, TypeExpr* sym_type, int flags, std::initializer_list arg_order, std::initializer_list ret_order) - : SymValBase(SymValKind::_Func, val, sym_type), flags(flags), parameters(std::move(parameters)), arg_order(arg_order), ret_order(ret_order) { - } - - const std::vector* get_arg_order() const { - return arg_order.empty() ? nullptr : &arg_order; - } - const std::vector* get_ret_order() const { - return ret_order.empty() ? 
nullptr : &ret_order; - } - - bool is_inline() const { - return flags & flagInline; - } - bool is_inline_ref() const { - return flags & flagInlineRef; - } - bool is_marked_as_pure() const { - return flags & flagMarkedAsPure; - } - bool is_builtin() const { - return flags & flagBuiltinFunction; - } - bool is_get_method() const { - return flags & flagGetMethod; - } - bool is_entrypoint() const { - return flags & flagIsEntrypoint; - } - bool has_mutate_params() const { - return flags & flagHasMutateParams; - } - bool does_accept_self() const { - return flags & flagAcceptsSelf; - } - bool does_return_self() const { - return flags & flagReturnsSelf; - } -}; - -struct SymValCodeFunc : SymValFunc { +struct FunctionBodyCode { CodeBlob* code; - bool is_really_used{false}; // calculated via dfs; unused functions are not codegenerated - ~SymValCodeFunc() override = default; - SymValCodeFunc(std::vector parameters, int val, TypeExpr* _ft) - : SymValFunc(std::move(parameters), val, _ft, 0), code(nullptr) { - } - bool does_need_codegen() const; void set_code(CodeBlob* code); }; -struct SymValGlobVar : SymValBase { - bool is_really_used{false}; // calculated via dfs from used functions; unused globals are not codegenerated - - SymValGlobVar(int val, TypeExpr* gvtype) - : SymValBase(SymValKind::_GlobVar, val, gvtype) { - } - ~SymValGlobVar() override = default; -}; - -struct SymValConst : SymValBase { - enum ConstKind { IntConst, SliceConst }; - - td::RefInt256 intval; - std::string strval; - ConstKind kind; - - SymValConst(int idx, td::RefInt256 value) - : SymValBase(SymValKind::_Const, idx, TypeExpr::new_atomic(TypeExpr::_Int)), intval(std::move(value)), kind(IntConst) { - } - SymValConst(int idx, std::string value) - : SymValBase(SymValKind::_Const, idx, TypeExpr::new_atomic(TypeExpr::_Slice)), strval(std::move(value)), kind(SliceConst) { - } - ~SymValConst() override = default; - td::RefInt256 get_int_value() const { - return intval; - } - std::string get_str_value() const { - 
return strval; - } - ConstKind get_kind() const { - return kind; - } -}; - - -/* - * - * EXPRESSIONS - * - */ - -struct Expr { - enum ExprCls { - _Apply, - _VarApply, - _GrabMutatedVars, - _ReturnSelf, - _MkTuple, - _Tensor, - _Const, - _Var, - _GlobFunc, - _GlobVar, - _Letop, - _Hole, - _CondExpr, - _SliceConst, - }; - ExprCls cls; - int val{0}; - enum { _IsRvalue = 2, _IsLvalue = 4, _IsImmutable = 8, _IsImpure = 32 }; - int flags{0}; - SrcLocation here; - td::RefInt256 intval; - std::string strval; - SymDef* sym{nullptr}; - TypeExpr* e_type{nullptr}; - std::vector args; - Expr(ExprCls c, SrcLocation loc) : cls(c), here(loc) { - } - Expr(ExprCls c, std::vector _args) : cls(c), args(std::move(_args)) { - } - Expr(ExprCls c, std::initializer_list _arglist) : cls(c), args(std::move(_arglist)) { - } - Expr(ExprCls c, SymDef* _sym, std::initializer_list _arglist) : cls(c), sym(_sym), args(std::move(_arglist)) { - } - Expr(ExprCls c, SymDef* _sym, std::vector _arglist) : cls(c), sym(_sym), args(std::move(_arglist)) { - } - Expr(ExprCls c, sym_idx_t name_idx, std::initializer_list _arglist); - ~Expr() { - for (auto& arg_ptr : args) { - delete arg_ptr; - } - } - Expr* copy() const; - void pb_arg(Expr* expr) { - args.push_back(expr); - } - void set_val(int _val) { - val = _val; - } - bool is_rvalue() const { - return flags & _IsRvalue; - } - bool is_lvalue() const { - return flags & _IsLvalue; - } - bool is_immutable() const { - return flags & _IsImmutable; - } - bool is_mktuple() const { - return cls == _MkTuple; - } - void chk_rvalue() const { - if (!is_rvalue()) { - fire_error_rvalue_expected(); - } - } - void deduce_type(); - void set_location(SrcLocation loc) { - here = loc; - } - SrcLocation get_location() const { - return here; - } - void define_new_vars(CodeBlob& code); - void predefine_vars(); - std::vector pre_compile(CodeBlob& code, std::vector>* lval_globs = nullptr) const; - var_idx_t new_tmp(CodeBlob& code) const; - std::vector new_tmp_vect(CodeBlob& code) 
const { - return {new_tmp(code)}; - } - - GNU_ATTRIBUTE_COLD GNU_ATTRIBUTE_NORETURN - void fire_error_rvalue_expected() const; - GNU_ATTRIBUTE_COLD GNU_ATTRIBUTE_NORETURN - void fire_error_lvalue_expected(const std::string& details) const; - GNU_ATTRIBUTE_COLD GNU_ATTRIBUTE_NORETURN - void fire_error_modifying_immutable(const std::string& details) const; -}; - /* * * GENERATE CODE @@ -651,7 +439,6 @@ struct AsmOp { int a, b; bool gconst{false}; std::string op; - td::RefInt256 origin; struct SReg { int idx; SReg(int _idx) : idx(_idx) { @@ -671,9 +458,6 @@ struct AsmOp { AsmOp(Type _t, int _a, int _b, std::string _op) : t(_t), a(_a), b(_b), op(std::move(_op)) { compute_gconst(); } - AsmOp(Type _t, int _a, int _b, std::string _op, td::RefInt256 x) : t(_t), a(_a), b(_b), op(std::move(_op)), origin(x) { - compute_gconst(); - } void out(std::ostream& os) const; void out_indent_nl(std::ostream& os, bool no_nl = false) const; std::string to_string() const; @@ -786,20 +570,20 @@ struct AsmOp { static AsmOp BlkReverse(int a, int b); static AsmOp make_stk2(int a, int b, const char* str, int delta); static AsmOp make_stk3(int a, int b, int c, const char* str, int delta); - static AsmOp IntConst(td::RefInt256 value); + static AsmOp IntConst(const td::RefInt256& x); static AsmOp BoolConst(bool f); - static AsmOp Const(std::string push_op, td::RefInt256 origin = {}) { - return AsmOp(a_const, 0, 1, std::move(push_op), origin); + static AsmOp Const(std::string push_op) { + return AsmOp(a_const, 0, 1, std::move(push_op)); } - static AsmOp Const(int arg, std::string push_op, td::RefInt256 origin = {}); - static AsmOp Comment(std::string comment) { + static AsmOp Const(int arg, const std::string& push_op); + static AsmOp Comment(const std::string& comment) { return AsmOp(a_none, std::string{"// "} + comment); } - static AsmOp Custom(std::string custom_op) { + static AsmOp Custom(const std::string& custom_op) { return AsmOp(a_custom, 255, 255, custom_op); } - static AsmOp 
Parse(std::string custom_op); - static AsmOp Custom(std::string custom_op, int args, int retv = 1) { + static AsmOp Parse(const std::string& custom_op); + static AsmOp Custom(const std::string& custom_op, int args, int retv = 1) { return AsmOp(a_custom, args, retv, custom_op); } static AsmOp Parse(std::string custom_op, int args, int retv = 1); @@ -887,18 +671,6 @@ inline std::ostream& operator<<(std::ostream& os, const AsmOpList& op_list) { return os; } -class IndentGuard { - AsmOpList& aol_; - - public: - IndentGuard(AsmOpList& aol) : aol_(aol) { - aol.indent(); - } - ~IndentGuard() { - aol_.undent(); - } -}; - struct AsmOpCons { std::unique_ptr car; std::unique_ptr cdr; @@ -1321,71 +1093,57 @@ struct Stack { */ typedef std::function&, std::vector&, SrcLocation)> simple_compile_func_t; -typedef std::function&, std::vector&)> compile_func_t; inline simple_compile_func_t make_simple_compile(AsmOp op) { return [op](std::vector& out, std::vector& in, SrcLocation) -> AsmOp { return op; }; } -inline compile_func_t make_ext_compile(std::vector&& ops) { - return [ops = std::move(ops)](AsmOpList& dest, std::vector& out, std::vector& in)->bool { - return dest.append(ops); - }; -} - -inline compile_func_t make_ext_compile(AsmOp op) { - return - [op](AsmOpList& dest, std::vector& out, std::vector& in) -> bool { return dest.append(op); }; -} - -struct SymValAsmFunc : SymValFunc { +struct FunctionBodyBuiltin { simple_compile_func_t simple_compile; - compile_func_t ext_compile; - ~SymValAsmFunc() override = default; - SymValAsmFunc(std::vector parameters, TypeExpr* ft, std::vector&& arg_order, std::vector&& ret_order, int flags) - : SymValFunc(std::move(parameters), -1, ft, flags) { - this->arg_order = std::move(arg_order); - this->ret_order = std::move(ret_order); - } - SymValAsmFunc(std::vector parameters, TypeExpr* ft, simple_compile_func_t _compile, int flags) - : SymValFunc(std::move(parameters), -1, ft, flags), simple_compile(std::move(_compile)) { - } - 
SymValAsmFunc(std::vector parameters, TypeExpr* ft, simple_compile_func_t _compile, int flags, - std::initializer_list arg_order, std::initializer_list ret_order) - : SymValFunc(std::move(parameters), -1, ft, flags, arg_order, ret_order), simple_compile(std::move(_compile)) { - } - void set_code(std::vector code); - bool compile(AsmOpList& dest, std::vector& out, std::vector& in, SrcLocation where) const; + + explicit FunctionBodyBuiltin(simple_compile_func_t compile) + : simple_compile(std::move(compile)) {} + + void compile(AsmOpList& dest, std::vector& out, std::vector& in, SrcLocation where) const; +}; + +struct FunctionBodyAsm { + std::vector ops; + + void set_code(std::vector&& code); + void compile(AsmOpList& dest) const; }; struct CodeBlob { - enum { _ForbidImpure = 4 }; int var_cnt, in_var_cnt; TypeExpr* ret_type; - const SymValCodeFunc* func_val; + const FunctionData* fun_ref; std::string name; SrcLocation loc; std::vector vars; std::unique_ptr ops; std::unique_ptr* cur_ops; - std::vector debug_ttt; +#ifdef TOLK_DEBUG + std::vector _vector_of_ops; // to see it in debugger instead of nested pointers +#endif std::stack*> cur_ops_stack; - int flags = 0; bool require_callxargs = false; - CodeBlob(std::string name, SrcLocation loc, const SymValCodeFunc* func_val, TypeExpr* ret_type) - : var_cnt(0), in_var_cnt(0), ret_type(ret_type), func_val(func_val), name(std::move(name)), loc(loc), cur_ops(&ops) { + CodeBlob(std::string name, SrcLocation loc, const FunctionData* fun_ref, TypeExpr* ret_type) + : var_cnt(0), in_var_cnt(0), ret_type(ret_type), fun_ref(fun_ref), name(std::move(name)), loc(loc), cur_ops(&ops) { } template Op& emplace_back(Args&&... 
args) { Op& res = *(*cur_ops = std::make_unique(args...)); cur_ops = &(res.next); - debug_ttt.push_back(&res); +#ifdef TOLK_DEBUG + _vector_of_ops.push_back(&res); +#endif return res; } - bool import_params(FormalArgList arg_list); - var_idx_t create_var(TypeExpr* var_type, var_idx_t sym_idx, SrcLocation loc); + bool import_params(FormalArgList&& arg_list); + var_idx_t create_var(TypeExpr* var_type, const LocalVarData* v_sym, SrcLocation loc); var_idx_t create_tmp_var(TypeExpr* var_type, SrcLocation loc) { - return create_var(var_type, 0, loc); + return create_var(var_type, nullptr, loc); } int split_vars(bool strict = false); bool compute_used_code_vars(); @@ -1413,9 +1171,11 @@ struct CodeBlob { void generate_code(AsmOpList& out_list, int mode = 0); void generate_code(std::ostream& os, int mode = 0, int indent = 0); - void on_var_modification(var_idx_t idx, SrcLocation here) const { - for (auto& f : vars.at(idx).on_modification) { - f(here); + void on_var_modification(const std::vector& left_lval_indices, SrcLocation here) const { + for (var_idx_t ir_idx : left_lval_indices) { + for (auto& f : vars.at(ir_idx).on_modification) { + f(here); + } } } }; diff --git a/tolk/unify-types.cpp b/tolk/unify-types.cpp index cee71942..3712c6f5 100644 --- a/tolk/unify-types.cpp +++ b/tolk/unify-types.cpp @@ -121,7 +121,7 @@ bool TypeExpr::equals_to(const TypeExpr *rhs) const { while (r->constr == te_Indirect) r = r->args[0]; - bool eq = l->constr == r->constr && l->value == r->value && + bool eq = l->constr == r->constr && (l->constr == te_Unknown || l->value == r->value) && l->minw == r->minw && l->maxw == r->maxw && l->was_forall_var == r->was_forall_var && l->args.size() == r->args.size();