diff --git a/crypto/smartcont/tolk-stdlib/common.tolk b/crypto/smartcont/tolk-stdlib/common.tolk index dec12e23..03638f0a 100644 --- a/crypto/smartcont/tolk-stdlib/common.tolk +++ b/crypto/smartcont/tolk-stdlib/common.tolk @@ -17,17 +17,17 @@ fun createEmptyTuple(): tuple /// Appends a value to tuple, resulting in `Tuple t' = (x1, ..., xn, value)`. /// If its size exceeds 255, throws a type check exception. @pure -fun tuplePush(mutate self: tuple, value: X): void +fun tuplePush(mutate self: tuple, value: T): void asm "TPUSH"; /// Returns the first element of a non-empty tuple. @pure -fun tupleFirst(t: tuple): X +fun tupleFirst(t: tuple): T asm "FIRST"; /// Returns the [`index`]-th element of a tuple. @pure -fun tupleAt(t: tuple, index: int): X +fun tupleAt(t: tuple, index: int): T builtin; /// Returns the size of a tuple (elements count in it). @@ -37,7 +37,7 @@ fun tupleSize(t: tuple): int /// Returns the last element of a non-empty tuple. @pure -fun tupleLast(t: tuple): int +fun tupleLast(t: tuple): T asm "LAST"; @@ -306,11 +306,11 @@ fun getBuilderDepth(b: builder): int */ /// Dump a variable [x] to the debug log. -fun debugPrint(x: X): void +fun debugPrint(x: T): void builtin; /// Dump a string [x] to the debug log. -fun debugPrintString(x: X): void +fun debugPrintString(x: T): void builtin; /// Dumps the stack (at most the top 255 values) and shows the total stack depth. diff --git a/crypto/smartcont/tolk-stdlib/gas-payments.tolk b/crypto/smartcont/tolk-stdlib/gas-payments.tolk index 1dc6f3f8..1965cc6a 100644 --- a/crypto/smartcont/tolk-stdlib/gas-payments.tolk +++ b/crypto/smartcont/tolk-stdlib/gas-payments.tolk @@ -61,3 +61,9 @@ fun calculateOriginalMessageFee(workchain: int, incomingFwdFee: int): int /// If it has no debt, `0` is returned. fun getMyStorageDuePayment(): int asm "DUEPAYMENT"; + +/// Returns the amount of nanotoncoins charged for storage. 
+/// (during the storage phase preceding the current computation phase) +@pure +fun getMyStoragePaidPayment(): int + asm "STORAGEFEES"; diff --git a/tolk-tester/tests/a10.tolk b/tolk-tester/tests/a10.tolk index d46397c6..7301f1d5 100644 --- a/tolk-tester/tests/a10.tolk +++ b/tolk-tester/tests/a10.tolk @@ -35,7 +35,7 @@ fun test88(x: int) { } @method_id(89) -fun test89(last: int) { +fun test89(last: int): (int, int, int, int) { var t: tuple = createEmptyTuple(); t.tuplePush(1); t.tuplePush(2); diff --git a/tolk-tester/tests/a6.tolk b/tolk-tester/tests/a6.tolk index 7f2c3946..32fd3364 100644 --- a/tolk-tester/tests/a6.tolk +++ b/tolk-tester/tests/a6.tolk @@ -9,6 +9,7 @@ fun calc_phi(): int { repeat (70) { n*=10; }; var p= 1; var `q`=1; + _=`q`; do { (p,q)=(q,p+q); } while (q <= n); //;; @@ -27,7 +28,7 @@ fun calc_sqrt2(): int { return mulDivRound(p, n, q); } -fun calc_root(m: auto): auto { +fun calc_root(m: int) { var base: int=1; repeat(70) { base *= 10; } var (a, b, c) = (1,0,-m); diff --git a/tolk-tester/tests/a6_5.tolk b/tolk-tester/tests/a6_5.tolk index 8b300c0c..43fd59c5 100644 --- a/tolk-tester/tests/a6_5.tolk +++ b/tolk-tester/tests/a6_5.tolk @@ -1,5 +1,5 @@ @deprecated -fun twice(f: auto, x: auto): auto { +fun twice(f: int -> int, x: int) { return f (f (x)); } diff --git a/tolk-tester/tests/allow_post_modification.tolk b/tolk-tester/tests/allow_post_modification.tolk index 5e0ce6b9..191bcf08 100644 --- a/tolk-tester/tests/allow_post_modification.tolk +++ b/tolk-tester/tests/allow_post_modification.tolk @@ -138,5 +138,5 @@ fun main() { inc CALLDICT // self newY }> """ -@code_hash 33262590582878205026101577472505372101182291690814957175155528952950621243206 +@code_hash 7627024945492125068389905298530400936797031708759561372406088054030801992712 */ diff --git a/tolk-tester/tests/assignment-tests.tolk b/tolk-tester/tests/assignment-tests.tolk new file mode 100644 index 00000000..89de8cf4 --- /dev/null +++ b/tolk-tester/tests/assignment-tests.tolk @@ -0,0 +1,28 @@ 
+fun extractFromTypedTuple(params: [int]) { + var [payload: int] = params; + return payload + 10; +} + +@method_id(101) +fun test101(x: int) { + var params = [x]; + return extractFromTypedTuple(params); +} + +fun autoInferIntNull(x: int) { + if (x > 10) { return null; } + return x; +} + +fun main(value: int) { + var (x: int, y) = (autoInferIntNull(value), autoInferIntNull(value * 2)); + if (x == null && y == null) { return null; } + return x == null || y == null ? -1 : x + y; +} + +/** +@testcase | 0 | 3 | 9 +@testcase | 0 | 6 | -1 +@testcase | 0 | 11 | (null) +@testcase | 101 | 78 | 88 +*/ diff --git a/tolk-tester/tests/c2.tolk b/tolk-tester/tests/c2.tolk index ec8d32da..9b56a9c5 100644 --- a/tolk-tester/tests/c2.tolk +++ b/tolk-tester/tests/c2.tolk @@ -4,7 +4,7 @@ fun check_assoc(a: int, b: int, c: int): int { return op(op(a, b), c) == op(a, op(b, c)); } -fun unnamed_args(_: int, _: slice, _: auto): auto { +fun unnamed_args(_: int, _: slice, _: int) { return true; } @@ -14,7 +14,7 @@ fun main(x: int, y: int, z: int): int { } @method_id(101) -fun test101(x: int, z: int): auto { +fun test101(x: int, z: int) { return unnamed_args(x, "asdf", z); } diff --git a/tolk-tester/tests/c2_1.tolk b/tolk-tester/tests/c2_1.tolk index 4e52b9ee..fc16b436 100644 --- a/tolk-tester/tests/c2_1.tolk +++ b/tolk-tester/tests/c2_1.tolk @@ -1,4 +1,4 @@ -fun check_assoc(op: auto, a: int, b: int, c: int) { +fun check_assoc(op: (int, int) -> int, a: int, b: int, c: int) { return op(op(a, b), c) == op(a, op(b, c)); } diff --git a/tolk-tester/tests/generics-1.tolk b/tolk-tester/tests/generics-1.tolk new file mode 100644 index 00000000..0d872cc1 --- /dev/null +++ b/tolk-tester/tests/generics-1.tolk @@ -0,0 +1,150 @@ +fun eq1(value: X): X { return value; } +fun eq2(value: X) { return value; } +fun eq3(value: X): X { var cp: [X] = [eq1(value)]; var ((([v: X]))) = cp; return v; } +fun eq4(value: X) { return eq1(value); } + +@method_id(101) +fun test101(x: int) { + var (a, b, c) = (x, (x,x), 
[x,x]); + return (eq1(a), eq1(b), eq1(c), eq2(a), eq2(b), eq2(c), eq3(a), eq4(b), eq3(createEmptyTuple())); +} + +fun getTwo(): X { return 2 as X; } + +fun takeInt(a: int) { return a; } + +@method_id(102) +fun test102(): (int, int, int, [(int, int)]) { + var a: int = getTwo(); + var _: int = getTwo(); + var b = getTwo() as int; + var c: int = 1 ? getTwo() : getTwo(); + var c redef = getTwo(); + return (eq1(a), eq2(b), takeInt(getTwo()), [(getTwo(), getTwo())]); +} + +@method_id(103) +fun test103(first: int): (int, int, int) { + var t = createEmptyTuple(); + var cs = beginCell().storeInt(100, 32).endCell().beginParse(); + t.tuplePush(first); + t.tuplePush(2); + t.tuplePush(cs); + cs = t.tupleAt(2); + cs = t.tupleAt(2) as slice; + return (t.tupleAt(0), cs.loadInt(32), t.tupleAt(2).loadInt(32)); +} + +fun manyEq(a: T1, b: T2, c: T3): [T1, T2, T3] { + return [a, b, c]; +} + +@method_id(104) +fun test104(f: int) { + return ( + manyEq(1 ? 1 : 1, f ? 0 : null, !f ? getTwo() as int : null), + manyEq((f ? 
null as int : eq2(2), beginCell().storeBool(true).endCell().beginParse().loadBool()), 0, eq4(f)) + ); +} + +fun calcSum(x: X, y: X) { return x + y; } + +@method_id(105) +fun test105() { + if (0) { calcSum(((0)), null); } + return (calcSum(1, 2)); +} + +fun calcYPlus1(value: Y) { return value + 1; } +fun calcLoad32(cs: slice) { return cs.loadInt(32); } +fun calcTensorPlus1(tens: (int, int)) { var (f, s) = tens; return (f + 1, s + 1); } +fun calcTensorMul2(tens: (int, int)) { var (f, s) = tens; return (f * 2, s * 2); } +fun cellToSlice(c: cell) { return c.beginParse(); } +fun abstractTransform(xToY: (X) -> Y, yToR: (((Y))) -> R, initialX: X): R { + var y = xToY(initialX); + return yToR(y); +} + +@method_id(106) +fun test106() { + var c = beginCell().storeInt(106, 32).endCell(); + return [ + abstractTransform(cellToSlice, calcLoad32, c), + abstractTransform(calcYPlus1, calcYPlus1, 0), + abstractTransform(calcTensorPlus1, calcTensorMul2, (2, 2)) + ]; +} + +fun callTupleFirst(t: X): Y { return t.tupleFirst(); } +fun callTuplePush(mutate self: T, v1: V, v2: V): self { self.tuplePush(v1); tuplePush(mutate self, v2); return self; } +fun getTupleLastInt(t: tuple) { return t.tupleLast(); } +fun getTupleSize(t: tuple) { return t.tupleSize(); } +fun callAnyFn(f: (TObj) -> TResult, arg: TObj) { return f(arg); } +fun callAnyFn2(f: TCallback, arg: tuple) { return f(arg); } + +global t107: tuple; + +@method_id(107) +fun test107() { + t107 = createEmptyTuple(); + callTuplePush(mutate t107, 1, 2); + t107.callTuplePush(3, 4).callTuplePush(5, 6); + var first: int = t107.callTupleFirst(); + return ( + callAnyFn(getTupleSize, t107), + callAnyFn2(getTupleSize, t107), + first, + callTupleFirst(t107) as int, + callAnyFn(getTupleLastInt, t107), + callAnyFn2(getTupleLastInt, t107) + ); +} + +global g108: int; + +fun inc108(by: int) { g108 += by; } +fun getInc108() { return inc108; } +fun returnResult(f: () -> RetT): RetT { return f(); } +fun applyAndReturn(f: () -> (ArgT) -> RetT, arg: 
ArgT): () -> ArgT -> RetT { + f()(arg); + return f; +} + +@method_id(108) +fun test108() { + g108 = 0; + getInc108()(1); + returnResult<(int) -> void>(getInc108)(2); + applyAndReturn(getInc108, 10)()(10); + returnResult(getInc108)(2); + applyAndReturn(getInc108, 10)()(10); + return g108; +} + +fun main(x: int): (int, [[int, int]]) { + try { if(x) { throw (1, x); } } + catch (excNo, arg) { return (arg as int, [[eq2(arg as int), getTwo()]]); } + return (0, [[x, 1]]); +} + +/** +@testcase | 0 | 1 | 1 [ [ 1 2 ] ] +@testcase | 101 | 0 | 0 0 0 [ 0 0 ] 0 0 0 [ 0 0 ] 0 0 0 [] +@testcase | 102 | | 2 2 2 [ 2 2 ] +@testcase | 103 | 0 | 0 100 100 +@testcase | 104 | 0 | [ 1 (null) 2 ] [ 2 -1 0 0 ] +@testcase | 105 | | 3 +@testcase | 106 | | [ 106 2 6 6 ] +@testcase | 107 | | 6 6 1 1 6 6 +@testcase | 108 | | 45 + +@fif_codegen DECLPROC eq1 +@fif_codegen DECLPROC eq1 +@fif_codegen DECLPROC eq1<(int,int)> +@fif_codegen DECLPROC eq1<[int,int]> +@fif_codegen DECLPROC getTwo + +@fif_codegen_avoid DECLPROC eq1 +@fif_codegen_avoid DECLPROC eq2 +@fif_codegen_avoid DECLPROC eq3 + */ diff --git a/tolk-tester/tests/invalid-call-1.tolk b/tolk-tester/tests/invalid-call-1.tolk index 57a33c4b..3542f580 100644 --- a/tolk-tester/tests/invalid-call-1.tolk +++ b/tolk-tester/tests/invalid-call-1.tolk @@ -1,9 +1,10 @@ -fun main() { - return true(); +const asdf = 1; + +fun main(x: int) { + return x.asdf(); } /** @compilation_should_fail -The message is weird now, but later I'll rework error messages anyway. 
-@stderr cannot apply expression of type int to an expression of type (): cannot unify type () -> ??2 with int +@stderr calling a non-function */ diff --git a/tolk-tester/tests/invalid-call-9.tolk b/tolk-tester/tests/invalid-call-9.tolk new file mode 100644 index 00000000..87eb61e8 --- /dev/null +++ b/tolk-tester/tests/invalid-call-9.tolk @@ -0,0 +1,10 @@ +fun getOne() { return 1; } + +fun main() { + return getOne(); +} + +/** +@compilation_should_fail +@stderr calling a not generic function with generic T + */ diff --git a/tolk-tester/tests/invalid-declaration-11.tolk b/tolk-tester/tests/invalid-declaration-11.tolk new file mode 100644 index 00000000..75ebb450 --- /dev/null +++ b/tolk-tester/tests/invalid-declaration-11.tolk @@ -0,0 +1,13 @@ +// this function is declared incorrectly, +// since it should return 2 values onto a stack (1 for returned slice, 1 for mutated int) +// but contains not 2 numbers in asm ret_order +fun loadAddress2(mutate self: int): slice + asm( -> 1 0 2) "LDMSGADDR"; + +fun main(){} + +/** +@compilation_should_fail +@stderr ret_order (after ->) expected to contain 2 numbers +@stderr asm( -> 1 0 2) + */ diff --git a/tolk-tester/tests/invalid-declaration-12.tolk b/tolk-tester/tests/invalid-declaration-12.tolk new file mode 100644 index 00000000..25ae9de6 --- /dev/null +++ b/tolk-tester/tests/invalid-declaration-12.tolk @@ -0,0 +1,16 @@ +fun proxy(x: int) { + return factorial(x); +} + +fun factorial(x: int) { + if (x <= 0) { + return 1; + } + return x * proxy(x-1); +} + +/** +@compilation_should_fail +@stderr could not infer return type of `factorial`, because it appears in a recursive call chain +@stderr fun factorial + */ diff --git a/tolk-tester/tests/invalid-declaration-13.tolk b/tolk-tester/tests/invalid-declaration-13.tolk new file mode 100644 index 00000000..758a4f21 --- /dev/null +++ b/tolk-tester/tests/invalid-declaration-13.tolk @@ -0,0 +1,7 @@ +const c: slice = 123 + 456; + +/** +@compilation_should_fail +@stderr expression type 
does not match declared type +@stderr const c + */ diff --git a/tolk-tester/tests/invalid-generics-1.tolk b/tolk-tester/tests/invalid-generics-1.tolk new file mode 100644 index 00000000..c8ff7fec --- /dev/null +++ b/tolk-tester/tests/invalid-generics-1.tolk @@ -0,0 +1,10 @@ +fun f(v: int, x: X) {} + +fun failCantDeduceWithoutArgument() { + return f(1); +} + +/** +@compilation_should_fail +@stderr can not deduce X for generic function `f` + */ diff --git a/tolk-tester/tests/invalid-generics-10.tolk b/tolk-tester/tests/invalid-generics-10.tolk new file mode 100644 index 00000000..c7f72bf4 --- /dev/null +++ b/tolk-tester/tests/invalid-generics-10.tolk @@ -0,0 +1,9 @@ +fun invalidReferencingGenericMethodWithoutGeneric() { + var t = createEmptyTuple(); + var cb = t.tupleLast; +} + +/** +@compilation_should_fail +@stderr can not use a generic function `tupleLast` as non-call + */ diff --git a/tolk-tester/tests/invalid-generics-11.tolk b/tolk-tester/tests/invalid-generics-11.tolk new file mode 100644 index 00000000..a399bc91 --- /dev/null +++ b/tolk-tester/tests/invalid-generics-11.tolk @@ -0,0 +1,11 @@ +global gVar: int; + +fun main() { + var x = gVar; + return x; +} + +/** +@compilation_should_fail +@stderr generic T not expected here + */ diff --git a/tolk-tester/tests/invalid-generics-2.tolk b/tolk-tester/tests/invalid-generics-2.tolk new file mode 100644 index 00000000..15594433 --- /dev/null +++ b/tolk-tester/tests/invalid-generics-2.tolk @@ -0,0 +1,10 @@ +fun f(v: int, x: T) {} + +fun failCantDeduceWithPlainNull() { + return f(0, null); +} + +/** +@compilation_should_fail +@stderr can not deduce T for generic function `f` + */ diff --git a/tolk-tester/tests/invalid-generics-3.tolk b/tolk-tester/tests/invalid-generics-3.tolk new file mode 100644 index 00000000..72b7df0e --- /dev/null +++ b/tolk-tester/tests/invalid-generics-3.tolk @@ -0,0 +1,11 @@ +fun f(x: T, y: T) {} + +fun failIncompatibleTypesForT() { + return f(32, ""); +} + +/** +@compilation_should_fail 
+@stderr T is both int and slice for generic function `f` +@stderr f(32 + */ diff --git a/tolk-tester/tests/invalid-generics-4.tolk b/tolk-tester/tests/invalid-generics-4.tolk new file mode 100644 index 00000000..07472ba3 --- /dev/null +++ b/tolk-tester/tests/invalid-generics-4.tolk @@ -0,0 +1,10 @@ +fun f(x: T): void asm "NOP"; + +fun failInstantiatingAsmFunctionWithNon1Slot() { + f((1, 2)); +} + +/** +@compilation_should_fail +@stderr can not call `f` with T=(int, int), because it occupies 2 stack slots in TVM, not 1 + */ diff --git a/tolk-tester/tests/invalid-generics-5.tolk b/tolk-tester/tests/invalid-generics-5.tolk new file mode 100644 index 00000000..4d4f2967 --- /dev/null +++ b/tolk-tester/tests/invalid-generics-5.tolk @@ -0,0 +1,10 @@ +fun f(x: T): void asm "NOP"; + +fun failUsingGenericFunctionPartially() { + var cb = f; +} + +/** +@compilation_should_fail +@stderr can not use a generic function `f` as non-call + */ diff --git a/tolk-tester/tests/invalid-generics-6.tolk b/tolk-tester/tests/invalid-generics-6.tolk new file mode 100644 index 00000000..73e6403f --- /dev/null +++ b/tolk-tester/tests/invalid-generics-6.tolk @@ -0,0 +1,10 @@ +fun eq(t: X) { return t; } + +fun failUsingGenericFunctionPartially() { + var cb = createEmptyTuple().eq().eq().tuplePush; +} + +/** +@compilation_should_fail +@stderr can not use a generic function `tuplePush` as non-call + */ diff --git a/tolk-tester/tests/invalid-generics-7.tolk b/tolk-tester/tests/invalid-generics-7.tolk new file mode 100644 index 00000000..b51bb82c --- /dev/null +++ b/tolk-tester/tests/invalid-generics-7.tolk @@ -0,0 +1,18 @@ +fun failOnInstantiation(a: slice) { + var b: slice = foo(a); +} + +fun bar(value: X) : X { + return 1; +} +fun foo(value: X) : X { + return bar(value); +} + +/** +@compilation_should_fail +@stderr while instantiating generic function `foo` +@stderr while instantiating generic function `bar` +@stderr can not convert type `int` to return type `slice` +@stderr return 1 + */ diff 
--git a/tolk-tester/tests/invalid-generics-8.tolk b/tolk-tester/tests/invalid-generics-8.tolk new file mode 100644 index 00000000..d2c24e53 --- /dev/null +++ b/tolk-tester/tests/invalid-generics-8.tolk @@ -0,0 +1,11 @@ +fun withT1T2(a: (T1, T2)) {} + +fun wrongTCountPassed() { + withT1T2((5, "")); +} + +/** +@compilation_should_fail +@stderr wrong count of generic T: expected 2, got 1 +@stderr + */ diff --git a/tolk-tester/tests/invalid-generics-9.tolk b/tolk-tester/tests/invalid-generics-9.tolk new file mode 100644 index 00000000..73fd6f87 --- /dev/null +++ b/tolk-tester/tests/invalid-generics-9.tolk @@ -0,0 +1,8 @@ +fun invalidProvidingGenericTsToNotGeneric() { + beginCell(); +} + +/** +@compilation_should_fail +@stderr calling a not generic function with generic T + */ diff --git a/tolk-tester/tests/invalid-mutate-16.tolk b/tolk-tester/tests/invalid-mutate-16.tolk new file mode 100644 index 00000000..9da6e253 --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-16.tolk @@ -0,0 +1,9 @@ +fun cantCallMutatingFunctionWithAssignmentLValue() { + var t: tuple = createEmptyTuple(); + (t = createEmptyTuple()).tuplePush(1); +} + +/** +@compilation_should_fail +@stderr assignment can not be used as lvalue + */ diff --git a/tolk-tester/tests/invalid-mutate-17.tolk b/tolk-tester/tests/invalid-mutate-17.tolk new file mode 100644 index 00000000..9327f07d --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-17.tolk @@ -0,0 +1,13 @@ +@pure +fun tupleMut(mutate self: tuple): int + asm "TLEN"; + +fun main() { + var t = createEmptyTuple(); + return [[t.tupleMut]]; +} + +/** +@compilation_should_fail +@stderr saving `tupleMut` into a variable is impossible, since it has `mutate` parameters + */ diff --git a/tolk-tester/tests/invalid-nopar-4.tolk b/tolk-tester/tests/invalid-nopar-4.tolk index 6e833f99..033c483e 100644 --- a/tolk-tester/tests/invalid-nopar-4.tolk +++ b/tolk-tester/tests/invalid-nopar-4.tolk @@ -4,5 +4,5 @@ fun load_u32(cs: slice): (slice, int) { /** 
@compilation_should_fail -@stderr expected `(`, got `32` +@stderr expected `;`, got `32` */ diff --git a/tolk-tester/tests/invalid-pure-4.tolk b/tolk-tester/tests/invalid-pure-4.tolk deleted file mode 100644 index 868498f6..00000000 --- a/tolk-tester/tests/invalid-pure-4.tolk +++ /dev/null @@ -1,16 +0,0 @@ -global set: int; - -@pure -fun someF(): int { - var set redef = 0; - return set; -} - -/** -@compilation_should_fail -@stderr -""" -an impure operation in a pure function -var set -""" -*/ diff --git a/tolk-tester/tests/invalid-self-4.tolk b/tolk-tester/tests/invalid-self-4.tolk index f4856a46..0be6b9e4 100644 --- a/tolk-tester/tests/invalid-self-4.tolk +++ b/tolk-tester/tests/invalid-self-4.tolk @@ -4,6 +4,6 @@ fun cantReturnNothingFromSelf(mutate self: int): self { /** @compilation_should_fail -@stderr missing return; forgot `return self`? +@stderr missing return @stderr } */ diff --git a/tolk-tester/tests/invalid-syntax-3.tolk b/tolk-tester/tests/invalid-syntax-3.tolk index 26ce82ac..259ea795 100644 --- a/tolk-tester/tests/invalid-syntax-3.tolk +++ b/tolk-tester/tests/invalid-syntax-3.tolk @@ -4,5 +4,5 @@ fun main(x: int) { /** @compilation_should_fail -@stderr null is not a function: use `null`, not `null()` +@stderr calling a non-function */ diff --git a/tolk-tester/tests/invalid.tolk b/tolk-tester/tests/invalid-syntax-5.tolk similarity index 100% rename from tolk-tester/tests/invalid.tolk rename to tolk-tester/tests/invalid-syntax-5.tolk diff --git a/tolk-tester/tests/invalid-syntax-6.tolk b/tolk-tester/tests/invalid-syntax-6.tolk new file mode 100644 index 00000000..12e02645 --- /dev/null +++ b/tolk-tester/tests/invalid-syntax-6.tolk @@ -0,0 +1,9 @@ +fun main() { + var a = 1; + (a += 1) += 2; +} + +/** +@compilation_should_fail +@stderr assignment can not be used as lvalue +*/ diff --git a/tolk-tester/tests/invalid-syntax-7.tolk b/tolk-tester/tests/invalid-syntax-7.tolk new file mode 100644 index 00000000..9f63ac10 --- /dev/null +++ 
b/tolk-tester/tests/invalid-syntax-7.tolk @@ -0,0 +1,9 @@ +fun main() { + var x = 1; + x += (var y = 2); +} + +/** +@compilation_should_fail +@stderr expected , got `var` +*/ diff --git a/tolk-tester/tests/invalid-typing-1.tolk b/tolk-tester/tests/invalid-typing-1.tolk index a0fe296d..0089bd62 100644 --- a/tolk-tester/tests/invalid-typing-1.tolk +++ b/tolk-tester/tests/invalid-typing-1.tolk @@ -6,5 +6,5 @@ fun main() { /** @compilation_should_fail @stderr .tolk:2 -@stderr expected , got `scli` +@stderr unknown type name `scli` */ diff --git a/tolk-tester/tests/invalid-typing-12.tolk b/tolk-tester/tests/invalid-typing-12.tolk new file mode 100644 index 00000000..3a5b1fe2 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-12.tolk @@ -0,0 +1,10 @@ +fun failAssignNullToTensor() { + var ab = (1, 2); + ab = null; + return ab; +} + +/** +@compilation_should_fail +@stderr can not assign `null` to variable of type `(int, int)` + */ diff --git a/tolk-tester/tests/invalid-typing-3.tolk b/tolk-tester/tests/invalid-typing-3.tolk index fb4b0bc5..ac019a42 100644 --- a/tolk-tester/tests/invalid-typing-3.tolk +++ b/tolk-tester/tests/invalid-typing-3.tolk @@ -15,5 +15,5 @@ fun cantMixDifferentThis() { /** @compilation_should_fail -@stderr cannot apply function appendBuilder : builder -> (builder, ()) to arguments of type int: cannot unify type int with builder +@stderr can not call method for `builder` with object of type `int` */ diff --git a/tolk-tester/tests/invalid-typing-4.tolk b/tolk-tester/tests/invalid-typing-4.tolk index 0e655369..1ee71290 100644 --- a/tolk-tester/tests/invalid-typing-4.tolk +++ b/tolk-tester/tests/invalid-typing-4.tolk @@ -7,8 +7,6 @@ fun cantCallNotChainedMethodsInAChain(x: int) { } /** -The error is very weird, but nevertheless, the type system prevents of doing such errors. 
- @compilation_should_fail -@stderr cannot apply function incNotChained : int -> (int, ()) to arguments of type (): cannot unify type () with int +@stderr can not call method for `int` with object of type `void` */ diff --git a/tolk-tester/tests/invalid-typing-5.tolk b/tolk-tester/tests/invalid-typing-5.tolk index ba3450de..9d8cd480 100644 --- a/tolk-tester/tests/invalid-typing-5.tolk +++ b/tolk-tester/tests/invalid-typing-5.tolk @@ -7,8 +7,7 @@ fun failWhenReturnANotChainedValue(x: int): int { } /** -The error is very weird, but nevertheless, the type system prevents of doing such errors. - @compilation_should_fail -@stderr previous function return type int cannot be unified with return statement expression type (): cannot unify type () with int +@stderr x.incNotChained() +@stderr can not convert type `void` to return type `int` */ diff --git a/tolk-tester/tests/invalid-typing-6.tolk b/tolk-tester/tests/invalid-typing-6.tolk index dcdab5f1..da0ac9bb 100644 --- a/tolk-tester/tests/invalid-typing-6.tolk +++ b/tolk-tester/tests/invalid-typing-6.tolk @@ -4,5 +4,5 @@ fun failWhenTernaryConditionNotInt(cs: slice) { /** @compilation_should_fail -@stderr condition of ternary ?: operator must be an integer +@stderr condition of ternary operator must be an integer */ diff --git a/tolk-tester/tests/invalid-typing-7.tolk b/tolk-tester/tests/invalid-typing-7.tolk new file mode 100644 index 00000000..c192a05b --- /dev/null +++ b/tolk-tester/tests/invalid-typing-7.tolk @@ -0,0 +1,9 @@ +fun failAssignPlainNullToVariable() { + var x = null; +} + +/** +@compilation_should_fail +@stderr can not infer type of `x`, it's always null +@stderr specify its type with `x: ` or use `null as ` + */ diff --git a/tolk-tester/tests/invalid-typing-8.tolk b/tolk-tester/tests/invalid-typing-8.tolk new file mode 100644 index 00000000..d696e132 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-8.tolk @@ -0,0 +1,8 @@ +fun failExplicitCastIncompatible(c: cell) { + return c as slice; +} + +/** 
+@compilation_should_fail +@stderr type `cell` can not be cast to `slice` + */ diff --git a/tolk-tester/tests/invalid-typing-9.tolk b/tolk-tester/tests/invalid-typing-9.tolk new file mode 100644 index 00000000..a0d5ee04 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-9.tolk @@ -0,0 +1,13 @@ +fun getTupleLastGetter(): tuple -> X { + return tupleLast; +} + +fun failTypeMismatch() { + var t = createEmptyTuple(); + var c: cell = getTupleLastGetter()(t); +} + +/** +@compilation_should_fail +@stderr can not assign `int` to variable of type `cell` + */ diff --git a/tolk-tester/tests/logical-operators.tolk b/tolk-tester/tests/logical-operators.tolk index e9774f3f..9e21a968 100644 --- a/tolk-tester/tests/logical-operators.tolk +++ b/tolk-tester/tests/logical-operators.tolk @@ -54,7 +54,8 @@ fun testDict(last: int) { @method_id(105) fun testNotNull(x: int) { - return [x == null, null == x, !(x == null), null == null, +(null != null)]; + // return [x == null, null == x, !(x == null), null == null, +(null != null)]; + return [x == null, null == x, !(x == null)]; } @method_id(106) @@ -144,8 +145,8 @@ fun main() { @testcase | 104 | 50 | 3 5 -1 @testcase | 104 | 100 | 3 5 5 @testcase | 104 | 0 | 3 -1 5 -@testcase | 105 | 0 | [ 0 0 -1 -1 0 ] -@testcase | 105 | null | [ -1 -1 0 -1 0 ] +@testcase | 105 | 0 | [ 0 0 -1 ] +@testcase | 105 | null | [ -1 -1 0 ] @testcase | 106 | | [ 0 0 0 -1 ] [ 0 0 0 ] [ -1 -1 -1 ] [ 0 -1 ] @testcase | 107 | | [ -1 -1 0 -1 ] [ 0 0 0 ] [ -1 -1 -1 ] [ -1 0 ] @testcase | 108 | 1 2 | -1 diff --git a/tolk-tester/tests/mutate-methods.tolk b/tolk-tester/tests/mutate-methods.tolk index 73a6591b..816e4c8d 100644 --- a/tolk-tester/tests/mutate-methods.tolk +++ b/tolk-tester/tests/mutate-methods.tolk @@ -154,7 +154,7 @@ fun getSumOfNumbersInCell(c: cell): int { @method_id(110) fun testStoreChaining() { - var b = beginCell().storeUint(1, 32).storeUint(2, 32).storeUint(3, 32); + var b = ((beginCell()).storeUint(1, 32)).storeUint(2, 32).storeUint(3, 32); 
b.storeUint(4, 32); b.myStoreUint(5, 32).storeUint(6, 32); storeUint(mutate b, 7, 32); @@ -198,7 +198,7 @@ fun testStoreAndMutateBoth() { b.myStoreU32_and_mutate_x(mutate x); var cs: slice = b.endCell().beginParse(); - var (n1,n2,n3,n4,n5) = (cs.loadUint(32),cs.loadUint(32),cs.loadUint(32),cs.loadUint(32),cs.loadUint(32)); + var (n1,n2,n3,n4,n5) = (cs.loadUint(32),((cs)).loadUint(32),cs.loadUint(32),cs.loadUint(32),cs.loadUint(32)); assert(n5 == x) throw 100; return [n1,n2,n3,n4,n5]; diff --git a/tolk-tester/tests/null-keyword.tolk b/tolk-tester/tests/null-keyword.tolk index 8fcf2584..c4bd0acc 100644 --- a/tolk-tester/tests/null-keyword.tolk +++ b/tolk-tester/tests/null-keyword.tolk @@ -7,12 +7,14 @@ fun test1() { numbers = listPrepend(2, numbers); numbers = listPrepend(3, numbers); numbers = listPrepend(4, numbers); - var (h, numbers redef) = listSplit(numbers); + var (h: int, numbers redef) = listSplit(numbers); h += listGetHead(numbers); + _ = null; + (_, _) = (null, null); var t = createEmptyTuple(); do { - var num = numbers.listNext(); + var num: int = numbers.listNext(); t.tuplePush(num); } while (numbers != null); @@ -44,7 +46,7 @@ fun test3(x: int) { } fun getUntypedNull() { - var untyped = null; + var untyped: null = null; if (true) { return untyped; } @@ -52,8 +54,8 @@ fun getUntypedNull() { } @method_id(104) -fun test4() { - var (_, (_, untyped)) = (3, (createEmptyTuple, null)); +fun test4(): null { + var (_, (_, untyped: null)) = (3, (createEmptyTuple, null)); if (true) { return untyped; } @@ -62,15 +64,10 @@ fun test4() { @method_id(105) fun test5() { - var n = getUntypedNull(); + var n: slice = getUntypedNull(); return !(null == n) ? 
n.loadInt(32) : 100; } -@method_id(106) -fun test6(x: int) { - return x > null; // this compiles (for now), but fails at runtime -} - @method_id(107) fun test7() { var b = beginCell().storeMaybeRef(null); @@ -132,15 +129,6 @@ fun main() { }> """ -@fif_codegen -""" - test6 PROC:<{ - // x - PUSHNULL // x _1 - GREATER // _2 - }> -""" - @fif_codegen """ test7 PROC:<{ diff --git a/tolk-tester/tests/op_priority.tolk b/tolk-tester/tests/op_priority.tolk index e4f97b75..95209c99 100644 --- a/tolk-tester/tests/op_priority.tolk +++ b/tolk-tester/tests/op_priority.tolk @@ -56,7 +56,7 @@ fun test8(b: int): int { return a; } -fun `_ int) { return used_as_noncall2; } +fun receiveGetter(): () -> int { return used_as_noncall2; } @pure fun usedButOptimizedOut(x: int): int { return x + 2; } diff --git a/tolk-tester/tests/self-keyword.tolk b/tolk-tester/tests/self-keyword.tolk index ba779454..b0567696 100644 --- a/tolk-tester/tests/self-keyword.tolk +++ b/tolk-tester/tests/self-keyword.tolk @@ -187,7 +187,7 @@ fun myTupleAt(self: tuple, idx: int): T { global tup111: tuple; @method_id(111) -fun testForallFunctionsWithSelf() { +fun testForallFunctionsWithSelf(): (int, int, tuple) { var t = createEmptyTuple(); tup111 = createEmptyTuple(); t.myTuplePush(10); diff --git a/tolk-tester/tests/test-math.tolk b/tolk-tester/tests/test-math.tolk index 893035fd..95444e6b 100644 --- a/tolk-tester/tests/test-math.tolk +++ b/tolk-tester/tests/test-math.tolk @@ -218,7 +218,7 @@ fun fixed248_log2_const(): int { @pure @inline fun Pi_const_f254(): int { - var (c: auto, _) = Pi_xconst_f254(); + var (c, _) = Pi_xconst_f254(); return c; } @@ -1019,7 +1019,8 @@ fun test_nrand(n: int): tuple { repeat (n) { var x: int = fixed248_nrand(); var bucket: int = (abs(x) >> 243); // 255 buckets starting from x=0, each 1/32 wide - t.tset(bucket, t.tupleAt(bucket) + 1); + var at_bucket: int = t.tupleAt(bucket); + t.tset(bucket, at_bucket + 1); } return t; } diff --git a/tolk-tester/tests/try-func.tolk 
b/tolk-tester/tests/try-func.tolk index 5ce03ff1..dfd72e9e 100644 --- a/tolk-tester/tests/try-func.tolk +++ b/tolk-tester/tests/try-func.tolk @@ -1,6 +1,3 @@ -fun unsafeGetInt(any: X): int - asm "NOP"; - fun foo(x: int): int { try { if (x == 7) { @@ -28,7 +25,7 @@ fun foo_inlineref(x: int): int { if (x == 7) { throw (44, 2); } return x; } catch (_, arg) { - return unsafeGetInt(arg); + return arg as int; } } @@ -83,7 +80,7 @@ fun foo_big( } return x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 + x11 + x12 + x13 + x14 + x15 + x16 + x17 + x18 + x19 + x20; } catch (code, arg) { - return unsafeGetInt(arg); + return arg as int; } } diff --git a/tolk-tester/tests/var-apply.tolk b/tolk-tester/tests/var-apply.tolk index 6a84a4fa..a0918c18 100644 --- a/tolk-tester/tests/var-apply.tolk +++ b/tolk-tester/tests/var-apply.tolk @@ -92,7 +92,7 @@ fun always_throw2(x: int) { throw 239 + x; } -global global_f: int -> (); +global global_f: int -> void; @method_id(104) fun testGlobalVarApply() { @@ -105,6 +105,30 @@ fun testGlobalVarApply() { } } +@method_id(105) +fun testVarApply2() { + var creator = createEmptyTuple; + var t = creator(); + t.tuplePush(1); + var sizer = t.tupleSize; + return sizer(t); +} + +fun getTupleLastGetter(): (tuple) -> X { + return tupleLast; +} + +@method_id(106) +fun testVarApply3() { + var t = createEmptyTuple(); + t.tuplePush(1); + t.tuplePush([2]); + var getIntAt = t.tupleAt; + var getTupleFirstInt = createEmptyTuple().tupleFirst; + var getTupleLastTuple = getTupleLastGetter(); + return (getIntAt(t, 0), getTupleFirstInt(t), getTupleLastTuple(t), getTupleLastGetter()(t)); +} + fun main() {} /** @@ -112,4 +136,6 @@ fun main() {} @testcase | 102 | | 1000 @testcase | 103 | | [ 1000 1000 0 1001 ] @testcase | 104 | | 240 +@testcase | 105 | | 1 +@testcase | 106 | | 1 1 [ 2 ] [ 2 ] */ diff --git a/tolk/CMakeLists.txt b/tolk/CMakeLists.txt index 0c3e7c63..2ee69686 100644 --- a/tolk/CMakeLists.txt +++ b/tolk/CMakeLists.txt @@ -10,10 +10,10 @@ set(TOLK_SOURCE 
constant-evaluator.cpp pipe-discover-parse-sources.cpp pipe-register-symbols.cpp - pipe-resolve-symbols.cpp + pipe-resolve-identifiers.cpp pipe-calc-rvalue-lvalue.cpp pipe-detect-unreachable.cpp - pipe-infer-check-types.cpp + pipe-infer-types-and-calls.cpp pipe-refine-lvalue-for-mutate.cpp pipe-check-rvalue-lvalue.cpp pipe-check-pure-impure.cpp @@ -21,7 +21,8 @@ set(TOLK_SOURCE pipe-ast-to-legacy.cpp pipe-find-unused-symbols.cpp pipe-generate-fif-output.cpp - unify-types.cpp + type-system.cpp + generics-helpers.cpp abscode.cpp analyzer.cpp asmops.cpp diff --git a/tolk/abscode.cpp b/tolk/abscode.cpp index 253e8012..7bcb0f84 100644 --- a/tolk/abscode.cpp +++ b/tolk/abscode.cpp @@ -16,6 +16,7 @@ */ #include "tolk.h" #include "compiler-state.h" +#include "type-system.h" namespace tolk { @@ -28,7 +29,7 @@ namespace tolk { void TmpVar::dump(std::ostream& os) const { show(os); os << " : " << v_type << " (width "; - v_type->show_width(os); + os << v_type->calc_width_on_stack(); os << ")"; if (coord > 0) { os << " = _" << (coord >> 8) << '.' 
<< (coord & 255); @@ -443,7 +444,7 @@ void CodeBlob::print(std::ostream& os, int flags) const { os << "-------- END ---------\n\n"; } -var_idx_t CodeBlob::create_var(TypeExpr* var_type, const LocalVarData* v_sym, SrcLocation location) { +var_idx_t CodeBlob::create_var(TypePtr var_type, const LocalVarData* v_sym, SrcLocation location) { vars.emplace_back(var_cnt, var_type, v_sym, location); return var_cnt++; } @@ -454,7 +455,7 @@ bool CodeBlob::import_params(FormalArgList&& arg_list) { } std::vector list; for (const auto& par : arg_list) { - TypeExpr* arg_type; + TypePtr arg_type; const LocalVarData* arg_sym; SrcLocation arg_loc; std::tie(arg_type, arg_sym, arg_loc) = par; diff --git a/tolk/analyzer.cpp b/tolk/analyzer.cpp index 495ae03b..8539afdd 100644 --- a/tolk/analyzer.cpp +++ b/tolk/analyzer.cpp @@ -16,6 +16,7 @@ */ #include "tolk.h" #include "compiler-state.h" +#include "type-system.h" namespace tolk { @@ -25,38 +26,30 @@ namespace tolk { * */ -void CodeBlob::simplify_var_types() { - for (TmpVar& var : vars) { - TypeExpr::remove_indirect(var.v_type); - var.v_type->recompute_width(); - } -} - int CodeBlob::split_vars(bool strict) { int n = var_cnt, changes = 0; for (int j = 0; j < var_cnt; j++) { TmpVar& var = vars[j]; - if (strict && var.v_type->minw != var.v_type->maxw) { + int width_j = var.v_type->calc_width_on_stack(); + if (strict && width_j < 0) { throw ParseError{var.where, "variable does not have fixed width, cannot manipulate it"}; } - std::vector comp_types; - int k = var.v_type->extract_components(comp_types); - tolk_assert(k <= 254 && n <= 0x7fff00); - tolk_assert((unsigned)k == comp_types.size()); - if (k != 1) { - var.coord = ~((n << 8) + k); - for (int i = 0; i < k; i++) { - auto v = create_var(comp_types[i], vars[j].v_sym, vars[j].where); - tolk_assert(v == n + i); - tolk_assert(vars[v].idx == v); - vars[v].coord = ((int)j << 8) + i + 1; - } - n += k; - ++changes; - } else if (strict && var.v_type->minw != 1) { - throw ParseError{var.where, - 
"cannot work with variable or variable component of width greater than one"}; + if (width_j == 1) { + continue; } + std::vector comp_types; + var.v_type->extract_components(comp_types); + tolk_assert(width_j <= 254 && n <= 0x7fff00); + tolk_assert((unsigned)width_j == comp_types.size()); + var.coord = ~((n << 8) + width_j); + for (int i = 0; i < width_j; i++) { + auto v = create_var(comp_types[i], vars[j].v_sym, vars[j].where); + tolk_assert(v == n + i); + tolk_assert(vars[v].idx == v); + vars[v].coord = ((int)j << 8) + i + 1; + } + n += width_j; + ++changes; } if (!changes) { return 0; @@ -687,7 +680,7 @@ void CodeBlob::fwd_analyze() { tolk_assert(ops && ops->cl == Op::_Import); for (var_idx_t i : ops->left) { values += i; - if (vars[i].v_type->is_int()) { + if (vars[i].v_type == TypeDataInt::create()) { values[i]->val |= VarDescr::_Int; } } @@ -732,7 +725,7 @@ VarDescrList Op::fwd_analyze(VarDescrList values) { } case _Call: { prepare_args(values); - if (!f_sym->is_regular_function()) { + if (!f_sym->is_code_function()) { std::vector res; res.reserve(left.size()); for (var_idx_t i : left) { diff --git a/tolk/ast-from-tokens.cpp b/tolk/ast-from-tokens.cpp index 22d64442..767e6066 100644 --- a/tolk/ast-from-tokens.cpp +++ b/tolk/ast-from-tokens.cpp @@ -16,8 +16,8 @@ */ #include "ast-from-tokens.h" #include "ast.h" +#include "type-system.h" #include "platform-utils.h" -#include "type-expr.h" #include "tolk-version.h" /* @@ -130,9 +130,10 @@ static AnyExprV maybe_replace_eq_null_with_isNull_call(V v) } auto v_ident = createV(v->loc, "__isNull"); // built-in function + auto v_ref = createV(v->loc, v_ident, nullptr); AnyExprV v_null = v->get_lhs()->type == ast_null_keyword ? 
v->get_rhs() : v->get_lhs(); AnyExprV v_arg = createV(v->loc, v_null, false); - AnyExprV v_isNull = createV(v->loc, v_ident, createV(v->loc, {v_arg})); + AnyExprV v_isNull = createV(v->loc, v_ref, createV(v->loc, {v_arg})); if (v->tok == tok_neq) { v_isNull = createV(v->loc, "!", tok_logical_not, v_isNull); } @@ -146,98 +147,14 @@ static AnyExprV maybe_replace_eq_null_with_isNull_call(V v) * */ -// TE ::= TA | TA -> TE -// TA ::= int | ... | cont | var | _ | () | ( TE { , TE } ) | [ TE { , TE } ] -static TypeExpr* parse_type(Lexer& lex, V genericsT_list); - -static TypeExpr* parse_type1(Lexer& lex, V genericsT_list) { - switch (lex.tok()) { - case tok_int: - lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Int); - case tok_cell: - lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Cell); - case tok_slice: - lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Slice); - case tok_builder: - lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Builder); - case tok_continuation: - lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Continutaion); - case tok_tuple: - lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Tuple); - case tok_auto: - lex.next(); - return TypeExpr::new_hole(); - case tok_void: - lex.next(); - return TypeExpr::new_tensor({}); - case tok_bool: - lex.error("bool type is not supported yet"); - case tok_self: - lex.error("`self` type can be used only as a return type of a function (enforcing it to be chainable)"); - case tok_identifier: - if (int idx = genericsT_list ? 
genericsT_list->lookup_idx(lex.cur_str()) : -1; idx != -1) { - lex.next(); - return genericsT_list->get_item(idx)->created_type; - } - break; - case tok_oppar: { - lex.next(); - if (lex.tok() == tok_clpar) { - lex.next(); - return TypeExpr::new_unit(); - } - std::vector sub{1, parse_type(lex, genericsT_list)}; - while (lex.tok() == tok_comma) { - lex.next(); - sub.push_back(parse_type(lex, genericsT_list)); - } - lex.expect(tok_clpar, "`)`"); - return TypeExpr::new_tensor(std::move(sub)); - } - case tok_opbracket: { - lex.next(); - if (lex.tok() == tok_clbracket) { - lex.next(); - return TypeExpr::new_tuple({}); - } - std::vector sub{1, parse_type(lex, genericsT_list)}; - while (lex.tok() == tok_comma) { - lex.next(); - sub.push_back(parse_type(lex, genericsT_list)); - } - lex.expect(tok_clbracket, "`]`"); - return TypeExpr::new_tuple(std::move(sub)); - } - default: - break; - } - lex.unexpected(""); -} - -static TypeExpr* parse_type(Lexer& lex, V genericsT_list) { - TypeExpr* res = parse_type1(lex, genericsT_list); - if (lex.tok() == tok_arrow) { - lex.next(); - TypeExpr* to = parse_type(lex, genericsT_list); - return TypeExpr::new_map(res, to); - } - return res; -} AnyExprV parse_expr(Lexer& lex); -static AnyV parse_parameter(Lexer& lex, V genericsT_list, bool is_first) { +static AnyV parse_parameter(Lexer& lex, bool is_first) { SrcLocation loc = lex.cur_location(); // optional keyword `mutate` meaning that a function will mutate a passed argument (like passed by reference) bool declared_as_mutate = false; - bool is_param_self = false; if (lex.tok() == tok_mutate) { lex.next(); declared_as_mutate = true; @@ -252,21 +169,14 @@ static AnyV parse_parameter(Lexer& lex, V genericsT_list, bo lex.error("`self` can only be the first parameter"); } param_name = "self"; - is_param_self = true; } else if (lex.tok() != tok_underscore) { lex.unexpected("parameter name"); } lex.next(); - // parameter type after colon, also mandatory (even explicit ":auto") + // parameter type 
after colon are mandatory lex.expect(tok_colon, "`: `"); - TypeExpr* param_type = parse_type(lex, genericsT_list); - if (declared_as_mutate && !param_type->has_fixed_width()) { - throw ParseError(loc, "`mutate` parameter must be strictly typed"); - } - if (is_param_self && !param_type->has_fixed_width()) { - throw ParseError(loc, "`self` parameter must be strictly typed"); - } + TypePtr param_type = parse_type_from_tokens(lex); return createV(loc, param_name, param_type, declared_as_mutate); } @@ -281,7 +191,7 @@ static AnyV parse_global_var_declaration(Lexer& lex, const std::vector(lex.cur_location(), lex.cur_str()); lex.next(); lex.expect(tok_colon, "`:`"); - TypeExpr* declared_type = parse_type(lex, nullptr); + TypePtr declared_type = parse_type_from_tokens(lex); if (lex.tok() == tok_comma) { lex.error("multiple declarations are not allowed, split globals on separate lines"); } @@ -301,18 +211,10 @@ static AnyV parse_constant_declaration(Lexer& lex, const std::vector(lex.cur_location(), lex.cur_str()); lex.next(); - TypeExpr *declared_type = nullptr; + TypePtr declared_type = nullptr; if (lex.tok() == tok_colon) { lex.next(); - if (lex.tok() == tok_int) { - declared_type = TypeExpr::new_atomic(TypeExpr::_Int); - lex.next(); - } else if (lex.tok() == tok_slice) { - declared_type = TypeExpr::new_atomic(TypeExpr::_Slice); - lex.next(); - } else { - lex.error("a constant can be int or slice only"); - } + declared_type = parse_type_from_tokens(lex); } lex.expect(tok_assign, "`=`"); AnyExprV init_value = parse_expr(lex); @@ -324,15 +226,15 @@ static AnyV parse_constant_declaration(Lexer& lex, const std::vector parse_parameter_list(Lexer& lex, V genericsT_list) { +static V parse_parameter_list(Lexer& lex) { SrcLocation loc = lex.cur_location(); std::vector params; lex.expect(tok_oppar, "parameter list"); if (lex.tok() != tok_clpar) { - params.push_back(parse_parameter(lex, genericsT_list, true)); + params.push_back(parse_parameter(lex, true)); while (lex.tok() == 
tok_comma) { lex.next(); - params.push_back(parse_parameter(lex, genericsT_list, false)); + params.push_back(parse_parameter(lex, false)); } } lex.expect(tok_clpar, "`)`"); @@ -369,6 +271,26 @@ static V parse_argument_list(Lexer& lex) { return createV(loc, std::move(args)); } +static V parse_maybe_instantiationTs_after_identifier(Lexer& lex) { + lex.check(tok_lt, "`<`"); + Lexer::SavedPositionForLookahead backup = lex.save_parsing_position(); + try { + SrcLocation loc = lex.cur_location(); + lex.next(); + std::vector instantiationTs; + instantiationTs.push_back(createV(lex.cur_location(), parse_type_from_tokens(lex))); + while (lex.tok() == tok_comma) { + lex.next(); + instantiationTs.push_back(createV(lex.cur_location(), parse_type_from_tokens(lex))); + } + lex.expect(tok_gt, "`>`"); + return createV(loc, std::move(instantiationTs)); + } catch (const ParseError&) { + lex.restore_position(backup); + return nullptr; + } +} + // parse (expr) / [expr] / identifier / number static AnyExprV parse_expr100(Lexer& lex) { SrcLocation loc = lex.cur_location(); @@ -396,7 +318,7 @@ static AnyExprV parse_expr100(Lexer& lex) { lex.next(); if (lex.tok() == tok_clbracket) { lex.next(); - return createV(loc, {}); + return createV(loc, {}); } std::vector items(1, parse_expr(lex)); while (lex.tok() == tok_comma) { @@ -404,7 +326,7 @@ static AnyExprV parse_expr100(Lexer& lex) { items.emplace_back(parse_expr(lex)); } lex.expect(tok_clbracket, "`]`"); - return createV(loc, std::move(items)); + return createV(loc, std::move(items)); } case tok_int_const: { std::string_view orig_str = lex.cur_str(); @@ -443,12 +365,17 @@ static AnyExprV parse_expr100(Lexer& lex) { } case tok_self: { lex.next(); - return createV(loc); + auto v_ident = createV(loc, "self"); + return createV(loc, v_ident, nullptr); } case tok_identifier: { - std::string_view str_val = lex.cur_str(); + auto v_ident = createV(loc, lex.cur_str()); + V v_instantiationTs = nullptr; lex.next(); - return createV(loc, str_val); + if 
(lex.tok() == tok_lt) { + v_instantiationTs = parse_maybe_instantiationTs_after_identifier(lex); + } + return createV(loc, v_ident, v_instantiationTs); } default: { // show a proper error for `int i` (FunC-style declarations) @@ -461,25 +388,36 @@ static AnyExprV parse_expr100(Lexer& lex) { } } -// parse E(args) +// parse E(...) (left-to-right) static AnyExprV parse_expr90(Lexer& lex) { AnyExprV res = parse_expr100(lex); - if (lex.tok() == tok_oppar) { - return createV(res->loc, res, parse_argument_list(lex)); + while (lex.tok() == tok_oppar) { + res = createV(res->loc, res, parse_argument_list(lex)); } return res; } -// parse E.method(...) (left-to-right) +// parse E.field and E.method(...) (left-to-right) static AnyExprV parse_expr80(Lexer& lex) { AnyExprV lhs = parse_expr90(lex); while (lex.tok() == tok_dot) { SrcLocation loc = lex.cur_location(); lex.next(); - lex.check(tok_identifier, "method name"); - std::string_view method_name = lex.cur_str(); - lex.next(); - lhs = createV(loc, method_name, lhs, parse_argument_list(lex)); + V v_ident = nullptr; + V v_instantiationTs = nullptr; + if (lex.tok() == tok_identifier) { + v_ident = createV(lex.cur_location(), lex.cur_str()); + lex.next(); + if (lex.tok() == tok_lt) { + v_instantiationTs = parse_maybe_instantiationTs_after_identifier(lex); + } + } else { + lex.unexpected("method name"); + } + lhs = createV(loc, lhs, v_ident, v_instantiationTs); + while (lex.tok() == tok_oppar) { + lhs = createV(lex.cur_location(), lhs, parse_argument_list(lex)); + } } return lhs; } @@ -497,15 +435,27 @@ static AnyExprV parse_expr75(Lexer& lex) { return parse_expr80(lex); } +// parse E as +static AnyExprV parse_expr40(Lexer& lex) { + AnyExprV lhs = parse_expr75(lex); + if (lex.tok() == tok_as) { + SrcLocation loc = lex.cur_location(); + lex.next(); + TypePtr cast_to_type = parse_type_from_tokens(lex); + lhs = createV(loc, lhs, cast_to_type); + } + return lhs; +} + // parse E * / % ^/ ~/ E (left-to-right) static AnyExprV 
parse_expr30(Lexer& lex) { - AnyExprV lhs = parse_expr75(lex); + AnyExprV lhs = parse_expr40(lex); TokenType t = lex.tok(); while (t == tok_mul || t == tok_div || t == tok_mod || t == tok_divC || t == tok_divR) { SrcLocation loc = lex.cur_location(); std::string_view operator_name = lex.cur_str(); lex.next(); - AnyExprV rhs = parse_expr75(lex); + AnyExprV rhs = parse_expr40(lex); lhs = createV(loc, operator_name, t, lhs, rhs); t = lex.tok(); } @@ -597,15 +547,20 @@ static AnyExprV parse_expr13(Lexer& lex) { static AnyExprV parse_expr10(Lexer& lex) { AnyExprV lhs = parse_expr13(lex); TokenType t = lex.tok(); - if (t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || - t == tok_set_mod || t == tok_set_lshift || t == tok_set_rshift || - t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor || - t == tok_assign) { + if (t == tok_assign) { SrcLocation loc = lex.cur_location(); - std::string_view operator_name = lex.cur_str(); lex.next(); AnyExprV rhs = parse_expr10(lex); - return createV(loc, operator_name, t, lhs, rhs); + return createV(loc, lhs, rhs); + } + if (t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || + t == tok_set_mod || t == tok_set_lshift || t == tok_set_rshift || + t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor) { + SrcLocation loc = lex.cur_location(); + std::string_view operator_name = lex.cur_str().substr(0, lex.cur_str().size() - 1); // "+" for += + lex.next(); + AnyExprV rhs = parse_expr10(lex); + return createV(loc, operator_name, t, lhs, rhs); } if (t == tok_question) { SrcLocation loc = lex.cur_location(); @@ -631,7 +586,7 @@ static AnyExprV parse_var_declaration_lhs(Lexer& lex, bool is_immutable) { AnyExprV first = parse_var_declaration_lhs(lex, is_immutable); if (lex.tok() == tok_clpar) { lex.next(); - return createV(loc, first); + return first; } std::vector args(1, first); while (lex.tok() == tok_comma) { @@ -649,51 
+604,51 @@ static AnyExprV parse_var_declaration_lhs(Lexer& lex, bool is_immutable) { args.push_back(parse_var_declaration_lhs(lex, is_immutable)); } lex.expect(tok_clbracket, "`]`"); - return createV(loc, std::move(args)); + return createV(loc, std::move(args)); } if (lex.tok() == tok_identifier) { auto v_ident = createV(loc, lex.cur_str()); - TypeExpr* declared_type = nullptr; + TypePtr declared_type = nullptr; bool marked_as_redef = false; lex.next(); if (lex.tok() == tok_colon) { lex.next(); - declared_type = parse_type(lex, nullptr); + declared_type = parse_type_from_tokens(lex); } else if (lex.tok() == tok_redef) { lex.next(); marked_as_redef = true; } - return createV(loc, v_ident, declared_type, is_immutable, marked_as_redef); + return createV(loc, v_ident, declared_type, is_immutable, marked_as_redef); } if (lex.tok() == tok_underscore) { - TypeExpr* declared_type = nullptr; + TypePtr declared_type = nullptr; lex.next(); if (lex.tok() == tok_colon) { lex.next(); - declared_type = parse_type(lex, nullptr); + declared_type = parse_type_from_tokens(lex); } - return createV(loc, createV(loc), declared_type, true, false); + return createV(loc, createV(loc, ""), declared_type, true, false); } lex.unexpected("variable name"); } -static AnyV parse_local_vars_declaration(Lexer& lex) { +static AnyV parse_local_vars_declaration_assignment(Lexer& lex) { SrcLocation loc = lex.cur_location(); bool is_immutable = lex.tok() == tok_val; lex.next(); - AnyExprV lhs = parse_var_declaration_lhs(lex, is_immutable); + AnyExprV lhs = createV(loc, parse_var_declaration_lhs(lex, is_immutable)); if (lex.tok() != tok_assign) { lex.error("variables declaration must be followed by assignment: `var xxx = ...`"); } lex.next(); - AnyExprV assigned_val = parse_expr(lex); + AnyExprV rhs = parse_expr(lex); if (lex.tok() == tok_comma) { lex.error("multiple declarations are not allowed, split variables on separate lines"); } lex.expect(tok_semicolon, "`;`"); - return createV(loc, lhs, 
assigned_val); + return createV(loc, lhs, rhs); } static V parse_sequence(Lexer& lex) { @@ -711,8 +666,8 @@ static V parse_sequence(Lexer& lex) { static AnyV parse_return_statement(Lexer& lex) { SrcLocation loc = lex.cur_location(); lex.expect(tok_return, "`return`"); - AnyExprV child = lex.tok() == tok_semicolon // `return;` actually means `return ();` (which is void) - ? createV(lex.cur_location(), {}) + AnyExprV child = lex.tok() == tok_semicolon // `return;` actually means "nothing" (inferred as void) + ? createV(lex.cur_location()) : parse_expr(lex); lex.expect(tok_semicolon, "`;`"); return createV(loc, child); @@ -784,15 +739,22 @@ static AnyExprV parse_catch_variable(Lexer& lex) { if (lex.tok() == tok_identifier) { std::string_view var_name = lex.cur_str(); lex.next(); - return createV(loc, var_name); + auto v_ident = createV(loc, var_name); + return createV(loc, v_ident, nullptr); } if (lex.tok() == tok_underscore) { lex.next(); - return createV(loc); + auto v_ident = createV(loc, ""); + return createV(loc, v_ident, nullptr); } lex.unexpected("identifier"); } +static AnyExprV create_catch_underscore_variable(const Lexer& lex) { + auto v_ident = createV(lex.cur_location(), ""); + return createV(lex.cur_location(), v_ident, nullptr); +} + static AnyV parse_throw_statement(Lexer& lex) { SrcLocation loc = lex.cur_location(); lex.expect(tok_throw, "`throw`"); @@ -853,12 +815,12 @@ static AnyV parse_try_catch_statement(Lexer& lex) { lex.next(); catch_args.push_back(parse_catch_variable(lex)); } else { // catch (excNo) -> catch (excNo, _) - catch_args.push_back(createV(catch_loc)); + catch_args.push_back(create_catch_underscore_variable(lex)); } lex.expect(tok_clpar, "`)`"); } else { // catch -> catch (_, _) - catch_args.push_back(createV(catch_loc)); - catch_args.push_back(createV(catch_loc)); + catch_args.push_back(create_catch_underscore_variable(lex)); + catch_args.push_back(create_catch_underscore_variable(lex)); } V catch_expr = createV(catch_loc, 
std::move(catch_args)); @@ -868,9 +830,9 @@ static AnyV parse_try_catch_statement(Lexer& lex) { AnyV parse_statement(Lexer& lex) { switch (lex.tok()) { - case tok_var: - case tok_val: - return parse_local_vars_declaration(lex); + case tok_var: // `var x = 0` is technically an expression, but can not appear in "any place", + case tok_val: // only as a separate declaration + return parse_local_vars_declaration_assignment(lex); case tok_opbrace: return parse_sequence(lex); case tok_return: @@ -952,12 +914,10 @@ static AnyV parse_genericsT_list(Lexer& lex) { SrcLocation loc = lex.cur_location(); std::vector genericsT_items; lex.expect(tok_lt, "`<`"); - int idx = 0; while (true) { lex.check(tok_identifier, "T"); std::string_view nameT = lex.cur_str(); - TypeExpr* type = TypeExpr::new_var(idx++); - genericsT_items.emplace_back(createV(lex.cur_location(), type, nameT)); + genericsT_items.emplace_back(createV(lex.cur_location(), nameT)); lex.next(); if (lex.tok() != tok_comma) { break; @@ -1040,11 +1000,11 @@ static AnyV parse_function_declaration(Lexer& lex, const std::vectoras(); } - V v_param_list = parse_parameter_list(lex, genericsT_list)->as(); + V v_param_list = parse_parameter_list(lex)->as(); bool accepts_self = !v_param_list->empty() && v_param_list->get_param(0)->param_name == "self"; int n_mutate_params = v_param_list->get_mutate_params_count(); - TypeExpr* ret_type = nullptr; + TypePtr ret_type = nullptr; bool returns_self = false; if (lex.tok() == tok_colon) { // : (if absent, it means "auto infer", not void) lex.next(); @@ -1054,9 +1014,9 @@ static AnyV parse_function_declaration(Lexer& lex, const std::vector ret_tensor_items; - ret_tensor_items.reserve(1 + n_mutate_params); - for (AnyV v_param : v_param_list->get_params()) { - if (v_param->as()->declared_as_mutate) { - ret_tensor_items.emplace_back(v_param->as()->declared_type); - } - } - ret_tensor_items.emplace_back(ret_type ? 
ret_type : TypeExpr::new_hole()); - ret_type = TypeExpr::new_tensor(std::move(ret_tensor_items)); - } - AnyV v_body = nullptr; if (lex.tok() == tok_builtin) { @@ -1096,32 +1044,43 @@ static AnyV parse_function_declaration(Lexer& lex, const std::vector(loc, v_ident, v_param_list, v_body); - f_declaration->ret_type = ret_type ? ret_type : TypeExpr::new_hole(); - f_declaration->is_entrypoint = is_entrypoint; - f_declaration->genericsT_list = genericsT_list; - f_declaration->marked_as_get_method = is_get_method; - f_declaration->marked_as_builtin = v_body->type == ast_empty_statement; - f_declaration->accepts_self = accepts_self; - f_declaration->returns_self = returns_self; + int flags = 0; + if (is_entrypoint) { + flags |= FunctionData::flagIsEntrypoint; + } + if (is_get_method) { + flags |= FunctionData::flagGetMethod; + } + if (accepts_self) { + flags |= FunctionData::flagAcceptsSelf; + } + if (returns_self) { + flags |= FunctionData::flagReturnsSelf; + } + td::RefInt256 method_id; for (auto v_annotation : annotations) { switch (v_annotation->kind) { case AnnotationKind::inline_simple: - f_declaration->marked_as_inline = true; + flags |= FunctionData::flagInline; break; case AnnotationKind::inline_ref: - f_declaration->marked_as_inline_ref = true; + flags |= FunctionData::flagInlineRef; break; case AnnotationKind::pure: - f_declaration->marked_as_pure = true; + flags |= FunctionData::flagMarkedAsPure; break; - case AnnotationKind::method_id: + case AnnotationKind::method_id: { if (is_get_method || genericsT_list || is_entrypoint || n_mutate_params || accepts_self) { v_annotation->error("@method_id can be specified only for regular functions"); } - f_declaration->method_id = v_annotation->get_arg()->get_item(0)->as(); + auto v_int = v_annotation->get_arg()->get_item(0)->as(); + if (v_int->intval.is_null() || !v_int->intval->signed_fits_bits(32)) { + v_int->error("invalid integer constant"); + } + method_id = v_int->intval; break; + } case AnnotationKind::deprecated: 
// no special handling break; @@ -1131,7 +1090,7 @@ static AnyV parse_function_declaration(Lexer& lex, const std::vector(loc, v_ident, v_param_list, v_body, ret_type, genericsT_list, std::move(method_id), flags); } static AnyV parse_tolk_required_version(Lexer& lex) { @@ -1148,7 +1107,7 @@ static AnyV parse_tolk_required_version(Lexer& lex) { return createV(loc, semver); // semicolon is not necessary } -static AnyV parse_import_statement(Lexer& lex) { +static AnyV parse_import_directive(Lexer& lex) { SrcLocation loc = lex.cur_location(); lex.expect(tok_import, "`import`"); lex.check(tok_string_const, "source file name"); @@ -1158,7 +1117,7 @@ static AnyV parse_import_statement(Lexer& lex) { } auto v_str = createV(lex.cur_location(), rel_filename, 0); lex.next(); - return createV(loc, v_str); // semicolon is not necessary + return createV(loc, v_str); // semicolon is not necessary } // the main (exported) function @@ -1179,7 +1138,7 @@ AnyV parse_src_file_to_ast(const SrcFile* file) { if (!annotations.empty()) { lex.unexpected("declaration after @annotations"); } - toplevel_declarations.push_back(parse_import_statement(lex)); + toplevel_declarations.push_back(parse_import_directive(lex)); break; case tok_semicolon: if (!annotations.empty()) { diff --git a/tolk/ast-replacer.h b/tolk/ast-replacer.h index 45f4c638..c8350747 100644 --- a/tolk/ast-replacer.h +++ b/tolk/ast-replacer.h @@ -85,60 +85,65 @@ class ASTReplacerInFunctionBody : public ASTReplacer { protected: using parent = ASTReplacerInFunctionBody; + // expressions + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + 
virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + // statements virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - 
virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } AnyExprV replace(AnyExprV v) final { switch (v->type) { case ast_empty_expression: return replace(v->as()); case ast_parenthesized_expression: return replace(v->as()); case ast_tensor: return replace(v->as()); - case ast_tensor_square: return replace(v->as()); - case ast_identifier: return replace(v->as()); + case ast_typed_tuple: return replace(v->as()); + case ast_reference: return replace(v->as()); + case ast_local_var_lhs: return replace(v->as()); + case ast_local_vars_declaration: return replace(v->as()); case ast_int_const: return replace(v->as()); case ast_string_const: return replace(v->as()); case ast_bool_const: return replace(v->as()); case ast_null_keyword: return replace(v->as()); - case ast_self_keyword: return replace(v->as()); case ast_argument: return replace(v->as()); case ast_argument_list: return replace(v->as()); + case ast_dot_access: return replace(v->as()); case ast_function_call: return replace(v->as()); - case ast_dot_method_call: return 
replace(v->as()); case ast_underscore: return replace(v->as()); + case ast_assign: return replace(v->as()); + case ast_set_assign: return replace(v->as()); case ast_unary_operator: return replace(v->as()); case ast_binary_operator: return replace(v->as()); case ast_ternary_operator: return replace(v->as()); - case ast_local_var: return replace(v->as()); + case ast_cast_as_operator: return replace(v->as()); default: throw UnexpectedASTNodeType(v, "ASTReplacerInFunctionBody::replace"); } @@ -147,17 +152,19 @@ protected: AnyV replace(AnyV v) final { switch (v->type) { case ast_empty_statement: return replace(v->as()); - case ast_return_statement: return replace(v->as()); case ast_sequence: return replace(v->as()); + case ast_return_statement: return replace(v->as()); + case ast_if_statement: return replace(v->as()); case ast_repeat_statement: return replace(v->as()); case ast_while_statement: return replace(v->as()); case ast_do_while_statement: return replace(v->as()); case ast_throw_statement: return replace(v->as()); case ast_assert_statement: return replace(v->as()); case ast_try_catch_statement: return replace(v->as()); - case ast_if_statement: return replace(v->as()); - case ast_local_vars_declaration: return replace(v->as()); - case ast_asm_body: return replace(v->as()); +#ifdef TOLK_DEBUG + case ast_asm_body: + throw UnexpectedASTNodeType(v, "ASTReplacer::replace"); +#endif default: { // be very careful, don't forget to handle all statements (not expressions) above! 
AnyExprV as_expr = reinterpret_cast(v); @@ -167,21 +174,22 @@ protected: } public: - void start_replacing_in_function(V v) { - replace(v->get_body()); + virtual bool should_visit_function(const FunctionData* fun_ref) = 0; + + void start_replacing_in_function(const FunctionData* fun_ref, V v_function) { + replace(v_function->get_body()); } }; + +const std::vector& get_all_not_builtin_functions(); + template -void replace_ast_of_all_functions(const AllSrcFiles& all_files) { - for (const SrcFile* file : all_files) { - for (AnyV v : file->ast->as()->get_toplevel_declarations()) { - if (auto v_func = v->try_as()) { - if (v_func->is_regular_function()) { - BodyReplacerT visitor; - visitor.start_replacing_in_function(v_func); - } - } +void replace_ast_of_all_functions() { + BodyReplacerT visitor; + for (const FunctionData* fun_ref : get_all_not_builtin_functions()) { + if (visitor.should_visit_function(fun_ref)) { + visitor.start_replacing_in_function(fun_ref, fun_ref->ast_root->as()); } } } diff --git a/tolk/ast-replicator.h b/tolk/ast-replicator.h new file mode 100644 index 00000000..02198adb --- /dev/null +++ b/tolk/ast-replicator.h @@ -0,0 +1,255 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#pragma once + +#include "ast.h" +#include "platform-utils.h" + +namespace tolk { + +class ASTReplicator { +protected: + virtual AnyV clone(AnyV v) = 0; + virtual AnyExprV clone(AnyExprV v) = 0; + virtual TypePtr clone(TypePtr) = 0; + +public: + virtual ~ASTReplicator() = default; +}; + +class ASTReplicatorFunction : public ASTReplicator { +protected: + using parent = ASTReplicatorFunction; + + std::vector clone(const std::vector& items) { + std::vector result; + result.reserve(items.size()); + for (AnyV item : items) { + result.push_back(clone(item)); + } + return result; + } + + std::vector clone(const std::vector& items) { + std::vector result; + result.reserve(items.size()); + for (AnyExprV item : items) { + result.push_back(clone(item)); + } + return result; + } + + // expressions + + virtual V clone(V v) { + return createV(v->loc); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_expr())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_items())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_items())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_identifier()), v->has_instantiationTs() ? 
clone(v->get_instantiationTs()) : nullptr); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_identifier()), clone(v->declared_type), v->is_immutable, v->marked_as_redef); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_expr())); + } + virtual V clone(V v) { + return createV(v->loc, v->intval, v->orig_str); + } + virtual V clone(V v) { + return createV(v->loc, v->str_val, v->modifier); + } + virtual V clone(V v) { + return createV(v->loc, v->bool_val); + } + virtual V clone(V v) { + return createV(v->loc); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_expr()), v->passed_as_mutate); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_arguments())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_obj()), clone(v->get_identifier()), v->has_instantiationTs() ? clone(v->get_instantiationTs()) : nullptr); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_callee()), clone(v->get_arg_list())); + } + virtual V clone(V v) { + return createV(v->loc); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_lhs()), clone(v->get_rhs())); + } + virtual V clone(V v) { + return createV(v->loc, v->operator_name, v->tok, clone(v->get_lhs()), clone(v->get_rhs())); + } + virtual V clone(V v) { + return createV(v->loc, v->operator_name, v->tok, clone(v->get_rhs())); + } + virtual V clone(V v) { + return createV(v->loc, v->operator_name, v->tok, clone(v->get_lhs()), clone(v->get_rhs())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_cond()), clone(v->get_when_true()), clone(v->get_when_false())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_expr()), clone(v->cast_to_type)); + } + + // statements + + virtual V clone(V v) { + return createV(v->loc); + } + virtual V clone(V v) { + return createV(v->loc, v->loc_end, clone(v->get_items())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_return_value())); + } + 
virtual V clone(V v) { + return createV(v->loc, v->is_ifnot, clone(v->get_cond()), clone(v->get_if_body()), clone(v->get_else_body())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_cond()), clone(v->get_body())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_cond()), clone(v->get_body())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_body()), clone(v->get_cond())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_thrown_code()), clone(v->get_thrown_arg())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_cond()), clone(v->get_thrown_code())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_try_body()), clone(v->get_catch_expr()), clone(v->get_catch_body())); + } + virtual V clone(V v) { + return createV(v->loc, v->arg_order, v->ret_order, clone(v->get_asm_commands())); + } + + // other + + virtual V clone(V v) { + return createV(v->loc, v->name); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->substituted_type)); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_items())); + } + virtual V clone(V v) { + return createV(v->loc, v->param_name, clone(v->declared_type), v->declared_as_mutate); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_params())); + } + + AnyExprV clone(AnyExprV v) final { + switch (v->type) { + case ast_empty_expression: return clone(v->as()); + case ast_parenthesized_expression: return clone(v->as()); + case ast_tensor: return clone(v->as()); + case ast_typed_tuple: return clone(v->as()); + case ast_reference: return clone(v->as()); + case ast_local_var_lhs: return clone(v->as()); + case ast_local_vars_declaration: return clone(v->as()); + case ast_int_const: return clone(v->as()); + case ast_string_const: return clone(v->as()); + case ast_bool_const: return clone(v->as()); + case ast_null_keyword: return clone(v->as()); + case ast_argument: return clone(v->as()); + case 
ast_argument_list: return clone(v->as()); + case ast_dot_access: return clone(v->as()); + case ast_function_call: return clone(v->as()); + case ast_underscore: return clone(v->as()); + case ast_assign: return clone(v->as()); + case ast_set_assign: return clone(v->as()); + case ast_unary_operator: return clone(v->as()); + case ast_binary_operator: return clone(v->as()); + case ast_ternary_operator: return clone(v->as()); + case ast_cast_as_operator: return clone(v->as()); + default: + throw UnexpectedASTNodeType(v, "ASTReplicatorFunction::clone"); + } + } + + AnyV clone(AnyV v) final { + switch (v->type) { + case ast_empty_statement: return clone(v->as()); + case ast_sequence: return clone(v->as()); + case ast_return_statement: return clone(v->as()); + case ast_if_statement: return clone(v->as()); + case ast_repeat_statement: return clone(v->as()); + case ast_while_statement: return clone(v->as()); + case ast_do_while_statement: return clone(v->as()); + case ast_throw_statement: return clone(v->as()); + case ast_assert_statement: return clone(v->as()); + case ast_try_catch_statement: return clone(v->as()); + case ast_asm_body: return clone(v->as()); + // other AST nodes that can be children of ast nodes of function body + case ast_identifier: return clone(v->as()); + case ast_instantiationT_item: return clone(v->as()); + case ast_instantiationT_list: return clone(v->as()); + case ast_parameter: return clone(v->as()); + case ast_parameter_list: return clone(v->as()); + + default: { + // be very careful, don't forget to handle all statements/other (not expressions) above! 
+ AnyExprV as_expr = reinterpret_cast(v); + return clone(as_expr); + } + } + } + + TypePtr clone(TypePtr t) override { + return t; + } + + public: + virtual V clone_function_body(V v_function) { + return createV( + v_function->loc, + clone(v_function->get_identifier()), + clone(v_function->get_param_list()), + clone(v_function->get_body()->as()), + clone(v_function->declared_return_type), + v_function->genericsT_list, + v_function->method_id, + v_function->flags + ); + } +}; + +} // namespace tolk diff --git a/tolk/ast-stringifier.h b/tolk/ast-stringifier.h index cc91371c..4ec72cdd 100644 --- a/tolk/ast-stringifier.h +++ b/tolk/ast-stringifier.h @@ -20,6 +20,7 @@ #include "ast.h" #include "ast-visitor.h" +#include "type-system.h" #include /* @@ -31,47 +32,55 @@ namespace tolk { class ASTStringifier final : public ASTVisitor { constexpr static std::pair name_pairs[] = { - {ast_empty_statement, "ast_empty_statement"}, + {ast_identifier, "ast_identifier"}, + // expressions {ast_empty_expression, "ast_empty_expression"}, {ast_parenthesized_expression, "ast_parenthesized_expression"}, {ast_tensor, "ast_tensor"}, - {ast_tensor_square, "ast_tensor_square"}, - {ast_identifier, "ast_identifier"}, + {ast_typed_tuple, "ast_typed_tuple"}, + {ast_reference, "ast_reference"}, + {ast_local_var_lhs, "ast_local_var_lhs"}, + {ast_local_vars_declaration, "ast_local_vars_declaration"}, {ast_int_const, "ast_int_const"}, {ast_string_const, "ast_string_const"}, {ast_bool_const, "ast_bool_const"}, {ast_null_keyword, "ast_null_keyword"}, - {ast_self_keyword, "ast_self_keyword"}, {ast_argument, "ast_argument"}, {ast_argument_list, "ast_argument_list"}, + {ast_dot_access, "ast_dot_access"}, {ast_function_call, "ast_function_call"}, - {ast_dot_method_call, "ast_dot_method_call"}, - {ast_global_var_declaration, "ast_global_var_declaration"}, - {ast_constant_declaration, "ast_constant_declaration"}, {ast_underscore, "ast_underscore"}, + {ast_assign, "ast_assign"}, + {ast_set_assign, 
"ast_set_assign"}, {ast_unary_operator, "ast_unary_operator"}, {ast_binary_operator, "ast_binary_operator"}, {ast_ternary_operator, "ast_ternary_operator"}, - {ast_return_statement, "ast_return_statement"}, + {ast_cast_as_operator, "ast_cast_as_operator"}, + // statements + {ast_empty_statement, "ast_empty_statement"}, {ast_sequence, "ast_sequence"}, + {ast_return_statement, "ast_return_statement"}, + {ast_if_statement, "ast_if_statement"}, {ast_repeat_statement, "ast_repeat_statement"}, {ast_while_statement, "ast_while_statement"}, {ast_do_while_statement, "ast_do_while_statement"}, {ast_throw_statement, "ast_throw_statement"}, {ast_assert_statement, "ast_assert_statement"}, {ast_try_catch_statement, "ast_try_catch_statement"}, - {ast_if_statement, "ast_if_statement"}, + {ast_asm_body, "ast_asm_body"}, + // other {ast_genericsT_item, "ast_genericsT_item"}, {ast_genericsT_list, "ast_genericsT_list"}, + {ast_instantiationT_item, "ast_instantiationT_item"}, + {ast_instantiationT_list, "ast_instantiationT_list"}, {ast_parameter, "ast_parameter"}, {ast_parameter_list, "ast_parameter_list"}, - {ast_asm_body, "ast_asm_body"}, {ast_annotation, "ast_annotation"}, {ast_function_declaration, "ast_function_declaration"}, - {ast_local_var, "ast_local_var"}, - {ast_local_vars_declaration, "ast_local_vars_declaration"}, + {ast_global_var_declaration, "ast_global_var_declaration"}, + {ast_constant_declaration, "ast_constant_declaration"}, {ast_tolk_required_version, "ast_tolk_required_version"}, - {ast_import_statement, "ast_import_statement"}, + {ast_import_directive, "ast_import_directive"}, {ast_tolk_file, "ast_tolk_file"}, }; @@ -115,6 +124,13 @@ class ASTStringifier final : public ASTVisitor { switch (v->type) { case ast_identifier: return static_cast(v->as()->name); + case ast_reference: { + std::string result(v->as()->get_name()); + if (v->as()->has_instantiationTs()) { + result += specific_str(v->as()->get_instantiationTs()); + } + return result; + } case ast_int_const: 
return static_cast(v->as()->orig_str); case ast_string_const: @@ -123,24 +139,40 @@ class ASTStringifier final : public ASTVisitor { } else { return "\"" + static_cast(v->as()->str_val) + "\""; } - case ast_function_call: { - if (auto v_lhs = v->as()->get_called_f()->try_as()) { - return static_cast(v_lhs->name) + "()"; + case ast_bool_const: + return v->as()->bool_val ? "true" : "false"; + case ast_dot_access: { + std::string result = "." + static_cast(v->as()->get_field_name()); + if (v->as()->has_instantiationTs()) { + result += specific_str(v->as()->get_instantiationTs()); } - return {}; + return result; + } + case ast_function_call: { + std::string inner = specific_str(v->as()->get_callee()); + if (int n_args = v->as()->get_num_args()) { + return inner + "(..." + std::to_string(n_args) + ")"; + } + return inner + "()"; } - case ast_dot_method_call: - return static_cast(v->as()->method_name); case ast_global_var_declaration: return static_cast(v->as()->get_identifier()->name); case ast_constant_declaration: return static_cast(v->as()->get_identifier()->name); + case ast_assign: + return "="; + case ast_set_assign: + return static_cast(v->as()->operator_name) + "="; case ast_unary_operator: return static_cast(v->as()->operator_name); case ast_binary_operator: return static_cast(v->as()->operator_name); + case ast_cast_as_operator: + return v->as()->cast_to_type->as_human_readable(); case ast_sequence: return "↓" + std::to_string(v->as()->get_items().size()); + case ast_instantiationT_item: + return v->as()->substituted_type->as_human_readable(); case ast_if_statement: return v->as()->is_ifnot ? "ifnot" : ""; case ast_annotation: @@ -159,18 +191,27 @@ class ASTStringifier final : public ASTVisitor { } return "fun " + static_cast(v->as()->get_identifier()->name) + "(" + param_names + ")"; } - case ast_local_var: { + case ast_local_var_lhs: { std::ostringstream os; - os << (v->as()->inferred_type ? 
v->as()->inferred_type : v->as()->declared_type); - if (auto v_ident = v->as()->get_identifier()->try_as()) { - return static_cast(v_ident->name) + ":" + os.str(); + os << (v->as()->inferred_type ? v->as()->inferred_type : v->as()->declared_type); + if (v->as()->get_name().empty()) { + return "_: " + os.str(); } - return "_: " + os.str(); + return static_cast(v->as()->get_name()) + ":" + os.str(); + } + case ast_instantiationT_list: { + std::string result = "<"; + for (AnyV item : v->as()->get_items()) { + if (result.size() > 1) + result += ","; + result += item->as()->substituted_type->as_human_readable(); + } + return result + ">"; } case ast_tolk_required_version: return static_cast(v->as()->semver); - case ast_import_statement: - return static_cast(v->as()->get_file_leaf()->str_val); + case ast_import_directive: + return static_cast(v->as()->get_file_leaf()->str_val); case ast_tolk_file: return v->as()->file->rel_filename; default: @@ -203,47 +244,55 @@ public: void visit(AnyV v) override { switch (v->type) { - case ast_empty_statement: return handle_vertex(v->as()); + case ast_identifier: return handle_vertex(v->as()); + // expressions case ast_empty_expression: return handle_vertex(v->as()); case ast_parenthesized_expression: return handle_vertex(v->as()); case ast_tensor: return handle_vertex(v->as()); - case ast_tensor_square: return handle_vertex(v->as()); - case ast_identifier: return handle_vertex(v->as()); + case ast_typed_tuple: return handle_vertex(v->as()); + case ast_reference: return handle_vertex(v->as()); + case ast_local_var_lhs: return handle_vertex(v->as()); + case ast_local_vars_declaration: return handle_vertex(v->as()); case ast_int_const: return handle_vertex(v->as()); case ast_string_const: return handle_vertex(v->as()); case ast_bool_const: return handle_vertex(v->as()); case ast_null_keyword: return handle_vertex(v->as()); - case ast_self_keyword: return handle_vertex(v->as()); case ast_argument: return handle_vertex(v->as()); case 
ast_argument_list: return handle_vertex(v->as()); + case ast_dot_access: return handle_vertex(v->as()); case ast_function_call: return handle_vertex(v->as()); - case ast_dot_method_call: return handle_vertex(v->as()); - case ast_global_var_declaration: return handle_vertex(v->as()); - case ast_constant_declaration: return handle_vertex(v->as()); case ast_underscore: return handle_vertex(v->as()); + case ast_assign: return handle_vertex(v->as()); + case ast_set_assign: return handle_vertex(v->as()); case ast_unary_operator: return handle_vertex(v->as()); case ast_binary_operator: return handle_vertex(v->as()); case ast_ternary_operator: return handle_vertex(v->as()); - case ast_return_statement: return handle_vertex(v->as()); + case ast_cast_as_operator: return handle_vertex(v->as()); + // statements + case ast_empty_statement: return handle_vertex(v->as()); case ast_sequence: return handle_vertex(v->as()); + case ast_return_statement: return handle_vertex(v->as()); + case ast_if_statement: return handle_vertex(v->as()); case ast_repeat_statement: return handle_vertex(v->as()); case ast_while_statement: return handle_vertex(v->as()); case ast_do_while_statement: return handle_vertex(v->as()); case ast_throw_statement: return handle_vertex(v->as()); case ast_assert_statement: return handle_vertex(v->as()); case ast_try_catch_statement: return handle_vertex(v->as()); - case ast_if_statement: return handle_vertex(v->as()); + case ast_asm_body: return handle_vertex(v->as()); + // other case ast_genericsT_item: return handle_vertex(v->as()); case ast_genericsT_list: return handle_vertex(v->as()); + case ast_instantiationT_item: return handle_vertex(v->as()); + case ast_instantiationT_list: return handle_vertex(v->as()); case ast_parameter: return handle_vertex(v->as()); case ast_parameter_list: return handle_vertex(v->as()); - case ast_asm_body: return handle_vertex(v->as()); case ast_annotation: return handle_vertex(v->as()); case ast_function_declaration: return 
handle_vertex(v->as()); - case ast_local_var: return handle_vertex(v->as()); - case ast_local_vars_declaration: return handle_vertex(v->as()); + case ast_global_var_declaration: return handle_vertex(v->as()); + case ast_constant_declaration: return handle_vertex(v->as()); case ast_tolk_required_version: return handle_vertex(v->as()); - case ast_import_statement: return handle_vertex(v->as()); + case ast_import_directive: return handle_vertex(v->as()); case ast_tolk_file: return handle_vertex(v->as()); default: throw UnexpectedASTNodeType(v, "ASTStringifier::visit"); diff --git a/tolk/ast-visitor.h b/tolk/ast-visitor.h index a67f6800..a54cb13b 100644 --- a/tolk/ast-visitor.h +++ b/tolk/ast-visitor.h @@ -86,92 +86,103 @@ class ASTVisitorFunctionBody : public ASTVisitor { protected: using parent = ASTVisitorFunctionBody; - virtual void visit(V v) { return visit_children(v); } + // expressions virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); 
} + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + // statements + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } void visit(AnyV v) final { switch (v->type) { - case ast_empty_statement: return visit(v->as()); + // expressions case ast_empty_expression: return visit(v->as()); case ast_parenthesized_expression: return visit(v->as()); case ast_tensor: return visit(v->as()); - case ast_tensor_square: return visit(v->as()); - case ast_identifier: return visit(v->as()); + case ast_typed_tuple: return visit(v->as()); + case ast_reference: return visit(v->as()); + case ast_local_var_lhs: return visit(v->as()); + case ast_local_vars_declaration: return visit(v->as()); case ast_int_const: return visit(v->as()); case ast_string_const: return visit(v->as()); case ast_bool_const: return visit(v->as()); case ast_null_keyword: return visit(v->as()); - case ast_self_keyword: return visit(v->as()); case ast_argument: return visit(v->as()); 
case ast_argument_list: return visit(v->as()); + case ast_dot_access: return visit(v->as()); case ast_function_call: return visit(v->as()); - case ast_dot_method_call: return visit(v->as()); case ast_underscore: return visit(v->as()); + case ast_assign: return visit(v->as()); + case ast_set_assign: return visit(v->as()); case ast_unary_operator: return visit(v->as()); case ast_binary_operator: return visit(v->as()); case ast_ternary_operator: return visit(v->as()); - case ast_return_statement: return visit(v->as()); + case ast_cast_as_operator: return visit(v->as()); + // statements + case ast_empty_statement: return visit(v->as()); case ast_sequence: return visit(v->as()); + case ast_return_statement: return visit(v->as()); + case ast_if_statement: return visit(v->as()); case ast_repeat_statement: return visit(v->as()); case ast_while_statement: return visit(v->as()); case ast_do_while_statement: return visit(v->as()); case ast_throw_statement: return visit(v->as()); case ast_assert_statement: return visit(v->as()); case ast_try_catch_statement: return visit(v->as()); - case ast_if_statement: return visit(v->as()); - case ast_local_var: return visit(v->as()); - case ast_local_vars_declaration: return visit(v->as()); - case ast_asm_body: return visit(v->as()); +#ifdef TOLK_DEBUG + case ast_asm_body: + throw UnexpectedASTNodeType(v, "ASTVisitor; forgot to filter out asm functions in should_visit_function()?"); +#endif default: throw UnexpectedASTNodeType(v, "ASTVisitorFunctionBody::visit"); } } public: - virtual void start_visiting_function(V v_function) { + virtual bool should_visit_function(const FunctionData* fun_ref) = 0; + + virtual void start_visiting_function(const FunctionData* fun_ref, V v_function) { visit(v_function->get_body()); } }; + +const std::vector& get_all_not_builtin_functions(); + template -void visit_ast_of_all_functions(const AllSrcFiles& all_files) { - for (const SrcFile* file : all_files) { - for (AnyV v : 
file->ast->as()->get_toplevel_declarations()) { - if (auto v_func = v->try_as()) { - if (v_func->is_regular_function()) { - BodyVisitorT visitor; - visitor.start_visiting_function(v_func); - } - } +void visit_ast_of_all_functions() { + BodyVisitorT visitor; + for (const FunctionData* fun_ref : get_all_not_builtin_functions()) { + if (visitor.should_visit_function(fun_ref)) { + visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); } } } diff --git a/tolk/ast.cpp b/tolk/ast.cpp index 4e78b013..092260ff 100644 --- a/tolk/ast.cpp +++ b/tolk/ast.cpp @@ -15,8 +15,9 @@ along with TON Blockchain Library. If not, see . */ #include "ast.h" +#ifdef TOLK_DEBUG #include "ast-stringifier.h" -#include +#endif namespace tolk { @@ -104,7 +105,7 @@ int Vertex::get_mutate_params_count() const { // Therefore, there is a guarantee, that all AST mutations are done via these methods, // easily searched by usages, and there is no another way to modify any other field. -void ASTNodeExpressionBase::assign_inferred_type(TypeExpr* type) { +void ASTNodeExpressionBase::assign_inferred_type(TypePtr type) { this->inferred_type = type; } @@ -116,43 +117,79 @@ void ASTNodeExpressionBase::assign_lvalue_true() { this->is_lvalue = true; } -void Vertex::assign_sym(const Symbol* sym) { +void Vertex::assign_sym(const Symbol* sym) { this->sym = sym; } -void Vertex::assign_param_ref(const LocalVarData* self_param) { - this->param_ref = self_param; -} - void Vertex::assign_fun_ref(const FunctionData* fun_ref) { this->fun_maybe = fun_ref; } -void Vertex::assign_fun_ref(const FunctionData* fun_ref) { - this->fun_ref = fun_ref; +void Vertex::assign_resolved_type(TypePtr cast_to_type) { + this->cast_to_type = cast_to_type; } void Vertex::assign_var_ref(const GlobalVarData* var_ref) { this->var_ref = var_ref; } +void Vertex::assign_resolved_type(TypePtr declared_type) { + this->declared_type = declared_type; +} + void Vertex::assign_const_ref(const GlobalConstData* const_ref) { this->const_ref = 
const_ref; } +void Vertex::assign_resolved_type(TypePtr declared_type) { + this->declared_type = declared_type; +} + +void Vertex::assign_resolved_type(TypePtr substituted_type) { + this->substituted_type = substituted_type; +} + void Vertex::assign_param_ref(const LocalVarData* param_ref) { this->param_ref = param_ref; } +void Vertex::assign_resolved_type(TypePtr declared_type) { + this->declared_type = declared_type; +} + +void Vertex::assign_fun_ref(const FunctionData* fun_ref) { + this->fun_ref = fun_ref; +} + +void Vertex::assign_fun_ref(const FunctionData* fun_ref) { + this->fun_ref = fun_ref; +} + +void Vertex::assign_fun_ref(const FunctionData* fun_ref) { + this->fun_ref = fun_ref; +} + +void Vertex::assign_target(const DotTarget& target) { + this->target = target; +} + void Vertex::assign_fun_ref(const FunctionData* fun_ref) { this->fun_ref = fun_ref; } -void Vertex::assign_var_ref(const Symbol* var_ref) { - this->var_maybe = var_ref; +void Vertex::assign_resolved_type(TypePtr declared_return_type) { + this->declared_return_type = declared_return_type; } -void Vertex::assign_src_file(const SrcFile* file) { +void Vertex::assign_var_ref(const LocalVarData* var_ref) { + this->var_ref = var_ref; +} + +void Vertex::assign_resolved_type(TypePtr declared_type) { + this->declared_type = declared_type; +} + +void Vertex::assign_src_file(const SrcFile* file) { this->file = file; } diff --git a/tolk/ast.h b/tolk/ast.h index ccc4ac58..b90507e7 100644 --- a/tolk/ast.h +++ b/tolk/ast.h @@ -20,7 +20,6 @@ #include "fwd-declarations.h" #include "platform-utils.h" #include "src-file.h" -#include "type-expr.h" #include "lexer.h" #include "symtable.h" @@ -65,47 +64,55 @@ namespace tolk { enum ASTNodeType { - ast_empty_statement, + ast_identifier, + // expressions ast_empty_expression, ast_parenthesized_expression, ast_tensor, - ast_tensor_square, - ast_identifier, + ast_typed_tuple, + ast_reference, + ast_local_var_lhs, + ast_local_vars_declaration, ast_int_const, 
ast_string_const, ast_bool_const, ast_null_keyword, - ast_self_keyword, ast_argument, ast_argument_list, + ast_dot_access, ast_function_call, - ast_dot_method_call, - ast_global_var_declaration, - ast_constant_declaration, ast_underscore, + ast_assign, + ast_set_assign, ast_unary_operator, ast_binary_operator, ast_ternary_operator, - ast_return_statement, + ast_cast_as_operator, + // statements + ast_empty_statement, ast_sequence, + ast_return_statement, + ast_if_statement, ast_repeat_statement, ast_while_statement, ast_do_while_statement, ast_throw_statement, ast_assert_statement, ast_try_catch_statement, - ast_if_statement, + ast_asm_body, + // other ast_genericsT_item, ast_genericsT_list, + ast_instantiationT_item, + ast_instantiationT_list, ast_parameter, ast_parameter_list, - ast_asm_body, ast_annotation, ast_function_declaration, - ast_local_var, - ast_local_vars_declaration, + ast_global_var_declaration, + ast_constant_declaration, ast_tolk_required_version, - ast_import_statement, + ast_import_directive, ast_tolk_file, }; @@ -144,6 +151,7 @@ struct ASTNodeBase { const SrcLocation loc; ASTNodeBase(ASTNodeType type, SrcLocation loc) : type(type), loc(loc) {} + ASTNodeBase(const ASTNodeBase&) = delete; template V as() const { @@ -171,12 +179,14 @@ struct ASTNodeBase { }; struct ASTNodeExpressionBase : ASTNodeBase { - TypeExpr* inferred_type = nullptr; // todo make it const + friend class ASTDuplicatorFunction; + + TypePtr inferred_type = nullptr; bool is_rvalue: 1 = false; bool is_lvalue: 1 = false; ASTNodeExpressionBase* mutate() const { return const_cast(this); } - void assign_inferred_type(TypeExpr* type); + void assign_inferred_type(TypePtr type); void assign_rvalue_true(); void assign_lvalue_true(); @@ -226,6 +236,8 @@ struct ASTExprVararg : ASTNodeExpressionBase { protected: std::vector children; + AnyExprV child(int i) const { return children.at(i); } + ASTExprVararg(ASTNodeType type, SrcLocation loc, std::vector children) : ASTNodeExpressionBase(type, 
loc), children(std::move(children)) {} @@ -254,7 +266,6 @@ struct ASTStatementVararg : ASTNodeStatementBase { protected: std::vector children; - AnyV child(int i) const { return children.at(i); } AnyExprV child_as_expr(int i) const { return reinterpret_cast(children.at(i)); } ASTStatementVararg(ASTNodeType type, SrcLocation loc, std::vector children) @@ -281,7 +292,7 @@ struct ASTOtherVararg : ASTNodeBase { protected: std::vector children; - AnyV child(int i) const { return children.at(i); } + AnyExprV child_as_expr(int i) const { return reinterpret_cast(children.at(i)); } ASTOtherVararg(ASTNodeType type, SrcLocation loc, std::vector children) : ASTNodeBase(type, loc), children(std::move(children)) {} @@ -291,21 +302,42 @@ public: bool empty() const { return children.empty(); } }; -// --------------------------------------------------------- template<> -struct Vertex final : ASTStatementVararg { - explicit Vertex(SrcLocation loc) - : ASTStatementVararg(ast_empty_statement, loc, {}) {} +// ast_identifier is "a name" in AST structure +// it's NOT a standalone expression, it's "implementation details" of other AST vertices +// example: `var x = 5` then "x" is identifier (inside local var declaration) +// example: `global g: int` then "g" is identifier +// example: `someF` is a reference, which contains identifier +// example: `someF` is a reference which contains identifier and generics instantiation +// example: `fun f()` then "f" is identifier, "" is a generics declaration +struct Vertex final : ASTOtherLeaf { + std::string_view name; // empty for underscore + + Vertex(SrcLocation loc, std::string_view name) + : ASTOtherLeaf(ast_identifier, loc) + , name(name) {} }; + +// +// --------------------------------------------------------- +// expressions +// + + template<> +// ast_empty_expression is "nothing" in context of expression, it has "unknown" type +// example: `throw 123;` then "throw arg" is empty expression (opposed to `throw (123, arg)`) struct Vertex final : 
ASTExprLeaf { explicit Vertex(SrcLocation loc) : ASTExprLeaf(ast_empty_expression, loc) {} }; + template<> +// ast_parenthesized_expression is something surrounded embraced by (parenthesis) +// example: `(1)`, `((f()))` (two nested) struct Vertex final : ASTExprUnary { AnyExprV get_expr() const { return child; } @@ -314,37 +346,101 @@ struct Vertex final : ASTExprUnary { }; template<> +// ast_tensor is a set of expressions embraced by (parenthesis) +// in most languages, it's called "tuple", but in TVM, "tuple" is a TVM primitive, that's why "tensor" +// example: `(1, 2)`, `(1, (2, 3))` (nested), `()` (empty tensor) +// note, that `(1)` is not a tensor, it's a parenthesized expression +// a tensor of N elements occupies N slots on a stack (opposed to TVM tuple primitive, 1 slot) struct Vertex final : ASTExprVararg { const std::vector& get_items() const { return children; } - AnyExprV get_item(int i) const { return children.at(i); } + AnyExprV get_item(int i) const { return child(i); } Vertex(SrcLocation loc, std::vector items) : ASTExprVararg(ast_tensor, loc, std::move(items)) {} }; template<> -struct Vertex final : ASTExprVararg { +// ast_typed_tuple is a set of expressions in [square brackets] +// in TVM, it's a TVM tuple, that occupies 1 slot, but the compiler knows its "typed structure" +// example: `[1, x]`, `[[0]]` (nested) +// typed tuples can be assigned to N variables, like `[one, _, three] = [1,2,3]` +struct Vertex final : ASTExprVararg { const std::vector& get_items() const { return children; } - AnyExprV get_item(int i) const { return children.at(i); } + AnyExprV get_item(int i) const { return child(i); } Vertex(SrcLocation loc, std::vector items) - : ASTExprVararg(ast_tensor_square, loc, std::move(items)) {} + : ASTExprVararg(ast_typed_tuple, loc, std::move(items)) {} }; template<> -struct Vertex final : ASTExprLeaf { - const Symbol* sym = nullptr; // always filled (after resolved); points to local / global / function / constant - std::string_view 
name; +// ast_reference is "something that references a symbol" +// examples: `x` / `someF` / `someF` +// it's a leaf expression from traversing point of view, but actually, has children (not expressions) +// note, that both `someF()` and `someF()` are function calls, where a callee is just a reference +struct Vertex final : ASTExprLeaf { +private: + V identifier; // its name, `x` / `someF` + V instantiationTs; // not null if ``, otherwise nullptr + +public: + const Symbol* sym = nullptr; // filled on resolve or type inferring; points to local / global / function / constant + + auto get_identifier() const { return identifier; } + bool has_instantiationTs() const { return instantiationTs != nullptr; } + auto get_instantiationTs() const { return instantiationTs; } + std::string_view get_name() const { return identifier->name; } Vertex* mutate() const { return const_cast(this); } void assign_sym(const Symbol* sym); - Vertex(SrcLocation loc, std::string_view name) - : ASTExprLeaf(ast_identifier, loc) - , name(name) {} + Vertex(SrcLocation loc, V name_identifier, V instantiationTs) + : ASTExprLeaf(ast_reference, loc) + , identifier(name_identifier), instantiationTs(instantiationTs) {} }; template<> +// ast_local_var_lhs is one variable inside `var` declaration +// example: `var x = 0;` then "x" is local var lhs +// example: `val (x: int, [y redef], _) = rhs` then "x" and "y" and "_" are +// it's a leaf from expression's point of view, though technically has an "identifier" child +struct Vertex final : ASTExprLeaf { +private: + V identifier; + +public: + const LocalVarData* var_ref = nullptr; // filled on resolve identifiers; for `redef` points to declared above; for underscore, name is empty + TypePtr declared_type; // not null for `var x: int = rhs`, otherwise nullptr + bool is_immutable; // declared via 'val', not 'var' + bool marked_as_redef; // var (existing_var redef, new_var: int) = ... 
+ + V get_identifier() const { return identifier; } + std::string_view get_name() const { return identifier->name; } // empty for underscore + + Vertex* mutate() const { return const_cast(this); } + void assign_var_ref(const LocalVarData* var_ref); + void assign_resolved_type(TypePtr declared_type); + + Vertex(SrcLocation loc, V identifier, TypePtr declared_type, bool is_immutable, bool marked_as_redef) + : ASTExprLeaf(ast_local_var_lhs, loc) + , identifier(identifier), declared_type(declared_type), is_immutable(is_immutable), marked_as_redef(marked_as_redef) {} +}; + +template<> +// ast_local_vars_declaration is an expression declaring local variables on the left side of assignment +// examples: see above +// for `var (x, [y])` its expr is "tensor (local var, typed tuple (local var))" +// for assignment `var x = 5`, this node is `var x`, lhs of assignment +struct Vertex final : ASTExprUnary { + AnyExprV get_expr() const { return child; } // ast_local_var_lhs / ast_tensor / ast_typed_tuple + + Vertex(SrcLocation loc, AnyExprV expr) + : ASTExprUnary(ast_local_vars_declaration, loc, expr) {} +}; + +template<> +// ast_int_const is an integer literal +// examples: `0` / `0xFF` +// note, that `-1` is unary minus of `1` int const struct Vertex final : ASTExprLeaf { td::RefInt256 intval; // parsed value, 255 for "0xFF" std::string_view orig_str; // original "0xFF"; empty for nodes generated by compiler (e.g. 
in constant folding) @@ -356,6 +452,10 @@ struct Vertex final : ASTExprLeaf { }; template<> +// ast_string_const is a string literal in double quotes or """ when multiline +// examples: "asdf" / "Ef8zMz..."a / "to_calc_crc32_from"c +// an optional modifier specifies how a string is parsed (probably, like an integer) +// note, that TVM doesn't have strings, it has only slices, so "hello" has type slice struct Vertex final : ASTExprLeaf { std::string_view str_val; char modifier; @@ -375,6 +475,7 @@ struct Vertex final : ASTExprLeaf { }; template<> +// ast_bool_const is either `true` or `false` struct Vertex final : ASTExprLeaf { bool bool_val; @@ -384,25 +485,20 @@ struct Vertex final : ASTExprLeaf { }; template<> +// ast_null_keyword is the `null` literal +// it should be handled with care; for instance, `null` takes special place in the type system struct Vertex final : ASTExprLeaf { explicit Vertex(SrcLocation loc) : ASTExprLeaf(ast_null_keyword, loc) {} }; template<> -struct Vertex final : ASTExprLeaf { - const LocalVarData* param_ref = nullptr; // filled after resolve identifiers, points to `self` parameter - - Vertex* mutate() const { return const_cast(this); } - void assign_param_ref(const LocalVarData* self_param); - - explicit Vertex(SrcLocation loc) - : ASTExprLeaf(ast_self_keyword, loc) {} -}; - -template<> +// ast_argument is an element of an argument list of a function/method call +// example: `f(1, x)` has 2 arguments, `t.tupleFirst()` has no arguments (though `t` is passed as `self`) +// example: `f(mutate arg)` has 1 argument with `passed_as_mutate` flag +// (without `mutate` keyword, the entity "argument" could be replaced just by "any expression") struct Vertex final : ASTExprUnary { - bool passed_as_mutate; // when called `f(mutate arg)`, not `f(arg)` + bool passed_as_mutate; AnyExprV get_expr() const { return child; } @@ -412,19 +508,57 @@ struct Vertex final : ASTExprUnary { }; template<> +// ast_argument_list contains N arguments of a 
function/method call struct Vertex final : ASTExprVararg { const std::vector& get_arguments() const { return children; } - auto get_arg(int i) const { return children.at(i)->as(); } + auto get_arg(int i) const { return child(i)->as(); } Vertex(SrcLocation loc, std::vector arguments) : ASTExprVararg(ast_argument_list, loc, std::move(arguments)) {} }; template<> -struct Vertex final : ASTExprBinary { - const FunctionData* fun_maybe = nullptr; // filled after resolve; remains nullptr for `localVar()` / `getF()()` +// ast_dot_access is "object before dot, identifier + optional after dot" +// examples: `tensorVar.0` / `obj.field` / `getObj().method` / `t.tupleFirst` +// from traversing point of view, it's an unary expression: only obj is expression, field name is not +// note, that `obj.method()` is a function call with "dot access `obj.method`" callee +struct Vertex final : ASTExprUnary { +private: + V identifier; // `0` / `field` / `method` + V instantiationTs; // not null if ``, otherwise nullptr - AnyExprV get_called_f() const { return lhs; } +public: + + typedef const FunctionData* DotTarget; // for `t.tupleAt` target is `tupleAt` global function + DotTarget target = nullptr; // filled at type inferring + + AnyExprV get_obj() const { return child; } + auto get_identifier() const { return identifier; } + bool has_instantiationTs() const { return instantiationTs != nullptr; } + auto get_instantiationTs() const { return instantiationTs; } + std::string_view get_field_name() const { return identifier->name; } + + Vertex* mutate() const { return const_cast(this); } + void assign_target(const DotTarget& target); + + Vertex(SrcLocation loc, AnyExprV obj, V identifier, V instantiationTs) + : ASTExprUnary(ast_dot_access, loc, obj) + , identifier(identifier), instantiationTs(instantiationTs) {} +}; + +template<> +// ast_function_call is "calling some lhs with parenthesis", lhs is arbitrary expression (callee) +// example: `globalF()` then callee is reference +// example: 
`globalF()` then callee is reference (with instantiation Ts filled) +// example: `local_var()` then callee is reference (points to local var, filled at resolve identifiers) +// example: `getF()()` then callee is another func call (which type is TypeDataFunCallable) +// example: `obj.method()` then callee is dot access (resolved while type inferring) +struct Vertex final : ASTExprBinary { + const FunctionData* fun_maybe = nullptr; // filled while type inferring for `globalF()` / `obj.f()`; remains nullptr for `local_var()` / `getF()()` + + AnyExprV get_callee() const { return lhs; } + bool is_dot_call() const { return lhs->type == ast_dot_access; } + AnyExprV get_dot_obj() const { return lhs->as()->get_obj(); } auto get_arg_list() const { return rhs->as(); } int get_num_args() const { return rhs->as()->size(); } auto get_arg(int i) const { return rhs->as()->get_arg(i); } @@ -437,90 +571,79 @@ struct Vertex final : ASTExprBinary { }; template<> -struct Vertex final : ASTExprBinary { - const FunctionData* fun_ref = nullptr; // points to global function (after resolve) - std::string_view method_name; - - AnyExprV get_obj() const { return lhs; } - auto get_arg_list() const { return rhs->as(); } - int get_num_args() const { return rhs->as()->size(); } - auto get_arg(int i) const { return rhs->as()->get_arg(i); } - - Vertex* mutate() const { return const_cast(this); } - void assign_fun_ref(const FunctionData* fun_ref); - - Vertex(SrcLocation loc, std::string_view method_name, AnyExprV lhs, V arguments) - : ASTExprBinary(ast_dot_method_call, loc, lhs, arguments) - , method_name(method_name) {} -}; - -template<> -struct Vertex final : ASTStatementUnary { - const GlobalVarData* var_ref = nullptr; // filled after register - TypeExpr* declared_type; - - auto get_identifier() const { return child->as(); } - - Vertex* mutate() const { return const_cast(this); } - void assign_var_ref(const GlobalVarData* var_ref); - - Vertex(SrcLocation loc, V name_identifier, TypeExpr* 
declared_type) - : ASTStatementUnary(ast_global_var_declaration, loc, name_identifier) - , declared_type(declared_type) {} -}; - -template<> -struct Vertex final : ASTStatementVararg { - const GlobalConstData* const_ref = nullptr; // filled after register - TypeExpr* declared_type; // may be nullptr - - auto get_identifier() const { return child(0)->as(); } - AnyExprV get_init_value() const { return child_as_expr(1); } - - Vertex* mutate() const { return const_cast(this); } - void assign_const_ref(const GlobalConstData* const_ref); - - Vertex(SrcLocation loc, V name_identifier, TypeExpr* declared_type, AnyExprV init_value) - : ASTStatementVararg(ast_constant_declaration, loc, {name_identifier, init_value}) - , declared_type(declared_type) {} -}; - -template<> +// ast_underscore represents `_` symbol used for left side of assignment +// example: `(cs, _) = cs.loadAndReturn()` +// though it's the only correct usage, using _ as rvalue like `var x = _;` is correct from AST point of view +// note, that for declaration `var _ = 1` underscore is a regular local var declared (with empty name) +// but for `_ = 1` (not declaration) it's underscore; it's because `var _:int` is also correct struct Vertex final : ASTExprLeaf { explicit Vertex(SrcLocation loc) : ASTExprLeaf(ast_underscore, loc) {} }; template<> +// ast_assign represents assignment "lhs = rhs" +// examples: `a = 4` / `var a = 4` / `(cs, b, mode) = rhs` / `f() = g()` +// note, that `a = 4` lhs is ast_reference, `var a = 4` lhs is ast_local_vars_declaration +struct Vertex final : ASTExprBinary { + AnyExprV get_lhs() const { return lhs; } + AnyExprV get_rhs() const { return rhs; } + + explicit Vertex(SrcLocation loc, AnyExprV lhs, AnyExprV rhs) + : ASTExprBinary(ast_assign, loc, lhs, rhs) {} +}; + +template<> +// ast_set_assign represents assignment-and-set operation "lhs = rhs" +// examples: `a += 4` / `b <<= c` +struct Vertex final : ASTExprBinary { + const FunctionData* fun_ref = nullptr; // filled at type 
inferring, points to `_+_` built-in for += + std::string_view operator_name; // without equal sign, "+" for operator += + TokenType tok; // tok_set_* + + AnyExprV get_lhs() const { return lhs; } + AnyExprV get_rhs() const { return rhs; } + + Vertex* mutate() const { return const_cast(this); } + void assign_fun_ref(const FunctionData* fun_ref); + + Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyExprV lhs, AnyExprV rhs) + : ASTExprBinary(ast_set_assign, loc, lhs, rhs) + , operator_name(operator_name), tok(tok) {} +}; + +template<> +// ast_unary_operator is "some operator over one expression" +// examples: `-1` / `~found` struct Vertex final : ASTExprUnary { + const FunctionData* fun_ref = nullptr; // filled at type inferring, points to some built-in function std::string_view operator_name; TokenType tok; AnyExprV get_rhs() const { return child; } + Vertex* mutate() const { return const_cast(this); } + void assign_fun_ref(const FunctionData* fun_ref); + Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyExprV rhs) : ASTExprUnary(ast_unary_operator, loc, rhs) , operator_name(operator_name), tok(tok) {} }; template<> +// ast_binary_operator is "some operator over two expressions" +// examples: `a + b` / `x & true` / `(a, b) << g()` +// note, that `a = b` is NOT a binary operator, it's ast_assign, also `a += b`, it's ast_set_assign struct Vertex final : ASTExprBinary { + const FunctionData* fun_ref = nullptr; // filled at type inferring, points to some built-in function std::string_view operator_name; TokenType tok; AnyExprV get_lhs() const { return lhs; } AnyExprV get_rhs() const { return rhs; } - bool is_set_assign() const { - TokenType t = tok; - return t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || - t == tok_set_mod || t == tok_set_lshift || t == tok_set_rshift || - t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor; - } - - bool is_assign() const { - 
return tok == tok_assign; - } + Vertex* mutate() const { return const_cast(this); } + void assign_fun_ref(const FunctionData* fun_ref); Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyExprV lhs, AnyExprV rhs) : ASTExprBinary(ast_binary_operator, loc, lhs, rhs) @@ -528,24 +651,53 @@ struct Vertex final : ASTExprBinary { }; template<> +// ast_ternary_operator is a traditional ternary construction +// example: `cond ? a : b` struct Vertex final : ASTExprVararg { - AnyExprV get_cond() const { return children.at(0); } - AnyExprV get_when_true() const { return children.at(1); } - AnyExprV get_when_false() const { return children.at(2); } + AnyExprV get_cond() const { return child(0); } + AnyExprV get_when_true() const { return child(1); } + AnyExprV get_when_false() const { return child(2); } Vertex(SrcLocation loc, AnyExprV cond, AnyExprV when_true, AnyExprV when_false) : ASTExprVararg(ast_ternary_operator, loc, {cond, when_true, when_false}) {} }; template<> -struct Vertex : ASTStatementUnary { - AnyExprV get_return_value() const { return child_as_expr(); } +// ast_cast_as_operator is explicit casting with "as" keyword +// examples: `arg as int` / `null as cell` / `t.tupleAt(2) as slice` +struct Vertex final : ASTExprUnary { + AnyExprV get_expr() const { return child; } - Vertex(SrcLocation loc, AnyExprV child) - : ASTStatementUnary(ast_return_statement, loc, child) {} + TypePtr cast_to_type; + + Vertex* mutate() const { return const_cast(this); } + void assign_resolved_type(TypePtr cast_to_type); + + Vertex(SrcLocation loc, AnyExprV expr, TypePtr cast_to_type) + : ASTExprUnary(ast_cast_as_operator, loc, expr) + , cast_to_type(cast_to_type) {} +}; + + +// +// --------------------------------------------------------- +// statements +// + + +template<> +// ast_empty_statement is very similar to "empty sequence" but has a special treatment +// example: `;` (just semicolon) +// example: body of `builtin` function is empty statement (not a zero 
sequence) +struct Vertex final : ASTStatementVararg { + explicit Vertex(SrcLocation loc) + : ASTStatementVararg(ast_empty_statement, loc, {}) {} }; template<> +// ast_sequence is "some sequence of statements" +// example: function body is a sequence +// example: do while body is a sequence struct Vertex final : ASTStatementVararg { SrcLocation loc_end; @@ -558,26 +710,61 @@ struct Vertex final : ASTStatementVararg { }; template<> +// ast_return_statement is "return something from a function" +// examples: `return a` / `return any_expr()()` / `return;` +// note, that for `return;` (without a value, meaning "void"), in AST, it's stored as empty expression +struct Vertex : ASTStatementUnary { + AnyExprV get_return_value() const { return child_as_expr(); } + bool has_return_value() const { return child->type != ast_empty_expression; } + + Vertex(SrcLocation loc, AnyExprV child) + : ASTStatementUnary(ast_return_statement, loc, child) {} +}; + +template<> +// ast_if_statement is a traditional if statement, probably followed by an else branch +// examples: `if (cond) { ... } else { ... }` / `if (cond) { ... }` +// when else branch is missing, it's stored as empty statement +// for "else if", it's just "if statement" inside a sequence of else branch +struct Vertex final : ASTStatementVararg { + bool is_ifnot; // if(!cond), to generate more optimal fift code + + AnyExprV get_cond() const { return child_as_expr(0); } + auto get_if_body() const { return children.at(1)->as(); } + auto get_else_body() const { return children.at(2)->as(); } // always exists (when else omitted, it's empty) + + Vertex(SrcLocation loc, bool is_ifnot, AnyExprV cond, V if_body, V else_body) + : ASTStatementVararg(ast_if_statement, loc, {cond, if_body, else_body}) + , is_ifnot(is_ifnot) {} +}; + +template<> +// ast_repeat_statement is "repeat something N times" +// example: `repeat (10) { ... 
}` struct Vertex final : ASTStatementVararg { AnyExprV get_cond() const { return child_as_expr(0); } - auto get_body() const { return child(1)->as(); } + auto get_body() const { return children.at(1)->as(); } Vertex(SrcLocation loc, AnyExprV cond, V body) : ASTStatementVararg(ast_repeat_statement, loc, {cond, body}) {} }; template<> +// ast_while_statement is a standard "while" loop +// example: `while (x > 0) { ... }` struct Vertex final : ASTStatementVararg { AnyExprV get_cond() const { return child_as_expr(0); } - auto get_body() const { return child(1)->as(); } + auto get_body() const { return children.at(1)->as(); } Vertex(SrcLocation loc, AnyExprV cond, V body) : ASTStatementVararg(ast_while_statement, loc, {cond, body}) {} }; template<> +// ast_do_while_statement is a standard "do while" loop +// example: `do { ... } while (x > 0);` struct Vertex final : ASTStatementVararg { - auto get_body() const { return child(0)->as(); } + auto get_body() const { return children.at(0)->as(); } AnyExprV get_cond() const { return child_as_expr(1); } Vertex(SrcLocation loc, V body, AnyExprV cond) @@ -585,16 +772,21 @@ struct Vertex final : ASTStatementVararg { }; template<> +// ast_throw_statement is throwing an exception, it accepts excNo and optional arg +// examples: `throw 10` / `throw (ERR_LOW_BALANCE)` / `throw (1001, incomingAddr)` +// when thrown arg is missing, it's stored as empty expression struct Vertex final : ASTStatementVararg { AnyExprV get_thrown_code() const { return child_as_expr(0); } - AnyExprV get_thrown_arg() const { return child_as_expr(1); } // may be ast_empty bool has_thrown_arg() const { return child_as_expr(1)->type != ast_empty_expression; } + AnyExprV get_thrown_arg() const { return child_as_expr(1); } Vertex(SrcLocation loc, AnyExprV thrown_code, AnyExprV thrown_arg) : ASTStatementVararg(ast_throw_statement, loc, {thrown_code, thrown_arg}) {} }; template<> +// ast_assert_statement is "assert that cond is true, otherwise throw an exception" 
+// examples: `assert (balance > 0, ERR_ZERO_BALANCE)` / `assert (balance > 0) throw (ERR_ZERO_BALANCE)` struct Vertex final : ASTStatementVararg { AnyExprV get_cond() const { return child_as_expr(0); } AnyExprV get_thrown_code() const { return child_as_expr(1); } @@ -604,6 +796,10 @@ struct Vertex final : ASTStatementVararg { }; template<> +// ast_try_catch_statement is a standard try catch (finally block doesn't exist) +// example: `try { ... } catch (excNo) { ... }` +// there are two formal "arguments" of catch: excNo and arg, but both can be omitted +// when omitted, they are stored as underscores, so len of a catch tensor is always 2 struct Vertex final : ASTStatementVararg { auto get_try_body() const { return children.at(0)->as(); } auto get_catch_expr() const { return children.at(1)->as(); } // (excNo, arg), always len 2 @@ -614,29 +810,42 @@ struct Vertex final : ASTStatementVararg { }; template<> -struct Vertex final : ASTStatementVararg { - bool is_ifnot; // if(!cond), to generate more optimal fift code +// ast_asm_body is a body of `asm` function — a set of strings, and optionally stack order manipulations +// example: `fun skipMessageOp... asm "32 PUSHINT" "SDSKIPFIRST";` +// user can specify "arg order"; example: `fun store(self: builder, op: int) asm (op self)` then [1, 0] +// user can specify "ret order"; example: `fun modDiv... 
asm(-> 1 0) "DIVMOD";` then [1, 0] +struct Vertex final : ASTStatementVararg { + std::vector arg_order; + std::vector ret_order; - AnyExprV get_cond() const { return child_as_expr(0); } - auto get_if_body() const { return child(1)->as(); } - auto get_else_body() const { return child(2)->as(); } // always exists (when else omitted, it's empty) + const std::vector& get_asm_commands() const { return children; } // ast_string_const[] - Vertex(SrcLocation loc, bool is_ifnot, AnyExprV cond, V if_body, V else_body) - : ASTStatementVararg(ast_if_statement, loc, {cond, if_body, else_body}) - , is_ifnot(is_ifnot) {} + Vertex(SrcLocation loc, std::vector arg_order, std::vector ret_order, std::vector asm_commands) + : ASTStatementVararg(ast_asm_body, loc, std::move(asm_commands)) + , arg_order(std::move(arg_order)), ret_order(std::move(ret_order)) {} }; + +// +// --------------------------------------------------------- +// other +// + + template<> +// ast_genericsT_item is generics T at declaration +// example: `fun f` has a list of 2 generic Ts struct Vertex final : ASTOtherLeaf { - TypeExpr* created_type; // used to keep same pointer, since TypeExpr::new_var(i) always allocates std::string_view nameT; - Vertex(SrcLocation loc, TypeExpr* created_type, std::string_view nameT) + Vertex(SrcLocation loc, std::string_view nameT) : ASTOtherLeaf(ast_genericsT_item, loc) - , created_type(created_type), nameT(nameT) {} + , nameT(nameT) {} }; template<> +// ast_genericsT_list is a container for generics T at declaration +// example: see above struct Vertex final : ASTOtherVararg { std::vector get_items() const { return children; } auto get_item(int i) const { return children.at(i)->as(); } @@ -647,24 +856,55 @@ struct Vertex final : ASTOtherVararg { int lookup_idx(std::string_view nameT) const; }; + template<> +// ast_instantiationT_item is manual substitution of generic T used in code, mostly for func calls +// examples: `g()` / `t.tupleFirst()` / `f<(int, slice), builder>()` +struct 
Vertex final : ASTOtherLeaf { + TypePtr substituted_type; + + Vertex* mutate() const { return const_cast(this); } + void assign_resolved_type(TypePtr substituted_type); + + Vertex(SrcLocation loc, TypePtr substituted_type) + : ASTOtherLeaf(ast_instantiationT_item, loc) + , substituted_type(substituted_type) {} +}; + +template<> +// ast_instantiationT_list is a container for generic T substitutions used in code +// examples: see above +struct Vertex final : ASTOtherVararg { + std::vector get_items() const { return children; } + auto get_item(int i) const { return children.at(i)->as(); } + + Vertex(SrcLocation loc, std::vector instantiationTs) + : ASTOtherVararg(ast_instantiationT_list, loc, std::move(instantiationTs)) {} +}; + +template<> +// ast_parameter is a parameter of a function in its declaration +// example: `fun f(a: int, mutate b: slice)` has 2 parameters struct Vertex final : ASTOtherLeaf { - const LocalVarData* param_ref = nullptr; // filled after resolved + const LocalVarData* param_ref = nullptr; // filled on resolve identifiers std::string_view param_name; - TypeExpr* declared_type; - bool declared_as_mutate; // declared as `mutate param_name` + TypePtr declared_type; + bool declared_as_mutate; // declared as `mutate param_name` bool is_underscore() const { return param_name.empty(); } Vertex* mutate() const { return const_cast(this); } void assign_param_ref(const LocalVarData* param_ref); + void assign_resolved_type(TypePtr declared_type); - Vertex(SrcLocation loc, std::string_view param_name, TypeExpr* declared_type, bool declared_as_mutate) + Vertex(SrcLocation loc, std::string_view param_name, TypePtr declared_type, bool declared_as_mutate) : ASTOtherLeaf(ast_parameter, loc) , param_name(param_name), declared_type(declared_type), declared_as_mutate(declared_as_mutate) {} }; template<> +// ast_parameter_list is a container of parameters +// example: see above struct Vertex final : ASTOtherVararg { const std::vector& get_params() const { return 
children; } auto get_param(int i) const { return children.at(i)->as(); } @@ -678,22 +918,12 @@ struct Vertex final : ASTOtherVararg { }; template<> -struct Vertex final : ASTStatementVararg { - std::vector arg_order; - std::vector ret_order; - - const std::vector& get_asm_commands() const { return children; } // ast_string_const[] - - Vertex(SrcLocation loc, std::vector arg_order, std::vector ret_order, std::vector asm_commands) - : ASTStatementVararg(ast_asm_body, loc, std::move(asm_commands)) - , arg_order(std::move(arg_order)), ret_order(std::move(ret_order)) {} -}; - -template<> +// ast_annotation is @annotation above a declaration +// example: `@pure fun ...` struct Vertex final : ASTOtherVararg { AnnotationKind kind; - auto get_arg() const { return child(0)->as(); } + auto get_arg() const { return children.at(0)->as(); } static AnnotationKind parse_kind(std::string_view name); @@ -703,63 +933,79 @@ struct Vertex final : ASTOtherVararg { }; template<> -struct Vertex final : ASTExprUnary { - const Symbol* var_maybe = nullptr; // typically local var; can be global var if `var g_v redef`; remains nullptr for underscore - TypeExpr* declared_type; - bool is_immutable; // declared via 'val', not 'var' - bool marked_as_redef; // var (existing_var redef, new_var: int) = ... 
- - AnyExprV get_identifier() const { return child; } // ast_identifier / ast_underscore - - Vertex* mutate() const { return const_cast(this); } - void assign_var_ref(const Symbol* var_ref); - - Vertex(SrcLocation loc, AnyExprV name_identifier, TypeExpr* declared_type, bool is_immutable, bool marked_as_redef) - : ASTExprUnary(ast_local_var, loc, name_identifier), declared_type(declared_type), is_immutable(is_immutable), marked_as_redef(marked_as_redef) {} -}; - -template<> -struct Vertex final : ASTStatementVararg { - AnyExprV get_lhs() const { return child_as_expr(0); } // ast_local_var / ast_tensor / ast_tensor_square - AnyExprV get_assigned_val() const { return child_as_expr(1); } - - Vertex(SrcLocation loc, AnyExprV lhs, AnyExprV assigned_val) - : ASTStatementVararg(ast_local_vars_declaration, loc, {lhs, assigned_val}) {} -}; - -template<> +// ast_function_declaration is declaring a function/method +// methods are still global functions, just accepting "self" first parameter +// example: `fun f() { ... }` +// functions can be generic, `fun f(params) { ... 
}` +// their body is either sequence (regular code function), or `asm`, or `builtin` struct Vertex final : ASTOtherVararg { - auto get_identifier() const { return child(0)->as(); } - int get_num_params() const { return child(1)->as()->size(); } - auto get_param_list() const { return child(1)->as(); } - auto get_param(int i) const { return child(1)->as()->get_param(i); } - AnyV get_body() const { return child(2); } // ast_sequence / ast_asm_body + auto get_identifier() const { return children.at(0)->as(); } + int get_num_params() const { return children.at(1)->as()->size(); } + auto get_param_list() const { return children.at(1)->as(); } + auto get_param(int i) const { return children.at(1)->as()->get_param(i); } + AnyV get_body() const { return children.at(2); } // ast_sequence / ast_asm_body const FunctionData* fun_ref = nullptr; // filled after register - TypeExpr* ret_type = nullptr; - V genericsT_list = nullptr; - bool is_entrypoint = false; - bool marked_as_pure = false; - bool marked_as_builtin = false; - bool marked_as_get_method = false; - bool marked_as_inline = false; - bool marked_as_inline_ref = false; - bool accepts_self = false; - bool returns_self = false; - V method_id = nullptr; + TypePtr declared_return_type; // filled at ast parsing; if unspecified (nullptr), means "auto infer" + V genericsT_list; // for non-generics it's nullptr + td::RefInt256 method_id; // specified via @method_id annotation + int flags; // from enum in FunctionData bool is_asm_function() const { return children.at(2)->type == ast_asm_body; } - bool is_regular_function() const { return children.at(2)->type == ast_sequence; } - bool is_builtin_function() const { return marked_as_builtin; } + bool is_code_function() const { return children.at(2)->type == ast_sequence; } + bool is_builtin_function() const { return children.at(2)->type == ast_empty_statement; } Vertex* mutate() const { return const_cast(this); } void assign_fun_ref(const FunctionData* fun_ref); + void 
assign_resolved_type(TypePtr declared_return_type); - Vertex(SrcLocation loc, V name_identifier, V parameters, AnyV body) - : ASTOtherVararg(ast_function_declaration, loc, {name_identifier, parameters, body}) {} + Vertex(SrcLocation loc, V name_identifier, V parameters, AnyV body, TypePtr declared_return_type, V genericsT_list, td::RefInt256 method_id, int flags) + : ASTOtherVararg(ast_function_declaration, loc, {name_identifier, parameters, body}) + , declared_return_type(declared_return_type), genericsT_list(genericsT_list), method_id(std::move(method_id)), flags(flags) {} }; template<> +// ast_global_var_declaration is declaring a global var, outside a function +// example: `global g: int;` +// note, that globals don't have default values, since there is no single "entrypoint" for a contract +struct Vertex final : ASTOtherVararg { + const GlobalVarData* var_ref = nullptr; // filled after register + TypePtr declared_type; // filled always, typing globals is mandatory + + auto get_identifier() const { return children.at(0)->as(); } + + Vertex* mutate() const { return const_cast(this); } + void assign_var_ref(const GlobalVarData* var_ref); + void assign_resolved_type(TypePtr declared_type); + + Vertex(SrcLocation loc, V name_identifier, TypePtr declared_type) + : ASTOtherVararg(ast_global_var_declaration, loc, {name_identifier}) + , declared_type(declared_type) {} +}; + +template<> +// ast_constant_declaration is declaring a global constant, outside a function +// example: `const op = 0x123;` +struct Vertex final : ASTOtherVararg { + const GlobalConstData* const_ref = nullptr; // filled after register + TypePtr declared_type; // not null for `const op: int = ...` + + auto get_identifier() const { return children.at(0)->as(); } + AnyExprV get_init_value() const { return child_as_expr(1); } + + Vertex* mutate() const { return const_cast(this); } + void assign_const_ref(const GlobalConstData* const_ref); + void assign_resolved_type(TypePtr declared_type); + + 
Vertex(SrcLocation loc, V name_identifier, TypePtr declared_type, AnyExprV init_value) + : ASTOtherVararg(ast_constant_declaration, loc, {name_identifier, init_value}) + , declared_type(declared_type) {} +}; + +template<> +// ast_tolk_required_version is a preamble fixating compiler's version at the top of the file +// example: `tolk 0.6` +// when compiler version mismatches, it means, that another compiler was earlier for that sources, a warning is emitted struct Vertex final : ASTOtherLeaf { std::string_view semver; @@ -769,21 +1015,27 @@ struct Vertex final : ASTOtherLeaf { }; template<> -struct Vertex final : ASTOtherVararg { - const SrcFile* file = nullptr; // assigned after imports have been resolved +// ast_import_directive is an import at the top of the file +// examples: `import "another.tolk"` / `import "@stdlib/tvm-dicts"` +struct Vertex final : ASTOtherVararg { + const SrcFile* file = nullptr; // assigned after imports have been resolved, just after parsing a file to ast - auto get_file_leaf() const { return child(0)->as(); } + auto get_file_leaf() const { return children.at(0)->as(); } - std::string get_file_name() const { return static_cast(child(0)->as()->str_val); } + std::string get_file_name() const { return static_cast(children.at(0)->as()->str_val); } Vertex* mutate() const { return const_cast(this); } void assign_src_file(const SrcFile* file); Vertex(SrcLocation loc, V file_name) - : ASTOtherVararg(ast_import_statement, loc, {file_name}) {} + : ASTOtherVararg(ast_import_directive, loc, {file_name}) {} }; template<> +// ast_tolk_file represents a whole parsed input .tolk file +// with functions, constants, etc. 
+// particularly, it contains imports that lead to loading other files +// a whole program consists of multiple parsed files, each of them has a parsed ast tree (stdlib is also parsed) struct Vertex final : ASTOtherVararg { const SrcFile* const file; diff --git a/tolk/builtins.cpp b/tolk/builtins.cpp index 01b096da..68c3b998 100644 --- a/tolk/builtins.cpp +++ b/tolk/builtins.cpp @@ -16,6 +16,8 @@ */ #include "tolk.h" #include "compiler-state.h" +#include "type-system.h" +#include "generics-helpers.h" namespace tolk { using namespace std::literals::string_literals; @@ -23,46 +25,33 @@ using namespace std::literals::string_literals; // given func_type = `(slice, int) -> slice` and func flags, create SymLocalVarOrParameter // currently (see at the bottom) parameters of built-in functions are unnamed: // built-in functions are created using a resulting type -static std::vector define_builtin_parameters(const TypeExpr* func_type, int func_flags) { +static std::vector define_builtin_parameters(const std::vector& params_types, int func_flags) { // `loadInt()`, `storeInt()`: they accept `self` and mutate it; no other options available in built-ins for now bool is_mutate_self = func_flags & FunctionData::flagHasMutateParams; - // func_type a map (params_type -> ret_type), probably surrounded by forall (internal representation of ) - TypeExpr* params_type = func_type->constr == TypeExpr::te_ForAll ? 
func_type->args[0]->args[0] : func_type->args[0]; std::vector parameters; + parameters.reserve(params_types.size()); - if (params_type->constr == TypeExpr::te_Tensor) { // multiple parameters: it's a tensor - parameters.reserve(params_type->args.size()); - for (int i = 0; i < static_cast(params_type->args.size()); ++i) { - LocalVarData p_sym("", {}, i, params_type->args[i]); - if (i == 0 && is_mutate_self) { - p_sym.flags |= LocalVarData::flagMutateParameter; - } - parameters.push_back(std::move(p_sym)); - } - } else { // single parameter - LocalVarData p_sym("", {}, 0, params_type); - if (is_mutate_self) { - p_sym.flags |= LocalVarData::flagMutateParameter; - } + for (int i = 0; i < static_cast(params_types.size()); ++i) { + LocalVarData p_sym("", {}, params_types[i], (i == 0 && is_mutate_self) * LocalVarData::flagMutateParameter, i); parameters.push_back(std::move(p_sym)); } return parameters; } -static void define_builtin_func(const std::string& name, TypeExpr* func_type, const simple_compile_func_t& func, int flags) { - auto* f_sym = new FunctionData(name, {}, func_type, define_builtin_parameters(func_type, flags), flags, new FunctionBodyBuiltin(func)); +static void define_builtin_func(const std::string& name, const std::vector& params_types, TypePtr return_type, const GenericsDeclaration* genericTs, const simple_compile_func_t& func, int flags) { + auto* f_sym = new FunctionData(name, {}, return_type, define_builtin_parameters(params_types, flags), flags, genericTs, nullptr, new FunctionBodyBuiltin(func), nullptr); G.symtable.add_function(f_sym); } -static void define_builtin_func(const std::string& name, TypeExpr* func_type, const AsmOp& macro, int flags) { - auto* f_sym = new FunctionData(name, {}, func_type, define_builtin_parameters(func_type, flags), flags, new FunctionBodyBuiltin(make_simple_compile(macro))); +static void define_builtin_func(const std::string& name, const std::vector& params_types, TypePtr return_type, const GenericsDeclaration* 
genericTs, const AsmOp& macro, int flags) { + auto* f_sym = new FunctionData(name, {}, return_type, define_builtin_parameters(params_types, flags), flags, genericTs, nullptr, new FunctionBodyBuiltin(make_simple_compile(macro)), nullptr); G.symtable.add_function(f_sym); } -static void define_builtin_func(const std::string& name, TypeExpr* func_type, const simple_compile_func_t& func, int flags, +static void define_builtin_func(const std::string& name, const std::vector& params_types, TypePtr return_type, const GenericsDeclaration* genericTs, const simple_compile_func_t& func, int flags, std::initializer_list arg_order, std::initializer_list ret_order) { - auto* f_sym = new FunctionData(name, {}, func_type, define_builtin_parameters(func_type, flags), flags, new FunctionBodyBuiltin(func)); + auto* f_sym = new FunctionData(name, {}, return_type, define_builtin_parameters(params_types, flags), flags, genericTs, nullptr, new FunctionBodyBuiltin(func), nullptr); f_sym->arg_order = arg_order; f_sym->ret_order = ret_order; G.symtable.add_function(f_sym); @@ -1080,128 +1069,169 @@ AsmOp compile_is_null(std::vector& res, std::vector& args, S void define_builtins() { using namespace std::placeholders; - TypeExpr* Unit = TypeExpr::new_unit(); - TypeExpr* Int = TypeExpr::new_atomic(TypeExpr::_Int); - TypeExpr* Slice = TypeExpr::new_atomic(TypeExpr::_Slice); - TypeExpr* Builder = TypeExpr::new_atomic(TypeExpr::_Builder); - TypeExpr* Tuple = TypeExpr::new_atomic(TypeExpr::_Tuple); - TypeExpr* Int2 = TypeExpr::new_tensor({Int, Int}); - TypeExpr* Int3 = TypeExpr::new_tensor({Int, Int, Int}); - TypeExpr* TupleInt = TypeExpr::new_tensor({Tuple, Int}); - TypeExpr* SliceInt = TypeExpr::new_tensor({Slice, Int}); - TypeExpr* X = TypeExpr::new_var(0); - TypeExpr* arith_bin_op = TypeExpr::new_map(Int2, Int); - TypeExpr* arith_un_op = TypeExpr::new_map(Int, Int); - TypeExpr* impure_un_op = TypeExpr::new_map(Int, Unit); - TypeExpr* fetch_int_op_mutate = TypeExpr::new_map(SliceInt, SliceInt); 
- TypeExpr* prefetch_int_op = TypeExpr::new_map(SliceInt, Int); - TypeExpr* store_int_mutate = TypeExpr::new_map(TypeExpr::new_tensor({Builder, Int, Int}), TypeExpr::new_tensor({Builder, Unit})); - TypeExpr* fetch_slice_op_mutate = TypeExpr::new_map(SliceInt, TypeExpr::new_tensor({Slice, Slice})); - TypeExpr* prefetch_slice_op = TypeExpr::new_map(SliceInt, Slice); - TypeExpr* throw_arg_op = TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_tensor({X, Int}), Unit)); + TypePtr Unit = TypeDataVoid::create(); + TypePtr Int = TypeDataInt::create(); + TypePtr Slice = TypeDataSlice::create(); + TypePtr Builder = TypeDataBuilder::create(); + TypePtr Tuple = TypeDataTuple::create(); - define_builtin_func("_+_", arith_bin_op, compile_add, + std::vector itemsT; + itemsT.emplace_back("T"); + TypePtr typeT = TypeDataGenericT::create("T"); + const GenericsDeclaration* declGenericT = new GenericsDeclaration(std::move(itemsT)); + + std::vector ParamsInt1 = {Int}; + std::vector ParamsInt2 = {Int, Int}; + std::vector ParamsInt3 = {Int, Int, Int}; + std::vector ParamsSliceInt = {Slice, Int}; + + define_builtin_func("_+_", ParamsInt2, Int, nullptr, + compile_add, FunctionData::flagMarkedAsPure); - define_builtin_func("_-_", arith_bin_op, compile_sub, + define_builtin_func("_-_", ParamsInt2, Int, nullptr, + compile_sub, FunctionData::flagMarkedAsPure); - define_builtin_func("-_", arith_un_op, compile_unary_minus, + define_builtin_func("-_", ParamsInt1, Int, nullptr, + compile_unary_minus, FunctionData::flagMarkedAsPure); - define_builtin_func("+_", arith_un_op, compile_unary_plus, + define_builtin_func("+_", ParamsInt1, Int, nullptr, + compile_unary_plus, FunctionData::flagMarkedAsPure); - define_builtin_func("_*_", arith_bin_op, compile_mul, + define_builtin_func("_*_", ParamsInt2, Int, nullptr, + compile_mul, FunctionData::flagMarkedAsPure); - define_builtin_func("_/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, -1), + define_builtin_func("_/_", ParamsInt2, Int, 
nullptr, + std::bind(compile_div, _1, _2, _3, -1), FunctionData::flagMarkedAsPure); - define_builtin_func("_~/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 0), + define_builtin_func("_~/_", ParamsInt2, Int, nullptr, + std::bind(compile_div, _1, _2, _3, 0), FunctionData::flagMarkedAsPure); - define_builtin_func("_^/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 1), + define_builtin_func("_^/_", ParamsInt2, Int, nullptr, + std::bind(compile_div, _1, _2, _3, 1), FunctionData::flagMarkedAsPure); - define_builtin_func("_%_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, -1), + define_builtin_func("_%_", ParamsInt2, Int, nullptr, + std::bind(compile_mod, _1, _2, _3, -1), FunctionData::flagMarkedAsPure); - define_builtin_func("_<<_", arith_bin_op, compile_lshift, + define_builtin_func("_<<_", ParamsInt2, Int, nullptr, + compile_lshift, FunctionData::flagMarkedAsPure); - define_builtin_func("_>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1), + define_builtin_func("_>>_", ParamsInt2, Int, nullptr, + std::bind(compile_rshift, _1, _2, _3, -1), FunctionData::flagMarkedAsPure); - define_builtin_func("_~>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 0), + define_builtin_func("_~>>_", ParamsInt2, Int, nullptr, + std::bind(compile_rshift, _1, _2, _3, 0), FunctionData::flagMarkedAsPure); - define_builtin_func("_^>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 1), + define_builtin_func("_^>>_", ParamsInt2, Int, nullptr, + std::bind(compile_rshift, _1, _2, _3, 1), FunctionData::flagMarkedAsPure); - define_builtin_func("!_", arith_un_op, compile_logical_not, + define_builtin_func("!_", ParamsInt1, Int, nullptr, + compile_logical_not, FunctionData::flagMarkedAsPure); - define_builtin_func("~_", arith_un_op, compile_bitwise_not, + define_builtin_func("~_", ParamsInt1, Int, nullptr, + compile_bitwise_not, FunctionData::flagMarkedAsPure); - define_builtin_func("_&_", arith_bin_op, compile_bitwise_and, + define_builtin_func("_&_", 
ParamsInt2, Int, nullptr, + compile_bitwise_and, FunctionData::flagMarkedAsPure); - define_builtin_func("_|_", arith_bin_op, compile_bitwise_or, + define_builtin_func("_|_", ParamsInt2, Int, nullptr, + compile_bitwise_or, FunctionData::flagMarkedAsPure); - define_builtin_func("_^_", arith_bin_op, compile_bitwise_xor, + define_builtin_func("_^_", ParamsInt2, Int, nullptr, + compile_bitwise_xor, FunctionData::flagMarkedAsPure); - define_builtin_func("_==_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 2), + define_builtin_func("_==_", ParamsInt2, Int, nullptr, + std::bind(compile_cmp_int, _1, _2, 2), FunctionData::flagMarkedAsPure); - define_builtin_func("_!=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 5), + define_builtin_func("_!=_", ParamsInt2, Int, nullptr, + std::bind(compile_cmp_int, _1, _2, 5), FunctionData::flagMarkedAsPure); - define_builtin_func("_<_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 4), + define_builtin_func("_<_", ParamsInt2, Int, nullptr, + std::bind(compile_cmp_int, _1, _2, 4), FunctionData::flagMarkedAsPure); - define_builtin_func("_>_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 1), + define_builtin_func("_>_", ParamsInt2, Int, nullptr, + std::bind(compile_cmp_int, _1, _2, 1), FunctionData::flagMarkedAsPure); - define_builtin_func("_<=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 6), + define_builtin_func("_<=_", ParamsInt2, Int, nullptr, + std::bind(compile_cmp_int, _1, _2, 6), FunctionData::flagMarkedAsPure); - define_builtin_func("_>=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 3), + define_builtin_func("_>=_", ParamsInt2, Int, nullptr, + std::bind(compile_cmp_int, _1, _2, 3), FunctionData::flagMarkedAsPure); - define_builtin_func("_<=>_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 7), + define_builtin_func("_<=>_", ParamsInt2, Int, nullptr, + std::bind(compile_cmp_int, _1, _2, 7), FunctionData::flagMarkedAsPure); - define_builtin_func("mulDivFloor", TypeExpr::new_map(Int3, Int), 
std::bind(compile_muldiv, _1, _2, _3, -1), + define_builtin_func("mulDivFloor", ParamsInt3, Int, nullptr, + std::bind(compile_muldiv, _1, _2, _3, -1), FunctionData::flagMarkedAsPure); - define_builtin_func("mulDivRound", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 0), + define_builtin_func("mulDivRound", ParamsInt3, Int, nullptr, + std::bind(compile_muldiv, _1, _2, _3, 0), FunctionData::flagMarkedAsPure); - define_builtin_func("mulDivCeil", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 1), + define_builtin_func("mulDivCeil", ParamsInt3, Int, nullptr, + std::bind(compile_muldiv, _1, _2, _3, 1), FunctionData::flagMarkedAsPure); - define_builtin_func("mulDivMod", TypeExpr::new_map(Int3, Int2), AsmOp::Custom("MULDIVMOD", 3, 2), + define_builtin_func("mulDivMod", ParamsInt3, TypeDataTensor::create({Int, Int}), nullptr, + AsmOp::Custom("MULDIVMOD", 3, 2), FunctionData::flagMarkedAsPure); - define_builtin_func("__true", TypeExpr::new_map(TypeExpr::new_unit(), Int), /* AsmOp::Const("TRUE") */ std::bind(compile_bool_const, _1, _2, true), + define_builtin_func("__true", {}, Int, nullptr, /* AsmOp::Const("TRUE") */ + std::bind(compile_bool_const, _1, _2, true), FunctionData::flagMarkedAsPure); - define_builtin_func("__false", TypeExpr::new_map(TypeExpr::new_unit(), Int), /* AsmOp::Const("FALSE") */ std::bind(compile_bool_const, _1, _2, false), + define_builtin_func("__false", {}, Int, nullptr, /* AsmOp::Const("FALSE") */ + std::bind(compile_bool_const, _1, _2, false), FunctionData::flagMarkedAsPure); - define_builtin_func("__null", TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_unit(), X)), AsmOp::Const("PUSHNULL"), + define_builtin_func("__null", {}, typeT, declGenericT, + AsmOp::Const("PUSHNULL"), FunctionData::flagMarkedAsPure); - define_builtin_func("__isNull", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Int)), compile_is_null, + define_builtin_func("__isNull", {typeT}, Int, declGenericT, + compile_is_null, 
FunctionData::flagMarkedAsPure); - define_builtin_func("__throw", impure_un_op, compile_throw, + define_builtin_func("__throw", ParamsInt1, Unit, nullptr, + compile_throw, 0); - define_builtin_func("__throw_arg", throw_arg_op, compile_throw_arg, + define_builtin_func("__throw_arg", {typeT, Int}, Unit, declGenericT, + compile_throw_arg, 0); - define_builtin_func("__throw_if_unless", TypeExpr::new_map(Int3, Unit), compile_throw_if_unless, + define_builtin_func("__throw_if_unless", ParamsInt3, Unit, nullptr, + compile_throw_if_unless, 0); - define_builtin_func("loadInt", fetch_int_op_mutate, std::bind(compile_fetch_int, _1, _2, true, true), + define_builtin_func("loadInt", ParamsSliceInt, Int, nullptr, + std::bind(compile_fetch_int, _1, _2, true, true), FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf, {}, {1, 0}); - define_builtin_func("loadUint", fetch_int_op_mutate, std::bind(compile_fetch_int, _1, _2, true, false), + define_builtin_func("loadUint", ParamsSliceInt, Int, nullptr, + std::bind(compile_fetch_int, _1, _2, true, false), FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf, {}, {1, 0}); - define_builtin_func("loadBits", fetch_slice_op_mutate, std::bind(compile_fetch_slice, _1, _2, true), + define_builtin_func("loadBits", ParamsSliceInt, Slice, nullptr, + std::bind(compile_fetch_slice, _1, _2, true), FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf, {}, {1, 0}); - define_builtin_func("preloadInt", prefetch_int_op, std::bind(compile_fetch_int, _1, _2, false, true), + define_builtin_func("preloadInt", ParamsSliceInt, Int, nullptr, + std::bind(compile_fetch_int, _1, _2, false, true), FunctionData::flagMarkedAsPure | FunctionData::flagAcceptsSelf); - define_builtin_func("preloadUint", prefetch_int_op, std::bind(compile_fetch_int, _1, _2, false, false), + define_builtin_func("preloadUint", ParamsSliceInt, Int, 
nullptr, + std::bind(compile_fetch_int, _1, _2, false, false), FunctionData::flagMarkedAsPure | FunctionData::flagAcceptsSelf); - define_builtin_func("preloadBits", prefetch_slice_op, std::bind(compile_fetch_slice, _1, _2, false), + define_builtin_func("preloadBits", ParamsSliceInt, Slice, nullptr, + std::bind(compile_fetch_slice, _1, _2, false), FunctionData::flagMarkedAsPure | FunctionData::flagAcceptsSelf); - define_builtin_func("storeInt", store_int_mutate, std::bind(compile_store_int, _1, _2, true), + define_builtin_func("storeInt", {Builder, Int, Int}, Unit, nullptr, + std::bind(compile_store_int, _1, _2, true), FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf | FunctionData::flagReturnsSelf, {1, 0, 2}, {}); - define_builtin_func("storeUint", store_int_mutate, std::bind(compile_store_int, _1, _2, false), + define_builtin_func("storeUint", {Builder, Int, Int}, Unit, nullptr, + std::bind(compile_store_int, _1, _2, false), FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf | FunctionData::flagReturnsSelf, {1, 0, 2}, {}); - define_builtin_func("tupleAt", TypeExpr::new_forall({X}, TypeExpr::new_map(TupleInt, X)), compile_tuple_at, + define_builtin_func("tupleAt", {Tuple, Int}, typeT, declGenericT, + compile_tuple_at, FunctionData::flagMarkedAsPure | FunctionData::flagAcceptsSelf); - define_builtin_func("debugPrint", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Unit)), + define_builtin_func("debugPrint", {typeT}, Unit, declGenericT, AsmOp::Custom("s0 DUMP DROP", 1, 1), 0); - define_builtin_func("debugPrintString", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Unit)), + define_builtin_func("debugPrintString", {typeT}, Unit, declGenericT, AsmOp::Custom("STRDUMP DROP", 1, 1), 0); - define_builtin_func("debugDumpStack", TypeExpr::new_map(Unit, Unit), + define_builtin_func("debugDumpStack", {}, Unit, nullptr, AsmOp::Custom("DUMPSTK", 0, 0), 0); } diff --git 
a/tolk/codegen.cpp b/tolk/codegen.cpp index 0529696f..3830f7ae 100644 --- a/tolk/codegen.cpp +++ b/tolk/codegen.cpp @@ -16,6 +16,7 @@ */ #include "tolk.h" #include "compiler-state.h" +#include "type-system.h" namespace tolk { @@ -345,16 +346,16 @@ bool Op::generate_code_step(Stack& stack) { if (f_sym->is_asm_function() || f_sym->is_builtin_function()) { // TODO: create and compile a true lambda instead of this (so that arg_order and ret_order would work correctly) std::vector args0, res; - TypeExpr* func_type = f_sym->full_type; - TypeExpr::remove_indirect(func_type); - tolk_assert(func_type->is_map()); - auto wr = func_type->args.at(0)->get_width(); - auto wl = func_type->args.at(1)->get_width(); - tolk_assert(wl >= 0 && wr >= 0); - for (int i = 0; i < wl; i++) { + int w_arg = 0; + for (const LocalVarData& param : f_sym->parameters) { + w_arg += param.declared_type->calc_width_on_stack(); + } + int w_ret = f_sym->inferred_return_type->calc_width_on_stack(); + tolk_assert(w_ret >= 0 && w_arg >= 0); + for (int i = 0; i < w_ret; i++) { res.emplace_back(0); } - for (int i = 0; i < wr; i++) { + for (int i = 0; i < w_arg; i++) { args0.emplace_back(0); } if (f_sym->is_asm_function()) { @@ -456,14 +457,12 @@ bool Op::generate_code_step(Stack& stack) { right1.push_back(arg.idx); } } - } else if (arg_order) { - for (int i = 0; i < (int)right.size(); i++) { - right1.push_back(right.at(arg_order->at(i))); - } } else { + tolk_assert(!arg_order); right1 = right; } std::vector last; + last.reserve(right1.size()); for (var_idx_t x : right1) { last.push_back(var_info[x] && var_info[x]->is_last()); } @@ -489,7 +488,7 @@ bool Op::generate_code_step(Stack& stack) { }; if (cl == _CallInd) { exec_callxargs((int)right.size() - 1, (int)left.size()); - } else if (!f_sym->is_regular_function()) { + } else if (!f_sym->is_code_function()) { std::vector res; res.reserve(left.size()); for (var_idx_t i : left) { @@ -503,7 +502,7 @@ bool Op::generate_code_step(Stack& stack) { } else { if 
(f_sym->is_inline() || f_sym->is_inline_ref()) { stack.o << AsmOp::Custom(f_sym->name + " INLINECALLDICT", (int)right.size(), (int)left.size()); - } else if (f_sym->is_regular_function() && std::get(f_sym->body)->code->require_callxargs) { + } else if (f_sym->is_code_function() && std::get(f_sym->body)->code->require_callxargs) { stack.o << AsmOp::Custom(f_sym->name + (" PREPAREDICT"), 0, 2); exec_callxargs((int)right.size() + 1, (int)left.size()); } else { diff --git a/tolk/compiler-state.cpp b/tolk/compiler-state.cpp index 14d064d9..66fad844 100644 --- a/tolk/compiler-state.cpp +++ b/tolk/compiler-state.cpp @@ -66,4 +66,8 @@ void CompilerSettings::parse_experimental_options_cmd_arg(const std::string& cmd } } +const std::vector& get_all_not_builtin_functions() { + return G.all_functions; +} + } // namespace tolk diff --git a/tolk/compiler-state.h b/tolk/compiler-state.h index 56158876..d33eec81 100644 --- a/tolk/compiler-state.h +++ b/tolk/compiler-state.h @@ -95,7 +95,7 @@ struct CompilerState { GlobalSymbolTable symtable; PersistentHeapAllocator persistent_mem; - std::vector all_code_functions; + std::vector all_functions; // all user-defined (not built-in) functions, with generic instantiations std::vector all_get_methods; std::vector all_global_vars; std::vector all_constants; diff --git a/tolk/constant-evaluator.cpp b/tolk/constant-evaluator.cpp index 73c80b9a..9ad27381 100644 --- a/tolk/constant-evaluator.cpp +++ b/tolk/constant-evaluator.cpp @@ -248,15 +248,19 @@ struct ConstantEvaluator { return ConstantValue::from_int(std::move(intval)); } - static ConstantValue handle_identifier(V v) { + static ConstantValue handle_reference(V v) { // todo better handle "appears, directly or indirectly, in its own initializer" - const Symbol* sym = lookup_global_symbol(v->name); + std::string_view name = v->get_name(); + const Symbol* sym = lookup_global_symbol(name); if (!sym) { - v->error("undefined symbol `" + static_cast(v->name) + "`"); + v->error("undefined symbol 
`" + static_cast(name) + "`"); } const GlobalConstData* const_ref = sym->try_as(); if (!const_ref) { - v->error("symbol `" + static_cast(v->name) + "` is not a constant"); + v->error("symbol `" + static_cast(name) + "` is not a constant"); + } + if (v->has_instantiationTs()) { // SOME_CONST + v->error("constant is not a generic"); } return {const_ref->value}; } @@ -274,8 +278,8 @@ struct ConstantEvaluator { if (auto v_binop = v->try_as()) { return handle_binary_operator(v_binop, visit(v_binop->get_lhs()), visit(v_binop->get_rhs())); } - if (auto v_ident = v->try_as()) { - return handle_identifier(v_ident); + if (auto v_ref = v->try_as()) { + return handle_reference(v_ref); } if (auto v_par = v->try_as()) { return visit(v_par->get_expr()); diff --git a/tolk/fwd-declarations.h b/tolk/fwd-declarations.h index d2197e66..e3599f36 100644 --- a/tolk/fwd-declarations.h +++ b/tolk/fwd-declarations.h @@ -32,7 +32,8 @@ struct FunctionData; struct GlobalVarData; struct GlobalConstData; -struct TypeExpr; +class TypeData; +using TypePtr = const TypeData*; struct SrcFile; diff --git a/tolk/generics-helpers.cpp b/tolk/generics-helpers.cpp new file mode 100644 index 00000000..3d353cc4 --- /dev/null +++ b/tolk/generics-helpers.cpp @@ -0,0 +1,272 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#include "generics-helpers.h" +#include "tolk.h" +#include "ast.h" +#include "ast-replicator.h" +#include "type-system.h" +#include "compiler-state.h" +#include "pipeline.h" + +namespace tolk { + +// given orig = "(int, T)" and substitutions = [slice], return "(int, slice)" +static TypePtr replace_genericT_with_deduced(TypePtr orig, const GenericsDeclaration* genericTs, const std::vector& substitutionTs) { + if (!orig || !orig->has_genericT_inside()) { + return orig; + } + tolk_assert(genericTs->size() == substitutionTs.size()); + + return orig->replace_children_custom([genericTs, substitutionTs](TypePtr child) { + if (const TypeDataGenericT* asT = child->try_as()) { + int idx = genericTs->find_nameT(asT->nameT); + if (idx == -1) { + throw Fatal("can not replace generic " + asT->nameT); + } + return substitutionTs[idx]; + } + return child; + }); +} + +// purpose: having `f(value: T)` and call `f(5)`, deduce T = int +// generally, there may be many generic Ts for declaration, and many arguments +// for every argument, `consider_next_condition()` is called +// example: `f(a: int, b: T1, c: (T1, T2))` and call `f(6, 7, (8, cs))` +// - `a` does not affect, it doesn't depend on generic Ts +// - next condition: param_type = `T1`, arg_type = `int`, deduce T1 = int +// - next condition: param_type = `(T1, T2)`, arg_type = `(int, slice)`, deduce T1 = int, T2 = slice +// for call `f(6, cs, (8, cs))` T1 will be both `slice` and `int`, fired an error +class GenericSubstitutionsDeduceForFunctionCall final { + const FunctionData* fun_ref; + std::vector substitutions; + + void provideDeducedT(const std::string& nameT, TypePtr deduced) { + if (deduced == TypeDataNullLiteral::create() || deduced->has_unknown_inside()) { + return; // just 'null' doesn't give sensible info + } + + int idx = fun_ref->genericTs->find_nameT(nameT); + if (substitutions[idx] == nullptr) { + substitutions[idx] = deduced; + } else if (substitutions[idx] != deduced) { + throw std::runtime_error(nameT + 
" is both " + substitutions[idx]->as_human_readable() + " and " + deduced->as_human_readable()); + } + } + +public: + explicit GenericSubstitutionsDeduceForFunctionCall(const FunctionData* fun_ref) + : fun_ref(fun_ref) { + substitutions.resize(fun_ref->genericTs->size()); // filled with nullptr (nothing deduced) + } + + void consider_next_condition(TypePtr param_type, TypePtr arg_type) { + if (const auto* asT = param_type->try_as()) { + // `(arg: T)` called as `f([1, 2])` => T is [int, int] + provideDeducedT(asT->nameT, arg_type); + } else if (const auto* p_tensor = param_type->try_as()) { + // `arg: (int, T)` called as `f((5, cs))` => T is slice + if (const auto* a_tensor = arg_type->try_as(); a_tensor && a_tensor->size() == p_tensor->size()) { + for (int i = 0; i < a_tensor->size(); ++i) { + consider_next_condition(p_tensor->items[i], a_tensor->items[i]); + } + } + } else if (const auto* p_tuple = param_type->try_as()) { + // `arg: [int, T]` called as `f([5, cs])` => T is slice + if (const auto* a_tuple = arg_type->try_as(); a_tuple && a_tuple->size() == p_tuple->size()) { + for (int i = 0; i < a_tuple->size(); ++i) { + consider_next_condition(p_tuple->items[i], a_tuple->items[i]); + } + } + } else if (const auto* p_callable = param_type->try_as()) { + // `arg: fun(TArg) -> TResult` called as `f(calcTupleLen)` => TArg is tuple, TResult is int + if (const auto* a_callable = arg_type->try_as(); a_callable && a_callable->params_size() == p_callable->params_size()) { + for (int i = 0; i < a_callable->params_size(); ++i) { + consider_next_condition(p_callable->params_types[i], a_callable->params_types[i]); + } + consider_next_condition(p_callable->return_type, a_callable->return_type); + } + } + } + + int get_first_not_deduced_idx() const { + for (int i = 0; i < static_cast(substitutions.size()); ++i) { + if (substitutions[i] == nullptr) { + return i; + } + } + return -1; + } + + std::vector flush() { + return {std::move(substitutions)}; + } +}; + +// clone the body 
of `f` replacing T everywhere with a substitution +// before: `fun f(v: T) { var cp: [T] = [v]; }` +// after: `fun f(v: int) { var cp: [int] = [v]; }` +// an instantiated function becomes a deep copy, all AST nodes are copied, no previous pointers left +class GenericFunctionReplicator final : public ASTReplicatorFunction { + const GenericsDeclaration* genericTs; + const std::vector& substitutionTs; + +protected: + using ASTReplicatorFunction::clone; + + TypePtr clone(TypePtr t) override { + return replace_genericT_with_deduced(t, genericTs, substitutionTs); + } + +public: + GenericFunctionReplicator(const GenericsDeclaration* genericTs, const std::vector& substitutionTs) + : genericTs(genericTs) + , substitutionTs(substitutionTs) { + } + + V clone_function_body(V v_function) override { + return createV( + v_function->loc, + clone(v_function->get_identifier()), + clone(v_function->get_param_list()), + clone(v_function->get_body()), + clone(v_function->declared_return_type), + nullptr, // a newly-created function is not generic + v_function->method_id, + v_function->flags + ); + } +}; + +std::string GenericsDeclaration::as_human_readable() const { + std::string result = "<"; + for (const GenericsItem& item : itemsT) { + if (result.size() > 1) { + result += ","; + } + result += item.nameT; + } + result += ">"; + return result; +} + +int GenericsDeclaration::find_nameT(std::string_view nameT) const { + for (int i = 0; i < static_cast(itemsT.size()); ++i) { + if (itemsT[i].nameT == nameT) { + return i; + } + } + return -1; +} + +// after creating a deep copy of `f` like `f`, its new and fresh body needs the previous pipeline to run +// for example, all local vars need to be registered as symbols, etc. 
+static void run_pipeline_for_instantiated_function(const FunctionData* inst_fun_ref) { + // these pipes are exactly the same as in tolk.cpp — all preceding (and including) type inferring + pipeline_resolve_identifiers_and_assign_symbols(inst_fun_ref); + pipeline_calculate_rvalue_lvalue(inst_fun_ref); + pipeline_detect_unreachable_statements(inst_fun_ref); + pipeline_infer_types_and_calls_and_fields(inst_fun_ref); +} + +std::string generate_instantiated_name(const std::string& orig_name, const std::vector& substitutions) { + // an instantiated function name will be "{orig_name}<{T1,T2,...}>" + std::string name = orig_name; + name += "<"; + for (TypePtr subs : substitutions) { + if (name.size() > orig_name.size() + 1) { + name += ","; + } + name += subs->as_human_readable(); + } + name.erase(std::remove(name.begin(), name.end(), ' '), name.end()); + name += ">"; + return name; +} + +td::Result> deduce_substitutionTs_on_generic_func_call(const FunctionData* called_fun, std::vector&& arg_types, TypePtr return_hint) { + try { + GenericSubstitutionsDeduceForFunctionCall deducing(called_fun); + for (const LocalVarData& param : called_fun->parameters) { + if (param.declared_type->has_genericT_inside() && param.idx < static_cast(arg_types.size())) { + deducing.consider_next_condition(param.declared_type, arg_types[param.idx]); + } + } + int idx = deducing.get_first_not_deduced_idx(); + if (idx != -1 && return_hint && called_fun->declared_return_type->has_genericT_inside()) { + deducing.consider_next_condition(called_fun->declared_return_type, return_hint); + idx = deducing.get_first_not_deduced_idx(); + } + if (idx != -1) { + return td::Status::Error(td::Slice{"can not deduce " + called_fun->genericTs->get_nameT(idx)}); + } + return deducing.flush(); + } catch (const std::runtime_error& ex) { + return td::Status::Error(td::Slice{ex.what()}); + } +} + +const FunctionData* instantiate_generic_function(SrcLocation loc, const FunctionData* fun_ref, const std::string& 
inst_name, std::vector&& substitutionTs) { + tolk_assert(fun_ref->genericTs); + + // if `f` was earlier instantiated, return it + if (const auto* existing = lookup_global_symbol(inst_name)) { + const FunctionData* inst_ref = existing->try_as(); + tolk_assert(inst_ref); + return inst_ref; + } + + std::vector parameters; + parameters.reserve(fun_ref->get_num_params()); + for (const LocalVarData& orig_p : fun_ref->parameters) { + parameters.emplace_back(orig_p.name, orig_p.loc, replace_genericT_with_deduced(orig_p.declared_type, fun_ref->genericTs, substitutionTs), orig_p.flags, orig_p.idx); + } + TypePtr declared_return_type = replace_genericT_with_deduced(fun_ref->declared_return_type, fun_ref->genericTs, substitutionTs); + const GenericsInstantiation* instantiationTs = new GenericsInstantiation(loc, std::move(substitutionTs)); + + if (fun_ref->is_asm_function()) { + FunctionData* inst_ref = new FunctionData(inst_name, fun_ref->loc, declared_return_type, std::move(parameters), fun_ref->flags, nullptr, instantiationTs, new FunctionBodyAsm, fun_ref->ast_root); + inst_ref->arg_order = fun_ref->arg_order; + inst_ref->ret_order = fun_ref->ret_order; + G.symtable.add_function(inst_ref); + G.all_functions.push_back(inst_ref); + run_pipeline_for_instantiated_function(inst_ref); + return inst_ref; + } + + if (fun_ref->is_builtin_function()) { + FunctionData* inst_ref = new FunctionData(inst_name, fun_ref->loc, declared_return_type, std::move(parameters), fun_ref->flags, nullptr, instantiationTs, fun_ref->body, fun_ref->ast_root); + inst_ref->arg_order = fun_ref->arg_order; + inst_ref->ret_order = fun_ref->ret_order; + G.symtable.add_function(inst_ref); + return inst_ref; + } + + GenericFunctionReplicator replicator(fun_ref->genericTs, instantiationTs->substitutions); + V inst_root = replicator.clone_function_body(fun_ref->ast_root->as()); + + FunctionData* inst_ref = new FunctionData(inst_name, fun_ref->loc, declared_return_type, std::move(parameters), fun_ref->flags, 
nullptr, instantiationTs, new FunctionBodyCode, inst_root); + inst_ref->arg_order = fun_ref->arg_order; + inst_ref->ret_order = fun_ref->ret_order; + inst_root->mutate()->assign_fun_ref(inst_ref); + G.symtable.add_function(inst_ref); + G.all_functions.push_back(inst_ref); + run_pipeline_for_instantiated_function(inst_ref); + return inst_ref; +} + +} // namespace tolk diff --git a/tolk/generics-helpers.h b/tolk/generics-helpers.h new file mode 100644 index 00000000..2a304f55 --- /dev/null +++ b/tolk/generics-helpers.h @@ -0,0 +1,64 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#pragma once + +#include "src-file.h" +#include "fwd-declarations.h" +#include "td/utils/Status.h" +#include + +namespace tolk { + +// when a function is declared `f`, this "" is represented as this class +// (not at AST, but at symbol storage level) +struct GenericsDeclaration { + struct GenericsItem { + std::string_view nameT; + + explicit GenericsItem(std::string_view nameT) + : nameT(nameT) {} + }; + + explicit GenericsDeclaration(std::vector&& itemsT) + : itemsT(std::move(itemsT)) {} + + const std::vector itemsT; + + std::string as_human_readable() const; + + size_t size() const { return itemsT.size(); } + bool has_nameT(std::string_view nameT) const { return find_nameT(nameT) != -1; } + int find_nameT(std::string_view nameT) const; + std::string get_nameT(int idx) const { return static_cast(itemsT[idx].nameT); } +}; + +// when a function call is `f()`, this "" is represented as this class +struct GenericsInstantiation { + const std::vector substitutions; // for genericTs + const SrcLocation loc; // first instantiation location + + explicit GenericsInstantiation(SrcLocation loc, std::vector&& substitutions) + : substitutions(std::move(substitutions)) + , loc(loc) { + } +}; + +std::string generate_instantiated_name(const std::string& orig_name, const std::vector& substitutions); +td::Result> deduce_substitutionTs_on_generic_func_call(const FunctionData* called_fun, std::vector&& arg_types, TypePtr return_hint); +const FunctionData* instantiate_generic_function(SrcLocation loc, const FunctionData* fun_ref, const std::string& inst_name, std::vector&& substitutionTs); + +} // namespace tolk diff --git a/tolk/lexer.cpp b/tolk/lexer.cpp index 4278f040..7e8c8fb2 100644 --- a/tolk/lexer.cpp +++ b/tolk/lexer.cpp @@ -328,6 +328,7 @@ struct ChunkIdentifierOrKeyword final : ChunkLexerBase { case 2: if (str == "do") return tok_do; if (str == "if") return tok_if; + if (str == "as") return tok_as; break; case 3: if (str == "int") return tok_int; @@ -345,7 +346,6 @@ 
struct ChunkIdentifierOrKeyword final : ChunkLexerBase { if (str == "null") return tok_null; if (str == "void") return tok_void; if (str == "bool") return tok_bool; - if (str == "auto") return tok_auto; if (str == "self") return tok_self; if (str == "tolk") return tok_tolk; if (str == "type") return tok_type; @@ -578,6 +578,16 @@ void Lexer::next_special(TokenType parse_next_as, const char* str_expected) { cur_token = tokens_circularbuf[++cur_token_idx & 7]; } +Lexer::SavedPositionForLookahead Lexer::save_parsing_position() const { + return {p_next, cur_token_idx, cur_token}; +} + +void Lexer::restore_position(SavedPositionForLookahead saved) { + p_next = saved.p_next; + cur_token_idx = last_token_idx = saved.cur_token_idx; + cur_token = saved.cur_token; +} + void Lexer::error(const std::string& err_msg) const { throw ParseError(cur_location(), err_msg); } @@ -595,7 +605,7 @@ void lexer_init() { // Hence, it's difficult to measure Lexer performance separately. // This function can be called just to tick Lexer performance, it just scans all input files. // There is no sense to use it in production, but when refactoring and optimizing Lexer, it's useful. 
-void lexer_measure_performance(const AllSrcFiles& files_to_just_parse) { +void lexer_measure_performance(const AllRegisteredSrcFiles& files_to_just_parse) { for (const SrcFile* file : files_to_just_parse) { Lexer lex(file); while (!lex.is_eof()) { diff --git a/tolk/lexer.h b/tolk/lexer.h index bf116188..81d579db 100644 --- a/tolk/lexer.h +++ b/tolk/lexer.h @@ -125,9 +125,9 @@ enum TokenType { tok_builder, tok_continuation, tok_tuple, - tok_auto, tok_void, tok_arrow, + tok_as, tok_tolk, tok_semver, @@ -166,6 +166,12 @@ class Lexer { public: + struct SavedPositionForLookahead { + const char* p_next = nullptr; + int cur_token_idx = 0; + Token cur_token; + }; + explicit Lexer(const SrcFile* file); Lexer(const Lexer&) = delete; Lexer &operator=(const Lexer&) = delete; @@ -209,6 +215,9 @@ public: void next(); void next_special(TokenType parse_next_as, const char* str_expected); + SavedPositionForLookahead save_parsing_position() const; + void restore_position(SavedPositionForLookahead saved); + void check(TokenType next_tok, const char* str_expected) const { if (cur_token.type != next_tok) { unexpected(str_expected); // unlikely path, not inlined @@ -230,6 +239,6 @@ public: void lexer_init(); // todo #ifdef TOLK_PROFILING -void lexer_measure_performance(const AllSrcFiles& files_to_just_parse); +void lexer_measure_performance(const AllRegisteredSrcFiles& files_to_just_parse); } // namespace tolk diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index 3c02c7d1..77dbee41 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -17,7 +17,8 @@ #include "tolk.h" #include "src-file.h" #include "ast.h" -#include "compiler-state.h" +#include "ast-visitor.h" +#include "type-system.h" #include "common/refint.h" #include "constant-evaluator.h" @@ -48,7 +49,7 @@ struct LValGlobs { }; std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, LValGlobs* lval_globs = nullptr); -void process_statement(AnyV v, CodeBlob& code); +void 
process_any_statement(AnyV v, CodeBlob& code); static std::vector> pre_compile_tensor_inner(CodeBlob& code, const std::vector& args, @@ -128,25 +129,24 @@ static std::vector pre_compile_tensor(CodeBlob& code, const std::vect static std::vector pre_compile_let(CodeBlob& code, AnyExprV lhs, AnyExprV rhs, SrcLocation loc) { // [lhs] = [rhs]; since type checking is ok, it's the same as "lhs = rhs" - if (lhs->type == ast_tensor_square && rhs->type == ast_tensor_square) { - std::vector right = pre_compile_tensor(code, rhs->as()->get_items()); + if (lhs->type == ast_typed_tuple && rhs->type == ast_typed_tuple) { + std::vector right = pre_compile_tensor(code, rhs->as()->get_items()); LValGlobs globs; - std::vector left = pre_compile_tensor(code, lhs->as()->get_items(), &globs); + std::vector left = pre_compile_tensor(code, lhs->as()->get_items(), &globs); code.on_var_modification(left, loc); code.emplace_back(loc, Op::_Let, std::move(left), right); globs.gen_ops_set_globs(code, loc); return right; } // [lhs] = rhs; it's un-tuple to N left vars - if (lhs->type == ast_tensor_square) { + if (lhs->type == ast_typed_tuple) { std::vector right = pre_compile_expr(rhs, code); - TypeExpr* rhs_type = rhs->inferred_type; - TypeExpr::remove_indirect(rhs_type); - TypeExpr* unpacked_type = rhs_type->args.at(0); // rhs->inferred_type is tuple> - std::vector rvect = {code.create_tmp_var(unpacked_type, rhs->loc)}; + const TypeDataTypedTuple* inferred_tuple = rhs->inferred_type->try_as(); + std::vector types_list = inferred_tuple->items; + std::vector rvect = {code.create_tmp_var(TypeDataTensor::create(std::move(types_list)), rhs->loc)}; code.emplace_back(lhs->loc, Op::_UnTuple, rvect, std::move(right)); LValGlobs globs; - std::vector left = pre_compile_tensor(code, lhs->as()->get_items(), &globs); + std::vector left = pre_compile_tensor(code, lhs->as()->get_items(), &globs); code.on_var_modification(left, loc); code.emplace_back(loc, Op::_Let, std::move(left), rvect); 
globs.gen_ops_set_globs(code, loc); @@ -162,7 +162,7 @@ static std::vector pre_compile_let(CodeBlob& code, AnyExprV lhs, AnyE return right; } -static std::vector gen_op_call(CodeBlob& code, TypeExpr* ret_type, SrcLocation here, +static std::vector gen_op_call(CodeBlob& code, TypePtr ret_type, SrcLocation here, std::vector&& args_vars, const FunctionData* fun_ref) { std::vector rvect = {code.create_tmp_var(ret_type, here)}; Op& op = code.emplace_back(here, Op::_Call, rvect, std::move(args_vars), fun_ref); @@ -173,38 +173,75 @@ static std::vector gen_op_call(CodeBlob& code, TypeExpr* ret_type, Sr } -static std::vector process_binary_operator(V v, CodeBlob& code) { - TokenType t = v->tok; - std::string operator_name = static_cast(v->operator_name); - - if (v->is_set_assign()) { - std::string_view calc_operator = std::string_view{operator_name}.substr(0, operator_name.size() - 1); - auto v_apply = createV(v->loc, calc_operator, static_cast(t - 1), v->get_lhs(), v->get_rhs()); - v_apply->assign_inferred_type(v->inferred_type); - return pre_compile_let(code, v->get_lhs(), v_apply, v->loc); +static std::vector process_symbol(SrcLocation loc, const Symbol* sym, CodeBlob& code, LValGlobs* lval_globs) { + if (const auto* glob_ref = sym->try_as()) { + std::vector rvect = {code.create_tmp_var(glob_ref->declared_type, loc)}; + if (lval_globs) { + lval_globs->add_modified_glob(glob_ref, rvect[0]); + return rvect; + } else { + code.emplace_back(loc, Op::_GlobVar, rvect, std::vector{}, glob_ref); + return rvect; + } } - if (v->is_assign()) { + if (const auto* const_ref = sym->try_as()) { + if (const_ref->is_int_const()) { + std::vector rvect = {code.create_tmp_var(TypeDataInt::create(), loc)}; + code.emplace_back(loc, Op::_IntConst, rvect, const_ref->as_int_const()); + return rvect; + } else { + std::vector rvect = {code.create_tmp_var(TypeDataSlice::create(), loc)}; + code.emplace_back(loc, Op::_SliceConst, rvect, const_ref->as_slice_const()); + return rvect; + } + } + if (const 
auto* fun_ref = sym->try_as()) { + std::vector rvect = {code.create_tmp_var(fun_ref->inferred_full_type, loc)}; + code.emplace_back(loc, Op::_GlobVar, rvect, std::vector{}, fun_ref); + return rvect; + } + if (const auto* var_ref = sym->try_as()) { + return {var_ref->idx}; + } + throw Fatal("process_symbol"); +} + +static std::vector process_assign(V v, CodeBlob& code) { + if (auto lhs_decl = v->get_lhs()->try_as()) { + return pre_compile_let(code, lhs_decl->get_expr(), v->get_rhs(), v->loc); + } else { return pre_compile_let(code, v->get_lhs(), v->get_rhs(), v->loc); } - if (t == tok_minus || t == tok_plus || - t == tok_bitwise_and || t == tok_bitwise_or || t == tok_bitwise_xor || - t == tok_eq || t == tok_lt || t == tok_gt || t == tok_leq || t == tok_geq || t == tok_neq || t == tok_spaceship || - t == tok_lshift || t == tok_rshift || t == tok_rshiftC || t == tok_rshiftR || - t == tok_mul || t == tok_div || t == tok_mod || t == tok_divC || t == tok_divR) { - const FunctionData* fun_ref = lookup_global_symbol("_" + operator_name + "_")->as(); +} + +static std::vector process_set_assign(V v, CodeBlob& code) { + // for "a += b", emulate "a = a + b" + // seems not beautiful, but it works; probably, this transformation should be done at AST level in advance + std::string_view calc_operator = v->operator_name; // "+" for operator += + auto v_apply = createV(v->loc, calc_operator, static_cast(v->tok - 1), v->get_lhs(), v->get_rhs()); + v_apply->assign_inferred_type(v->inferred_type); + v_apply->assign_fun_ref(v->fun_ref); + return pre_compile_let(code, v->get_lhs(), v_apply, v->loc); +} + +static std::vector process_binary_operator(V v, CodeBlob& code) { + TokenType t = v->tok; + + if (v->fun_ref) { // almost all operators, fun_ref was assigned at type inferring std::vector args_vars = pre_compile_tensor(code, {v->get_lhs(), v->get_rhs()}); - return gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), fun_ref); + return gen_op_call(code, v->inferred_type, 
v->loc, std::move(args_vars), v->fun_ref); } if (t == tok_logical_and || t == tok_logical_or) { // do the following transformations: // a && b -> a ? (b != 0) : 0 // a || b -> a ? 1 : (b != 0) AnyExprV v_0 = createV(v->loc, td::make_refint(0), "0"); - v_0->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + v_0->mutate()->assign_inferred_type(TypeDataInt::create()); AnyExprV v_1 = createV(v->loc, td::make_refint(-1), "-1"); - v_1->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); - AnyExprV v_b_ne_0 = createV(v->loc, "!=", tok_neq, v->get_rhs(), v_0); - v_b_ne_0->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + v_1->mutate()->assign_inferred_type(TypeDataInt::create()); + auto v_b_ne_0 = createV(v->loc, "!=", tok_neq, v->get_rhs(), v_0); + v_b_ne_0->mutate()->assign_inferred_type(TypeDataInt::create()); + v_b_ne_0->mutate()->assign_fun_ref(lookup_global_symbol("_!=_")->as()); std::vector cond = pre_compile_expr(v->get_lhs(), code); tolk_assert(cond.size() == 1); std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; @@ -222,9 +259,8 @@ static std::vector process_binary_operator(V v, } static std::vector process_unary_operator(V v, CodeBlob& code) { - const FunctionData* fun_ref = lookup_global_symbol(static_cast(v->operator_name) + "_")->as(); std::vector args_vars = pre_compile_tensor(code, {v->get_rhs()}); - return gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), fun_ref); + return gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), v->fun_ref); } static std::vector process_ternary_operator(V v, CodeBlob& code) { @@ -241,8 +277,17 @@ static std::vector process_ternary_operator(V v return rvect; } +static std::vector process_dot_access(V v, CodeBlob& code, LValGlobs* lval_globs) { + // it's NOT a method call `t.tupleSize()` (since such cases are handled by process_function_call) + // it's `t.0`, `getUser().id`, and `t.tupleSize` (as a reference, not as a 
call) + // currently, nothing except a global function can be a target of dot access + const FunctionData* fun_ref = v->target; + tolk_assert(fun_ref); + return process_symbol(v->loc, fun_ref, code, lval_globs); +} + static std::vector process_function_call(V v, CodeBlob& code) { - // most likely it's a global function, but also may be `some_var(args)` or even `getF()(args)` + // v is `globalF(args)` / `globalF(args)` / `obj.method(args)` / `local_var(args)` / `getF()(args)` const FunctionData* fun_ref = v->fun_maybe; if (!fun_ref) { std::vector args; @@ -251,7 +296,7 @@ static std::vector process_function_call(V v, Code args.push_back(v->get_arg(i)->get_expr()); } std::vector args_vars = pre_compile_tensor(code, args); - std::vector tfunc = pre_compile_expr(v->get_called_f(), code); + std::vector tfunc = pre_compile_expr(v->get_callee(), code); tolk_assert(tfunc.size() == 1); args_vars.push_back(tfunc[0]); std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; @@ -260,95 +305,54 @@ static std::vector process_function_call(V v, Code return rvect; } + int delta_self = v->is_dot_call(); + AnyExprV obj_leftmost = nullptr; std::vector args; - args.reserve(v->get_num_args()); - for (int i = 0; i < v->get_num_args(); ++i) { - args.push_back(v->get_arg(i)->get_expr()); - } - std::vector args_vars = pre_compile_tensor(code, args); - - TypeExpr* op_call_type = v->inferred_type; - if (fun_ref->has_mutate_params()) { - std::vector types_list; - for (int i = 0; i < v->get_num_args(); ++i) { - if (fun_ref->parameters[i].is_mutate_parameter()) { - types_list.push_back(args[i]->inferred_type); - } + args.reserve(delta_self + v->get_num_args()); + if (delta_self) { + args.push_back(v->get_dot_obj()); + obj_leftmost = v->get_dot_obj(); + while (obj_leftmost->type == ast_function_call && obj_leftmost->as()->is_dot_call() && obj_leftmost->as()->fun_maybe && obj_leftmost->as()->fun_maybe->does_return_self()) { + obj_leftmost = obj_leftmost->as()->get_dot_obj(); } - 
types_list.push_back(v->inferred_type); - op_call_type = TypeExpr::new_tensor(std::move(types_list)); } - - std::vector rvect_apply = gen_op_call(code, op_call_type, v->loc, std::move(args_vars), fun_ref); - - if (fun_ref->has_mutate_params()) { - LValGlobs local_globs; - std::vector left; - for (int i = 0; i < v->get_num_args(); ++i) { - if (fun_ref->parameters[i].is_mutate_parameter()) { - AnyExprV arg_i = v->get_arg(i)->get_expr(); - tolk_assert(arg_i->is_lvalue); - std::vector ith_var_idx = pre_compile_expr(arg_i, code, &local_globs); - left.insert(left.end(), ith_var_idx.begin(), ith_var_idx.end()); - } - } - std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; - left.push_back(rvect[0]); - code.on_var_modification(left, v->loc); - code.emplace_back(v->loc, Op::_Let, std::move(left), rvect_apply); - local_globs.gen_ops_set_globs(code, v->loc); - return rvect; - } - - return rvect_apply; -} - -static std::vector process_dot_method_call(V v, CodeBlob& code) { - std::vector args; - args.reserve(1 + v->get_num_args()); - args.push_back(v->get_obj()); for (int i = 0; i < v->get_num_args(); ++i) { args.push_back(v->get_arg(i)->get_expr()); } std::vector> vars_per_arg = pre_compile_tensor_inner(code, args, nullptr); - TypeExpr* op_call_type = v->inferred_type; - TypeExpr* real_ret_type = v->inferred_type; - if (v->fun_ref->does_return_self()) { - real_ret_type = TypeExpr::new_unit(); - if (!v->fun_ref->parameters[0].is_mutate_parameter()) { - op_call_type = TypeExpr::new_unit(); + TypePtr op_call_type = v->inferred_type; + TypePtr real_ret_type = v->inferred_type; + if (delta_self && fun_ref->does_return_self()) { + real_ret_type = TypeDataVoid::create(); + if (!fun_ref->parameters[0].is_mutate_parameter()) { + op_call_type = TypeDataVoid::create(); } } - if (v->fun_ref->has_mutate_params()) { - std::vector types_list; - for (int i = 0; i < 1 + v->get_num_args(); ++i) { - if (v->fun_ref->parameters[i].is_mutate_parameter()) { + if 
(fun_ref->has_mutate_params()) { + std::vector types_list; + for (int i = 0; i < delta_self + v->get_num_args(); ++i) { + if (fun_ref->parameters[i].is_mutate_parameter()) { types_list.push_back(args[i]->inferred_type); } } types_list.push_back(real_ret_type); - op_call_type = TypeExpr::new_tensor(std::move(types_list)); + op_call_type = TypeDataTensor::create(std::move(types_list)); } std::vector args_vars; for (const std::vector& list : vars_per_arg) { args_vars.insert(args_vars.end(), list.cbegin(), list.cend()); } - std::vector rvect_apply = gen_op_call(code, op_call_type, v->loc, std::move(args_vars), v->fun_ref); + std::vector rvect_apply = gen_op_call(code, op_call_type, v->loc, std::move(args_vars), fun_ref); - AnyExprV obj_leftmost = args[0]; - while (obj_leftmost->type == ast_dot_method_call && obj_leftmost->as()->fun_ref->does_return_self()) { - obj_leftmost = obj_leftmost->as()->get_obj(); - } - - if (v->fun_ref->has_mutate_params()) { + if (fun_ref->has_mutate_params()) { LValGlobs local_globs; std::vector left; - for (int i = 0; i < 1 + v->get_num_args(); ++i) { - if (v->fun_ref->parameters[i].is_mutate_parameter()) { - AnyExprV arg_i = i == 0 ? obj_leftmost : args[i]; - tolk_assert (arg_i->is_lvalue || i == 0); + for (int i = 0; i < delta_self + v->get_num_args(); ++i) { + if (fun_ref->parameters[i].is_mutate_parameter()) { + AnyExprV arg_i = obj_leftmost && i == 0 ? 
obj_leftmost : args[i]; + tolk_assert(arg_i->is_lvalue || i == 0); if (arg_i->is_lvalue) { std::vector ith_var_idx = pre_compile_expr(arg_i, code, &local_globs); left.insert(left.end(), ith_var_idx.begin(), ith_var_idx.end()); @@ -365,7 +369,7 @@ static std::vector process_dot_method_call(V v, rvect_apply = rvect; } - if (v->fun_ref->does_return_self()) { + if (obj_leftmost && fun_ref->does_return_self()) { if (obj_leftmost->is_lvalue) { // to handle if obj is global var, potentially re-assigned inside a chain rvect_apply = pre_compile_expr(obj_leftmost, code); } else { // temporary object, not lvalue, pre_compile_expr @@ -380,7 +384,7 @@ static std::vector process_tensor(V v, CodeBlob& code, LV return pre_compile_tensor(code, v->get_items(), lval_globs); } -static std::vector process_tensor_square(V v, CodeBlob& code, LValGlobs* lval_globs) { +static std::vector process_typed_tuple(V v, CodeBlob& code, LValGlobs* lval_globs) { if (lval_globs) { // todo some time, make "var (a, [b,c]) = (1, [2,3])" work v->error("[...] 
can not be used as lvalue here"); } @@ -417,82 +421,53 @@ static std::vector process_null_keyword(V v, CodeBl return gen_op_call(code, v->inferred_type, v->loc, {}, builtin_sym); } -static std::vector process_self_keyword(V v, CodeBlob& code) { - tolk_assert(code.fun_ref->does_accept_self() && v->param_ref); - tolk_assert(v->param_ref->idx == 0); - return {0}; -} - -static std::vector process_identifier(V v, CodeBlob& code, LValGlobs* lval_globs) { - const Symbol* sym = v->sym; - if (const auto* glob_ref = sym->try_as()) { - std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; - if (lval_globs) { - lval_globs->add_modified_glob(glob_ref, rvect[0]); - return rvect; - } else { - code.emplace_back(v->loc, Op::_GlobVar, rvect, std::vector{}, glob_ref); - return rvect; - } - } - if (const auto* const_ref = sym->try_as()) { - std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; - if (const_ref->is_int_const()) { - code.emplace_back(v->loc, Op::_IntConst, rvect, const_ref->as_int_const()); - } else { - code.emplace_back(v->loc, Op::_SliceConst, rvect, const_ref->as_slice_const()); - } - return rvect; - } - if (const auto* fun_ref = sym->try_as()) { - std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; - code.emplace_back(v->loc, Op::_GlobVar, rvect, std::vector{}, fun_ref); - return rvect; - } - if (const auto* var_ref = sym->try_as()) { -#ifdef TOLK_DEBUG - tolk_assert(var_ref->idx != -1); -#endif - return {var_ref->idx}; - } - throw UnexpectedASTNodeType(v, "process_identifier"); -} - -static std::vector process_local_var(V v, CodeBlob& code, LValGlobs* lval_globs) { +static std::vector process_local_var(V v, CodeBlob& code) { if (v->marked_as_redef) { - return process_identifier(v->get_identifier()->as(), code, lval_globs); + return process_symbol(v->loc, v->var_ref, code, nullptr); } - if (v->get_identifier()->try_as()) { - const LocalVarData* var_ref = v->var_maybe->as(); - tolk_assert(var_ref->idx == -1); - 
var_ref->mutate()->assign_idx(code.create_var(v->inferred_type, var_ref, v->loc)); - return {var_ref->idx}; - } - return {code.create_tmp_var(v->inferred_type, v->loc)}; // underscore + + tolk_assert(v->var_ref->idx == -1); + v->var_ref->mutate()->assign_idx(code.create_var(v->inferred_type, v->var_ref, v->loc)); + return {v->var_ref->idx}; +} + +static std::vector process_local_vars_declaration(V, CodeBlob&) { + // it can not appear as a standalone expression + // `var ... = rhs` is handled by ast_assign + tolk_assert(false); } static std::vector process_underscore(V v, CodeBlob& code) { + // when _ is used as left side of assignment, like `(cs, _) = cs.loadAndReturn()` return {code.create_tmp_var(v->inferred_type, v->loc)}; } std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, LValGlobs* lval_globs) { switch (v->type) { + case ast_reference: + return process_symbol(v->loc, v->as()->sym, code, lval_globs); + case ast_assign: + return process_assign(v->as(), code); + case ast_set_assign: + return process_set_assign(v->as(), code); case ast_binary_operator: return process_binary_operator(v->as(), code); case ast_unary_operator: return process_unary_operator(v->as(), code); case ast_ternary_operator: return process_ternary_operator(v->as(), code); + case ast_cast_as_operator: + return pre_compile_expr(v->as()->get_expr(), code, lval_globs); + case ast_dot_access: + return process_dot_access(v->as(), code, lval_globs); case ast_function_call: return process_function_call(v->as(), code); - case ast_dot_method_call: - return process_dot_method_call(v->as(), code); case ast_parenthesized_expression: return pre_compile_expr(v->as()->get_expr(), code, lval_globs); case ast_tensor: return process_tensor(v->as(), code, lval_globs); - case ast_tensor_square: - return process_tensor_square(v->as(), code, lval_globs); + case ast_typed_tuple: + return process_typed_tuple(v->as(), code, lval_globs); case ast_int_const: return process_int_const(v->as(), code); case 
ast_string_const: @@ -501,12 +476,10 @@ std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, LValGlobs* l return process_bool_const(v->as(), code); case ast_null_keyword: return process_null_keyword(v->as(), code); - case ast_self_keyword: - return process_self_keyword(v->as(), code); - case ast_identifier: - return process_identifier(v->as(), code, lval_globs); - case ast_local_var: - return process_local_var(v->as(), code, lval_globs); + case ast_local_var_lhs: + return process_local_var(v->as(), code); + case ast_local_vars_declaration: + return process_local_vars_declaration(v->as(), code); case ast_underscore: return process_underscore(v->as(), code); default: @@ -515,39 +488,34 @@ std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, LValGlobs* l } -static void process_local_vars_declaration(V v, CodeBlob& code) { - pre_compile_let(code, v->get_lhs(), v->get_assigned_val(), v->loc); -} - static void process_sequence(V v, CodeBlob& code) { for (AnyV item : v->get_items()) { - process_statement(item, code); + process_any_statement(item, code); } } - static void process_assert_statement(V v, CodeBlob& code) { std::vector args(3); if (auto v_not = v->get_cond()->try_as(); v_not && v_not->tok == tok_logical_not) { args[0] = v->get_thrown_code(); args[1] = v->get_cond()->as()->get_rhs(); args[2] = createV(v->loc, true); - args[2]->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + args[2]->mutate()->assign_inferred_type(TypeDataInt::create()); } else { args[0] = v->get_thrown_code(); args[1] = v->get_cond(); args[2] = createV(v->loc, false); - args[2]->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + args[2]->mutate()->assign_inferred_type(TypeDataInt::create()); } const FunctionData* builtin_sym = lookup_global_symbol("__throw_if_unless")->as(); std::vector args_vars = pre_compile_tensor(code, args); - gen_op_call(code, TypeExpr::new_unit(), v->loc, std::move(args_vars), builtin_sym); + gen_op_call(code, 
TypeDataVoid::create(), v->loc, std::move(args_vars), builtin_sym); } static void process_catch_variable(AnyExprV v_catch_var, CodeBlob& code) { - if (auto v_ident = v_catch_var->try_as()) { - const LocalVarData* var_ref = v_ident->sym->as(); + if (auto v_ref = v_catch_var->try_as(); v_ref && v_ref->sym) { // not underscore + const LocalVarData* var_ref = v_ref->sym->as(); tolk_assert(var_ref->idx == -1); var_ref->mutate()->assign_idx(code.create_var(v_catch_var->inferred_type, var_ref, v_catch_var->loc)); } @@ -557,7 +525,7 @@ static void process_try_catch_statement(V v, CodeBlob& code.require_callxargs = true; Op& try_catch_op = code.emplace_back(v->loc, Op::_TryCatch); code.push_set_cur(try_catch_op.block0); - process_statement(v->get_try_body(), code); + process_any_statement(v->get_try_body(), code); code.close_pop_cur(v->get_try_body()->loc_end); code.push_set_cur(try_catch_op.block1); @@ -567,7 +535,7 @@ static void process_try_catch_statement(V v, CodeBlob& process_catch_variable(catch_vars[0], code); process_catch_variable(catch_vars[1], code); try_catch_op.left = pre_compile_tensor(code, {catch_vars[1], catch_vars[0]}); - process_statement(v->get_catch_body(), code); + process_any_statement(v->get_catch_body(), code); code.close_pop_cur(v->get_catch_body()->loc_end); } @@ -575,7 +543,7 @@ static void process_repeat_statement(V v, CodeBlob& code) std::vector tmp_vars = pre_compile_expr(v->get_cond(), code); Op& repeat_op = code.emplace_back(v->loc, Op::_Repeat, tmp_vars); code.push_set_cur(repeat_op.block0); - process_statement(v->get_body(), code); + process_any_statement(v->get_body(), code); code.close_pop_cur(v->get_body()->loc_end); } @@ -583,10 +551,10 @@ static void process_if_statement(V v, CodeBlob& code) { std::vector tmp_vars = pre_compile_expr(v->get_cond(), code); Op& if_op = code.emplace_back(v->loc, Op::_If, std::move(tmp_vars)); code.push_set_cur(if_op.block0); - process_statement(v->get_if_body(), code); + 
process_any_statement(v->get_if_body(), code); code.close_pop_cur(v->get_if_body()->loc_end); code.push_set_cur(if_op.block1); - process_statement(v->get_else_body(), code); + process_any_statement(v->get_else_body(), code); code.close_pop_cur(v->get_else_body()->loc_end); if (v->is_ifnot) { std::swap(if_op.block0, if_op.block1); @@ -596,7 +564,7 @@ static void process_if_statement(V v, CodeBlob& code) { static void process_do_while_statement(V v, CodeBlob& code) { Op& until_op = code.emplace_back(v->loc, Op::_Until); code.push_set_cur(until_op.block0); - process_statement(v->get_body(), code); + process_any_statement(v->get_body(), code); // in TVM, there is only "do until", but in Tolk, we want "do while" // here we negate condition to pass it forward to legacy to Op::_Until @@ -621,7 +589,12 @@ static void process_do_while_statement(V v, CodeBlob& co } else { until_cond = createV(cond->loc, "!", tok_logical_not, cond); } - until_cond->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + until_cond->mutate()->assign_inferred_type(TypeDataInt::create()); + if (auto v_bin = until_cond->try_as(); v_bin && !v_bin->fun_ref) { + v_bin->mutate()->assign_fun_ref(lookup_global_symbol("_" + static_cast(v_bin->operator_name) + "_")->as()); + } else if (auto v_un = until_cond->try_as(); v_un && !v_un->fun_ref) { + v_un->mutate()->assign_fun_ref(lookup_global_symbol(static_cast(v_un->operator_name) + "_")->as()); + } until_op.left = pre_compile_expr(until_cond, code); code.close_pop_cur(v->get_body()->loc_end); @@ -633,7 +606,7 @@ static void process_while_statement(V v, CodeBlob& code) { while_op.left = pre_compile_expr(v->get_cond(), code); code.close_pop_cur(v->get_body()->loc); code.push_set_cur(while_op.block1); - process_statement(v->get_body(), code); + process_any_statement(v->get_body(), code); code.close_pop_cur(v->get_body()->loc_end); } @@ -641,16 +614,16 @@ static void process_throw_statement(V v, CodeBlob& code) { if (v->has_thrown_arg()) { 
const FunctionData* builtin_sym = lookup_global_symbol("__throw_arg")->as(); std::vector args_vars = pre_compile_tensor(code, {v->get_thrown_arg(), v->get_thrown_code()}); - gen_op_call(code, TypeExpr::new_unit(), v->loc, std::move(args_vars), builtin_sym); + gen_op_call(code, TypeDataVoid::create(), v->loc, std::move(args_vars), builtin_sym); } else { const FunctionData* builtin_sym = lookup_global_symbol("__throw")->as(); std::vector args_vars = pre_compile_tensor(code, {v->get_thrown_code()}); - gen_op_call(code, TypeExpr::new_unit(), v->loc, std::move(args_vars), builtin_sym); + gen_op_call(code, TypeDataVoid::create(), v->loc, std::move(args_vars), builtin_sym); } } static void process_return_statement(V v, CodeBlob& code) { - std::vector return_vars = pre_compile_expr(v->get_return_value(), code); + std::vector return_vars = v->has_return_value() ? pre_compile_expr(v->get_return_value(), code) : std::vector{}; if (code.fun_ref->does_return_self()) { tolk_assert(return_vars.size() == 1); return_vars = {}; @@ -680,10 +653,8 @@ static void append_implicit_return_statement(SrcLocation loc_end, CodeBlob& code } -void process_statement(AnyV v, CodeBlob& code) { +void process_any_statement(AnyV v, CodeBlob& code) { switch (v->type) { - case ast_local_vars_declaration: - return process_local_vars_declaration(v->as(), code); case ast_sequence: return process_sequence(v->as(), code); case ast_return_statement: @@ -709,30 +680,31 @@ void process_statement(AnyV v, CodeBlob& code) { } } -static void convert_function_body_to_CodeBlob(V v, V v_body) { - CodeBlob* blob = new CodeBlob{static_cast(v->get_identifier()->name), v->loc, v->fun_ref, v->ret_type}; +static void convert_function_body_to_CodeBlob(const FunctionData* fun_ref, FunctionBodyCode* code_body) { + auto v_body = fun_ref->ast_root->as()->get_body()->as(); + CodeBlob* blob = new CodeBlob{fun_ref->name, fun_ref->loc, fun_ref}; FormalArgList legacy_arg_list; - for (int i = 0; i < v->get_num_params(); ++i) { - 
legacy_arg_list.emplace_back(v->get_param(i)->declared_type, &v->fun_ref->parameters[i], v->loc); + for (const LocalVarData& param : fun_ref->parameters) { + legacy_arg_list.emplace_back(param.declared_type, ¶m, param.loc); } blob->import_params(std::move(legacy_arg_list)); for (AnyV item : v_body->get_items()) { - process_statement(item, *blob); + process_any_statement(item, *blob); } - if (v->fun_ref->is_implicit_return()) { + if (fun_ref->is_implicit_return()) { append_implicit_return_statement(v_body->loc_end, *blob); } blob->close_blk(v_body->loc_end); - std::get(v->fun_ref->body)->set_code(blob); + code_body->set_code(blob); } -static void convert_asm_body_to_AsmOp(V v, V v_body) { - int cnt = v->get_num_params(); - int width = v->ret_type->get_width(); +static void convert_asm_body_to_AsmOp(const FunctionData* fun_ref, FunctionBodyAsm* asm_body) { + int cnt = fun_ref->get_num_params(); + int width = fun_ref->inferred_return_type->calc_width_on_stack(); std::vector asm_ops; - for (AnyV v_child : v_body->get_asm_commands()) { + for (AnyV v_child : fun_ref->ast_root->as()->get_body()->as()->get_asm_commands()) { std::string_view ops = v_child->as()->str_val; // \n\n... 
std::string op; for (char c : ops) { @@ -756,21 +728,77 @@ static void convert_asm_body_to_AsmOp(V v, V(v->fun_ref->body)->set_code(std::move(asm_ops)); + asm_body->set_code(std::move(asm_ops)); } -void pipeline_convert_ast_to_legacy_Expr_Op(const AllSrcFiles& all_src_files) { - for (const SrcFile* file : all_src_files) { - for (AnyV v : file->ast->as()->get_toplevel_declarations()) { - if (auto v_func = v->try_as()) { - if (v_func->is_asm_function()) { - convert_asm_body_to_AsmOp(v_func, v_func->get_body()->as()); - } else if (!v_func->marked_as_builtin) { - convert_function_body_to_CodeBlob(v_func, v_func->get_body()->as()); +class UpdateArgRetOrderConsideringStackWidth final { +public: + static bool should_visit_function(const FunctionData* fun_ref) { + return !fun_ref->is_generic_function() && (!fun_ref->ret_order.empty() || !fun_ref->arg_order.empty()); + } + + static void start_visiting_function(const FunctionData* fun_ref, V v_function) { + int total_arg_mutate_width = 0; + bool has_arg_width_not_1 = false; + for (const LocalVarData& param : fun_ref->parameters) { + int arg_width = param.declared_type->calc_width_on_stack(); + has_arg_width_not_1 |= arg_width != 1; + total_arg_mutate_width += param.is_mutate_parameter() * arg_width; + } + + // example: `fun f(a: int, b: (int, (int, int)), c: int)` with `asm (b a c)` + // current arg_order is [1 0 2] + // needs to be converted to [1 2 3 0 4] because b width is 3 + if (has_arg_width_not_1) { + int total_arg_width = 0; + std::vector cum_arg_width; + cum_arg_width.reserve(1 + fun_ref->get_num_params()); + cum_arg_width.push_back(0); + for (const LocalVarData& param : fun_ref->parameters) { + cum_arg_width.push_back(total_arg_width += param.declared_type->calc_width_on_stack()); + } + std::vector arg_order; + for (int i = 0; i < fun_ref->get_num_params(); ++i) { + int j = fun_ref->arg_order[i]; + int c1 = cum_arg_width[j], c2 = cum_arg_width[j + 1]; + while (c1 < c2) { + arg_order.push_back(c1++); } } + 
fun_ref->mutate()->assign_arg_order(std::move(arg_order)); + } + + // example: `fun f(mutate self: slice): slice` with `asm(-> 1 0)` + // ret_order is a shuffled range 0...N + // validate N: a function should return value and mutated arguments onto a stack + if (!fun_ref->ret_order.empty()) { + size_t expected_width = fun_ref->inferred_return_type->calc_width_on_stack() + total_arg_mutate_width; + if (expected_width != fun_ref->ret_order.size()) { + v_function->get_body()->error("ret_order (after ->) expected to contain " + std::to_string(expected_width) + " numbers"); + } } } +}; + +class ConvertASTToLegacyOpVisitor final { +public: + static bool should_visit_function(const FunctionData* fun_ref) { + return !fun_ref->is_generic_function(); + } + + static void start_visiting_function(const FunctionData* fun_ref, V) { + tolk_assert(fun_ref->is_type_inferring_done()); + if (fun_ref->is_code_function()) { + convert_function_body_to_CodeBlob(fun_ref, std::get(fun_ref->body)); + } else if (fun_ref->is_asm_function()) { + convert_asm_body_to_AsmOp(fun_ref, std::get(fun_ref->body)); + } + } +}; + +void pipeline_convert_ast_to_legacy_Expr_Op() { + visit_ast_of_all_functions(); + visit_ast_of_all_functions(); } } // namespace tolk diff --git a/tolk/pipe-calc-rvalue-lvalue.cpp b/tolk/pipe-calc-rvalue-lvalue.cpp index 1738226b..041aec89 100644 --- a/tolk/pipe-calc-rvalue-lvalue.cpp +++ b/tolk/pipe-calc-rvalue-lvalue.cpp @@ -74,12 +74,12 @@ class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody { parent::visit(v); } - void visit(V v) override { + void visit(V v) override { mark_vertex_cur_or_rvalue(v); parent::visit(v); } - void visit(V v) override { + void visit(V v) override { mark_vertex_cur_or_rvalue(v); } @@ -99,10 +99,6 @@ class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody { mark_vertex_cur_or_rvalue(v); } - void visit(V v) override { - mark_vertex_cur_or_rvalue(v); - } - void visit(V v) override { mark_vertex_cur_or_rvalue(v); 
MarkingState saved = enter_state(v->passed_as_mutate ? MarkingState::LValueAndRValue : MarkingState::RValue); @@ -115,6 +111,13 @@ class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody { parent::visit(v); } + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + MarkingState saved = enter_state(MarkingState::RValue); + parent::visit(v->get_obj()); + restore_state(saved); + } + void visit(V v) override { mark_vertex_cur_or_rvalue(v); MarkingState saved = enter_state(MarkingState::RValue); @@ -122,15 +125,6 @@ class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody { restore_state(saved); } - void visit(V v) override { - mark_vertex_cur_or_rvalue(v); - MarkingState saved = enter_state(MarkingState::RValue); - parent::visit(v->get_obj()); - enter_state(MarkingState::RValue); - parent::visit(v->get_arg_list()); - restore_state(saved); - } - void visit(V v) override { // underscore is a placeholder to ignore left side of assignment: `(a, _) = get2params()` // so, if current state is "lvalue", `_` will be marked as lvalue, and ok @@ -139,6 +133,24 @@ class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody { mark_vertex_cur_or_rvalue(v); } + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + MarkingState saved = enter_state(MarkingState::LValue); + parent::visit(v->get_lhs()); + enter_state(MarkingState::RValue); + parent::visit(v->get_rhs()); + restore_state(saved); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + MarkingState saved = enter_state(MarkingState::LValueAndRValue); + parent::visit(v->get_lhs()); + enter_state(MarkingState::RValue); + parent::visit(v->get_rhs()); + restore_state(saved); + } + void visit(V v) override { mark_vertex_cur_or_rvalue(v); MarkingState saved = enter_state(MarkingState::RValue); @@ -148,10 +160,8 @@ class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody { void visit(V v) override { mark_vertex_cur_or_rvalue(v); - MarkingState 
saved = enter_state(v->is_set_assign() ? MarkingState::LValueAndRValue : v->is_assign() ? MarkingState::LValue : MarkingState::RValue); - parent::visit(v->get_lhs()); - enter_state(MarkingState::RValue); - parent::visit(v->get_rhs()); + MarkingState saved = enter_state(MarkingState::RValue); + parent::visit(v); restore_state(saved); } @@ -162,15 +172,18 @@ class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody { restore_state(saved); } - void visit(V v) override { - MarkingState saved = enter_state(MarkingState::LValue); - parent::visit(v->get_lhs()); - enter_state(MarkingState::RValue); - parent::visit(v->get_assigned_val()); - restore_state(saved); + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + parent::visit(v->get_expr()); // leave lvalue state unchanged, for `mutate (t.0 as int)` both `t.0 as int` and `t.0` are lvalue } - void visit(V v) override { + void visit(V v) override { + tolk_assert(cur_state == MarkingState::LValue); + mark_vertex_cur_or_rvalue(v); + parent::visit(v); + } + + void visit(V v) override { tolk_assert(cur_state == MarkingState::LValue); mark_vertex_cur_or_rvalue(v); parent::visit(v); @@ -183,10 +196,22 @@ class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody { restore_state(saved); parent::visit(v->get_catch_body()); } + +public: + bool should_visit_function(const FunctionData* fun_ref) override { + return fun_ref->is_code_function() && !fun_ref->is_generic_function(); + } }; -void pipeline_calculate_rvalue_lvalue(const AllSrcFiles& all_src_files) { - visit_ast_of_all_functions(all_src_files); +void pipeline_calculate_rvalue_lvalue() { + visit_ast_of_all_functions(); +} + +void pipeline_calculate_rvalue_lvalue(const FunctionData* fun_ref) { + CalculateRvalueLvalueVisitor visitor; + if (visitor.should_visit_function(fun_ref)) { + visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); + } } } // namespace tolk diff --git a/tolk/pipe-check-pure-impure.cpp 
b/tolk/pipe-check-pure-impure.cpp index 6cef9f15..2b2e1e67 100644 --- a/tolk/pipe-check-pure-impure.cpp +++ b/tolk/pipe-check-pure-impure.cpp @@ -33,31 +33,27 @@ static void fire_error_impure_operation_inside_pure_function(AnyV v) { class CheckImpureOperationsInPureFunctionVisitor final : public ASTVisitorFunctionBody { static void fire_if_global_var(AnyExprV v) { - if (auto v_ident = v->try_as()) { + if (auto v_ident = v->try_as()) { if (v_ident->sym->try_as()) { fire_error_impure_operation_inside_pure_function(v); } } } - void visit(V v) override { - if (v->marked_as_redef) { - fire_if_global_var(v->get_identifier()); - } + void visit(V v) override { + fire_if_global_var(v->get_lhs()); + parent::visit(v); } - void visit(V v) override { - if (v->is_set_assign() || v->is_assign()) { - fire_if_global_var(v->get_lhs()); - } - + void visit(V v) override { + fire_if_global_var(v->get_lhs()); parent::visit(v); } void visit(V v) override { - // most likely it's a global function, but also may be `some_var(args)` or even `getF()(args)` + // v is `globalF(args)` / `globalF(args)` / `obj.method(args)` / `local_var(args)` / `getF()(args)` if (!v->fun_maybe) { - // calling variables is always impure, no considerations about what's there at runtime + // `local_var(args)` is always impure, no considerations about what's there at runtime fire_error_impure_operation_inside_pure_function(v); } @@ -68,14 +64,6 @@ class CheckImpureOperationsInPureFunctionVisitor final : public ASTVisitorFuncti parent::visit(v); } - void visit(V v) override { - if (!v->fun_ref->is_marked_as_pure()) { - fire_error_impure_operation_inside_pure_function(v); - } - - parent::visit(v); - } - void visit(V v) override { if (v->passed_as_mutate) { fire_if_global_var(v->get_expr()); @@ -93,15 +81,13 @@ class CheckImpureOperationsInPureFunctionVisitor final : public ASTVisitorFuncti } public: - void start_visiting_function(V v_function) override { - if (v_function->marked_as_pure) { - 
parent::visit(v_function->get_body()); - } + bool should_visit_function(const FunctionData* fun_ref) override { + return fun_ref->is_code_function() && !fun_ref->is_generic_function() && fun_ref->is_marked_as_pure(); } }; -void pipeline_check_pure_impure_operations(const AllSrcFiles& all_src_files) { - visit_ast_of_all_functions(all_src_files); +void pipeline_check_pure_impure_operations() { + visit_ast_of_all_functions(); } } // namespace tolk diff --git a/tolk/pipe-check-rvalue-lvalue.cpp b/tolk/pipe-check-rvalue-lvalue.cpp index f5bf8526..038b0999 100644 --- a/tolk/pipe-check-rvalue-lvalue.cpp +++ b/tolk/pipe-check-rvalue-lvalue.cpp @@ -36,9 +36,18 @@ static void fire_error_cannot_be_used_as_lvalue(AnyV v, const std::string& detai v->error(details + " can not be used as lvalue"); } -// handle when a function used as rvalue, like `var cb = f` -static void handle_function_used_as_noncall(AnyExprV v, const FunctionData* fun_ref) { - fun_ref->mutate()->assign_is_used_as_noncall(); +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_modifying_immutable_variable(AnyExprV v, const LocalVarData* var_ref) { + if (var_ref->idx == 0 && var_ref->name == "self") { + v->error("modifying `self`, which is immutable by default; probably, you want to declare `mutate self`"); + } else { + v->error("modifying immutable variable `" + var_ref->name + "`"); + } +} + +// validate a function used as rvalue, like `var cb = f` +// it's not a generic function (ensured earlier at type inferring) and has some more restrictions +static void validate_function_used_as_noncall(AnyExprV v, const FunctionData* fun_ref) { if (!fun_ref->arg_order.empty() || !fun_ref->ret_order.empty()) { v->error("saving `" + fun_ref->name + "` into a variable will most likely lead to invalid usage, since it changes the order of variables on the stack"); } @@ -48,16 +57,30 @@ static void handle_function_used_as_noncall(AnyExprV v, const FunctionData* fun_ } class CheckRValueLvalueVisitor final : 
public ASTVisitorFunctionBody { + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "assignment"); + } + parent::visit(v); + } + + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "assignment"); + } + parent::visit(v); + } + void visit(V v) override { if (v->is_lvalue) { - fire_error_cannot_be_used_as_lvalue(v, "operator `" + static_cast(v->operator_name)); + fire_error_cannot_be_used_as_lvalue(v, "operator " + static_cast(v->operator_name)); } parent::visit(v); } void visit(V v) override { if (v->is_lvalue) { - fire_error_cannot_be_used_as_lvalue(v, "operator `" + static_cast(v->operator_name)); + fire_error_cannot_be_used_as_lvalue(v, "operator " + static_cast(v->operator_name)); } parent::visit(v); } @@ -69,6 +92,11 @@ class CheckRValueLvalueVisitor final : public ASTVisitorFunctionBody { parent::visit(v); } + void visit(V v) override { + // if `x as int` is lvalue, then `x` is also lvalue, so check that `x` is ok + parent::visit(v->get_expr()); + } + void visit(V v) override { if (v->is_lvalue) { fire_error_cannot_be_used_as_lvalue(v, "literal"); @@ -93,46 +121,45 @@ class CheckRValueLvalueVisitor final : public ASTVisitorFunctionBody { } } + void visit(V v) override { + // a reference to a method used as rvalue, like `var v = t.tupleAt` + if (const FunctionData* fun_ref = v->target; v->is_rvalue) { + validate_function_used_as_noncall(v, fun_ref); + } + } + void visit(V v) override { if (v->is_lvalue) { fire_error_cannot_be_used_as_lvalue(v, "function call"); } if (!v->fun_maybe) { - parent::visit(v->get_called_f()); + parent::visit(v->get_callee()); + } + // for `f()` don't visit ast_reference `f`, to detect `f` usage as non-call, like `var cb = f` + // same for `obj.method()`, don't visit ast_reference method, visit only obj + if (v->is_dot_call()) { + parent::visit(v->get_dot_obj()); } - // for `f(...)` don't visit identifier `f`, to detect `f` usage as non-call, like `var cb = 
f` for (int i = 0; i < v->get_num_args(); ++i) { parent::visit(v->get_arg(i)); } } - void visit(V v) override { - if (v->is_lvalue) { - fire_error_cannot_be_used_as_lvalue(v, "method call"); - } - - parent::visit(v->get_obj()); - - for (int i = 0; i < v->get_num_args(); ++i) { - parent::visit(v->get_arg(i)); - } - } - - void visit(V v) override { + void visit(V v) override { if (v->marked_as_redef) { - tolk_assert(v->var_maybe); // always filled, but for `var g_var redef` might point not to a local - if (const LocalVarData* var_ref = v->var_maybe->try_as(); var_ref && var_ref->is_immutable()) { + tolk_assert(v->var_ref); + if (v->var_ref->is_immutable()) { v->error("`redef` for immutable variable"); } } } - void visit(V v) override { + void visit(V v) override { if (v->is_lvalue) { tolk_assert(v->sym); if (const auto* var_ref = v->sym->try_as(); var_ref && var_ref->is_immutable()) { - v->error("modifying immutable variable `" + var_ref->name + "`"); + fire_error_modifying_immutable_variable(v, var_ref); } else if (v->sym->try_as()) { v->error("modifying immutable constant"); } else if (v->sym->try_as()) { @@ -142,13 +169,7 @@ class CheckRValueLvalueVisitor final : public ASTVisitorFunctionBody { // a reference to a function used as rvalue, like `var v = someFunction` if (const FunctionData* fun_ref = v->sym->try_as(); fun_ref && v->is_rvalue) { - handle_function_used_as_noncall(v, fun_ref); - } - } - - void visit(V v) override { - if (v->is_lvalue && v->param_ref->is_immutable()) { - v->error("modifying `self`, which is immutable by default; probably, you want to declare `mutate self`"); + validate_function_used_as_noncall(v, fun_ref); } } @@ -163,10 +184,15 @@ class CheckRValueLvalueVisitor final : public ASTVisitorFunctionBody { // skip catch(_,excNo), there are always vars due to grammar, lvalue/rvalue aren't set to them parent::visit(v->get_catch_body()); } + +public: + bool should_visit_function(const FunctionData* fun_ref) override { + return 
fun_ref->is_code_function() && !fun_ref->is_generic_function(); + } }; -void pipeline_check_rvalue_lvalue(const AllSrcFiles& all_src_files) { - visit_ast_of_all_functions(all_src_files); +void pipeline_check_rvalue_lvalue() { + visit_ast_of_all_functions(); } } // namespace tolk diff --git a/tolk/pipe-constant-folding.cpp b/tolk/pipe-constant-folding.cpp index 9e266e6d..4090d247 100644 --- a/tolk/pipe-constant-folding.cpp +++ b/tolk/pipe-constant-folding.cpp @@ -17,6 +17,7 @@ #include "tolk.h" #include "ast.h" #include "ast-replacer.h" +#include "type-system.h" /* * This pipe is supposed to do constant folding, like replacing `2 + 3` with `5`. @@ -33,7 +34,7 @@ namespace tolk { class ConstantFoldingReplacer final : public ASTReplacerInFunctionBody { static V create_int_const(SrcLocation loc, td::RefInt256&& intval) { auto v_int = createV(loc, std::move(intval), {}); - v_int->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + v_int->assign_inferred_type(TypeDataInt::create()); v_int->assign_rvalue_true(); return v_int; } @@ -59,10 +60,15 @@ class ConstantFoldingReplacer final : public ASTReplacerInFunctionBody { return v; } + +public: + bool should_visit_function(const FunctionData* fun_ref) override { + return fun_ref->is_code_function() && !fun_ref->is_generic_function(); + } }; -void pipeline_constant_folding(const AllSrcFiles& all_src_files) { - replace_ast_of_all_functions(all_src_files); +void pipeline_constant_folding() { + replace_ast_of_all_functions(); } } // namespace tolk diff --git a/tolk/pipe-detect-unreachable.cpp b/tolk/pipe-detect-unreachable.cpp index 96de2eb0..15824cf3 100644 --- a/tolk/pipe-detect-unreachable.cpp +++ b/tolk/pipe-detect-unreachable.cpp @@ -111,17 +111,28 @@ class UnreachableStatementsDetectVisitor final { } public: - void start_visiting_function(V v_function) { + static bool should_visit_function(const FunctionData* fun_ref) { + return fun_ref->is_code_function() && !fun_ref->is_generic_function(); + } + + void 
start_visiting_function(const FunctionData* fun_ref, V v_function) { bool control_flow_reaches_end = !always_returns(v_function->get_body()->as()); if (control_flow_reaches_end) { - v_function->fun_ref->mutate()->assign_is_implicit_return(); + fun_ref->mutate()->assign_is_implicit_return(); } } }; -void pipeline_detect_unreachable_statements(const AllSrcFiles& all_src_files) { - visit_ast_of_all_functions(all_src_files); +void pipeline_detect_unreachable_statements() { + visit_ast_of_all_functions(); +} + +void pipeline_detect_unreachable_statements(const FunctionData* fun_ref) { + UnreachableStatementsDetectVisitor visitor; + if (UnreachableStatementsDetectVisitor::should_visit_function(fun_ref)) { + visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); + } } } // namespace tolk diff --git a/tolk/pipe-discover-parse-sources.cpp b/tolk/pipe-discover-parse-sources.cpp index 92cc2807..d31348ba 100644 --- a/tolk/pipe-discover-parse-sources.cpp +++ b/tolk/pipe-discover-parse-sources.cpp @@ -38,7 +38,7 @@ namespace tolk { -AllSrcFiles pipeline_discover_and_parse_sources(const std::string& stdlib_filename, const std::string& entrypoint_filename) { +void pipeline_discover_and_parse_sources(const std::string& stdlib_filename, const std::string& entrypoint_filename) { G.all_src_files.locate_and_register_source_file(stdlib_filename, {}); G.all_src_files.locate_and_register_source_file(entrypoint_filename, {}); @@ -46,27 +46,25 @@ AllSrcFiles pipeline_discover_and_parse_sources(const std::string& stdlib_filena tolk_assert(!file->ast); file->ast = parse_src_file_to_ast(file); - // file->ast->debug_print(); + // if (!file->is_stdlib_file()) file->ast->debug_print(); for (AnyV v_toplevel : file->ast->as()->get_toplevel_declarations()) { - if (auto v_import = v_toplevel->try_as()) { + if (auto v_import = v_toplevel->try_as()) { std::string imported_str = v_import->get_file_name(); size_t cur_slash_pos = file->rel_filename.rfind('/'); std::string rel_filename = 
cur_slash_pos == std::string::npos || imported_str[0] == '@' ? std::move(imported_str) : file->rel_filename.substr(0, cur_slash_pos + 1) + imported_str; - SrcFile* imported = G.all_src_files.locate_and_register_source_file(rel_filename, v_import->loc); - file->imports.push_back(SrcFile::ImportStatement{imported}); + const SrcFile* imported = G.all_src_files.locate_and_register_source_file(rel_filename, v_import->loc); + file->imports.push_back(SrcFile::ImportDirective{imported}); v_import->mutate()->assign_src_file(imported); } } } // todo #ifdef TOLK_PROFILING - // lexer_measure_performance(G.all_src_files.get_all_files()); - - return G.all_src_files.get_all_files(); + lexer_measure_performance(G.all_src_files); } } // namespace tolk diff --git a/tolk/pipe-find-unused-symbols.cpp b/tolk/pipe-find-unused-symbols.cpp index 815905e6..29584cbf 100644 --- a/tolk/pipe-find-unused-symbols.cpp +++ b/tolk/pipe-find-unused-symbols.cpp @@ -37,7 +37,7 @@ namespace tolk { static void mark_function_used_dfs(const std::unique_ptr& op); static void mark_function_used(const FunctionData* fun_ref) { - if (!fun_ref->is_regular_function() || fun_ref->is_really_used()) { // already handled + if (!fun_ref->is_code_function() || fun_ref->is_really_used()) { // already handled return; } @@ -66,7 +66,7 @@ static void mark_function_used_dfs(const std::unique_ptr& op) { } void pipeline_find_unused_symbols() { - for (const FunctionData* fun_ref : G.all_code_functions) { + for (const FunctionData* fun_ref : G.all_functions) { if (fun_ref->is_method_id_not_empty()) { // get methods, main and other entrypoints, regular functions with @method_id mark_function_used(fun_ref); } diff --git a/tolk/pipe-generate-fif-output.cpp b/tolk/pipe-generate-fif-output.cpp index 5c0f1647..9092e564 100644 --- a/tolk/pipe-generate-fif-output.cpp +++ b/tolk/pipe-generate-fif-output.cpp @@ -40,20 +40,15 @@ void FunctionBodyAsm::set_code(std::vector&& code) { static void generate_output_func(const FunctionData* 
fun_ref) { - tolk_assert(fun_ref->is_regular_function()); + tolk_assert(fun_ref->is_code_function()); if (G.is_verbosity(2)) { - std::cerr << "\n\n=========================\nfunction " << fun_ref->name << " : " << fun_ref->full_type << std::endl; + std::cerr << "\n\n=========================\nfunction " << fun_ref->name << " : " << fun_ref->inferred_return_type << std::endl; } CodeBlob* code = std::get(fun_ref->body)->code; if (G.is_verbosity(3)) { code->print(std::cerr, 9); } - code->simplify_var_types(); - if (G.is_verbosity(5)) { - std::cerr << "after simplify_var_types: \n"; - code->print(std::cerr, 0); - } code->prune_unreachable_code(); if (G.is_verbosity(5)) { std::cerr << "after prune_unreachable: \n"; @@ -112,11 +107,11 @@ static void generate_output_func(const FunctionData* fun_ref) { } } -void pipeline_generate_fif_output_to_std_cout(const AllSrcFiles& all_src_files) { +void pipeline_generate_fif_output_to_std_cout() { std::cout << "\"Asm.fif\" include\n"; std::cout << "// automatically generated from "; bool need_comma = false; - for (const SrcFile* file : all_src_files) { + for (const SrcFile* file : G.all_src_files) { if (!file->is_stdlib_file()) { if (need_comma) { std::cout << ", "; @@ -129,9 +124,9 @@ void pipeline_generate_fif_output_to_std_cout(const AllSrcFiles& all_src_files) std::cout << "PROGRAM{\n"; bool has_main_procedure = false; - for (const FunctionData* fun_ref : G.all_code_functions) { + for (const FunctionData* fun_ref : G.all_functions) { if (!fun_ref->does_need_codegen()) { - if (G.is_verbosity(2)) { + if (G.is_verbosity(2) && fun_ref->is_code_function()) { std::cerr << fun_ref->name << ": code not generated, function does not need codegen\n"; } continue; @@ -164,7 +159,7 @@ void pipeline_generate_fif_output_to_std_cout(const AllSrcFiles& all_src_files) std::cout << std::string(2, ' ') << "DECLGLOBVAR " << var_ref->name << "\n"; } - for (const FunctionData* fun_ref : G.all_code_functions) { + for (const FunctionData* fun_ref : 
G.all_functions) { if (!fun_ref->does_need_codegen()) { continue; } diff --git a/tolk/pipe-infer-check-types.cpp b/tolk/pipe-infer-check-types.cpp deleted file mode 100644 index 8c18bae9..00000000 --- a/tolk/pipe-infer-check-types.cpp +++ /dev/null @@ -1,524 +0,0 @@ -/* - This file is part of TON Blockchain Library. - - TON Blockchain Library is free software: you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - TON Blockchain Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with TON Blockchain Library. If not, see . -*/ -#include "tolk.h" -#include "src-file.h" -#include "ast.h" -#include "ast-visitor.h" - -/* - * This pipe does type inferring. - * It will be fully rewritten, because current type system is based on Hindley-Milner (unifying usages), - * and I am going to introduce a static type system, drop TypeExpr completely, etc. - * Currently, after this inferring, lots of `te_Indirect` and partially complete types still exist, - * whey are partially refined during converting AST to legacy. 
- */ - -namespace tolk { - -class InferAndCheckTypesInsideFunctionVisitor final : public ASTVisitorFunctionBody { - const FunctionData* current_function = nullptr; - - static bool expect_integer(TypeExpr* inferred) { - try { - TypeExpr* t_int = TypeExpr::new_atomic(TypeExpr::_Int); - unify(inferred, t_int); - return true; - } catch (UnifyError&) { - return false; - } - } - - static bool expect_integer(AnyExprV v_inferred) { - return expect_integer(v_inferred->inferred_type); - } - - static bool is_expr_valid_as_return_self(AnyExprV return_expr) { - // `return self` - if (return_expr->type == ast_self_keyword) { - return true; - } - // `return self.someMethod()` - if (auto v_call = return_expr->try_as()) { - return v_call->fun_ref->does_return_self() && is_expr_valid_as_return_self(v_call->get_obj()); - } - // `return cond ? ... : ...` - if (auto v_ternary = return_expr->try_as()) { - return is_expr_valid_as_return_self(v_ternary->get_when_true()) && is_expr_valid_as_return_self(v_ternary->get_when_false()); - } - return false; - } - - void visit(V v) override { - parent::visit(v->get_expr()); - v->mutate()->assign_inferred_type(v->get_expr()->inferred_type); - } - - void visit(V v) override { - if (v->empty()) { - v->mutate()->assign_inferred_type(TypeExpr::new_unit()); - return; - } - std::vector types_list; - types_list.reserve(v->get_items().size()); - for (AnyExprV item : v->get_items()) { - parent::visit(item); - types_list.emplace_back(item->inferred_type); - } - v->mutate()->assign_inferred_type(TypeExpr::new_tensor(std::move(types_list))); - } - - void visit(V v) override { - if (v->empty()) { - v->mutate()->assign_inferred_type(TypeExpr::new_tuple(TypeExpr::new_unit())); - return; - } - std::vector types_list; - types_list.reserve(v->get_items().size()); - for (AnyExprV item : v->get_items()) { - parent::visit(item); - types_list.emplace_back(item->inferred_type); - } - 
v->mutate()->assign_inferred_type(TypeExpr::new_tuple(TypeExpr::new_tensor(std::move(types_list), false))); - } - - void visit(V v) override { - if (const auto* glob_ref = v->sym->try_as()) { - v->mutate()->assign_inferred_type(glob_ref->declared_type); - } else if (const auto* const_ref = v->sym->try_as()) { - v->mutate()->assign_inferred_type(const_ref->inferred_type); - } else if (const auto* fun_ref = v->sym->try_as()) { - v->mutate()->assign_inferred_type(fun_ref->full_type); - } else if (const auto* var_ref = v->sym->try_as()) { - v->mutate()->assign_inferred_type(var_ref->declared_type); - } - } - - void visit(V v) override { - v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); - } - - void visit(V v) override { - switch (v->modifier) { - case 0: - case 's': - case 'a': - v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Slice)); - break; - case 'u': - case 'h': - case 'H': - case 'c': - v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); - break; - default: - break; - } - } - - void visit(V v) override { - v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); - } - - void visit(V v) override { - const FunctionData* fun_ref = lookup_global_symbol("__null")->as(); - TypeExpr* fun_type = TypeExpr::new_map(TypeExpr::new_unit(), TypeExpr::new_hole()); - TypeExpr* sym_type = fun_ref->full_type; - try { - unify(fun_type, sym_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "cannot apply function " << fun_ref->name << " : " << fun_ref->full_type << " to arguments of type " - << fun_type->args[0] << ": " << ue; - v->error(os.str()); - } - TypeExpr* e_type = fun_type->args[1]; - TypeExpr::remove_indirect(e_type); - v->mutate()->assign_inferred_type(e_type); - } - - void visit(V v) override { - v->mutate()->assign_inferred_type(v->param_ref->declared_type); - } - - void visit(V v) override { - parent::visit(v->get_expr()); - 
v->mutate()->assign_inferred_type(v->get_expr()->inferred_type); - } - - void visit(V v) override { - if (v->empty()) { - v->mutate()->assign_inferred_type(TypeExpr::new_unit()); - return; - } - std::vector types_list; - types_list.reserve(v->size()); - for (AnyExprV item : v->get_arguments()) { - parent::visit(item); - types_list.emplace_back(item->inferred_type); - } - v->mutate()->assign_inferred_type(TypeExpr::new_tensor(std::move(types_list))); - } - - void visit(V v) override { - // special error for "null()" which is a FunC syntax - if (v->get_called_f()->type == ast_null_keyword) { - v->error("null is not a function: use `null`, not `null()`"); - } - - parent::visit(v->get_called_f()); - visit(v->get_arg_list()); - - // most likely it's a global function, but also may be `some_var(args)` or even `getF()(args)` - const FunctionData* fun_ref = v->fun_maybe; - if (!fun_ref) { - TypeExpr* arg_tensor = v->get_arg_list()->inferred_type; - TypeExpr* lhs_type = v->get_called_f()->inferred_type; - TypeExpr* fun_type = TypeExpr::new_map(arg_tensor, TypeExpr::new_hole()); - try { - unify(fun_type, lhs_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "cannot apply expression of type " << lhs_type << " to an expression of type " << arg_tensor - << ": " << ue; - v->error(os.str()); - } - TypeExpr* e_type = fun_type->args[1]; - TypeExpr::remove_indirect(e_type); - v->mutate()->assign_inferred_type(e_type); - return; - } - - TypeExpr* arg_tensor = v->get_arg_list()->inferred_type; - TypeExpr* fun_type = TypeExpr::new_map(arg_tensor, TypeExpr::new_hole()); - TypeExpr* sym_type = fun_ref->full_type; - try { - unify(fun_type, sym_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "cannot apply function " << fun_ref->name << " : " << fun_ref->full_type << " to arguments of type " - << fun_type->args[0] << ": " << ue; - v->error(os.str()); - } - TypeExpr* e_type = fun_type->args[1]; - TypeExpr::remove_indirect(e_type); - - if 
(fun_ref->has_mutate_params()) { - tolk_assert(e_type->constr == TypeExpr::te_Tensor); - e_type = e_type->args[e_type->args.size() - 1]; - } - - v->mutate()->assign_inferred_type(e_type); - } - - void visit(V v) override { - parent::visit(v->get_obj()); - visit(v->get_arg_list()); - std::vector arg_types; - arg_types.reserve(1 + v->get_num_args()); - arg_types.push_back(v->get_obj()->inferred_type); - for (int i = 0; i < v->get_num_args(); ++i) { - arg_types.push_back(v->get_arg(i)->inferred_type); - } - - TypeExpr* arg_tensor = TypeExpr::new_tensor(std::move(arg_types)); - TypeExpr* fun_type = TypeExpr::new_map(arg_tensor, TypeExpr::new_hole()); - TypeExpr* sym_type = v->fun_ref->full_type; - try { - unify(fun_type, sym_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "cannot apply function " << v->fun_ref->name << " : " << v->fun_ref->full_type << " to arguments of type " - << fun_type->args[0] << ": " << ue; - v->error(os.str()); - } - TypeExpr* e_type = fun_type->args[1]; - TypeExpr::remove_indirect(e_type); - - if (v->fun_ref->has_mutate_params()) { - tolk_assert(e_type->constr == TypeExpr::te_Tensor); - e_type = e_type->args[e_type->args.size() - 1]; - } - if (v->fun_ref->does_return_self()) { - e_type = v->get_obj()->inferred_type; - TypeExpr::remove_indirect(e_type); - } - - v->mutate()->assign_inferred_type(e_type); - } - - void visit(V v) override { - v->mutate()->assign_inferred_type(TypeExpr::new_hole()); - } - - void visit(V v) override { - parent::visit(v->get_rhs()); - if (!expect_integer(v->get_rhs())) { - v->error("operator `" + static_cast(v->operator_name) + "` expects integer operand"); - } - v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); - } - - void visit(V v) override { - parent::visit(v->get_lhs()); - parent::visit(v->get_rhs()); - switch (v->tok) { - case tok_assign: { - TypeExpr* lhs_type = v->get_lhs()->inferred_type; - TypeExpr* rhs_type = v->get_rhs()->inferred_type; - try { - 
unify(lhs_type, rhs_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "cannot assign an expression of type " << rhs_type << " to a variable or pattern of type " - << lhs_type << ": " << ue; - v->error(os.str()); - } - TypeExpr* e_type = lhs_type; - TypeExpr::remove_indirect(e_type); - v->mutate()->assign_inferred_type(e_type); - break; - } - case tok_eq: - case tok_neq: - case tok_spaceship: { - if (!expect_integer(v->get_lhs()) || !expect_integer(v->get_rhs())) { - v->error("comparison operators `== !=` can compare only integers"); - } - v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); - break; - } - case tok_logical_and: - case tok_logical_or: { - if (!expect_integer(v->get_lhs()) || !expect_integer(v->get_rhs())) { - v->error("logical operators `&& ||` expect integer operands"); - } - v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); - break; - } - default: - if (!expect_integer(v->get_lhs()) || !expect_integer(v->get_rhs())) { - v->error("operator `" + static_cast(v->operator_name) + "` expects integer operands"); - } - v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); - } - } - - void visit(V v) override { - parent::visit(v->get_cond()); - if (!expect_integer(v->get_cond())) { - v->get_cond()->error("condition of ternary ?: operator must be an integer"); - } - parent::visit(v->get_when_true()); - parent::visit(v->get_when_false()); - - TypeExpr* res = TypeExpr::new_hole(); - TypeExpr *ttrue = v->get_when_true()->inferred_type; - TypeExpr *tfals = v->get_when_false()->inferred_type; - unify(res, ttrue); - unify(res, tfals); - v->mutate()->assign_inferred_type(res); - } - - void visit(V v) override { - parent::visit(v->get_cond()); - parent::visit(v->get_if_body()); - parent::visit(v->get_else_body()); - TypeExpr* flag_type = TypeExpr::new_atomic(TypeExpr::_Int); - TypeExpr* cond_type = v->get_cond()->inferred_type; - try { - - unify(cond_type, flag_type); - } catch 
(UnifyError& ue) { - std::ostringstream os; - os << "`if` condition value of type " << cond_type << " is not an integer: " << ue; - v->get_cond()->error(os.str()); - } - v->get_cond()->mutate()->assign_inferred_type(cond_type); - } - - void visit(V v) override { - parent::visit(v->get_cond()); - parent::visit(v->get_body()); - TypeExpr* cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); - TypeExpr* cond_type = v->get_cond()->inferred_type; - try { - unify(cond_type, cnt_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "repeat count value of type " << cond_type << " is not an integer: " << ue; - v->get_cond()->error(os.str()); - } - v->get_cond()->mutate()->assign_inferred_type(cond_type); - } - - void visit(V v) override { - parent::visit(v->get_cond()); - parent::visit(v->get_body()); - TypeExpr* cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); - TypeExpr* cond_type = v->get_cond()->inferred_type; - try { - unify(cond_type, cnt_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "`while` condition value of type " << cond_type << " is not an integer: " << ue; - v->get_cond()->error(os.str()); - } - v->get_cond()->mutate()->assign_inferred_type(cond_type); - } - - void visit(V v) override { - parent::visit(v->get_body()); - parent::visit(v->get_cond()); - TypeExpr* cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); - TypeExpr* cond_type = v->get_cond()->inferred_type; - try { - unify(cond_type, cnt_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "`while` condition value of type " << cond_type << " is not an integer: " << ue; - v->get_cond()->error(os.str()); - } - v->get_cond()->mutate()->assign_inferred_type(cond_type); - } - - void visit(V v) override { - parent::visit(v->get_return_value()); - if (current_function->does_return_self()) { - if (!is_expr_valid_as_return_self(v->get_return_value())) { - v->error("invalid return from `self` function"); - } - return; - } - TypeExpr* expr_type = 
v->get_return_value()->inferred_type; - TypeExpr* ret_type = current_function->full_type; - if (ret_type->constr == TypeExpr::te_ForAll) { - ret_type = ret_type->args[0]; - } - tolk_assert(ret_type->constr == TypeExpr::te_Map); - ret_type = ret_type->args[1]; - if (current_function->has_mutate_params()) { - tolk_assert(ret_type->constr == TypeExpr::te_Tensor); - ret_type = ret_type->args[ret_type->args.size() - 1]; - } - try { - unify(expr_type, ret_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "previous function return type " << ret_type - << " cannot be unified with return statement expression type " << expr_type << ": " << ue; - v->error(os.str()); - } - } - - void visit(V v) override { - if (v->var_maybe) { // not underscore - if (const auto* var_ref = v->var_maybe->try_as()) { - v->mutate()->assign_inferred_type(var_ref->declared_type); - } else if (const auto* glob_ref = v->var_maybe->try_as()) { - v->mutate()->assign_inferred_type(glob_ref->declared_type); - } else { - tolk_assert(0); - } - } else if (v->declared_type) { // underscore with type - v->mutate()->assign_inferred_type(v->declared_type); - } else { // just underscore - v->mutate()->assign_inferred_type(TypeExpr::new_hole()); - } - v->get_identifier()->mutate()->assign_inferred_type(v->inferred_type); - } - - void visit(V v) override { - parent::visit(v->get_lhs()); - parent::visit(v->get_assigned_val()); - TypeExpr* lhs = v->get_lhs()->inferred_type; - TypeExpr* rhs = v->get_assigned_val()->inferred_type; - try { - unify(lhs, rhs); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "cannot assign an expression of type " << rhs << " to a variable or pattern of type " << lhs << ": " << ue; - v->error(os.str()); - } - } - - void visit(V v) override { - parent::visit(v->get_try_body()); - parent::visit(v->get_catch_expr()); - - TypeExpr* tvm_error_type = TypeExpr::new_tensor(TypeExpr::new_var(), TypeExpr::new_atomic(TypeExpr::_Int)); - 
tolk_assert(v->get_catch_expr()->size() == 2); - TypeExpr* type1 = v->get_catch_expr()->get_item(0)->inferred_type; - unify(type1, tvm_error_type->args[1]); - TypeExpr* type2 = v->get_catch_expr()->get_item(1)->inferred_type; - unify(type2, tvm_error_type->args[0]); - - parent::visit(v->get_catch_body()); - } - - void visit(V v) override { - parent::visit(v->get_thrown_code()); - if (!expect_integer(v->get_thrown_code())) { - v->get_thrown_code()->error("excNo of `throw` must be an integer"); - } - if (v->has_thrown_arg()) { - parent::visit(v->get_thrown_arg()); - } - } - - void visit(V v) override { - parent::visit(v->get_cond()); - if (!expect_integer(v->get_cond())) { - v->get_cond()->error("condition of `assert` must be an integer"); - } - parent::visit(v->get_thrown_code()); - } - -public: - void start_visiting_function(V v_function) override { - current_function = v_function->fun_ref; - parent::visit(v_function->get_body()); - if (current_function->is_implicit_return()) { - if (current_function->does_return_self()) { - throw ParseError(v_function->get_body()->as()->loc_end, "missing return; forgot `return self`?"); - } - TypeExpr* expr_type = TypeExpr::new_unit(); - TypeExpr* ret_type = current_function->full_type; - if (ret_type->constr == TypeExpr::te_ForAll) { - ret_type = ret_type->args[0]; - } - tolk_assert(ret_type->constr == TypeExpr::te_Map); - ret_type = ret_type->args[1]; - if (current_function->has_mutate_params()) { - ret_type = ret_type->args[ret_type->args.size() - 1]; - } - try { - unify(expr_type, ret_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "implicit function return type " << expr_type - << " cannot be unified with inferred return type " << ret_type << ": " << ue; - v_function->error(os.str()); - } - } - } -}; - -void pipeline_infer_and_check_types(const AllSrcFiles& all_src_files) { - visit_ast_of_all_functions(all_src_files); -} - -} // namespace tolk diff --git a/tolk/pipe-infer-types-and-calls.cpp 
b/tolk/pipe-infer-types-and-calls.cpp new file mode 100644 index 00000000..1d6fbcb0 --- /dev/null +++ b/tolk/pipe-infer-types-and-calls.cpp @@ -0,0 +1,1149 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "tolk.h" +#include "src-file.h" +#include "ast.h" +#include "ast-visitor.h" +#include "generics-helpers.h" +#include "type-system.h" + +/* + * This is a complicated and crucial part of the pipeline. It simultaneously does the following: + * * infers types of all expressions; example: `2 + 3` both are TypeDataInt, result is also + * * AND checks types for assignment, arguments passing, etc.; example: `fInt(cs)` is error passing slice to int + * * AND binds function/method calls (assigns fun_ref); example: `globalF()`, fun_ref is assigned to `globalF` (unless generic) + * * AND instantiates generic functions; example: `t.tuplePush(2)` creates `tuplePush` and assigns fun_ref to dot field + * * AND infers return type of functions if it's omitted (`fun f() { ... }` means "auto infer", not "void") + * + * It's important to do all these parts simultaneously, they can't be split or separated. + * For example, we can't bind `f(2)` earlier, because if `f` is a generic `f`, we should instantiate it, + * and in order to do it, we need to know argument types. 
+ * For example, we can't bind `c.cellHash()` earlier, because in the future we'll have overloads (`cell.hash()` and `slice.hash()`), + * and in order to bind it, we need to know the object type. + * And vice versa, to infer the type of an expression in the middle, we need to have inferred all expressions preceding it, + * which may also include generics, etc. + * + * About generics. They are more like "C++ templates". If `f<int>` and `f<slice>` are called from somewhere, + * there will be TWO new functions, inserted into symtable, and both will be code generated to Fift. + * Body of a generic function is NOT analyzed. Hence, for `fun f<T>(v: T) { v.method(); }` we don't know + * whether `v.method()` is a valid call until we instantiate it, with `f<slice>` for example. + * Same for `v + 2`, we don't know whether + operator can be applied until instantiation. + * In other words, we have a closed type system, not open. + * That's why generic functions' bodies aren't traversed here (and in most following pipes). + * Instead, when an instantiated function is created, it follows all the preceding pipeline (registering symbols, etc.), + * and type inferring is done inside instantiated functions (which can recursively instantiate another, etc.). + * + * A noticeable part of inferring is "hints". + * Example: `var a: User = { id: 3, name: "" }`. To infer type of `{...}` we need to know it's `User`. This hint is taken from lhs. + * Example: `fun tupleAt<T>(t: tuple, idx: int): T`, just `t.tupleAt(2)` can't be deduced (T left unspecified), + * but for assignment with left-defined type, or a call to `fInt(t.tupleAt(2))` hint "int" helps deduce T. + * + * Unlike other pipes, inferring can dig recursively on demand. + * Example: + * fun getInt() { return 1; } + * fun main() { var i = getInt(); } + * If `main` is handled first, it should know the return type of `getInt`. It's not declared, so we need + * to launch type inferring for `getInt` and then proceed back to `main`.
+ * When a generic function is instantiated, type inferring inside it is also run. + */ + +namespace tolk { + +static void infer_and_save_return_type_of_function(const FunctionData* fun_ref); + +static TypePtr get_or_infer_return_type(const FunctionData* fun_ref) { + if (!fun_ref->inferred_return_type) { + infer_and_save_return_type_of_function(fun_ref); + } + return fun_ref->inferred_return_type; +} + +GNU_ATTRIBUTE_NOINLINE +static std::string to_string(TypePtr type) { + return "`" + type->as_human_readable() + "`"; +} + +GNU_ATTRIBUTE_NOINLINE +static std::string to_string(AnyExprV v_with_type) { + return "`" + v_with_type->inferred_type->as_human_readable() + "`"; +} + +GNU_ATTRIBUTE_NOINLINE +static std::string to_string(const LocalVarData& var_ref) { + return "`" + var_ref.declared_type->as_human_readable() + "`"; +} + +GNU_ATTRIBUTE_NOINLINE +static std::string to_string(const FunctionData* fun_ref) { + return "`" + fun_ref->as_human_readable() + "`"; +} + +// fire an error when `fun f(...) 
asm ...` is called with T=(int,int) or other non-1 width on stack +// asm functions generally can't handle it, they expect T to be a TVM primitive +// (in FunC, `forall` type just couldn't be unified with non-primitives; in Tolk, generic T is expectedly inferred) +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_calling_asm_function_with_non1_stack_width_arg(SrcLocation loc, const FunctionData* fun_ref, const std::vector& substitutions, int arg_idx) { + throw ParseError(loc, "can not call `" + fun_ref->as_human_readable() + "` with " + fun_ref->genericTs->get_nameT(arg_idx) + "=" + substitutions[arg_idx]->as_human_readable() + ", because it occupies " + std::to_string(substitutions[arg_idx]->calc_width_on_stack()) + " stack slots in TVM, not 1"); +} + +// fire an error on `var n = null` +// technically it's correct, type of `n` is TypeDataNullLiteral, but it's not what the user wanted +// so, it's better to see an error on assignment, that later, on `n` usage and types mismatch +// (most common is situation above, but generally, `var (x,n) = xn` where xn is a tensor with 2-nd always-null, can be) +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_assign_always_null_to_variable(SrcLocation loc, const LocalVarData* assigned_var, bool is_assigned_null_literal) { + std::string var_name = assigned_var->name; + throw ParseError(loc, "can not infer type of `" + var_name + "`, it's always null; specify its type with `" + var_name + ": `" + (is_assigned_null_literal ? " or use `null as `" : "")); +} + +// check correctness of called arguments counts and their type matching +static void check_function_arguments(const FunctionData* fun_ref, V v, AnyExprV lhs_of_dot_call) { + int delta_self = lhs_of_dot_call ? 
1 : 0; + int n_arguments = v->size() + delta_self; + int n_parameters = fun_ref->get_num_params(); + + // Tolk doesn't have optional parameters currently, so just compare counts + if (!n_parameters && lhs_of_dot_call) { + v->error("`" + fun_ref->name + "` has no parameters and can not be called as method"); + } + if (n_parameters < n_arguments) { + v->error("too many arguments in call to `" + fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); + } + if (n_arguments < n_parameters) { + v->error("too few arguments in call to `" + fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); + } + + if (lhs_of_dot_call) { + if (!fun_ref->parameters[0].declared_type->can_rhs_be_assigned(lhs_of_dot_call->inferred_type)) { + lhs_of_dot_call->error("can not call method for " + to_string(fun_ref->parameters[0]) + " with object of type " + to_string(lhs_of_dot_call)); + } + } + for (int i = 0; i < v->size(); ++i) { + if (!fun_ref->parameters[i + delta_self].declared_type->can_rhs_be_assigned(v->get_arg(i)->inferred_type)) { + v->get_arg(i)->error("can not pass " + to_string(v->get_arg(i)) + " to " + to_string(fun_ref->parameters[i + delta_self])); + } + } +} + +/* + * TypeInferringUnifyStrategy unifies types from various branches to a common result (lca). + * It's used to auto infer function return type based on return statements, like in TypeScript. + * Example: `fun f() { ... return 1; ... return null; }` inferred as `int`. + * + * Besides function returns, it's also useful for ternary `return cond ? 1 : null` and `match` expression. + * If types can't be unified (a function returns int and cell, for example), `unify()` returns false, handled outside. + * BTW, don't confuse this way of inferring with Hindley-Milner, they have nothing in common. 
+ */ +class TypeInferringUnifyStrategy { + TypePtr unified_result = nullptr; + + static TypePtr calculate_type_lca(TypePtr t1, TypePtr t2) { + if (t1 == t2) { + return t1; + } + if (t1->can_rhs_be_assigned(t2)) { + return t1; + } + if (t2->can_rhs_be_assigned(t1)) { + return t2; + } + + const auto* tensor1 = t1->try_as(); + const auto* tensor2 = t2->try_as(); + if (tensor1 && tensor2 && tensor1->size() == tensor2->size()) { + std::vector types_lca; + types_lca.reserve(tensor1->size()); + for (int i = 0; i < tensor1->size(); ++i) { + TypePtr next = calculate_type_lca(tensor1->items[i], tensor2->items[i]); + if (next == nullptr) { + return nullptr; + } + types_lca.push_back(next); + } + return TypeDataTensor::create(std::move(types_lca)); + } + + const auto* tuple1 = t1->try_as(); + const auto* tuple2 = t2->try_as(); + if (tuple1 && tuple2 && tuple1->size() == tuple2->size()) { + std::vector types_lca; + types_lca.reserve(tuple1->size()); + for (int i = 0; i < tuple1->size(); ++i) { + TypePtr next = calculate_type_lca(tuple1->items[i], tuple2->items[i]); + if (next == nullptr) { + return nullptr; + } + types_lca.push_back(next); + } + return TypeDataTypedTuple::create(std::move(types_lca)); + } + + return nullptr; + } + +public: + bool unify_with(TypePtr next) { + if (unified_result == nullptr) { + unified_result = next; + return true; + } + if (unified_result == next) { + return true; + } + + TypePtr combined = calculate_type_lca(unified_result, next); + if (!combined) { + return false; + } + + unified_result = combined; + return true; + } + + bool unify_with_implicit_return_void() { + if (unified_result == nullptr) { + unified_result = TypeDataVoid::create(); + return true; + } + + return unified_result == TypeDataVoid::create(); + } + + TypePtr get_result() const { return unified_result; } +}; + +/* + * This class handles all types of AST vertices and traverses them, filling all AnyExprV::inferred_type. 
+ * Note, that it isn't derived from ASTVisitor, it has manual `switch` over all existing vertex types. + * There are two reasons for this: + * 1) when a new AST node type is introduced, I want it to fail here, not to be left un-inferred with UB at next steps + * 2) easy to maintain a hint (see comments at the top of the file) + */ +class InferCheckTypesAndCallsAndFieldsVisitor final { + const FunctionData* current_function = nullptr; + TypeInferringUnifyStrategy return_unifier; + + GNU_ATTRIBUTE_ALWAYS_INLINE + static void assign_inferred_type(AnyExprV dst, AnyExprV src) { +#ifdef TOLK_DEBUG + tolk_assert(src->inferred_type != nullptr && !src->inferred_type->has_unresolved_inside() && !src->inferred_type->has_genericT_inside()); +#endif + dst->mutate()->assign_inferred_type(src->inferred_type); + } + + GNU_ATTRIBUTE_ALWAYS_INLINE + static void assign_inferred_type(AnyExprV dst, TypePtr inferred_type) { +#ifdef TOLK_DEBUG + tolk_assert(inferred_type != nullptr && !inferred_type->has_unresolved_inside() && !inferred_type->has_genericT_inside()); +#endif + dst->mutate()->assign_inferred_type(inferred_type); + } + + static void assign_inferred_type(const LocalVarData* local_var_or_param, TypePtr inferred_type) { +#ifdef TOLK_DEBUG + tolk_assert(inferred_type != nullptr && !inferred_type->has_unresolved_inside() && !inferred_type->has_genericT_inside()); +#endif + local_var_or_param->mutate()->assign_inferred_type(inferred_type); + } + + static void assign_inferred_type(const FunctionData* fun_ref, TypePtr inferred_return_type, TypePtr inferred_full_type) { +#ifdef TOLK_DEBUG + tolk_assert(inferred_return_type != nullptr && !inferred_return_type->has_unresolved_inside() && !inferred_return_type->has_genericT_inside()); +#endif + fun_ref->mutate()->assign_inferred_type(inferred_return_type, inferred_full_type); + } + + // traverse children in any statement + void process_any_statement(AnyV v) { + switch (v->type) { + case ast_sequence: + return 
process_sequence(v->as()); + case ast_return_statement: + return process_return_statement(v->as()); + case ast_if_statement: + return process_if_statement(v->as()); + case ast_repeat_statement: + return process_repeat_statement(v->as()); + case ast_while_statement: + return process_while_statement(v->as()); + case ast_do_while_statement: + return process_do_while_statement(v->as()); + case ast_throw_statement: + return process_throw_statement(v->as()); + case ast_assert_statement: + return process_assert_statement(v->as()); + case ast_try_catch_statement: + return process_try_catch_statement(v->as()); + case ast_empty_statement: + return; + default: + infer_any_expr(reinterpret_cast(v)); + } + } + + // assigns inferred_type for any expression (by calling assign_inferred_type) + void infer_any_expr(AnyExprV v, TypePtr hint = nullptr) { + switch (v->type) { + case ast_int_const: + return infer_int_const(v->as()); + case ast_string_const: + return infer_string_const(v->as()); + case ast_bool_const: + return infer_bool_const(v->as()); + case ast_local_vars_declaration: + return infer_local_vars_declaration(v->as()); + case ast_assign: + return infer_assignment(v->as()); + case ast_set_assign: + return infer_set_assign(v->as()); + case ast_unary_operator: + return infer_unary_operator(v->as()); + case ast_binary_operator: + return infer_binary_operator(v->as()); + case ast_ternary_operator: + return infer_ternary_operator(v->as(), hint); + case ast_cast_as_operator: + return infer_cast_as_operator(v->as()); + case ast_parenthesized_expression: + return infer_parenthesized(v->as(), hint); + case ast_reference: + return infer_reference(v->as()); + case ast_dot_access: + return infer_dot_access(v->as(), hint); + case ast_function_call: + return infer_function_call(v->as(), hint); + case ast_tensor: + return infer_tensor(v->as(), hint); + case ast_typed_tuple: + return infer_typed_tuple(v->as(), hint); + case ast_null_keyword: + return infer_null_keyword(v->as()); + case 
ast_underscore: + return infer_underscore(v->as(), hint); + case ast_empty_expression: + return infer_empty_expression(v->as()); + default: + throw UnexpectedASTNodeType(v, "infer_any_expr"); + } + } + + static bool expect_integer(AnyExprV v_inferred) { + return v_inferred->inferred_type == TypeDataInt::create(); + } + + static void infer_int_const(V v) { + assign_inferred_type(v, TypeDataInt::create()); + } + + static void infer_string_const(V v) { + if (v->is_bitslice()) { + assign_inferred_type(v, TypeDataSlice::create()); + } else { + assign_inferred_type(v, TypeDataInt::create()); + } + } + + static void infer_bool_const(V v) { + // currently, Tolk has no `bool` type; `true` and `false` are integers (-1 and 0) + assign_inferred_type(v, TypeDataInt::create()); + } + + static void infer_local_vars_declaration(V) { + // it can not appear as a standalone expression + // `var ... = rhs` is handled by ast_assign + tolk_assert(false); + } + + void infer_assignment(V v) { + // v is assignment: `x = 5` / `var x = 5` / `var x: slice = 5` / `(cs,_) = f()` / `val (a,[b],_) = (a,t,0)` + // it's a tricky node to handle, because to infer rhs, at first we need to create hint from lhs + // and then to apply/check inferred rhs onto lhs + // about a hint: `var i: int = t.tupleAt(0)` is ok, but `var i = t.tupleAt(0)` not, since `tupleAt(t,i): T` + AnyExprV lhs = v->get_lhs(); + AnyExprV rhs = v->get_rhs(); + infer_any_expr(rhs, calc_hint_from_assignment_lhs(lhs)); + process_assignment_lhs_after_infer_rhs(lhs, rhs->inferred_type, rhs); + assign_inferred_type(v, lhs); + } + + // having assignment like `var (i: int, s) = rhs` (its lhs is local vars declaration), + // create a contextual infer hint for rhs, `(int, unknown)` in this case + // this hint helps to deduce generics and to resolve unknown types while inferring rhs + static TypePtr calc_hint_from_assignment_lhs(AnyExprV lhs) { + // `var ... 
= rhs` - dig into left part + if (auto lhs_decl = lhs->try_as()) { + return calc_hint_from_assignment_lhs(lhs_decl->get_expr()); + } + + // inside `var v: int = rhs` / `var _ = rhs` / `var v redef = rhs` (lhs is "v" / "_" / "v") + if (auto lhs_var = lhs->try_as()) { + if (lhs_var->marked_as_redef) { + return lhs_var->var_ref->declared_type; + } + if (lhs_var->declared_type) { + return lhs_var->declared_type; + } + return TypeDataUnknown::create(); + } + + // `v = rhs` / `(c1, c2) = rhs` (lhs is "v" / "_" / "c1" / "c2" after recursion) + if (auto lhs_ref = lhs->try_as()) { + if (const auto* var_ref = lhs_ref->sym->try_as()) { + return var_ref->declared_type; + } + if (const auto* glob_ref = lhs_ref->sym->try_as()) { + return glob_ref->declared_type; + } + return TypeDataUnknown::create(); + } + + // `(v1, v2) = rhs` / `var (v1, v2) = rhs` + if (auto lhs_tensor = lhs->try_as()) { + std::vector sub_hints; + sub_hints.reserve(lhs_tensor->size()); + for (AnyExprV item : lhs_tensor->get_items()) { + sub_hints.push_back(calc_hint_from_assignment_lhs(item)); + } + return TypeDataTensor::create(std::move(sub_hints)); + } + + // `[v1, v2] = rhs` / `var [v1, v2] = rhs` + if (auto lhs_tuple = lhs->try_as()) { + std::vector sub_hints; + sub_hints.reserve(lhs_tuple->size()); + for (AnyExprV item : lhs_tuple->get_items()) { + sub_hints.push_back(calc_hint_from_assignment_lhs(item)); + } + return TypeDataTypedTuple::create(std::move(sub_hints)); + } + + return TypeDataUnknown::create(); + } + + // handle (and dig recursively) into `var lhs = rhs` + // examples: `var z = 5`, `var (x, [y]) = (2, [3])`, `var (x, [y]) = xy` + // while recursing, keep track of rhs if lhs and rhs have common shape (5 for z, 2 for x, [3] for [y], 3 for y) + // (so that on type mismatch, point to corresponding rhs, example: `var (x, y:slice) = (1, 2)` point to 2 + void process_assignment_lhs_after_infer_rhs(AnyExprV lhs, TypePtr rhs_type, AnyExprV corresponding_maybe_rhs) { + AnyExprV err_loc = 
corresponding_maybe_rhs ? corresponding_maybe_rhs : lhs; + + // `var ... = rhs` - dig into left part + if (auto lhs_decl = lhs->try_as()) { + process_assignment_lhs_after_infer_rhs(lhs_decl->get_expr(), rhs_type, corresponding_maybe_rhs); + assign_inferred_type(lhs, lhs_decl->get_expr()->inferred_type); + return; + } + + // inside `var v: int = rhs` / `var _ = rhs` / `var v redef = rhs` (lhs is "v" / "_" / "v") + if (auto lhs_var = lhs->try_as()) { + TypePtr declared_type = lhs_var->declared_type; // `var v: int = rhs` (otherwise, nullptr) + if (lhs_var->marked_as_redef) { + tolk_assert(lhs_var->var_ref && lhs_var->var_ref->declared_type); + declared_type = lhs_var->var_ref->declared_type; + } + if (declared_type) { + if (!declared_type->can_rhs_be_assigned(rhs_type)) { + err_loc->error("can not assign " + to_string(rhs_type) + " to variable of type " + to_string(declared_type)); + } + assign_inferred_type(lhs, declared_type); + } else { + if (rhs_type == TypeDataNullLiteral::create()) { + fire_error_assign_always_null_to_variable(err_loc->loc, lhs_var->var_ref->try_as(), corresponding_maybe_rhs && corresponding_maybe_rhs->type == ast_null_keyword); + } + assign_inferred_type(lhs, rhs_type); + assign_inferred_type(lhs_var->var_ref, lhs_var->inferred_type); + } + return; + } + + // `v = rhs` / `(c1, c2) = rhs` (lhs is "v" / "_" / "c1" / "c2" after recursion) + if (lhs->try_as()) { + infer_any_expr(lhs); + if (!lhs->inferred_type->can_rhs_be_assigned(rhs_type)) { + err_loc->error("can not assign " + to_string(rhs_type) + " to variable of type " + to_string(lhs)); + } + return; + } + + // `(v1, v2) = rhs` / `var (v1, v2) = rhs` (rhs may be `(1,2)` or `tensorVar` or `someF()`, doesn't matter) + // dig recursively into v1 and v2 with corresponding rhs i-th item of a tensor + if (auto lhs_tensor = lhs->try_as()) { + const TypeDataTensor* rhs_type_tensor = rhs_type->try_as(); + if (!rhs_type_tensor) { + err_loc->error("can not assign " + to_string(rhs_type) + " to a 
tensor"); + } + if (lhs_tensor->size() != rhs_type_tensor->size()) { + err_loc->error("can not assign " + to_string(rhs_type) + ", sizes mismatch"); + } + V rhs_tensor_maybe = corresponding_maybe_rhs ? corresponding_maybe_rhs->try_as() : nullptr; + std::vector types_list; + types_list.reserve(lhs_tensor->size()); + for (int i = 0; i < lhs_tensor->size(); ++i) { + process_assignment_lhs_after_infer_rhs(lhs_tensor->get_item(i), rhs_type_tensor->items[i], rhs_tensor_maybe ? rhs_tensor_maybe->get_item(i) : nullptr); + types_list.push_back(lhs_tensor->get_item(i)->inferred_type); + } + assign_inferred_type(lhs, TypeDataTensor::create(std::move(types_list))); + return; + } + + // `[v1, v2] = rhs` / `var [v1, v2] = rhs` (rhs may be `[1,2]` or `tupleVar` or `someF()`, doesn't matter) + // dig recursively into v1 and v2 with corresponding rhs i-th item of a tuple + if (auto lhs_tuple = lhs->try_as()) { + const TypeDataTypedTuple* rhs_type_tuple = rhs_type->try_as(); + if (!rhs_type_tuple) { + err_loc->error("can not assign " + to_string(rhs_type) + " to a tuple"); + } + if (lhs_tuple->size() != rhs_type_tuple->size()) { + err_loc->error("can not assign " + to_string(rhs_type) + ", sizes mismatch"); + } + V rhs_tuple_maybe = corresponding_maybe_rhs ? corresponding_maybe_rhs->try_as() : nullptr; + std::vector types_list; + types_list.reserve(lhs_tuple->size()); + for (int i = 0; i < lhs_tuple->size(); ++i) { + process_assignment_lhs_after_infer_rhs(lhs_tuple->get_item(i), rhs_type_tuple->items[i], rhs_tuple_maybe ? 
rhs_tuple_maybe->get_item(i) : nullptr); + types_list.push_back(lhs_tuple->get_item(i)->inferred_type); + } + assign_inferred_type(lhs, TypeDataTypedTuple::create(std::move(types_list))); + return; + } + + // `_ = rhs` + if (lhs->type == ast_underscore) { + assign_inferred_type(lhs, TypeDataUnknown::create()); + return; + } + + // here is something strange and unhandled, like `f() = rhs` + // it will fail on later compilation steps (like rvalue/lvalue checks), but type inferring should pass + infer_any_expr(lhs, rhs_type); + if (!lhs->inferred_type->can_rhs_be_assigned(rhs_type)) { + err_loc->error("can not assign " + to_string(rhs_type) + " to " + to_string(lhs)); + } + } + + void infer_set_assign(V v) { + AnyExprV lhs = v->get_lhs(); + AnyExprV rhs = v->get_rhs(); + infer_any_expr(lhs); + infer_any_expr(rhs, lhs->inferred_type); + + // almost all operators implementation is hardcoded by built-in functions `_+_` and similar + std::string_view builtin_func = v->operator_name; // "+" for operator += + + if (!expect_integer(lhs) || !expect_integer(rhs)) { + v->error("can not apply operator `" + static_cast(v->operator_name) + "` to " + to_string(lhs) + " and " + to_string(rhs)); + } + + assign_inferred_type(v, lhs); + if (!builtin_func.empty()) { + const FunctionData* builtin_sym = lookup_global_symbol("_" + static_cast(builtin_func) + "_")->as(); + tolk_assert(builtin_sym); + v->mutate()->assign_fun_ref(builtin_sym); + } + } + + void infer_unary_operator(V v) { + AnyExprV rhs = v->get_rhs(); + infer_any_expr(rhs); + + // all operators implementation is hardcoded by built-in functions `~_` and similar + std::string_view builtin_func = v->operator_name; + + if (!expect_integer(rhs)) { + v->error("can not apply operator `" + static_cast(v->operator_name) + "` to " + to_string(rhs)); + } + assign_inferred_type(v, TypeDataInt::create()); + + if (!builtin_func.empty()) { + const FunctionData* builtin_sym = lookup_global_symbol(static_cast(builtin_func) + "_")->as(); + 
tolk_assert(builtin_sym); + v->mutate()->assign_fun_ref(builtin_sym); + } + } + + void infer_binary_operator(V v) { + AnyExprV lhs = v->get_lhs(); + AnyExprV rhs = v->get_rhs(); + infer_any_expr(lhs); + infer_any_expr(rhs); + + // almost all operators implementation is hardcoded by built-in functions `_+_` and similar + std::string_view builtin_func = v->operator_name; + + switch (v->tok) { + // == != can compare both integers and booleans, (int == bool) is NOT allowed + case tok_eq: + case tok_neq: + case tok_spaceship: { + if (!expect_integer(lhs) || !expect_integer(rhs)) { + v->error("comparison operators `== !=` can compare only integers, got " + to_string(lhs) + " and " + to_string(rhs)); + } + assign_inferred_type(v, TypeDataInt::create()); + break; + } + case tok_logical_and: + case tok_logical_or: { + if (!expect_integer(lhs) || !expect_integer(rhs)) { + v->error("logical operators `&& ||` expect integer operands, got " + to_string(lhs) + " and " + to_string(rhs)); + } + assign_inferred_type(v, TypeDataInt::create()); + builtin_func = {}; + break; + } + default: + if (!expect_integer(lhs) || !expect_integer(rhs)) { + v->error("can not apply operator `" + static_cast(v->operator_name) + "` to " + to_string(lhs) + " and " + to_string(rhs)); + } + assign_inferred_type(v, TypeDataInt::create()); + } + + if (!builtin_func.empty()) { + const FunctionData* builtin_sym = lookup_global_symbol("_" + static_cast(builtin_func) + "_")->as(); + tolk_assert(builtin_sym); + v->mutate()->assign_fun_ref(builtin_sym); + } + } + + void infer_ternary_operator(V v, TypePtr hint) { + infer_any_expr(v->get_cond()); + if (!expect_integer(v->get_cond())) { + v->get_cond()->error("condition of ternary operator must be an integer, got " + to_string(v->get_cond())); + } + infer_any_expr(v->get_when_true(), hint); + infer_any_expr(v->get_when_false(), hint); + + TypeInferringUnifyStrategy tern_type; + tern_type.unify_with(v->get_when_true()->inferred_type); + if 
(!tern_type.unify_with(v->get_when_false()->inferred_type)) { + v->error("types of ternary branches are incompatible"); + } + assign_inferred_type(v, tern_type.get_result()); + } + + void infer_cast_as_operator(V v) { + // for `expr as `, use this type for hint, so that `t.tupleAt(0) as int` is ok + infer_any_expr(v->get_expr(), v->cast_to_type); + if (!v->get_expr()->inferred_type->can_be_casted_with_as_operator(v->cast_to_type)) { + v->error("type " + to_string(v->get_expr()) + " can not be cast to " + to_string(v->cast_to_type)); + } + assign_inferred_type(v, v->cast_to_type); + } + + void infer_parenthesized(V v, TypePtr hint) { + infer_any_expr(v->get_expr(), hint); + assign_inferred_type(v, v->get_expr()); + } + + static void infer_reference(V v) { + if (const auto* var_ref = v->sym->try_as()) { + assign_inferred_type(v, var_ref->declared_type); + + } else if (const auto* const_ref = v->sym->try_as()) { + assign_inferred_type(v, const_ref->is_int_const() ? TypeDataInt::create() : TypeDataSlice::create()); + + } else if (const auto* glob_ref = v->sym->try_as()) { + assign_inferred_type(v, glob_ref->declared_type); + + } else if (const auto* fun_ref = v->sym->try_as()) { + // it's `globalF` / `globalF` - references to functions used as non-call + V v_instantiationTs = v->get_instantiationTs(); + + if (fun_ref->is_generic_function() && !v_instantiationTs) { + // `genericFn` is invalid as non-call, can't be used without + v->error("can not use a generic function " + to_string(fun_ref) + " as non-call"); + + } else if (fun_ref->is_generic_function()) { + // `genericFn` is valid, it's a reference to instantiation + std::vector substitutions = collect_fun_generic_substitutions_from_manually_specified(v->loc, fun_ref, v_instantiationTs); + fun_ref = check_and_instantiate_generic_function(v->loc, fun_ref, std::move(substitutions)); + v->mutate()->assign_sym(fun_ref); + + } else if (UNLIKELY(v_instantiationTs != nullptr)) { + // non-generic function referenced like 
`return beginCell;` + v_instantiationTs->error("not generic function used with generic T"); + } + + fun_ref->mutate()->assign_is_used_as_noncall(); + get_or_infer_return_type(fun_ref); + assign_inferred_type(v, fun_ref->inferred_full_type); + return; + + } else { + tolk_assert(false); + } + + // for non-functions: `local_var` and similar not allowed + if (UNLIKELY(v->has_instantiationTs())) { + v->get_instantiationTs()->error("generic T not expected here"); + } + } + + // given `genericF` / `t.tupleFirst` (the user manually specified instantiation Ts), + // validate and collect them + // returns: [int, slice] / [cell] + static std::vector collect_fun_generic_substitutions_from_manually_specified(SrcLocation loc, const FunctionData* fun_ref, V instantiationT_list) { + if (fun_ref->genericTs->size() != instantiationT_list->get_items().size()) { + throw ParseError(loc, "wrong count of generic T: expected " + std::to_string(fun_ref->genericTs->size()) + ", got " + std::to_string(instantiationT_list->size())); + } + + std::vector substitutions; + substitutions.reserve(instantiationT_list->size()); + for (int i = 0; i < instantiationT_list->size(); ++i) { + substitutions.push_back(instantiationT_list->get_item(i)->substituted_type); + } + + return substitutions; + } + + // when generic Ts have been collected from user-specified or deduced from arguments, + // instantiate a generic function + // example: was `t.tuplePush(2)`, deduced , instantiate `tuplePush` + // example: was `t.tuplePush(2)`, read , instantiate `tuplePush` (will later fail type check) + // example: was `var cb = t.tupleFirst;` (used as reference, as non-call), instantiate `tupleFirst` + // returns fun_ref to instantiated function + static const FunctionData* check_and_instantiate_generic_function(SrcLocation loc, const FunctionData* fun_ref, std::vector&& substitutionTs) { + // T for asm function must be a TVM primitive (width 1), otherwise, asm would act incorrectly + if (fun_ref->is_asm_function() || 
fun_ref->is_builtin_function()) { + for (int i = 0; i < static_cast(substitutionTs.size()); ++i) { + if (substitutionTs[i]->calc_width_on_stack() != 1) { + fire_error_calling_asm_function_with_non1_stack_width_arg(loc, fun_ref, substitutionTs, i); + } + } + } + + std::string inst_name = generate_instantiated_name(fun_ref->name, substitutionTs); + try { + // make deep clone of `f` with substitutionTs + // (if `f` was already instantiated, it will be immediately returned from a symbol table) + return instantiate_generic_function(loc, fun_ref, inst_name, std::move(substitutionTs)); + } catch (const ParseError& ex) { + throw ParseError(ex.where, "while instantiating generic function `" + inst_name + "` at " + loc.to_string() + ": " + ex.message); + } + } + + void infer_dot_access(V v, TypePtr hint) { + // it's NOT a method call `t.tupleSize()` (since such cases are handled by infer_function_call) + // it's `t.0`, `getUser().id`, and `t.tupleSize` (as a reference, not as a call) + infer_any_expr(v->get_obj()); + // our goal is to fill v->target knowing type of obj + V v_ident = v->get_identifier(); // field/method name vertex + V v_instantiationTs = v->get_instantiationTs(); + std::string_view field_name = v_ident->name; + + // for now, Tolk doesn't have structures, properties, and object-scoped methods + // so, only `t.tupleSize` is allowed, look up a global function + const Symbol* sym = lookup_global_symbol(field_name); + if (!sym) { + v_ident->error("undefined symbol `" + static_cast(field_name) + "`"); + } + const FunctionData* fun_ref = sym->try_as(); + if (!fun_ref) { + v_ident->error("referencing a non-function"); + } + + // `t.tupleSize` is ok, `cs.tupleSize` not + if (!fun_ref->parameters[0].declared_type->can_rhs_be_assigned(v->get_obj()->inferred_type)) { + v_ident->error("referencing a method for " + to_string(fun_ref->parameters[0]) + " with an object of type " + to_string(v->get_obj())); + } + + if (fun_ref->is_generic_function() && !v_instantiationTs) { 
+ // `genericFn` and `t.tupleAt` are invalid as non-call, they can't be used without + v->error("can not use a generic function " + to_string(fun_ref) + " as non-call"); + + } else if (fun_ref->is_generic_function()) { + // `t.tupleAt` is valid, it's a reference to instantiation + std::vector substitutions = collect_fun_generic_substitutions_from_manually_specified(v->loc, fun_ref, v_instantiationTs); + fun_ref = check_and_instantiate_generic_function(v->loc, fun_ref, std::move(substitutions)); + + } else if (UNLIKELY(v_instantiationTs != nullptr)) { + // non-generic method referenced like `var cb = c.cellHash;` + v_instantiationTs->error("not generic function used with generic T"); + } + + fun_ref->mutate()->assign_is_used_as_noncall(); + v->mutate()->assign_target(fun_ref); + get_or_infer_return_type(fun_ref); + assign_inferred_type(v, fun_ref->inferred_full_type); // type of `t.tupleSize` is TypeDataFunCallable + } + + void infer_function_call(V v, TypePtr hint) { + AnyExprV callee = v->get_callee(); + + // v is `globalF(args)` / `globalF(args)` / `obj.method(args)` / `local_var(args)` / `getF()(args)` + int delta_self = 0; + AnyExprV dot_obj = nullptr; + const FunctionData* fun_ref = nullptr; + V v_instantiationTs = nullptr; + + if (auto v_ref = callee->try_as()) { + // `globalF()` / `globalF()` / `local_var()` / `SOME_CONST()` + fun_ref = v_ref->sym->try_as(); // not null for `globalF` + v_instantiationTs = v_ref->get_instantiationTs(); // present for `globalF()` + + } else if (auto v_dot = callee->try_as()) { + // `obj.someMethod()` / `obj.someMethod()` / `getF().someMethod()` / `obj.SOME_CONST()` + delta_self = 1; + dot_obj = v_dot->get_obj(); + v_instantiationTs = v_dot->get_instantiationTs(); // present for `obj.someMethod()` + infer_any_expr(dot_obj); + + // for now, Tolk doesn't have object-scoped methods, so method resolving doesn't depend on obj type + // (in other words, `globalFunction(a)` = `a.globalFunction()`) + std::string_view method_name = 
v_dot->get_field_name(); + const Symbol* sym = lookup_global_symbol(method_name); + if (!sym) { + v_dot->get_identifier()->error("undefined symbol `" + static_cast(method_name) + "`"); + } + fun_ref = sym->try_as(); + if (!fun_ref) { + v_dot->get_identifier()->error("calling a non-function"); + } + + } else { + // `getF()()` / `5()` + // fun_ref remains nullptr + } + + // infer argument types, looking at fun_ref's parameters as hints + for (int i = 0; i < v->get_num_args(); ++i) { + TypePtr param_type = fun_ref && i < fun_ref->get_num_params() - delta_self ? fun_ref->parameters[delta_self + i].declared_type : nullptr; + auto arg_i = v->get_arg(i); + infer_any_expr(arg_i->get_expr(), param_type && !param_type->has_genericT_inside() ? param_type : nullptr); + assign_inferred_type(arg_i, arg_i->get_expr()); + } + + // handle `local_var()` / `getF()()` / `5()` / `SOME_CONST()` / `obj.method()()()` + if (!fun_ref) { + // treat callee like a usual expression, which must have "callable" inferred type + infer_any_expr(callee); + const TypeDataFunCallable* f_callable = callee->inferred_type->try_as(); + if (!f_callable) { // `5()` / `SOME_CONST()` / `null()` + v->error("calling a non-function"); + } + // check arguments count and their types + if (v->get_num_args() != static_cast(f_callable->params_types.size())) { + v->error("expected " + std::to_string(f_callable->params_types.size()) + " arguments, got " + std::to_string(v->get_arg_list()->size())); + } + for (int i = 0; i < v->get_num_args(); ++i) { + if (!f_callable->params_types[i]->can_rhs_be_assigned(v->get_arg(i)->inferred_type)) { + v->get_arg(i)->error("can not pass " + to_string(v->get_arg(i)) + " to " + to_string(f_callable->params_types[i])); + } + } + v->mutate()->assign_fun_ref(nullptr); // no fun_ref to a global function + assign_inferred_type(v, f_callable->return_type); + return; + } + + // so, we have a call `f(args)` or `obj.f(args)`, f is a global function (fun_ref) (code / asm / builtin) + // if it's 
a generic function `f`, we need to instantiate it, like `f` + // same for generic methods `t.tupleAt`, need to achieve `t.tupleAt` + + if (fun_ref->is_generic_function() && v_instantiationTs) { + // if Ts are specified by a user like `f(args)` / `t.tupleAt()`, take them + std::vector substitutions = collect_fun_generic_substitutions_from_manually_specified(v->loc, fun_ref, v_instantiationTs); + fun_ref = check_and_instantiate_generic_function(v->loc, fun_ref, std::move(substitutions)); + + } else if (fun_ref->is_generic_function()) { + // if `f` called like `f(args)`, deduce T from arg types + std::vector arg_types; + arg_types.reserve(delta_self + v->get_num_args()); + if (dot_obj) { + arg_types.push_back(dot_obj->inferred_type); + } + for (int i = 0; i < v->get_num_args(); ++i) { + arg_types.push_back(v->get_arg(i)->inferred_type); + } + + td::Result> deduced = deduce_substitutionTs_on_generic_func_call(fun_ref, std::move(arg_types), hint); + if (deduced.is_error()) { + v->error(deduced.error().message().str() + " for generic function " + to_string(fun_ref)); + } + fun_ref = check_and_instantiate_generic_function(v->loc, fun_ref, deduced.move_as_ok()); + + } else if (UNLIKELY(v_instantiationTs != nullptr)) { + // non-generic function/method called with type arguments, like `c.cellHash()` / `beginCell()` + v_instantiationTs->error("calling a not generic function with generic T"); + } + + v->mutate()->assign_fun_ref(fun_ref); + // since for `t.tupleAt()`, infer_dot_access() not called for callee = "t.tupleAt", assign its target here + if (v->is_dot_call()) { + v->get_callee()->as()->mutate()->assign_target(fun_ref); + v->get_callee()->as()->mutate()->assign_inferred_type(fun_ref->inferred_full_type); + } + // check arguments count and their types + check_function_arguments(fun_ref, v->get_arg_list(), dot_obj); + // get return type either from user-specified declaration or infer here on demand traversing its body + get_or_infer_return_type(fun_ref); + TypePtr 
inferred_type = dot_obj && fun_ref->does_return_self() ? dot_obj->inferred_type : fun_ref->inferred_return_type; + assign_inferred_type(v, inferred_type); + assign_inferred_type(callee, fun_ref->inferred_full_type); + // note, that mutate params don't affect typing, they are handled when converting to IR + } + + void infer_tensor(V v, TypePtr hint) { + const TypeDataTensor* tensor_hint = hint ? hint->try_as() : nullptr; + std::vector types_list; + types_list.reserve(v->get_items().size()); + for (int i = 0; i < v->size(); ++i) { + AnyExprV item = v->get_item(i); + infer_any_expr(item, tensor_hint && i < tensor_hint->size() ? tensor_hint->items[i] : nullptr); + types_list.emplace_back(item->inferred_type); + } + assign_inferred_type(v, TypeDataTensor::create(std::move(types_list))); + } + + void infer_typed_tuple(V v, TypePtr hint) { + const TypeDataTypedTuple* tuple_hint = hint ? hint->try_as() : nullptr; + std::vector types_list; + types_list.reserve(v->get_items().size()); + for (int i = 0; i < v->size(); ++i) { + AnyExprV item = v->get_item(i); + infer_any_expr(item, tuple_hint && i < tuple_hint->size() ? tuple_hint->items[i] : nullptr); + types_list.emplace_back(item->inferred_type); + } + assign_inferred_type(v, TypeDataTypedTuple::create(std::move(types_list))); + } + + static void infer_null_keyword(V v) { + assign_inferred_type(v, TypeDataNullLiteral::create()); + } + + static void infer_underscore(V v, TypePtr hint) { + // if execution is here, underscore is either used as lhs of assignment, or incorrectly, like `f(_)` + // more precise is to always set unknown here, but for incorrect usages, instead of an error + // "can not pass unknown to X" would better be an error it can't be used as a value, at later steps + assign_inferred_type(v, hint ? 
hint : TypeDataUnknown::create()); + } + + static void infer_empty_expression(V v) { + assign_inferred_type(v, TypeDataUnknown::create()); + } + + void process_sequence(V v) { + for (AnyV item : v->get_items()) { + process_any_statement(item); + } + } + + static bool is_expr_valid_as_return_self(AnyExprV return_expr) { + // `return self` + if (return_expr->type == ast_reference && return_expr->as()->get_name() == "self") { + return true; + } + // `return self.someMethod()` + if (auto v_call = return_expr->try_as(); v_call && v_call->is_dot_call()) { + return v_call->fun_maybe && v_call->fun_maybe->does_return_self() && is_expr_valid_as_return_self(v_call->get_dot_obj()); + } + // `return cond ? ... : ...` + if (auto v_ternary = return_expr->try_as()) { + return is_expr_valid_as_return_self(v_ternary->get_when_true()) && is_expr_valid_as_return_self(v_ternary->get_when_false()); + } + return false; + } + + void process_return_statement(V v) { + if (v->has_return_value()) { + infer_any_expr(v->get_return_value(), current_function->declared_return_type); + } else { + assign_inferred_type(v->get_return_value(), TypeDataVoid::create()); + } + if (current_function->does_return_self()) { + return_unifier.unify_with(current_function->parameters[0].declared_type); + if (!is_expr_valid_as_return_self(v->get_return_value())) { + v->error("invalid return from `self` function"); + } + return; + } + + TypePtr expr_type = v->get_return_value()->inferred_type; + if (current_function->declared_return_type) { + if (!current_function->declared_return_type->can_rhs_be_assigned(expr_type)) { + v->get_return_value()->error("can not convert type " + to_string(expr_type) + " to return type " + to_string(current_function->declared_return_type)); + } + } else { + if (!return_unifier.unify_with(expr_type)) { + v->get_return_value()->error("can not unify type " + to_string(expr_type) + " with previous return type " + to_string(return_unifier.get_result())); + } + } + } + + void 
process_if_statement(V v) { + infer_any_expr(v->get_cond()); + if (!expect_integer(v->get_cond())) { + v->get_cond()->error("condition of `if` must be an integer, got " + to_string(v->get_cond())); + } + process_any_statement(v->get_if_body()); + process_any_statement(v->get_else_body()); + } + + void process_repeat_statement(V v) { + infer_any_expr(v->get_cond()); + if (!expect_integer(v->get_cond())) { + v->get_cond()->error("condition of `repeat` must be an integer, got " + to_string(v->get_cond())); + } + process_any_statement(v->get_body()); + } + + void process_while_statement(V v) { + infer_any_expr(v->get_cond()); + if (!expect_integer(v->get_cond())) { + v->get_cond()->error("condition of `while` must be an integer, got " + to_string(v->get_cond())); + } + process_any_statement(v->get_body()); + } + + void process_do_while_statement(V v) { + process_any_statement(v->get_body()); + infer_any_expr(v->get_cond()); + if (!expect_integer(v->get_cond())) { + v->get_cond()->error("condition of `while` must be an integer, got " + to_string(v->get_cond())); + } + } + + void process_throw_statement(V v) { + infer_any_expr(v->get_thrown_code()); + if (!expect_integer(v->get_thrown_code())) { + v->get_thrown_code()->error("excNo of `throw` must be an integer, got " + to_string(v->get_thrown_code())); + } + infer_any_expr(v->get_thrown_arg()); + if (v->has_thrown_arg() && v->get_thrown_arg()->inferred_type->calc_width_on_stack() != 1) { + v->get_thrown_arg()->error("can not throw " + to_string(v->get_thrown_arg()) + ", exception arg must occupy exactly 1 stack slot"); + } + } + + void process_assert_statement(V v) { + infer_any_expr(v->get_cond()); + if (!expect_integer(v->get_cond())) { + v->get_cond()->error("condition of `assert` must be an integer, got " + to_string(v->get_cond())); + } + infer_any_expr(v->get_thrown_code()); + if (!expect_integer(v->get_thrown_code())) { + v->get_cond()->error("thrown excNo of `assert` must be an integer, got " + 
to_string(v->get_cond())); + } + } + + static void process_catch_variable(AnyExprV catch_var, TypePtr catch_var_type) { + if (auto v_ref = catch_var->try_as(); v_ref && v_ref->sym) { // not underscore + assign_inferred_type(v_ref->sym->as(), catch_var_type); + } + assign_inferred_type(catch_var, catch_var_type); + } + + void process_try_catch_statement(V v) { + process_any_statement(v->get_try_body()); + + // `catch` has exactly 2 variables: excNo and arg (when missing, they are implicit underscores) + // `arg` is a curious thing, it can be any TVM primitive, so assign unknown to it + // hence, using `fInt(arg)` (int from parameter is a hint) or `arg as slice` works well + // it's not truly correct, because `arg as (int,int)` also compiles, but can never happen, but let it be user responsibility + tolk_assert(v->get_catch_expr()->size() == 2); + std::vector types_list = {TypeDataInt::create(), TypeDataUnknown::create()}; + process_catch_variable(v->get_catch_expr()->get_item(0), types_list[0]); + process_catch_variable(v->get_catch_expr()->get_item(1), types_list[1]); + assign_inferred_type(v->get_catch_expr(), TypeDataTensor::create(std::move(types_list))); + + process_any_statement(v->get_catch_body()); + } + +public: + static void assign_fun_full_type(const FunctionData* fun_ref, TypePtr inferred_return_type) { + // calculate function full type `fun(params) -> ret_type` + std::vector params_types; + params_types.reserve(fun_ref->get_num_params()); + for (const LocalVarData& param : fun_ref->parameters) { + params_types.push_back(param.declared_type); + } + assign_inferred_type(fun_ref, inferred_return_type, TypeDataFunCallable::create(std::move(params_types), inferred_return_type)); + } + + void start_visiting_function(const FunctionData* fun_ref, V v_function) { + if (fun_ref->is_code_function()) { + current_function = fun_ref; + process_any_statement(v_function->get_body()); + current_function = nullptr; + + if (fun_ref->is_implicit_return()) { + bool 
is_ok_with_void = fun_ref->declared_return_type + ? fun_ref->declared_return_type->can_rhs_be_assigned(TypeDataVoid::create()) + : return_unifier.unify_with_implicit_return_void(); + if (!is_ok_with_void || fun_ref->does_return_self()) { + throw ParseError(v_function->get_body()->as()->loc_end, "missing return"); + } + } + } else { + // asm functions should be strictly typed, this was checked earlier + tolk_assert(fun_ref->declared_return_type); + } + + TypePtr inferred_return_type = fun_ref->declared_return_type ? fun_ref->declared_return_type : return_unifier.get_result(); + assign_fun_full_type(fun_ref, inferred_return_type); + fun_ref->mutate()->assign_is_type_inferring_done(); + } +}; + +class LaunchInferTypesAndMethodsOnce final { +public: + static bool should_visit_function(const FunctionData* fun_ref) { + // since inferring can be requested on demand, prevent second execution from a regular pipeline launcher + return !fun_ref->is_type_inferring_done() && !fun_ref->is_generic_function(); + } + + static void start_visiting_function(const FunctionData* fun_ref, V v_function) { + InferCheckTypesAndCallsAndFieldsVisitor visitor; + visitor.start_visiting_function(fun_ref, v_function); + } +}; + +// infer return type "on demand" +// example: `fun f() { return g(); } fun g() { ... }` +// when analyzing `f()`, we need to infer what fun_ref=g returns +// (if `g` is generic, it was already instantiated, so fun_ref=g is here) +static void infer_and_save_return_type_of_function(const FunctionData* fun_ref) { + static std::vector called_stack; + + tolk_assert(!fun_ref->is_generic_function() && !fun_ref->is_type_inferring_done()); + // if `g` has return type declared, like `fun g(): int { ... 
}`, don't traverse its body + if (fun_ref->declared_return_type) { + InferCheckTypesAndCallsAndFieldsVisitor::assign_fun_full_type(fun_ref, fun_ref->declared_return_type); + return; + } + + // prevent recursion of untyped functions, like `fun f() { return g(); } fun g() { return f(); }` + bool contains = std::find(called_stack.begin(), called_stack.end(), fun_ref) != called_stack.end(); + if (contains) { + fun_ref->ast_root->error("could not infer return type of " + to_string(fun_ref) + ", because it appears in a recursive call chain; specify `: ` manually"); + } + + // dig into g's body; it's safe, since the compiler is single-threaded + // on finish, fun_ref->inferred_return_type is filled, and won't be called anymore + called_stack.push_back(fun_ref); + InferCheckTypesAndCallsAndFieldsVisitor visitor; + visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); + called_stack.pop_back(); +} + +void pipeline_infer_types_and_calls_and_fields() { + visit_ast_of_all_functions(); +} + +void pipeline_infer_types_and_calls_and_fields(const FunctionData* fun_ref) { + InferCheckTypesAndCallsAndFieldsVisitor visitor; + visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); +} + +} // namespace tolk diff --git a/tolk/pipe-refine-lvalue-for-mutate.cpp b/tolk/pipe-refine-lvalue-for-mutate.cpp index c4c31b51..45dd3a94 100644 --- a/tolk/pipe-refine-lvalue-for-mutate.cpp +++ b/tolk/pipe-refine-lvalue-for-mutate.cpp @@ -34,8 +34,8 @@ namespace tolk { GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_invalid_mutate_arg_passed(AnyV v, const FunctionData* fun_ref, const LocalVarData& p_sym, bool called_as_method, bool arg_passed_as_mutate, AnyV arg_expr) { - std::string arg_str(arg_expr->type == ast_identifier ? 
arg_expr->as()->name : "obj"); +static void fire_error_invalid_mutate_arg_passed(AnyExprV v, const FunctionData* fun_ref, const LocalVarData& p_sym, bool called_as_method, bool arg_passed_as_mutate, AnyV arg_expr) { + std::string arg_str(arg_expr->type == ast_reference ? arg_expr->as()->get_name() : "obj"); // case: `loadInt(cs, 32)`; suggest: `cs.loadInt(32)` if (p_sym.is_mutate_parameter() && !arg_passed_as_mutate && !called_as_method && p_sym.idx == 0 && fun_ref->does_accept_self()) { @@ -59,7 +59,7 @@ static void fire_error_invalid_mutate_arg_passed(AnyV v, const FunctionData* fun class RefineLvalueForMutateArgumentsVisitor final : public ASTVisitorFunctionBody { void visit(V v) override { - // most likely it's a global function, but also may be `some_var(args)` or even `getF()(args)` + // v is `globalF(args)` / `globalF(args)` / `obj.method(args)` / `local_var(args)` / `getF()(args)` const FunctionData* fun_ref = v->fun_maybe; if (!fun_ref) { parent::visit(v); @@ -72,47 +72,55 @@ class RefineLvalueForMutateArgumentsVisitor final : public ASTVisitorFunctionBod return; } - tolk_assert(static_cast(fun_ref->parameters.size()) == v->get_num_args()); + int delta_self = v->is_dot_call(); + tolk_assert(fun_ref->get_num_params() == delta_self + v->get_num_args()); + + if (v->is_dot_call()) { + if (fun_ref->does_mutate_self()) { + // for `b.storeInt()`, `b` should become lvalue, since `storeInt` is a method mutating self + // but: `beginCell().storeInt()`, then `beginCell()` is not lvalue + // (it will be extracted as tmp var when transforming AST to IR) + AnyExprV leftmost_obj = v->get_dot_obj(); + while (true) { + if (auto as_par = leftmost_obj->try_as()) { + leftmost_obj = as_par->get_expr(); + } else if (auto as_cast = leftmost_obj->try_as()) { + leftmost_obj = as_cast->get_expr(); + } else { + break; + } + } + bool will_be_extracted_as_tmp_var = leftmost_obj->type == ast_function_call; + if (!will_be_extracted_as_tmp_var) { + 
leftmost_obj->mutate()->assign_lvalue_true(); + v->get_dot_obj()->mutate()->assign_lvalue_true(); + } + } + + if (!fun_ref->does_accept_self() && fun_ref->parameters[0].is_mutate_parameter()) { + fire_error_invalid_mutate_arg_passed(v, fun_ref, fun_ref->parameters[0], true, false, v->get_dot_obj()); + } + } for (int i = 0; i < v->get_num_args(); ++i) { - const LocalVarData& p_sym = fun_ref->parameters[i]; + const LocalVarData& p_sym = fun_ref->parameters[delta_self + i]; auto arg_i = v->get_arg(i); if (p_sym.is_mutate_parameter() != arg_i->passed_as_mutate) { fire_error_invalid_mutate_arg_passed(arg_i, fun_ref, p_sym, false, arg_i->passed_as_mutate, arg_i->get_expr()); } parent::visit(arg_i); } + parent::visit(v->get_callee()); } - void visit(V v) override { - parent::visit(v); - - const FunctionData* fun_ref = v->fun_ref; - tolk_assert(static_cast(fun_ref->parameters.size()) == 1 + v->get_num_args()); - - if (fun_ref->does_mutate_self()) { - bool will_be_extracted_as_tmp_var = v->get_obj()->type == ast_function_call || v->get_obj()->type == ast_dot_method_call; - if (!will_be_extracted_as_tmp_var) { - v->get_obj()->mutate()->assign_lvalue_true(); - } - } - - if (!fun_ref->does_accept_self() && fun_ref->parameters[0].is_mutate_parameter()) { - fire_error_invalid_mutate_arg_passed(v, fun_ref, fun_ref->parameters[0], true, false, v->get_obj()); - } - - for (int i = 0; i < v->get_num_args(); ++i) { - const LocalVarData& p_sym = fun_ref->parameters[1 + i]; - auto arg_i = v->get_arg(i); - if (p_sym.is_mutate_parameter() != arg_i->passed_as_mutate) { - fire_error_invalid_mutate_arg_passed(arg_i, fun_ref, p_sym, false, arg_i->passed_as_mutate, arg_i->get_expr()); - } - } +public: + bool should_visit_function(const FunctionData* fun_ref) override { + return fun_ref->is_code_function() && !fun_ref->is_generic_function(); } }; -void pipeline_refine_lvalue_for_mutate_arguments(const AllSrcFiles& all_src_files) { - visit_ast_of_all_functions(all_src_files); +void 
pipeline_refine_lvalue_for_mutate_arguments() { + visit_ast_of_all_functions(); } } // namespace tolk diff --git a/tolk/pipe-register-symbols.cpp b/tolk/pipe-register-symbols.cpp index 478bc727..2dae0d23 100644 --- a/tolk/pipe-register-symbols.cpp +++ b/tolk/pipe-register-symbols.cpp @@ -20,7 +20,9 @@ #include "ast.h" #include "compiler-state.h" #include "constant-evaluator.h" +#include "generics-helpers.h" #include "td/utils/crypto.h" +#include "type-system.h" #include /* @@ -59,67 +61,69 @@ static int calculate_method_id_by_func_name(std::string_view func_name) { return static_cast(crc & 0xffff) | 0x10000; } -static void calc_arg_ret_order_of_asm_function(V v_body, V param_list, TypeExpr* ret_type, - std::vector& arg_order, std::vector& ret_order) { - int cnt = param_list->size(); - int width = ret_type->get_width(); - if (width < 0 || width > 16) { - v_body->error("return type of an assembler built-in function must have a well-defined fixed width"); +static void validate_arg_ret_order_of_asm_function(V v_body, int n_params, TypePtr ret_type) { + if (!ret_type) { + v_body->error("asm function must declare return type (before asm instructions)"); } - if (cnt > 16) { - v_body->error("assembler built-in function must have at most 16 arguments"); - } - std::vector cum_arg_width; - cum_arg_width.push_back(0); - int tot_width = 0; - for (int i = 0; i < cnt; ++i) { - V v_param = param_list->get_param(i); - int arg_width = v_param->declared_type->get_width(); - if (arg_width < 0 || arg_width > 16) { - v_param->error("parameters of an assembler built-in function must have a well-defined fixed width"); - } - cum_arg_width.push_back(tot_width += arg_width); + if (n_params > 16) { + v_body->error("asm function can have at most 16 parameters"); } + + // asm(param1 ... 
paramN), param names were previously mapped into indices if (!v_body->arg_order.empty()) { - if (static_cast(v_body->arg_order.size()) != cnt) { + if (static_cast(v_body->arg_order.size()) != n_params) { v_body->error("arg_order of asm function must specify all parameters"); } - std::vector visited(cnt, false); - for (int i = 0; i < cnt; ++i) { - int j = v_body->arg_order[i]; + std::vector visited(v_body->arg_order.size(), false); + for (int j : v_body->arg_order) { if (visited[j]) { v_body->error("arg_order of asm function contains duplicates"); } visited[j] = true; - int c1 = cum_arg_width[j], c2 = cum_arg_width[j + 1]; - while (c1 < c2) { - arg_order.push_back(c1++); - } } - tolk_assert(arg_order.size() == (unsigned)tot_width); } + + // asm(-> 0 2 1 3), check for a shuffled range 0...N + // correctness of N (actual return width onto a stack) will be checked after type inferring and generics instantiation if (!v_body->ret_order.empty()) { - if (static_cast(v_body->ret_order.size()) != width) { - v_body->error("ret_order of this asm function expected to be width = " + std::to_string(width)); - } - std::vector visited(width, false); - for (int i = 0; i < width; ++i) { - int j = v_body->ret_order[i]; - if (j < 0 || j >= width || visited[j]) { - v_body->error("ret_order contains invalid integer, not in range 0 .. width-1"); + std::vector visited(v_body->ret_order.size(), false); + for (int j : v_body->ret_order) { + if (j < 0 || j >= static_cast(v_body->ret_order.size()) || visited[j]) { + v_body->error("ret_order contains invalid integer, not in range 0 .. 
N"); } visited[j] = true; } - ret_order = v_body->ret_order; } } +static const GenericsDeclaration* construct_genericTs(V v_list) { + std::vector itemsT; + itemsT.reserve(v_list->size()); + + for (int i = 0; i < v_list->size(); ++i) { + auto v_item = v_list->get_item(i); + auto it_existing = std::find_if(itemsT.begin(), itemsT.end(), [v_item](const GenericsDeclaration::GenericsItem& prev) { + return prev.nameT == v_item->nameT; + }); + if (it_existing != itemsT.end()) { + v_item->error("duplicate generic parameter `" + static_cast(v_item->nameT) + "`"); + } + itemsT.emplace_back(v_item->nameT); + } + + return new GenericsDeclaration(std::move(itemsT)); +} + static void register_constant(V v) { ConstantValue init_value = eval_const_init_value(v->get_init_value()); - GlobalConstData* c_sym = new GlobalConstData(static_cast(v->get_identifier()->name), v->loc, std::move(init_value)); + GlobalConstData* c_sym = new GlobalConstData(static_cast(v->get_identifier()->name), v->loc, v->declared_type, std::move(init_value)); - if (v->declared_type && !v->declared_type->equals_to(c_sym->inferred_type)) { - v->error("expression type does not match declared type"); + if (v->declared_type) { + bool ok = (c_sym->is_int_const() && (v->declared_type == TypeDataInt::create())) + || (c_sym->is_slice_const() && (v->declared_type == TypeDataSlice::create())); + if (!ok) { + v->error("expression type does not match declared type"); + } } G.symtable.add_global_const(c_sym); @@ -137,124 +141,82 @@ static void register_global_var(V v) { static LocalVarData register_parameter(V v, int idx) { if (v->is_underscore()) { - return {"", v->loc, idx, v->declared_type}; + return {"", v->loc, v->declared_type, 0, idx}; } - LocalVarData p_sym(static_cast(v->param_name), v->loc, idx, v->declared_type); + int flags = 0; if (v->declared_as_mutate) { - p_sym.flags |= LocalVarData::flagMutateParameter; + flags |= LocalVarData::flagMutateParameter; } if (!v->declared_as_mutate && idx == 0 && v->param_name 
== "self") { - p_sym.flags |= LocalVarData::flagImmutable; + flags |= LocalVarData::flagImmutable; } - return p_sym; + return LocalVarData(static_cast(v->param_name), v->loc, v->declared_type, flags, idx); } static void register_function(V v) { std::string_view func_name = v->get_identifier()->name; - // calculate TypeExpr of a function: it's a map (params -> ret), probably surrounded by forall - TypeExpr* params_tensor_type = nullptr; + // calculate TypeData of a function + std::vector arg_types; + std::vector parameters; int n_params = v->get_num_params(); int n_mutate_params = 0; - std::vector parameters; - if (n_params) { - std::vector param_tensor_items; - param_tensor_items.reserve(n_params); - parameters.reserve(n_params); - for (int i = 0; i < n_params; ++i) { - auto v_param = v->get_param(i); - n_mutate_params += static_cast(v_param->declared_as_mutate); - param_tensor_items.emplace_back(v_param->declared_type); - parameters.emplace_back(register_parameter(v_param, i)); - } - params_tensor_type = TypeExpr::new_tensor(std::move(param_tensor_items)); - } else { - params_tensor_type = TypeExpr::new_unit(); + arg_types.reserve(n_params); + parameters.reserve(n_params); + for (int i = 0; i < n_params; ++i) { + auto v_param = v->get_param(i); + arg_types.emplace_back(v_param->declared_type); + parameters.emplace_back(register_parameter(v_param, i)); + n_mutate_params += static_cast(v_param->declared_as_mutate); } - TypeExpr* function_type = TypeExpr::new_map(params_tensor_type, v->ret_type); + const GenericsDeclaration* genericTs = nullptr; if (v->genericsT_list) { - std::vector type_vars; - type_vars.reserve(v->genericsT_list->size()); - for (int idx = 0; idx < v->genericsT_list->size(); ++idx) { - type_vars.emplace_back(v->genericsT_list->get_item(idx)->created_type); - } - function_type = TypeExpr::new_forall(std::move(type_vars), function_type); + genericTs = construct_genericTs(v->genericsT_list); } - if (v->marked_as_builtin) { + if 
(v->is_builtin_function()) { const Symbol* builtin_func = lookup_global_symbol(func_name); - const FunctionData* func_val = builtin_func ? builtin_func->as() : nullptr; - if (!func_val || !func_val->is_builtin_function()) { + const FunctionData* fun_ref = builtin_func ? builtin_func->as() : nullptr; + if (!fun_ref || !fun_ref->is_builtin_function()) { v->error("`builtin` used for non-builtin function"); } -#ifdef TOLK_DEBUG - // in release, we don't need this check, since `builtin` is used only in stdlib, which is our responsibility - if (!func_val->full_type->equals_to(function_type) || func_val->is_marked_as_pure() != v->marked_as_pure) { - v->error("declaration for `builtin` function doesn't match an actual one"); - } -#endif + v->mutate()->assign_fun_ref(fun_ref); return; } - if (G.is_verbosity(1)) { - std::cerr << "fun " << func_name << " : " << function_type << std::endl; - } - if (v->marked_as_pure && v->ret_type->get_width() == 0) { - v->error("a pure function should return something, otherwise it will be optimized out anyway"); + if (G.is_verbosity(1) && v->is_code_function()) { + std::cerr << "fun " << func_name << " : " << v->declared_return_type << std::endl; } FunctionBody f_body = v->get_body()->type == ast_sequence ? 
static_cast(new FunctionBodyCode) : static_cast(new FunctionBodyAsm); - FunctionData* f_sym = new FunctionData(static_cast(func_name), v->loc, function_type, std::move(parameters), 0, f_body); + FunctionData* f_sym = new FunctionData(static_cast(func_name), v->loc, v->declared_return_type, std::move(parameters), 0, genericTs, nullptr, f_body, v); if (const auto* v_asm = v->get_body()->try_as()) { - calc_arg_ret_order_of_asm_function(v_asm, v->get_param_list(), v->ret_type, f_sym->arg_order, f_sym->ret_order); + validate_arg_ret_order_of_asm_function(v_asm, v->get_num_params(), v->declared_return_type); + f_sym->arg_order = v_asm->arg_order; + f_sym->ret_order = v_asm->ret_order; } - if (v->method_id) { - if (v->method_id->intval.is_null() || !v->method_id->intval->signed_fits_bits(32)) { - v->method_id->error("invalid integer constant"); - } - f_sym->method_id = static_cast(v->method_id->intval->to_long()); - } else if (v->marked_as_get_method) { + if (v->method_id.not_null()) { + f_sym->method_id = static_cast(v->method_id->to_long()); + } else if (v->flags & FunctionData::flagGetMethod) { f_sym->method_id = calculate_method_id_by_func_name(func_name); for (const FunctionData* other : G.all_get_methods) { if (other->method_id == f_sym->method_id) { v->error(PSTRING() << "GET methods hash collision: `" << other->name << "` and `" << f_sym->name << "` produce the same hash. 
Consider renaming one of these functions."); } } - } else if (v->is_entrypoint) { + } else if (v->flags & FunctionData::flagIsEntrypoint) { f_sym->method_id = calculate_method_id_for_entrypoint(func_name); } - if (v->marked_as_pure) { - f_sym->flags |= FunctionData::flagMarkedAsPure; - } - if (v->marked_as_inline) { - f_sym->flags |= FunctionData::flagInline; - } - if (v->marked_as_inline_ref) { - f_sym->flags |= FunctionData::flagInlineRef; - } - if (v->marked_as_get_method) { - f_sym->flags |= FunctionData::flagGetMethod; - } - if (v->is_entrypoint) { - f_sym->flags |= FunctionData::flagIsEntrypoint; - } + f_sym->flags |= v->flags; if (n_mutate_params) { f_sym->flags |= FunctionData::flagHasMutateParams; } - if (v->accepts_self) { - f_sym->flags |= FunctionData::flagAcceptsSelf; - } - if (v->returns_self) { - f_sym->flags |= FunctionData::flagReturnsSelf; - } G.symtable.add_function(f_sym); - if (f_sym->is_regular_function()) { - G.all_code_functions.push_back(f_sym); - } + G.all_functions.push_back(f_sym); if (f_sym->is_get_method()) { G.all_get_methods.push_back(f_sym); } @@ -270,10 +232,10 @@ static void iterate_through_file_symbols(const SrcFile* file) { for (AnyV v : file->ast->as()->get_toplevel_declarations()) { switch (v->type) { - case ast_import_statement: + case ast_import_directive: // on `import "another-file.tolk"`, register symbols from that file at first // (for instance, it can calculate constants, which are used in init_val of constants in current file below import) - iterate_through_file_symbols(v->as()->file); + iterate_through_file_symbols(v->as()->file); break; case ast_constant_declaration: @@ -291,8 +253,8 @@ static void iterate_through_file_symbols(const SrcFile* file) { } } -void pipeline_register_global_symbols(const AllSrcFiles& all_src_files) { - for (const SrcFile* file : all_src_files) { +void pipeline_register_global_symbols() { + for (const SrcFile* file : G.all_src_files) { iterate_through_file_symbols(file); } } diff --git 
a/tolk/pipe-resolve-identifiers.cpp b/tolk/pipe-resolve-identifiers.cpp new file mode 100644 index 00000000..299f33be --- /dev/null +++ b/tolk/pipe-resolve-identifiers.cpp @@ -0,0 +1,350 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "tolk.h" +#include "platform-utils.h" +#include "compiler-state.h" +#include "src-file.h" +#include "generics-helpers.h" +#include "ast.h" +#include "ast-visitor.h" +#include "type-system.h" +#include + +/* + * This pipe resolves identifiers (local variables and types) in all functions bodies. + * It happens before type inferring, but after all global symbols are registered. + * It means, that for any symbol `x` we can look up whether it's a global name or not. + * + * About resolving variables. + * Example: `var x = 10; x = 20;` both `x` point to one LocalVarData. + * Example: `x = 20` undefined symbol `x` is also here (unless it's a global) + * Variables scoping and redeclaration are also here. + * Note, that `x` is stored as `ast_reference (ast_identifier "x")`. More formally, "references" are resolved. + * "Reference" in AST, besides the identifier, stores optional generics instantiation. `x` is grammar-valid. + * + * About resolving types. At the moment of parsing, `int`, `cell` and other predefined are parsed as TypeDataInt, etc. 
+ * All the others are stored as TypeDataUnresolved, to be resolved here, after global symtable is filled. + * Example: `var x: T = 0` unresolved "T" is replaced by TypeDataGenericT inside `f`. + * Example: `f()` unresolved "MyAlias" is replaced by TypeDataAlias inside the reference. + * Example: `fun f(): KKK` unresolved "KKK" fires an error "unknown type name". + * When structures and type aliases are implemented, their resolving will also be done here. + * See finalize_type_data(). + * + * Note, that functions/methods binding is NOT here. + * In other words, for ast_function_call `beginCell()` and `t.tupleAt(0)`, their fun_ref is NOT filled here. + * Functions/methods binding is done later, simultaneously with type inferring and generics instantiation. + * For instance, to call a generic function `t.tuplePush(1)`, we need types of `t` and `1` to be inferred, + * as well as `tuplePush` to be instantiated, and fun_ref to point at that exact instantiations. + * + * As a result of this step, + * * every V::sym is filled, pointing either to a local var/parameter, or to a global symbol + * (exceptional for function calls and methods, their references are bound later) + * * all TypeData in all symbols is ready for analyzing, TypeDataUnresolved won't occur later in pipeline + */ + +namespace tolk { + +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_undefined_symbol(V v) { + if (v->name == "self") { + v->error("using `self` in a non-member function (it does not accept the first `self` parameter)"); + } else { + v->error("undefined symbol `" + static_cast(v->name) + "`"); + } +} + +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_unknown_type_name(SrcLocation loc, const std::string &text) { + throw ParseError(loc, "unknown type name `" + text + "`"); +} + +static void check_import_exists_when_using_sym(AnyV v_usage, const Symbol* used_sym) { + SrcLocation sym_loc = used_sym->loc; + if 
(!v_usage->loc.is_symbol_from_same_or_builtin_file(sym_loc)) { + const SrcFile* declared_in = sym_loc.get_src_file(); + bool has_import = false; + for (const SrcFile::ImportDirective& import : v_usage->loc.get_src_file()->imports) { + if (import.imported_file == declared_in) { + has_import = true; + } + } + if (!has_import) { + v_usage->error("Using a non-imported symbol `" + used_sym->name + "`. Forgot to import \"" + declared_in->rel_filename + "\"?"); + } + } +} + +struct NameAndScopeResolver { + std::vector> scopes; + + static uint64_t key_hash(std::string_view name_key) { + return std::hash{}(name_key); + } + + void open_scope([[maybe_unused]] SrcLocation loc) { + // std::cerr << "open_scope " << scopes.size() + 1 << " at " << loc << std::endl; + scopes.emplace_back(); + } + + void close_scope([[maybe_unused]] SrcLocation loc) { + // std::cerr << "close_scope " << scopes.size() << " at " << loc << std::endl; + if (UNLIKELY(scopes.empty())) { + throw Fatal{"cannot close the outer scope"}; + } + scopes.pop_back(); + } + + const Symbol* lookup_symbol(std::string_view name) const { + uint64_t key = key_hash(name); + for (auto it = scopes.rbegin(); it != scopes.rend(); ++it) { // NOLINT(*-loop-convert) + const auto& scope = *it; + if (auto it_sym = scope.find(key); it_sym != scope.end()) { + return it_sym->second; + } + } + return G.symtable.lookup(name); + } + + void add_local_var(const LocalVarData* v_sym) { + if (UNLIKELY(scopes.empty())) { + throw Fatal("unexpected scope_level = 0"); + } + if (v_sym->name.empty()) { // underscore + return; + } + + uint64_t key = key_hash(v_sym->name); + const auto& [_, inserted] = scopes.rbegin()->emplace(key, v_sym); + if (UNLIKELY(!inserted)) { + throw ParseError(v_sym->loc, "redeclaration of local variable `" + v_sym->name + "`"); + } + } +}; + +struct TypeDataResolver { + GNU_ATTRIBUTE_NOINLINE + static TypePtr resolve_identifiers_in_type_data(TypePtr type_data, const GenericsDeclaration* genericTs) { + return 
type_data->replace_children_custom([genericTs](TypePtr child) { + if (const TypeDataUnresolved* un = child->try_as()) { + if (genericTs && genericTs->has_nameT(un->text)) { + std::string nameT = un->text; + return TypeDataGenericT::create(std::move(nameT)); + } + if (un->text == "auto") { + throw ParseError(un->loc, "`auto` type does not exist; just omit a type for local variable (will be inferred from assignment); parameters should always be typed"); + } + if (un->text == "self") { + throw ParseError(un->loc, "`self` type can be used only as a return type of a function (enforcing it to be chainable)"); + } + if (un->text == "bool") { + throw ParseError(un->loc, "bool type is not supported yet"); + } + fire_error_unknown_type_name(un->loc, un->text); + } + return child; + }); + } +}; + +static TypePtr finalize_type_data(TypePtr type_data, const GenericsDeclaration* genericTs) { + if (!type_data || !type_data->has_unresolved_inside()) { + return type_data; + } + return TypeDataResolver::resolve_identifiers_in_type_data(type_data, genericTs); +} + + +class AssignSymInsideFunctionVisitor final : public ASTVisitorFunctionBody { + // more correctly this field shouldn't be static, but currently there is no need to make it a part of state + static NameAndScopeResolver current_scope; + static const FunctionData* current_function; + + static const LocalVarData* create_local_var_sym(std::string_view name, SrcLocation loc, TypePtr declared_type, bool immutable) { + LocalVarData* v_sym = new LocalVarData(static_cast(name), loc, declared_type, immutable * LocalVarData::flagImmutable, -1); + current_scope.add_local_var(v_sym); + return v_sym; + } + + static void process_catch_variable(AnyExprV catch_var) { + if (auto v_ref = catch_var->try_as()) { + const LocalVarData* var_ref = create_local_var_sym(v_ref->get_name(), catch_var->loc, nullptr, true); + v_ref->mutate()->assign_sym(var_ref); + } + } + +protected: + void visit(V v) override { + if (v->marked_as_redef) { + const 
Symbol* sym = current_scope.lookup_symbol(v->get_name()); + if (sym == nullptr) { + v->error("`redef` for unknown variable"); + } + const LocalVarData* var_ref = sym->try_as(); + if (!var_ref) { + v->error("`redef` for unknown variable"); + } + v->mutate()->assign_var_ref(var_ref); + } else { + TypePtr declared_type = finalize_type_data(v->declared_type, current_function->genericTs); + const LocalVarData* var_ref = create_local_var_sym(v->get_name(), v->loc, declared_type, v->is_immutable); + v->mutate()->assign_resolved_type(declared_type); + v->mutate()->assign_var_ref(var_ref); + } + } + + void visit(V v) override { + parent::visit(v->get_rhs()); // in this order, so that `var x = x` is invalid, "x" on the right unknown + parent::visit(v->get_lhs()); + } + + void visit(V v) override { + const Symbol* sym = current_scope.lookup_symbol(v->get_name()); + if (!sym) { + fire_error_undefined_symbol(v->get_identifier()); + } + v->mutate()->assign_sym(sym); + + // for global functions, global vars and constants, `import` must exist + if (!sym->try_as()) { + check_import_exists_when_using_sym(v, sym); + } + + // for `f` / `f`, resolve "MyAlias" and "T" + // (for function call `f()`, this v (ast_reference `f`) is callee) + if (auto v_instantiationTs = v->get_instantiationTs()) { + for (int i = 0; i < v_instantiationTs->size(); ++i) { + TypePtr substituted_type = finalize_type_data(v_instantiationTs->get_item(i)->substituted_type, current_function->genericTs); + v_instantiationTs->get_item(i)->mutate()->assign_resolved_type(substituted_type); + } + } + } + + void visit(V v) override { + // for `t.tupleAt` / `obj.method`, resolve "MyAlias" and "T" + // (for function call `t.tupleAt()`, this v (ast_dot_access `t.tupleAt`) is callee) + if (auto v_instantiationTs = v->get_instantiationTs()) { + for (int i = 0; i < v_instantiationTs->size(); ++i) { + TypePtr substituted_type = finalize_type_data(v_instantiationTs->get_item(i)->substituted_type, current_function->genericTs); + 
v_instantiationTs->get_item(i)->mutate()->assign_resolved_type(substituted_type); + } + } + parent::visit(v->get_obj()); + } + + void visit(V v) override { + TypePtr cast_to_type = finalize_type_data(v->cast_to_type, current_function->genericTs); + v->mutate()->assign_resolved_type(cast_to_type); + parent::visit(v->get_expr()); + } + + void visit(V v) override { + if (v->empty()) { + return; + } + current_scope.open_scope(v->loc); + parent::visit(v); + current_scope.close_scope(v->loc_end); + } + + void visit(V v) override { + current_scope.open_scope(v->loc); + parent::visit(v->get_body()); + parent::visit(v->get_cond()); // in 'while' condition it's ok to use variables declared inside do + current_scope.close_scope(v->get_body()->loc_end); + } + + void visit(V v) override { + visit(v->get_try_body()); + current_scope.open_scope(v->get_catch_body()->loc); + const std::vector& catch_items = v->get_catch_expr()->get_items(); + tolk_assert(catch_items.size() == 2); + process_catch_variable(catch_items[1]); + process_catch_variable(catch_items[0]); + parent::visit(v->get_catch_body()); + current_scope.close_scope(v->get_catch_body()->loc_end); + } + +public: + bool should_visit_function(const FunctionData* fun_ref) override { + // this pipe is done just after parsing + // visit both asm and code functions, resolve identifiers in parameter/return types everywhere + // for generic functions, unresolved "T" will be replaced by TypeDataGenericT + return true; + } + + void start_visiting_function(const FunctionData* fun_ref, V v) override { + current_function = fun_ref; + + for (int i = 0; i < v->get_num_params(); ++i) { + const LocalVarData& param_var = fun_ref->parameters[i]; + TypePtr declared_type = finalize_type_data(param_var.declared_type, fun_ref->genericTs); + v->get_param(i)->mutate()->assign_param_ref(¶m_var); + v->get_param(i)->mutate()->assign_resolved_type(declared_type); + param_var.mutate()->assign_resolved_type(declared_type); + } + TypePtr return_type = 
finalize_type_data(fun_ref->declared_return_type, fun_ref->genericTs); + v->mutate()->assign_resolved_type(return_type); + fun_ref->mutate()->assign_resolved_type(return_type); + + if (fun_ref->is_code_function()) { + auto v_seq = v->get_body()->as(); + current_scope.open_scope(v->loc); + for (int i = 0; i < v->get_num_params(); ++i) { + current_scope.add_local_var(&fun_ref->parameters[i]); + } + parent::visit(v_seq); + current_scope.close_scope(v_seq->loc_end); + tolk_assert(current_scope.scopes.empty()); + } + + current_function = nullptr; + } +}; + +NameAndScopeResolver AssignSymInsideFunctionVisitor::current_scope; +const FunctionData* AssignSymInsideFunctionVisitor::current_function = nullptr; + +void pipeline_resolve_identifiers_and_assign_symbols() { + AssignSymInsideFunctionVisitor visitor; + for (const SrcFile* file : G.all_src_files) { + for (AnyV v : file->ast->as()->get_toplevel_declarations()) { + if (auto v_func = v->try_as()) { + tolk_assert(v_func->fun_ref); + visitor.start_visiting_function(v_func->fun_ref, v_func); + + } else if (auto v_global = v->try_as()) { + TypePtr declared_type = finalize_type_data(v_global->var_ref->declared_type, nullptr); + v_global->mutate()->assign_resolved_type(declared_type); + v_global->var_ref->mutate()->assign_resolved_type(declared_type); + + } else if (auto v_const = v->try_as(); v_const && v_const->declared_type) { + TypePtr declared_type = finalize_type_data(v_const->const_ref->declared_type, nullptr); + v_const->mutate()->assign_resolved_type(declared_type); + v_const->const_ref->mutate()->assign_resolved_type(declared_type); + } + } + } +} + +void pipeline_resolve_identifiers_and_assign_symbols(const FunctionData* fun_ref) { + AssignSymInsideFunctionVisitor visitor; + if (visitor.should_visit_function(fun_ref)) { + visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); + } +} + +} // namespace tolk diff --git a/tolk/pipe-resolve-symbols.cpp b/tolk/pipe-resolve-symbols.cpp deleted file mode 100644 
index 31d25229..00000000 --- a/tolk/pipe-resolve-symbols.cpp +++ /dev/null @@ -1,272 +0,0 @@ -/* - This file is part of TON Blockchain Library. - - TON Blockchain Library is free software: you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - TON Blockchain Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with TON Blockchain Library. If not, see . -*/ -#include "tolk.h" -#include "platform-utils.h" -#include "src-file.h" -#include "ast.h" -#include "ast-visitor.h" -#include "compiler-state.h" -#include - -/* - * This pipe resolves identifiers (local variables) in all functions bodies. - * It happens before type inferring, but after all global symbols are registered. - * It means, that for any symbol `x` we can look up whether it's a global name or not. - * - * Example: `var x = 10; x = 20;` both `x` point to one LocalVarData. - * Example: `x = 20` undefined symbol `x` is also here (unless it's a global) - * Variables scoping and redeclaration are also here. - * - * As a result of this step, every V::sym is filled, pointing either to a local var/parameter, - * or to a global var / constant / function. 
- */ - -namespace tolk { - -static void check_import_exists_when_using_sym(AnyV v_usage, const Symbol* used_sym) { - SrcLocation sym_loc = used_sym->loc; - if (!v_usage->loc.is_symbol_from_same_or_builtin_file(sym_loc)) { - const SrcFile* declared_in = sym_loc.get_src_file(); - bool has_import = false; - for (const SrcFile::ImportStatement& import_stmt : v_usage->loc.get_src_file()->imports) { - if (import_stmt.imported_file == declared_in) { - has_import = true; - } - } - if (!has_import) { - v_usage->error("Using a non-imported symbol `" + used_sym->name + "`. Forgot to import \"" + declared_in->rel_filename + "\"?"); - } - } -} - -struct NameAndScopeResolver { - std::vector> scopes; - - static uint64_t key_hash(std::string_view name_key) { - return std::hash{}(name_key); - } - - void open_scope([[maybe_unused]] SrcLocation loc) { - // std::cerr << "open_scope " << scopes.size() + 1 << " at " << loc << std::endl; - scopes.emplace_back(); - } - - void close_scope([[maybe_unused]] SrcLocation loc) { - // std::cerr << "close_scope " << scopes.size() << " at " << loc << std::endl; - if (UNLIKELY(scopes.empty())) { - throw Fatal{"cannot close the outer scope"}; - } - scopes.pop_back(); - } - - const Symbol* lookup_symbol(std::string_view name) const { - uint64_t key = key_hash(name); - for (auto it = scopes.rbegin(); it != scopes.rend(); ++it) { // NOLINT(*-loop-convert) - const auto& scope = *it; - if (auto it_sym = scope.find(key); it_sym != scope.end()) { - return it_sym->second; - } - } - return G.symtable.lookup(name); - } - - const Symbol* add_local_var(const LocalVarData* v_sym) { - if (UNLIKELY(scopes.empty())) { - throw Fatal("unexpected scope_level = 0"); - } - if (v_sym->name.empty()) { // underscore - return v_sym; - } - - uint64_t key = key_hash(v_sym->name); - const auto& [_, inserted] = scopes.rbegin()->emplace(key, v_sym); - if (UNLIKELY(!inserted)) { - throw ParseError(v_sym->loc, "redeclaration of local variable `" + v_sym->name + "`"); - } - return 
v_sym; - } -}; - - -class AssignSymInsideFunctionVisitor final : public ASTVisitorFunctionBody { - // more correctly this field shouldn't be static, but currently there is no need to make it a part of state - static NameAndScopeResolver current_scope; - - static const Symbol* create_local_var_sym(std::string_view name, SrcLocation loc, TypeExpr* var_type, bool immutable) { - LocalVarData* v_sym = new LocalVarData(static_cast(name), loc, -1, var_type); - if (immutable) { - v_sym->flags |= LocalVarData::flagImmutable; - } - return current_scope.add_local_var(v_sym); - } - - static void process_catch_variable(AnyV catch_var) { - if (auto v_ident = catch_var->try_as()) { - const Symbol* sym = create_local_var_sym(v_ident->name, catch_var->loc, TypeExpr::new_hole(), true); - v_ident->mutate()->assign_sym(sym); - } - } - - static void process_function_arguments(const FunctionData* fun_ref, V v, AnyExprV lhs_of_dot_call) { - int delta_self = lhs_of_dot_call ? 1 : 0; - int n_arguments = static_cast(v->get_arguments().size()) + delta_self; - int n_parameters = static_cast(fun_ref->parameters.size()); - - // Tolk doesn't have optional parameters currently, so just compare counts - if (n_parameters < n_arguments) { - v->error("too many arguments in call to `" + fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); - } - if (n_arguments < n_parameters) { - v->error("too few arguments in call to `" + fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); - } - } - - void visit(V v) override { - if (v->marked_as_redef) { - auto v_ident = v->get_identifier()->as(); - const Symbol* sym = current_scope.lookup_symbol(v_ident->name); - if (sym == nullptr) { - v->error("`redef` for unknown variable"); - } - if (!sym->try_as() && !sym->try_as()) { - v->error("`redef` for unknown variable"); - } - v->mutate()->assign_var_ref(sym); - 
v_ident->mutate()->assign_sym(sym); - } else if (auto v_ident = v->get_identifier()->try_as()) { - TypeExpr* var_type = v->declared_type ? v->declared_type : TypeExpr::new_hole(); - const Symbol* sym = create_local_var_sym(v_ident->name, v->loc, var_type, v->is_immutable); - v->mutate()->assign_var_ref(sym); - v_ident->mutate()->assign_sym(sym); - } else { - // underscore, do nothing, v->sym remains nullptr - } - } - - void visit(V v) override { - parent::visit(v->get_assigned_val()); - parent::visit(v->get_lhs()); - } - - void visit(V v) override { - const Symbol* sym = current_scope.lookup_symbol(v->name); - if (!sym) { - v->error("undefined symbol `" + static_cast(v->name) + "`"); - } - v->mutate()->assign_sym(sym); - - // for global functions, global vars and constants, `import` must exist - if (!sym->try_as()) { - check_import_exists_when_using_sym(v, sym); - } - } - - void visit(V v) override { - parent::visit(v->get_called_f()); - parent::visit(v->get_arg_list()); - - // most likely it's a global function, but also may be `some_var(args)` or even `getF()(args)` - // for such corner cases, sym remains nullptr - if (auto v_ident = v->get_called_f()->try_as()) { - if (const auto* fun_ref = v_ident->sym->try_as()) { - v->mutate()->assign_fun_ref(fun_ref); - process_function_arguments(fun_ref, v->get_arg_list(), nullptr); - } - } - // for `some_var(args)`, if it's called with wrong arguments count, the error is not here - // it will be fired later, it's a type checking error - } - - void visit(V v) override { - const Symbol* sym = lookup_global_symbol(v->method_name); - if (!sym) { - v->error("undefined symbol `" + static_cast(v->method_name) + "`"); - } - const auto* fun_ref = sym->try_as(); - if (!fun_ref) { - v->error("`" + static_cast(v->method_name) + "` is not a method"); - } - - if (fun_ref->parameters.empty()) { - v->error("`" + static_cast(v->method_name) + "` has no parameters and can not be called as method"); - } - - 
v->mutate()->assign_fun_ref(fun_ref); - parent::visit(v); - process_function_arguments(fun_ref, v->get_arg_list(), v->get_obj()); - } - - void visit(V v) override { - const Symbol* sym = current_scope.lookup_symbol("self"); - if (!sym) { - v->error("using `self` in a non-member function (it does not accept the first `self` parameter)"); - } - v->mutate()->assign_param_ref(sym->as()); - } - - void visit(V v) override { - if (v->empty()) { - return; - } - current_scope.open_scope(v->loc); - parent::visit(v); - current_scope.close_scope(v->loc_end); - } - - void visit(V v) override { - current_scope.open_scope(v->loc); - parent::visit(v->get_body()); - parent::visit(v->get_cond()); // in 'while' condition it's ok to use variables declared inside do - current_scope.close_scope(v->get_body()->loc_end); - } - - void visit(V v) override { - visit(v->get_try_body()); - current_scope.open_scope(v->get_catch_body()->loc); - const std::vector& catch_items = v->get_catch_expr()->get_items(); - tolk_assert(catch_items.size() == 2); - process_catch_variable(catch_items[1]); - process_catch_variable(catch_items[0]); - parent::visit(v->get_catch_body()); - current_scope.close_scope(v->get_catch_body()->loc_end); - } - -public: - void start_visiting_function(V v_function) override { - auto v_seq = v_function->get_body()->try_as(); - tolk_assert(v_seq != nullptr); - - current_scope.open_scope(v_function->loc); - - for (int i = 0; i < v_function->get_num_params(); ++i) { - current_scope.add_local_var(&v_function->fun_ref->parameters[i]); - v_function->get_param(i)->mutate()->assign_param_ref(&v_function->fun_ref->parameters[i]); - } - parent::visit(v_seq); - - current_scope.close_scope(v_seq->loc_end); - tolk_assert(current_scope.scopes.empty()); - } -}; - -NameAndScopeResolver AssignSymInsideFunctionVisitor::current_scope; - -void pipeline_resolve_identifiers_and_assign_symbols(const AllSrcFiles& all_src_files) { - visit_ast_of_all_functions(all_src_files); -} - -} // namespace tolk 
diff --git a/tolk/pipeline.h b/tolk/pipeline.h index 5c0fe656..afea66d6 100644 --- a/tolk/pipeline.h +++ b/tolk/pipeline.h @@ -25,25 +25,33 @@ */ #pragma once -#include "src-file.h" +#include "fwd-declarations.h" #include namespace tolk { -AllSrcFiles pipeline_discover_and_parse_sources(const std::string& stdlib_filename, const std::string& entrypoint_filename); +void pipeline_discover_and_parse_sources(const std::string& stdlib_filename, const std::string& entrypoint_filename); -void pipeline_register_global_symbols(const AllSrcFiles&); -void pipeline_resolve_identifiers_and_assign_symbols(const AllSrcFiles&); -void pipeline_calculate_rvalue_lvalue(const AllSrcFiles&); -void pipeline_detect_unreachable_statements(const AllSrcFiles&); -void pipeline_infer_and_check_types(const AllSrcFiles&); -void pipeline_refine_lvalue_for_mutate_arguments(const AllSrcFiles&); -void pipeline_check_rvalue_lvalue(const AllSrcFiles&); -void pipeline_check_pure_impure_operations(const AllSrcFiles&); -void pipeline_constant_folding(const AllSrcFiles&); -void pipeline_convert_ast_to_legacy_Expr_Op(const AllSrcFiles&); +void pipeline_register_global_symbols(); +void pipeline_resolve_identifiers_and_assign_symbols(); +void pipeline_calculate_rvalue_lvalue(); +void pipeline_detect_unreachable_statements(); +void pipeline_infer_types_and_calls_and_fields(); +void pipeline_refine_lvalue_for_mutate_arguments(); +void pipeline_check_rvalue_lvalue(); +void pipeline_check_pure_impure_operations(); +void pipeline_constant_folding(); +void pipeline_convert_ast_to_legacy_Expr_Op(); void pipeline_find_unused_symbols(); -void pipeline_generate_fif_output_to_std_cout(const AllSrcFiles&); +void pipeline_generate_fif_output_to_std_cout(); + +// these pipes also can be called per-function individually +// they are called for instantiated generics functions, when `f` is deeply cloned as `f` +void pipeline_resolve_identifiers_and_assign_symbols(const FunctionData*); +void 
pipeline_calculate_rvalue_lvalue(const FunctionData*); +void pipeline_detect_unreachable_statements(const FunctionData*); +void pipeline_infer_types_and_calls_and_fields(const FunctionData*); + } // namespace tolk diff --git a/tolk/platform-utils.h b/tolk/platform-utils.h index 7b16226e..5ab01220 100644 --- a/tolk/platform-utils.h +++ b/tolk/platform-utils.h @@ -27,11 +27,15 @@ #if __GNUC__ #define GNU_ATTRIBUTE_COLD [[gnu::cold]] +#define GNU_ATTRIBUTE_FLATTEN [[gnu::flatten]] #define GNU_ATTRIBUTE_NORETURN [[gnu::noreturn]] +#define GNU_ATTRIBUTE_NOINLINE [[gnu::noinline]] #define GNU_ATTRIBUTE_ALWAYS_INLINE [[gnu::always_inline]] #else #define GNU_ATTRIBUTE_COLD +#define GNU_ATTRIBUTE_FLATTEN #define GNU_ATTRIBUTE_NORETURN [[noreturn]] +#define GNU_ATTRIBUTE_NOINLINE [[noinline]] #define GNU_ATTRIBUTE_ALWAYS_INLINE #endif diff --git a/tolk/src-file.cpp b/tolk/src-file.cpp index e5533f69..52ac3821 100644 --- a/tolk/src-file.cpp +++ b/tolk/src-file.cpp @@ -23,8 +23,8 @@ namespace tolk { static_assert(sizeof(SrcLocation) == 8); -SrcFile* AllRegisteredSrcFiles::find_file(int file_id) const { - for (SrcFile* file : all_src_files) { +const SrcFile* AllRegisteredSrcFiles::find_file(int file_id) const { + for (const SrcFile* file : all_src_files) { if (file->file_id == file_id) { return file; } @@ -32,8 +32,8 @@ SrcFile* AllRegisteredSrcFiles::find_file(int file_id) const { return nullptr; } -SrcFile* AllRegisteredSrcFiles::find_file(const std::string& abs_filename) const { - for (SrcFile* file : all_src_files) { +const SrcFile* AllRegisteredSrcFiles::find_file(const std::string& abs_filename) const { + for (const SrcFile* file : all_src_files) { if (file->abs_filename == abs_filename) { return file; } @@ -41,7 +41,7 @@ SrcFile* AllRegisteredSrcFiles::find_file(const std::string& abs_filename) const return nullptr; } -SrcFile* AllRegisteredSrcFiles::locate_and_register_source_file(const std::string& rel_filename, SrcLocation included_from) { +const SrcFile* 
AllRegisteredSrcFiles::locate_and_register_source_file(const std::string& rel_filename, SrcLocation included_from) { td::Result path = G.settings.read_callback(CompilerSettings::FsReadCallbackKind::Realpath, rel_filename.c_str()); if (path.is_error()) { if (included_from.is_defined()) { @@ -51,7 +51,7 @@ SrcFile* AllRegisteredSrcFiles::locate_and_register_source_file(const std::strin } std::string abs_filename = path.move_as_ok(); - if (SrcFile* file = find_file(abs_filename)) { + if (const SrcFile* file = find_file(abs_filename)) { return file; } @@ -75,16 +75,7 @@ SrcFile* AllRegisteredSrcFiles::get_next_unparsed_file() { if (last_parsed_file_id >= last_registered_file_id) { return nullptr; } - return all_src_files[++last_parsed_file_id]; -} - -AllSrcFiles AllRegisteredSrcFiles::get_all_files() const { - AllSrcFiles src_files_immutable; - src_files_immutable.reserve(all_src_files.size()); - for (const SrcFile* file : all_src_files) { - src_files_immutable.push_back(file); - } - return src_files_immutable; + return const_cast(all_src_files[++last_parsed_file_id]); } bool SrcFile::is_stdlib_file() const { diff --git a/tolk/src-file.h b/tolk/src-file.h index 9fbbfbb4..0c82bf18 100644 --- a/tolk/src-file.h +++ b/tolk/src-file.h @@ -30,7 +30,7 @@ struct SrcFile { std::string_view line_str; }; - struct ImportStatement { + struct ImportDirective { const SrcFile* imported_file; }; @@ -39,7 +39,7 @@ struct SrcFile { std::string abs_filename; // absolute from root std::string text; // file contents loaded into memory, every Token::str_val points inside it AnyV ast = nullptr; // when a file has been parsed, its ast_tolk_file is kept here - std::vector imports; // to check strictness (can't use a symbol without importing its file) + std::vector imports; // to check strictness (can't use a symbol without importing its file) SrcFile(int file_id, std::string rel_filename, std::string abs_filename, std::string&& text) : file_id(file_id) @@ -95,21 +95,20 @@ public: std::ostream& 
operator<<(std::ostream& os, SrcLocation loc); -using AllSrcFiles = std::vector; - class AllRegisteredSrcFiles { - std::vector all_src_files; + std::vector all_src_files; int last_registered_file_id = -1; int last_parsed_file_id = -1; public: - SrcFile *find_file(int file_id) const; - SrcFile* find_file(const std::string& abs_filename) const; + const SrcFile* find_file(int file_id) const; + const SrcFile* find_file(const std::string& abs_filename) const; - SrcFile* locate_and_register_source_file(const std::string& rel_filename, SrcLocation included_from); + const SrcFile* locate_and_register_source_file(const std::string& rel_filename, SrcLocation included_from); SrcFile* get_next_unparsed_file(); - AllSrcFiles get_all_files() const; + auto begin() const { return all_src_files.begin(); } + auto end() const { return all_src_files.end(); } }; struct Fatal final : std::exception { diff --git a/tolk/symtable.cpp b/tolk/symtable.cpp index dc715031..918fdab3 100644 --- a/tolk/symtable.cpp +++ b/tolk/symtable.cpp @@ -17,28 +17,48 @@ #include "symtable.h" #include "compiler-state.h" #include "platform-utils.h" -#include -#include +#include "generics-helpers.h" namespace tolk { +std::string FunctionData::as_human_readable() const { + if (!genericTs) { + return name; // if it's generic instantiation like `f`, its name is "f", not "f" + } + return name + genericTs->as_human_readable(); +} + bool FunctionData::does_need_codegen() const { // when a function is declared, but not referenced from code in any way, don't generate its body if (!is_really_used() && G.settings.remove_unused_functions) { return false; } + // functions with asm body don't need code generation + // (even if used as non-call: `var a = beginCell;` inserts TVM continuation inline) + if (is_asm_function() || is_builtin_function()) { + return false; + } // when a function is referenced like `var a = some_fn;` (or in some other non-call way), its continuation should exist if (is_used_as_noncall()) { return 
true; } + // generic functions also don't need code generation, only generic instantiations do + if (is_generic_function()) { + return false; + } // currently, there is no inlining, all functions are codegenerated // (but actually, unused ones are later removed by Fift) // in the future, we may want to implement a true AST inlining for "simple" functions return true; } -void FunctionData::assign_is_really_used() { - this->flags |= flagReallyUsed; +void FunctionData::assign_resolved_type(TypePtr declared_return_type) { + this->declared_return_type = declared_return_type; +} + +void FunctionData::assign_inferred_type(TypePtr inferred_return_type, TypePtr inferred_full_type) { + this->inferred_return_type = inferred_return_type; + this->inferred_full_type = inferred_full_type; } void FunctionData::assign_is_used_as_noncall() { @@ -49,14 +69,45 @@ void FunctionData::assign_is_implicit_return() { this->flags |= flagImplicitReturn; } +void FunctionData::assign_is_type_inferring_done() { + this->flags |= flagTypeInferringDone; +} + +void FunctionData::assign_is_really_used() { + this->flags |= flagReallyUsed; +} + +void FunctionData::assign_arg_order(std::vector&& arg_order) { + this->arg_order = std::move(arg_order); +} + +void GlobalVarData::assign_resolved_type(TypePtr declared_type) { + this->declared_type = declared_type; +} + void GlobalVarData::assign_is_really_used() { this->flags |= flagReallyUsed; } +void GlobalConstData::assign_resolved_type(TypePtr declared_type) { + this->declared_type = declared_type; +} + void LocalVarData::assign_idx(int idx) { this->idx = idx; } +void LocalVarData::assign_resolved_type(TypePtr declared_type) { + this->declared_type = declared_type; +} + +void LocalVarData::assign_inferred_type(TypePtr inferred_type) { +#ifdef TOLK_DEBUG + assert(this->declared_type == nullptr); // called when type declaration omitted, inferred from assigned value +#endif + this->declared_type = inferred_type; +} + GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD 
static void fire_error_redefinition_of_symbol(SrcLocation loc, const Symbol* previous) { SrcLocation prev_loc = previous->loc; diff --git a/tolk/symtable.h b/tolk/symtable.h index 53a5e52e..3cda24ed 100644 --- a/tolk/symtable.h +++ b/tolk/symtable.h @@ -17,7 +17,7 @@ #pragma once #include "src-file.h" -#include "type-expr.h" +#include "fwd-declarations.h" #include "constant-evaluator.h" #include "crypto/common/refint.h" #include @@ -57,27 +57,31 @@ struct LocalVarData final : Symbol { flagImmutable = 2, // variable was declared via `val` (not `var`) }; - TypeExpr* declared_type; - int flags = 0; + TypePtr declared_type; // either at declaration `var x:int`, or if omitted, from assigned value `var x=2` + int flags; int idx; - LocalVarData(std::string name, SrcLocation loc, int idx, TypeExpr* declared_type) + LocalVarData(std::string name, SrcLocation loc, TypePtr declared_type, int flags, int idx) : Symbol(std::move(name), loc) , declared_type(declared_type) + , flags(flags) , idx(idx) { } - bool is_underscore() const { return name.empty(); } bool is_immutable() const { return flags & flagImmutable; } bool is_mutate_parameter() const { return flags & flagMutateParameter; } LocalVarData* mutate() const { return const_cast(this); } void assign_idx(int idx); + void assign_resolved_type(TypePtr declared_type); + void assign_inferred_type(TypePtr inferred_type); }; struct FunctionBodyCode; struct FunctionBodyAsm; struct FunctionBodyBuiltin; +struct GenericsDeclaration; +struct GenericsInstantiation; typedef std::variant< FunctionBodyCode*, @@ -91,7 +95,7 @@ struct FunctionData final : Symbol { enum { flagInline = 1, // marked `@inline` flagInlineRef = 2, // marked `@inline_ref` - flagReallyUsed = 4, // calculated via dfs from used functions; declared but unused functions are not codegenerated + flagTypeInferringDone = 4, // type inferring step of function's body (all AST nodes assigning v->inferred_type) is done flagUsedAsNonCall = 8, // used not only as `f()`, but as a 
1-st class function (assigned to var, pushed to tuple, etc.) flagMarkedAsPure = 16, // declared as `pure`, can't call impure and access globals, unused invocations are optimized out flagImplicitReturn = 32, // control flow reaches end of function, so it needs implicit return at the end @@ -100,25 +104,36 @@ struct FunctionData final : Symbol { flagHasMutateParams = 256, // has parameters declared as `mutate` flagAcceptsSelf = 512, // is a member function (has `self` first parameter) flagReturnsSelf = 1024, // return type is `self` (returns the mutated 1st argument), calls can be chainable + flagReallyUsed = 2048, // calculated via dfs from used functions; declared but unused functions are not codegenerated }; int method_id = EMPTY_METHOD_ID; int flags; - TypeExpr* full_type; // currently, TypeExpr::_Map, probably wrapped with forall std::vector parameters; std::vector arg_order, ret_order; + TypePtr declared_return_type; // may be nullptr, meaning "auto infer" + TypePtr inferred_return_type = nullptr; // assigned on type inferring + TypePtr inferred_full_type = nullptr; // assigned on type inferring, it's TypeDataFunCallable(params -> return) + const GenericsDeclaration* genericTs; + const GenericsInstantiation* instantiationTs; FunctionBody body; + AnyV ast_root; // V for user-defined (not builtin) - FunctionData(std::string name, SrcLocation loc, TypeExpr* full_type, std::vector parameters, int initial_flags, FunctionBody body) + FunctionData(std::string name, SrcLocation loc, TypePtr declared_return_type, std::vector parameters, int initial_flags, const GenericsDeclaration* genericTs, const GenericsInstantiation* instantiationTs, FunctionBody body, AnyV ast_root) : Symbol(std::move(name), loc) , flags(initial_flags) - , full_type(full_type) , parameters(std::move(parameters)) - , body(body) { + , declared_return_type(declared_return_type) + , genericTs(genericTs) + , instantiationTs(instantiationTs) + , body(body) + , ast_root(ast_root) { } + std::string 
as_human_readable() const; + const std::vector* get_arg_order() const { return arg_order.empty() ? nullptr : &arg_order; } @@ -126,13 +141,19 @@ struct FunctionData final : Symbol { return ret_order.empty() ? nullptr : &ret_order; } - bool is_regular_function() const { return std::holds_alternative(body); } + int get_num_params() const { return static_cast(parameters.size()); } + const LocalVarData& get_param(int idx) const { return parameters[idx]; } + + bool is_code_function() const { return std::holds_alternative(body); } bool is_asm_function() const { return std::holds_alternative(body); } - bool is_builtin_function() const { return std::holds_alternative(body); } + bool is_builtin_function() const { return ast_root == nullptr; } + + bool is_generic_function() const { return genericTs != nullptr; } + bool is_instantiation_of_generic_function() const { return instantiationTs != nullptr; } bool is_inline() const { return flags & flagInline; } bool is_inline_ref() const { return flags & flagInlineRef; } - bool is_really_used() const { return flags & flagReallyUsed; } + bool is_type_inferring_done() const { return flags & flagTypeInferringDone; } bool is_used_as_noncall() const { return flags & flagUsedAsNonCall; } bool is_marked_as_pure() const { return flags & flagMarkedAsPure; } bool is_implicit_return() const { return flags & flagImplicitReturn; } @@ -143,13 +164,18 @@ struct FunctionData final : Symbol { bool does_accept_self() const { return flags & flagAcceptsSelf; } bool does_return_self() const { return flags & flagReturnsSelf; } bool does_mutate_self() const { return (flags & flagAcceptsSelf) && parameters[0].is_mutate_parameter(); } + bool is_really_used() const { return flags & flagReallyUsed; } bool does_need_codegen() const; FunctionData* mutate() const { return const_cast(this); } - void assign_is_really_used(); + void assign_resolved_type(TypePtr declared_return_type); + void assign_inferred_type(TypePtr inferred_return_type, TypePtr 
inferred_full_type); void assign_is_used_as_noncall(); void assign_is_implicit_return(); + void assign_is_type_inferring_done(); + void assign_is_really_used(); + void assign_arg_order(std::vector&& arg_order); }; struct GlobalVarData final : Symbol { @@ -157,10 +183,10 @@ struct GlobalVarData final : Symbol { flagReallyUsed = 1, // calculated via dfs from used functions; unused globals are not codegenerated }; - TypeExpr* declared_type; + TypePtr declared_type; // always exists, declaring globals without type is prohibited int flags = 0; - GlobalVarData(std::string name, SrcLocation loc, TypeExpr* declared_type) + GlobalVarData(std::string name, SrcLocation loc, TypePtr declared_type) : Symbol(std::move(name), loc) , declared_type(declared_type) { } @@ -168,17 +194,18 @@ struct GlobalVarData final : Symbol { bool is_really_used() const { return flags & flagReallyUsed; } GlobalVarData* mutate() const { return const_cast(this); } + void assign_resolved_type(TypePtr declared_type); void assign_is_really_used(); }; struct GlobalConstData final : Symbol { ConstantValue value; - TypeExpr* inferred_type; + TypePtr declared_type; // may be nullptr - GlobalConstData(std::string name, SrcLocation loc, ConstantValue&& value) + GlobalConstData(std::string name, SrcLocation loc, TypePtr declared_type, ConstantValue&& value) : Symbol(std::move(name), loc) , value(std::move(value)) - , inferred_type(TypeExpr::new_atomic(this->value.is_int() ? 
TypeExpr::_Int : TypeExpr::_Slice)) { + , declared_type(declared_type) { } bool is_int_const() const { return value.is_int(); } @@ -186,6 +213,9 @@ struct GlobalConstData final : Symbol { td::RefInt256 as_int_const() const { return value.as_int(); } const std::string& as_slice_const() const { return value.as_slice(); } + + GlobalConstData* mutate() const { return const_cast(this); } + void assign_resolved_type(TypePtr declared_type); }; class GlobalSymbolTable { diff --git a/tolk/tolk.cpp b/tolk/tolk.cpp index f2255ce3..d1159d3c 100644 --- a/tolk/tolk.cpp +++ b/tolk/tolk.cpp @@ -28,6 +28,7 @@ #include "compiler-state.h" #include "lexer.h" #include "ast.h" +#include "type-system.h" namespace tolk { @@ -45,27 +46,28 @@ void on_assertion_failed(const char *description, const char *file_name, int lin } int tolk_proceed(const std::string &entrypoint_filename) { + type_system_init(); define_builtins(); lexer_init(); // on any error, an exception is thrown, and the message is printed out below // (currently, only a single error can be printed) try { - AllSrcFiles all_files = pipeline_discover_and_parse_sources("@stdlib/common.tolk", entrypoint_filename); + pipeline_discover_and_parse_sources("@stdlib/common.tolk", entrypoint_filename); - pipeline_register_global_symbols(all_files); - pipeline_resolve_identifiers_and_assign_symbols(all_files); - pipeline_calculate_rvalue_lvalue(all_files); - pipeline_detect_unreachable_statements(all_files); - pipeline_infer_and_check_types(all_files); - pipeline_refine_lvalue_for_mutate_arguments(all_files); - pipeline_check_rvalue_lvalue(all_files); - pipeline_check_pure_impure_operations(all_files); - pipeline_constant_folding(all_files); - pipeline_convert_ast_to_legacy_Expr_Op(all_files); + pipeline_register_global_symbols(); + pipeline_resolve_identifiers_and_assign_symbols(); + pipeline_calculate_rvalue_lvalue(); + pipeline_detect_unreachable_statements(); + pipeline_infer_types_and_calls_and_fields(); + 
pipeline_refine_lvalue_for_mutate_arguments(); + pipeline_check_rvalue_lvalue(); + pipeline_check_pure_impure_operations(); + pipeline_constant_folding(); + pipeline_convert_ast_to_legacy_Expr_Op(); pipeline_find_unused_symbols(); - pipeline_generate_fif_output_to_std_cout(all_files); + pipeline_generate_fif_output_to_std_cout(); return 0; } catch (Fatal& fatal) { @@ -74,11 +76,6 @@ int tolk_proceed(const std::string &entrypoint_filename) { } catch (ParseError& error) { std::cerr << error << std::endl; return 2; - } catch (UnifyError& unif_err) { - std::cerr << "fatal: "; - unif_err.print_message(std::cerr); - std::cerr << std::endl; - return 2; } catch (UnexpectedASTNodeType& error) { std::cerr << "fatal: " << error.what() << std::endl; std::cerr << "It's a compiler bug, please report to developers" << std::endl; diff --git a/tolk/tolk.h b/tolk/tolk.h index 0408648f..5ec4d3e0 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h @@ -18,7 +18,6 @@ #include "platform-utils.h" #include "src-file.h" -#include "type-expr.h" #include "symtable.h" #include "crypto/common/refint.h" #include "td/utils/Status.h" @@ -35,30 +34,6 @@ namespace tolk { GNU_ATTRIBUTE_COLD GNU_ATTRIBUTE_NORETURN void on_assertion_failed(const char *description, const char *file_name, int line_number); -/* - * - * TYPE EXPRESSIONS - * - */ - -struct UnifyError : std::exception { - TypeExpr* te1; - TypeExpr* te2; - std::string msg; - - UnifyError(TypeExpr* _te1, TypeExpr* _te2, std::string _msg = "") : te1(_te1), te2(_te2), msg(std::move(_msg)) { - } - - void print_message(std::ostream& os) const; - const char* what() const noexcept override { - return msg.c_str(); - } -}; - -std::ostream& operator<<(std::ostream& os, const UnifyError& ue); - -void unify(TypeExpr*& te1, TypeExpr*& te2); - /* * * ABSTRACT CODE @@ -69,15 +44,15 @@ typedef int var_idx_t; typedef int const_idx_t; struct TmpVar { - TypeExpr* v_type; + TypePtr v_type; var_idx_t idx; const LocalVarData* v_sym; // points to var defined in code; 
nullptr for implicitly created tmp vars int coord; SrcLocation where; std::vector> on_modification; - TmpVar(var_idx_t _idx, TypeExpr* _type, const LocalVarData* v_sym, SrcLocation loc) - : v_type(_type) + TmpVar(var_idx_t _idx, TypePtr type, const LocalVarData* v_sym, SrcLocation loc) + : v_type(type) , idx(_idx) , v_sym(v_sym) , coord(0) @@ -410,13 +385,13 @@ inline ListIterator end(const Op* op_list) { return ListIterator{}; } -typedef std::tuple FormalArg; +typedef std::tuple FormalArg; typedef std::vector FormalArgList; struct AsmOpList; struct FunctionBodyCode { - CodeBlob* code; + CodeBlob* code = nullptr; void set_code(CodeBlob* code); }; @@ -597,6 +572,7 @@ inline std::ostream& operator<<(std::ostream& os, const AsmOp& op) { } std::ostream& operator<<(std::ostream& os, AsmOp::SReg stack_reg); +std::ostream& operator<<(std::ostream& os, TypePtr type_data); struct AsmOpList { std::vector list_; @@ -1116,7 +1092,6 @@ struct FunctionBodyAsm { struct CodeBlob { int var_cnt, in_var_cnt; - TypeExpr* ret_type; const FunctionData* fun_ref; std::string name; SrcLocation loc; @@ -1128,8 +1103,8 @@ struct CodeBlob { #endif std::stack*> cur_ops_stack; bool require_callxargs = false; - CodeBlob(std::string name, SrcLocation loc, const FunctionData* fun_ref, TypeExpr* ret_type) - : var_cnt(0), in_var_cnt(0), ret_type(ret_type), fun_ref(fun_ref), name(std::move(name)), loc(loc), cur_ops(&ops) { + CodeBlob(std::string name, SrcLocation loc, const FunctionData* fun_ref) + : var_cnt(0), in_var_cnt(0), fun_ref(fun_ref), name(std::move(name)), loc(loc), cur_ops(&ops) { } template Op& emplace_back(Args&&... 
args) { @@ -1141,8 +1116,8 @@ struct CodeBlob { return res; } bool import_params(FormalArgList&& arg_list); - var_idx_t create_var(TypeExpr* var_type, const LocalVarData* v_sym, SrcLocation loc); - var_idx_t create_tmp_var(TypeExpr* var_type, SrcLocation loc) { + var_idx_t create_var(TypePtr var_type, const LocalVarData* v_sym, SrcLocation loc); + var_idx_t create_tmp_var(TypePtr var_type, SrcLocation loc) { return create_var(var_type, nullptr, loc); } int split_vars(bool strict = false); @@ -1164,7 +1139,6 @@ struct CodeBlob { close_blk(location); pop_cur(); } - void simplify_var_types(); void prune_unreachable_code(); void fwd_analyze(); void mark_noreturn(); diff --git a/tolk/type-expr.h b/tolk/type-expr.h deleted file mode 100644 index 21a35a8e..00000000 --- a/tolk/type-expr.h +++ /dev/null @@ -1,131 +0,0 @@ -#pragma once - -#include -#include - -namespace tolk { - -struct TypeExpr { - enum Kind { te_Unknown, te_Var, te_Indirect, te_Atomic, te_Tensor, te_Tuple, te_Map, te_ForAll }; - enum AtomicType { _Int, _Cell, _Slice, _Builder, _Continutaion, _Tuple }; - Kind constr; - int value; - int minw, maxw; - static constexpr int w_inf = 1023; - std::vector args; - bool was_forall_var = false; - - explicit TypeExpr(Kind _constr, int _val = 0) : constr(_constr), value(_val), minw(0), maxw(w_inf) { - } - TypeExpr(Kind _constr, int _val, int width) : constr(_constr), value(_val), minw(width), maxw(width) { - } - TypeExpr(Kind _constr, std::vector list) - : constr(_constr), value((int)list.size()), args(std::move(list)) { - compute_width(); - } - TypeExpr(Kind _constr, std::initializer_list list) - : constr(_constr), value((int)list.size()), args(std::move(list)) { - compute_width(); - } - TypeExpr(Kind _constr, TypeExpr* elem0) : constr(_constr), value(1), args{elem0} { - compute_width(); - } - TypeExpr(Kind _constr, TypeExpr* elem0, std::vector list) - : constr(_constr), value((int)list.size() + 1), args{elem0} { - args.insert(args.end(), list.begin(), list.end()); - 
compute_width(); - } - TypeExpr(Kind _constr, TypeExpr* elem0, std::initializer_list list) - : constr(_constr), value((int)list.size() + 1), args{elem0} { - args.insert(args.end(), list.begin(), list.end()); - compute_width(); - } - - bool is_atomic() const { - return constr == te_Atomic; - } - bool is_atomic(int v) const { - return constr == te_Atomic && value == v; - } - bool is_int() const { - return is_atomic(_Int); - } - bool is_var() const { - return constr == te_Var; - } - bool is_map() const { - return constr == te_Map; - } - bool is_tuple() const { - return constr == te_Tuple; - } - bool has_fixed_width() const { - return minw == maxw; - } - int get_width() const { - return has_fixed_width() ? minw : -1; - } - void compute_width(); - bool recompute_width(); - void show_width(std::ostream& os); - std::ostream& print(std::ostream& os, int prio = 0) const; - void replace_with(TypeExpr* te2); - int extract_components(std::vector& comp_list); - bool equals_to(const TypeExpr* rhs) const; - bool has_unknown_inside() const; - static int holes, type_vars; - static TypeExpr* new_hole() { - return new TypeExpr{te_Unknown, ++holes}; - } - static TypeExpr* new_hole(int width) { - return new TypeExpr{te_Unknown, ++holes, width}; - } - static TypeExpr* new_unit() { - return new TypeExpr{te_Tensor, 0, 0}; - } - static TypeExpr* new_atomic(int value) { - return new TypeExpr{te_Atomic, value, 1}; - } - static TypeExpr* new_map(TypeExpr* from, TypeExpr* to); - static TypeExpr* new_func() { - return new_map(new_hole(), new_hole()); - } - static TypeExpr* new_tensor(std::vector list, bool red = true) { - return red && list.size() == 1 ? 
list[0] : new TypeExpr{te_Tensor, std::move(list)}; - } - static TypeExpr* new_tensor(std::initializer_list list) { - return new TypeExpr{te_Tensor, std::move(list)}; - } - static TypeExpr* new_tensor(TypeExpr* te1, TypeExpr* te2) { - return new_tensor({te1, te2}); - } - static TypeExpr* new_tensor(TypeExpr* te1, TypeExpr* te2, TypeExpr* te3) { - return new_tensor({te1, te2, te3}); - } - static TypeExpr* new_tuple(TypeExpr* arg0) { - return new TypeExpr{te_Tuple, arg0}; - } - static TypeExpr* new_tuple(std::vector list, bool red = false) { - return new_tuple(new_tensor(std::move(list), red)); - } - static TypeExpr* new_tuple(std::initializer_list list) { - return new_tuple(new_tensor(list)); - } - static TypeExpr* new_var() { - return new TypeExpr{te_Var, --type_vars, 1}; - } - static TypeExpr* new_var(int idx) { - return new TypeExpr{te_Var, idx, 1}; - } - static TypeExpr* new_forall(std::vector list, TypeExpr* body) { - return new TypeExpr{te_ForAll, body, std::move(list)}; - } - - static bool remove_indirect(TypeExpr*& te, TypeExpr* forbidden = nullptr); - static std::vector remove_forall(TypeExpr*& te); - static bool remove_forall_in(TypeExpr*& te, TypeExpr* te2, const std::vector& new_vars); -}; - -std::ostream& operator<<(std::ostream& os, TypeExpr* type_expr); - -} // namespace tolk diff --git a/tolk/type-system.cpp b/tolk/type-system.cpp new file mode 100644 index 00000000..31f84e75 --- /dev/null +++ b/tolk/type-system.cpp @@ -0,0 +1,684 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "type-system.h" +#include "lexer.h" +#include "platform-utils.h" +#include "compiler-state.h" +#include + +namespace tolk { + +/* + * This class stores a big hashtable [hash => TypeData] + * Every non-trivial TypeData*::create() method at first looks here, and allocates an object only if not found. + * That's why all allocated TypeData objects are unique, storing unique type_id. + */ +class TypeDataTypeIdCalculation { + uint64_t cur_hash; + int children_flags_mask = 0; + + static std::unordered_map all_unique_occurred_types; + +public: + explicit TypeDataTypeIdCalculation(uint64_t initial_arbitrary_unique_number) + : cur_hash(initial_arbitrary_unique_number) {} + + void feed_hash(uint64_t val) { + cur_hash = cur_hash * 56235515617499ULL + val; + } + + void feed_string(const std::string& s) { + feed_hash(std::hash{}(s)); + } + + void feed_child(TypePtr inner) { + feed_hash(inner->type_id); + children_flags_mask |= inner->flags; + } + + uint64_t type_id() const { + return cur_hash; + } + + int children_flags() const { + return children_flags_mask; + } + + GNU_ATTRIBUTE_FLATTEN + TypePtr get_existing() const { + auto it = all_unique_occurred_types.find(cur_hash); + return it != all_unique_occurred_types.end() ? 
it->second : nullptr; + } + + GNU_ATTRIBUTE_NOINLINE + TypePtr register_unique(TypePtr newly_created) const { +#ifdef TOLK_DEBUG + assert(newly_created->type_id == cur_hash); +#endif + all_unique_occurred_types[cur_hash] = newly_created; + return newly_created; + } +}; + +std::unordered_map TypeDataTypeIdCalculation::all_unique_occurred_types; +TypePtr TypeDataInt::singleton; +TypePtr TypeDataCell::singleton; +TypePtr TypeDataSlice::singleton; +TypePtr TypeDataBuilder::singleton; +TypePtr TypeDataTuple::singleton; +TypePtr TypeDataContinuation::singleton; +TypePtr TypeDataNullLiteral::singleton; +TypePtr TypeDataUnknown::singleton; +TypePtr TypeDataVoid::singleton; + +void type_system_init() { + TypeDataInt::singleton = new TypeDataInt; + TypeDataCell::singleton = new TypeDataCell; + TypeDataSlice::singleton = new TypeDataSlice; + TypeDataBuilder::singleton = new TypeDataBuilder; + TypeDataTuple::singleton = new TypeDataTuple; + TypeDataContinuation::singleton = new TypeDataContinuation; + TypeDataNullLiteral::singleton = new TypeDataNullLiteral; + TypeDataUnknown::singleton = new TypeDataUnknown; + TypeDataVoid::singleton = new TypeDataVoid; +} + + +// -------------------------------------------- +// create() +// +// all constructors of TypeData classes are private, only TypeData*::create() is allowed +// each non-trivial create() method calculates hash (type_id) +// and creates an object only if it isn't found in a global hashtable +// + +TypePtr TypeDataFunCallable::create(std::vector&& params_types, TypePtr return_type) { + TypeDataTypeIdCalculation hash(3184039965511020991ULL); + for (TypePtr param : params_types) { + hash.feed_child(param); + hash.feed_hash(767721); + } + hash.feed_child(return_type); + hash.feed_hash(767722); + + if (TypePtr existing = hash.get_existing()) { + return existing; + } + return hash.register_unique(new TypeDataFunCallable(hash.type_id(), hash.children_flags(), std::move(params_types), return_type)); +} + +TypePtr 
TypeDataGenericT::create(std::string&& nameT) { + TypeDataTypeIdCalculation hash(9145033724911680012ULL); + hash.feed_string(nameT); + + if (TypePtr existing = hash.get_existing()) { + return existing; + } + return hash.register_unique(new TypeDataGenericT(hash.type_id(), std::move(nameT))); +} + +TypePtr TypeDataTensor::create(std::vector&& items) { + TypeDataTypeIdCalculation hash(3159238551239480381ULL); + for (TypePtr item : items) { + hash.feed_child(item); + hash.feed_hash(819613); + } + + if (TypePtr existing = hash.get_existing()) { + return existing; + } + return hash.register_unique(new TypeDataTensor(hash.type_id(), hash.children_flags(), std::move(items))); +} + +TypePtr TypeDataTypedTuple::create(std::vector&& items) { + TypeDataTypeIdCalculation hash(9189266157349499320ULL); + for (TypePtr item : items) { + hash.feed_child(item); + hash.feed_hash(735911); + } + + if (TypePtr existing = hash.get_existing()) { + return existing; + } + return hash.register_unique(new TypeDataTypedTuple(hash.type_id(), hash.children_flags(), std::move(items))); +} + +TypePtr TypeDataUnresolved::create(std::string&& text, SrcLocation loc) { + TypeDataTypeIdCalculation hash(3680147223540048162ULL); + hash.feed_string(text); + // hash.feed_hash(*reinterpret_cast(&loc)); + + if (TypePtr existing = hash.get_existing()) { + return existing; + } + return hash.register_unique(new TypeDataUnresolved(hash.type_id(), std::move(text), loc)); +} + + +// -------------------------------------------- +// as_human_readable() +// +// is used only for error messages and debugging, therefore no optimizations for simplicity +// only non-trivial implementations are here; trivial are defined in .h file +// + +std::string TypeDataFunCallable::as_human_readable() const { + std::string result = "("; + for (TypePtr param : params_types) { + if (result.size() > 1) { + result += ", "; + } + result += param->as_human_readable(); + } + result += ") -> "; + result += return_type->as_human_readable(); + 
return result; +} + +std::string TypeDataTensor::as_human_readable() const { + std::string result = "("; + for (TypePtr item : items) { + if (result.size() > 1) { + result += ", "; + } + result += item->as_human_readable(); + } + result += ')'; + return result; +} + +std::string TypeDataTypedTuple::as_human_readable() const { + std::string result = "["; + for (TypePtr item : items) { + if (result.size() > 1) { + result += ", "; + } + result += item->as_human_readable(); + } + result += ']'; + return result; +} + + +// -------------------------------------------- +// traverse() +// +// invokes a callback for TypeData itself and all its children +// only non-trivial implementations are here; by default (no children), `callback(this)` is executed +// + +void TypeDataFunCallable::traverse(const TraverserCallbackT& callback) const { + callback(this); + for (TypePtr param : params_types) { + param->traverse(callback); + } + return_type->traverse(callback); +} + +void TypeDataTensor::traverse(const TraverserCallbackT& callback) const { + callback(this); + for (TypePtr item : items) { + item->traverse(callback); + } +} + +void TypeDataTypedTuple::traverse(const TraverserCallbackT& callback) const { + callback(this); + for (TypePtr item : items) { + item->traverse(callback); + } +} + + +// -------------------------------------------- +// replace_children_custom() +// +// returns new TypeData with children replaced by a custom callback +// used to replace generic T on generics expansion — to convert `f` to `f` +// only non-trivial implementations are here; by default (no children), `return callback(this)` is executed +// + +TypePtr TypeDataFunCallable::replace_children_custom(const ReplacerCallbackT& callback) const { + std::vector mapped; + mapped.reserve(params_types.size()); + for (TypePtr param : params_types) { + mapped.push_back(param->replace_children_custom(callback)); + } + return callback(create(std::move(mapped), return_type->replace_children_custom(callback))); 
+} + +TypePtr TypeDataTensor::replace_children_custom(const ReplacerCallbackT& callback) const { + std::vector mapped; + mapped.reserve(items.size()); + for (TypePtr item : items) { + mapped.push_back(item->replace_children_custom(callback)); + } + return callback(create(std::move(mapped))); +} + +TypePtr TypeDataTypedTuple::replace_children_custom(const ReplacerCallbackT& callback) const { + std::vector mapped; + mapped.reserve(items.size()); + for (TypePtr item : items) { + mapped.push_back(item->replace_children_custom(callback)); + } + return callback(create(std::move(mapped))); +} + + +// -------------------------------------------- +// calc_width_on_stack() +// +// returns the number of stack slots occupied by a variable of this type +// only non-trivial implementations are here; by default (most types) occupy 1 stack slot +// + +int TypeDataGenericT::calc_width_on_stack() const { + // this function is invoked only in functions with generics already instantiated + assert(false); + return -999999; +} + +int TypeDataTensor::calc_width_on_stack() const { + int sum = 0; + for (TypePtr item : items) { + sum += item->calc_width_on_stack(); + } + return sum; +} + +int TypeDataUnresolved::calc_width_on_stack() const { + // since early pipeline stages, no unresolved types left + assert(false); + return -999999; +} + +int TypeDataVoid::calc_width_on_stack() const { + return 0; +} + + +// -------------------------------------------- +// can_rhs_be_assigned() +// +// on `var lhs: = rhs`, having inferred rhs_type, check that it can be assigned without any casts +// the same goes for passing arguments, returning values, etc. — where the "receiver" (lhs) checks "applier" (rhs) +// for now, `null` can be assigned to any TVM primitive, but later we'll have T? 
types and null safety +// + +bool TypeDataInt::can_rhs_be_assigned(TypePtr rhs) const { + if (rhs == this) { + return true; + } + if (rhs == TypeDataNullLiteral::create()) { + return true; + } + return false; +} + +bool TypeDataCell::can_rhs_be_assigned(TypePtr rhs) const { + if (rhs == this) { + return true; + } + if (rhs == TypeDataNullLiteral::create()) { + return true; + } + return false; +} + +bool TypeDataSlice::can_rhs_be_assigned(TypePtr rhs) const { + if (rhs == this) { + return true; + } + if (rhs == TypeDataNullLiteral::create()) { + return true; + } + return false; +} + +bool TypeDataBuilder::can_rhs_be_assigned(TypePtr rhs) const { + if (rhs == this) { + return true; + } + if (rhs == TypeDataNullLiteral::create()) { + return true; + } + return false; +} + +bool TypeDataTuple::can_rhs_be_assigned(TypePtr rhs) const { + if (rhs == this) { + return true; + } + if (rhs == TypeDataNullLiteral::create()) { + return true; + } + return false; +} + +bool TypeDataContinuation::can_rhs_be_assigned(TypePtr rhs) const { + if (rhs == this) { + return true; + } + if (rhs == TypeDataNullLiteral::create()) { + return true; + } + return false; +} + +bool TypeDataNullLiteral::can_rhs_be_assigned(TypePtr rhs) const { + return rhs == this; +} + +bool TypeDataFunCallable::can_rhs_be_assigned(TypePtr rhs) const { + return rhs == this; +} + +bool TypeDataGenericT::can_rhs_be_assigned(TypePtr rhs) const { + assert(false); + return false; +} + +bool TypeDataTensor::can_rhs_be_assigned(TypePtr rhs) const { + if (const auto* as_tensor = rhs->try_as(); as_tensor && as_tensor->size() == size()) { + for (int i = 0; i < size(); ++i) { + if (!items[i]->can_rhs_be_assigned(as_tensor->items[i])) { + return false; + } + } + return true; + } + // note, that tensors can not accept null + return false; +} + +bool TypeDataTypedTuple::can_rhs_be_assigned(TypePtr rhs) const { + if (const auto* as_tuple = rhs->try_as(); as_tuple && as_tuple->size() == size()) { + for (int i = 0; i < size(); 
++i) { + if (!items[i]->can_rhs_be_assigned(as_tuple->items[i])) { + return false; + } + } + return true; + } + if (rhs == TypeDataNullLiteral::create()) { + return true; + } + return false; +} + +bool TypeDataUnknown::can_rhs_be_assigned(TypePtr rhs) const { + return true; +} + +bool TypeDataUnresolved::can_rhs_be_assigned(TypePtr rhs) const { + assert(false); + return false; +} + +bool TypeDataVoid::can_rhs_be_assigned(TypePtr rhs) const { + return rhs == this; +} + + +// -------------------------------------------- +// can_be_casted_with_as_operator() +// +// on `expr as `, check whether casting is applicable +// note, that it's not auto-casts `var lhs: = rhs`, it's an expression `rhs as ` +// + +bool TypeDataInt::can_be_casted_with_as_operator(TypePtr cast_to) const { + return cast_to == this; +} + +bool TypeDataCell::can_be_casted_with_as_operator(TypePtr cast_to) const { + return cast_to == this; +} + +bool TypeDataSlice::can_be_casted_with_as_operator(TypePtr cast_to) const { + return cast_to == this; +} + +bool TypeDataBuilder::can_be_casted_with_as_operator(TypePtr cast_to) const { + return cast_to == this; +} + +bool TypeDataTuple::can_be_casted_with_as_operator(TypePtr cast_to) const { + return cast_to == this; +} + +bool TypeDataContinuation::can_be_casted_with_as_operator(TypePtr cast_to) const { + return cast_to == this; +} + +bool TypeDataNullLiteral::can_be_casted_with_as_operator(TypePtr cast_to) const { + return cast_to == this + || cast_to == TypeDataInt::create() || cast_to == TypeDataCell::create() || cast_to == TypeDataSlice::create() + || cast_to == TypeDataBuilder::create() || cast_to == TypeDataContinuation::create() || cast_to == TypeDataTuple::create() + || cast_to->try_as(); +} + +bool TypeDataFunCallable::can_be_casted_with_as_operator(TypePtr cast_to) const { + return this == cast_to; +} + +bool TypeDataGenericT::can_be_casted_with_as_operator(TypePtr cast_to) const { + return true; +} + +bool 
TypeDataTensor::can_be_casted_with_as_operator(TypePtr cast_to) const { + if (const auto* to_tensor = cast_to->try_as(); to_tensor && to_tensor->size() == size()) { + for (int i = 0; i < size(); ++i) { + if (!items[i]->can_be_casted_with_as_operator(to_tensor->items[i])) { + return false; + } + } + return true; + } + return false; +} + +bool TypeDataTypedTuple::can_be_casted_with_as_operator(TypePtr cast_to) const { + if (const auto* to_tuple = cast_to->try_as(); to_tuple && to_tuple->size() == size()) { + for (int i = 0; i < size(); ++i) { + if (!items[i]->can_be_casted_with_as_operator(to_tuple->items[i])) { + return false; + } + } + return true; + } + return false; +} + +bool TypeDataUnknown::can_be_casted_with_as_operator(TypePtr cast_to) const { + // 'unknown' can be cast to any type + // (though it's not valid for exception arguments when casting them to non-1 stack width, + // but to ensure it, we need a special type "unknown TVM primitive", which is overwhelming I think) + return true; +} + +bool TypeDataUnresolved::can_be_casted_with_as_operator(TypePtr cast_to) const { + return false; +} + +bool TypeDataVoid::can_be_casted_with_as_operator(TypePtr cast_to) const { + return cast_to == this; +} + + +// -------------------------------------------- +// extract_components() +// +// used in code generation (transforming Ops to other Ops) +// to be removed in the future +// + +void TypeDataGenericT::extract_components(std::vector& comp_types) const { + assert(false); +} + +void TypeDataTensor::extract_components(std::vector& comp_types) const { + for (TypePtr item : items) { + item->extract_components(comp_types); + } +} + +void TypeDataUnresolved::extract_components(std::vector& comp_types) const { + assert(false); +} + +void TypeDataVoid::extract_components(std::vector& comp_types) const { +} + + +// -------------------------------------------- +// parsing type from tokens +// +// here we implement parsing types (mostly after colon) to TypeData +// example: 
`var v: int` is TypeDataInt +// example: `var v: (builder, [cell])` is TypeDataTensor(TypeDataBuilder, TypeDataTypedTuple(TypeDataCell)) +// example: `fun f(): ()` is TypeDataTensor() (an empty one) +// +// note, that unrecognized type names (MyEnum, MyStruct, T) are parsed as TypeDataUnresolved, +// and later, when all files are parsed and all symbols registered, such identifiers are resolved +// example: `fun f(v: T)` at first v is TypeDataUnresolved("T"), later becomes TypeDataGenericT +// see finalize_type_data() +// +// note, that `self` does not name a type, it can appear only as a return value of a function (parsed specially) +// when `self` appears as a type, it's parsed as TypeDataUnresolved, and later an error is emitted +// + +static TypePtr parse_type_expression(Lexer& lex); + +std::vector parse_nested_type_list(Lexer& lex, TokenType tok_op, const char* s_op, TokenType tok_cl, const char* s_cl) { + lex.expect(tok_op, s_op); + std::vector sub_types; + while (true) { + if (lex.tok() == tok_cl) { // empty lists allowed + lex.next(); + break; + } + + sub_types.emplace_back(parse_type_expression(lex)); + if (lex.tok() == tok_comma) { + lex.next(); + } else if (lex.tok() != tok_cl) { + lex.unexpected(s_cl); + } + } + return sub_types; +} + +std::vector parse_nested_type_list_in_parenthesis(Lexer& lex) { + return parse_nested_type_list(lex, tok_oppar, "`(`", tok_clpar, "`)` or `,`"); +} + +static TypePtr parse_simple_type(Lexer& lex) { + switch (lex.tok()) { + case tok_int: + lex.next(); + return TypeDataInt::create(); + case tok_cell: + lex.next(); + return TypeDataCell::create(); + case tok_builder: + lex.next(); + return TypeDataBuilder::create(); + case tok_slice: + lex.next(); + return TypeDataSlice::create(); + case tok_tuple: + lex.next(); + return TypeDataTuple::create(); + case tok_continuation: + lex.next(); + return TypeDataContinuation::create(); + case tok_null: + lex.next(); + return TypeDataNullLiteral::create(); + case tok_void: + lex.next(); + 
return TypeDataVoid::create(); + case tok_bool: + case tok_self: + case tok_identifier: { + SrcLocation loc = lex.cur_location(); + std::string text = static_cast(lex.cur_str()); + lex.next(); + return TypeDataUnresolved::create(std::move(text), loc); + } + case tok_oppar: { + std::vector items = parse_nested_type_list_in_parenthesis(lex); + if (items.size() == 1) { + return items.front(); + } + return TypeDataTensor::create(std::move(items)); + } + case tok_opbracket: { + std::vector items = parse_nested_type_list(lex, tok_opbracket, "`[`", tok_clbracket, "`]` or `,`"); + return TypeDataTypedTuple::create(std::move(items)); + } + case tok_fun: { + lex.next(); + std::vector params_types = parse_nested_type_list_in_parenthesis(lex); + lex.expect(tok_arrow, "`->`"); + } + default: + lex.unexpected(""); + } +} + +static TypePtr parse_type_nullable(Lexer& lex) { + TypePtr result = parse_simple_type(lex); + + if (lex.tok() == tok_question) { + lex.error("nullable types are not supported yet"); + } + + return result; +} + +static TypePtr parse_type_expression(Lexer& lex) { + TypePtr result = parse_type_nullable(lex); + + if (lex.tok() == tok_arrow) { // `int -> int`, `(cell, slice) -> void` + lex.next(); + TypePtr return_type = parse_type_expression(lex); + std::vector params_types = {result}; + if (const auto* as_tensor = result->try_as()) { + params_types = as_tensor->items; + } + return TypeDataFunCallable::create(std::move(params_types), return_type); + } + + if (lex.tok() != tok_bitwise_or) { + return result; + } + + lex.error("union types are not supported yet"); +} + +TypePtr parse_type_from_tokens(Lexer& lex) { + return parse_type_expression(lex); +} + +std::ostream& operator<<(std::ostream& os, TypePtr type_data) { + return os << (type_data ? 
type_data->as_human_readable() : "(nullptr-type)"); +} + +} // namespace tolk diff --git a/tolk/type-system.h b/tolk/type-system.h new file mode 100644 index 00000000..2805bb34 --- /dev/null +++ b/tolk/type-system.h @@ -0,0 +1,405 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +#include "src-file.h" +#include +#include +#include + +namespace tolk { + +/* + * TypeData is both a user-given and an inferred type representation. + * `int`, `cell`, `T`, `(int, [tuple])` are instances of TypeData. + * Every unique TypeData is created only once, so for example TypeDataTensor::create(int, int) + * returns one and the same pointer always. This "uniqueness" is called type_id, calculated before creation. + * + * In Tolk code, types after colon `var v: (int, T)` are parsed to TypeData. + * See parse_type_from_tokens(). + * So, AST nodes which can have declared types (local/global variables and others) store a pointer to TypeData. + * + * Type inferring also creates TypeData for inferred expressions. All AST expression nodes have inferred_type. + * For example, `1 + 2`, both operands are TypeDataInt, its result is also TypeDataInt. + * Type checking also uses TypeData. 
For example, `var i: slice = 1 + 2`, at first rhs (TypeDataInt) is inferred, + * then lhs (TypeDataSlice from declaration) is checked whether rhs can be assigned. + * See can_rhs_be_assigned(). + * + * Note, that while initial parsing Tolk files to AST, known types (`int`, `cell`, etc.) are created as-is, + * but user-defined types (`T`, `MyStruct`, `MyAlias`) are saved as TypeDataUnresolved. + * After all symbols have been registered, resolving identifiers step is executed, where particularly + * all TypeDataUnresolved instances are converted to a resolved type. At inferring, no unresolved remain. + * For instance, `fun f(v: T)`, at first "T" of `v` is unresolved, and then converted to TypeDataGenericT. + */ +class TypeData { + // all unique types have unique type_id; it's used both for allocating memory once and for tagged unions + const uint64_t type_id; + // bits of flag_mask, to store often-used properties and return them without tree traversing + const int flags; + + friend class TypeDataTypeIdCalculation; + +protected: + enum flag_mask { + flag_contains_unknown_inside = 1 << 1, + flag_contains_genericT_inside = 1 << 2, + flag_contains_unresolved_inside = 1 << 3, + }; + + explicit TypeData(uint64_t type_id, int flags_with_children) + : type_id(type_id) + , flags(flags_with_children) { + } + +public: + virtual ~TypeData() = default; + + template + const Derived* try_as() const { + return dynamic_cast(this); + } + + uint64_t get_type_id() const { return type_id; } + + bool has_unknown_inside() const { return flags & flag_contains_unknown_inside; } + bool has_genericT_inside() const { return flags & flag_contains_genericT_inside; } + bool has_unresolved_inside() const { return flags & flag_contains_unresolved_inside; } + + using TraverserCallbackT = std::function; + using ReplacerCallbackT = std::function; + + virtual std::string as_human_readable() const = 0; + virtual bool can_rhs_be_assigned(TypePtr rhs) const = 0; + virtual bool 
can_be_casted_with_as_operator(TypePtr cast_to) const = 0; + + virtual void traverse(const TraverserCallbackT& callback) const { + callback(this); + } + + virtual TypePtr replace_children_custom(const ReplacerCallbackT& callback) const { + return callback(this); + } + + virtual int calc_width_on_stack() const { + return 1; + } + + virtual void extract_components(std::vector& comp_types) const { + comp_types.push_back(this); + } +}; + +/* + * `int` is TypeDataInt, representation of TVM int. + */ +class TypeDataInt final : public TypeData { + TypeDataInt() : TypeData(1ULL, 0) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "int"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; +}; + +/* + * `cell` is TypeDataCell, representation of TVM cell. + */ +class TypeDataCell final : public TypeData { + TypeDataCell() : TypeData(3ULL, 0) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "cell"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; +}; + +/* + * `slice` is TypeDataSlice, representation of TVM slice. + */ +class TypeDataSlice final : public TypeData { + TypeDataSlice() : TypeData(4ULL, 0) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "slice"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; +}; + +/* + * `builder` is TypeDataBuilder, representation of TVM builder. 
+ */ +class TypeDataBuilder final : public TypeData { + TypeDataBuilder() : TypeData(5ULL, 0) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "builder"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; +}; + +/* + * `tuple` is TypeDataTuple, representation of TVM tuple. + * Note, that it's UNTYPED tuple. It occupies 1 stack slot in TVM. Its elements are any TVM values at runtime, + * so getting its element results in TypeDataUnknown (which must be assigned/cast explicitly). + */ +class TypeDataTuple final : public TypeData { + TypeDataTuple() : TypeData(6ULL, 0) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "tuple"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; +}; + +/* + * `continuation` is TypeDataContinuation, representation of TVM continuation. + * It's like "untyped callable", not compatible with other types. + */ +class TypeDataContinuation final : public TypeData { + TypeDataContinuation() : TypeData(7ULL, 0) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "continuation"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; +}; + +/* + * `null` has TypeDataNullLiteral type. + * Currently, it can be assigned to int/slice/etc., but later Tolk will have T? types and null safety. 
+ * Note, that `var i = null`, though valid (i would be constant null), fires an "always-null" compilation error + * (it's much better for user to see an error here than when he passes this variable somewhere). + */ +class TypeDataNullLiteral final : public TypeData { + TypeDataNullLiteral() : TypeData(8ULL, 0) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "null"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; +}; + +/* + * `fun(int, int) -> void` is TypeDataFunCallable, think of it as a typed continuation. + * A type of function `fun f(x: int) { return x; }` is actually `fun(int) -> int`. + * So, when assigning it to a variable `var cb = f`, this variable also has this type. + */ +class TypeDataFunCallable final : public TypeData { + TypeDataFunCallable(uint64_t type_id, int children_flags, std::vector&& params_types, TypePtr return_type) + : TypeData(type_id, children_flags) + , params_types(std::move(params_types)) + , return_type(return_type) {} + +public: + const std::vector params_types; + const TypePtr return_type; + + static TypePtr create(std::vector&& params_types, TypePtr return_type); + + int params_size() const { return static_cast(params_types.size()); } + + std::string as_human_readable() const override; + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; + void traverse(const TraverserCallbackT& callback) const override; + TypePtr replace_children_custom(const ReplacerCallbackT& callback) const override; +}; + +/* + * `T` inside generic functions is TypeDataGenericT. + * Example: `fun f(a: X, b: Y): [X, Y]` (here X and Y are). + * On instantiation like `f(1,"")`, a new function `f` is created with type `fun(int,slice)->[int,slice]`. 
+ */ +class TypeDataGenericT final : public TypeData { + TypeDataGenericT(uint64_t type_id, std::string&& nameT) + : TypeData(type_id, flag_contains_genericT_inside) + , nameT(std::move(nameT)) {} + +public: + const std::string nameT; + + static TypePtr create(std::string&& nameT); + + std::string as_human_readable() const override { return nameT; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; + int calc_width_on_stack() const override; + void extract_components(std::vector& comp_types) const override; +}; + +/* + * `(int, slice)` is TypeDataTensor of 2 elements. Tensor of N elements occupies N stack slots. + * Of course, there may be nested tensors, like `(int, (int, slice), cell)`. + * Arguments, variables, globals, return values, etc. can be tensors. + * A tensor can be empty. + */ +class TypeDataTensor final : public TypeData { + TypeDataTensor(uint64_t type_id, int children_flags, std::vector&& items) + : TypeData(type_id, children_flags) + , items(std::move(items)) {} + +public: + const std::vector items; + + static TypePtr create(std::vector&& items); + + int size() const { return static_cast(items.size()); } + + std::string as_human_readable() const override; + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; + void traverse(const TraverserCallbackT& callback) const override; + TypePtr replace_children_custom(const ReplacerCallbackT& callback) const override; + int calc_width_on_stack() const override; + void extract_components(std::vector& comp_types) const override; +}; + +/* + * `[int, slice]` is TypeDataTypedTuple, a TVM 'tuple' under the hood, contained in 1 stack slot. + * Unlike TypeDataTuple (untyped tuples), it has a predefined inner structure and can be assigned as + * `var [i, cs] = [0, ""]` (where i and cs become two separate variables on a stack, int and slice). 
+ */ +class TypeDataTypedTuple final : public TypeData { + TypeDataTypedTuple(uint64_t type_id, int children_flags, std::vector&& items) + : TypeData(type_id, children_flags) + , items(std::move(items)) {} + +public: + const std::vector items; + + static TypePtr create(std::vector&& items); + + int size() const { return static_cast(items.size()); } + + std::string as_human_readable() const override; + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; + void traverse(const TraverserCallbackT& callback) const override; + TypePtr replace_children_custom(const ReplacerCallbackT& callback) const override; +}; + +/* + * `unknown` is a special type, which can appear in corner cases. + * The type of exception argument (which can hold any TVM value at runtime) is unknown. + * The type of `_` used as rvalue is unknown. + * The only thing available to do with unknown is to cast it: `catch (excNo, arg) { var i = arg as int; }` + */ +class TypeDataUnknown final : public TypeData { + TypeDataUnknown() : TypeData(20ULL, flag_contains_unknown_inside) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "unknown"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; +}; + +/* + * "Unresolved" is not actually a type — it's an intermediate state between parsing and resolving. + * At parsing to AST, unrecognized type names (MyEnum, MyStruct, T) are parsed as TypeDataUnresolved, + * and after all source files parsed and global symbols registered, they are replaced by actual ones. + * Example: `fun f(v: T)` at first v is TypeDataUnresolved("T"), later becomes TypeDataGenericT. 
+ */ +class TypeDataUnresolved final : public TypeData { + TypeDataUnresolved(uint64_t type_id, std::string&& text, SrcLocation loc) + : TypeData(type_id, flag_contains_unresolved_inside) + , text(std::move(text)) + , loc(loc) {} + +public: + const std::string text; + const SrcLocation loc; + + static TypePtr create(std::string&& text, SrcLocation loc); + + std::string as_human_readable() const override { return text + "*"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; + int calc_width_on_stack() const override; + void extract_components(std::vector& comp_types) const override; +}; + +/* + * `void` is TypeDataVoid. + * From the type system point of view, `void` functions return nothing. + * Empty tensor is not compatible with void, although at IR level they are similar, 0 stack slots. + */ +class TypeDataVoid final : public TypeData { + TypeDataVoid() : TypeData(10ULL, 0) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "void"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; + int calc_width_on_stack() const override; + void extract_components(std::vector& comp_types) const override; +}; + + +// -------------------------------------------- + + +class Lexer; +TypePtr parse_type_from_tokens(Lexer& lex); + +void type_system_init(); + +} // namespace tolk diff --git a/tolk/unify-types.cpp b/tolk/unify-types.cpp deleted file mode 100644 index 3712c6f5..00000000 --- a/tolk/unify-types.cpp +++ /dev/null @@ -1,454 +0,0 @@ -/* - This file is part of TON Blockchain Library. 
- - TON Blockchain Library is free software: you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - TON Blockchain Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with TON Blockchain Library. If not, see . -*/ -#include "tolk.h" - -namespace tolk { - -/* - * - * TYPE EXPRESSIONS - * - */ - -int TypeExpr::holes = 0, TypeExpr::type_vars = 0; // not thread safe, but it is ok for now - -void TypeExpr::compute_width() { - switch (constr) { - case te_Atomic: - case te_Map: - minw = maxw = 1; - break; - case te_Tensor: - minw = maxw = 0; - for (TypeExpr* arg : args) { - minw += arg->minw; - maxw += arg->maxw; - } - if (minw > w_inf) { - minw = w_inf; - } - if (maxw > w_inf) { - maxw = w_inf; - } - break; - case te_Tuple: - minw = maxw = 1; - for (TypeExpr* arg : args) { - arg->compute_width(); - } - break; - case te_Indirect: - minw = args[0]->minw; - maxw = args[0]->maxw; - break; - default: - minw = 0; - maxw = w_inf; - break; - } -} - -bool TypeExpr::recompute_width() { - switch (constr) { - case te_Tensor: - case te_Indirect: { - int min = 0, max = 0; - for (TypeExpr* arg : args) { - min += arg->minw; - max += arg->maxw; - } - if (min > maxw || max < minw) { - return false; - } - if (min > w_inf) { - min = w_inf; - } - if (max > w_inf) { - max = w_inf; - } - if (minw < min) { - minw = min; - } - if (maxw > max) { - maxw = max; - } - return true; - } - case te_Tuple: { - for (TypeExpr* arg : args) { - if (arg->minw > 1 || arg->maxw < 1 || arg->minw > arg->maxw) { - return false; - } - } - return true; - } - default: - return 
false; - } -} - -int TypeExpr::extract_components(std::vector& comp_list) { - if (constr != te_Indirect && constr != te_Tensor) { - comp_list.push_back(this); - return 1; - } - int res = 0; - for (TypeExpr* arg : args) { - res += arg->extract_components(comp_list); - } - return res; -} - -bool TypeExpr::equals_to(const TypeExpr *rhs) const { - const TypeExpr *l = this; - const TypeExpr *r = rhs; - while (l->constr == te_Indirect) - l = l->args[0]; - while (r->constr == te_Indirect) - r = r->args[0]; - - bool eq = l->constr == r->constr && (l->constr == te_Unknown || l->value == r->value) && - l->minw == r->minw && l->maxw == r->maxw && - l->was_forall_var == r->was_forall_var && - l->args.size() == r->args.size(); - if (!eq) - return false; - - for (int i = 0; i < static_cast(l->args.size()); ++i) { - if (!l->args[i]->equals_to(r->args[i])) - return false; - } - return true; -} - -bool TypeExpr::has_unknown_inside() const { - if (constr == te_Unknown) - return true; - - for (const TypeExpr* inner : args) { - if (inner->has_unknown_inside()) - return true; - } - return false; -} - -TypeExpr* TypeExpr::new_map(TypeExpr* from, TypeExpr* to) { - return new TypeExpr{te_Map, std::vector{from, to}}; -} - -void TypeExpr::replace_with(TypeExpr* te2) { - if (te2 == this) { - return; - } - constr = te_Indirect; - value = 0; - minw = te2->minw; - maxw = te2->maxw; - args.clear(); - args.push_back(te2); -} - -bool TypeExpr::remove_indirect(TypeExpr*& te, TypeExpr* forbidden) { - tolk_assert(te); - while (te->constr == te_Indirect) { - te = te->args[0]; - } - if (te->constr == te_Unknown) { - return te != forbidden; - } - bool res = true; - for (auto& x : te->args) { - res &= remove_indirect(x, forbidden); - } - return res; -} - -std::vector TypeExpr::remove_forall(TypeExpr*& te) { - tolk_assert(te && te->constr == te_ForAll); - tolk_assert(te->args.size() >= 1); - std::vector new_vars; - for (std::size_t i = 1; i < te->args.size(); i++) { - new_vars.push_back(new_hole(1)); - } 
- TypeExpr* te2 = te; - // std::cerr << "removing universal quantifier in " << te << std::endl; - te = te->args[0]; - remove_forall_in(te, te2, new_vars); - // std::cerr << "-> " << te << std::endl; - return new_vars; -} - -bool TypeExpr::remove_forall_in(TypeExpr*& te, TypeExpr* te2, const std::vector& new_vars) { - tolk_assert(te); - tolk_assert(te2 && te2->constr == te_ForAll); - if (te->constr == te_Var) { - for (std::size_t i = 0; i < new_vars.size(); i++) { - if (te == te2->args[i + 1]) { - te = new_vars[i]; - return true; - } - } - return false; - } - if (te->constr == te_ForAll) { - return false; - } - if (te->args.empty()) { - return false; - } - auto te1 = new TypeExpr(*te); - bool res = false; - for (auto& arg : te1->args) { - res |= remove_forall_in(arg, te2, new_vars); - } - if (res) { - te = te1; - } else { - delete te1; - } - return res; -} - -void TypeExpr::show_width(std::ostream& os) { - os << minw; - if (maxw != minw) { - os << ".."; - if (maxw < w_inf) { - os << maxw; - } - } -} - -std::ostream& operator<<(std::ostream& os, TypeExpr* type_expr) { - if (!type_expr) { - return os << "(null-type-ptr)"; - } - return type_expr->print(os); -} - -std::ostream& TypeExpr::print(std::ostream& os, int lex_level) const { - switch (constr) { - case te_Unknown: - return os << "??" 
<< value; - case te_Var: - if (value >= -26 && value < 0) { - return os << "_" << (char)(91 + value); - } else if (value >= 0 && value < 26) { - return os << (char)(65 + value); - } else { - return os << "TVAR" << value; - } - case te_Indirect: - return os << args[0]; - case te_Atomic: { - switch (value) { - case _Int: - return os << "int"; - case _Cell: - return os << "cell"; - case _Slice: - return os << "slice"; - case _Builder: - return os << "builder"; - case _Continutaion: - return os << "cont"; - case _Tuple: - return os << "tuple"; - default: - return os << "atomic-type-" << value; - } - } - case te_Tensor: { - if (lex_level > -127) { - os << "("; - } - auto c = args.size(); - if (c) { - for (const auto& x : args) { - x->print(os); - if (--c) { - os << ", "; - } - } - } - if (lex_level > -127) { - os << ")"; - } - return os; - } - case te_Tuple: { - os << "["; - auto c = args.size(); - if (c == 1 && args[0]->constr == te_Tensor) { - args[0]->print(os, -127); - } else if (c) { - for (const auto& x : args) { - x->print(os); - if (--c) { - os << ", "; - } - } - } - return os << "]"; - } - case te_Map: { - tolk_assert(args.size() == 2); - if (lex_level > 0) { - os << "("; - } - args[0]->print(os, 1); - os << " -> "; - args[1]->print(os); - if (lex_level > 0) { - os << ")"; - } - return os; - } - case te_ForAll: { - tolk_assert(args.size() >= 1); - if (lex_level > 0) { - os << '('; - } - os << "Forall "; - for (std::size_t i = 1; i < args.size(); i++) { - os << (i > 1 ? 
' ' : '('); - args[i]->print(os); - } - os << ") "; - args[0]->print(os); - if (lex_level > 0) { - os << ')'; - } - return os; - } - default: - return os << "unknown-type-expr-" << constr; - } -} - -void UnifyError::print_message(std::ostream& os) const { - os << "cannot unify type " << te1 << " with " << te2; - if (!msg.empty()) { - os << ": " << msg; - } -} - -std::ostream& operator<<(std::ostream& os, const UnifyError& ue) { - ue.print_message(os); - return os; -} - -void check_width_compat(TypeExpr* te1, TypeExpr* te2) { - if (te1->minw > te2->maxw || te2->minw > te1->maxw) { - std::ostringstream os{"cannot unify types of widths ", std::ios_base::ate}; - te1->show_width(os); - os << " and "; - te2->show_width(os); - throw UnifyError{te1, te2, os.str()}; - } -} - -void check_update_widths(TypeExpr* te1, TypeExpr* te2) { - check_width_compat(te1, te2); - te1->minw = te2->minw = std::max(te1->minw, te2->minw); - te1->maxw = te2->maxw = std::min(te1->maxw, te2->maxw); - tolk_assert(te1->minw <= te1->maxw); -} - -void unify(TypeExpr*& te1, TypeExpr*& te2) { - tolk_assert(te1 && te2); - // std::cerr << "unify( " << te1 << " , " << te2 << " )\n"; - while (te1->constr == TypeExpr::te_Indirect) { - te1 = te1->args[0]; - } - while (te2->constr == TypeExpr::te_Indirect) { - te2 = te2->args[0]; - } - if (te1 == te2) { - return; - } - if (te1->constr == TypeExpr::te_ForAll) { - TypeExpr* te = te1; - std::vector new_vars = TypeExpr::remove_forall(te); - for (TypeExpr* t : new_vars) { - t->was_forall_var = true; - } - unify(te, te2); - for (TypeExpr* t : new_vars) { - t->was_forall_var = false; - } - return; - } - if (te2->constr == TypeExpr::te_ForAll) { - TypeExpr* te = te2; - std::vector new_vars = TypeExpr::remove_forall(te); - for (TypeExpr* t : new_vars) { - t->was_forall_var = true; - } - unify(te1, te); - for (TypeExpr* t : new_vars) { - t->was_forall_var = false; - } - return; - } - if (te1->was_forall_var && te2->constr == TypeExpr::te_Tensor) { - throw 
UnifyError{te1, te2, "cannot unify generic type and tensor"}; - } - if (te2->was_forall_var && te1->constr == TypeExpr::te_Tensor) { - throw UnifyError{te2, te1, "cannot unify generic type and tensor"}; - } - if (te1->constr == TypeExpr::te_Unknown) { - if (te2->constr == TypeExpr::te_Unknown) { - tolk_assert(te1->value != te2->value); - } - if (!TypeExpr::remove_indirect(te2, te1)) { - throw UnifyError{te1, te2, "type unification results in an infinite cyclic type"}; - } - check_update_widths(te1, te2); - te1->replace_with(te2); - te1 = te2; - return; - } - if (te2->constr == TypeExpr::te_Unknown) { - if (!TypeExpr::remove_indirect(te1, te2)) { - throw UnifyError{te2, te1, "type unification results in an infinite cyclic type"}; - } - check_update_widths(te2, te1); - te2->replace_with(te1); - te2 = te1; - return; - } - if (te1->constr != te2->constr || te1->value != te2->value || te1->args.size() != te2->args.size()) { - throw UnifyError{te1, te2}; - } - for (std::size_t i = 0; i < te1->args.size(); i++) { - unify(te1->args[i], te2->args[i]); - } - if (te1->constr == TypeExpr::te_Tensor) { - if (!te1->recompute_width()) { - throw UnifyError{te1, te2, "type unification incompatible with known width of first type"}; - } - if (!te2->recompute_width()) { - throw UnifyError{te2, te1, "type unification incompatible with known width of first type"}; - } - check_update_widths(te1, te2); - } - te1->replace_with(te2); - te1 = te2; -} - -} // namespace tolk