diff --git a/tolk-tester/tests/indexed-access.tolk b/tolk-tester/tests/indexed-access.tolk index 7915536e..e2bd3dd9 100644 --- a/tolk-tester/tests/indexed-access.tolk +++ b/tolk-tester/tests/indexed-access.tolk @@ -178,7 +178,7 @@ fun test114(f: int, s: int) { @method_id(115) fun test115() { var y = [[[[true]]]]; - return (y, y.0.0.0.0 = !y.0.0.0.0, y.0); + return (y, ((((y).0).0).0).0 = !y.0.0.0.0, y.0); } @method_id(116) @@ -248,7 +248,7 @@ fun test122(x: (int, int)) { @method_id(123) fun test123() { var t = [[10, 20]] as [[int,int]]?; - t!.0.0 = t!.0.1 = 100; + ((t!).0).0 = ((t!).0).1 = 100; return t; } diff --git a/tolk-tester/tests/invalid-generics-13.tolk b/tolk-tester/tests/invalid-generics-13.tolk index 7574bde7..d10e2174 100644 --- a/tolk-tester/tests/invalid-generics-13.tolk +++ b/tolk-tester/tests/invalid-generics-13.tolk @@ -6,6 +6,6 @@ fun cantApplyPlusOnNullable() { /** @compilation_should_fail -@stderr while instantiating generic function `calcSum` +@stderr in function `calcSum` @stderr can not apply operator `+` to `int?` and `int?` */ diff --git a/tolk-tester/tests/invalid-generics-14.tolk b/tolk-tester/tests/invalid-generics-14.tolk new file mode 100644 index 00000000..eb3adc92 --- /dev/null +++ b/tolk-tester/tests/invalid-generics-14.tolk @@ -0,0 +1,17 @@ +fun eq(v: X) {} + +fun cantDeduceWhenNotInferred() { + // at type inferring (before type checking) they are unknown + var (x, y) = 2; + + eq(x as int); // ok (since execution doesn't reach type checking) + eq(x); // ok (since execution doesn't reach type checking) + eq(x); +} + +/** +@compilation_should_fail +@stderr in function `cantDeduceWhenNotInferred` +@stderr can not deduce X for generic function `eq` +@stderr eq(x); + */ diff --git a/tolk-tester/tests/invalid-generics-7.tolk b/tolk-tester/tests/invalid-generics-7.tolk index b51bb82c..076b7804 100644 --- a/tolk-tester/tests/invalid-generics-7.tolk +++ b/tolk-tester/tests/invalid-generics-7.tolk @@ -11,8 +11,7 @@ fun foo(value: X) : X { 
/** @compilation_should_fail -@stderr while instantiating generic function `foo` -@stderr while instantiating generic function `bar` +@stderr in function `bar` @stderr can not convert type `int` to return type `slice` @stderr return 1 */ diff --git a/tolk-tester/tests/invalid-typing-11.tolk b/tolk-tester/tests/invalid-typing-11.tolk index d6aa09c3..f6e89d08 100644 --- a/tolk-tester/tests/invalid-typing-11.tolk +++ b/tolk-tester/tests/invalid-typing-11.tolk @@ -3,6 +3,7 @@ fun failBitwiseNotOnBool() { if (~eq) { return 0; } + return -1; } /** diff --git a/tolk-tester/tests/invalid-typing-19.tolk b/tolk-tester/tests/invalid-typing-19.tolk new file mode 100644 index 00000000..58b6c1fc --- /dev/null +++ b/tolk-tester/tests/invalid-typing-19.tolk @@ -0,0 +1,12 @@ +fun getNullableInt(): int? { return 5; } + +fun testCantApplyNotNullForAlwaysNull() { + var x: int? = getNullableInt(); + if (x != null) { return 0; } + return x! + 1; +} + +/** +@compilation_should_fail +@stderr operator `!` used for always null expression + */ diff --git a/tolk-tester/tests/invalid-typing-20.tolk b/tolk-tester/tests/invalid-typing-20.tolk new file mode 100644 index 00000000..457bc97a --- /dev/null +++ b/tolk-tester/tests/invalid-typing-20.tolk @@ -0,0 +1,15 @@ +fun getNullableInt(): int? { return 5; } + +fun testFlowContextAppliedInBinaryOperator() { + var x: int? = getNullableInt(); + var y: int? = getNullableInt(); + if ((y = null) < y) { + return -100; + } + return 0; +} + +/** +@compilation_should_fail +@stderr can not apply operator `<` to `null` and `null` + */ diff --git a/tolk-tester/tests/invalid-typing-21.tolk b/tolk-tester/tests/invalid-typing-21.tolk new file mode 100644 index 00000000..d2a815ee --- /dev/null +++ b/tolk-tester/tests/invalid-typing-21.tolk @@ -0,0 +1,14 @@ +fun getNullableInt(): int? { return 5; } + +fun testNeverTypeOccurs() { + var x: int? 
= getNullableInt(); + if (x == null && x != null) { + return x + 0; + } + return 0; +} + +/** +@compilation_should_fail +@stderr can not apply operator `+` to `never` and `int` + */ diff --git a/tolk-tester/tests/invalid-typing-22.tolk b/tolk-tester/tests/invalid-typing-22.tolk new file mode 100644 index 00000000..f962f364 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-22.tolk @@ -0,0 +1,9 @@ +fun testLogicalAndNotConditionDoesntAffect(x: int?) { + var gt1 = x != null && x > 1; + return x + 0; +} + +/** +@compilation_should_fail +@stderr can not apply operator `+` to `int?` and `int` + */ diff --git a/tolk-tester/tests/invalid-typing-23.tolk b/tolk-tester/tests/invalid-typing-23.tolk new file mode 100644 index 00000000..74feed52 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-23.tolk @@ -0,0 +1,15 @@ +fun getTensor(): (int?, int?) { return (5, null); } + +fun testSmartCastsForFieldsDropAfterAssign() { + var t = getTensor(); + if (t.0 != null && t.1 != null) { + t = getTensor(); + return t.0 + t.1; + } + return -1; +} + +/** +@compilation_should_fail +@stderr can not apply operator `+` to `int?` and `int?` + */ diff --git a/tolk-tester/tests/invalid-typing-24.tolk b/tolk-tester/tests/invalid-typing-24.tolk new file mode 100644 index 00000000..75f61be9 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-24.tolk @@ -0,0 +1,16 @@ +fun getNullableInt(): int? { return 5; } + +fun getTensor(x: int?): (int?, int) { return (x, 0); } + +fun testSmartCastsDropAfterAssign() { + var x: int? = 0; + var y: int? 
= 0; + (getTensor(x = getNullableInt()).0, getTensor(y = getNullableInt()).0) = (x + y, x - y); + return x+y; +} + +/** +@compilation_should_fail +@stderr can not apply operator `+` to `int?` and `int?` +@stderr x + y, x - y + */ diff --git a/tolk-tester/tests/invalid-typing-25.tolk b/tolk-tester/tests/invalid-typing-25.tolk new file mode 100644 index 00000000..1621bab1 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-25.tolk @@ -0,0 +1,14 @@ +fun takeNullableTensor(mutate ij: (int, int)?) { } + +fun testSmartCastsDropAfterMutate() { + var x: (int, int)? = (1, 2); + return x.0; // ok + takeNullableTensor(mutate x); + return x.1; // error +} + +/** +@compilation_should_fail +@stderr type `(int, int)?` is not indexable +@stderr return x.1 + */ diff --git a/tolk-tester/tests/invalid-typing-26.tolk b/tolk-tester/tests/invalid-typing-26.tolk new file mode 100644 index 00000000..bf5a1165 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-26.tolk @@ -0,0 +1,12 @@ +fun getNullableInt(): int? { return 5; } + +fun testAssertThrowIsConditional() { + var (x, y) = (getNullableInt(), getNullableInt()); + assert(x != null) throw(y = 10); + return x + y; +} + +/** +@compilation_should_fail +@stderr can not apply operator `+` to `int` and `int?` + */ diff --git a/tolk-tester/tests/invalid-typing-27.tolk b/tolk-tester/tests/invalid-typing-27.tolk new file mode 100644 index 00000000..3861403b --- /dev/null +++ b/tolk-tester/tests/invalid-typing-27.tolk @@ -0,0 +1,18 @@ +fun assignNull2(mutate x: T1?, mutate y: T2?) { + if (false) { + x = null; + y = null; + } +} + +fun testSmartCastsDropAfterNullableGeneric() { + var (x: int?, y: int?) 
= (1, 2); + x * y; // ok + assignNull2(x, y); // treated like assignments to nullable + x << y; // error +} + +/** +@compilation_should_fail +@stderr can not apply operator `<<` to `int?` and `int?` + */ diff --git a/tolk-tester/tests/invalid-typing-28.tolk b/tolk-tester/tests/invalid-typing-28.tolk new file mode 100644 index 00000000..5d60ff22 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-28.tolk @@ -0,0 +1,15 @@ +fun getNullableInt(): int? { return 5; } + +fun testReassignInRedef() { + var t1: int? = getNullableInt(); + if (t1 != null) { + var (t1 redef, t2) = (getNullableInt(), 5); + return t1 + t2; + } + return -1; +} + +/** +@compilation_should_fail +@stderr can not apply operator `+` to `int?` and `int` + */ diff --git a/tolk-tester/tests/invalid-typing-29.tolk b/tolk-tester/tests/invalid-typing-29.tolk new file mode 100644 index 00000000..e8a4e5e2 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-29.tolk @@ -0,0 +1,14 @@ +fun getNullableInt(): int? { return 5; } + +fun testTryBodyDontSmartCast() { + var x = getNullableInt(); + try { + x = 5; + } catch {} + return x * 10; // x is not int here; for now, we have no exception edges, assuming it can be anywhere inside try +} + +/** +@compilation_should_fail +@stderr can not apply operator `*` to `int?` and `int` + */ diff --git a/tolk-tester/tests/invalid-typing-30.tolk b/tolk-tester/tests/invalid-typing-30.tolk new file mode 100644 index 00000000..53dfc5ca --- /dev/null +++ b/tolk-tester/tests/invalid-typing-30.tolk @@ -0,0 +1,15 @@ +fun getNullableInt(): int? { return 5; } + +fun testDoWhileCondition() { + var (x: int?, y: int?) 
= (10, 20); + do { + x = getNullableInt(); + y = getNullableInt(); + } while(x == null); + return x * y; // x is 100% int, but y is not +} + +/** +@compilation_should_fail +@stderr can not apply operator `*` to `int` and `int?` + */ diff --git a/tolk-tester/tests/invalid-typing-44.tolk b/tolk-tester/tests/invalid-typing-44.tolk new file mode 100644 index 00000000..2ec5d0e8 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-44.tolk @@ -0,0 +1,9 @@ +fun cantAssignIntToTensor() { + var (x, y) = 2; + x + y; +} + +/** +@compilation_should_fail +@stderr can not assign `int` to a tensor + */ diff --git a/tolk-tester/tests/invalid-typing-45.tolk b/tolk-tester/tests/invalid-typing-45.tolk new file mode 100644 index 00000000..b357b637 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-45.tolk @@ -0,0 +1,9 @@ +fun cantAssignSizesMismatch() { + var [x, y] = [2, 3, 4]; + x + y; +} + +/** +@compilation_should_fail +@stderr can not assign `[int, int, int]`, sizes mismatch + */ diff --git a/tolk-tester/tests/never-type-tests.tolk b/tolk-tester/tests/never-type-tests.tolk new file mode 100644 index 00000000..89447389 --- /dev/null +++ b/tolk-tester/tests/never-type-tests.tolk @@ -0,0 +1,28 @@ +fun takeInt(a: int) {} + +@method_id(101) +fun test1(x: int?) { + if (x == null && x != null) { + var y = x; + __expect_type(y, "never"); + __expect_type(y!, "never"); + // `never` type is assignable to anything, flow won't reach this point + var t: (int, int) = x; + t = y; + takeInt(x); + var cb: (int) -> int = x; + x as int?; + x as (int, int)?; + x as never; + return x; + } + return 123; +} + +fun main() { + __expect_type(test1, "(int?) -> int"); +} + +/** +@testcase | 101 | null | 123 + */ diff --git a/tolk-tester/tests/null-keyword.tolk b/tolk-tester/tests/null-keyword.tolk index 3ea0aaa2..65890a92 100644 --- a/tolk-tester/tests/null-keyword.tolk +++ b/tolk-tester/tests/null-keyword.tolk @@ -26,7 +26,9 @@ fun test2(x: int?) { if (null != x) { var y: int? 
= null; if (y != null) { return 10; } - return y; + if (10 < 20) { // always true at runtime (not at compile-time) + return y; + } } try { return x! + 10; // will throw, since not a number @@ -45,14 +47,6 @@ fun test3(x: int) { return myIsNull(x > 10 ? null : x); } -fun getUntypedNull() { - var untyped: null = null; - if (true) { - return untyped; - } - return untyped; -} - @method_id(104) fun test4(): null { var (_, (_, untyped: null)) = (3, (createEmptyTuple, null)); @@ -62,12 +56,6 @@ fun test4(): null { return untyped; } -@method_id(105) -fun test5() { - var n: slice? = getUntypedNull(); - return !(null == n) ? n!.loadInt(32) : 100; -} - @method_id(107) fun test7() { var b = beginCell().storeMaybeRef(null) as builder?; @@ -85,6 +73,7 @@ fun test8() { } fun main() { + // the compiler optimizes this at compile-time var i: int? = null; if (i == null) { return 1; @@ -99,7 +88,6 @@ fun main() { @testcase | 103 | 5 | 5 @testcase | 103 | 15 | -1 @testcase | 104 | | (null) -@testcase | 105 | | 100 @testcase | 107 | | -11 @fif_codegen """ @@ -127,12 +115,7 @@ fun main() { """ main PROC:<{ // - PUSHNULL // i - ISNULL // '2 - IFJMP:<{ // - 1 PUSHINT // '3=1 - }> // - 10 PUSHINT // '4=10 + 1 PUSHINT // '3=1 }> """ diff --git a/tolk-tester/tests/nullable-tensors.tolk b/tolk-tester/tests/nullable-tensors.tolk index 4008482f..d0720273 100644 --- a/tolk-tester/tests/nullable-tensors.tolk +++ b/tolk-tester/tests/nullable-tensors.tolk @@ -73,7 +73,7 @@ fun test104() { var t1_1: (int, int)? = (1, 2); var t1_2: (int, int)? = t1_1; var t1_3: (int, int)? = t1_1!; - var t2_1: (int, int)? = null; + var t2_1: (int, int)? = getNullableTensor(null); var t2_2 = t2_1; return (t1_3, t2_2); } @@ -101,9 +101,12 @@ fun test108(x1: (int, int)) { incrementTensorComponents(mutate x1); x1.incrementTensorComponents(); var x2: (int, int)? 
= x1; + __expect_type(x2, "(int, int)"); x2.incrementNullableTensorComponents().incrementNullableTensorComponents(); incrementNullableTensorComponents(mutate x2); + __expect_type(x2, "(int, int)?"); var x3: (int, int)? = null; + __expect_type(x3, "null"); x3.incrementNullableTensorComponents().incrementNullableTensorComponents(); incrementNullableTensorComponents(mutate x3); return (x1, x2, x3); @@ -148,7 +151,7 @@ fun test111() { var x = (1, 2); assignFirstComponent(mutate x, 50); var x2: (int, int)? = null; - var x3 = x2; + var x3 = x2 as (int, int)?; assignFirstComponentNullable(mutate x2, 30); assignFirstComponentNullable(mutate x3, 70); g110_1 = (1, 2); @@ -361,23 +364,36 @@ fun test132() { return (result, 777, aln1, aln2, doubleNulls.1 == null, doubleNulls); } +@method_id(133) +fun test133() { + var x: (int, int)? = (10, 20); + return sumOfTensor(x) + x.0 + x.1; // smart casted +} + +@method_id(134) +fun test134(): (int, int)? { + var x: (int, int)? = (10, 20); + incrementTensorComponents(mutate x); // smart casted + return x; +} + fun getNormalNullableTensorWidth1(vLess100: int?): ([int?], ())? { - if (vLess100 != null && vLess100! >= 100) { + if (vLess100 != null && vLess100 >= 100) { return null; } return ([vLess100], ()); // such a nullable tensor can store NULL in the same slot } fun getTrickyNullableTensorWidth1(vLess100: int?): (int?, ())? { - if (vLess100 != null && vLess100! >= 100) { + if (vLess100 != null && vLess100 >= 100) { return null; } return (vLess100, ()); // such a nullable tensor requires an extra stack slot for null presence } fun getEvenTrickierNullableWidth1(vLess100: int?): ((), (int?, ()), ())? { - if (vLess100 != null && vLess100! 
>= 100) { + if (vLess100 != null && vLess100 >= 100) { return null; } return ((), (vLess100, ()), ()); @@ -406,35 +422,35 @@ fun main(){} /** @testcase | 101 | | 1 2 -1 @testcase | 102 | | 1 2 -1 (null) (null) 0 -@testcase | 103 | 1 2 | 3 3 0 1 2 -1 -@testcase | 104 | | 1 2 -1 (null) (null) 0 +@testcase | 103 | 1 2 | 3 3 0 1 2 +@testcase | 104 | | 1 2 (null) (null) 0 @testcase | 105 | | (null) (null) (null) 0 1 2 3 -1 -@testcase | 106 | | 1 2 -1 +@testcase | 106 | | 1 2 @testcase | 107 | | 0 0 -1 0 0 -1 @testcase | 108 | 5 6 | 7 8 10 11 -1 (null) (null) 0 @testcase | 109 | | 0 0 -1 0 -1 0 0 -1 -1 @testcase | 110 | | 3 4 (null) (null) 0 6 7 -1 @testcase | 111 | | 50 30 70 90 100 -@testcase | 112 | | 12 22 -1 +@testcase | 112 | | 12 22 @testcase | 113 | | -1 @testcase | 114 | | (null) (null) (null) 0 (null) (null) (null) 0 @testcase | 115 | | 2 3 7 (null) (null) 0 5 0 -1 0 @testcase | 116 | -1 | (null) (null) 0 (null) (null) 0 @testcase | 116 | 0 | 1 2 -1 1 2 -1 -@testcase | 117 | | (null) (null) 0 1 3 +@testcase | 117 | | (null) 1 3 @testcase | 118 | 5 | 5 10 -1 @testcase | 118 | null | (null) (null) 0 -@testcase | 119 | | (null) (null) 0 (null) (null) 0 1 2 -1 100 +@testcase | 119 | | (null) (null) 1 2 -1 100 @testcase | 120 | -1 | (null) (null) 0 @testcase | 120 | 0 | 1 2 -1 @testcase | 121 | | [ 1 [ 3 4 ] ] @testcase | 122 | 0 | [ 1 [ 3 4 ] 4 (null) ] @testcase | 122 | -1 | [ 1 (null) 4 (null) ] @testcase | 123 | | 1 3 4 -1 -@testcase | 124 | 0 | 1 3 4 -1 4 (null) (null) 0 -1 -@testcase | 124 | -1 | 1 (null) (null) 0 4 (null) (null) 0 -1 +@testcase | 124 | 0 | 1 3 4 -1 4 (null) (null) 0 +@testcase | 124 | -1 | 1 (null) (null) 0 4 (null) (null) 0 @testcase | 125 | | 3 -@testcase | 126 | | 1 (null) (null) 0 2 +@testcase | 126 | | 1 (null) 2 @testcase | 127 | 1 | 1 (null) (null) 0 2 @testcase | 127 | 2 | 1 2 3 -1 4 @testcase | 127 | 3 | 1 (null) (null) 0 5 @@ -447,6 +463,8 @@ fun main(){} @testcase | 130 | -1 | 1 (null) (null) 0 @testcase | 131 | | -1 777 0 777 777 
777 0 0 -1 -1 777 -1 -1 -1 777 @testcase | 132 | | -1 0 -1 0 777 (null) (null) -1 0 0 +@testcase | 133 | | 60 +@testcase | 134 | | 11 21 -1 @testcase | 135 | | [ 10 ] [ (null) ] (null) 777 10 -1 (null) -1 (null) 0 777 10 -1 (null) -1 (null) 0 777 0 0 -1 0 0 -1 0 0 -1 777 0 -1 0 0 -1 0 @fif_codegen diff --git a/tolk-tester/tests/nullable-types.tolk b/tolk-tester/tests/nullable-types.tolk index ebabb80d..24aa7f8a 100644 --- a/tolk-tester/tests/nullable-types.tolk +++ b/tolk-tester/tests/nullable-types.tolk @@ -80,7 +80,7 @@ fun test107() { @method_id(108) fun test108() { var (a, b: cell?, c) = (1, beginCell().endCell(), 3); - b = null; + if (10>3) { b = null; } return a + (b == null ? 0 : b!.beginParse().loadInt(32)) + c; } diff --git a/tolk-tester/tests/smart-cast-tests.tolk b/tolk-tester/tests/smart-cast-tests.tolk new file mode 100644 index 00000000..4d71bb63 --- /dev/null +++ b/tolk-tester/tests/smart-cast-tests.tolk @@ -0,0 +1,678 @@ +// the goal of this file is not only to @testcase results — +// but to check that this file compiles + +fun getNullableInt(): int? { return 5; } +fun getNullableSlice(): slice? { return null; } +fun takeNullableInt(a: int?) {} +fun takeNullableSlice(a: slice?) 
{} +fun increment(mutate self: int) { self += 1; } +fun assignToInt(mutate self: int, value: int) { self = value; } +fun assignToNullableInt(mutate self: int?, value: int) { self = value; } +fun sameTensor(t: (int, int)) { return t; } +fun sameTensor2(t: (int?, (slice, slice, slice, builder)?)) { return t; } +fun eq(v: T) { return v; } +fun getTwo(): X { return 2 as X; } + +fun test1(): int { + var x = getNullableInt(); + var y = getNullableInt(); + if (x != null && y != null) { + __expect_type(x, "int"); + __expect_type(y, "int"); + return x + y; + } + return -1; +} + +fun test2() { + var (x, y) = (getNullableInt(), getNullableInt()); + if (x == null || y == null) { + return null; + } + __expect_type(x, "int"); + __expect_type(y, "int"); + return x + y; +} + +fun test3(): int { + var ([x, y]) = [getNullableInt(), getNullableInt()]; + if (x != null) { + if (((y)) != null) { + __expect_type(x, "int"); + __expect_type(y, "int"); + return x + y; + } + return x; + } + if (random() > -1) { + if (y == null) { return -1; } + else { return y; } + } + return 0; +} + +fun test4() { + var x = getNullableInt(); + if (x != null && x > 0) { + var x = getNullableInt(); + if ((x) != null && x + 10 < 0) { + var x = getNullableInt(); + return 10 > 3 && 10 < 10 && x != null && x + 8 > 10; + } + } + if (x != null && x < 1) { + return false; + } + if (x == null && x == null) { + __expect_type(x, "null"); + return true; + } + return x < x + 3; +} + +fun test5() { + var (a, (b, c)) = (getNullableInt(), (getNullableInt(), getNullableInt())); + if (a == null) { return -1; } + if (!(b != null)) { return -2; } + if (random() ? c == null && c == null : c == null) { return -3; } + return a + b + c; +} + +fun test6() { + var a: int? = 5; + __expect_type(a, "int"); + __expect_type(a != null ? a : null, "int"); + __expect_type(a == null ? 
"" : a, "int"); + takeNullableInt(a); + __expect_type(a, "int"); + if (random()) { + a = null; + } else { + if (random()) { a = null; } + else { a = null; } + } + __expect_type(a, "null"); + takeNullableSlice(a); // ok, `slice?` is `slice | null`, here a definitely null + var b: int? = true ? null : "sl"; + __expect_type(b, "null"); + takeNullableInt(b); + takeNullableSlice(b); // same reason + var c: int? = 10; + __expect_type(c, "int"); + takeNullableSlice(c = null); +} + +fun test7() { + var (a, b, c, d) = (getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt()); + if (a == null && true) { return -1; } + if (true && true && 1 && !0 && b == null) { return -2; } + if (true ? c == null && (((c))) == null && true : false) { return -3; } + if (!true ? random() > 0 : a != null && (d == null && b != null)) { return -4; } + return a + b + c + d; +} + +fun test8(x: int?, y: int?) { + var allGt1 = x != null && x > 1 && y != null && y > 1; + var xGtY = x != null && y != null && x > y; + var xLtEq0 = x == null || x < 0; + (x = 0) < random() || x > 10; + return x + 0; +} + +fun test9() { + var x = getNullableInt(); + var y = getNullableInt(); + if (x == null || y == null) { + return -1; + } + __expect_type(x, "int"); + __expect_type(y, "int"); + return x + y; +} + +fun test10(): int { + var (x, y) = (getNullableInt(), getNullableInt()); + if (x == null) { + if (y == null) { return -1; } + __expect_type(x, "null"); + __expect_type(y, "int"); + return y; + } + if (y == null) { + return x; + } + __expect_type(x, "int"); + __expect_type(y, "int"); + return x + y; +} + +fun test11() { + var [x, y] = [getNullableInt(), getNullableInt()]; + if (random()) { return x == null || y == null ? -1 : x + y; } + if (true && (x == null || y == null) && !!true) { return 0; } + return x + y; +} + +fun test12() { + var (x, y) = (getNullableInt(), getNullableInt()); + if (random() ? 
x == null || y == null : x == null || y == null) { return -1; } + __expect_type(x, "int"); + __expect_type(y, "int"); + return x + y; +} + +fun test13() { + var x: int? = getNullableInt(); + var y: int? = 10; + var z = getNullableInt(); + var w = getNullableInt(); + beginCell().storeInt(x!, 32).storeInt(x = getNullableInt()!, 32).storeInt(x, 32) + .storeInt(y, 32).storeInt(z = 10, 32).storeInt(x + y + z, 32) + .storeInt(w == null ? -1 : w, 32).storeInt(!(null == w) ? w : -1, 32); +} + +fun test14() { + var (x, y) = (getNullableInt(), getNullableInt()); + if (x == null) { + x = 0; + } + if (y == null) { + if (random()) { return 0; } + else { y = 0; } + } + return x + y; +} + +fun test20() { + var t = (getNullableInt(), getNullableInt()); + if (t.0 != null && t.1 != null) { + __expect_type(t.0, "int"); + __expect_type(t.1, "int"); + return t.0 + t.1; + } + t.0 = 10; + if (t.1 == null) { + t.1 = 20; + } + __expect_type(t.0, "int"); + __expect_type(t.1, "int"); + return t.0 + t.1; +} + +fun test21() { + var t = (getNullableInt(), (getNullableInt(), getNullableInt())); + if (t.0 != null && t.1.0 != null) { + if (t.1.1 != null) { return t.0 + t.1.0 + t.1.1; } + return t.0 + t.1.0; + } + if (t.0 != null) { + return t.0 + 0; + } + __expect_type(t.0, "null"); + __expect_type(t.1.0, "int?"); + return t.1.0 == null ? -1 : t.1.0 + 0; +} + +fun test22() { + var t = (getNullableInt(), (getNullableInt(), getNullableInt())); + if (t.0 == null || t.1.0 == null || t.1.1 == null) { + return -1; + } + return t.0 + t.1.0 + t.1.1; +} + +@method_id(123) +fun test23() { + var (x: int?, y: int?, z: int?) = (getNullableInt(), getNullableInt(), getNullableInt()); + ((x = 1, 0).0, (y = 2, 1).0) = (3, z = 4); + return x + y + z; +} + +@method_id(124) +fun test24(x: int?) { + if (x == null) { + __expect_type(x, "null"); + assignToNullableInt(mutate x, 10); + __expect_type(x, "int?"); + x.assignToNullableInt(x! 
+ 5); + } else { + __expect_type(x, "int"); + increment(mutate x); + x.increment(); + __expect_type(x, "int"); + } + __expect_type(x, "int?"); + return x; +} + +fun test25() { + var x = (getNullableInt(), getNullableInt(), getNullableInt()); + x.0 = x.2 = random(); + return (x.0) + ((x.2)); +} + +fun test26() { + var x = [getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt(), + getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt()]; + if (~(x.0 = random())) { return; } + if ((x.1 = random()) < (x.2 = random())) { return; } + else if (!(x.2 <=> (x.3 = random()))) { return; } + x.5 = (x.4 = random()) ? (x.6 = random()) : (x.6 = random()); + if ((x.7 = random()) as int) { return; } + if (((((x.8 = random()) != null)))) { return; } + if ([x.1, (x.9 = random())!].1) { return; } + val result = x.0+x.1+x.2+x.3+x.4+x.5+x.6+x.7+x.8+x.9; +} + +fun test27() { + var (x, _) = ([getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt(), + getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt()], []); + +(x.0 = random()); + x.0 += [((x.1 = random()) < (x.2 = random() + x.1)) as int].0; + !(x.2 <=> (x.3 = random() + x.2)); + x.5 = (x.4 = random()) ? (x.6 = random()) : (x.6 = random()); + (x.7 = random()) as int; + (((((x.8 = random()) != null)))); + [x.1, (x.9 = random())!].1; + return x.0+x.1+x.2+x.3+x.4+x.5+x.6+x.7+x.8+x.9; +} + +fun test28() { + var x = (getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt()); + __expect_type((x.0 = random(), x.0 += (x.1 = random()) as int, !(x.1 <=> (x.2 = random() + x.0)) == null, (x.3 = random()) ? x.3 : (!x.3) as int), + "(int, int, bool, int)"); +} + +fun test29() { + var x = (getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt()); + __expect_type([x.0 = random(), ((x.0 += (x.1 = random()) as int)), !(x.1 <=> (x.2 = random() + x.0)) == null, (x.3 = random()) ? 
x.3 : (!x.3) as int], + "[int, int, bool, int]"); +} + +@method_id(130) +fun test30(initial5: bool) { + var t: (int?, (int?, (int?, int?))) = initial5 + ? (getNullableInt(), (getNullableInt(), (getNullableInt(), getNullableInt()))) + : (null, (null, (null, null))); + if (t.0 == null || t.1.0 == null || t.1.1.0 == null || t.1.1.1 == null) { + if (t.1.0 == null || t.1.1.0 == null) { + if (t.1.1.0 == null) { + t.1.1.0 = 4; + } + __expect_type(t.1.1.0, "int"); + __expect_type(t.1.1.1, "int?"); + __expect_type(t.1.0, "int?"); + t.1.1.1 = 3; + t.1.0 = 2; + __expect_type(t.1.1.1, "int"); + __expect_type(t.1.0, "int"); + } + if (((((t.1.1.1)))) != null) {} + else { t.1.1.1 = 3; } + t.0 = 1; + } + return t.0 + t.1.0 + t.1.1.0 + t.1.1.1; +} + +fun test31() { + var t = (getNullableInt(), getNullableInt()); + t.0 == null ? (t.0, t.1) = (1, 2) : (t.1, t.0) = (4, 3); + return t.0 + t.1; +} + +@method_id(132) +fun test32() { + var t: (int?, (int?, int?)?, (int?, int?)) = (getNullableInt(), (getNullableInt(), getNullableInt()), (getNullableInt(), getNullableInt())); + if (t.0 == null) { return -1; } + t.1 != null && t.1.0 == null ? t.1 = (1, 2) : t.1 = (3, 4); + if (t.2.1 != null) { t.2.0 = 1; t.2.1 = 2; } + else { [t.2.0, t.2.1] = [3, 4]; } + return t.0 + t.1.0! + t.1.1! 
+ t.2.0 + t.2.1; +} + +@method_id(133) +fun test33(): int { + var x = getNullableInt(); + repeat (eq(x = 5)) { + __expect_type(x, "int"); + increment(mutate x); + } + return x; +} + +fun test34() { + var (x, y) = (getNullableInt(), getNullableInt()); + if (random()) { throw (x = 1, y = 2); } + else { throw (x = 3, y = (1, getNullableInt()!).1); } + return x + y; +} + +fun test35() { + var (x, y, z, t) = (getNullableInt(), getNullableInt(), getNullableInt(), (getNullableInt(), getNullableInt())); + assert (x != null, 404); + assert (t.0 != null && true && !(t.1 == null) && !(z = 4)) throw (y = 404); + __expect_type(y, "int?"); + return x + t.0 + t.1 + z; +} + +fun test36() { + var x = getNullableInt(); + assert (x == null, x + 0); // check that x is int there + __expect_type(x, "null"); +} + +fun test37() { + var (x, code) = (getNullableInt()!, getNullableInt()); + try { + } catch(code) { + x = 20; + return x + code; // code is scoped + } + return code == null ? x : x + code; +} + +fun assignNull2(mutate x: T1?, mutate y: T2?) { + x = null; + y = null; +} + +fun test38() { + var (x: int?, y: int?) = (1, 2); + __expect_type(x, "int"); + __expect_type(y, "int"); + assignNull2(mutate x, mutate y); + __expect_type(x, "int?"); + __expect_type(y, "int?"); + if (x != null) { + if (y == null) { return -1; } + return x + y; + } + var t: (int?, slice?) = (null, null); + if (!false) { t.0 = 1; } + if (true) { t.1 = beginCell().endCell().beginParse(); } + __expect_type(t.0, "int"); + __expect_type(t.1, "slice"); + t.0 + t.1.loadInt(32); + assignNull2(mutate t.0, mutate t.1); + __expect_type(t.0, "int?"); + __expect_type(t.1, "slice?"); + t.0 != null && t.1 != null ? t.0 + loadInt(mutate t.1, 32) : -1; + return t.0 != null && t.1 != null ? t.0 + loadInt(mutate t.1, 32) : -1; +} + +@method_id(139) +fun test39() { + var x: (int?, int?)? = (4, null); + x.1 = 10; + x.1 += 1; + x!.1 += 1; + return (x!.0! + x.1); +} + +@method_id(140) +fun test40(second: int?) { + var x: (int?, int?)? 
= (4, second); + if (x.1 != null) { + val result = x.1 + x!.1 + x!!.1 + x.1! + x!!.1!!; + } + if (x!.1 != null) { + val result = x.1 + x!.1 + x!!.1 + x.1! + x!!.1!!; + } + if (!(x!!.1 != null)) { + return -1; + } + return x.1 + x!.1 + x!!.1 + x.1! + x!!.1!!; +} + +@method_id(141) +fun test41() { + var t: (int, int)? = null; + return sameTensor(t = (1, 2)); +} + +@method_id(142) +fun test42() { + var t: (int?, (int?, (int, int)?)?) = (getNullableInt(), (1, (2, 3))); + t.1 = (3,null); + __expect_type(t.1, "(int?, (int, int)?)"); + __expect_type(t, "(int?, (int?, (int, int)?)?)"); + return (t, t.1); +} + +@method_id(143) +fun test43() { + var t1: ((int, int), int?) = ((1, 2), 3); + var t2: ((int?, int?), (int?,int?)?) = ((null, null), (null, 5)); + t2.0 = t1.0 = (10, 11); + t2.1 = t1.1 = null; + return (t1, t2); +} + +@method_id(144) +fun test44() { + var t1: ((int, int), int?) = ((1, 2), 3); + var t2: ((int?, int?), (int?,int?)?) = ((null, null), (null, 5)); + t1.0 = t2.0 = (10, 11); + t1.1 = t2.1 = null; + __expect_type(t1, "((int, int), int?)"); + __expect_type(t2, "((int?, int?), (int?, int?)?)"); + return (t1, t2); +} + +@method_id(145) +fun test45() { + var t: (int?, (int?, (int, int)?)?) = (getNullableInt(), (1, (2, 3))); + var t2 = sameTensor2(t.1 = (3,null)); + return (t, t2, t.1); +} + +fun autoInfer46() { + var t1: int? = 3; + var t2: (int, int)? = (4, 5); + __expect_type(t1, "int"); + __expect_type(t2, "(int, int)"); + return (t1, t2); // proven to be not null, inferred (int, (int,int)) +} + +@method_id(146) +fun test46() { + var r46_1: (int, (int,int)) = autoInfer46(); + var r46_2: (int, (int,int)?) = autoInfer46(); + return (r46_1, r46_2); +} + +@method_id(147) +fun test47() { + var t1: int? = 3; + var t2: (int, int)? 
= (4, 5); + t1 = t2 = null; + __expect_type(t1, "null"); + __expect_type(t2, "null"); + var result = (t1, t2); // proven to be always null, inferred (null, null), 2 slots on a stack + return (result, 100, result.1, 100, t2 as (int, int)?); +} + +fun test48() { + var t1: int? = getNullableInt(); + if (t1 != null) { + var (t1 redef, t2) = (10, 5); + return t1 + t2; + var t2 redef = getNullableInt()!; + return t1 + t2; + } + return -1; +} + +fun test49(x: int?) { + while (x == null) { + x = getNullableInt(); + } + __expect_type(x, "int"); + return x + 1; +} + +fun test50() { + var (x: int?, y: int?) = (1, 2); + do { + x = getNullableInt(); + y = getNullableInt(); + } while (x == null || y == null); + return x + y; +} + +fun test51() { + while (true) { return; } + // test that no error "control reaches end of function" +} + +fun test52() { + do { } while (true); +} + +fun test53() { + var x1: int? = getNullableInt(); + var x2: int? = 5; + var x3: int? = 5; + var x10: int? = null; + var x11: int? = 5; + var x12: int? = 5; + while (x1 != null) { + __expect_type(x1, "int"); // because condition + __expect_type(x2, "int?"); // because re-assigned + __expect_type(x3, "int?"); // because re-assigned + __expect_type(x10, "null"); + __expect_type(x11, "int"); + x1 = getNullableInt(); + __expect_type(x1, "int?"); + assignToNullableInt(mutate x2, 5); + x3.assignToNullableInt(5); + x11 = 10; + assignToInt(mutate x12, 5); + } + __expect_type(x1, "null"); + __expect_type(x2, "int?"); + __expect_type(x3, "int?"); +} + +fun test54() { + var x1: int? = null; + var x2: int? = 5; + var x3: int? = 5; + var x10: int? = null; + var x11: int? = 5; + var x12: int? 
= 5; + do { + __expect_type(x1, "int?"); // because re-assigned + __expect_type(x2, "int?"); // because re-assigned + __expect_type(x3, "int?"); // because re-assigned + __expect_type(x10, "null"); + __expect_type(x11, "int"); + x1 = getNullableInt(); + __expect_type(x1, "int?"); + assignToNullableInt(mutate x2, 5); + if (random()) { x3.assignToNullableInt(5); } + x11 = 10; + assignToInt(mutate x12, 5); + } while (x1 != null); + __expect_type(x1, "null"); + __expect_type(x2, "int?"); + __expect_type(x3, "int?"); +} + +fun eq55(v: T) { return v; } + +fun test55() { + var x: int? = 4; + while (true) { + // currently, generic functions are instantiated at the type inferring step + // in case of loops, type inferring is re-enterable + // first iteration: x is int, eq instantiated + // second (final) iteration: x is int?, eq instantiated + // (checked via codegen) + eq55(x); + __expect_type(x, "int?"); // types are checked (unlike generics instantiated) after inferring + x = random() ? 1 : null; + } + __expect_type(x, "int?"); +} + +fun test56() { + var i: int? = null; + var (j: int?, k: int?) = (null, null); + __expect_type(i, "null"); + __expect_type(k, "null"); + i = getTwo(); + [j, ((k))] = [getTwo(), ((getTwo()))]; + __expect_type(i, "int?"); + __expect_type(j, "int?"); + __expect_type(k, "int?"); +} + +fun test57(mutate x: int?): int { + if (x == null) { x = 5; } + else { + if (x < 10) { x = 10; } + else { x = 20; } + } + if (x != null) { + return 123; + } + __expect_type(x, "int"); + // no "return" needed, because end of function is unreachable +} + +@method_id(158) +fun test58() { + var (x1, x2: int?) 
= (getNullableInt(), null); + return (test57(mutate x1), x1, test57(mutate x2), x2); +} + +fun test59() { + var (x1: int?, x2, x3) = (getNullableInt()!, getNullableInt(), 5); + if ((x2 = x3) != null) { + __expect_type(x2, "int"); + } + __expect_type(x2, "int"); + if ((x2 = getNullableInt()) != null) { + __expect_type(x2, "int"); + } + __expect_type(x2, "int?"); + if (((x1) = x2) == null) { + return; + } + __expect_type(x1, "int"); +} + + + +fun main(x: int?): int { + return x == null ? -1 : x; +} + +/** +@testcase | 0 | 1 | 1 +@testcase | 123 | | 7 +@testcase | 124 | 4 | 6 +@testcase | 124 | null | 15 +@testcase | 130 | -1 | 20 +@testcase | 130 | 0 | 10 +@testcase | 132 | | 15 +@testcase | 133 | | 10 +@testcase | 139 | | 16 +@testcase | 140 | 5 | 25 +@testcase | 141 | | 1 2 +@testcase | 142 | | 5 3 (null) (null) 0 -1 3 (null) (null) 0 +@testcase | 143 | | 10 11 (null) 10 11 (null) (null) 0 +@testcase | 144 | | 10 11 (null) 10 11 (null) (null) 0 +@testcase | 145 | | 5 3 (null) (null) 0 -1 3 (null) (null) (null) (null) 0 3 (null) (null) 0 +@testcase | 146 | | 3 4 5 3 4 5 -1 +@testcase | 147 | | (null) (null) 100 (null) 100 (null) (null) 0 +@testcase | 158 | | 123 10 123 5 + +@stderr warning: expression of type `int` is always not null, this condition is always true +@stderr warning: unreachable code +@stderr var t2 redef = getNullableInt()!; + +@fif_codegen eq55 PROC:<{ +@fif_codegen eq55 PROC:<{ +*/ diff --git a/tolk-tester/tests/unreachable-3.tolk b/tolk-tester/tests/unreachable-3.tolk new file mode 100644 index 00000000..fab21fd2 --- /dev/null +++ b/tolk-tester/tests/unreachable-3.tolk @@ -0,0 +1,22 @@ +fun main(x: int?) 
{ + if (x != null && x == null) { + return 1 + 2; + } + if (x == null) { + return -1; + } + if (x != null) { + return -2; + } + return 3 + 4; +} + +/** +@testcase | 0 | 5 | -2 +@testcase | 0 | null | -1 + +@stderr warning: variable `x` of type `int` is always not null +@stderr if (x != null) +@stderr warning: unreachable code +@stderr return 3 + 4 + */ diff --git a/tolk-tester/tests/warnings-1.tolk b/tolk-tester/tests/warnings-1.tolk new file mode 100644 index 00000000..040057d1 --- /dev/null +++ b/tolk-tester/tests/warnings-1.tolk @@ -0,0 +1,28 @@ +fun getNullableInt(): int? { return null; } + +fun main() { + var c: int? = 6; + __expect_type(c, "int"); + if (c == null) {} + + var d: int? = c; + if (((d)) != null && tupleSize(createEmptyTuple())) {} + + var e: int? = getNullableInt(); + if (e != null) { + return true; + } + __expect_type(e, "null"); + null == e; + + return null != null; +} + +/** +@testcase | 0 | | 0 + +@stderr warning: variable `c` of type `int` is always not null, this condition is always false +@stderr warning: variable `d` of type `int` is always not null, this condition is always true +@stderr warning: variable `e` is always null, this condition is always true +@stderr warning: expression is always null, this condition is always false + */ diff --git a/tolk-tester/tests/warnings-2.tolk b/tolk-tester/tests/warnings-2.tolk new file mode 100644 index 00000000..57ecb21a --- /dev/null +++ b/tolk-tester/tests/warnings-2.tolk @@ -0,0 +1,26 @@ +fun main() { + var (a, b, c, d, e) = (1, beginCell(), beginCell().endCell().beginParse(), [1], true as bool?); + + var alwaysInt = a != null ? 1 : null; + __expect_type(alwaysInt, "int"); + + if (!(c == null)) { + if (10 < 3) { assert(b == null, 100); } + } + while (d == null || false) {} + + return e! 
!= null; +} + +/** +@testcase | 0 | | -1 + +@stderr warning: variable `a` of type `int` is always not null, this condition is always true +@stderr warning: condition of ternary operator is always true +@stderr warning: variable `c` of type `slice` is always not null, this condition is always false +@stderr warning: condition of `if` is always true +@stderr warning: variable `b` of type `builder` is always not null, this condition is always false +@stderr warning: condition of `assert` is always false +@stderr warning: condition of `while` is always false +@stderr warning: expression of type `bool` is always not null, this condition is always true + */ diff --git a/tolk/CMakeLists.txt b/tolk/CMakeLists.txt index 9d720024..de408115 100644 --- a/tolk/CMakeLists.txt +++ b/tolk/CMakeLists.txt @@ -12,8 +12,8 @@ set(TOLK_SOURCE pipe-register-symbols.cpp pipe-resolve-identifiers.cpp pipe-calc-rvalue-lvalue.cpp - pipe-detect-unreachable.cpp pipe-infer-types-and-calls.cpp + pipe-check-inferred-types.cpp pipe-refine-lvalue-for-mutate.cpp pipe-check-rvalue-lvalue.cpp pipe-check-pure-impure.cpp @@ -23,6 +23,7 @@ set(TOLK_SOURCE pipe-find-unused-symbols.cpp pipe-generate-fif-output.cpp type-system.cpp + smart-casts-cfg.cpp generics-helpers.cpp abscode.cpp analyzer.cpp diff --git a/tolk/abscode.cpp b/tolk/abscode.cpp index 72da0ac8..fc160984 100644 --- a/tolk/abscode.cpp +++ b/tolk/abscode.cpp @@ -414,7 +414,7 @@ std::vector CodeBlob::create_var(TypePtr var_type, SrcLocation loc, s std::string null_flag_name = name.empty() ? 
name : name + ".NNFlag"; ir_idx = create_var(t_nullable->inner, loc, std::move(name)); ir_idx.emplace_back(create_var(TypeDataBool::create(), loc, std::move(null_flag_name))[0]); - } else if (var_type != TypeDataVoid::create()) { + } else if (var_type != TypeDataVoid::create() && var_type != TypeDataNever::create()) { #ifdef TOLK_DEBUG tolk_assert(stack_w == 1); #endif diff --git a/tolk/ast.cpp b/tolk/ast.cpp index 8f1aa98f..26eaacd5 100644 --- a/tolk/ast.cpp +++ b/tolk/ast.cpp @@ -117,6 +117,11 @@ void ASTNodeExpressionBase::assign_lvalue_true() { this->is_lvalue = true; } +void ASTNodeExpressionBase::assign_always_true_or_false(int flow_true_false_state) { + this->is_always_true = flow_true_false_state == 1; // see smart-casts-cfg.h + this->is_always_false = flow_true_false_state == 2; +} + void Vertex::assign_sym(const Symbol* sym) { this->sym = sym; } @@ -173,6 +178,10 @@ void Vertex::assign_is_negated(bool is_negated) { this->is_negated = is_negated; } +void Vertex::assign_first_unreachable(AnyV first_unreachable) { + this->first_unreachable = first_unreachable; +} + void Vertex::assign_target(const DotTarget& target) { this->target = target; } diff --git a/tolk/ast.h b/tolk/ast.h index cd410187..9b7c5d1a 100644 --- a/tolk/ast.h +++ b/tolk/ast.h @@ -186,11 +186,14 @@ struct ASTNodeExpressionBase : ASTNodeBase { TypePtr inferred_type = nullptr; bool is_rvalue: 1 = false; bool is_lvalue: 1 = false; + bool is_always_true: 1 = false; // inside `if`, `while`, ternary condition, `== null`, etc. 
+ bool is_always_false: 1 = false; // (when expression is guaranteed to be always true or always false) ASTNodeExpressionBase* mutate() const { return const_cast(this); } void assign_inferred_type(TypePtr type); void assign_rvalue_true(); void assign_lvalue_true(); + void assign_always_true_or_false(int flow_true_false_state); ASTNodeExpressionBase(ASTNodeType type, SrcLocation loc) : ASTNodeBase(type, loc) {} }; @@ -734,10 +737,14 @@ template<> // example: do while body is a sequence struct Vertex final : ASTStatementVararg { SrcLocation loc_end; + AnyV first_unreachable = nullptr; const std::vector& get_items() const { return children; } AnyV get_item(int i) const { return children.at(i); } + Vertex* mutate() const { return const_cast(this); } + void assign_first_unreachable(AnyV first_unreachable); + Vertex(SrcLocation loc, SrcLocation loc_end, std::vector items) : ASTStatementVararg(ast_sequence, loc, std::move(items)) , loc_end(loc_end) {} diff --git a/tolk/generics-helpers.cpp b/tolk/generics-helpers.cpp index 86cdf82b..9dae3f00 100644 --- a/tolk/generics-helpers.cpp +++ b/tolk/generics-helpers.cpp @@ -119,14 +119,14 @@ TypePtr GenericSubstitutionsDeduceForCall::replace_by_manually_specified(TypePtr return replace_genericT_with_deduced(param_type, fun_ref->genericTs, substitutionTs); } -TypePtr GenericSubstitutionsDeduceForCall::auto_deduce_from_argument(SrcLocation loc, TypePtr param_type, TypePtr arg_type) { +TypePtr GenericSubstitutionsDeduceForCall::auto_deduce_from_argument(FunctionPtr cur_f, SrcLocation loc, TypePtr param_type, TypePtr arg_type) { try { if (!manually_specified) { consider_next_condition(param_type, arg_type); } return replace_genericT_with_deduced(param_type, fun_ref->genericTs, substitutionTs); } catch (const GenericDeduceError& ex) { - throw ParseError(loc, ex.message + " for generic function `" + fun_ref->as_human_readable() + "`; instantiate it manually with `" + fun_ref->name + "<...>()`"); + throw ParseError(cur_f, loc, ex.message 
+ " for generic function `" + fun_ref->as_human_readable() + "`; instantiate it manually with `" + fun_ref->name + "<...>()`"); } } @@ -201,7 +201,6 @@ static void run_pipeline_for_instantiated_function(FunctionPtr inst_fun_ref) { // these pipes are exactly the same as in tolk.cpp — all preceding (and including) type inferring pipeline_resolve_identifiers_and_assign_symbols(inst_fun_ref); pipeline_calculate_rvalue_lvalue(inst_fun_ref); - pipeline_detect_unreachable_statements(inst_fun_ref); pipeline_infer_types_and_calls_and_fields(inst_fun_ref); } diff --git a/tolk/generics-helpers.h b/tolk/generics-helpers.h index 893bd98c..5ed245af 100644 --- a/tolk/generics-helpers.h +++ b/tolk/generics-helpers.h @@ -78,7 +78,7 @@ public: void provide_manually_specified(std::vector&& substitutionTs); TypePtr replace_by_manually_specified(TypePtr param_type) const; - TypePtr auto_deduce_from_argument(SrcLocation loc, TypePtr param_type, TypePtr arg_type); + TypePtr auto_deduce_from_argument(FunctionPtr cur_f, SrcLocation loc, TypePtr param_type, TypePtr arg_type); int get_first_not_deduced_idx() const; std::vector&& flush() { diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index 269c5fe1..1561aa40 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -442,6 +442,21 @@ static std::vector transition_expr_to_runtime_type_impl(std::vectortry_as(); const TypeDataNullable* o_nullable = original_type->try_as(); + // handle `never` + // it may occur due to smart cast and in unreachable branches + // we can't do anything reasonable here, but (hopefully) execution will never reach this point, and stack won't be polluted + if (original_type == TypeDataNever::create()) { + std::vector dummy_rvect; + dummy_rvect.reserve(target_w); + for (int i = 0; i < target_w; ++i) { + dummy_rvect.push_back(code.create_tmp_var(TypeDataUnknown::create(), loc, "(never)")[0]); + } + return dummy_rvect; + } + if (target_type == TypeDataNever::create()) { + return 
{}; + } + // pass `null` to `T?` // for primitives like `int?`, no changes in rvect, null occupies the same TVM slot // for tensors like `(int,int)?`, `null` is represented as N nulls + 1 null flag, insert N nulls @@ -493,6 +508,8 @@ static std::vector transition_expr_to_runtime_type_impl(std::vectorcan_rhs_be_assigned(target_type)) { tolk_assert(o_nullable || original_type == TypeDataUnknown::create()); if (o_nullable && !o_nullable->is_primitive_nullable()) { @@ -502,10 +519,12 @@ static std::vector transition_expr_to_runtime_type_impl(std::vectoris_primitive_nullable()) { rvect.pop_back(); @@ -572,6 +591,17 @@ static std::vector transition_to_target_type(std::vector&& return rvect; } +// the second overload of the same function, invoke impl only when original and target differ +#ifndef TOLK_DEBUG +GNU_ATTRIBUTE_ALWAYS_INLINE +#endif +static std::vector transition_to_target_type(std::vector&& rvect, CodeBlob& code, TypePtr original_type, TypePtr target_type, SrcLocation loc) { + if (target_type != original_type) { + rvect = transition_expr_to_runtime_type_impl(std::move(rvect), code, original_type, target_type, loc); + } + return rvect; +} + std::vector pre_compile_symbol(SrcLocation loc, const Symbol* sym, CodeBlob& code, LValContext* lval_ctx) { if (GlobalVarPtr glob_ref = sym->try_as()) { @@ -617,20 +647,33 @@ std::vector pre_compile_symbol(SrcLocation loc, const Symbol* sym, Co static std::vector process_reference(V v, CodeBlob& code, TypePtr target_type, LValContext* lval_ctx) { std::vector rvect = pre_compile_symbol(v->loc, v->sym, code, lval_ctx); + + // a local variable might be smart cast at this point, for example we're in `if (v != null)` + // it means that we must drop the null flag (if it's a tensor), or maybe perform other stack transformations + // (from original var_ref->ir_idx to fit smart cast) + if (LocalVarPtr var_ref = v->sym->try_as()) { + // note, inside `if (v != null)` when `v` is used for writing, v->inferred_type is an original 
(declared_type) + // (smart casts apply only for rvalue, not for lvalue, we don't check it here, it's a property of inferring) + rvect = transition_to_target_type(std::move(rvect), code, var_ref->declared_type, v->inferred_type, v->loc); + } + return transition_to_target_type(std::move(rvect), code, target_type, v); } static std::vector process_assignment(V v, CodeBlob& code, TypePtr target_type) { - if (auto lhs_decl = v->get_lhs()->try_as()) { - std::vector rvect = pre_compile_let(code, lhs_decl->get_expr(), v->get_rhs(), v->loc); + AnyExprV lhs = v->get_lhs(); + AnyExprV rhs = v->get_rhs(); + + if (auto lhs_decl = lhs->try_as()) { + std::vector rvect = pre_compile_let(code, lhs_decl->get_expr(), rhs, v->loc); return transition_to_target_type(std::move(rvect), code, target_type, v); } else { - std::vector rvect = pre_compile_let(code, v->get_lhs(), v->get_rhs(), v->loc); + std::vector rvect = pre_compile_let(code, lhs, rhs, v->loc); // now rvect contains rhs IR vars constructed to fit lhs (for correct assignment, lhs type was target_type for rhs) // but the type of `lhs = rhs` is RHS (see type inferring), so rvect now should fit rhs->inferred_type (= v->inferred_type) // example: `t1 = t2 = null`, we're at `t2 = null`, earlier declared t1: `int?`, t2: `(int,int)?` // currently "null" matches t2 (3 null slots), but type of this assignment is "plain null" (1 slot) assigned later to t1 - rvect = transition_expr_to_runtime_type_impl(std::move(rvect), code, v->get_lhs()->inferred_type, v->inferred_type, v->loc); + rvect = transition_to_target_type(std::move(rvect), code, lhs->inferred_type, v->inferred_type, v->loc); return transition_to_target_type(std::move(rvect), code, target_type, v); } } @@ -692,13 +735,21 @@ static std::vector process_ternary_operator(V v std::vector cond = pre_compile_expr(v->get_cond(), code, nullptr); tolk_assert(cond.size() == 1); std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc, "(cond)"); - Op& if_op = 
code.emplace_back(v->loc, Op::_If, cond); - code.push_set_cur(if_op.block0); - code.emplace_back(v->get_when_true()->loc, Op::_Let, rvect, pre_compile_expr(v->get_when_true(), code, v->inferred_type)); - code.close_pop_cur(v->get_when_true()->loc); - code.push_set_cur(if_op.block1); - code.emplace_back(v->get_when_false()->loc, Op::_Let, rvect, pre_compile_expr(v->get_when_false(), code, v->inferred_type)); - code.close_pop_cur(v->get_when_false()->loc); + + if (v->get_cond()->is_always_true) { + code.emplace_back(v->get_when_true()->loc, Op::_Let, rvect, pre_compile_expr(v->get_when_true(), code, v->inferred_type)); + } else if (v->get_cond()->is_always_false) { + code.emplace_back(v->get_when_false()->loc, Op::_Let, rvect, pre_compile_expr(v->get_when_false(), code, v->inferred_type)); + } else { + Op& if_op = code.emplace_back(v->loc, Op::_If, cond); + code.push_set_cur(if_op.block0); + code.emplace_back(v->get_when_true()->loc, Op::_Let, rvect, pre_compile_expr(v->get_when_true(), code, v->inferred_type)); + code.close_pop_cur(v->get_when_true()->loc); + code.push_set_cur(if_op.block1); + code.emplace_back(v->get_when_false()->loc, Op::_Let, rvect, pre_compile_expr(v->get_when_false(), code, v->inferred_type)); + code.close_pop_cur(v->get_when_false()->loc); + } + return transition_to_target_type(std::move(rvect), code, target_type, v); } @@ -768,6 +819,10 @@ static std::vector process_dot_access(V v, CodeBlob& stack_offset += t_tensor->items[i]->get_width_on_stack(); } std::vector rvect{lhs_vars.begin() + stack_offset, lhs_vars.begin() + stack_offset + stack_width}; + // a tensor index might be smart cast at this point, for example we're in `if (t.1 != null)` + // it means that we must drop the null flag (if `t.1` is a tensor), or maybe perform other stack transformations + // (from original rvect = (vars of t.1) to fit smart cast) + rvect = transition_to_target_type(std::move(rvect), code, t_tensor->items[index_at], v->inferred_type, v->loc); return 
transition_to_target_type(std::move(rvect), code, target_type, v); } // `tupleVar.0` @@ -1090,8 +1145,19 @@ static void process_repeat_statement(V v, CodeBlob& code) } static void process_if_statement(V v, CodeBlob& code) { - std::vector tmp_vars = pre_compile_expr(v->get_cond(), code, nullptr); - Op& if_op = code.emplace_back(v->loc, Op::_If, std::move(tmp_vars)); + std::vector cond = pre_compile_expr(v->get_cond(), code, nullptr); + tolk_assert(cond.size() == 1); + + if (v->get_cond()->is_always_true) { + process_any_statement(v->get_if_body(), code); // v->is_ifnot does not matter here + return; + } + if (v->get_cond()->is_always_false) { + process_any_statement(v->get_else_body(), code); + return; + } + + Op& if_op = code.emplace_back(v->loc, Op::_If, std::move(cond)); code.push_set_cur(if_op.block0); process_any_statement(v->get_if_body(), code); code.close_pop_cur(v->get_if_body()->loc_end); @@ -1192,6 +1258,10 @@ static void process_return_statement(V v, CodeBlob& code) code.emplace_back(v->loc, Op::_Return, std::move(return_vars)); } +// append "return" (void) to the end of the function +// if it's not reachable, it will be dropped +// (IR cfg reachability may differ from FlowContext in case of "never" types, so there may be situations, +// when IR will consider this "return" reachable and leave it, but actually execution will never reach it) static void append_implicit_return_statement(SrcLocation loc_end, CodeBlob& code) { std::vector mutated_vars; if (code.fun_ref->has_mutate_params()) { @@ -1256,9 +1326,7 @@ static void convert_function_body_to_CodeBlob(FunctionPtr fun_ref, FunctionBodyC for (AnyV item : v_body->get_items()) { process_any_statement(item, *blob); } - if (fun_ref->is_implicit_return()) { - append_implicit_return_statement(v_body->loc_end, *blob); - } + append_implicit_return_statement(v_body->loc_end, *blob); blob->close_blk(v_body->loc_end); code_body->set_code(blob); diff --git a/tolk/pipe-check-inferred-types.cpp 
b/tolk/pipe-check-inferred-types.cpp new file mode 100644 index 00000000..bae67c5f --- /dev/null +++ b/tolk/pipe-check-inferred-types.cpp @@ -0,0 +1,586 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "tolk.h" +#include "ast.h" +#include "ast-visitor.h" +#include "type-system.h" + +namespace tolk { + +GNU_ATTRIBUTE_NOINLINE +static std::string to_string(TypePtr type) { + return "`" + type->as_human_readable() + "`"; +} + +GNU_ATTRIBUTE_NOINLINE +static std::string to_string(AnyExprV v_with_type) { + return "`" + v_with_type->inferred_type->as_human_readable() + "`"; +} + +GNU_ATTRIBUTE_NOINLINE +static std::string expression_as_string(AnyExprV v) { + if (auto v_ref = v->try_as()) { + if (v_ref->sym->try_as() || v_ref->sym->try_as()) { + return "variable `" + static_cast(v_ref->get_identifier()->name) + "`"; + } + } + if (auto v_par = v->try_as()) { + return expression_as_string(v_par->get_expr()); + } + return "expression"; +} + +// fire a general "type mismatch" error, just a wrapper over `throw` +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire(FunctionPtr cur_f, SrcLocation loc, const std::string& message) { + throw ParseError(cur_f, loc, message); +} + +// fire an error on `!cell` / `+slice` +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void 
fire_error_cannot_apply_operator(FunctionPtr cur_f, SrcLocation loc, std::string_view operator_name, AnyExprV unary_expr) { + std::string op = static_cast(operator_name); + fire(cur_f, loc, "can not apply operator `" + op + "` to " + to_string(unary_expr->inferred_type)); +} + +// fire an error on `int + cell` / `slice & int` +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_cannot_apply_operator(FunctionPtr cur_f, SrcLocation loc, std::string_view operator_name, AnyExprV lhs, AnyExprV rhs) { + std::string op = static_cast(operator_name); + fire(cur_f, loc, "can not apply operator `" + op + "` to " + to_string(lhs->inferred_type) + " and " + to_string(rhs->inferred_type)); +} + +GNU_ATTRIBUTE_NOINLINE +static void warning_condition_always_true_or_false(FunctionPtr cur_f, SrcLocation loc, AnyExprV cond, const char* operator_name) { + loc.show_warning("condition of " + static_cast(operator_name) + " is always " + (cond->is_always_true ? "true" : "false")); +} + +// given `f(x: int)` and a call `f(expr)`, check that expr_type is assignable to `int` +static void check_function_argument_passed(FunctionPtr cur_f, TypePtr param_type, AnyExprV ith_arg, bool is_obj_of_dot_call) { + if (!param_type->can_rhs_be_assigned(ith_arg->inferred_type)) { + if (is_obj_of_dot_call) { + fire(cur_f, ith_arg->loc, "can not call method for " + to_string(param_type) + " with object of type " + to_string(ith_arg)); + } else { + fire(cur_f, ith_arg->loc, "can not pass " + to_string(ith_arg) + " to " + to_string(param_type)); + } + } +} + +// given `f(x: mutate int?)` and a call `f(expr)`, check that `int?` is assignable to expr_type +// (for instance, can't call `f(mutate intVal)`, since f can potentially assign null to it) +static void check_function_argument_mutate_back(FunctionPtr cur_f, TypePtr param_type, AnyExprV ith_arg, bool is_obj_of_dot_call) { + if (!ith_arg->inferred_type->can_rhs_be_assigned(param_type)) { + if (is_obj_of_dot_call) { + fire(cur_f, 
ith_arg->loc,"can not call method for mutate " + to_string(param_type) + " with object of type " + to_string(ith_arg) + ", because mutation is not type compatible"); + } else { + fire(cur_f, ith_arg->loc,"can not pass " + to_string(ith_arg) + " to mutate " + to_string(param_type) + ", because mutation is not type compatible"); + } + } +} + +// fire an error on `var n = null` +// technically it's correct, type of `n` is TypeDataNullLiteral, but it's not what the user wanted +// so, it's better to see an error on assignment than later, on `n` usage and a type mismatch +// (the situation above is the most common one, but generally `var (x,n) = xn`, where xn is a tensor whose 2nd item is always null, is possible too) +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_assign_always_null_to_variable(FunctionPtr cur_f, SrcLocation loc, LocalVarPtr assigned_var, bool is_assigned_null_literal) { + std::string var_name = assigned_var->name; + fire(cur_f, loc, "can not infer type of `" + var_name + "`, it's always null; specify its type with `" + var_name + ": `" + (is_assigned_null_literal ? 
" or use `null as `" : "")); +} + +// fire an error on `untypedTupleVar.0` when inferred as (int,int), or `[int, (int,int)]`, or other non-1 width in a tuple +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_cannot_put_non1_stack_width_arg_to_tuple(FunctionPtr cur_f, SrcLocation loc, TypePtr inferred_type) { + fire(cur_f, loc, "a tuple can not have " + to_string(inferred_type) + " inside, because it occupies " + std::to_string(inferred_type->get_width_on_stack()) + " stack slots in TVM, not 1"); +} + +// handle __expect_type(expr, "type") call +// this is used in compiler tests +GNU_ATTRIBUTE_NOINLINE GNU_ATTRIBUTE_COLD +static void handle_possible_compiler_internal_call(FunctionPtr cur_f, V v) { + FunctionPtr fun_ref = v->fun_maybe; + tolk_assert(fun_ref && fun_ref->is_builtin_function()); + + if (fun_ref->name == "__expect_type") { + tolk_assert(v->get_num_args() == 2); + TypePtr expected_type = parse_type_from_string(v->get_arg(1)->get_expr()->as()->str_val); + TypePtr expr_type = v->get_arg(0)->inferred_type; + if (expected_type != expr_type) { + fire(cur_f, v->loc, "__expect_type failed: expected " + to_string(expected_type) + ", got " + to_string(expr_type)); + } + } +} + +static bool expect_integer(AnyExprV v_inferred) { + return v_inferred->inferred_type == TypeDataInt::create(); +} + +static bool expect_boolean(AnyExprV v_inferred) { + return v_inferred->inferred_type == TypeDataBool::create(); +} + + +class CheckInferredTypesVisitor final : public ASTVisitorFunctionBody { + FunctionPtr cur_f = nullptr; // may be nullptr if checking `const a = ...` init_value + +protected: + void visit(V v) override { + AnyExprV lhs = v->get_lhs(); + AnyExprV rhs = v->get_rhs(); + parent::visit(lhs); + parent::visit(rhs); + + // all operators (+=, etc.) 
can work for integers (if both sides are integers) + bool types_ok = expect_integer(lhs) && expect_integer(rhs); + // bitwise operators &= |= ^= are "overloaded" for booleans also (if both sides are booleans) + if (!types_ok && (v->tok == tok_set_bitwise_and || v->tok == tok_set_bitwise_or || v->tok == tok_set_bitwise_xor)) { + types_ok = expect_boolean(lhs) && expect_boolean(rhs); + } + // using += for other types (e.g. `tensorVar += tensorVar`) is not allowed + if (!types_ok) { + fire_error_cannot_apply_operator(cur_f, v->loc, v->operator_name, lhs, rhs); + } + } + + void visit(V v) override { + AnyExprV rhs = v->get_rhs(); + parent::visit(rhs); + + switch (v->tok) { + case tok_logical_not: + if (!expect_integer(rhs) && !expect_boolean(rhs)) { + fire_error_cannot_apply_operator(cur_f, v->loc, v->operator_name, rhs); + } + break; + default: + if (!expect_integer(rhs)) { + fire_error_cannot_apply_operator(cur_f, v->loc, v->operator_name, rhs); + } + } + } + + void visit(V v) override { + AnyExprV lhs = v->get_lhs(); + AnyExprV rhs = v->get_rhs(); + parent::visit(lhs); + parent::visit(rhs); + + switch (v->tok) { + // == != can compare both integers and booleans, (int == bool) is NOT allowed + // note, that `int?` and `int?` can't be compared, since Fift `EQUAL` works with integers only + // (if to allow `int?` in the future, `==` must be expressed in a complicated Fift code considering TVM NULL) + case tok_eq: + case tok_neq: { + bool both_int = expect_integer(lhs) && expect_integer(rhs); + bool both_bool = expect_boolean(lhs) && expect_boolean(rhs); + if (!both_int && !both_bool) { + if (lhs->inferred_type == rhs->inferred_type) { // compare slice with slice, int? with int? 
+ fire(cur_f, v->loc, "type " + to_string(lhs) + " can not be compared with `== !=`"); + } else { + fire_error_cannot_apply_operator(cur_f, v->loc, v->operator_name, lhs, rhs); + } + } + break; + } + // < > can compare only strict integers + case tok_lt: + case tok_gt: + case tok_leq: + case tok_geq: + case tok_spaceship: + if (!expect_integer(lhs) || !expect_integer(rhs)) { + fire_error_cannot_apply_operator(cur_f, v->loc, v->operator_name, lhs, rhs); + } + break; + // & | ^ are "overloaded" both for integers and booleans, (int & bool) is NOT allowed + case tok_bitwise_and: + case tok_bitwise_or: + case tok_bitwise_xor: { + bool both_int = expect_integer(lhs) && expect_integer(rhs); + bool both_bool = expect_boolean(lhs) && expect_boolean(rhs); + if (!both_int && !both_bool) { + fire_error_cannot_apply_operator(cur_f, v->loc, v->operator_name, lhs, rhs); + } + break; + } + // && || can work with integers and booleans, (int && bool) is allowed, (int16 && int32) also + case tok_logical_and: + case tok_logical_or: { + bool lhs_ok = expect_integer(lhs) || expect_boolean(lhs); + bool rhs_ok = expect_integer(rhs) || expect_boolean(rhs); + if (!lhs_ok || !rhs_ok) { + fire_error_cannot_apply_operator(cur_f, v->loc, v->operator_name, lhs, rhs); + } + break; + } + // others are mathematical: + * ... 
+ default: + if (!expect_integer(lhs) || !expect_integer(rhs)) { + fire_error_cannot_apply_operator(cur_f, v->loc, v->operator_name, lhs, rhs); + } + } + } + + void visit(V v) override { + parent::visit(v->get_expr()); + + if (!v->get_expr()->inferred_type->can_be_casted_with_as_operator(v->cast_to_type)) { + fire(cur_f, v->loc, "type " + to_string(v->get_expr()) + " can not be cast to " + to_string(v->cast_to_type)); + } + } + + void visit(V v) override { + parent::visit(v->get_expr()); + + if (v->get_expr()->inferred_type == TypeDataNullLiteral::create()) { + // operator `!` used for always-null (proven by smart casts, for example), it's an error + fire(cur_f, v->loc, "operator `!` used for always null expression"); + } + // if operator `!` used for non-nullable, probably a warning should be printed + } + + void visit(V v) override { + parent::visit(v->get_expr()); + + if ((v->is_always_true && !v->is_negated) || (v->is_always_false && v->is_negated)) { + v->loc.show_warning(expression_as_string(v->get_expr()) + " is always null, this condition is always " + (v->is_always_true ? "true" : "false")); + } + if ((v->is_always_false && !v->is_negated) || (v->is_always_true && v->is_negated)) { + v->loc.show_warning(expression_as_string(v->get_expr()) + " of type " + to_string(v->get_expr()) + " is always not null, this condition is always " + (v->is_always_true ? 
"true" : "false")); + } + } + + void visit(V v) override { + parent::visit(v); + + for (int i = 0; i < v->size(); ++i) { + AnyExprV item = v->get_item(i); + if (item->inferred_type->get_width_on_stack() != 1) { + fire_error_cannot_put_non1_stack_width_arg_to_tuple(cur_f, v->get_item(i)->loc, item->inferred_type); + } + } + } + + void visit(V v) override { + parent::visit(v); + + TypePtr obj_type = v->get_obj()->inferred_type; + if (v->is_target_indexed_access()) { + if (obj_type->try_as() && v->inferred_type->get_width_on_stack() != 1) { + fire_error_cannot_put_non1_stack_width_arg_to_tuple(cur_f, v->loc, v->inferred_type); + } + } + } + + void visit(V v) override { + parent::visit(v); // check against type mismatch inside nested arguments + + FunctionPtr fun_ref = v->fun_maybe; + if (!fun_ref) { + // `local_var(args)` and similar + const TypeDataFunCallable* f_callable = v->get_callee()->inferred_type->try_as(); + tolk_assert(f_callable && f_callable->params_size() == v->get_num_args()); + for (int i = 0; i < v->get_num_args(); ++i) { + auto arg_i = v->get_arg(i)->get_expr(); + TypePtr param_type = f_callable->params_types[i]; + if (!param_type->can_rhs_be_assigned(arg_i->inferred_type)) { + fire(cur_f, arg_i->loc, "can not pass " + to_string(arg_i) + " to " + to_string(param_type)); + } + } + return; + } + + // so, we have a call `f(args)` or `obj.f(args)`, f is a global function (fun_ref) (code / asm / builtin) + int delta_self = 0; + AnyExprV dot_obj = nullptr; + if (auto v_dot = v->get_callee()->try_as()) { + delta_self = 1; + dot_obj = v_dot->get_obj(); + } + + if (dot_obj) { + const LocalVarData& param_0 = fun_ref->parameters[0]; + TypePtr param_type = param_0.declared_type; + check_function_argument_passed(cur_f, param_type, dot_obj, true); + if (param_0.is_mutate_parameter()) { + check_function_argument_mutate_back(cur_f, param_type, dot_obj, true); + } + } + for (int i = 0; i < v->get_num_args(); ++i) { + const LocalVarData& param_i = 
fun_ref->parameters[delta_self + i]; + AnyExprV arg_i = v->get_arg(i)->get_expr(); + TypePtr param_type = param_i.declared_type; + check_function_argument_passed(cur_f, param_type, arg_i, false); + if (param_i.is_mutate_parameter()) { + check_function_argument_mutate_back(cur_f, param_type, arg_i, false); + } + } + + if (fun_ref->is_builtin_function() && fun_ref->name[0] == '_') { + handle_possible_compiler_internal_call(cur_f, v); + } + } + + void visit(V v) override { + parent::visit(v->get_lhs()); + parent::visit(v->get_rhs()); + + process_assignment_lhs(v->get_lhs(), v->get_rhs()->inferred_type, v->get_rhs()); + } + + // handle (and dig recursively) into `var lhs = rhs` + // examples: `var z = 5`, `var (x, [y]) = (2, [3])`, `var (x, [y]) = xy` + // while recursing, keep track of rhs if lhs and rhs have common shape (5 for z, 2 for x, [3] for [y], 3 for y) + // (so that on type mismatch, the error points to the corresponding rhs; example: for `var (x, y:slice) = (1, 2)` it points to 2) + void process_assignment_lhs(AnyExprV lhs, TypePtr rhs_type, AnyExprV corresponding_maybe_rhs) { + AnyExprV err_loc = corresponding_maybe_rhs ? corresponding_maybe_rhs : lhs; + + // `var ... 
= rhs` - dig into left part + if (auto lhs_decl = lhs->try_as()) { + process_assignment_lhs(lhs_decl->get_expr(), rhs_type, corresponding_maybe_rhs); + return; + } + + // inside `var v: int = rhs` / `var _ = rhs` / `var v redef = rhs` (lhs is "v" / "_" / "v") + if (auto lhs_var = lhs->try_as()) { + TypePtr declared_type = lhs_var->declared_type; // `var v: int = rhs` (otherwise, nullptr) + if (lhs_var->marked_as_redef) { + tolk_assert(lhs_var->var_ref && lhs_var->var_ref->declared_type); + declared_type = lhs_var->var_ref->declared_type; + } + if (declared_type) { + if (!declared_type->can_rhs_be_assigned(rhs_type)) { + fire(cur_f, err_loc->loc, "can not assign " + to_string(rhs_type) + " to variable of type " + to_string(declared_type)); + } + } else { + if (rhs_type == TypeDataNullLiteral::create()) { + fire_error_assign_always_null_to_variable(cur_f, err_loc->loc, lhs_var->var_ref->try_as(), corresponding_maybe_rhs && corresponding_maybe_rhs->type == ast_null_keyword); + } + } + return; + } + + // `(v1, v2) = rhs` / `var (v1, v2) = rhs` (rhs may be `(1,2)` or `tensorVar` or `someF()`, doesn't matter) + // dig recursively into v1 and v2 with corresponding rhs i-th item of a tensor + if (auto lhs_tensor = lhs->try_as()) { + const TypeDataTensor* rhs_type_tensor = rhs_type->try_as(); + if (!rhs_type_tensor) { + fire(cur_f, err_loc->loc, "can not assign " + to_string(rhs_type) + " to a tensor"); + } + if (lhs_tensor->size() != rhs_type_tensor->size()) { + fire(cur_f, err_loc->loc, "can not assign " + to_string(rhs_type) + ", sizes mismatch"); + } + V rhs_tensor_maybe = corresponding_maybe_rhs ? corresponding_maybe_rhs->try_as() : nullptr; + for (int i = 0; i < lhs_tensor->size(); ++i) { + process_assignment_lhs(lhs_tensor->get_item(i), rhs_type_tensor->items[i], rhs_tensor_maybe ? 
rhs_tensor_maybe->get_item(i) : nullptr); + } + return; + } + + // `[v1, v2] = rhs` / `var [v1, v2] = rhs` (rhs may be `[1,2]` or `tupleVar` or `someF()`, doesn't matter) + // dig recursively into v1 and v2 with corresponding rhs i-th item of a tuple + if (auto lhs_tuple = lhs->try_as()) { + const TypeDataTypedTuple* rhs_type_tuple = rhs_type->try_as(); + if (!rhs_type_tuple) { + fire(cur_f, err_loc->loc, "can not assign " + to_string(rhs_type) + " to a tuple"); + } + if (lhs_tuple->size() != rhs_type_tuple->size()) { + fire(cur_f, err_loc->loc, "can not assign " + to_string(rhs_type) + ", sizes mismatch"); + } + V rhs_tuple_maybe = corresponding_maybe_rhs ? corresponding_maybe_rhs->try_as() : nullptr; + for (int i = 0; i < lhs_tuple->size(); ++i) { + process_assignment_lhs(lhs_tuple->get_item(i), rhs_type_tuple->items[i], rhs_tuple_maybe ? rhs_tuple_maybe->get_item(i) : nullptr); + } + return; + } + + // check `untypedTuple.0 = rhs_tensor` and other non-1 width elements + if (auto lhs_dot = lhs->try_as()) { + if (lhs_dot->is_target_indexed_access() && lhs_dot->get_obj()->inferred_type == TypeDataTuple::create()) { + if (rhs_type->get_width_on_stack() != 1) { + fire_error_cannot_put_non1_stack_width_arg_to_tuple(cur_f, err_loc->loc, rhs_type); + } + } + } + + // here is `v = rhs` (just assignment, not `var v = rhs`) / `a.0 = rhs` / `getObj(z=f()).0 = rhs` etc. 
+ // types were already inferred, so just check their compatibility + // for strange lhs like `f() = rhs` type checking will pass, but will fail lvalue check later + if (!lhs->inferred_type->can_rhs_be_assigned(rhs_type)) { + if (lhs->try_as()) { + fire(cur_f, err_loc->loc, "can not assign " + to_string(rhs_type) + " to variable of type " + to_string(lhs)); + } else { + fire(cur_f, err_loc->loc, "can not assign " + to_string(rhs_type) + " to " + to_string(lhs)); + } + } + } + + void visit(V v) override { + parent::visit(v->get_return_value()); + + if (cur_f->does_return_self()) { + if (!is_expr_valid_as_return_self(v->get_return_value())) { + fire(cur_f, v->loc, "invalid return from `self` function"); + } + return; + } + + TypePtr expr_type = v->get_return_value()->inferred_type; + if (!cur_f->inferred_return_type->can_rhs_be_assigned(expr_type)) { + fire(cur_f, v->get_return_value()->loc, "can not convert type " + to_string(expr_type) + " to return type " + to_string(cur_f->inferred_return_type)); + } + } + + static bool is_expr_valid_as_return_self(AnyExprV return_expr) { + // `return self` + if (return_expr->type == ast_reference && return_expr->as()->get_name() == "self") { + return true; + } + // `return self.someMethod()` + if (auto v_call = return_expr->try_as(); v_call && v_call->is_dot_call()) { + return v_call->fun_maybe && v_call->fun_maybe->does_return_self() && is_expr_valid_as_return_self(v_call->get_dot_obj()); + } + // `return cond ? ... 
: ...` + if (auto v_ternary = return_expr->try_as()) { + return is_expr_valid_as_return_self(v_ternary->get_when_true()) && is_expr_valid_as_return_self(v_ternary->get_when_false()); + } + return false; + } + + void visit(V v) override { + parent::visit(v); + + AnyExprV cond = v->get_cond(); + if (!expect_integer(cond) && !expect_boolean(cond)) { + fire(cur_f, cond->loc, "can not use " + to_string(cond) + " as a boolean condition"); + } + + if (cond->is_always_true || cond->is_always_false) { + warning_condition_always_true_or_false(cur_f, v->loc, cond, "ternary operator"); + } + } + + void visit(V v) override { + parent::visit(v); + + AnyExprV cond = v->get_cond(); + if (!expect_integer(cond) && !expect_boolean(cond)) { + fire(cur_f, cond->loc, "can not use " + to_string(cond) + " as a boolean condition"); + } + + if (cond->is_always_true || cond->is_always_false) { + warning_condition_always_true_or_false(cur_f, v->loc, cond, "`if`"); + } + } + + void visit(V v) override { + parent::visit(v); + + AnyExprV cond = v->get_cond(); + if (!expect_integer(cond)) { + fire(cur_f, cond->loc, "condition of `repeat` must be an integer, got " + to_string(cond)); + } + } + + void visit(V v) override { + parent::visit(v); + + AnyExprV cond = v->get_cond(); + if (!expect_integer(cond) && !expect_boolean(cond)) { + fire(cur_f, cond->loc, "can not use " + to_string(cond) + " as a boolean condition"); + } + + if (cond->is_always_true || cond->is_always_false) { + warning_condition_always_true_or_false(cur_f, v->loc, cond, "`while`"); + } + } + + void visit(V v) override { + parent::visit(v); + + AnyExprV cond = v->get_cond(); + if (!expect_integer(cond) && !expect_boolean(cond)) { + fire(cur_f, cond->loc, "can not use " + to_string(cond) + " as a boolean condition"); + } + + if (cond->is_always_true || cond->is_always_false) { + warning_condition_always_true_or_false(cur_f, v->loc, cond, "`do while`"); + } + } + + void visit(V v) override { + parent::visit(v); + + if 
(!expect_integer(v->get_thrown_code())) { + fire(cur_f, v->get_thrown_code()->loc, "excNo of `throw` must be an integer, got " + to_string(v->get_thrown_code())); + } + if (v->has_thrown_arg() && v->get_thrown_arg()->inferred_type->get_width_on_stack() != 1) { + fire(cur_f, v->get_thrown_arg()->loc, "can not throw " + to_string(v->get_thrown_arg()) + ", exception arg must occupy exactly 1 stack slot"); + } + } + + void visit(V v) override { + parent::visit(v); + + AnyExprV cond = v->get_cond(); + if (!expect_integer(cond) && !expect_boolean(cond)) { + fire(cur_f, cond->loc, "can not use " + to_string(cond) + " as a boolean condition"); + } + if (!expect_integer(v->get_thrown_code())) { + fire(cur_f, v->get_thrown_code()->loc, "thrown excNo of `assert` must be an integer, got " + to_string(v->get_thrown_code())); + } + + if (cond->is_always_true || cond->is_always_false) { + warning_condition_always_true_or_false(cur_f, v->loc, cond, "`assert`"); + } + } + + void visit(V v) override { + parent::visit(v); + + if (v->first_unreachable) { + // it's essential to print "unreachable code" warning AFTER type checking + // (printing it while inferring might be a false positive if types are incorrect, due to smart casts for example) + // a more correct approach would be to access cfg here somehow, but since cfg is now available only while inferring, + // a special v->first_unreachable was set specifically for this warning (again, which is correct if types match) + v->first_unreachable->loc.show_warning("unreachable code"); + } + } + + public: + bool should_visit_function(FunctionPtr fun_ref) override { + return fun_ref->is_code_function() && !fun_ref->is_generic_function(); + } + + void start_visiting_function(FunctionPtr fun_ref, V v_function) override { + cur_f = fun_ref; + parent::visit(v_function->get_body()); + cur_f = nullptr; + + if (fun_ref->is_implicit_return() && fun_ref->declared_return_type) { + if 
(!fun_ref->declared_return_type->can_rhs_be_assigned(TypeDataVoid::create()) || fun_ref->does_return_self()) { + fire(fun_ref, v_function->get_body()->as()->loc_end, "missing return"); + } + } + } +}; + +void pipeline_check_inferred_types() { + visit_ast_of_all_functions(); +} + +} // namespace tolk diff --git a/tolk/pipe-detect-unreachable.cpp b/tolk/pipe-detect-unreachable.cpp deleted file mode 100644 index 041e5581..00000000 --- a/tolk/pipe-detect-unreachable.cpp +++ /dev/null @@ -1,138 +0,0 @@ -/* - This file is part of TON Blockchain source code. - - TON Blockchain is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - TON Blockchain is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with TON Blockchain. If not, see . -*/ -#include "tolk.h" -#include "ast.h" -#include "ast-visitor.h" - -/* - * This pipe does two things: - * 1) detects unreachable code and prints warnings about it - * example: `fun main() { if(1){return;}else{return;} var x = 0; }` — var is unreachable - * 2) if control flow reaches end of function, store a flag to insert an implicit return - * example: `fun main() { assert(...); }` — has an implicit `return ()` statement before a brace - * - * Note, that it does not delete unreachable code, only prints warnings. - * Actual deleting is done much later (in "legacy" part), after AST is converted to Op. - * - * Note, that it's not CFG, it's just a shallow reachability detection. - * In the future, a true CFG should be introduced. For instance, in order to have nullable types, - * I'll need to implement smart casts. 
Then I'll think of a complicated granular control flow graph, - * considering data flow and exceptions (built before type inferring, of course), - * and detecting unreachable code will be a part of it. - */ - -namespace tolk { - -class UnreachableStatementsDetectVisitor final { - bool always_returns(AnyV v) { - switch (v->type) { - case ast_sequence: return always_returns(v->as()); - case ast_return_statement: return always_returns(v->as()); - case ast_throw_statement: return always_returns(v->as()); - case ast_function_call: return always_returns(v->as()); - case ast_repeat_statement: return always_returns(v->as()); - case ast_while_statement: return always_returns(v->as()); - case ast_do_while_statement: return always_returns(v->as()); - case ast_try_catch_statement: return always_returns(v->as()); - case ast_if_statement: return always_returns(v->as()); - default: - // unhandled statements (like assert) and statement expressions - return false; - } - } - - bool always_returns(V v) { - bool always = false; - for (AnyV item : v->get_items()) { - if (always && item->type != ast_empty_statement) { - item->loc.show_warning("unreachable code"); - break; - } - always |= always_returns(item); - } - return always; - } - - static bool always_returns([[maybe_unused]] V v) { - // quite obvious: `return expr` interrupts control flow - return true; - } - - static bool always_returns([[maybe_unused]] V v) { - // todo `throw excNo` currently does not interrupt control flow - // (in other words, `throw 1; something` - something is reachable) - // the reason is that internally it's transformed to a call of built-in function __throw(), - // which is a regular function, like __throw_if() or loadInt() - // to fix this later on, it should be deeper, introducing Op::_Throw for example, - // to make intermediate representations and stack optimizer also be aware that after it there is unreachable - return false; - } - - static bool always_returns([[maybe_unused]] V v) { - // neither 
annotations like @noreturn nor auto-detection of always-throwing functions also doesn't exist - // in order to do this in the future, it should be handled not only at AST/CFG level, - // but inside Op and low-level optimizer (at least if reachability detection is not moved out of there) - // see comments for `throw` above, similar to this case - return false; - } - - bool always_returns(V v) { - return always_returns(v->get_body()); - } - - bool always_returns(V v) { - return always_returns(v->get_body()); - } - - bool always_returns(V v) { - return always_returns(v->get_body()); - } - - bool always_returns(V v) { - return always_returns(v->get_try_body()) && always_returns(v->get_catch_body()); - } - - bool always_returns(V v) { - return always_returns(v->get_if_body()) && always_returns(v->get_else_body()); - } - -public: - static bool should_visit_function(FunctionPtr fun_ref) { - return fun_ref->is_code_function() && !fun_ref->is_generic_function(); - } - - void start_visiting_function(FunctionPtr fun_ref, V v_function) { - bool control_flow_reaches_end = !always_returns(v_function->get_body()->as()); - if (control_flow_reaches_end) { - fun_ref->mutate()->assign_is_implicit_return(); - } - } -}; - - -void pipeline_detect_unreachable_statements() { - visit_ast_of_all_functions(); -} - -void pipeline_detect_unreachable_statements(FunctionPtr fun_ref) { - UnreachableStatementsDetectVisitor visitor; - if (UnreachableStatementsDetectVisitor::should_visit_function(fun_ref)) { - visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); - } -} - -} // namespace tolk diff --git a/tolk/pipe-infer-types-and-calls.cpp b/tolk/pipe-infer-types-and-calls.cpp index 2f4290d6..7ab0aa1c 100644 --- a/tolk/pipe-infer-types-and-calls.cpp +++ b/tolk/pipe-infer-types-and-calls.cpp @@ -20,20 +20,22 @@ #include "ast-visitor.h" #include "generics-helpers.h" #include "type-system.h" +#include "smart-casts-cfg.h" /* * This is a complicated and crucial part of the pipeline. 
It simultaneously does the following: * * infers types of all expressions; example: `2 + 3` both are TypeDataInt, result is also - * * AND checks types for assignment, arguments passing, etc.; example: `fInt(cs)` is error passing slice to int * * AND binds function/method calls (assigns fun_ref); example: `globalF()`, fun_ref is assigned to `globalF` (unless generic) * * AND instantiates generic functions; example: `t.tuplePush(2)` creates `tuplePush<int>` and assigns fun_ref to dot field * * AND infers return type of functions if it's omitted (`fun f() { ... }` means "auto infer", not "void") + * * AND builds data flow graph, mostly used for smart casts (right at the time of inferring) + * Note that type checking (errors about type mismatches) is a later compilation step, due to loops. * * It's important to do all these parts simultaneously, they can't be split or separated. * For example, we can't bind `f(2)` earlier, because if `f` is a generic `f<T>`, we should instantiate it, * and in order to do it, we need to know argument types. - * For example, we can't bind `c.cellHash()` earlier, because in the future we'll have overloads (`cell.hash()` and `slice.hash()`), - * and in order to bind it, we need to know object type. + * For example, we can't bind `c.cellHash()` earlier, because in order to bind it, we need to know object type. + * For example, we can't infer `var y = x` without smart casts, because if x's type is refined, it affects y. * And vice versa, to infer type of expression in the middle, we need to have inferred all expressions preceding it, * which may also include generics, etc. * @@ -52,6 +54,36 @@ * Example: `fun tupleAt<T>(t: tuple, idx: int):T`, just `t.tupleGet(2)` can't be deduced (T left unspecified), * but for assignment with left-defined type, or a call to `fInt(t.tupleGet(2))` hint "int" helps deduce T. * + * Control flow is represented NOT as a "graph with edges".
Instead, it's a "structured DFS" for the AST: + * 1) at every point of inferring, we have "current flow facts" (FlowContext) + * 2) when we see an `if (...)`, we create two derived contexts (by cloning current) + * 3) after `if`, finalize them at the end and unify + * 4) if we detect unreachable code, we mark that path's context as "unreachable" + * In other words, we get the effect of a CFG but in a more direct approach. That's enough for AST-level data-flow. + * FlowContext contains "data-flow facts that are definitely known". + * // current facts: x is int?, t is (int, int) + * if (x != null && t.0 > 0) + * // current facts: x is int, t is (int, int), t.0 is positive + * else + * // current facts: x is null, t is (int, int), t.0 is not positive + * When branches rejoin, facts are merged back (int+null = int? and so on; here they would be the same as before the `if`). + * See smart-casts-cfg.cpp for detailed comments. + * + * About loops and partial re-entering. Consider the following: + * var x: int? = 5; + * // <- here x is `int` (smart cast) + * while (true) { + * // <- but here x is `int?` (not `int`) due to assignment in a loop + * if (...) { x = getNullableInt(); } + * } + * When building control flow, loops are inferred twice. In the above, at first iteration, x will be `int`, + * but at the second, x will be `int?` (after merging with the loop end). + * That's why type checking is done later, so as not to report false errors on the first iteration. + * Note that it would be better to postpone generics "materialization" as well: here only infer type arguments, + * but instantiate and re-assign fun_ref later. But it complicates the architecture significantly. + * For now, generics may encounter problems within loops on first iteration, though it's unlikely to face this + * in practice. (example: in the loop above, `genericFn(x)` will at first instantiate `genericFn<int>` and then `genericFn<int?>`) + * * Unlike other pipes, inferring can dig recursively on demand.
* Example: * fun getInt() { return 1; } @@ -87,183 +119,27 @@ static std::string to_string(FunctionPtr fun_ref) { return "`" + fun_ref->as_human_readable() + "`"; } +// fire a general error, just a wrapper over `throw` +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire(FunctionPtr cur_f, SrcLocation loc, const std::string& message) { + throw ParseError(cur_f, loc, message); +} + // fire an error when `fun f(...) asm ...` is called with T=(int,int) or other non-1 width on stack // asm functions generally can't handle it, they expect T to be a TVM primitive // (in FunC, `forall` type just couldn't be unified with non-primitives; in Tolk, generic T is expectedly inferred) GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_calling_asm_function_with_non1_stack_width_arg(SrcLocation loc, FunctionPtr fun_ref, const std::vector& substitutions, int arg_idx) { - throw ParseError(loc, "can not call `" + fun_ref->as_human_readable() + "` with " + fun_ref->genericTs->get_nameT(arg_idx) + "=" + substitutions[arg_idx]->as_human_readable() + ", because it occupies " + std::to_string(substitutions[arg_idx]->get_width_on_stack()) + " stack slots in TVM, not 1"); -} - -// fire an error on `var n = null` -// technically it's correct, type of `n` is TypeDataNullLiteral, but it's not what the user wanted -// so, it's better to see an error on assignment, that later, on `n` usage and types mismatch -// (most common is situation above, but generally, `var (x,n) = xn` where xn is a tensor with 2-nd always-null, can be) -GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_assign_always_null_to_variable(SrcLocation loc, LocalVarPtr assigned_var, bool is_assigned_null_literal) { - std::string var_name = assigned_var->name; - throw ParseError(loc, "can not infer type of `" + var_name + "`, it's always null; specify its type with `" + var_name + ": `" + (is_assigned_null_literal ? 
" or use `null as `" : "")); -} - -// fire an error on `!cell` / `+slice` -GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_cannot_apply_operator(SrcLocation loc, std::string_view operator_name, AnyExprV unary_expr) { - std::string op = static_cast(operator_name); - throw ParseError(loc, "can not apply operator `" + op + "` to " + to_string(unary_expr->inferred_type)); -} - -// fire an error on `int + cell` / `slice & int` -GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_cannot_apply_operator(SrcLocation loc, std::string_view operator_name, AnyExprV lhs, AnyExprV rhs) { - std::string op = static_cast(operator_name); - throw ParseError(loc, "can not apply operator `" + op + "` to " + to_string(lhs->inferred_type) + " and " + to_string(rhs->inferred_type)); +static void fire_error_calling_asm_function_with_non1_stack_width_arg(FunctionPtr cur_f, SrcLocation loc, FunctionPtr fun_ref, const std::vector& substitutions, int arg_idx) { + fire(cur_f, loc, "can not call `" + fun_ref->as_human_readable() + "` with " + fun_ref->genericTs->get_nameT(arg_idx) + "=" + substitutions[arg_idx]->as_human_readable() + ", because it occupies " + std::to_string(substitutions[arg_idx]->get_width_on_stack()) + " stack slots in TVM, not 1"); } // fire an error on `untypedTupleVar.0` when used without a hint GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_cannot_deduce_untyped_tuple_access(SrcLocation loc, int index) { +static void fire_error_cannot_deduce_untyped_tuple_access(FunctionPtr cur_f, SrcLocation loc, int index) { std::string idx_access = "." 
+ std::to_string(index); - throw ParseError(loc, "can not deduce type of `" + idx_access + "`; either assign it to variable like `var c: int = " + idx_access + "` or cast the result like `" + idx_access + " as int`"); + fire(cur_f, loc, "can not deduce type of `" + idx_access + "`; either assign it to variable like `var c: int = " + idx_access + "` or cast the result like `" + idx_access + " as int`"); } -// fire an error on `untypedTupleVar.0` when inferred as (int,int), or `[int, (int,int)]`, or other non-1 width in a tuple -GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_tuple_cannot_have_non1_stack_width_elem(SrcLocation loc, TypePtr inferred_type) { - throw ParseError(loc, "a tuple can not have " + to_string(inferred_type) + " inside, because it occupies " + std::to_string(inferred_type->get_width_on_stack()) + " stack slots in TVM, not 1"); -} - -// check type correctness of a passed argument when calling a function/method -static void check_function_argument(TypePtr param_type, bool is_mutate_param, AnyExprV ith_arg, bool is_obj_of_dot_call) { - // given `f(x: int)` and a call `f(expr)`, check that expr_type is assignable to `int` - if (!param_type->can_rhs_be_assigned(ith_arg->inferred_type)) { - if (is_obj_of_dot_call) { - ith_arg->error("can not call method for " + to_string(param_type) + " with object of type " + to_string(ith_arg)); - } else { - ith_arg->error("can not pass " + to_string(ith_arg) + " to " + to_string(param_type)); - } - } - // given `f(x: mutate int?)` and a call `f(expr)`, check that `int?` is assignable to expr_type - // (for instance, can't call such a function with `f(mutate intVal)`, since f can potentially assign null to it) - if (is_mutate_param && !ith_arg->inferred_type->can_rhs_be_assigned(param_type)) { - if (is_obj_of_dot_call) { - ith_arg->error("can not call method for mutate " + to_string(param_type) + " with object of type " + to_string(ith_arg) + ", because mutation is not type compatible"); - } else { 
- ith_arg->error("can not pass " + to_string(ith_arg) + " to mutate " + to_string(param_type) + ", because mutation is not type compatible"); - } - } -} - -/* - * TypeInferringUnifyStrategy unifies types from various branches to a common result (lca). - * It's used to auto infer function return type based on return statements, like in TypeScript. - * Example: `fun f() { ... return 1; ... return null; }` inferred as `int`. - * - * Besides function returns, it's also useful for ternary `return cond ? 1 : null` and `match` expression. - * If types can't be unified (a function returns int and cell, for example), `unify()` returns false, handled outside. - * BTW, don't confuse this way of inferring with Hindley-Milner, they have nothing in common. - */ -class TypeInferringUnifyStrategy { - TypePtr unified_result = nullptr; - - static TypePtr calculate_type_lca(TypePtr t1, TypePtr t2) { - if (t1 == t2) { - return t1; - } - if (t1->can_rhs_be_assigned(t2)) { - return t1; - } - if (t2->can_rhs_be_assigned(t1)) { - return t2; - } - - if (t1 == TypeDataNullLiteral::create()) { - return TypeDataNullable::create(t2); - } - if (t2 == TypeDataNullLiteral::create()) { - return TypeDataNullable::create(t1); - } - - const auto* tensor1 = t1->try_as(); - const auto* tensor2 = t2->try_as(); - if (tensor1 && tensor2 && tensor1->size() == tensor2->size()) { - std::vector types_lca; - types_lca.reserve(tensor1->size()); - for (int i = 0; i < tensor1->size(); ++i) { - TypePtr next = calculate_type_lca(tensor1->items[i], tensor2->items[i]); - if (next == nullptr) { - return nullptr; - } - types_lca.push_back(next); - } - return TypeDataTensor::create(std::move(types_lca)); - } - - const auto* tuple1 = t1->try_as(); - const auto* tuple2 = t2->try_as(); - if (tuple1 && tuple2 && tuple1->size() == tuple2->size()) { - std::vector types_lca; - types_lca.reserve(tuple1->size()); - for (int i = 0; i < tuple1->size(); ++i) { - TypePtr next = calculate_type_lca(tuple1->items[i], tuple2->items[i]); 
- if (next == nullptr) { - return nullptr; - } - types_lca.push_back(next); - } - return TypeDataTypedTuple::create(std::move(types_lca)); - } - - return nullptr; - } - -public: - bool unify_with(TypePtr next) { - if (unified_result == nullptr) { - unified_result = next; - return true; - } - if (unified_result == next) { - return true; - } - - TypePtr combined = calculate_type_lca(unified_result, next); - if (!combined) { - return false; - } - - unified_result = combined; - return true; - } - - bool unify_with_implicit_return_void() { - if (unified_result == nullptr) { - unified_result = TypeDataVoid::create(); - return true; - } - - return unified_result == TypeDataVoid::create(); - } - - TypePtr get_result() const { return unified_result; } -}; - -// handle __expect_type(expr, "type") call -// this is used in compiler tests -GNU_ATTRIBUTE_NOINLINE GNU_ATTRIBUTE_COLD -static void handle_possible_compiler_internal_call(FunctionPtr current_function, V v) { - FunctionPtr fun_ref = v->fun_maybe; - tolk_assert(fun_ref && fun_ref->is_builtin_function()); - static_cast(current_function); - - if (fun_ref->name == "__expect_type") { - tolk_assert(v->get_num_args() == 2); - TypePtr expected_type = parse_type_from_string(v->get_arg(1)->get_expr()->as()->str_val); - TypePtr expr_type = v->get_arg(0)->inferred_type; - if (expected_type != expr_type) { - v->error("__expect_type failed: expected " + to_string(expected_type) + ", got " + to_string(expr_type)); - } - } -} /* * This class handles all types of AST vertices and traverses them, filling all AnyExprV::inferred_type. 
@@ -272,9 +148,9 @@ static void handle_possible_compiler_internal_call(FunctionPtr current_function, * 1) when a new AST node type is introduced, I want it to fail here, not to be left un-inferred with UB at next steps * 2) easy to maintain a hint (see comments at the top of the file) */ -class InferCheckTypesAndCallsAndFieldsVisitor final { - FunctionPtr current_function = nullptr; - TypeInferringUnifyStrategy return_unifier; +class InferTypesAndCallsAndFieldsVisitor final { + FunctionPtr cur_f = nullptr; + std::vector return_statements; GNU_ATTRIBUTE_ALWAYS_INLINE static void assign_inferred_type(AnyExprV dst, AnyExprV src) { @@ -307,130 +183,132 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { } // traverse children in any statement - void process_any_statement(AnyV v) { + FlowContext process_any_statement(AnyV v, FlowContext&& flow) { switch (v->type) { case ast_sequence: - return process_sequence(v->as()); + return process_sequence(v->as(), std::move(flow)); case ast_return_statement: - return process_return_statement(v->as()); + return process_return_statement(v->as(), std::move(flow)); case ast_if_statement: - return process_if_statement(v->as()); + return process_if_statement(v->as(), std::move(flow)); case ast_repeat_statement: - return process_repeat_statement(v->as()); + return process_repeat_statement(v->as(), std::move(flow)); case ast_while_statement: - return process_while_statement(v->as()); + return process_while_statement(v->as(), std::move(flow)); case ast_do_while_statement: - return process_do_while_statement(v->as()); + return process_do_while_statement(v->as(), std::move(flow)); case ast_throw_statement: - return process_throw_statement(v->as()); + return process_throw_statement(v->as(), std::move(flow)); case ast_assert_statement: - return process_assert_statement(v->as()); + return process_assert_statement(v->as(), std::move(flow)); case ast_try_catch_statement: - return process_try_catch_statement(v->as()); + return 
process_try_catch_statement(v->as(), std::move(flow)); case ast_empty_statement: - return; + return flow; default: - infer_any_expr(reinterpret_cast(v)); + return process_expression_statement(reinterpret_cast(v), std::move(flow)); } } // assigns inferred_type for any expression (by calling assign_inferred_type) - void infer_any_expr(AnyExprV v, TypePtr hint = nullptr) { + // returns ExprFlow: out_facts that are "definitely known" after evaluating the whole expression + // if used_as_condition, true_facts/false_facts are also calculated (don't calculate them always for optimization) + ExprFlow infer_any_expr(AnyExprV v, FlowContext&& flow, bool used_as_condition, TypePtr hint = nullptr) { switch (v->type) { case ast_int_const: - return infer_int_const(v->as()); + return infer_int_const(v->as(), std::move(flow), used_as_condition); case ast_string_const: - return infer_string_const(v->as()); + return infer_string_const(v->as(), std::move(flow), used_as_condition); case ast_bool_const: - return infer_bool_const(v->as()); + return infer_bool_const(v->as(), std::move(flow), used_as_condition); case ast_local_vars_declaration: - return infer_local_vars_declaration(v->as()); + return infer_local_vars_declaration(v->as(), std::move(flow), used_as_condition); case ast_local_var_lhs: - return infer_local_var_lhs(v->as()); + return infer_local_var_lhs(v->as(), std::move(flow), used_as_condition); case ast_assign: - return infer_assignment(v->as()); + return infer_assignment(v->as(), std::move(flow), used_as_condition); case ast_set_assign: - return infer_set_assign(v->as()); + return infer_set_assign(v->as(), std::move(flow), used_as_condition); case ast_unary_operator: - return infer_unary_operator(v->as()); + return infer_unary_operator(v->as(), std::move(flow), used_as_condition); case ast_binary_operator: - return infer_binary_operator(v->as()); + return infer_binary_operator(v->as(), std::move(flow), used_as_condition); case ast_ternary_operator: - return 
infer_ternary_operator(v->as(), hint); + return infer_ternary_operator(v->as(), std::move(flow), used_as_condition, hint); case ast_cast_as_operator: - return infer_cast_as_operator(v->as()); + return infer_cast_as_operator(v->as(), std::move(flow), used_as_condition); case ast_not_null_operator: - return infer_not_null_operator(v->as()); + return infer_not_null_operator(v->as(), std::move(flow), used_as_condition); case ast_is_null_check: - return infer_is_null_check(v->as()); + return infer_is_null_check(v->as(), std::move(flow), used_as_condition); case ast_parenthesized_expression: - return infer_parenthesized(v->as(), hint); + return infer_parenthesized(v->as(), std::move(flow), used_as_condition, hint); case ast_reference: - return infer_reference(v->as()); + return infer_reference(v->as(), std::move(flow), used_as_condition); case ast_dot_access: - return infer_dot_access(v->as(), hint); + return infer_dot_access(v->as(), std::move(flow), used_as_condition, hint); case ast_function_call: - return infer_function_call(v->as(), hint); + return infer_function_call(v->as(), std::move(flow), used_as_condition, hint); case ast_tensor: - return infer_tensor(v->as(), hint); + return infer_tensor(v->as(), std::move(flow), used_as_condition, hint); case ast_typed_tuple: - return infer_typed_tuple(v->as(), hint); + return infer_typed_tuple(v->as(), std::move(flow), used_as_condition, hint); case ast_null_keyword: - return infer_null_keyword(v->as()); + return infer_null_keyword(v->as(), std::move(flow), used_as_condition); case ast_underscore: - return infer_underscore(v->as(), hint); + return infer_underscore(v->as(), std::move(flow), used_as_condition, hint); case ast_empty_expression: - return infer_empty_expression(v->as()); + return infer_empty_expression(v->as(), std::move(flow), used_as_condition); default: throw UnexpectedASTNodeType(v, "infer_any_expr"); } } - static TypePtr unwrap_nullable(TypePtr type) { - while (const TypeDataNullable* as_nullable = 
type->try_as()) { - type = as_nullable->inner; - } - return type; - } - - static bool expect_integer(AnyExprV v_inferred) { - return v_inferred->inferred_type == TypeDataInt::create(); - } - - static bool expect_integer(TypePtr inferred_type) { - return inferred_type == TypeDataInt::create(); - } - - static bool expect_boolean(AnyExprV v_inferred) { - return v_inferred->inferred_type == TypeDataBool::create(); - } - - static bool expect_boolean(TypePtr inferred_type) { - return inferred_type == TypeDataBool::create(); - } - - static void infer_int_const(V v) { + static ExprFlow infer_int_const(V v, FlowContext&& flow, bool used_as_condition) { assign_inferred_type(v, TypeDataInt::create()); + + ExprFlow after_v(std::move(flow), used_as_condition); + if (used_as_condition) { // `if (0)` always false + if (v->intval == 0) { + after_v.true_flow.mark_unreachable(UnreachableKind::CantHappen); + } else { + after_v.false_flow.mark_unreachable(UnreachableKind::CantHappen); + } + } + return after_v; } - static void infer_string_const(V v) { + static ExprFlow infer_string_const(V v, FlowContext&& flow, bool used_as_condition) { if (v->is_bitslice()) { assign_inferred_type(v, TypeDataSlice::create()); } else { assign_inferred_type(v, TypeDataInt::create()); } + + return ExprFlow(std::move(flow), used_as_condition); } - static void infer_bool_const(V v) { + static ExprFlow infer_bool_const(V v, FlowContext&& flow, bool used_as_condition) { assign_inferred_type(v, TypeDataBool::create()); + + ExprFlow after_v(std::move(flow), used_as_condition); + if (used_as_condition) { // `if (false)` always false + if (v->bool_val == false) { + after_v.true_flow.mark_unreachable(UnreachableKind::CantHappen); + } else { + after_v.false_flow.mark_unreachable(UnreachableKind::CantHappen); + } + } + return after_v; } - void infer_local_vars_declaration(V v) { - infer_any_expr(v->get_expr()); + ExprFlow infer_local_vars_declaration(V v, FlowContext&& flow, bool used_as_condition) { + flow = 
infer_any_expr(v->get_expr(), std::move(flow), used_as_condition).out_flow; assign_inferred_type(v, v->get_expr()); + return ExprFlow(std::move(flow), used_as_condition); } - static void infer_local_var_lhs(V v) { + static ExprFlow infer_local_var_lhs(V v, FlowContext&& flow, bool used_as_condition) { // `var v = rhs`, inferring is called for `v` // at the moment of inferring left side of assignment, we don't know type of rhs (since lhs is executed first) // so, mark `v` as unknown @@ -440,59 +318,87 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { } else { assign_inferred_type(v, v->declared_type ? v->declared_type : TypeDataUnknown::create()); } + return ExprFlow(std::move(flow), used_as_condition); } - void infer_assignment(V v) { + ExprFlow infer_assignment(V v, FlowContext&& flow, bool used_as_condition) { // v is assignment: `x = 5` / `var x = 5` / `var x: slice = 5` / `(cs,_) = f()` / `val (a,[b],_) = (a,t,0)` // execution flow is: lhs first, rhs second (at IR generation, also lhs is evaluated first, unlike FunC) // after inferring lhs, use it for hint when inferring rhs // example: `var i: int = t.tupleAt(0)` is ok (hint=int, T=int), but `var i = t.tupleAt(0)` not, since `tupleAt(t,i): T` AnyExprV lhs = v->get_lhs(); AnyExprV rhs = v->get_rhs(); - infer_any_expr(lhs); - infer_any_expr(rhs, lhs->inferred_type); - process_assignment_lhs_after_infer_rhs(lhs, rhs->inferred_type, rhs); + flow = infer_left_side_of_assignment(lhs, std::move(flow)); + flow = infer_any_expr(rhs, std::move(flow), false, lhs->inferred_type).out_flow; + process_assignment_lhs_after_infer_rhs(lhs, rhs->inferred_type, flow); assign_inferred_type(v, rhs); // note, that the resulting type is rhs, not lhs + + return ExprFlow(std::move(flow), used_as_condition); + } + + // for `v = rhs` (NOT `var v = lhs`), variable `v` may be smart cast at this point + // the purpose of this function is to drop smart casts from expressions used as left side of assignments + // another example: `x.0 
= rhs`, smart cast is dropped for `x.0` (not for `x`) + // the goal of dropping smart casts is to have lhs->inferred_type as actually declared, used as hint to infer rhs + FlowContext infer_left_side_of_assignment(AnyExprV lhs, FlowContext&& flow) { + if (auto lhs_tensor = lhs->try_as()) { + std::vector types_list; + types_list.reserve(lhs_tensor->size()); + for (int i = 0; i < lhs_tensor->size(); ++i) { + flow = infer_left_side_of_assignment(lhs_tensor->get_item(i), std::move(flow)); + types_list.push_back(lhs_tensor->get_item(i)->inferred_type); + } + assign_inferred_type(lhs, TypeDataTensor::create(std::move(types_list))); + + } else if (auto lhs_tuple = lhs->try_as()) { + std::vector types_list; + types_list.reserve(lhs_tuple->size()); + for (int i = 0; i < lhs_tuple->size(); ++i) { + flow = infer_left_side_of_assignment(lhs_tuple->get_item(i), std::move(flow)); + types_list.push_back(lhs_tuple->get_item(i)->inferred_type); + } + assign_inferred_type(lhs, TypeDataTypedTuple::create(std::move(types_list))); + + } else if (auto lhs_par = lhs->try_as()) { + flow = infer_left_side_of_assignment(lhs_par->get_expr(), std::move(flow)); + assign_inferred_type(lhs, lhs_par->get_expr()->inferred_type); + + } else { + flow = infer_any_expr(lhs, std::move(flow), false).out_flow; + if (extract_sink_expression_from_vertex(lhs)) { + TypePtr lhs_declared_type = calc_declared_type_before_smart_cast(lhs); + assign_inferred_type(lhs, lhs_declared_type); + } + } + + return flow; } // handle (and dig recursively) into `var lhs = rhs` // at this point, both lhs and rhs are already inferred, but lhs newly-declared vars are unknown (unless have declared_type) // examples: `var z = 5`, `var (x, [y]) = (2, [3])`, `var (x, [y]) = xy` // the purpose is to update inferred_type of lhs vars (z, x, y) + // and to re-assign types of tensors/tuples inside: `var (x,[y]) = ...` was `(unknown,[unknown])`, becomes `(int,[int])` // while recursing, keep track of rhs if lhs and rhs have common shape 
(5 for z, 2 for x, [3] for [y], 3 for y) // (so that on type mismatch, point to corresponding rhs, example: `var (x, y:slice) = (1, 2)` point to 2 - static void process_assignment_lhs_after_infer_rhs(AnyExprV lhs, TypePtr rhs_type, AnyExprV corresponding_maybe_rhs) { + static void process_assignment_lhs_after_infer_rhs(AnyExprV lhs, TypePtr rhs_type, FlowContext& out_flow) { tolk_assert(lhs->inferred_type != nullptr); - AnyExprV err_loc = corresponding_maybe_rhs ? corresponding_maybe_rhs : lhs; // `var ... = rhs` - dig into left part if (auto lhs_decl = lhs->try_as()) { - process_assignment_lhs_after_infer_rhs(lhs_decl->get_expr(), rhs_type, corresponding_maybe_rhs); + process_assignment_lhs_after_infer_rhs(lhs_decl->get_expr(), rhs_type, out_flow); return; } // inside `var v: int = rhs` / `var _ = rhs` / `var v redef = rhs` (lhs is "v" / "_" / "v") if (auto lhs_var = lhs->try_as()) { - if (lhs_var->inferred_type != TypeDataUnknown::create()) { // it's `var v: int` or redef - TypePtr declared_type = lhs_var->inferred_type; - if (!declared_type->can_rhs_be_assigned(rhs_type)) { - err_loc->error("can not assign " + to_string(rhs_type) + " to variable of type " + to_string(declared_type)); - } - } else { - if (rhs_type == TypeDataNullLiteral::create()) { - fire_error_assign_always_null_to_variable(err_loc->loc, lhs_var->var_ref->try_as(), corresponding_maybe_rhs && corresponding_maybe_rhs->type == ast_null_keyword); - } + TypePtr declared_type = lhs_var->marked_as_redef ? 
lhs_var->var_ref->declared_type : lhs_var->declared_type; + if (lhs_var->inferred_type == TypeDataUnknown::create()) { assign_inferred_type(lhs_var, rhs_type); assign_inferred_type(lhs_var->var_ref, rhs_type); } - return; - } - - // `v = rhs` / `(c1, c2) = rhs` (lhs is "v" / "_" / "c1" / "c2" after recursion) - if (lhs->try_as()) { - if (!lhs->inferred_type->can_rhs_be_assigned(rhs_type)) { - err_loc->error("can not assign " + to_string(rhs_type) + " to variable of type " + to_string(lhs)); - } + TypePtr smart_casted_type = declared_type ? calc_smart_cast_type_on_assignment(declared_type, rhs_type) : rhs_type; + out_flow.register_known_type(SinkExpression(lhs_var->var_ref), smart_casted_type); return; } @@ -500,16 +406,14 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { // dig recursively into v1 and v2 with corresponding rhs i-th item of a tensor if (auto lhs_tensor = lhs->try_as()) { const TypeDataTensor* rhs_type_tensor = rhs_type->try_as(); - if (!rhs_type_tensor) { - err_loc->error("can not assign " + to_string(rhs_type) + " to a tensor"); - } - if (lhs_tensor->size() != rhs_type_tensor->size()) { - err_loc->error("can not assign " + to_string(rhs_type) + ", sizes mismatch"); - } - V rhs_tensor_maybe = corresponding_maybe_rhs ? corresponding_maybe_rhs->try_as() : nullptr; + std::vector types_list; + types_list.reserve(lhs_tensor->size()); for (int i = 0; i < lhs_tensor->size(); ++i) { - process_assignment_lhs_after_infer_rhs(lhs_tensor->get_item(i), rhs_type_tensor->items[i], rhs_tensor_maybe ? rhs_tensor_maybe->get_item(i) : nullptr); + TypePtr ith_rhs_type = rhs_type_tensor && i < rhs_type_tensor->size() ? 
rhs_type_tensor->items[i] : TypeDataUnknown::create(); + process_assignment_lhs_after_infer_rhs(lhs_tensor->get_item(i), ith_rhs_type, out_flow); + types_list.push_back(lhs_tensor->get_item(i)->inferred_type); } + assign_inferred_type(lhs, TypeDataTensor::create(std::move(types_list))); return; } @@ -517,73 +421,57 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { // dig recursively into v1 and v2 with corresponding rhs i-th item of a tuple if (auto lhs_tuple = lhs->try_as()) { const TypeDataTypedTuple* rhs_type_tuple = rhs_type->try_as(); - if (!rhs_type_tuple) { - err_loc->error("can not assign " + to_string(rhs_type) + " to a tuple"); - } - if (lhs_tuple->size() != rhs_type_tuple->size()) { - err_loc->error("can not assign " + to_string(rhs_type) + ", sizes mismatch"); - } - V rhs_tuple_maybe = corresponding_maybe_rhs ? corresponding_maybe_rhs->try_as() : nullptr; + std::vector types_list; + types_list.reserve(lhs_tuple->size()); for (int i = 0; i < lhs_tuple->size(); ++i) { - process_assignment_lhs_after_infer_rhs(lhs_tuple->get_item(i), rhs_type_tuple->items[i], rhs_tuple_maybe ? rhs_tuple_maybe->get_item(i) : nullptr); + TypePtr ith_rhs_type = rhs_type_tuple && i < rhs_type_tuple->size() ? 
rhs_type_tuple->items[i] : TypeDataUnknown::create(); + process_assignment_lhs_after_infer_rhs(lhs_tuple->get_item(i), ith_rhs_type, out_flow); + types_list.push_back(lhs_tuple->get_item(i)->inferred_type); } + assign_inferred_type(lhs, TypeDataTypedTuple::create(std::move(types_list))); return; } - // check `untypedTuple.0 = rhs_tensor` and other non-1 width elements - if (auto lhs_dot = lhs->try_as()) { - if (lhs_dot->is_target_indexed_access() && lhs_dot->get_obj()->inferred_type == TypeDataTuple::create()) { - if (rhs_type->get_width_on_stack() != 1) { - fire_error_tuple_cannot_have_non1_stack_width_elem(err_loc->loc, rhs_type); - } - } + // `(v) = (rhs)`, just surrounded by parenthesis + if (auto lhs_par = lhs->try_as()) { + process_assignment_lhs_after_infer_rhs(lhs_par->get_expr(), rhs_type, out_flow); + assign_inferred_type(lhs, lhs_par->get_expr()); + return; } - // here is something unhandled like `a.0 = rhs`, just check type matching - // for something strange like `f() = rhs` type inferring will pass, but will fail later - if (!lhs->inferred_type->can_rhs_be_assigned(rhs_type)) { - err_loc->error("can not assign " + to_string(rhs_type) + " to " + to_string(lhs)); + // here is `v = rhs` (just assignment, not `var v = rhs`) / `a.0 = rhs` / `getObj(z=f()).0 = rhs` etc. 
+ // for instance, `tensorVar.0 = rhs` / `obj.field = rhs` has already checked index correctness while inferring lhs + // for strange lhs like `f() = rhs` type inferring (and later checking) will pass, but will fail lvalue check later + if (SinkExpression s_expr = extract_sink_expression_from_vertex(lhs)) { + TypePtr lhs_declared_type = calc_declared_type_before_smart_cast(lhs); + TypePtr smart_casted_type = calc_smart_cast_type_on_assignment(lhs_declared_type, rhs_type); + out_flow.register_known_type(s_expr, smart_casted_type); + assign_inferred_type(lhs, lhs_declared_type); } } - void infer_set_assign(V v) { + ExprFlow infer_set_assign(V v, FlowContext&& flow, bool used_as_condition) { AnyExprV lhs = v->get_lhs(); AnyExprV rhs = v->get_rhs(); - infer_any_expr(lhs); - infer_any_expr(rhs, lhs->inferred_type); + ExprFlow after_lhs = infer_any_expr(lhs, std::move(flow), false); + FlowContext rhs_flow = std::move(after_lhs.out_flow); + ExprFlow after_rhs = infer_any_expr(rhs, std::move(rhs_flow), false, lhs->inferred_type); // almost all operators implementation is hardcoded by built-in functions `_+_` and similar std::string_view builtin_func = v->operator_name; // "+" for operator += - switch (v->tok) { - // &= |= ^= are "overloaded" both for integers and booleans, (int &= bool) is NOT allowed - case tok_set_bitwise_and: - case tok_set_bitwise_or: - case tok_set_bitwise_xor: { - bool both_int = expect_integer(lhs) && expect_integer(rhs); - bool both_bool = expect_boolean(lhs) && expect_boolean(rhs); - if (!both_int && !both_bool) { - fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); - } - break; - } - // others are mathematical: += *= ... 
- default: - if (!expect_integer(lhs) || !expect_integer(rhs)) { - fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); - } - } - assign_inferred_type(v, lhs); if (!builtin_func.empty()) { FunctionPtr builtin_sym = lookup_global_symbol("_" + static_cast(builtin_func) + "_")->try_as(); v->mutate()->assign_fun_ref(builtin_sym); } + + return ExprFlow(std::move(after_rhs.out_flow), used_as_condition); } - void infer_unary_operator(V v) { + ExprFlow infer_unary_operator(V v, FlowContext&& flow, bool used_as_condition) { AnyExprV rhs = v->get_rhs(); - infer_any_expr(rhs); + ExprFlow after_rhs = infer_any_expr(rhs, std::move(flow), used_as_condition); // all operators implementation is hardcoded by built-in functions `~_` and similar std::string_view builtin_func = v->operator_name; @@ -592,95 +480,89 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { case tok_minus: case tok_plus: case tok_bitwise_not: - if (!expect_integer(rhs)) { - fire_error_cannot_apply_operator(v->loc, v->operator_name, rhs); - } assign_inferred_type(v, TypeDataInt::create()); break; case tok_logical_not: - if (expect_boolean(rhs)) { + if (rhs->inferred_type == TypeDataBool::create()) { builtin_func = "!b"; // "overloaded" for bool - } else if (!expect_integer(rhs)) { - fire_error_cannot_apply_operator(v->loc, v->operator_name, rhs); } assign_inferred_type(v, TypeDataBool::create()); + std::swap(after_rhs.false_flow, after_rhs.true_flow); break; default: tolk_assert(false); } - if (!builtin_func.empty()) { - FunctionPtr builtin_sym = lookup_global_symbol(static_cast(builtin_func) + "_")->try_as(); - v->mutate()->assign_fun_ref(builtin_sym); - } + FunctionPtr builtin_sym = lookup_global_symbol(static_cast(builtin_func) + "_")->try_as(); + v->mutate()->assign_fun_ref(builtin_sym); + + return after_rhs; } - void infer_binary_operator(V v) { + ExprFlow infer_binary_operator(V v, FlowContext&& flow, bool used_as_condition) { AnyExprV lhs = v->get_lhs(); AnyExprV rhs = v->get_rhs(); 
- infer_any_expr(lhs); - infer_any_expr(rhs); // almost all operators implementation is hardcoded by built-in functions `_+_` and similar std::string_view builtin_func = v->operator_name; switch (v->tok) { - // == != can compare both integers and booleans, (int == bool) is NOT allowed + // comparison operators, returning bool case tok_eq: - case tok_neq: { - bool both_int = expect_integer(unwrap_nullable(lhs->inferred_type)) && expect_integer(unwrap_nullable(rhs->inferred_type)); - bool both_bool = expect_boolean(unwrap_nullable(lhs->inferred_type)) && expect_boolean(unwrap_nullable(rhs->inferred_type)); - if (!both_int && !both_bool) { - if (lhs->inferred_type == rhs->inferred_type) { // compare slice with slice - v->error("type " + to_string(lhs) + " can not be compared with `== !=`"); - } else { - fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); - } - } - assign_inferred_type(v, TypeDataBool::create()); - break; - } - // < > can compare only integers + case tok_neq: case tok_lt: case tok_gt: case tok_leq: case tok_geq: - case tok_spaceship: { - if (!expect_integer(lhs) || !expect_integer(rhs)) { - fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); - } + case tok_spaceship: + flow = infer_any_expr(lhs, std::move(flow), false).out_flow; + flow = infer_any_expr(rhs, std::move(flow), false).out_flow; assign_inferred_type(v, TypeDataBool::create()); break; - } - // & | ^ are "overloaded" both for integers and booleans, (int & bool) is NOT allowed + // & | ^ are "overloaded" both for integers and booleans case tok_bitwise_and: case tok_bitwise_or: - case tok_bitwise_xor: { - bool both_int = expect_integer(lhs) && expect_integer(rhs); - bool both_bool = expect_boolean(lhs) && expect_boolean(rhs); - if (!both_int && !both_bool) { - fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); + case tok_bitwise_xor: + flow = infer_any_expr(lhs, std::move(flow), false).out_flow; + flow = infer_any_expr(rhs, std::move(flow), 
false).out_flow; + if (lhs->inferred_type == TypeDataBool::create() && rhs->inferred_type == TypeDataBool::create()) { + assign_inferred_type(v, TypeDataBool::create()); + } else { + assign_inferred_type(v, TypeDataInt::create()); } assign_inferred_type(v, rhs); // (int & int) is int, (bool & bool) is bool break; - } - // && || can work with integers and booleans, (int && bool) is allowed - case tok_logical_and: - case tok_logical_or: { - bool lhs_ok = expect_integer(lhs) || expect_boolean(lhs); - bool rhs_ok = expect_integer(rhs) || expect_boolean(rhs); - if (!lhs_ok || !rhs_ok) { - fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); - } + // && || result in booleans, but building flow facts is tricky due to short-circuit + case tok_logical_and: { + ExprFlow after_lhs = infer_any_expr(lhs, std::move(flow), true); + ExprFlow after_rhs = infer_any_expr(rhs, std::move(after_lhs.true_flow), true); assign_inferred_type(v, TypeDataBool::create()); - builtin_func = {}; // no built-in functions, logical operators are expressed as IFs at IR level - break; + if (!used_as_condition) { + FlowContext out_flow = FlowContext::merge_flow(std::move(after_lhs.false_flow), std::move(after_rhs.out_flow)); + return ExprFlow(std::move(out_flow), false); + } + FlowContext out_flow = FlowContext::merge_flow(std::move(after_lhs.out_flow), std::move(after_rhs.out_flow)); + FlowContext true_flow = std::move(after_rhs.true_flow); + FlowContext false_flow = FlowContext::merge_flow(std::move(after_lhs.false_flow), std::move(after_rhs.false_flow)); + return ExprFlow(std::move(out_flow), std::move(true_flow), std::move(false_flow)); + } + case tok_logical_or: { + ExprFlow after_lhs = infer_any_expr(lhs, std::move(flow), true); + ExprFlow after_rhs = infer_any_expr(rhs, std::move(after_lhs.false_flow), true); + assign_inferred_type(v, TypeDataBool::create()); + if (!used_as_condition) { + FlowContext out_flow = FlowContext::merge_flow(std::move(after_lhs.true_flow), 
std::move(after_rhs.out_flow)); + return ExprFlow(std::move(after_rhs.out_flow), false); + } + FlowContext out_flow = FlowContext::merge_flow(std::move(after_lhs.out_flow), std::move(after_rhs.out_flow)); + FlowContext true_flow = FlowContext::merge_flow(std::move(after_lhs.true_flow), std::move(after_rhs.true_flow)); + FlowContext false_flow = std::move(after_rhs.false_flow); + return ExprFlow(std::move(out_flow), std::move(true_flow), std::move(false_flow)); } // others are mathematical: + * ... default: - if (!expect_integer(lhs) || !expect_integer(rhs)) { - fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); - } + flow = infer_any_expr(lhs, std::move(flow), false).out_flow; + flow = infer_any_expr(rhs, std::move(flow), false).out_flow; assign_inferred_type(v, TypeDataInt::create()); } @@ -688,63 +570,118 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { FunctionPtr builtin_sym = lookup_global_symbol("_" + static_cast(builtin_func) + "_")->try_as(); v->mutate()->assign_fun_ref(builtin_sym); } + + return ExprFlow(std::move(flow), used_as_condition); } - void infer_ternary_operator(V v, TypePtr hint) { - AnyExprV cond = v->get_cond(); - infer_any_expr(cond); - if (!expect_integer(cond) && !expect_boolean(cond)) { - cond->error("can not use " + to_string(cond) + " as a boolean condition"); + ExprFlow infer_ternary_operator(V v, FlowContext&& flow, bool used_as_condition, TypePtr hint) { + ExprFlow after_cond = infer_any_expr(v->get_cond(), std::move(flow), true); + v->get_cond()->mutate()->assign_always_true_or_false(after_cond.get_always_true_false_state()); + + ExprFlow after_true = infer_any_expr(v->get_when_true(), std::move(after_cond.true_flow), used_as_condition, hint); + ExprFlow after_false = infer_any_expr(v->get_when_false(), std::move(after_cond.false_flow), used_as_condition, hint); + + if (v->get_cond()->is_always_true) { + assign_inferred_type(v, v->get_when_true()); + return after_true; + } + if 
(v->get_cond()->is_always_false) { + assign_inferred_type(v, v->get_when_false()); + return after_false; } - infer_any_expr(v->get_when_true(), hint); - infer_any_expr(v->get_when_false(), hint); TypeInferringUnifyStrategy tern_type; tern_type.unify_with(v->get_when_true()->inferred_type); if (!tern_type.unify_with(v->get_when_false()->inferred_type)) { - v->error("types of ternary branches are incompatible"); + fire(cur_f, v->loc, "types of ternary branches are incompatible: " + to_string(v->get_when_true()) + " and " + to_string(v->get_when_false())); } assign_inferred_type(v, tern_type.get_result()); + + FlowContext out_flow = FlowContext::merge_flow(std::move(after_true.out_flow), std::move(after_false.out_flow)); + return ExprFlow(std::move(out_flow), std::move(after_true.true_flow), std::move(after_false.false_flow)); } - void infer_cast_as_operator(V v) { + ExprFlow infer_cast_as_operator(V v, FlowContext&& flow, bool used_as_condition) { // for `expr as `, use this type for hint, so that `t.tupleAt(0) as int` is ok - infer_any_expr(v->get_expr(), v->cast_to_type); - if (!v->get_expr()->inferred_type->can_be_casted_with_as_operator(v->cast_to_type)) { - v->error("type " + to_string(v->get_expr()) + " can not be cast to " + to_string(v->cast_to_type)); - } + ExprFlow after_expr = infer_any_expr(v->get_expr(), std::move(flow), false, v->cast_to_type); assign_inferred_type(v, v->cast_to_type); + + if (!used_as_condition) { + return after_expr; + } + return ExprFlow(std::move(after_expr.out_flow), true); } - void infer_is_null_check(V v) { - infer_any_expr(v->get_expr()); + ExprFlow infer_is_null_check(V v, FlowContext&& flow, bool used_as_condition) { + ExprFlow after_expr = infer_any_expr(v->get_expr(), std::move(flow), false); assign_inferred_type(v, TypeDataBool::create()); + + TypePtr expr_type = v->get_expr()->inferred_type; + TypePtr non_null_type = calculate_type_subtract_null(expr_type); + if (expr_type == TypeDataNullLiteral::create()) { // `expr == 
null` is always true + v->mutate()->assign_always_true_or_false(v->is_negated ? 2 : 1); + } else if (non_null_type == TypeDataNever::create()) { // `expr == null` is always false + v->mutate()->assign_always_true_or_false(v->is_negated ? 1 : 2); + } else { + v->mutate()->assign_always_true_or_false(0); + } + + if (!used_as_condition) { + return after_expr; + } + + FlowContext true_flow = after_expr.out_flow.clone(); + FlowContext false_flow = after_expr.out_flow.clone(); + if (SinkExpression s_expr = extract_sink_expression_from_vertex(v->get_expr())) { + if (v->is_always_true) { + false_flow.mark_unreachable(UnreachableKind::CantHappen); + false_flow.register_known_type(s_expr, TypeDataNever::create()); + } else if (v->is_always_false) { + true_flow.mark_unreachable(UnreachableKind::CantHappen); + true_flow.register_known_type(s_expr, TypeDataNever::create()); + } else if (!v->is_negated) { + true_flow.register_known_type(s_expr, TypeDataNullLiteral::create()); + false_flow.register_known_type(s_expr, non_null_type); + } else { + true_flow.register_known_type(s_expr, non_null_type); + false_flow.register_known_type(s_expr, TypeDataNullLiteral::create()); + } + } + return ExprFlow(std::move(after_expr.out_flow), std::move(true_flow), std::move(false_flow)); } - void infer_not_null_operator(V v) { - infer_any_expr(v->get_expr()); + ExprFlow infer_not_null_operator(V v, FlowContext&& flow, bool used_as_condition) { + ExprFlow after_expr = infer_any_expr(v->get_expr(), std::move(flow), false); + if (const auto* as_nullable = v->get_expr()->inferred_type->try_as()) { - // operator `!` used for `T?`, leave `T` assign_inferred_type(v, as_nullable->inner); } else { - // operator `!` used for non-nullable, probably a warning should be printed assign_inferred_type(v, v->get_expr()); } + + if (!used_as_condition) { + return after_expr; + } + return ExprFlow(std::move(after_expr.out_flow), true); } - void infer_parenthesized(V v, TypePtr hint) { - 
infer_any_expr(v->get_expr(), hint); + ExprFlow infer_parenthesized(V v, FlowContext&& flow, bool used_as_condition, TypePtr hint) { + ExprFlow after_expr = infer_any_expr(v->get_expr(), std::move(flow), used_as_condition, hint); assign_inferred_type(v, v->get_expr()); + return after_expr; } - static void infer_reference(V v) { + ExprFlow infer_reference(V v, FlowContext&& flow, bool used_as_condition) { if (LocalVarPtr var_ref = v->sym->try_as()) { - assign_inferred_type(v, var_ref->declared_type); + TypePtr declared_or_smart_casted = flow.smart_cast_if_exists(SinkExpression(var_ref)); + tolk_assert(declared_or_smart_casted != nullptr); // all local vars are presented in flow + assign_inferred_type(v, declared_or_smart_casted); } else if (GlobalConstPtr const_ref = v->sym->try_as()) { assign_inferred_type(v, const_ref->is_int_const() ? TypeDataInt::create() : TypeDataSlice::create()); } else if (GlobalVarPtr glob_ref = v->sym->try_as()) { + // there are no smart casts for globals, it's a way of preventing reading one global multiple times, it costs gas assign_inferred_type(v, glob_ref->declared_type); } else if (FunctionPtr fun_ref = v->sym->try_as()) { @@ -753,7 +690,7 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { if (fun_ref->is_generic_function() && !v_instantiationTs) { // `genericFn` is invalid as non-call, can't be used without - v->error("can not use a generic function " + to_string(fun_ref) + " as non-call"); + fire(cur_f, v->loc, "can not use a generic function " + to_string(fun_ref) + " as non-call"); } else if (fun_ref->is_generic_function()) { // `genericFn` is valid, it's a reference to instantiation @@ -761,15 +698,15 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { fun_ref = check_and_instantiate_generic_function(v->loc, fun_ref, std::move(substitutions)); v->mutate()->assign_sym(fun_ref); - } else if (UNLIKELY(v_instantiationTs != nullptr)) { + } else if (v_instantiationTs != nullptr && 
!fun_ref->is_instantiation_of_generic_function()) { // non-generic function referenced like `return beginCell;` - v_instantiationTs->error("not generic function used with generic T"); + fire(cur_f, v_instantiationTs->loc, "not generic function used with generic T"); } fun_ref->mutate()->assign_is_used_as_noncall(); get_or_infer_return_type(fun_ref); assign_inferred_type(v, fun_ref->inferred_full_type); - return; + return ExprFlow(std::move(flow), used_as_condition); } else { tolk_assert(false); @@ -777,16 +714,17 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { // for non-functions: `local_var` and similar not allowed if (UNLIKELY(v->has_instantiationTs())) { - v->get_instantiationTs()->error("generic T not expected here"); + fire(cur_f, v->get_instantiationTs()->loc, "generic T not expected here"); } + return ExprFlow(std::move(flow), used_as_condition); } // given `genericF` / `t.tupleFirst` (the user manually specified instantiation Ts), // validate and collect them // returns: [int, slice] / [cell] - static std::vector collect_fun_generic_substitutions_from_manually_specified(SrcLocation loc, FunctionPtr fun_ref, V instantiationT_list) { + std::vector collect_fun_generic_substitutions_from_manually_specified(SrcLocation loc, FunctionPtr fun_ref, V instantiationT_list) const { if (fun_ref->genericTs->size() != instantiationT_list->get_items().size()) { - throw ParseError(loc, "wrong count of generic T: expected " + std::to_string(fun_ref->genericTs->size()) + ", got " + std::to_string(instantiationT_list->size())); + fire(cur_f, loc, "wrong count of generic T: expected " + std::to_string(fun_ref->genericTs->size()) + ", got " + std::to_string(instantiationT_list->size())); } std::vector substitutions; @@ -804,30 +742,27 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { // example: was `t.tuplePush(2)`, read , instantiate `tuplePush` (will later fail type check) // example: was `var cb = t.tupleFirst;` (used as reference, as non-call), instantiate 
`tupleFirst` // returns fun_ref to instantiated function - static FunctionPtr check_and_instantiate_generic_function(SrcLocation loc, FunctionPtr fun_ref, std::vector&& substitutionTs) { + FunctionPtr check_and_instantiate_generic_function(SrcLocation loc, FunctionPtr fun_ref, std::vector&& substitutionTs) const { // T for asm function must be a TVM primitive (width 1), otherwise, asm would act incorrectly if (fun_ref->is_asm_function() || fun_ref->is_builtin_function()) { for (int i = 0; i < static_cast(substitutionTs.size()); ++i) { if (substitutionTs[i]->get_width_on_stack() != 1) { - fire_error_calling_asm_function_with_non1_stack_width_arg(loc, fun_ref, substitutionTs, i); + fire_error_calling_asm_function_with_non1_stack_width_arg(cur_f, loc, fun_ref, substitutionTs, i); } } } std::string inst_name = generate_instantiated_name(fun_ref->name, substitutionTs); - try { - // make deep clone of `f` with substitutionTs - // (if `f` was already instantiated, it will be immediately returned from a symbol table) - return instantiate_generic_function(loc, fun_ref, inst_name, std::move(substitutionTs)); - } catch (const ParseError& ex) { - throw ParseError(ex.where, "while instantiating generic function `" + inst_name + "` at " + loc.to_string() + ": " + ex.message); - } + // make deep clone of `f` with substitutionTs + // (if `f` was already instantiated, it will be immediately returned from a symbol table) + return instantiate_generic_function(loc, fun_ref, inst_name, std::move(substitutionTs)); } - void infer_dot_access(V v, TypePtr hint) { + ExprFlow infer_dot_access(V v, FlowContext&& flow, bool used_as_condition, TypePtr hint) { // it's NOT a method call `t.tupleSize()` (since such cases are handled by infer_function_call) // it's `t.0`, `getUser().id`, and `t.tupleSize` (as a reference, not as a call) - infer_any_expr(v->get_obj()); + flow = infer_any_expr(v->get_obj(), std::move(flow), false).out_flow; + TypePtr obj_type = v->get_obj()->inferred_type; // our 
goal is to fill v->target knowing type of obj V v_ident = v->get_identifier(); // field/method name vertex @@ -840,19 +775,31 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { int index_at = std::stoi(std::string(field_name)); if (const auto* t_tensor = obj_type->try_as()) { if (index_at >= t_tensor->size()) { - v_ident->error("invalid tensor index, expected 0.." + std::to_string(t_tensor->items.size() - 1)); + fire(cur_f, v_ident->loc, "invalid tensor index, expected 0.." + std::to_string(t_tensor->items.size() - 1)); } v->mutate()->assign_target(index_at); - assign_inferred_type(v, t_tensor->items[index_at]); - return; + TypePtr inferred_type = t_tensor->items[index_at]; + if (SinkExpression s_expr = extract_sink_expression_from_vertex(v)) { + if (TypePtr smart_casted = flow.smart_cast_if_exists(s_expr)) { + inferred_type = smart_casted; + } + } + assign_inferred_type(v, inferred_type); + return ExprFlow(std::move(flow), used_as_condition); } if (const auto* t_tuple = obj_type->try_as()) { if (index_at >= t_tuple->size()) { - v_ident->error("invalid tuple index, expected 0.." + std::to_string(t_tuple->items.size() - 1)); + fire(cur_f, v_ident->loc, "invalid tuple index, expected 0.." 
+ std::to_string(t_tuple->items.size() - 1)); } v->mutate()->assign_target(index_at); - assign_inferred_type(v, t_tuple->items[index_at]); - return; + TypePtr inferred_type = t_tuple->items[index_at]; + if (SinkExpression s_expr = extract_sink_expression_from_vertex(v)) { + if (TypePtr smart_casted = flow.smart_cast_if_exists(s_expr)) { + inferred_type = smart_casted; + } + } + assign_inferred_type(v, inferred_type); + return ExprFlow(std::move(flow), used_as_condition); } if (obj_type->try_as()) { TypePtr item_type = nullptr; @@ -860,35 +807,32 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { item_type = TypeDataUnknown::create(); } else { if (hint == nullptr) { - fire_error_cannot_deduce_untyped_tuple_access(v->loc, index_at); - } - if (hint->get_width_on_stack() != 1) { - fire_error_tuple_cannot_have_non1_stack_width_elem(v->loc, hint); + fire_error_cannot_deduce_untyped_tuple_access(cur_f, v->loc, index_at); } item_type = hint; } v->mutate()->assign_target(index_at); assign_inferred_type(v, item_type); - return; + return ExprFlow(std::move(flow), used_as_condition); } - v_ident->error("type " + to_string(obj_type) + " is not indexable"); + fire(cur_f, v_ident->loc, "type " + to_string(obj_type) + " is not indexable"); } // for now, Tolk doesn't have fields and object-scoped methods; `t.tupleSize` is a global function `tupleSize` const Symbol* sym = lookup_global_symbol(field_name); FunctionPtr fun_ref = sym ? 
sym->try_as() : nullptr; if (!fun_ref) { - v_ident->error("non-existing field `" + static_cast(field_name) + "` of type " + to_string(obj_type)); + fire(cur_f, v_ident->loc, "non-existing field `" + static_cast(field_name) + "` of type " + to_string(obj_type)); } // `t.tupleSize` is ok, `cs.tupleSize` not if (!fun_ref->parameters[0].declared_type->can_rhs_be_assigned(obj_type)) { - v_ident->error("referencing a method for " + to_string(fun_ref->parameters[0].declared_type) + " with object of type " + to_string(obj_type)); + fire(cur_f, v_ident->loc, "referencing a method for " + to_string(fun_ref->parameters[0].declared_type) + " with object of type " + to_string(obj_type)); } if (fun_ref->is_generic_function() && !v_instantiationTs) { // `genericFn` and `t.tupleAt` are invalid as non-call, they can't be used without - v->error("can not use a generic function " + to_string(fun_ref) + " as non-call"); + fire(cur_f, v->loc, "can not use a generic function " + to_string(fun_ref) + " as non-call"); } else if (fun_ref->is_generic_function()) { // `t.tupleAt` is valid, it's a reference to instantiation @@ -897,16 +841,17 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { } else if (UNLIKELY(v_instantiationTs != nullptr)) { // non-generic method referenced like `var cb = c.cellHash;` - v_instantiationTs->error("not generic function used with generic T"); + fire(cur_f, v_instantiationTs->loc, "not generic function used with generic T"); } fun_ref->mutate()->assign_is_used_as_noncall(); v->mutate()->assign_target(fun_ref); get_or_infer_return_type(fun_ref); assign_inferred_type(v, fun_ref->inferred_full_type); // type of `t.tupleSize` is TypeDataFunCallable + return ExprFlow(std::move(flow), used_as_condition); } - void infer_function_call(V v, TypePtr hint) { + ExprFlow infer_function_call(V v, FlowContext&& flow, bool used_as_condition, TypePtr hint) { AnyExprV callee = v->get_callee(); // v is `globalF(args)` / `globalF(args)` / `obj.method(args)` / 
`local_var(args)` / `getF()(args)` @@ -926,19 +871,19 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { delta_self = 1; dot_obj = v_dot->get_obj(); v_instantiationTs = v_dot->get_instantiationTs(); // present for `obj.someMethod()` - infer_any_expr(dot_obj); + flow = infer_any_expr(dot_obj, std::move(flow), false).out_flow; // it can be indexed access (`tensorVar.0()`, `tupleVar.1()`) or a method (`t.tupleSize()`) std::string_view field_name = v_dot->get_field_name(); if (field_name[0] >= '0' && field_name[0] <= '9') { // indexed access `ab.2()`, then treat `ab.2` just like an expression, fun_ref remains nullptr - // infer_dot_access() will be called for a callee, it will check type, index correctness, etc. + // infer_dot_access() will be called for a callee, it will check index correctness } else { // for now, Tolk doesn't have fields and object-scoped methods; `t.tupleSize` is a global function `tupleSize` const Symbol* sym = lookup_global_symbol(field_name); fun_ref = sym ? sym->try_as() : nullptr; if (!fun_ref) { - v_dot->get_identifier()->error("non-existing method `" + static_cast(field_name) + "` of type " + to_string(dot_obj)); + fire(cur_f, v_dot->get_identifier()->loc, "non-existing method `" + static_cast(field_name) + "` of type " + to_string(dot_obj)); } } @@ -949,46 +894,43 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { // handle `local_var()` / `getF()()` / `5()` / `SOME_CONST()` / `obj.method()()()` / `tensorVar.0()` if (!fun_ref) { - // treat callee like a usual expression, which must have "callable" inferred type - infer_any_expr(callee); + // treat callee like a usual expression + flow = infer_any_expr(callee, std::move(flow), false).out_flow; + // it must have "callable" inferred type const TypeDataFunCallable* f_callable = callee->inferred_type->try_as(); if (!f_callable) { // `5()` / `SOME_CONST()` / `null()` - v->error("calling a non-function " + to_string(callee->inferred_type)); + fire(cur_f, v->loc, "calling a non-function 
" + to_string(callee->inferred_type)); } - // check arguments count and their types + // check arguments count (their types will be checked in a later pipe) if (v->get_num_args() != static_cast(f_callable->params_types.size())) { - v->error("expected " + std::to_string(f_callable->params_types.size()) + " arguments, got " + std::to_string(v->get_arg_list()->size())); + fire(cur_f, v->loc, "expected " + std::to_string(f_callable->params_types.size()) + " arguments, got " + std::to_string(v->get_arg_list()->size())); } for (int i = 0; i < v->get_num_args(); ++i) { auto arg_i = v->get_arg(i)->get_expr(); - TypePtr param_type = f_callable->params_types[i]; - infer_any_expr(arg_i, param_type); - if (!param_type->can_rhs_be_assigned(arg_i->inferred_type)) { - arg_i->error("can not pass " + to_string(arg_i) + " to " + to_string(param_type)); - } + flow = infer_any_expr(arg_i, std::move(flow), false, f_callable->params_types[i]).out_flow; assign_inferred_type(v->get_arg(i), arg_i); } v->mutate()->assign_fun_ref(nullptr); // no fun_ref to a global function assign_inferred_type(v, f_callable->return_type); - return; + return ExprFlow(std::move(flow), used_as_condition); } // so, we have a call `f(args)` or `obj.f(args)`, f is a global function (fun_ref) (code / asm / builtin) - // we're going to iterate over passed arguments, check type compatibility, and (if generic) infer substitutionTs + // we're going to iterate over passed arguments, and (if generic) infer substitutionTs // at first, check arguments count (Tolk doesn't have optional parameters, so just compare counts) int n_arguments = v->get_num_args() + delta_self; int n_parameters = fun_ref->get_num_params(); if (!n_parameters && dot_obj) { - v->error("`" + fun_ref->name + "` has no parameters and can not be called as method"); + fire(cur_f, v->loc, "`" + fun_ref->name + "` has no parameters and can not be called as method"); } if (n_parameters < n_arguments) { - v->error("too many arguments in call to `" + 
fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); + fire(cur_f, v->loc, "too many arguments in call to `" + fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); } if (n_arguments < n_parameters) { - v->error("too few arguments in call to `" + fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); + fire(cur_f, v->loc, "too few arguments in call to `" + fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); } - // now, for every passed argument, we need to infer its type, and check it against parameter type + // now, for every passed argument, we need to infer its type // for regular functions, it's obvious // but for generic functions, we need to infer type arguments (substitutionTs) on the fly // (unless Ts are specified by a user like `f(args)` / `t.tupleAt()`, take them) @@ -1005,9 +947,14 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { const LocalVarData& param_0 = fun_ref->parameters[0]; TypePtr param_type = param_0.declared_type; if (param_type->has_genericT_inside()) { - param_type = deducingTs->auto_deduce_from_argument(dot_obj->loc, param_type, dot_obj->inferred_type); + param_type = deducingTs->auto_deduce_from_argument(cur_f, dot_obj->loc, param_type, dot_obj->inferred_type); + } + if (param_0.is_mutate_parameter() && dot_obj->inferred_type != param_type) { + if (SinkExpression s_expr = extract_sink_expression_from_vertex(dot_obj)) { + assign_inferred_type(dot_obj, calc_declared_type_before_smart_cast(dot_obj)); + flow.register_known_type(s_expr, param_type); + } } - check_function_argument(param_type, param_0.is_mutate_parameter(), dot_obj, true); } for (int i = 0; i < v->get_num_args(); ++i) { const LocalVarData& param_i = 
fun_ref->parameters[delta_self + i]; @@ -1017,13 +964,20 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { param_type = deducingTs->replace_by_manually_specified(param_type); } if (param_type->has_genericT_inside()) { // `f(a)` where f is generic: use `a` to infer param type - infer_any_expr(arg_i); // then arg_i is inferred without any hint - param_type = deducingTs->auto_deduce_from_argument(arg_i->loc, param_type, arg_i->inferred_type); + // then arg_i is inferred without any hint + flow = infer_any_expr(arg_i, std::move(flow), false).out_flow; + param_type = deducingTs->auto_deduce_from_argument(cur_f, arg_i->loc, param_type, arg_i->inferred_type); } else { - infer_any_expr(arg_i, param_type); // param_type is hint, helps infer arg_i + // param_type is hint, helps infer arg_i + flow = infer_any_expr(arg_i, std::move(flow), false, param_type).out_flow; } assign_inferred_type(v->get_arg(i), arg_i); // arg itself is an expression - check_function_argument(param_type, param_i.is_mutate_parameter(), arg_i, false); + if (param_i.is_mutate_parameter() && arg_i->inferred_type != param_type) { + if (SinkExpression s_expr = extract_sink_expression_from_vertex(arg_i)) { + assign_inferred_type(arg_i, calc_declared_type_before_smart_cast(arg_i)); + flow.register_known_type(s_expr, param_type); + } + } } // if it's a generic function `f`, we need to instantiate it, like `f` @@ -1035,213 +989,204 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { if (idx != -1 && hint && fun_ref->declared_return_type->has_genericT_inside()) { // example: `t.tupleFirst()`, T doesn't depend on arguments, but is determined by return type // if used like `var x: int = t.tupleFirst()` / `t.tupleFirst() as int` / etc., use hint - deducingTs->auto_deduce_from_argument(v->loc, fun_ref->declared_return_type, hint); + deducingTs->auto_deduce_from_argument(cur_f, v->loc, fun_ref->declared_return_type, hint); idx = deducingTs->get_first_not_deduced_idx(); } if (idx != -1) { - v->error("can 
not deduce " + fun_ref->genericTs->get_nameT(idx)); + fire(cur_f, v->loc, "can not deduce " + fun_ref->genericTs->get_nameT(idx)); } fun_ref = check_and_instantiate_generic_function(v->loc, fun_ref, deducingTs->flush()); delete deducingTs; } else if (UNLIKELY(v_instantiationTs != nullptr)) { // non-generic function/method called with type arguments, like `c.cellHash()` / `beginCell()` - v_instantiationTs->error("calling a not generic function with generic T"); + fire(cur_f, v_instantiationTs->loc, "calling a not generic function with generic T"); } v->mutate()->assign_fun_ref(fun_ref); // since for `t.tupleAt()`, infer_dot_access() not called for callee = "t.tupleAt", assign its target here if (v->is_dot_call()) { v->get_callee()->as()->mutate()->assign_target(fun_ref); - v->get_callee()->as()->mutate()->assign_inferred_type(fun_ref->inferred_full_type); } // get return type either from user-specified declaration or infer here on demand traversing its body get_or_infer_return_type(fun_ref); TypePtr inferred_type = dot_obj && fun_ref->does_return_self() ? dot_obj->inferred_type : fun_ref->inferred_return_type; assign_inferred_type(v, inferred_type); assign_inferred_type(callee, fun_ref->inferred_full_type); - if (fun_ref->is_builtin_function() && fun_ref->name[0] == '_') { - handle_possible_compiler_internal_call(current_function, v); - } // note, that mutate params don't affect typing, they are handled when converting to IR + return ExprFlow(std::move(flow), used_as_condition); } - void infer_tensor(V v, TypePtr hint) { + ExprFlow infer_tensor(V v, FlowContext&& flow, bool used_as_condition, TypePtr hint) { const TypeDataTensor* tensor_hint = hint ? hint->try_as() : nullptr; std::vector types_list; types_list.reserve(v->get_items().size()); for (int i = 0; i < v->size(); ++i) { AnyExprV item = v->get_item(i); - infer_any_expr(item, tensor_hint && i < tensor_hint->size() ? 
tensor_hint->items[i] : nullptr); + flow = infer_any_expr(item, std::move(flow), false, tensor_hint && i < tensor_hint->size() ? tensor_hint->items[i] : nullptr).out_flow; types_list.emplace_back(item->inferred_type); } assign_inferred_type(v, TypeDataTensor::create(std::move(types_list))); + return ExprFlow(std::move(flow), used_as_condition); } - void infer_typed_tuple(V v, TypePtr hint) { + ExprFlow infer_typed_tuple(V v, FlowContext&& flow, bool used_as_condition, TypePtr hint) { const TypeDataTypedTuple* tuple_hint = hint ? hint->try_as() : nullptr; std::vector types_list; types_list.reserve(v->get_items().size()); for (int i = 0; i < v->size(); ++i) { AnyExprV item = v->get_item(i); - infer_any_expr(item, tuple_hint && i < tuple_hint->size() ? tuple_hint->items[i] : nullptr); - if (item->inferred_type->get_width_on_stack() != 1) { - fire_error_tuple_cannot_have_non1_stack_width_elem(v->get_item(i)->loc, item->inferred_type); - } + flow = infer_any_expr(item, std::move(flow), false, tuple_hint && i < tuple_hint->size() ? tuple_hint->items[i] : nullptr).out_flow; types_list.emplace_back(item->inferred_type); } assign_inferred_type(v, TypeDataTypedTuple::create(std::move(types_list))); + return ExprFlow(std::move(flow), used_as_condition); } - static void infer_null_keyword(V v) { + static ExprFlow infer_null_keyword(V v, FlowContext&& flow, bool used_as_condition) { assign_inferred_type(v, TypeDataNullLiteral::create()); + + return ExprFlow(std::move(flow), used_as_condition); } - static void infer_underscore(V v, TypePtr hint) { + static ExprFlow infer_underscore(V v, FlowContext&& flow, bool used_as_condition, TypePtr hint) { // if execution is here, underscore is either used as lhs of assignment, or incorrectly, like `f(_)` // more precise is to always set unknown here, but for incorrect usages, instead of an error // "can not pass unknown to X" would better be an error it can't be used as a value, at later steps assign_inferred_type(v, hint ? 
hint : TypeDataUnknown::create()); + return ExprFlow(std::move(flow), used_as_condition); } - static void infer_empty_expression(V v) { + static ExprFlow infer_empty_expression(V v, FlowContext&& flow, bool used_as_condition) { assign_inferred_type(v, TypeDataUnknown::create()); + return ExprFlow(std::move(flow), used_as_condition); } - void process_sequence(V v) { + FlowContext process_sequence(V v, FlowContext&& flow) { + // we'll print a warning if after some statement, control flow became unreachable + // (but don't print a warning if it's already unreachable, for example we're inside always-false if) + bool initially_unreachable = flow.is_unreachable(); for (AnyV item : v->get_items()) { - process_any_statement(item); + if (flow.is_unreachable() && !initially_unreachable && !v->first_unreachable && item->type != ast_empty_statement) { + v->mutate()->assign_first_unreachable(item); // a warning will be printed later, after type checking + } + flow = process_any_statement(item, std::move(flow)); } + return flow; } - static bool is_expr_valid_as_return_self(AnyExprV return_expr) { - // `return self` - if (return_expr->type == ast_reference && return_expr->as()->get_name() == "self") { - return true; - } - // `return self.someMethod()` - if (auto v_call = return_expr->try_as(); v_call && v_call->is_dot_call()) { - return v_call->fun_maybe && v_call->fun_maybe->does_return_self() && is_expr_valid_as_return_self(v_call->get_dot_obj()); - } - // `return cond ? ... 
: ...` - if (auto v_ternary = return_expr->try_as()) { - return is_expr_valid_as_return_self(v_ternary->get_when_true()) && is_expr_valid_as_return_self(v_ternary->get_when_false()); - } - return false; - } - - void process_return_statement(V v) { + FlowContext process_return_statement(V v, FlowContext&& flow) { if (v->has_return_value()) { - infer_any_expr(v->get_return_value(), current_function->declared_return_type); + flow = infer_any_expr(v->get_return_value(), std::move(flow), false, cur_f->declared_return_type).out_flow; } else { assign_inferred_type(v->get_return_value(), TypeDataVoid::create()); } - if (current_function->does_return_self()) { - return_unifier.unify_with(current_function->parameters[0].declared_type); - if (!is_expr_valid_as_return_self(v->get_return_value())) { - v->error("invalid return from `self` function"); - } - return; - } + flow.mark_unreachable(UnreachableKind::ReturnStatement); - TypePtr expr_type = v->get_return_value()->inferred_type; - if (current_function->declared_return_type) { - if (!current_function->declared_return_type->can_rhs_be_assigned(expr_type)) { - v->get_return_value()->error("can not convert type " + to_string(expr_type) + " to return type " + to_string(current_function->declared_return_type)); - } - } else { - if (!return_unifier.unify_with(expr_type)) { - v->get_return_value()->error("can not unify type " + to_string(expr_type) + " with previous return type " + to_string(return_unifier.get_result())); - } + if (!cur_f->declared_return_type) { + return_statements.push_back(v->get_return_value()); // for future unification } + return flow; } - void process_if_statement(V v) { - AnyExprV cond = v->get_cond(); - infer_any_expr(cond); - if (!expect_integer(cond) && !expect_boolean(cond)) { - cond->error("can not use " + to_string(cond) + " as a boolean condition"); - } - process_any_statement(v->get_if_body()); - process_any_statement(v->get_else_body()); + FlowContext process_if_statement(V v, FlowContext&& flow) 
{ + ExprFlow after_cond = infer_any_expr(v->get_cond(), std::move(flow), true); + v->get_cond()->mutate()->assign_always_true_or_false(after_cond.get_always_true_false_state()); + + FlowContext true_flow = process_any_statement(v->get_if_body(), std::move(after_cond.true_flow)); + FlowContext false_flow = process_any_statement(v->get_else_body(), std::move(after_cond.false_flow)); + + return FlowContext::merge_flow(std::move(true_flow), std::move(false_flow)); } - void process_repeat_statement(V v) { - AnyExprV cond = v->get_cond(); - infer_any_expr(cond); - if (!expect_integer(cond)) { - cond->error("condition of `repeat` must be an integer, got " + to_string(cond)); - } - process_any_statement(v->get_body()); + FlowContext process_repeat_statement(V v, FlowContext&& flow) { + ExprFlow after_cond = infer_any_expr(v->get_cond(), std::move(flow), false); + + return process_any_statement(v->get_body(), std::move(after_cond.out_flow)); } - void process_while_statement(V v) { - AnyExprV cond = v->get_cond(); - infer_any_expr(cond); - if (!expect_integer(cond) && !expect_boolean(cond)) { - cond->error("can not use " + to_string(cond) + " as a boolean condition"); - } - process_any_statement(v->get_body()); + FlowContext process_while_statement(V v, FlowContext&& flow) { + // loops are inferred twice, to merge body outcome with the state before the loop + // (a more correct approach would be not "twice", but "find a fixed point when state stop changing") + // also remember, we don't have a `break` statement, that's why when loop exits, condition became false + FlowContext loop_entry_facts = flow.clone(); + ExprFlow after_cond = infer_any_expr(v->get_cond(), std::move(flow), true); + FlowContext body_out = process_any_statement(v->get_body(), std::move(after_cond.true_flow)); + // second time, to refine all types + flow = FlowContext::merge_flow(std::move(loop_entry_facts), std::move(body_out)); + ExprFlow after_cond2 = infer_any_expr(v->get_cond(), std::move(flow), 
true); + v->get_cond()->mutate()->assign_always_true_or_false(after_cond2.get_always_true_false_state()); + + process_any_statement(v->get_body(), std::move(after_cond2.true_flow)); + + return std::move(after_cond2.false_flow); } - void process_do_while_statement(V v) { - process_any_statement(v->get_body()); - AnyExprV cond = v->get_cond(); - infer_any_expr(cond); - if (!expect_integer(cond) && !expect_boolean(cond)) { - cond->error("can not use " + to_string(cond) + " as a boolean condition"); - } + FlowContext process_do_while_statement(V v, FlowContext&& flow) { + // do while is also handled twice; read comments above + FlowContext loop_entry_facts = flow.clone(); + flow = process_any_statement(v->get_body(), std::move(flow)); + ExprFlow after_cond = infer_any_expr(v->get_cond(), std::move(flow), true); + // second time + flow = FlowContext::merge_flow(std::move(loop_entry_facts), std::move(after_cond.true_flow)); + flow = process_any_statement(v->get_body(), std::move(flow)); + ExprFlow after_cond2 = infer_any_expr(v->get_cond(), std::move(flow), true); + v->get_cond()->mutate()->assign_always_true_or_false(after_cond2.get_always_true_false_state()); + + return std::move(after_cond2.false_flow); } - void process_throw_statement(V v) { - infer_any_expr(v->get_thrown_code()); - if (!expect_integer(v->get_thrown_code())) { - v->get_thrown_code()->error("excNo of `throw` must be an integer, got " + to_string(v->get_thrown_code())); - } - infer_any_expr(v->get_thrown_arg()); - if (v->has_thrown_arg() && v->get_thrown_arg()->inferred_type->get_width_on_stack() != 1) { - v->get_thrown_arg()->error("can not throw " + to_string(v->get_thrown_arg()) + ", exception arg must occupy exactly 1 stack slot"); - } + FlowContext process_throw_statement(V v, FlowContext&& flow) { + flow = infer_any_expr(v->get_thrown_code(), std::move(flow), false).out_flow; + flow = infer_any_expr(v->get_thrown_arg(), std::move(flow), false).out_flow; + return flow; } - void 
process_assert_statement(V v) { - AnyExprV cond = v->get_cond(); - infer_any_expr(cond); - if (!expect_integer(cond) && !expect_boolean(cond)) { - cond->error("can not use " + to_string(cond) + " as a boolean condition"); - } - infer_any_expr(v->get_thrown_code()); - if (!expect_integer(v->get_thrown_code())) { - v->get_cond()->error("thrown excNo of `assert` must be an integer, got " + to_string(v->get_cond())); - } + FlowContext process_assert_statement(V v, FlowContext&& flow) { + ExprFlow after_cond = infer_any_expr(v->get_cond(), std::move(flow), true); + v->get_cond()->mutate()->assign_always_true_or_false(after_cond.get_always_true_false_state()); + + ExprFlow after_throw = infer_any_expr(v->get_thrown_code(), std::move(after_cond.false_flow), false); + return std::move(after_cond.true_flow); } - static void process_catch_variable(AnyExprV catch_var, TypePtr catch_var_type) { + static FlowContext process_catch_variable(AnyExprV catch_var, TypePtr catch_var_type, FlowContext&& flow) { if (auto v_ref = catch_var->try_as(); v_ref && v_ref->sym) { // not underscore - assign_inferred_type(v_ref->sym->try_as(), catch_var_type); + LocalVarPtr var_ref = v_ref->sym->try_as(); + assign_inferred_type(var_ref, catch_var_type); + flow.register_known_type(SinkExpression(var_ref), catch_var_type); } assign_inferred_type(catch_var, catch_var_type); + return flow; } - void process_try_catch_statement(V v) { - process_any_statement(v->get_try_body()); + FlowContext process_try_catch_statement(V v, FlowContext&& flow) { + FlowContext before_try = flow.clone(); + FlowContext try_end = process_any_statement(v->get_try_body(), std::move(flow)); // `catch` has exactly 2 variables: excNo and arg (when missing, they are implicit underscores) // `arg` is a curious thing, it can be any TVM primitive, so assign unknown to it // hence, using `fInt(arg)` (int from parameter is a target type) or `arg as slice` works well // it's not truly correct, because `arg as (int,int)` also compiles, 
but can never happen, but let it be user responsibility + FlowContext catch_flow = std::move(before_try); tolk_assert(v->get_catch_expr()->size() == 2); std::vector types_list = {TypeDataInt::create(), TypeDataUnknown::create()}; - process_catch_variable(v->get_catch_expr()->get_item(0), types_list[0]); - process_catch_variable(v->get_catch_expr()->get_item(1), types_list[1]); + catch_flow = process_catch_variable(v->get_catch_expr()->get_item(0), types_list[0], std::move(catch_flow)); + catch_flow = process_catch_variable(v->get_catch_expr()->get_item(1), types_list[1], std::move(catch_flow)); assign_inferred_type(v->get_catch_expr(), TypeDataTensor::create(std::move(types_list))); - process_any_statement(v->get_catch_body()); + FlowContext catch_end = process_any_statement(v->get_catch_body(), std::move(catch_flow)); + return FlowContext::merge_flow(std::move(try_end), std::move(catch_end)); + } + + FlowContext process_expression_statement(AnyExprV v, FlowContext&& flow) { + ExprFlow after_v = infer_any_expr(v, std::move(flow), false); + return std::move(after_v.out_flow); } public: static void assign_fun_full_type(FunctionPtr fun_ref, TypePtr inferred_return_type) { - // calculate function full type `fun(params) -> ret_type` + // calculate function full type `(params) -> ret_type` std::vector params_types; params_types.reserve(fun_ref->get_num_params()); for (const LocalVarData& param : fun_ref->parameters) { @@ -1250,18 +1195,40 @@ public: assign_inferred_type(fun_ref, inferred_return_type, TypeDataFunCallable::create(std::move(params_types), inferred_return_type)); } - void start_visiting_function(FunctionPtr fun_ref, V v_function) { + void start_visiting_function(FunctionPtr fun_ref, V v_function) { + TypePtr inferred_return_type = fun_ref->declared_return_type; if (fun_ref->is_code_function()) { - current_function = fun_ref; - process_any_statement(v_function->get_body()); - current_function = nullptr; + FlowContext body_start; + for (const LocalVarData& 
param : fun_ref->parameters) { + body_start.register_known_type(SinkExpression(¶m), param.declared_type); + } - if (fun_ref->is_implicit_return()) { - bool is_ok_with_void = fun_ref->declared_return_type - ? fun_ref->declared_return_type->can_rhs_be_assigned(TypeDataVoid::create()) - : return_unifier.unify_with_implicit_return_void(); - if (!is_ok_with_void || fun_ref->does_return_self()) { - throw ParseError(v_function->get_body()->as()->loc_end, "missing return"); + cur_f = fun_ref; + FlowContext body_end = process_any_statement(v_function->get_body(), std::move(body_start)); + cur_f = nullptr; + + if (!body_end.is_unreachable()) { + fun_ref->mutate()->assign_is_implicit_return(); + } + + if (!fun_ref->declared_return_type) { + TypeInferringUnifyStrategy return_unifier; + if (fun_ref->does_return_self()) { + return_unifier.unify_with(fun_ref->parameters[0].declared_type); + } + for (AnyExprV return_value : return_statements) { + if (!return_unifier.unify_with(return_value->inferred_type)) { + fire(cur_f, return_value->loc, "can not unify type " + to_string(return_value) + " with previous return type " + to_string(return_unifier.get_result())); + } + } + if (!body_end.is_unreachable()) { + if (!return_unifier.unify_with_implicit_return_void()) { + fire(cur_f, v_function->get_body()->as()->loc_end, "missing return"); + } + } + inferred_return_type = return_unifier.get_result(); + if (inferred_return_type == nullptr && body_end.is_unreachable()) { + inferred_return_type = TypeDataVoid::create(); } } } else { @@ -1269,7 +1236,6 @@ public: tolk_assert(fun_ref->declared_return_type); } - TypePtr inferred_return_type = fun_ref->declared_return_type ? 
fun_ref->declared_return_type : return_unifier.get_result(); assign_fun_full_type(fun_ref, inferred_return_type); fun_ref->mutate()->assign_is_type_inferring_done(); } @@ -1283,7 +1249,7 @@ public: } static void start_visiting_function(FunctionPtr fun_ref, V v_function) { - InferCheckTypesAndCallsAndFieldsVisitor visitor; + InferTypesAndCallsAndFieldsVisitor visitor; visitor.start_visiting_function(fun_ref, v_function); } }; @@ -1298,20 +1264,20 @@ static void infer_and_save_return_type_of_function(FunctionPtr fun_ref) { tolk_assert(!fun_ref->is_generic_function() && !fun_ref->is_type_inferring_done()); // if `g` has return type declared, like `fun g(): int { ... }`, don't traverse its body if (fun_ref->declared_return_type) { - InferCheckTypesAndCallsAndFieldsVisitor::assign_fun_full_type(fun_ref, fun_ref->declared_return_type); + InferTypesAndCallsAndFieldsVisitor::assign_fun_full_type(fun_ref, fun_ref->declared_return_type); return; } // prevent recursion of untyped functions, like `fun f() { return g(); } fun g() { return f(); }` bool contains = std::find(called_stack.begin(), called_stack.end(), fun_ref) != called_stack.end(); if (contains) { - fun_ref->ast_root->error("could not infer return type of " + to_string(fun_ref) + ", because it appears in a recursive call chain; specify `: ` manually"); + fire(fun_ref, fun_ref->loc, "could not infer return type of " + to_string(fun_ref) + ", because it appears in a recursive call chain; specify `: ` manually"); } // dig into g's body; it's safe, since the compiler is single-threaded // on finish, fun_ref->inferred_return_type is filled, and won't be called anymore called_stack.push_back(fun_ref); - InferCheckTypesAndCallsAndFieldsVisitor visitor; + InferTypesAndCallsAndFieldsVisitor visitor; visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); called_stack.pop_back(); } @@ -1321,7 +1287,7 @@ void pipeline_infer_types_and_calls_and_fields() { } void pipeline_infer_types_and_calls_and_fields(FunctionPtr 
fun_ref) { - InferCheckTypesAndCallsAndFieldsVisitor visitor; + InferTypesAndCallsAndFieldsVisitor visitor; visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); } diff --git a/tolk/pipe-resolve-identifiers.cpp b/tolk/pipe-resolve-identifiers.cpp index 95229d20..5a735885 100644 --- a/tolk/pipe-resolve-identifiers.cpp +++ b/tolk/pipe-resolve-identifiers.cpp @@ -59,20 +59,20 @@ namespace tolk { GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_undefined_symbol(V v) { +static void fire_error_undefined_symbol(FunctionPtr cur_f, V v) { if (v->name == "self") { - v->error("using `self` in a non-member function (it does not accept the first `self` parameter)"); + throw ParseError(cur_f, v->loc, "using `self` in a non-member function (it does not accept the first `self` parameter)"); } else { - v->error("undefined symbol `" + static_cast(v->name) + "`"); + throw ParseError(cur_f, v->loc, "undefined symbol `" + static_cast(v->name) + "`"); } } GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_unknown_type_name(SrcLocation loc, const std::string &text) { - throw ParseError(loc, "unknown type name `" + text + "`"); +static void fire_error_unknown_type_name(FunctionPtr cur_f, SrcLocation loc, const std::string &text) { + throw ParseError(cur_f, loc, "unknown type name `" + text + "`"); } -static void check_import_exists_when_using_sym(AnyV v_usage, const Symbol* used_sym) { +static void check_import_exists_when_using_sym(FunctionPtr cur_f, AnyV v_usage, const Symbol* used_sym) { SrcLocation sym_loc = used_sym->loc; if (!v_usage->loc.is_symbol_from_same_or_builtin_file(sym_loc)) { const SrcFile* declared_in = sym_loc.get_src_file(); @@ -83,7 +83,7 @@ static void check_import_exists_when_using_sym(AnyV v_usage, const Symbol* used_ } } if (!has_import) { - v_usage->error("Using a non-imported symbol `" + used_sym->name + "`. 
Forgot to import \"" + declared_in->rel_filename + "\"?"); + throw ParseError(cur_f, v_usage->loc, "Using a non-imported symbol `" + used_sym->name + "`. Forgot to import \"" + declared_in->rel_filename + "\"?"); } } } @@ -137,38 +137,39 @@ struct NameAndScopeResolver { struct TypeDataResolver { GNU_ATTRIBUTE_NOINLINE - static TypePtr resolve_identifiers_in_type_data(TypePtr type_data, const GenericsDeclaration* genericTs) { - return type_data->replace_children_custom([genericTs](TypePtr child) { + static TypePtr resolve_identifiers_in_type_data(FunctionPtr cur_f, TypePtr type_data, const GenericsDeclaration* genericTs) { + return type_data->replace_children_custom([cur_f, genericTs](TypePtr child) { if (const TypeDataUnresolved* un = child->try_as()) { if (genericTs && genericTs->has_nameT(un->text)) { std::string nameT = un->text; return TypeDataGenericT::create(std::move(nameT)); } if (un->text == "auto") { - throw ParseError(un->loc, "`auto` type does not exist; just omit a type for local variable (will be inferred from assignment); parameters should always be typed"); + throw ParseError(cur_f, un->loc, "`auto` type does not exist; just omit a type for local variable (will be inferred from assignment); parameters should always be typed"); } if (un->text == "self") { - throw ParseError(un->loc, "`self` type can be used only as a return type of a function (enforcing it to be chainable)"); + throw ParseError(cur_f, un->loc, "`self` type can be used only as a return type of a function (enforcing it to be chainable)"); } - fire_error_unknown_type_name(un->loc, un->text); + fire_error_unknown_type_name(cur_f, un->loc, un->text); } return child; }); } }; -static TypePtr finalize_type_data(TypePtr type_data, const GenericsDeclaration* genericTs) { +static TypePtr finalize_type_data(FunctionPtr cur_f, TypePtr type_data, const GenericsDeclaration* genericTs) { if (!type_data || !type_data->has_unresolved_inside()) { return type_data; } - return 
TypeDataResolver::resolve_identifiers_in_type_data(type_data, genericTs); + return TypeDataResolver::resolve_identifiers_in_type_data(cur_f, type_data, genericTs); } class AssignSymInsideFunctionVisitor final : public ASTVisitorFunctionBody { // more correctly this field shouldn't be static, but currently there is no need to make it a part of state static NameAndScopeResolver current_scope; - static FunctionPtr current_function; + static FunctionPtr cur_f; + static const GenericsDeclaration* current_genericTs; static LocalVarPtr create_local_var_sym(std::string_view name, SrcLocation loc, TypePtr declared_type, bool immutable) { LocalVarData* v_sym = new LocalVarData(static_cast(name), loc, declared_type, immutable * LocalVarData::flagImmutable, -1); @@ -188,15 +189,15 @@ protected: if (v->marked_as_redef) { const Symbol* sym = current_scope.lookup_symbol(v->get_name()); if (sym == nullptr) { - v->error("`redef` for unknown variable"); + throw ParseError(cur_f, v->loc, "`redef` for unknown variable"); } LocalVarPtr var_ref = sym->try_as(); if (!var_ref) { - v->error("`redef` for unknown variable"); + throw ParseError(cur_f, v->loc, "`redef` for unknown variable"); } v->mutate()->assign_var_ref(var_ref); } else { - TypePtr declared_type = finalize_type_data(v->declared_type, current_function->genericTs); + TypePtr declared_type = finalize_type_data(cur_f, v->declared_type, current_genericTs); LocalVarPtr var_ref = create_local_var_sym(v->get_name(), v->loc, declared_type, v->is_immutable); v->mutate()->assign_resolved_type(declared_type); v->mutate()->assign_var_ref(var_ref); @@ -211,20 +212,20 @@ protected: void visit(V v) override { const Symbol* sym = current_scope.lookup_symbol(v->get_name()); if (!sym) { - fire_error_undefined_symbol(v->get_identifier()); + fire_error_undefined_symbol(cur_f, v->get_identifier()); } v->mutate()->assign_sym(sym); // for global functions, global vars and constants, `import` must exist if (!sym->try_as()) { - 
check_import_exists_when_using_sym(v, sym); + check_import_exists_when_using_sym(cur_f, v, sym); } // for `f` / `f`, resolve "MyAlias" and "T" // (for function call `f()`, this v (ast_reference `f`) is callee) if (auto v_instantiationTs = v->get_instantiationTs()) { for (int i = 0; i < v_instantiationTs->size(); ++i) { - TypePtr substituted_type = finalize_type_data(v_instantiationTs->get_item(i)->substituted_type, current_function->genericTs); + TypePtr substituted_type = finalize_type_data(cur_f, v_instantiationTs->get_item(i)->substituted_type, current_genericTs); v_instantiationTs->get_item(i)->mutate()->assign_resolved_type(substituted_type); } } @@ -235,7 +236,7 @@ protected: // (for function call `t.tupleAt()`, this v (ast_dot_access `t.tupleAt`) is callee) if (auto v_instantiationTs = v->get_instantiationTs()) { for (int i = 0; i < v_instantiationTs->size(); ++i) { - TypePtr substituted_type = finalize_type_data(v_instantiationTs->get_item(i)->substituted_type, current_function->genericTs); + TypePtr substituted_type = finalize_type_data(cur_f, v_instantiationTs->get_item(i)->substituted_type, current_genericTs); v_instantiationTs->get_item(i)->mutate()->assign_resolved_type(substituted_type); } } @@ -243,7 +244,7 @@ protected: } void visit(V v) override { - TypePtr cast_to_type = finalize_type_data(v->cast_to_type, current_function->genericTs); + TypePtr cast_to_type = finalize_type_data(cur_f, v->cast_to_type, current_genericTs); v->mutate()->assign_resolved_type(cast_to_type); parent::visit(v->get_expr()); } @@ -284,16 +285,17 @@ public: } void start_visiting_function(FunctionPtr fun_ref, V v) override { - current_function = fun_ref; + cur_f = fun_ref; + current_genericTs = fun_ref->genericTs; for (int i = 0; i < v->get_num_params(); ++i) { const LocalVarData& param_var = fun_ref->parameters[i]; - TypePtr declared_type = finalize_type_data(param_var.declared_type, fun_ref->genericTs); + TypePtr declared_type = finalize_type_data(cur_f, 
param_var.declared_type, fun_ref->genericTs); v->get_param(i)->mutate()->assign_param_ref(¶m_var); v->get_param(i)->mutate()->assign_resolved_type(declared_type); param_var.mutate()->assign_resolved_type(declared_type); } - TypePtr return_type = finalize_type_data(fun_ref->declared_return_type, fun_ref->genericTs); + TypePtr return_type = finalize_type_data(cur_f, fun_ref->declared_return_type, fun_ref->genericTs); v->mutate()->assign_resolved_type(return_type); fun_ref->mutate()->assign_resolved_type(return_type); @@ -308,12 +310,14 @@ public: tolk_assert(current_scope.scopes.empty()); } - current_function = nullptr; + current_genericTs = nullptr; + cur_f = nullptr; } }; NameAndScopeResolver AssignSymInsideFunctionVisitor::current_scope; -FunctionPtr AssignSymInsideFunctionVisitor::current_function = nullptr; +FunctionPtr AssignSymInsideFunctionVisitor::cur_f = nullptr; +const GenericsDeclaration* AssignSymInsideFunctionVisitor::current_genericTs = nullptr; void pipeline_resolve_identifiers_and_assign_symbols() { AssignSymInsideFunctionVisitor visitor; @@ -324,14 +328,16 @@ void pipeline_resolve_identifiers_and_assign_symbols() { visitor.start_visiting_function(v_func->fun_ref, v_func); } else if (auto v_global = v->try_as()) { - TypePtr declared_type = finalize_type_data(v_global->var_ref->declared_type, nullptr); + TypePtr declared_type = finalize_type_data(nullptr, v_global->var_ref->declared_type, nullptr); v_global->mutate()->assign_resolved_type(declared_type); v_global->var_ref->mutate()->assign_resolved_type(declared_type); - } else if (auto v_const = v->try_as(); v_const && v_const->declared_type) { - TypePtr declared_type = finalize_type_data(v_const->const_ref->declared_type, nullptr); - v_const->mutate()->assign_resolved_type(declared_type); - v_const->const_ref->mutate()->assign_resolved_type(declared_type); + } else if (auto v_const = v->try_as()) { + if (v_const->declared_type) { + TypePtr declared_type = finalize_type_data(nullptr, 
v_const->const_ref->declared_type, nullptr); + v_const->mutate()->assign_resolved_type(declared_type); + v_const->const_ref->mutate()->assign_resolved_type(declared_type); + } } } } diff --git a/tolk/pipeline.h b/tolk/pipeline.h index ab65ef80..0a71d751 100644 --- a/tolk/pipeline.h +++ b/tolk/pipeline.h @@ -35,8 +35,8 @@ void pipeline_discover_and_parse_sources(const std::string& stdlib_filename, con void pipeline_register_global_symbols(); void pipeline_resolve_identifiers_and_assign_symbols(); void pipeline_calculate_rvalue_lvalue(); -void pipeline_detect_unreachable_statements(); void pipeline_infer_types_and_calls_and_fields(); +void pipeline_check_inferred_types(); void pipeline_refine_lvalue_for_mutate_arguments(); void pipeline_check_rvalue_lvalue(); void pipeline_check_pure_impure_operations(); diff --git a/tolk/smart-casts-cfg.cpp b/tolk/smart-casts-cfg.cpp new file mode 100644 index 00000000..7b86f519 --- /dev/null +++ b/tolk/smart-casts-cfg.cpp @@ -0,0 +1,472 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "smart-casts-cfg.h" +#include "ast.h" +#include "tolk.h" + +/* + * This file represents internals of AST-level control flow and data flow analysis. + * Data flow is mostly used for smart casts and is calculated AT THE TIME of type inferring. 
+ * Not before, not after, but simultaneously with type inferring, because any local variable can be smart cast, + * which affects the types of other expressions/variables, generics instantiation, return auto-infer, etc. + * Though it's a part of type inferring, it's extracted as a separate file to keep inferring a bit clearer. + * + * Control flow is represented NOT as a "graph with edges". Instead, it's a "structured DFS" for the AST: + * 1) at every point of inferring, we have "current flow facts" (FlowContext) + * 2) when we see an `if (...)`, we create two derived contexts (by cloning current) + * 3) after `if`, finalize them at the end and unify + * 4) if we detect unreachable code, we mark that path's context as "unreachable" + * In other words, we get the effect of a CFG but in a more direct approach. That's enough for AST-level data-flow. + * + * FlowContext contains "data-flow facts that are definitely known": variable types (original or refined), + * sign state (definitely positive, definitely zero, etc.), boolean state (definitely true, definitely false). + * Each local variable is contained there, and possibly sub-fields of tensors/objects if definitely known: + * // current facts: x is int?, t is (int, int) + * if (x != null && t.0 > 0) + * // current facts: x is int, t is (int, int), t.0 is positive + * else + * // current facts: x is int?, t is (int, int) (either `x` is null or `t.0` is not positive, so neither is refined) + * When branches rejoin, facts are merged back (int+null = int? and so on, here they would be the same as before the `if`). + * Another example: + * // current facts: x is int? 
+ * if (x == null) { + * // current facts: x is null + * x = 1; + * // current facts: x is int + * } // else branch is empty, its facts are: x is int + * // current facts (after rejoin): x is int + * + * Every expression analysis (performed along with type inferring) returns an ExprFlow: + * 1) out_flow: facts after evaluating the whole expression, no matter how it evaluates (true or false) + * 2) true_flow: the environment if expression is definitely true + * 3) false_flow: the environment if expression is definitely false + * + * Note that globals are NOT analyzed (smart casts work for locals only). The explanation is simple: + * using a global twice should not be encouraged: it costs gas, so it's better to assign it to a local. + * See SinkExpression. + * + * An important note about the internal structure of tensors / tuples / objects and `t.1` sink expressions. + * When a tensor/object is assigned, its fields are NOT tracked individually. + * For better understanding, I'll give some examples in TypeScript (having the same behavior): + * interface User { id: number | string, ... } + * var u: User = { id: 123, ... } + * u.id // it's number|string, not number + * u = { id: 'asdf', ... } + * u.id // it's number|string, not string + * if (typeof u.id === 'string') { + * // here `u.id` is string (smart cast) + * } + * u.id = 123; + * u.id // now it's number (smart cast) (until `u.id` or `u` are reassigned) + * // but `u` still has type `{ id: number | string, ... }`, not `{ id: number, ... }`; only `u.id` is refined + * The same example, but with a tensor having a nullable item in Tolk: + * var t: (int?, ...) = (123, ...) + * t.0 // it's int?, not int + * t = (null, ...) 
+ * t.0 // it's int?, not null + * if (t.0 == null) { + * // here `t.0` is null (smart cast) + * } + * t.0 = 123; + * t.0 // now it's int (smart cast) (until `t.0` or `t` are reassigned) + * // but `t` still has type `(int?, ...)`, not `(int, ...)`; only `t.0` is refined + * + * In the future, not only smart casts, but other data-flow analysis can be implemented. + * 1) detect signs: `if (x > 0) { ... if (x < 0)` to warn always false + * 2) detect always true/false: `if (x) { return; } ... if (!x)` to warn always true + * These potential improvements are SignState and BoolState. Now they are NOT IMPLEMENTED, though declared. + * Their purpose is to show, that data flow is not only about smart casts, but eventually for other facts also. + * (though it's not obvious whether they should be analyzed at AST level or at IR level, like constants now) + */ + +namespace tolk { + +std::string SinkExpression::to_string() const { + std::string result = var_ref->name; + uint64_t cur_path = index_path; + while (cur_path != 0) { + result += "."; + result += std::to_string((cur_path & 0xFF) - 1); + cur_path >>= 8; + } + return result; +} + +static std::string to_string(SignState s) { + static const char* txt[6 + 1] = {"sign=unknown", ">0", "<0", "=0", ">=0", "<=0", "sign=never"}; + return txt[static_cast(s)]; +} + +static std::string to_string(BoolState s) { + static const char* txt[4 + 1] = {"unknown", "always_true", "always_false", "bool=never"}; + return txt[static_cast(s)]; +} + +// from `expr!` get `expr` +static AnyExprV unwrap_not_null_operator(AnyExprV expr) { + while (auto v_not_null = expr->try_as()) { + expr = v_not_null->get_expr(); + } + return expr; +} + +// "type lca" for a and b is T, so that both are assignable to T +// it's used +// 1) for auto-infer return type of the function if not specified +// example: `fun f(x: int?) { ... return 1; ... 
return x; }`; lca(`int`,`int?`) = `int?` +// 2) for auto-infer type of ternary and `match` expressions +// example: `cond ? beginCell() : null`; lca(`builder`,`null`) = `builder?` +// 3) when two data flows rejoin +// example: `if (tensorVar != null) ... else ...` rejoin `(int,int)` and `null` into `(int,int)?` +// when lca can't be calculated (example: `(int,int)` and `(int,int,int)`), nullptr is returned +static TypePtr calculate_type_lca(TypePtr a, TypePtr b) { + if (a == b) { + return a; + } + if (a == TypeDataNever::create()) { + return b; + } + if (b == TypeDataNever::create()) { + return a; + } + + if (a->can_rhs_be_assigned(b)) { + return a; + } + if (b->can_rhs_be_assigned(a)) { + return b; + } + + if (a == TypeDataUnknown::create() || b == TypeDataUnknown::create()) { + return TypeDataUnknown::create(); + } + + if (a == TypeDataNullLiteral::create()) { + return TypeDataNullable::create(b); + } + if (b == TypeDataNullLiteral::create()) { + return TypeDataNullable::create(a); + } + + const auto* tensor1 = a->try_as(); + const auto* tensor2 = b->try_as(); + if (tensor1 && tensor2 && tensor1->size() == tensor2->size()) { + std::vector types_lca; + types_lca.reserve(tensor1->size()); + for (int i = 0; i < tensor1->size(); ++i) { + TypePtr next = calculate_type_lca(tensor1->items[i], tensor2->items[i]); + if (next == nullptr) { + return nullptr; + } + types_lca.push_back(next); + } + return TypeDataTensor::create(std::move(types_lca)); + } + + const auto* tuple1 = a->try_as(); + const auto* tuple2 = b->try_as(); + if (tuple1 && tuple2 && tuple1->size() == tuple2->size()) { + std::vector types_lca; + types_lca.reserve(tuple1->size()); + for (int i = 0; i < tuple1->size(); ++i) { + TypePtr next = calculate_type_lca(tuple1->items[i], tuple2->items[i]); + if (next == nullptr) { + return nullptr; + } + types_lca.push_back(next); + } + return TypeDataTypedTuple::create(std::move(types_lca)); + } + + return nullptr; +} + +// merge (unify) of two sign states: what sign 
do we definitely have +// it's used on data flow rejoin +// example: `if (x > 0) ... else ...`; lca(Positive, NonPositive) = Unknown +SignState calculate_sign_lca(SignState a, SignState b) { + using s = SignState; + // a transformation lookup table, using the following rules: + // 1) if one is Unknown, the result is Unknown ("no definite constraints") + // 2) if one is Never (can't happen), the result is the other + // example: x is known > 0 already, given code `if (x > 0) {} else {}` merges Positive (always true) and Never + // 3) handle all other combinations carefully + static constexpr SignState transformations[7][7] = { + // b= Unknown | Positive | Negative | Zero | NonNegative | NonPositive | Never | + /* a=Unknown */ {s::Unknown, s::Unknown, s::Unknown, s::Unknown, s::Unknown, s::Unknown, s::Unknown }, + /* a=Positive */ {s::Unknown, s::Positive, s::Unknown, s::NonNegative, s::NonNegative, s::Unknown, s::Positive }, + /* a=Negative */ {s::Unknown, s::Unknown, s::Negative, s::NonPositive, s::Unknown, s::NonPositive, s::Negative }, + /* a=Zero */ {s::Unknown, s::NonNegative, s::NonPositive, s::Zero, s::NonNegative, s::NonPositive, s::Zero }, + /* a=NonNegative */ {s::Unknown, s::NonNegative, s::Unknown, s::NonNegative, s::NonNegative, s::Unknown, s::NonNegative}, + /* a=NonPositive */ {s::Unknown, s::Unknown, s::NonPositive, s::NonPositive, s::Unknown, s::NonPositive, s::NonPositive}, + /* a=Never */ {s::Unknown, s::Positive, s::Negative, s::Zero, s::NonNegative, s::NonPositive, s::Never } + }; + + return transformations[static_cast(a)][static_cast(b)]; +} + +// merge (unify) two bool state: what state do we definitely have +// it's used on data flow rejoin +// example: `if (x) ... 
else ...`; lca(AlwaysTrue, AlwaysFalse) = Unknown +BoolState calculate_bool_lca(BoolState a, BoolState b) { + using s = BoolState; + static constexpr BoolState transformations[4][4] = { + // b= Unknown | AlwaysTrue | AlwaysFalse | Never | + /* a=Unknown */ {s::Unknown, s::Unknown, s::Unknown, s::Unknown }, + /* a=AlwaysTrue */ {s::Unknown, s::AlwaysTrue, s::Unknown, s::AlwaysTrue }, + /* a=AlwaysFalse */ {s::Unknown, s::Unknown, s::AlwaysFalse, s::AlwaysFalse}, + /* a=Never */ {s::Unknown, s::AlwaysTrue, s::AlwaysFalse, s::Never } + }; + + return transformations[static_cast(a)][static_cast(b)]; +} + +// see comments above TypeInferringUnifyStrategy +// this function calculates the lca of the currently stored result and `next` +bool TypeInferringUnifyStrategy::unify_with(TypePtr next) { + if (unified_result == nullptr) { + unified_result = next; + return true; + } + if (unified_result == next) { + return true; + } + + TypePtr combined = calculate_type_lca(unified_result, next); + if (!combined) { + return false; + } + + unified_result = combined; + return true; +} + +bool TypeInferringUnifyStrategy::unify_with_implicit_return_void() { + if (unified_result == nullptr) { + unified_result = TypeDataVoid::create(); + return true; + } + + return unified_result == TypeDataVoid::create(); +} + +// invalidate knowledge about sub-fields of a variable or its field +// example: `tensorVar = 2`, invalidate facts about `tensorVar`, `tensorVar.0`, `tensorVar.1.2`, and all others +// example: `user.id = rhs`, invalidate facts about `user.id` (sign, etc.) 
and `user.id.*` if exist +void FlowContext::invalidate_all_subfields(LocalVarPtr var_ref, uint64_t parent_path, uint64_t parent_mask) { + for (auto it = known_facts.begin(); it != known_facts.end();) { + bool is_self_or_field = it->first.var_ref == var_ref && (it->first.index_path & parent_mask) == parent_path; + if (is_self_or_field) { + it = known_facts.erase(it); + } else { + ++it; + } + } +} + +// update current type of `local_var` / `tensorVar.0` / `obj.field` +// example: `local_var = rhs` +// example: `f(mutate obj.field)` +// example: `if (t.0 != null)`, in true_flow `t.0` assigned to "not-null of current", in false_flow to null +void FlowContext::register_known_type(SinkExpression s_expr, TypePtr assigned_type) { + // having index_path = (some bytes filled in the end), + // calc index_mask: replace every filled byte with 0xFF + // example: `t.0.1`, index_path = (2<<8) + 1 (the first index occupies the lowest byte), index_mask = 0xFFFF + uint64_t index_path = s_expr.index_path; + uint64_t index_mask = 0; + while (index_path > 0) { + index_mask = index_mask << 8 | 0xFF; + index_path >>= 8; + } + invalidate_all_subfields(s_expr.var_ref, s_expr.index_path, index_mask); + + // if just `int` assigned, we have no considerations about its sign + // so, even if something existed by the key s_expr, drop all knowledge + known_facts[s_expr] = FactsAboutExpr(assigned_type, SignState::Unknown, BoolState::Unknown); +} + +// mark control flow unreachable / interrupted +void FlowContext::mark_unreachable(UnreachableKind reason) { + unreachable = true; + // currently we don't save why control flow became unreachable (it's not obvious how, there may be consequent reasons), + // but it helps debugging and reading outer code + static_cast(reason); +} + + +// "merge" of two data-flow contexts occurs on control flow rejoins (if/else branches merging, for example) +// it generates a new context that describes "knowledge that definitely follows from these two" +// example: in one branch x is `int`, in the other x is `null`, result 
is `int?` unless any of them is unreachable +FlowContext FlowContext::merge_flow(FlowContext&& c1, FlowContext&& c2) { + if (!c1.unreachable && c2.unreachable) { + return merge_flow(std::move(c2), std::move(c1)); + } + + std::map unified; + + if (c1.unreachable && !c2.unreachable) { + // `if (...) return; else ...;` — copy facts about common variables only from else (c2) + for (const auto& [s_expr, i2] : c2.known_facts) { + auto it1 = c1.known_facts.find(s_expr); + bool need_add = it1 != c1.known_facts.end() || s_expr.index_path != 0; + if (need_add) { + unified.emplace(s_expr, i2); + } + } + + } else { + // either both reachable, or both not — merge types and restrictions of common variables and fields + for (const auto& [s_expr, i1] : c1.known_facts) { + if (auto it2 = c2.known_facts.find(s_expr); it2 != c2.known_facts.end()) { + const FactsAboutExpr& i2 = it2->second; + unified.emplace(s_expr, i1 == i2 ? i1 : FactsAboutExpr( + calculate_type_lca(i1.expr_type, i2.expr_type), + calculate_sign_lca(i1.sign_state, i2.sign_state), + calculate_bool_lca(i1.bool_state, i2.bool_state) + )); + } + } + } + + return FlowContext(std::move(unified), c1.unreachable && c2.unreachable); +} + +// return `T`, so that `T?` = type +// what for: `if (x != null)`, to smart cast x inside if +TypePtr calculate_type_subtract_null(TypePtr type) { + if (const auto* as_nullable = type->try_as()) { + return as_nullable->inner; + } + // union types will be handled here + return TypeDataNever::create(); +} + +// given any expression vertex, extract SinkExpression if possible +// example: `x.0` is { var_ref: x, index_path: 1 } +// example: `x.1` is { var_ref: x, index_path: 2 } +// example: `x!.1` is the same +// example: `x.1.2` is { var_ref: x, index_path: (3<<8) + 2 } +// example: `x!.1!.2` is the same +// not SinkExpressions: `globalVar` / `f()` / `obj.method().1` +SinkExpression extract_sink_expression_from_vertex(AnyExprV v) { + if (auto as_ref = v->try_as()) { + if (LocalVarPtr var_ref = 
as_ref->sym->try_as()) { + return SinkExpression(var_ref); + } + } + + if (auto as_dot = v->try_as(); as_dot && as_dot->is_target_indexed_access()) { + V cur_dot = as_dot; + uint64_t index_path = 0; + while (cur_dot->is_target_indexed_access()) { + int index_at = std::get(cur_dot->target); + index_path = (index_path << 8) + index_at + 1; + if (auto parent_dot = unwrap_not_null_operator(cur_dot->get_obj())->try_as()) { + cur_dot = parent_dot; + } else { + break; + } + } + if (auto as_ref = unwrap_not_null_operator(cur_dot->get_obj())->try_as()) { + if (LocalVarPtr var_ref = as_ref->sym->try_as()) { + return SinkExpression(var_ref, index_path); + } + } + } + + if (auto as_par = v->try_as()) { + return extract_sink_expression_from_vertex(as_par->get_expr()); + } + + if (auto as_assign = v->try_as()) { + return extract_sink_expression_from_vertex(as_assign->get_lhs()); + } + + return {}; +} + +// given `lhs = rhs`, calculate "original" type of `lhs` +// example: `var x: int? = ...; if (x != null) { x (here) = null; }` +// "(here)" x is `int` (smart cast), but originally declared as `int?` +// example: `if (x is (int,int)?) { x!.0 = rhs }`, here `x!.0` is `int` +TypePtr calc_declared_type_before_smart_cast(AnyExprV v) { + if (auto as_ref = v->try_as()) { + if (LocalVarPtr var_ref = as_ref->sym->try_as()) { + return var_ref->declared_type; + } + } + + if (auto as_dot = v->try_as(); as_dot && as_dot->is_target_indexed_access()) { + TypePtr obj_type = as_dot->get_obj()->inferred_type; // v already inferred; hence, index_at is correct + int index_at = std::get(as_dot->target); + if (const auto* t_tensor = obj_type->try_as()) { + return t_tensor->items[index_at]; + } + if (const auto* t_tuple = obj_type->try_as()) { + return t_tuple->items[index_at]; + } + } + + return v->inferred_type; +} + +// given `lhs = rhs` (and `var x = rhs`), calculate probable smart cast for lhs +// it's NOT directly type of rhs! 
see comment at the top of the file about internal structure of tensors/tuples. +// obvious example: `var x: int? = 5`, it's `int` (most cases are like this) +// obvious example: `var x: (int,int)? = null`, it's `null` (`x == null` is always true, `x` can be passed to any `T?`) +// not obvious example: `var x: (int?, int?)? = (3,null)`, result is `(int?,int?)`, whereas type of rhs is `(int,null)` +TypePtr calc_smart_cast_type_on_assignment(TypePtr lhs_declared_type, TypePtr rhs_inferred_type) { + // assign `T` to `T?` (or at least "assignable-to-T" to "T?") + // smart cast to `T` + if (const auto* lhs_nullable = lhs_declared_type->try_as()) { + if (lhs_nullable->inner->can_rhs_be_assigned(rhs_inferred_type)) { + return lhs_nullable->inner; + } + } + + // assign `null` to `T?` + // smart cast to `null` + if (lhs_declared_type->try_as() && rhs_inferred_type == TypeDataNullLiteral::create()) { + return TypeDataNullLiteral::create(); + } + + // no smart cast, type is the same as declared + // example: `var x: (int?,slice?) = (1, null)`, it's `(int?,slice?)`, not `(int,null)` + return lhs_declared_type; +} + + +std::ostream& operator<<(std::ostream& os, const FlowContext& flow) { + os << "(" << flow.known_facts.size() << " facts) " << (flow.unreachable ? "(unreachable) " : ""); + for (const auto& [s_expr, facts] : flow.known_facts) { + os << ", " << s_expr.to_string() << ": " << facts; + } + return os; +} + +std::ostream& operator<<(std::ostream& os, const FactsAboutExpr& facts) { + os << facts.expr_type; + if (facts.sign_state != SignState::Unknown) { + os << " " << to_string(facts.sign_state); + } + if (facts.bool_state != BoolState::Unknown) { + os << " " << to_string(facts.bool_state); + } + return os; +} + +} // namespace tolk diff --git a/tolk/smart-casts-cfg.h b/tolk/smart-casts-cfg.h new file mode 100644 index 00000000..7321f952 --- /dev/null +++ b/tolk/smart-casts-cfg.h @@ -0,0 +1,207 @@ +/* + This file is part of TON Blockchain Library. 
+ + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>. +*/ +#pragma once + +#include "fwd-declarations.h" +#include "type-system.h" +#include +#include + +namespace tolk { + +/* + * TypeInferringUnifyStrategy unifies types from various branches to a common result (lca). + * It's used to auto infer function return type based on return statements, like in TypeScript. + * Example: `fun f() { ... return 1; ... return null; }` inferred as `int?`. + * + * Besides function returns, it's also used for ternary `return cond ? 1 : null` and `match` expression. + * If types can't be unified (a function returns int and cell, for example), `unify_with()` returns false, handled outside. + * BTW, don't confuse this way of inferring with Hindley-Milner, they have nothing in common. + */ +class TypeInferringUnifyStrategy { + TypePtr unified_result = nullptr; + +public: + bool unify_with(TypePtr next); + bool unify_with_implicit_return_void(); + + TypePtr get_result() const { return unified_result; } +}; + +/* + * SinkExpression is an expression that can be smart cast like `if (x != null)` (x is int inside) + * or analyzed by data flow in some other way like `if (x > 0) ... else ...` (x <= 0 inside else). + * In other words, it "absorbs" data flow facts.
+ * Examples: `localVar`, `localTensor.1`, `localTuple.1.2.3`, `localObj.field` + * These are NOT sink expressions: `globalVar`, `f()`, `f().1` + * Note, that globals are NOT sink: we don't encourage using a global twice, it costs gas, better assign it to a local. + */ +struct SinkExpression { + LocalVarPtr const var_ref; // smart casts and data flow apply only to locals + const uint64_t index_path; // 0 for just `v`; for `v.N` it's (N+1), for `v.N.M` it's (N+1) + ((M+1)<<8), etc. + + SinkExpression() + : var_ref(nullptr), index_path(0) {} + explicit SinkExpression(LocalVarPtr var_ref) + : var_ref(var_ref), index_path(0) {} + explicit SinkExpression(LocalVarPtr var_ref, uint64_t index_path) + : var_ref(var_ref), index_path(index_path) {} + + SinkExpression(const SinkExpression&) = default; + SinkExpression& operator=(const SinkExpression&) = delete; + + bool operator==(const SinkExpression& rhs) const { return var_ref == rhs.var_ref && index_path == rhs.index_path; } + bool operator<(const SinkExpression& rhs) const { return var_ref == rhs.var_ref ? index_path < rhs.index_path : var_ref < rhs.var_ref; } + explicit operator bool() const { return var_ref != nullptr; } + + std::string to_string() const; +}; + +// UnreachableKind is the reason why control flow is unreachable or interrupted +// example: `return;` interrupts control flow +// example: `if (true) ... else ...` inside "else" flow is unreachable because it can't happen +enum class UnreachableKind { + Unknown, // no definite info or not unreachable + CantHappen, + ReturnStatement, + CallNeverReturnFunction, +}; + +// SignState is "definitely positive", etc.
+// example: inside `if (x > 0)`, x is Positive, in `else` it's NonPositive (if x is local, until reassigned) +enum class SignState { + Unknown, // no definite info + Positive, + Negative, + Zero, + NonNegative, + NonPositive, + Never // can't happen, like "never" type +}; + +// BoolState is "definitely true" or "definitely false" +// example: inside `if (x)`, x is AlwaysTrue, in `else` it's AlwaysFalse +enum class BoolState { + Unknown, // no definite info + AlwaysTrue, + AlwaysFalse, + Never // can't happen, like "never" type +}; + +// FactsAboutExpr represents "everything known about SinkExpression at a given execution point" +// example: after `var x = getNullableInt()`, x is `int?`, sign/bool is Unknown +// example: after `x = 2;`, x is `int`, sign is Positive, bool is AlwaysTrue +// example: inside `if (x != null && x > 0)`, x is `int`, sign is Positive (in else, no definite knowledge) +// remember, that indices/fields are also expressions, `t.1 = 2` or `u.id = 2` also store such facts +// WARNING! Detecting data-flow facts about sign state and bool state is NOT IMPLEMENTED +// (e.g. `if (x > 0)` / `if (!t.1)` is NOT analysed, therefore not updated, always Unknown now) +// it's a potential improvement for the future, for example `if (x > 0) { ... if (x < 0)` to warn always false +// their purpose for now is to show, that data flow is not only about smart casts, but eventually for other facts also +struct FactsAboutExpr { + TypePtr expr_type; // originally declared type or smart cast (Unknown if no info) + SignState sign_state; // definitely positive, etc. 
(Unknown if no info) + BoolState bool_state; // definitely true/false (Unknown if no info) + + FactsAboutExpr() + : expr_type(nullptr), sign_state(SignState::Unknown), bool_state(BoolState::Unknown) {} + FactsAboutExpr(TypePtr smart_cast_type, SignState sign_state, BoolState bool_state) + : expr_type(smart_cast_type), sign_state(sign_state), bool_state(bool_state) {} + + bool operator==(const FactsAboutExpr& rhs) const = default; +}; + +// FlowContext represents "everything known about control flow at a given execution point" +// while traversing AST, each statement node gets "in" FlowContext (prior knowledge) +// and returns "output" FlowContext (representing a state AFTER execution of a statement) +// on branching, like if/else, input context is cloned, two contexts for each branch calculated, and merged to a result +class FlowContext { + // std::map, not std::unordered_map, because LLDB visualises it better, for debugging + std::map known_facts; // all local vars plus (optionally) indices/fields of tensors/tuples/objects + bool unreachable = false; // if execution can't reach this point (after `return`, for example) + + FlowContext(std::map&& known_facts, bool unreachable) + : known_facts(std::move(known_facts)), unreachable(unreachable) {} + + void invalidate_all_subfields(LocalVarPtr var_ref, uint64_t parent_path, uint64_t parent_mask); + + friend std::ostream& operator<<(std::ostream& os, const FlowContext& flow); + +public: + FlowContext() = default; + FlowContext(FlowContext&&) noexcept = default; + FlowContext(const FlowContext&) = delete; + FlowContext& operator=(FlowContext&&) = default; + FlowContext& operator=(const FlowContext&) = delete; + + FlowContext clone() const { + std::map copy = known_facts; + return FlowContext(std::move(copy), unreachable); + } + + bool is_unreachable() const { return unreachable; } + + TypePtr smart_cast_if_exists(SinkExpression s_expr) const { + auto it = known_facts.find(s_expr); + return it == known_facts.end() ? 
nullptr : it->second.expr_type; + } + + void register_known_type(SinkExpression s_expr, TypePtr assigned_type); + void mark_unreachable(UnreachableKind reason); + + static FlowContext merge_flow(FlowContext&& c1, FlowContext&& c2); +}; + +struct ExprFlow { + FlowContext out_flow; + + // only calculated inside `if`, left of `&&`, etc. — there this expression is immediate condition, empty otherwise + FlowContext true_flow; + FlowContext false_flow; + + ExprFlow(FlowContext&& out_flow, FlowContext&& true_flow, FlowContext&& false_flow) + : out_flow(std::move(out_flow)) + , true_flow(std::move(true_flow)) + , false_flow(std::move(false_flow)) {} + ExprFlow(FlowContext&& out_flow, const bool clone_flow_for_condition) + : out_flow(std::move(out_flow)) { + if (clone_flow_for_condition) { + true_flow = this->out_flow.clone(); + false_flow = this->out_flow.clone(); + } + } + + ExprFlow(ExprFlow&&) noexcept = default; + ExprFlow(const ExprFlow&) = delete; + ExprFlow& operator=(ExprFlow&&) = delete; + ExprFlow& operator=(const ExprFlow&) = delete; + + int get_always_true_false_state() const { + if (true_flow.is_unreachable() != false_flow.is_unreachable()) { + return false_flow.is_unreachable() ? 
1 : 2; // 1 is "always true" + } + return 0; + } +}; + +std::ostream& operator<<(std::ostream& os, const FactsAboutExpr& facts); +std::ostream& operator<<(std::ostream& os, const FlowContext& flow); +TypePtr calculate_type_subtract_null(TypePtr type); +SinkExpression extract_sink_expression_from_vertex(AnyExprV v); +TypePtr calc_declared_type_before_smart_cast(AnyExprV v); +TypePtr calc_smart_cast_type_on_assignment(TypePtr lhs_declared_type, TypePtr rhs_inferred_type); + +} // namespace tolk diff --git a/tolk/src-file.cpp b/tolk/src-file.cpp index 52ac3821..1286c1f9 100644 --- a/tolk/src-file.cpp +++ b/tolk/src-file.cpp @@ -18,6 +18,7 @@ #include "compiler-state.h" #include #include +#include namespace tolk { @@ -146,9 +147,10 @@ void SrcLocation::show_context(std::ostream& os) const { return; } SrcFile::SrcPosition pos = src_file->convert_offset(char_offset); - os << " " << pos.line_str << "\n"; + os << std::right << std::setw(4) << pos.line_no << " | "; + os << pos.line_str << "\n"; - os << " "; + os << " " << " | "; for (int i = 1; i < pos.char_no; ++i) { os << ' '; } @@ -193,8 +195,11 @@ std::ostream& operator<<(std::ostream& os, const ParseError& error) { } void ParseError::show(std::ostream& os) const { - os << where << ": error: " << message << std::endl; - where.show_context(os); + os << loc << ": error: " << message << std::endl; + if (current_function) { + os << " // in function `" << current_function->as_human_readable() << "`" << std::endl; + } + loc.show_context(os); } } // namespace tolk diff --git a/tolk/src-file.h b/tolk/src-file.h index 0c82bf18..b0f9cba3 100644 --- a/tolk/src-file.h +++ b/tolk/src-file.h @@ -124,10 +124,14 @@ struct Fatal final : std::exception { std::ostream& operator<<(std::ostream& os, const Fatal& fatal); struct ParseError : std::exception { - SrcLocation where; + FunctionPtr current_function; + SrcLocation loc; std::string message; - ParseError(SrcLocation _where, std::string _msg) : where(_where), message(std::move(_msg)) { 
- } + + ParseError(SrcLocation loc, std::string message) + : current_function(nullptr), loc(loc), message(std::move(message)) {} + ParseError(FunctionPtr current_function, SrcLocation loc, std::string message) + : current_function(current_function), loc(loc), message(std::move(message)) {} const char* what() const noexcept override { return message.c_str(); diff --git a/tolk/symtable.cpp b/tolk/symtable.cpp index 48b0b89d..51dc3440 100644 --- a/tolk/symtable.cpp +++ b/tolk/symtable.cpp @@ -102,9 +102,6 @@ void LocalVarData::assign_resolved_type(TypePtr declared_type) { } void LocalVarData::assign_inferred_type(TypePtr inferred_type) { -#ifdef TOLK_DEBUG - assert(this->declared_type == nullptr); // called when type declaration omitted, inferred from assigned value -#endif this->declared_type = inferred_type; } diff --git a/tolk/tolk.cpp b/tolk/tolk.cpp index cc867c52..71d1969d 100644 --- a/tolk/tolk.cpp +++ b/tolk/tolk.cpp @@ -58,8 +58,8 @@ int tolk_proceed(const std::string &entrypoint_filename) { pipeline_register_global_symbols(); pipeline_resolve_identifiers_and_assign_symbols(); pipeline_calculate_rvalue_lvalue(); - pipeline_detect_unreachable_statements(); pipeline_infer_types_and_calls_and_fields(); + pipeline_check_inferred_types(); pipeline_refine_lvalue_for_mutate_arguments(); pipeline_check_rvalue_lvalue(); pipeline_check_pure_impure_operations(); diff --git a/tolk/type-system.cpp b/tolk/type-system.cpp index 6cd353d5..d73625c2 100644 --- a/tolk/type-system.cpp +++ b/tolk/type-system.cpp @@ -84,6 +84,7 @@ TypePtr TypeDataTuple::singleton; TypePtr TypeDataContinuation::singleton; TypePtr TypeDataNullLiteral::singleton; TypePtr TypeDataUnknown::singleton; +TypePtr TypeDataNever::singleton; TypePtr TypeDataVoid::singleton; void type_system_init() { @@ -96,6 +97,7 @@ void type_system_init() { TypeDataContinuation::singleton = new TypeDataContinuation; TypeDataNullLiteral::singleton = new TypeDataNullLiteral; TypeDataUnknown::singleton = new TypeDataUnknown; + 
TypeDataNever::singleton = new TypeDataNever; TypeDataVoid::singleton = new TypeDataVoid; } @@ -325,53 +327,56 @@ bool TypeDataInt::can_rhs_be_assigned(TypePtr rhs) const { if (rhs == this) { return true; } - return false; + return rhs == TypeDataNever::create(); } bool TypeDataBool::can_rhs_be_assigned(TypePtr rhs) const { if (rhs == this) { return true; } - return false; + return rhs == TypeDataNever::create(); } bool TypeDataCell::can_rhs_be_assigned(TypePtr rhs) const { if (rhs == this) { return true; } - return false; + return rhs == TypeDataNever::create(); } bool TypeDataSlice::can_rhs_be_assigned(TypePtr rhs) const { if (rhs == this) { return true; } - return false; + return rhs == TypeDataNever::create(); } bool TypeDataBuilder::can_rhs_be_assigned(TypePtr rhs) const { if (rhs == this) { return true; } - return false; + return rhs == TypeDataNever::create(); } bool TypeDataTuple::can_rhs_be_assigned(TypePtr rhs) const { if (rhs == this) { return true; } - return false; + return rhs == TypeDataNever::create(); } bool TypeDataContinuation::can_rhs_be_assigned(TypePtr rhs) const { if (rhs == this) { return true; } - return false; + return rhs == TypeDataNever::create(); } bool TypeDataNullLiteral::can_rhs_be_assigned(TypePtr rhs) const { - return rhs == this; + if (rhs == this) { + return true; + } + return rhs == TypeDataNever::create(); } bool TypeDataNullable::can_rhs_be_assigned(TypePtr rhs) const { @@ -384,11 +389,17 @@ bool TypeDataNullable::can_rhs_be_assigned(TypePtr rhs) const { if (const TypeDataNullable* rhs_nullable = rhs->try_as()) { return inner->can_rhs_be_assigned(rhs_nullable->inner); } - return inner->can_rhs_be_assigned(rhs); + if (inner->can_rhs_be_assigned(rhs)) { + return true; + } + return rhs == TypeDataNever::create(); } bool TypeDataFunCallable::can_rhs_be_assigned(TypePtr rhs) const { - return rhs == this; + if (rhs == this) { + return true; + } + return rhs == TypeDataNever::create(); } bool 
TypeDataGenericT::can_rhs_be_assigned(TypePtr rhs) const { @@ -405,7 +416,7 @@ bool TypeDataTensor::can_rhs_be_assigned(TypePtr rhs) const { } return true; } - return false; + return rhs == TypeDataNever::create(); } bool TypeDataTypedTuple::can_rhs_be_assigned(TypePtr rhs) const { @@ -417,7 +428,7 @@ bool TypeDataTypedTuple::can_rhs_be_assigned(TypePtr rhs) const { } return true; } - return false; + return rhs == TypeDataNever::create(); } bool TypeDataUnknown::can_rhs_be_assigned(TypePtr rhs) const { @@ -429,8 +440,15 @@ bool TypeDataUnresolved::can_rhs_be_assigned(TypePtr rhs) const { return false; } +bool TypeDataNever::can_rhs_be_assigned(TypePtr rhs) const { + return true; +} + bool TypeDataVoid::can_rhs_be_assigned(TypePtr rhs) const { - return rhs == this; + if (rhs == this) { + return true; + } + return rhs == TypeDataNever::create(); } @@ -551,6 +569,10 @@ bool TypeDataUnresolved::can_be_casted_with_as_operator(TypePtr cast_to) const { return false; } +bool TypeDataNever::can_be_casted_with_as_operator(TypePtr cast_to) const { + return true; +} + bool TypeDataVoid::can_be_casted_with_as_operator(TypePtr cast_to) const { return cast_to == this; } @@ -584,6 +606,10 @@ bool TypeDataTensor::can_hold_tvm_null_instead() const { return true; } +bool TypeDataNever::can_hold_tvm_null_instead() const { + return false; +} + bool TypeDataVoid::can_hold_tvm_null_instead() const { return false; } @@ -650,6 +676,7 @@ static TypePtr parse_simple_type(Lexer& lex) { case 5: if (str == "slice") return TypeDataSlice::create(); if (str == "tuple") return TypeDataTuple::create(); + if (str == "never") return TypeDataNever::create(); break; case 7: if (str == "builder") return TypeDataBuilder::create(); diff --git a/tolk/type-system.h b/tolk/type-system.h index 02e50fc2..4b671e30 100644 --- a/tolk/type-system.h +++ b/tolk/type-system.h @@ -409,6 +409,27 @@ public: bool can_be_casted_with_as_operator(TypePtr cast_to) const override; }; +/* + * `never` is a special type meaning
"no value can be held". + * It may appear due to smart casts, for example `if (x == null && x != null)` makes x "never". + * Functions returning "never" are assumed to never exit; calling them interrupts control flow. + * Such variables can not be cast to any other types, all their usage will trigger type mismatch errors. + */ +class TypeDataNever final : public TypeData { + TypeDataNever() : TypeData(19ULL, 0, 0) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "never"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; + bool can_hold_tvm_null_instead() const override; +}; + /* * `void` is TypeDataVoid. * From the type system point of view, `void` functions return nothing.