1
0
Fork 0
mirror of https://github.com/ton-blockchain/ton synced 2025-03-09 15:40:10 +00:00

[Tolk] Rewrite the type system from Hindley-Milner to static typing

FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)

While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.

This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is possible because the expressions it depends on are already inferred
3) forall completely removed, generic functions introduced
   (they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
   ("before", as worked previously, was always a wrong approach)
This commit is contained in:
tolk-vm 2024-12-30 22:31:27 +07:00
parent 3540424aa1
commit 799e2d1265
No known key found for this signature in database
GPG key ID: 7905DD7FE0324B12
101 changed files with 5402 additions and 2713 deletions

View file

@ -17,17 +17,17 @@ fun createEmptyTuple(): tuple
/// Appends a value to tuple, resulting in `Tuple t' = (x1, ..., xn, value)`.
/// If its size exceeds 255, throws a type check exception.
@pure
fun tuplePush<X>(mutate self: tuple, value: X): void
fun tuplePush<T>(mutate self: tuple, value: T): void
asm "TPUSH";
/// Returns the first element of a non-empty tuple.
@pure
fun tupleFirst<X>(t: tuple): X
fun tupleFirst<T>(t: tuple): T
asm "FIRST";
/// Returns the [`index`]-th element of a tuple.
@pure
fun tupleAt<X>(t: tuple, index: int): X
fun tupleAt<T>(t: tuple, index: int): T
builtin;
/// Returns the size of a tuple (elements count in it).
@ -37,7 +37,7 @@ fun tupleSize(t: tuple): int
/// Returns the last element of a non-empty tuple.
@pure
fun tupleLast(t: tuple): int
fun tupleLast<T>(t: tuple): T
asm "LAST";
@ -306,11 +306,11 @@ fun getBuilderDepth(b: builder): int
*/
/// Dump a variable [x] to the debug log.
fun debugPrint<X>(x: X): void
fun debugPrint<T>(x: T): void
builtin;
/// Dump a string [x] to the debug log.
fun debugPrintString<X>(x: X): void
fun debugPrintString<T>(x: T): void
builtin;
/// Dumps the stack (at most the top 255 values) and shows the total stack depth.

View file

@ -61,3 +61,9 @@ fun calculateOriginalMessageFee(workchain: int, incomingFwdFee: int): int
/// If it has no debt, `0` is returned.
fun getMyStorageDuePayment(): int
asm "DUEPAYMENT";
/// Returns the amount of nanotoncoins charged for storage.
/// (during the storage phase preceding the current computation phase)
@pure
fun getMyStoragePaidPayment(): int
asm "STORAGEFEES";

View file

@ -35,7 +35,7 @@ fun test88(x: int) {
}
@method_id(89)
fun test89(last: int) {
fun test89(last: int): (int, int, int, int) {
var t: tuple = createEmptyTuple();
t.tuplePush(1);
t.tuplePush(2);

View file

@ -9,6 +9,7 @@ fun calc_phi(): int {
repeat (70) { n*=10; };
var p= 1;
var `q`=1;
_=`q`;
do {
(p,q)=(q,p+q);
} while (q <= n); //;;
@ -27,7 +28,7 @@ fun calc_sqrt2(): int {
return mulDivRound(p, n, q);
}
fun calc_root(m: auto): auto {
fun calc_root(m: int) {
var base: int=1;
repeat(70) { base *= 10; }
var (a, b, c) = (1,0,-m);

View file

@ -1,5 +1,5 @@
@deprecated
fun twice(f: auto, x: auto): auto {
fun twice(f: int -> int, x: int) {
return f (f (x));
}

View file

@ -138,5 +138,5 @@ fun main() {
inc CALLDICT // self newY
}>
"""
@code_hash 33262590582878205026101577472505372101182291690814957175155528952950621243206
@code_hash 7627024945492125068389905298530400936797031708759561372406088054030801992712
*/

View file

@ -0,0 +1,28 @@
// Destructures a one-element typed tuple `[int]` into `payload` and returns it increased by 10.
// The return type is not declared here; it is left for the compiler to infer.
fun extractFromTypedTuple(params: [int]) {
var [payload: int] = params;
return payload + 10;
}
// Wraps `x` into a one-element typed tuple (the variable's type is not annotated)
// and passes it to extractFromTypedTuple, so the result is x + 10.
@method_id(101)
fun test101(x: int) {
var params = [x];
return extractFromTypedTuple(params);
}
// Returns `null` when x > 10, otherwise x itself.
// No return type is declared: one `return` yields a plain `null`, the other an `int`.
fun autoInferIntNull(x: int) {
if (x > 10) { return null; }
return x;
}
// Calls autoInferIntNull for `value` and `value * 2` (`x` is annotated as int, `y` is left untyped).
// Returns `null` if both results are null, -1 if exactly one of them is null, otherwise their sum.
fun main(value: int) {
var (x: int, y) = (autoInferIntNull(value), autoInferIntNull(value * 2));
if (x == null && y == null) { return null; }
return x == null || y == null ? -1 : x + y;
}
/**
@testcase | 0 | 3 | 9
@testcase | 0 | 6 | -1
@testcase | 0 | 11 | (null)
@testcase | 101 | 78 | 88
*/

View file

@ -4,7 +4,7 @@ fun check_assoc(a: int, b: int, c: int): int {
return op(op(a, b), c) == op(a, op(b, c));
}
fun unnamed_args(_: int, _: slice, _: auto): auto {
fun unnamed_args(_: int, _: slice, _: int) {
return true;
}
@ -14,7 +14,7 @@ fun main(x: int, y: int, z: int): int {
}
@method_id(101)
fun test101(x: int, z: int): auto {
fun test101(x: int, z: int) {
return unnamed_args(x, "asdf", z);
}

View file

@ -1,4 +1,4 @@
fun check_assoc(op: auto, a: int, b: int, c: int) {
fun check_assoc(op: (int, int) -> int, a: int, b: int, c: int) {
return op(op(a, b), c) == op(a, op(b, c));
}

View file

@ -0,0 +1,150 @@
// Identity helpers, each written in a different way.
// eq1: return type is declared as X.
fun eq1<X>(value: X): X { return value; }
// eq2: return type is omitted.
fun eq2<X>(value: X) { return value; }
// eq3: stores eq1(value) in a typed tuple `[X]`, then destructures it back using a pattern
// wrapped in redundant parentheses.
fun eq3<X>(value: X): X { var cp: [X] = [eq1(value)]; var ((([v: X]))) = cp; return v; }
// eq4: forwards to eq1 with an explicit instantiation `<X>`.
fun eq4<X>(value: X) { return eq1<X>(value); }
// Calls the identity helpers without explicit type arguments, passing an int (`a`),
// a tensor `(x,x)` (`b`), a typed tuple `[x,x]` (`c`) and the result of createEmptyTuple().
@method_id(101)
fun test101(x: int) {
var (a, b, c) = (x, (x,x), [x,x]);
return (eq1(a), eq1(b), eq1(c), eq2(a), eq2(b), eq2(c), eq3(a), eq4(b), eq3(createEmptyTuple()));
}
// X occurs only in the return type; the value is the literal 2 cast with `as X`.
fun getTwo<X>(): X { return 2 as X; }
// Non-generic function that takes an int and returns it.
fun takeInt(a: int) { return a; }
// Calls getTwo() without explicit type arguments in several positions:
// initializer of a variable annotated as int (including `_`), operand of `as int`,
// both branches of a ternary, a `redef` assignment, an argument of takeInt,
// and elements of the `[(int, int)]` part of the declared return type.
@method_id(102)
fun test102(): (int, int, int, [(int, int)]) {
var a: int = getTwo();
var _: int = getTwo();
var b = getTwo() as int;
var c: int = 1 ? getTwo() : getTwo();
var c redef = getTwo();
return (eq1<int>(a), eq2<int>(b), takeInt(getTwo()), [(getTwo(), getTwo())]);
}
// Pushes an int, an int and a slice into an untyped tuple, then reads elements back in four ways:
// assignment to the existing variable `cs`, an explicit `as slice` cast,
// a bare `t.tupleAt(0)` placed in the declared `(int, int, int)` result,
// and an explicit instantiation `tupleAt<slice>`.
@method_id(103)
fun test103(first: int): (int, int, int) {
var t = createEmptyTuple();
var cs = beginCell().storeInt(100, 32).endCell().beginParse();
t.tuplePush(first);
t.tuplePush(2);
t.tuplePush(cs);
cs = t.tupleAt(2);
cs = t.tupleAt(2) as slice;
return (t.tupleAt(0), cs.loadInt(32), t.tupleAt<slice>(2).loadInt(32));
}
// Generic over three independent type parameters; packs the arguments into a typed tuple `[T1, T2, T3]`.
fun manyEq<T1, T2, T3>(a: T1, b: T2, c: T3): [T1, T2, T3] {
return [a, b, c];
}
// Passes ternary expressions to manyEq without explicit type arguments.
// Some branches are a plain `null`, one is `null as int`; in the second call the first argument is a tensor
// of a ternary and a loadBool() result.
@method_id(104)
fun test104(f: int) {
return (
manyEq(1 ? 1 : 1, f ? 0 : null, !f ? getTwo() as int : null),
manyEq((f ? null as int : eq2(2), beginCell().storeBool(true).endCell().beginParse().loadBool()), 0, eq4(f))
);
}
// Returns x + y for two arguments that share a single type parameter X.
fun calcSum<X>(x: X, y: X) { return x + y; }
// The `if (0)` branch calls calcSum with a parenthesized int literal and `null`; it is never executed.
// The returned value is calcSum(1, 2).
@method_id(105)
fun test105() {
if (0) { calcSum(((0)), null); }
return (calcSum(1, 2));
}
// Small callbacks that are passed to abstractTransform.
// calcYPlus1 is itself generic; the others are plain functions without declared return types.
fun calcYPlus1<Y>(value: Y) { return value + 1; }
fun calcLoad32(cs: slice) { return cs.loadInt(32); }
fun calcTensorPlus1(tens: (int, int)) { var (f, s) = tens; return (f + 1, s + 1); }
fun calcTensorMul2(tens: (int, int)) { var (f, s) = tens; return (f * 2, s * 2); }
fun cellToSlice(c: cell) { return c.beginParse(); }
// Applies xToY to initialX, then yToR to that intermediate value, and returns the final result.
// The callback parameter types are written with different amounts of parentheses: `(X)` and `(((Y)))`.
fun abstractTransform<X, Y, R>(xToY: (X) -> Y, yToR: (((Y))) -> R, initialX: X): R {
var y = xToY(initialX);
return yToR(y);
}
// Calls abstractTransform three times without explicit type arguments:
// with plain functions (cell -> slice -> int), with the explicitly instantiated generic `calcYPlus1<int>`,
// and with functions operating on a tensor `(int, int)`.
@method_id(106)
fun test106() {
var c = beginCell().storeInt(106, 32).endCell();
return [
abstractTransform(cellToSlice, calcLoad32, c),
abstractTransform(calcYPlus1<int>, calcYPlus1<int>, 0),
abstractTransform(calcTensorPlus1, calcTensorMul2, (2, 2))
];
}
// Generic wrapper around tupleFirst(); Y occurs only in the return type.
fun callTupleFirst<X, Y>(t: X): Y { return t.tupleFirst(); }
// Pushes v1 using method syntax and v2 using function syntax with `mutate self`, then returns `self`
// (declared return type is `self`).
fun callTuplePush<T, V>(mutate self: T, v1: V, v2: V): self { self.tuplePush(v1); tuplePush(mutate self, v2); return self; }
// Non-generic wrappers without declared return types; used as callbacks in test107.
fun getTupleLastInt(t: tuple) { return t.tupleLast<int>(); }
fun getTupleSize(t: tuple) { return t.tupleSize(); }
// callAnyFn: the callback's argument and result types are separate type parameters.
fun callAnyFn<TObj, TResult>(f: (TObj) -> TResult, arg: TObj) { return f(arg); }
// callAnyFn2: the whole callback type is a single type parameter.
fun callAnyFn2<TCallback>(f: TCallback, arg: tuple) { return f(arg); }
// Global tuple mutated by test107.
global t107: tuple;
// Fills t107 with 1..6 via callTuplePush, called both as a function (`mutate t107`) and as a chained method,
// then reads its size, first and last elements through the generic wrappers,
// with and without explicit type arguments.
@method_id(107)
fun test107() {
t107 = createEmptyTuple();
callTuplePush(mutate t107, 1, 2);
t107.callTuplePush(3, 4).callTuplePush(5, 6);
var first: int = t107.callTupleFirst();
return (
callAnyFn<tuple, int>(getTupleSize, t107),
callAnyFn2(getTupleSize, t107),
first,
callTupleFirst(t107) as int,
callAnyFn(getTupleLastInt, t107),
callAnyFn2(getTupleLastInt, t107)
);
}
// Global counter used by test108.
global g108: int;
// Adds `by` to the global counter.
fun inc108(by: int) { g108 += by; }
// Returns the function inc108 itself (not the result of calling it).
fun getInc108() { return inc108; }
// Calls the given zero-argument function and returns its result.
fun returnResult<RetT>(f: () -> RetT): RetT { return f(); }
// Calls f(), applies the function it returned to `arg`, and then returns `f` itself.
// The same function type is spelled `() -> (ArgT) -> RetT` in the parameter and `() -> ArgT -> RetT` in the return type.
fun applyAndReturn<ArgT, RetT>(f: () -> (ArgT) -> RetT, arg: ArgT): () -> ArgT -> RetT {
f()(arg);
return f;
}
// Calls functions returned from other functions, first with explicit type arguments and then without.
// Increments applied to g108, in order: 1, 2, 10 + 10, 2, 10 + 10, giving 45.
@method_id(108)
fun test108() {
g108 = 0;
getInc108()(1);
returnResult<(int) -> void>(getInc108)(2);
applyAndReturn<int, void>(getInc108, 10)()(10);
returnResult(getInc108)(2);
applyAndReturn(getInc108, 10)()(10);
return g108;
}
// Throws `(1, x)` when x is non-zero; the catch block casts the caught `arg` with `as int`
// and also calls eq2 and getTwo() inside a nested typed tuple.
// When x is zero nothing is thrown and `(0, [[x, 1]])` is returned.
fun main(x: int): (int, [[int, int]]) {
try { if(x) { throw (1, x); } }
catch (excNo, arg) { return (arg as int, [[eq2(arg as int), getTwo()]]); }
return (0, [[x, 1]]);
}
/**
@testcase | 0 | 1 | 1 [ [ 1 2 ] ]
@testcase | 101 | 0 | 0 0 0 [ 0 0 ] 0 0 0 [ 0 0 ] 0 0 0 []
@testcase | 102 | | 2 2 2 [ 2 2 ]
@testcase | 103 | 0 | 0 100 100
@testcase | 104 | 0 | [ 1 (null) 2 ] [ 2 -1 0 0 ]
@testcase | 105 | | 3
@testcase | 106 | | [ 106 2 6 6 ]
@testcase | 107 | | 6 6 1 1 6 6
@testcase | 108 | | 45
@fif_codegen DECLPROC eq1<int>
@fif_codegen DECLPROC eq1<tuple>
@fif_codegen DECLPROC eq1<(int,int)>
@fif_codegen DECLPROC eq1<[int,int]>
@fif_codegen DECLPROC getTwo<int>
@fif_codegen_avoid DECLPROC eq1
@fif_codegen_avoid DECLPROC eq2
@fif_codegen_avoid DECLPROC eq3
*/

View file

@ -1,9 +1,10 @@
fun main() {
return true();
const asdf = 1;
fun main(x: int) {
return x.asdf();
}
/**
@compilation_should_fail
The message is weird now, but later I'll rework error messages anyway.
@stderr cannot apply expression of type int to an expression of type (): cannot unify type () -> ??2 with int
@stderr calling a non-function
*/

View file

@ -0,0 +1,10 @@
fun getOne() { return 1; }
fun main() {
return getOne<int>();
}
/**
@compilation_should_fail
@stderr calling a not generic function with generic T
*/

View file

@ -0,0 +1,13 @@
// this function is declared incorrectly,
// since it should return 2 values onto a stack (1 for returned slice, 1 for mutated int)
// but its asm ret_order does not contain exactly 2 numbers
fun loadAddress2(mutate self: int): slice
asm( -> 1 0 2) "LDMSGADDR";
fun main(){}
/**
@compilation_should_fail
@stderr ret_order (after ->) expected to contain 2 numbers
@stderr asm( -> 1 0 2)
*/

View file

@ -0,0 +1,16 @@
// `proxy` and `factorial` call each other, and neither declares a return type.
fun proxy(x: int) {
return factorial(x);
}
// Returns 1 for x <= 0, otherwise x * proxy(x-1), which calls back into factorial.
fun factorial(x: int) {
if (x <= 0) {
return 1;
}
return x * proxy(x-1);
}
/**
@compilation_should_fail
@stderr could not infer return type of `factorial`, because it appears in a recursive call chain
@stderr fun factorial
*/

View file

@ -0,0 +1,7 @@
const c: slice = 123 + 456;
/**
@compilation_should_fail
@stderr expression type does not match declared type
@stderr const c
*/

View file

@ -0,0 +1,10 @@
fun f<X>(v: int, x: X) {}
fun failCantDeduceWithoutArgument() {
return f(1);
}
/**
@compilation_should_fail
@stderr can not deduce X for generic function `f<X>`
*/

View file

@ -0,0 +1,9 @@
fun invalidReferencingGenericMethodWithoutGeneric() {
var t = createEmptyTuple();
var cb = t.tupleLast;
}
/**
@compilation_should_fail
@stderr can not use a generic function `tupleLast<T>` as non-call
*/

View file

@ -0,0 +1,11 @@
global gVar: int;
fun main() {
var x = gVar<int>;
return x;
}
/**
@compilation_should_fail
@stderr generic T not expected here
*/

View file

@ -0,0 +1,10 @@
fun f<T>(v: int, x: T) {}
fun failCantDeduceWithPlainNull() {
return f(0, null);
}
/**
@compilation_should_fail
@stderr can not deduce T for generic function `f<T>`
*/

View file

@ -0,0 +1,11 @@
fun f<T>(x: T, y: T) {}
fun failIncompatibleTypesForT() {
return f(32, "");
}
/**
@compilation_should_fail
@stderr T is both int and slice for generic function `f<T>`
@stderr f(32
*/

View file

@ -0,0 +1,10 @@
fun f<T>(x: T): void asm "NOP";
fun failInstantiatingAsmFunctionWithNon1Slot() {
f((1, 2));
}
/**
@compilation_should_fail
@stderr can not call `f<T>` with T=(int, int), because it occupies 2 stack slots in TVM, not 1
*/

View file

@ -0,0 +1,10 @@
fun f<T>(x: T): void asm "NOP";
fun failUsingGenericFunctionPartially() {
var cb = f;
}
/**
@compilation_should_fail
@stderr can not use a generic function `f<T>` as non-call
*/

View file

@ -0,0 +1,10 @@
fun eq<X>(t: X) { return t; }
fun failUsingGenericFunctionPartially() {
var cb = createEmptyTuple().eq().eq().tuplePush;
}
/**
@compilation_should_fail
@stderr can not use a generic function `tuplePush<T>` as non-call
*/

View file

@ -0,0 +1,18 @@
// Calls foo with a slice, which in turn calls bar with the same value.
fun failOnInstantiation(a: slice) {
var b: slice = foo(a);
}
// Declares return type X but returns the int literal 1 regardless of X.
fun bar<X>(value: X) : X {
return 1;
}
// Forwards its argument to bar.
fun foo<X>(value: X) : X {
return bar(value);
}
/**
@compilation_should_fail
@stderr while instantiating generic function `foo<slice>`
@stderr while instantiating generic function `bar<slice>`
@stderr can not convert type `int` to return type `slice`
@stderr return 1
*/

View file

@ -0,0 +1,11 @@
fun withT1T2<T1, T2>(a: (T1, T2)) {}
fun wrongTCountPassed() {
withT1T2<int>((5, ""));
}
/**
@compilation_should_fail
@stderr wrong count of generic T: expected 2, got 1
@stderr <int>
*/

View file

@ -0,0 +1,8 @@
fun invalidProvidingGenericTsToNotGeneric() {
beginCell<builder>();
}
/**
@compilation_should_fail
@stderr calling a not generic function with generic T
*/

View file

@ -0,0 +1,9 @@
fun cantCallMutatingFunctionWithAssignmentLValue() {
var t: tuple = createEmptyTuple();
(t = createEmptyTuple()).tuplePush(1);
}
/**
@compilation_should_fail
@stderr assignment can not be used as lvalue
*/

View file

@ -0,0 +1,13 @@
// Asm function (TLEN) whose `self` parameter is declared with `mutate`.
@pure
fun tupleMut(mutate self: tuple): int
asm "TLEN";
// References `t.tupleMut` without calling it, as an element of a nested typed tuple.
fun main() {
var t = createEmptyTuple();
return [[t.tupleMut]];
}
/**
@compilation_should_fail
@stderr saving `tupleMut` into a variable is impossible, since it has `mutate` parameters
*/

View file

@ -4,5 +4,5 @@ fun load_u32(cs: slice): (slice, int) {
/**
@compilation_should_fail
@stderr expected `(`, got `32`
@stderr expected `;`, got `32`
*/

View file

@ -1,16 +0,0 @@
global set: int;
@pure
fun someF(): int {
var set redef = 0;
return set;
}
/**
@compilation_should_fail
@stderr
"""
an impure operation in a pure function
var set
"""
*/

View file

@ -4,6 +4,6 @@ fun cantReturnNothingFromSelf(mutate self: int): self {
/**
@compilation_should_fail
@stderr missing return; forgot `return self`?
@stderr missing return
@stderr }
*/

View file

@ -4,5 +4,5 @@ fun main(x: int) {
/**
@compilation_should_fail
@stderr null is not a function: use `null`, not `null()`
@stderr calling a non-function
*/

View file

@ -0,0 +1,9 @@
fun main() {
var a = 1;
(a += 1) += 2;
}
/**
@compilation_should_fail
@stderr assignment can not be used as lvalue
*/

View file

@ -0,0 +1,9 @@
fun main() {
var x = 1;
x += (var y = 2);
}
/**
@compilation_should_fail
@stderr expected <expression>, got `var`
*/

View file

@ -6,5 +6,5 @@ fun main() {
/**
@compilation_should_fail
@stderr .tolk:2
@stderr expected <type>, got `scli`
@stderr unknown type name `scli`
*/

View file

@ -0,0 +1,10 @@
fun failAssignNullToTensor() {
var ab = (1, 2);
ab = null;
return ab;
}
/**
@compilation_should_fail
@stderr can not assign `null` to variable of type `(int, int)`
*/

View file

@ -15,5 +15,5 @@ fun cantMixDifferentThis() {
/**
@compilation_should_fail
@stderr cannot apply function appendBuilder : builder -> (builder, ()) to arguments of type int: cannot unify type int with builder
@stderr can not call method for `builder` with object of type `int`
*/

View file

@ -7,8 +7,6 @@ fun cantCallNotChainedMethodsInAChain(x: int) {
}
/**
The error is very weird, but nevertheless, the type system prevents of doing such errors.
@compilation_should_fail
@stderr cannot apply function incNotChained : int -> (int, ()) to arguments of type (): cannot unify type () with int
@stderr can not call method for `int` with object of type `void`
*/

View file

@ -7,8 +7,7 @@ fun failWhenReturnANotChainedValue(x: int): int {
}
/**
The error is very weird, but nevertheless, the type system prevents of doing such errors.
@compilation_should_fail
@stderr previous function return type int cannot be unified with return statement expression type (): cannot unify type () with int
@stderr x.incNotChained()
@stderr can not convert type `void` to return type `int`
*/

View file

@ -4,5 +4,5 @@ fun failWhenTernaryConditionNotInt(cs: slice) {
/**
@compilation_should_fail
@stderr condition of ternary ?: operator must be an integer
@stderr condition of ternary operator must be an integer
*/

View file

@ -0,0 +1,9 @@
fun failAssignPlainNullToVariable() {
var x = null;
}
/**
@compilation_should_fail
@stderr can not infer type of `x`, it's always null
@stderr specify its type with `x: <type>` or use `null as <type>`
*/

View file

@ -0,0 +1,8 @@
fun failExplicitCastIncompatible(c: cell) {
return c as slice;
}
/**
@compilation_should_fail
@stderr type `cell` can not be cast to `slice`
*/

View file

@ -0,0 +1,13 @@
// Returns the stdlib function tupleLast instantiated with X, as a value of type `tuple -> X`.
fun getTupleLastGetter<X>(): tuple -> X {
return tupleLast<X>;
}
// Calls the getter instantiated with int and assigns its result to a variable declared as `cell`.
fun failTypeMismatch() {
var t = createEmptyTuple();
var c: cell = getTupleLastGetter<int>()(t);
}
/**
@compilation_should_fail
@stderr can not assign `int` to variable of type `cell`
*/

View file

@ -54,7 +54,8 @@ fun testDict(last: int) {
@method_id(105)
fun testNotNull(x: int) {
return [x == null, null == x, !(x == null), null == null, +(null != null)];
// return [x == null, null == x, !(x == null), null == null, +(null != null)];
return [x == null, null == x, !(x == null)];
}
@method_id(106)
@ -144,8 +145,8 @@ fun main() {
@testcase | 104 | 50 | 3 5 -1
@testcase | 104 | 100 | 3 5 5
@testcase | 104 | 0 | 3 -1 5
@testcase | 105 | 0 | [ 0 0 -1 -1 0 ]
@testcase | 105 | null | [ -1 -1 0 -1 0 ]
@testcase | 105 | 0 | [ 0 0 -1 ]
@testcase | 105 | null | [ -1 -1 0 ]
@testcase | 106 | | [ 0 0 0 -1 ] [ 0 0 0 ] [ -1 -1 -1 ] [ 0 -1 ]
@testcase | 107 | | [ -1 -1 0 -1 ] [ 0 0 0 ] [ -1 -1 -1 ] [ -1 0 ]
@testcase | 108 | 1 2 | -1

View file

@ -154,7 +154,7 @@ fun getSumOfNumbersInCell(c: cell): int {
@method_id(110)
fun testStoreChaining() {
var b = beginCell().storeUint(1, 32).storeUint(2, 32).storeUint(3, 32);
var b = ((beginCell()).storeUint(1, 32)).storeUint(2, 32).storeUint(3, 32);
b.storeUint(4, 32);
b.myStoreUint(5, 32).storeUint(6, 32);
storeUint(mutate b, 7, 32);
@ -198,7 +198,7 @@ fun testStoreAndMutateBoth() {
b.myStoreU32_and_mutate_x(mutate x);
var cs: slice = b.endCell().beginParse();
var (n1,n2,n3,n4,n5) = (cs.loadUint(32),cs.loadUint(32),cs.loadUint(32),cs.loadUint(32),cs.loadUint(32));
var (n1,n2,n3,n4,n5) = (cs.loadUint(32),((cs)).loadUint(32),cs.loadUint(32),cs.loadUint(32),cs.loadUint(32));
assert(n5 == x) throw 100;
return [n1,n2,n3,n4,n5];

View file

@ -7,12 +7,14 @@ fun test1() {
numbers = listPrepend(2, numbers);
numbers = listPrepend(3, numbers);
numbers = listPrepend(4, numbers);
var (h, numbers redef) = listSplit(numbers);
var (h: int, numbers redef) = listSplit(numbers);
h += listGetHead(numbers);
_ = null;
(_, _) = (null, null);
var t = createEmptyTuple();
do {
var num = numbers.listNext();
var num: int = numbers.listNext();
t.tuplePush(num);
} while (numbers != null);
@ -44,7 +46,7 @@ fun test3(x: int) {
}
fun getUntypedNull() {
var untyped = null;
var untyped: null = null;
if (true) {
return untyped;
}
@ -52,8 +54,8 @@ fun getUntypedNull() {
}
@method_id(104)
fun test4() {
var (_, (_, untyped)) = (3, (createEmptyTuple, null));
fun test4(): null {
var (_, (_, untyped: null)) = (3, (createEmptyTuple, null));
if (true) {
return untyped;
}
@ -62,15 +64,10 @@ fun test4() {
@method_id(105)
fun test5() {
var n = getUntypedNull();
var n: slice = getUntypedNull();
return !(null == n) ? n.loadInt(32) : 100;
}
@method_id(106)
fun test6(x: int) {
return x > null; // this compiles (for now), but fails at runtime
}
@method_id(107)
fun test7() {
var b = beginCell().storeMaybeRef(null);
@ -132,15 +129,6 @@ fun main() {
}>
"""
@fif_codegen
"""
test6 PROC:<{
// x
PUSHNULL // x _1
GREATER // _2
}>
"""
@fif_codegen
"""
test7 PROC:<{

View file

@ -56,7 +56,7 @@ fun test8(b: int): int {
return a;
}
fun `_<p`(a: auto, b: auto): int { return true; }
fun `_<p`(a: int, b: int): int { return true; }
fun main() {
// ok to parse

View file

@ -32,7 +32,7 @@ fun test1(): int {
@method_id(102)
fun test2(value: int): int {
save_contract_data(value);
var (_, restored: auto) = get_contract_data();
var (_, restored) = get_contract_data();
return restored;
}

View file

@ -10,9 +10,9 @@ fun used_from_noncall2(): int { return int20; }
fun used_as_noncall2(): int { return 0 * 0 + used_from_noncall2() + (0 << 0); }
global unused_gv: int;
global used_gv: auto;
global used_gv: int;
fun receiveGetter(): (() -> int) { return used_as_noncall2; }
fun receiveGetter(): () -> int { return used_as_noncall2; }
@pure
fun usedButOptimizedOut(x: int): int { return x + 2; }

View file

@ -187,7 +187,7 @@ fun myTupleAt<T>(self: tuple, idx: int): T {
global tup111: tuple;
@method_id(111)
fun testForallFunctionsWithSelf() {
fun testForallFunctionsWithSelf(): (int, int, tuple) {
var t = createEmptyTuple();
tup111 = createEmptyTuple();
t.myTuplePush(10);

View file

@ -218,7 +218,7 @@ fun fixed248_log2_const(): int {
@pure
@inline
fun Pi_const_f254(): int {
var (c: auto, _) = Pi_xconst_f254();
var (c, _) = Pi_xconst_f254();
return c;
}
@ -1019,7 +1019,8 @@ fun test_nrand(n: int): tuple {
repeat (n) {
var x: int = fixed248_nrand();
var bucket: int = (abs(x) >> 243); // 255 buckets starting from x=0, each 1/32 wide
t.tset(bucket, t.tupleAt(bucket) + 1);
var at_bucket: int = t.tupleAt(bucket);
t.tset(bucket, at_bucket + 1);
}
return t;
}

View file

@ -1,6 +1,3 @@
fun unsafeGetInt<X>(any: X): int
asm "NOP";
fun foo(x: int): int {
try {
if (x == 7) {
@ -28,7 +25,7 @@ fun foo_inlineref(x: int): int {
if (x == 7) { throw (44, 2); }
return x;
} catch (_, arg) {
return unsafeGetInt(arg);
return arg as int;
}
}
@ -83,7 +80,7 @@ fun foo_big(
}
return x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 + x11 + x12 + x13 + x14 + x15 + x16 + x17 + x18 + x19 + x20;
} catch (code, arg) {
return unsafeGetInt(arg);
return arg as int;
}
}

View file

@ -92,7 +92,7 @@ fun always_throw2(x: int) {
throw 239 + x;
}
global global_f: int -> ();
global global_f: int -> void;
@method_id(104)
fun testGlobalVarApply() {
@ -105,6 +105,30 @@ fun testGlobalVarApply() {
}
}
// Stores functions in local variables and calls them through those variables:
// `createEmptyTuple` is referenced by name, `tupleSize` via method syntax `t.tupleSize`.
// `sizer` is then called with the tuple passed explicitly as its argument.
@method_id(105)
fun testVarApply2() {
var creator = createEmptyTuple;
var t = creator();
t.tuplePush(1);
var sizer = t.tupleSize;
return sizer(t);
}
// Returns the stdlib function tupleLast instantiated with X, as a value of type `(tuple) -> X`.
fun getTupleLastGetter<X>(): (tuple) -> X {
return tupleLast<X>;
}
// Saves explicitly instantiated generic functions into variables and calls them later.
// `t.tupleAt<int>` and `createEmptyTuple().tupleFirst<int>` are written with method syntax,
// but each stored function is called with the tuple passed explicitly as its first argument.
// `getTupleLastGetter<tuple>()` is used both through a variable and called directly.
@method_id(106)
fun testVarApply3() {
var t = createEmptyTuple();
t.tuplePush(1);
t.tuplePush([2]);
var getIntAt = t.tupleAt<int>;
var getTupleFirstInt = createEmptyTuple().tupleFirst<int>;
var getTupleLastTuple = getTupleLastGetter<tuple>();
return (getIntAt(t, 0), getTupleFirstInt(t), getTupleLastTuple(t), getTupleLastGetter<tuple>()(t));
}
fun main() {}
/**
@ -112,4 +136,6 @@ fun main() {}
@testcase | 102 | | 1000
@testcase | 103 | | [ 1000 1000 0 1001 ]
@testcase | 104 | | 240
@testcase | 105 | | 1
@testcase | 106 | | 1 1 [ 2 ] [ 2 ]
*/

View file

@ -10,10 +10,10 @@ set(TOLK_SOURCE
constant-evaluator.cpp
pipe-discover-parse-sources.cpp
pipe-register-symbols.cpp
pipe-resolve-symbols.cpp
pipe-resolve-identifiers.cpp
pipe-calc-rvalue-lvalue.cpp
pipe-detect-unreachable.cpp
pipe-infer-check-types.cpp
pipe-infer-types-and-calls.cpp
pipe-refine-lvalue-for-mutate.cpp
pipe-check-rvalue-lvalue.cpp
pipe-check-pure-impure.cpp
@ -21,7 +21,8 @@ set(TOLK_SOURCE
pipe-ast-to-legacy.cpp
pipe-find-unused-symbols.cpp
pipe-generate-fif-output.cpp
unify-types.cpp
type-system.cpp
generics-helpers.cpp
abscode.cpp
analyzer.cpp
asmops.cpp

View file

@ -16,6 +16,7 @@
*/
#include "tolk.h"
#include "compiler-state.h"
#include "type-system.h"
namespace tolk {
@ -28,7 +29,7 @@ namespace tolk {
void TmpVar::dump(std::ostream& os) const {
show(os);
os << " : " << v_type << " (width ";
v_type->show_width(os);
os << v_type->calc_width_on_stack();
os << ")";
if (coord > 0) {
os << " = _" << (coord >> 8) << '.' << (coord & 255);
@ -443,7 +444,7 @@ void CodeBlob::print(std::ostream& os, int flags) const {
os << "-------- END ---------\n\n";
}
var_idx_t CodeBlob::create_var(TypeExpr* var_type, const LocalVarData* v_sym, SrcLocation location) {
var_idx_t CodeBlob::create_var(TypePtr var_type, const LocalVarData* v_sym, SrcLocation location) {
vars.emplace_back(var_cnt, var_type, v_sym, location);
return var_cnt++;
}
@ -454,7 +455,7 @@ bool CodeBlob::import_params(FormalArgList&& arg_list) {
}
std::vector<var_idx_t> list;
for (const auto& par : arg_list) {
TypeExpr* arg_type;
TypePtr arg_type;
const LocalVarData* arg_sym;
SrcLocation arg_loc;
std::tie(arg_type, arg_sym, arg_loc) = par;

View file

@ -16,6 +16,7 @@
*/
#include "tolk.h"
#include "compiler-state.h"
#include "type-system.h"
namespace tolk {
@ -25,38 +26,30 @@ namespace tolk {
*
*/
void CodeBlob::simplify_var_types() {
for (TmpVar& var : vars) {
TypeExpr::remove_indirect(var.v_type);
var.v_type->recompute_width();
}
}
int CodeBlob::split_vars(bool strict) {
int n = var_cnt, changes = 0;
for (int j = 0; j < var_cnt; j++) {
TmpVar& var = vars[j];
if (strict && var.v_type->minw != var.v_type->maxw) {
int width_j = var.v_type->calc_width_on_stack();
if (strict && width_j < 0) {
throw ParseError{var.where, "variable does not have fixed width, cannot manipulate it"};
}
std::vector<TypeExpr*> comp_types;
int k = var.v_type->extract_components(comp_types);
tolk_assert(k <= 254 && n <= 0x7fff00);
tolk_assert((unsigned)k == comp_types.size());
if (k != 1) {
var.coord = ~((n << 8) + k);
for (int i = 0; i < k; i++) {
if (width_j == 1) {
continue;
}
std::vector<TypePtr> comp_types;
var.v_type->extract_components(comp_types);
tolk_assert(width_j <= 254 && n <= 0x7fff00);
tolk_assert((unsigned)width_j == comp_types.size());
var.coord = ~((n << 8) + width_j);
for (int i = 0; i < width_j; i++) {
auto v = create_var(comp_types[i], vars[j].v_sym, vars[j].where);
tolk_assert(v == n + i);
tolk_assert(vars[v].idx == v);
vars[v].coord = ((int)j << 8) + i + 1;
}
n += k;
n += width_j;
++changes;
} else if (strict && var.v_type->minw != 1) {
throw ParseError{var.where,
"cannot work with variable or variable component of width greater than one"};
}
}
if (!changes) {
return 0;
@ -687,7 +680,7 @@ void CodeBlob::fwd_analyze() {
tolk_assert(ops && ops->cl == Op::_Import);
for (var_idx_t i : ops->left) {
values += i;
if (vars[i].v_type->is_int()) {
if (vars[i].v_type == TypeDataInt::create()) {
values[i]->val |= VarDescr::_Int;
}
}
@ -732,7 +725,7 @@ VarDescrList Op::fwd_analyze(VarDescrList values) {
}
case _Call: {
prepare_args(values);
if (!f_sym->is_regular_function()) {
if (!f_sym->is_code_function()) {
std::vector<VarDescr> res;
res.reserve(left.size());
for (var_idx_t i : left) {

View file

@ -16,8 +16,8 @@
*/
#include "ast-from-tokens.h"
#include "ast.h"
#include "type-system.h"
#include "platform-utils.h"
#include "type-expr.h"
#include "tolk-version.h"
/*
@ -130,9 +130,10 @@ static AnyExprV maybe_replace_eq_null_with_isNull_call(V<ast_binary_operator> v)
}
auto v_ident = createV<ast_identifier>(v->loc, "__isNull"); // built-in function
auto v_ref = createV<ast_reference>(v->loc, v_ident, nullptr);
AnyExprV v_null = v->get_lhs()->type == ast_null_keyword ? v->get_rhs() : v->get_lhs();
AnyExprV v_arg = createV<ast_argument>(v->loc, v_null, false);
AnyExprV v_isNull = createV<ast_function_call>(v->loc, v_ident, createV<ast_argument_list>(v->loc, {v_arg}));
AnyExprV v_isNull = createV<ast_function_call>(v->loc, v_ref, createV<ast_argument_list>(v->loc, {v_arg}));
if (v->tok == tok_neq) {
v_isNull = createV<ast_unary_operator>(v->loc, "!", tok_logical_not, v_isNull);
}
@ -146,98 +147,14 @@ static AnyExprV maybe_replace_eq_null_with_isNull_call(V<ast_binary_operator> v)
*
*/
// TE ::= TA | TA -> TE
// TA ::= int | ... | cont | var | _ | () | ( TE { , TE } ) | [ TE { , TE } ]
static TypeExpr* parse_type(Lexer& lex, V<ast_genericsT_list> genericsT_list);
static TypeExpr* parse_type1(Lexer& lex, V<ast_genericsT_list> genericsT_list) {
switch (lex.tok()) {
case tok_int:
lex.next();
return TypeExpr::new_atomic(TypeExpr::_Int);
case tok_cell:
lex.next();
return TypeExpr::new_atomic(TypeExpr::_Cell);
case tok_slice:
lex.next();
return TypeExpr::new_atomic(TypeExpr::_Slice);
case tok_builder:
lex.next();
return TypeExpr::new_atomic(TypeExpr::_Builder);
case tok_continuation:
lex.next();
return TypeExpr::new_atomic(TypeExpr::_Continutaion);
case tok_tuple:
lex.next();
return TypeExpr::new_atomic(TypeExpr::_Tuple);
case tok_auto:
lex.next();
return TypeExpr::new_hole();
case tok_void:
lex.next();
return TypeExpr::new_tensor({});
case tok_bool:
lex.error("bool type is not supported yet");
case tok_self:
lex.error("`self` type can be used only as a return type of a function (enforcing it to be chainable)");
case tok_identifier:
if (int idx = genericsT_list ? genericsT_list->lookup_idx(lex.cur_str()) : -1; idx != -1) {
lex.next();
return genericsT_list->get_item(idx)->created_type;
}
break;
case tok_oppar: {
lex.next();
if (lex.tok() == tok_clpar) {
lex.next();
return TypeExpr::new_unit();
}
std::vector<TypeExpr*> sub{1, parse_type(lex, genericsT_list)};
while (lex.tok() == tok_comma) {
lex.next();
sub.push_back(parse_type(lex, genericsT_list));
}
lex.expect(tok_clpar, "`)`");
return TypeExpr::new_tensor(std::move(sub));
}
case tok_opbracket: {
lex.next();
if (lex.tok() == tok_clbracket) {
lex.next();
return TypeExpr::new_tuple({});
}
std::vector<TypeExpr*> sub{1, parse_type(lex, genericsT_list)};
while (lex.tok() == tok_comma) {
lex.next();
sub.push_back(parse_type(lex, genericsT_list));
}
lex.expect(tok_clbracket, "`]`");
return TypeExpr::new_tuple(std::move(sub));
}
default:
break;
}
lex.unexpected("<type>");
}
static TypeExpr* parse_type(Lexer& lex, V<ast_genericsT_list> genericsT_list) {
TypeExpr* res = parse_type1(lex, genericsT_list);
if (lex.tok() == tok_arrow) {
lex.next();
TypeExpr* to = parse_type(lex, genericsT_list);
return TypeExpr::new_map(res, to);
}
return res;
}
AnyExprV parse_expr(Lexer& lex);
static AnyV parse_parameter(Lexer& lex, V<ast_genericsT_list> genericsT_list, bool is_first) {
static AnyV parse_parameter(Lexer& lex, bool is_first) {
SrcLocation loc = lex.cur_location();
// optional keyword `mutate` meaning that a function will mutate a passed argument (like passed by reference)
bool declared_as_mutate = false;
bool is_param_self = false;
if (lex.tok() == tok_mutate) {
lex.next();
declared_as_mutate = true;
@ -252,21 +169,14 @@ static AnyV parse_parameter(Lexer& lex, V<ast_genericsT_list> genericsT_list, bo
lex.error("`self` can only be the first parameter");
}
param_name = "self";
is_param_self = true;
} else if (lex.tok() != tok_underscore) {
lex.unexpected("parameter name");
}
lex.next();
// parameter type after colon, also mandatory (even explicit ":auto")
// parameter type after colon is mandatory
lex.expect(tok_colon, "`: <parameter_type>`");
TypeExpr* param_type = parse_type(lex, genericsT_list);
if (declared_as_mutate && !param_type->has_fixed_width()) {
throw ParseError(loc, "`mutate` parameter must be strictly typed");
}
if (is_param_self && !param_type->has_fixed_width()) {
throw ParseError(loc, "`self` parameter must be strictly typed");
}
TypePtr param_type = parse_type_from_tokens(lex);
return createV<ast_parameter>(loc, param_name, param_type, declared_as_mutate);
}
@ -281,7 +191,7 @@ static AnyV parse_global_var_declaration(Lexer& lex, const std::vector<V<ast_ann
auto v_ident = createV<ast_identifier>(lex.cur_location(), lex.cur_str());
lex.next();
lex.expect(tok_colon, "`:`");
TypeExpr* declared_type = parse_type(lex, nullptr);
TypePtr declared_type = parse_type_from_tokens(lex);
if (lex.tok() == tok_comma) {
lex.error("multiple declarations are not allowed, split globals on separate lines");
}
@ -301,18 +211,10 @@ static AnyV parse_constant_declaration(Lexer& lex, const std::vector<V<ast_annot
lex.check(tok_identifier, "constant name");
auto v_ident = createV<ast_identifier>(lex.cur_location(), lex.cur_str());
lex.next();
TypeExpr *declared_type = nullptr;
TypePtr declared_type = nullptr;
if (lex.tok() == tok_colon) {
lex.next();
if (lex.tok() == tok_int) {
declared_type = TypeExpr::new_atomic(TypeExpr::_Int);
lex.next();
} else if (lex.tok() == tok_slice) {
declared_type = TypeExpr::new_atomic(TypeExpr::_Slice);
lex.next();
} else {
lex.error("a constant can be int or slice only");
}
declared_type = parse_type_from_tokens(lex);
}
lex.expect(tok_assign, "`=`");
AnyExprV init_value = parse_expr(lex);
@ -324,15 +226,15 @@ static AnyV parse_constant_declaration(Lexer& lex, const std::vector<V<ast_annot
}
// "parameters" are at function declaration: `fun f(param1: int, mutate param2: slice)`
static V<ast_parameter_list> parse_parameter_list(Lexer& lex, V<ast_genericsT_list> genericsT_list) {
static V<ast_parameter_list> parse_parameter_list(Lexer& lex) {
SrcLocation loc = lex.cur_location();
std::vector<AnyV> params;
lex.expect(tok_oppar, "parameter list");
if (lex.tok() != tok_clpar) {
params.push_back(parse_parameter(lex, genericsT_list, true));
params.push_back(parse_parameter(lex, true));
while (lex.tok() == tok_comma) {
lex.next();
params.push_back(parse_parameter(lex, genericsT_list, false));
params.push_back(parse_parameter(lex, false));
}
}
lex.expect(tok_clpar, "`)`");
@ -369,6 +271,26 @@ static V<ast_argument_list> parse_argument_list(Lexer& lex) {
return createV<ast_argument_list>(loc, std::move(args));
}
// Speculatively parses an instantiation list `<T1, T2, ...>` when the current token is `<`.
// Returns the parsed list on success. If anything inside fails to parse (a ParseError is thrown),
// the lexer is rewound to the saved position (pointing at `<` again) and nullptr is returned,
// so the caller can continue as if no instantiation list was there
// (presumably to treat `<` as a comparison operator — confirm against callers).
static V<ast_instantiationT_list> parse_maybe_instantiationTs_after_identifier(Lexer& lex) {
  lex.check(tok_lt, "`<`");
  Lexer::SavedPositionForLookahead backup = lex.save_parsing_position();
  try {
    SrcLocation loc = lex.cur_location();
    lex.next();
    std::vector<AnyV> instantiationTs;
    while (true) {
      // Remember the location BEFORE parsing the type: the order in which function arguments
      // are evaluated is unspecified in C++, so reading `lex.cur_location()` and calling
      // `parse_type_from_tokens(lex)` within one argument list could record the location
      // either before or after the type, depending on the compiler.
      SrcLocation item_loc = lex.cur_location();
      TypePtr item_type = parse_type_from_tokens(lex);
      instantiationTs.push_back(createV<ast_instantiationT_item>(item_loc, item_type));
      if (lex.tok() != tok_comma) {
        break;
      }
      lex.next();
    }
    lex.expect(tok_gt, "`>`");
    return createV<ast_instantiationT_list>(loc, std::move(instantiationTs));
  } catch (const ParseError&) {
    // not an instantiation list: rewind and let the caller handle `<` differently
    lex.restore_position(backup);
    return nullptr;
  }
}
// parse (expr) / [expr] / identifier / number
static AnyExprV parse_expr100(Lexer& lex) {
SrcLocation loc = lex.cur_location();
@ -396,7 +318,7 @@ static AnyExprV parse_expr100(Lexer& lex) {
lex.next();
if (lex.tok() == tok_clbracket) {
lex.next();
return createV<ast_tensor_square>(loc, {});
return createV<ast_typed_tuple>(loc, {});
}
std::vector<AnyExprV> items(1, parse_expr(lex));
while (lex.tok() == tok_comma) {
@ -404,7 +326,7 @@ static AnyExprV parse_expr100(Lexer& lex) {
items.emplace_back(parse_expr(lex));
}
lex.expect(tok_clbracket, "`]`");
return createV<ast_tensor_square>(loc, std::move(items));
return createV<ast_typed_tuple>(loc, std::move(items));
}
case tok_int_const: {
std::string_view orig_str = lex.cur_str();
@ -443,12 +365,17 @@ static AnyExprV parse_expr100(Lexer& lex) {
}
case tok_self: {
lex.next();
return createV<ast_self_keyword>(loc);
auto v_ident = createV<ast_identifier>(loc, "self");
return createV<ast_reference>(loc, v_ident, nullptr);
}
case tok_identifier: {
std::string_view str_val = lex.cur_str();
auto v_ident = createV<ast_identifier>(loc, lex.cur_str());
V<ast_instantiationT_list> v_instantiationTs = nullptr;
lex.next();
return createV<ast_identifier>(loc, str_val);
if (lex.tok() == tok_lt) {
v_instantiationTs = parse_maybe_instantiationTs_after_identifier(lex);
}
return createV<ast_reference>(loc, v_ident, v_instantiationTs);
}
default: {
// show a proper error for `int i` (FunC-style declarations)
@ -461,25 +388,36 @@ static AnyExprV parse_expr100(Lexer& lex) {
}
}
// parse E(args)
// parse E(...) (left-to-right)
static AnyExprV parse_expr90(Lexer& lex) {
AnyExprV res = parse_expr100(lex);
if (lex.tok() == tok_oppar) {
return createV<ast_function_call>(res->loc, res, parse_argument_list(lex));
while (lex.tok() == tok_oppar) {
res = createV<ast_function_call>(res->loc, res, parse_argument_list(lex));
}
return res;
}
// parse E.method(...) (left-to-right)
// parse E.field and E.method(...) (left-to-right)
static AnyExprV parse_expr80(Lexer& lex) {
AnyExprV lhs = parse_expr90(lex);
while (lex.tok() == tok_dot) {
SrcLocation loc = lex.cur_location();
lex.next();
lex.check(tok_identifier, "method name");
std::string_view method_name = lex.cur_str();
V<ast_identifier> v_ident = nullptr;
V<ast_instantiationT_list> v_instantiationTs = nullptr;
if (lex.tok() == tok_identifier) {
v_ident = createV<ast_identifier>(lex.cur_location(), lex.cur_str());
lex.next();
lhs = createV<ast_dot_method_call>(loc, method_name, lhs, parse_argument_list(lex));
if (lex.tok() == tok_lt) {
v_instantiationTs = parse_maybe_instantiationTs_after_identifier(lex);
}
} else {
lex.unexpected("method name");
}
lhs = createV<ast_dot_access>(loc, lhs, v_ident, v_instantiationTs);
while (lex.tok() == tok_oppar) {
lhs = createV<ast_function_call>(lex.cur_location(), lhs, parse_argument_list(lex));
}
}
return lhs;
}
@ -497,15 +435,27 @@ static AnyExprV parse_expr75(Lexer& lex) {
return parse_expr80(lex);
}
// parse E as <type>
static AnyExprV parse_expr40(Lexer& lex) {
  // the operand binds tighter than `as`, it's parsed by the next precedence level
  AnyExprV operand = parse_expr75(lex);
  if (lex.tok() != tok_as) {
    return operand;   // no cast follows
  }
  // the cast node is located at the `as` keyword itself
  SrcLocation loc = lex.cur_location();
  lex.next();
  TypePtr cast_to_type = parse_type_from_tokens(lex);
  return createV<ast_cast_as_operator>(loc, operand, cast_to_type);
}
// parse E * / % ^/ ~/ E (left-to-right)
static AnyExprV parse_expr30(Lexer& lex) {
AnyExprV lhs = parse_expr75(lex);
AnyExprV lhs = parse_expr40(lex);
TokenType t = lex.tok();
while (t == tok_mul || t == tok_div || t == tok_mod || t == tok_divC || t == tok_divR) {
SrcLocation loc = lex.cur_location();
std::string_view operator_name = lex.cur_str();
lex.next();
AnyExprV rhs = parse_expr75(lex);
AnyExprV rhs = parse_expr40(lex);
lhs = createV<ast_binary_operator>(loc, operator_name, t, lhs, rhs);
t = lex.tok();
}
@ -597,15 +547,20 @@ static AnyExprV parse_expr13(Lexer& lex) {
static AnyExprV parse_expr10(Lexer& lex) {
AnyExprV lhs = parse_expr13(lex);
TokenType t = lex.tok();
if (t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div ||
t == tok_set_mod || t == tok_set_lshift || t == tok_set_rshift ||
t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor ||
t == tok_assign) {
if (t == tok_assign) {
SrcLocation loc = lex.cur_location();
std::string_view operator_name = lex.cur_str();
lex.next();
AnyExprV rhs = parse_expr10(lex);
return createV<ast_binary_operator>(loc, operator_name, t, lhs, rhs);
return createV<ast_assign>(loc, lhs, rhs);
}
if (t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div ||
t == tok_set_mod || t == tok_set_lshift || t == tok_set_rshift ||
t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor) {
SrcLocation loc = lex.cur_location();
std::string_view operator_name = lex.cur_str().substr(0, lex.cur_str().size() - 1); // "+" for +=
lex.next();
AnyExprV rhs = parse_expr10(lex);
return createV<ast_set_assign>(loc, operator_name, t, lhs, rhs);
}
if (t == tok_question) {
SrcLocation loc = lex.cur_location();
@ -631,7 +586,7 @@ static AnyExprV parse_var_declaration_lhs(Lexer& lex, bool is_immutable) {
AnyExprV first = parse_var_declaration_lhs(lex, is_immutable);
if (lex.tok() == tok_clpar) {
lex.next();
return createV<ast_parenthesized_expression>(loc, first);
return first;
}
std::vector<AnyExprV> args(1, first);
while (lex.tok() == tok_comma) {
@ -649,51 +604,51 @@ static AnyExprV parse_var_declaration_lhs(Lexer& lex, bool is_immutable) {
args.push_back(parse_var_declaration_lhs(lex, is_immutable));
}
lex.expect(tok_clbracket, "`]`");
return createV<ast_tensor_square>(loc, std::move(args));
return createV<ast_typed_tuple>(loc, std::move(args));
}
if (lex.tok() == tok_identifier) {
auto v_ident = createV<ast_identifier>(loc, lex.cur_str());
TypeExpr* declared_type = nullptr;
TypePtr declared_type = nullptr;
bool marked_as_redef = false;
lex.next();
if (lex.tok() == tok_colon) {
lex.next();
declared_type = parse_type(lex, nullptr);
declared_type = parse_type_from_tokens(lex);
} else if (lex.tok() == tok_redef) {
lex.next();
marked_as_redef = true;
}
return createV<ast_local_var>(loc, v_ident, declared_type, is_immutable, marked_as_redef);
return createV<ast_local_var_lhs>(loc, v_ident, declared_type, is_immutable, marked_as_redef);
}
if (lex.tok() == tok_underscore) {
TypeExpr* declared_type = nullptr;
TypePtr declared_type = nullptr;
lex.next();
if (lex.tok() == tok_colon) {
lex.next();
declared_type = parse_type(lex, nullptr);
declared_type = parse_type_from_tokens(lex);
}
return createV<ast_local_var>(loc, createV<ast_underscore>(loc), declared_type, true, false);
return createV<ast_local_var_lhs>(loc, createV<ast_identifier>(loc, ""), declared_type, true, false);
}
lex.unexpected("variable name");
}
static AnyV parse_local_vars_declaration(Lexer& lex) {
static AnyV parse_local_vars_declaration_assignment(Lexer& lex) {
SrcLocation loc = lex.cur_location();
bool is_immutable = lex.tok() == tok_val;
lex.next();
AnyExprV lhs = parse_var_declaration_lhs(lex, is_immutable);
AnyExprV lhs = createV<ast_local_vars_declaration>(loc, parse_var_declaration_lhs(lex, is_immutable));
if (lex.tok() != tok_assign) {
lex.error("variables declaration must be followed by assignment: `var xxx = ...`");
}
lex.next();
AnyExprV assigned_val = parse_expr(lex);
AnyExprV rhs = parse_expr(lex);
if (lex.tok() == tok_comma) {
lex.error("multiple declarations are not allowed, split variables on separate lines");
}
lex.expect(tok_semicolon, "`;`");
return createV<ast_local_vars_declaration>(loc, lhs, assigned_val);
return createV<ast_assign>(loc, lhs, rhs);
}
static V<ast_sequence> parse_sequence(Lexer& lex) {
@ -711,8 +666,8 @@ static V<ast_sequence> parse_sequence(Lexer& lex) {
static AnyV parse_return_statement(Lexer& lex) {
SrcLocation loc = lex.cur_location();
lex.expect(tok_return, "`return`");
AnyExprV child = lex.tok() == tok_semicolon // `return;` actually means `return ();` (which is void)
? createV<ast_tensor>(lex.cur_location(), {})
AnyExprV child = lex.tok() == tok_semicolon // `return;` actually means "nothing" (inferred as void)
? createV<ast_empty_expression>(lex.cur_location())
: parse_expr(lex);
lex.expect(tok_semicolon, "`;`");
return createV<ast_return_statement>(loc, child);
@ -784,15 +739,22 @@ static AnyExprV parse_catch_variable(Lexer& lex) {
if (lex.tok() == tok_identifier) {
std::string_view var_name = lex.cur_str();
lex.next();
return createV<ast_identifier>(loc, var_name);
auto v_ident = createV<ast_identifier>(loc, var_name);
return createV<ast_reference>(loc, v_ident, nullptr);
}
if (lex.tok() == tok_underscore) {
lex.next();
return createV<ast_underscore>(loc);
auto v_ident = createV<ast_identifier>(loc, "");
return createV<ast_reference>(loc, v_ident, nullptr);
}
lex.unexpected("identifier");
}
// Creates a reference to an identifier with an empty name, located at the current lexer position.
// It has no instantiation list (nullptr is passed).
static AnyExprV create_catch_underscore_variable(const Lexer& lex) {
  SrcLocation loc = lex.cur_location();
  return createV<ast_reference>(loc, createV<ast_identifier>(loc, ""), nullptr);
}
static AnyV parse_throw_statement(Lexer& lex) {
SrcLocation loc = lex.cur_location();
lex.expect(tok_throw, "`throw`");
@ -853,12 +815,12 @@ static AnyV parse_try_catch_statement(Lexer& lex) {
lex.next();
catch_args.push_back(parse_catch_variable(lex));
} else { // catch (excNo) -> catch (excNo, _)
catch_args.push_back(createV<ast_underscore>(catch_loc));
catch_args.push_back(create_catch_underscore_variable(lex));
}
lex.expect(tok_clpar, "`)`");
} else { // catch -> catch (_, _)
catch_args.push_back(createV<ast_underscore>(catch_loc));
catch_args.push_back(createV<ast_underscore>(catch_loc));
catch_args.push_back(create_catch_underscore_variable(lex));
catch_args.push_back(create_catch_underscore_variable(lex));
}
V<ast_tensor> catch_expr = createV<ast_tensor>(catch_loc, std::move(catch_args));
@ -868,9 +830,9 @@ static AnyV parse_try_catch_statement(Lexer& lex) {
AnyV parse_statement(Lexer& lex) {
switch (lex.tok()) {
case tok_var:
case tok_val:
return parse_local_vars_declaration(lex);
case tok_var: // `var x = 0` is technically an expression, but can not appear in "any place",
case tok_val: // only as a separate declaration
return parse_local_vars_declaration_assignment(lex);
case tok_opbrace:
return parse_sequence(lex);
case tok_return:
@ -952,12 +914,10 @@ static AnyV parse_genericsT_list(Lexer& lex) {
SrcLocation loc = lex.cur_location();
std::vector<AnyV> genericsT_items;
lex.expect(tok_lt, "`<`");
int idx = 0;
while (true) {
lex.check(tok_identifier, "T");
std::string_view nameT = lex.cur_str();
TypeExpr* type = TypeExpr::new_var(idx++);
genericsT_items.emplace_back(createV<ast_genericsT_item>(lex.cur_location(), type, nameT));
genericsT_items.emplace_back(createV<ast_genericsT_item>(lex.cur_location(), nameT));
lex.next();
if (lex.tok() != tok_comma) {
break;
@ -1040,11 +1000,11 @@ static AnyV parse_function_declaration(Lexer& lex, const std::vector<V<ast_annot
genericsT_list = parse_genericsT_list(lex)->as<ast_genericsT_list>();
}
V<ast_parameter_list> v_param_list = parse_parameter_list(lex, genericsT_list)->as<ast_parameter_list>();
V<ast_parameter_list> v_param_list = parse_parameter_list(lex)->as<ast_parameter_list>();
bool accepts_self = !v_param_list->empty() && v_param_list->get_param(0)->param_name == "self";
int n_mutate_params = v_param_list->get_mutate_params_count();
TypeExpr* ret_type = nullptr;
TypePtr ret_type = nullptr;
bool returns_self = false;
if (lex.tok() == tok_colon) { // : <ret_type> (if absent, it means "auto infer", not void)
lex.next();
@ -1054,9 +1014,9 @@ static AnyV parse_function_declaration(Lexer& lex, const std::vector<V<ast_annot
}
lex.next();
returns_self = true;
ret_type = TypeExpr::new_unit();
ret_type = TypeDataVoid::create();
} else {
ret_type = parse_type(lex, genericsT_list);
ret_type = parse_type_from_tokens(lex);
}
}
@ -1067,18 +1027,6 @@ static AnyV parse_function_declaration(Lexer& lex, const std::vector<V<ast_annot
throw ParseError(loc, "get methods can't have `mutate` and `self` params");
}
if (n_mutate_params) {
std::vector<TypeExpr*> ret_tensor_items;
ret_tensor_items.reserve(1 + n_mutate_params);
for (AnyV v_param : v_param_list->get_params()) {
if (v_param->as<ast_parameter>()->declared_as_mutate) {
ret_tensor_items.emplace_back(v_param->as<ast_parameter>()->declared_type);
}
}
ret_tensor_items.emplace_back(ret_type ? ret_type : TypeExpr::new_hole());
ret_type = TypeExpr::new_tensor(std::move(ret_tensor_items));
}
AnyV v_body = nullptr;
if (lex.tok() == tok_builtin) {
@ -1096,32 +1044,43 @@ static AnyV parse_function_declaration(Lexer& lex, const std::vector<V<ast_annot
lex.unexpected("{ function body }");
}
auto f_declaration = createV<ast_function_declaration>(loc, v_ident, v_param_list, v_body);
f_declaration->ret_type = ret_type ? ret_type : TypeExpr::new_hole();
f_declaration->is_entrypoint = is_entrypoint;
f_declaration->genericsT_list = genericsT_list;
f_declaration->marked_as_get_method = is_get_method;
f_declaration->marked_as_builtin = v_body->type == ast_empty_statement;
f_declaration->accepts_self = accepts_self;
f_declaration->returns_self = returns_self;
int flags = 0;
if (is_entrypoint) {
flags |= FunctionData::flagIsEntrypoint;
}
if (is_get_method) {
flags |= FunctionData::flagGetMethod;
}
if (accepts_self) {
flags |= FunctionData::flagAcceptsSelf;
}
if (returns_self) {
flags |= FunctionData::flagReturnsSelf;
}
td::RefInt256 method_id;
for (auto v_annotation : annotations) {
switch (v_annotation->kind) {
case AnnotationKind::inline_simple:
f_declaration->marked_as_inline = true;
flags |= FunctionData::flagInline;
break;
case AnnotationKind::inline_ref:
f_declaration->marked_as_inline_ref = true;
flags |= FunctionData::flagInlineRef;
break;
case AnnotationKind::pure:
f_declaration->marked_as_pure = true;
flags |= FunctionData::flagMarkedAsPure;
break;
case AnnotationKind::method_id:
case AnnotationKind::method_id: {
if (is_get_method || genericsT_list || is_entrypoint || n_mutate_params || accepts_self) {
v_annotation->error("@method_id can be specified only for regular functions");
}
f_declaration->method_id = v_annotation->get_arg()->get_item(0)->as<ast_int_const>();
auto v_int = v_annotation->get_arg()->get_item(0)->as<ast_int_const>();
if (v_int->intval.is_null() || !v_int->intval->signed_fits_bits(32)) {
v_int->error("invalid integer constant");
}
method_id = v_int->intval;
break;
}
case AnnotationKind::deprecated:
// no special handling
break;
@ -1131,7 +1090,7 @@ static AnyV parse_function_declaration(Lexer& lex, const std::vector<V<ast_annot
}
}
return f_declaration;
return createV<ast_function_declaration>(loc, v_ident, v_param_list, v_body, ret_type, genericsT_list, std::move(method_id), flags);
}
static AnyV parse_tolk_required_version(Lexer& lex) {
@ -1148,7 +1107,7 @@ static AnyV parse_tolk_required_version(Lexer& lex) {
return createV<ast_tolk_required_version>(loc, semver); // semicolon is not necessary
}
static AnyV parse_import_statement(Lexer& lex) {
static AnyV parse_import_directive(Lexer& lex) {
SrcLocation loc = lex.cur_location();
lex.expect(tok_import, "`import`");
lex.check(tok_string_const, "source file name");
@ -1158,7 +1117,7 @@ static AnyV parse_import_statement(Lexer& lex) {
}
auto v_str = createV<ast_string_const>(lex.cur_location(), rel_filename, 0);
lex.next();
return createV<ast_import_statement>(loc, v_str); // semicolon is not necessary
return createV<ast_import_directive>(loc, v_str); // semicolon is not necessary
}
// the main (exported) function
@ -1179,7 +1138,7 @@ AnyV parse_src_file_to_ast(const SrcFile* file) {
if (!annotations.empty()) {
lex.unexpected("declaration after @annotations");
}
toplevel_declarations.push_back(parse_import_statement(lex));
toplevel_declarations.push_back(parse_import_directive(lex));
break;
case tok_semicolon:
if (!annotations.empty()) {

View file

@ -85,60 +85,65 @@ class ASTReplacerInFunctionBody : public ASTReplacer {
protected:
using parent = ASTReplacerInFunctionBody;
// expressions
virtual AnyExprV replace(V<ast_empty_expression> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_parenthesized_expression> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_tensor> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_typed_tuple> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_reference> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_local_var_lhs> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_local_vars_declaration> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_int_const> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_string_const> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_bool_const> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_null_keyword> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_argument> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_argument_list> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_dot_access> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_function_call> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_underscore> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_assign> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_set_assign> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_unary_operator> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_binary_operator> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_ternary_operator> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_cast_as_operator> v) { return replace_children(v); }
// statements
virtual AnyV replace(V<ast_empty_statement> v) { return replace_children(v); }
virtual AnyV replace(V<ast_return_statement> v) { return replace_children(v); }
virtual AnyV replace(V<ast_sequence> v) { return replace_children(v); }
virtual AnyV replace(V<ast_return_statement> v) { return replace_children(v); }
virtual AnyV replace(V<ast_if_statement> v) { return replace_children(v); }
virtual AnyV replace(V<ast_repeat_statement> v) { return replace_children(v); }
virtual AnyV replace(V<ast_while_statement> v) { return replace_children(v); }
virtual AnyV replace(V<ast_do_while_statement> v) { return replace_children(v); }
virtual AnyV replace(V<ast_throw_statement> v) { return replace_children(v); }
virtual AnyV replace(V<ast_assert_statement> v) { return replace_children(v); }
virtual AnyV replace(V<ast_try_catch_statement> v) { return replace_children(v); }
virtual AnyV replace(V<ast_if_statement> v) { return replace_children(v); }
virtual AnyV replace(V<ast_local_vars_declaration> v) { return replace_children(v); }
virtual AnyV replace(V<ast_asm_body> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_empty_expression> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_parenthesized_expression> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_tensor> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_tensor_square> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_identifier> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_int_const> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_string_const> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_bool_const> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_null_keyword> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_self_keyword> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_argument> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_argument_list> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_function_call> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_dot_method_call> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_underscore> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_unary_operator> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_binary_operator> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_ternary_operator> v) { return replace_children(v); }
virtual AnyExprV replace(V<ast_local_var> v) { return replace_children(v); }
AnyExprV replace(AnyExprV v) final {
switch (v->type) {
case ast_empty_expression: return replace(v->as<ast_empty_expression>());
case ast_parenthesized_expression: return replace(v->as<ast_parenthesized_expression>());
case ast_tensor: return replace(v->as<ast_tensor>());
case ast_tensor_square: return replace(v->as<ast_tensor_square>());
case ast_identifier: return replace(v->as<ast_identifier>());
case ast_typed_tuple: return replace(v->as<ast_typed_tuple>());
case ast_reference: return replace(v->as<ast_reference>());
case ast_local_var_lhs: return replace(v->as<ast_local_var_lhs>());
case ast_local_vars_declaration: return replace(v->as<ast_local_vars_declaration>());
case ast_int_const: return replace(v->as<ast_int_const>());
case ast_string_const: return replace(v->as<ast_string_const>());
case ast_bool_const: return replace(v->as<ast_bool_const>());
case ast_null_keyword: return replace(v->as<ast_null_keyword>());
case ast_self_keyword: return replace(v->as<ast_self_keyword>());
case ast_argument: return replace(v->as<ast_argument>());
case ast_argument_list: return replace(v->as<ast_argument_list>());
case ast_dot_access: return replace(v->as<ast_dot_access>());
case ast_function_call: return replace(v->as<ast_function_call>());
case ast_dot_method_call: return replace(v->as<ast_dot_method_call>());
case ast_underscore: return replace(v->as<ast_underscore>());
case ast_assign: return replace(v->as<ast_assign>());
case ast_set_assign: return replace(v->as<ast_set_assign>());
case ast_unary_operator: return replace(v->as<ast_unary_operator>());
case ast_binary_operator: return replace(v->as<ast_binary_operator>());
case ast_ternary_operator: return replace(v->as<ast_ternary_operator>());
case ast_local_var: return replace(v->as<ast_local_var>());
case ast_cast_as_operator: return replace(v->as<ast_cast_as_operator>());
default:
throw UnexpectedASTNodeType(v, "ASTReplacerInFunctionBody::replace");
}
@ -147,17 +152,19 @@ protected:
AnyV replace(AnyV v) final {
switch (v->type) {
case ast_empty_statement: return replace(v->as<ast_empty_statement>());
case ast_return_statement: return replace(v->as<ast_return_statement>());
case ast_sequence: return replace(v->as<ast_sequence>());
case ast_return_statement: return replace(v->as<ast_return_statement>());
case ast_if_statement: return replace(v->as<ast_if_statement>());
case ast_repeat_statement: return replace(v->as<ast_repeat_statement>());
case ast_while_statement: return replace(v->as<ast_while_statement>());
case ast_do_while_statement: return replace(v->as<ast_do_while_statement>());
case ast_throw_statement: return replace(v->as<ast_throw_statement>());
case ast_assert_statement: return replace(v->as<ast_assert_statement>());
case ast_try_catch_statement: return replace(v->as<ast_try_catch_statement>());
case ast_if_statement: return replace(v->as<ast_if_statement>());
case ast_local_vars_declaration: return replace(v->as<ast_local_vars_declaration>());
case ast_asm_body: return replace(v->as<ast_asm_body>());
#ifdef TOLK_DEBUG
case ast_asm_body:
throw UnexpectedASTNodeType(v, "ASTReplacer::replace");
#endif
default: {
// be very careful, don't forget to handle all statements (not expressions) above!
AnyExprV as_expr = reinterpret_cast<const ASTNodeExpressionBase*>(v);
@ -167,21 +174,22 @@ protected:
}
public:
void start_replacing_in_function(V<ast_function_declaration> v) {
replace(v->get_body());
virtual bool should_visit_function(const FunctionData* fun_ref) = 0;
void start_replacing_in_function(const FunctionData* fun_ref, V<ast_function_declaration> v_function) {
replace(v_function->get_body());
}
};
const std::vector<const FunctionData*>& get_all_not_builtin_functions();
template<class BodyReplacerT>
void replace_ast_of_all_functions(const AllSrcFiles& all_files) {
for (const SrcFile* file : all_files) {
for (AnyV v : file->ast->as<ast_tolk_file>()->get_toplevel_declarations()) {
if (auto v_func = v->try_as<ast_function_declaration>()) {
if (v_func->is_regular_function()) {
void replace_ast_of_all_functions() {
BodyReplacerT visitor;
visitor.start_replacing_in_function(v_func);
}
}
for (const FunctionData* fun_ref : get_all_not_builtin_functions()) {
if (visitor.should_visit_function(fun_ref)) {
visitor.start_replacing_in_function(fun_ref, fun_ref->ast_root->as<ast_function_declaration>());
}
}
}

255
tolk/ast-replicator.h Normal file
View file

@ -0,0 +1,255 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "ast.h"
#include "platform-utils.h"
namespace tolk {
// Abstract base for AST cloning.
// Declares three pure virtual entry points that concrete replicators must implement:
// cloning of an arbitrary node, of an arbitrary expression, and of a type.
class ASTReplicator {
public:
  virtual ~ASTReplicator() = default;

protected:
  virtual AnyV clone(AnyV v) = 0;
  virtual AnyExprV clone(AnyExprV v) = 0;
  virtual TypePtr clone(TypePtr t) = 0;
};
class ASTReplicatorFunction : public ASTReplicator {
protected:
using parent = ASTReplicatorFunction;
std::vector<AnyV> clone(const std::vector<AnyV>& items) {
std::vector<AnyV> result;
result.reserve(items.size());
for (AnyV item : items) {
result.push_back(clone(item));
}
return result;
}
std::vector<AnyExprV> clone(const std::vector<AnyExprV>& items) {
std::vector<AnyExprV> result;
result.reserve(items.size());
for (AnyExprV item : items) {
result.push_back(clone(item));
}
return result;
}
// expressions
// Clones an empty expression: a fresh node of the same kind at the same source location.
virtual V<ast_empty_expression> clone(V<ast_empty_expression> v) {
return createV<ast_empty_expression>(v->loc);
}
// Clones a parenthesized expression; the inner expression is cloned recursively.
virtual V<ast_parenthesized_expression> clone(V<ast_parenthesized_expression> v) {
return createV<ast_parenthesized_expression>(v->loc, clone(v->get_expr()));
}
// Clones a tensor; its items are cloned recursively.
virtual V<ast_tensor> clone(V<ast_tensor> v) {
return createV<ast_tensor>(v->loc, clone(v->get_items()));
}
// Clones a typed tuple; its items are cloned recursively.
virtual V<ast_typed_tuple> clone(V<ast_typed_tuple> v) {
return createV<ast_typed_tuple>(v->loc, clone(v->get_items()));
}
// Clones a reference: the identifier is cloned; the instantiationTs list is cloned
// only when the node has one, otherwise nullptr is passed to the new node.
virtual V<ast_reference> clone(V<ast_reference> v) {
return createV<ast_reference>(v->loc, clone(v->get_identifier()), v->has_instantiationTs() ? clone(v->get_instantiationTs()) : nullptr);
}
// Clones the left side of a local variable declaration: the identifier and the declared type
// go through clone(); `is_immutable` and `marked_as_redef` are copied as is.
virtual V<ast_local_var_lhs> clone(V<ast_local_var_lhs> v) {
return createV<ast_local_var_lhs>(v->loc, clone(v->get_identifier()), clone(v->declared_type), v->is_immutable, v->marked_as_redef);
}
// Clones a local variables declaration; its inner expression is cloned recursively.
virtual V<ast_local_vars_declaration> clone(V<ast_local_vars_declaration> v) {
return createV<ast_local_vars_declaration>(v->loc, clone(v->get_expr()));
}
// Clones an integer constant, copying `intval` and `orig_str`.
virtual V<ast_int_const> clone(V<ast_int_const> v) {
return createV<ast_int_const>(v->loc, v->intval, v->orig_str);
}
// Clones a string constant, copying `str_val` and `modifier`.
virtual V<ast_string_const> clone(V<ast_string_const> v) {
return createV<ast_string_const>(v->loc, v->str_val, v->modifier);
}
// Clones a boolean constant, copying `bool_val`.
virtual V<ast_bool_const> clone(V<ast_bool_const> v) {
return createV<ast_bool_const>(v->loc, v->bool_val);
}
// Clones a `null` keyword node: a fresh node at the same source location.
virtual V<ast_null_keyword> clone(V<ast_null_keyword> v) {
return createV<ast_null_keyword>(v->loc);
}
// Clones a call argument: the expression is cloned, `passed_as_mutate` is copied as is.
virtual V<ast_argument> clone(V<ast_argument> v) {
return createV<ast_argument>(v->loc, clone(v->get_expr()), v->passed_as_mutate);
}
// Clones an argument list; its arguments are cloned recursively.
virtual V<ast_argument_list> clone(V<ast_argument_list> v) {
return createV<ast_argument_list>(v->loc, clone(v->get_arguments()));
}
// Clones a dot access (`obj.identifier`): the object and the identifier are cloned;
// the instantiationTs list is cloned only when present, otherwise nullptr is passed.
virtual V<ast_dot_access> clone(V<ast_dot_access> v) {
return createV<ast_dot_access>(v->loc, clone(v->get_obj()), clone(v->get_identifier()), v->has_instantiationTs() ? clone(v->get_instantiationTs()) : nullptr);
}
// Clones a function call: both the callee and the argument list are cloned.
virtual V<ast_function_call> clone(V<ast_function_call> v) {
return createV<ast_function_call>(v->loc, clone(v->get_callee()), clone(v->get_arg_list()));
}
// Clones an underscore node: a fresh node at the same source location.
virtual V<ast_underscore> clone(V<ast_underscore> v) {
return createV<ast_underscore>(v->loc);
}
// Clones an assignment: both the left and the right sides are cloned.
virtual V<ast_assign> clone(V<ast_assign> v) {
return createV<ast_assign>(v->loc, clone(v->get_lhs()), clone(v->get_rhs()));
}
// Clones a set-assignment node: `operator_name` and `tok` are copied, both sides are cloned.
virtual V<ast_set_assign> clone(V<ast_set_assign> v) {
return createV<ast_set_assign>(v->loc, v->operator_name, v->tok, clone(v->get_lhs()), clone(v->get_rhs()));
}
// Clones a unary operator: `operator_name` and `tok` are copied, the operand is cloned.
virtual V<ast_unary_operator> clone(V<ast_unary_operator> v) {
return createV<ast_unary_operator>(v->loc, v->operator_name, v->tok, clone(v->get_rhs()));
}
// Clones a binary operator: `operator_name` and `tok` are copied, both operands are cloned.
virtual V<ast_binary_operator> clone(V<ast_binary_operator> v) {
return createV<ast_binary_operator>(v->loc, v->operator_name, v->tok, clone(v->get_lhs()), clone(v->get_rhs()));
}
// Clones a ternary operator: the condition and both branches are cloned.
virtual V<ast_ternary_operator> clone(V<ast_ternary_operator> v) {
return createV<ast_ternary_operator>(v->loc, clone(v->get_cond()), clone(v->get_when_true()), clone(v->get_when_false()));
}
// Clones an `as` cast: both the expression and the target type (`cast_to_type`) go through clone().
virtual V<ast_cast_as_operator> clone(V<ast_cast_as_operator> v) {
return createV<ast_cast_as_operator>(v->loc, clone(v->get_expr()), clone(v->cast_to_type));
}
// statements
// Duplicates an empty statement; only the location is carried over.
virtual V<ast_empty_statement> clone(V<ast_empty_statement> v) {
  auto v_copy = createV<ast_empty_statement>(v->loc);
  return v_copy;
}
// Duplicates a sequence: `loc` and `loc_end` are copied, the items are obtained via clone(v->get_items()).
virtual V<ast_sequence> clone(V<ast_sequence> v) {
  return createV<ast_sequence>(
    v->loc,
    v->loc_end,
    clone(v->get_items())
  );
}
// Duplicates a `return` statement together with its cloned return value.
virtual V<ast_return_statement> clone(V<ast_return_statement> v) {
  auto return_value_copy = clone(v->get_return_value());
  return createV<ast_return_statement>(v->loc, return_value_copy);
}
// Duplicates an `if` statement: the `is_ifnot` flag is copied; condition, if-body and else-body are cloned.
virtual V<ast_if_statement> clone(V<ast_if_statement> v) {
  auto cond_copy = clone(v->get_cond());
  auto if_body_copy = clone(v->get_if_body());
  auto else_body_copy = clone(v->get_else_body());
  return createV<ast_if_statement>(v->loc, v->is_ifnot, cond_copy, if_body_copy, else_body_copy);
}
// Duplicates a `repeat` statement: condition first, then body.
virtual V<ast_repeat_statement> clone(V<ast_repeat_statement> v) {
  auto cond_copy = clone(v->get_cond());
  auto body_copy = clone(v->get_body());
  return createV<ast_repeat_statement>(v->loc, cond_copy, body_copy);
}
// Duplicates a `while` statement: condition first, then body.
virtual V<ast_while_statement> clone(V<ast_while_statement> v) {
  auto cond_copy = clone(v->get_cond());
  auto body_copy = clone(v->get_body());
  return createV<ast_while_statement>(v->loc, cond_copy, body_copy);
}
// Duplicates a `do ... while` statement; note the vertex takes the body before the condition.
virtual V<ast_do_while_statement> clone(V<ast_do_while_statement> v) {
  auto body_copy = clone(v->get_body());
  auto cond_copy = clone(v->get_cond());
  return createV<ast_do_while_statement>(v->loc, body_copy, cond_copy);
}
// Duplicates a `throw` statement: the thrown code and the thrown argument are cloned.
virtual V<ast_throw_statement> clone(V<ast_throw_statement> v) {
  auto thrown_code_copy = clone(v->get_thrown_code());
  auto thrown_arg_copy = clone(v->get_thrown_arg());
  return createV<ast_throw_statement>(v->loc, thrown_code_copy, thrown_arg_copy);
}
// Duplicates an `assert` statement: the condition and the thrown code are cloned.
virtual V<ast_assert_statement> clone(V<ast_assert_statement> v) {
  auto cond_copy = clone(v->get_cond());
  auto thrown_code_copy = clone(v->get_thrown_code());
  return createV<ast_assert_statement>(v->loc, cond_copy, thrown_code_copy);
}
// Duplicates a `try/catch` statement: try body, catch expression and catch body are cloned.
virtual V<ast_try_catch_statement> clone(V<ast_try_catch_statement> v) {
  auto try_body_copy = clone(v->get_try_body());
  auto catch_expr_copy = clone(v->get_catch_expr());
  auto catch_body_copy = clone(v->get_catch_body());
  return createV<ast_try_catch_statement>(v->loc, try_body_copy, catch_expr_copy, catch_body_copy);
}
// Duplicates an asm body: `arg_order` and `ret_order` are passed through,
// the asm commands are obtained via clone(v->get_asm_commands()).
virtual V<ast_asm_body> clone(V<ast_asm_body> v) {
  return createV<ast_asm_body>(
    v->loc,
    v->arg_order,
    v->ret_order,
    clone(v->get_asm_commands())
  );
}
// other
// Duplicates an identifier vertex, keeping its location and `name`.
virtual V<ast_identifier> clone(V<ast_identifier> v) {
  auto v_copy = createV<ast_identifier>(v->loc, v->name);
  return v_copy;
}
// Duplicates one item of an instantiation list; its `substituted_type` goes through clone().
virtual V<ast_instantiationT_item> clone(V<ast_instantiationT_item> v) {
  auto substituted_type_copy = clone(v->substituted_type);
  return createV<ast_instantiationT_item>(v->loc, substituted_type_copy);
}
// Duplicates an instantiation list; the items are obtained via clone(v->get_items()).
virtual V<ast_instantiationT_list> clone(V<ast_instantiationT_list> v) {
  return createV<ast_instantiationT_list>(
    v->loc,
    clone(v->get_items())
  );
}
// Duplicates a parameter: `param_name` and `declared_as_mutate` are copied,
// the declared type goes through clone().
virtual V<ast_parameter> clone(V<ast_parameter> v) {
  auto declared_type_copy = clone(v->declared_type);
  return createV<ast_parameter>(v->loc, v->param_name, declared_type_copy, v->declared_as_mutate);
}
// Duplicates a parameter list; the parameters are obtained via clone(v->get_params()).
virtual V<ast_parameter_list> clone(V<ast_parameter_list> v) {
  return createV<ast_parameter_list>(
    v->loc,
    clone(v->get_params())
  );
}
// Type-erased entry point for expressions: looks at `v->type` and forwards
// to the clone() overload of the matching vertex type.
// Any node type not listed here results in UnexpectedASTNodeType being thrown.
AnyExprV clone(AnyExprV v) final {
  switch (v->type) {
    case ast_empty_expression:
      return clone(v->as<ast_empty_expression>());
    case ast_parenthesized_expression:
      return clone(v->as<ast_parenthesized_expression>());
    case ast_tensor:
      return clone(v->as<ast_tensor>());
    case ast_typed_tuple:
      return clone(v->as<ast_typed_tuple>());
    case ast_reference:
      return clone(v->as<ast_reference>());
    case ast_local_var_lhs:
      return clone(v->as<ast_local_var_lhs>());
    case ast_local_vars_declaration:
      return clone(v->as<ast_local_vars_declaration>());
    case ast_int_const:
      return clone(v->as<ast_int_const>());
    case ast_string_const:
      return clone(v->as<ast_string_const>());
    case ast_bool_const:
      return clone(v->as<ast_bool_const>());
    case ast_null_keyword:
      return clone(v->as<ast_null_keyword>());
    case ast_argument:
      return clone(v->as<ast_argument>());
    case ast_argument_list:
      return clone(v->as<ast_argument_list>());
    case ast_dot_access:
      return clone(v->as<ast_dot_access>());
    case ast_function_call:
      return clone(v->as<ast_function_call>());
    case ast_underscore:
      return clone(v->as<ast_underscore>());
    case ast_assign:
      return clone(v->as<ast_assign>());
    case ast_set_assign:
      return clone(v->as<ast_set_assign>());
    case ast_unary_operator:
      return clone(v->as<ast_unary_operator>());
    case ast_binary_operator:
      return clone(v->as<ast_binary_operator>());
    case ast_ternary_operator:
      return clone(v->as<ast_ternary_operator>());
    case ast_cast_as_operator:
      return clone(v->as<ast_cast_as_operator>());
    default:
      throw UnexpectedASTNodeType(v, "ASTReplicatorFunction::clone");
  }
}
// Type-erased entry point for any vertex of a function body.
// Statements and "other" vertices are dispatched explicitly by `v->type`;
// everything else is treated as an expression and forwarded to clone(AnyExprV).
AnyV clone(AnyV v) final {
  switch (v->type) {
    // statements
    case ast_empty_statement:
      return clone(v->as<ast_empty_statement>());
    case ast_sequence:
      return clone(v->as<ast_sequence>());
    case ast_return_statement:
      return clone(v->as<ast_return_statement>());
    case ast_if_statement:
      return clone(v->as<ast_if_statement>());
    case ast_repeat_statement:
      return clone(v->as<ast_repeat_statement>());
    case ast_while_statement:
      return clone(v->as<ast_while_statement>());
    case ast_do_while_statement:
      return clone(v->as<ast_do_while_statement>());
    case ast_throw_statement:
      return clone(v->as<ast_throw_statement>());
    case ast_assert_statement:
      return clone(v->as<ast_assert_statement>());
    case ast_try_catch_statement:
      return clone(v->as<ast_try_catch_statement>());
    case ast_asm_body:
      return clone(v->as<ast_asm_body>());
    // other AST nodes that can be children of ast nodes of function body
    case ast_identifier:
      return clone(v->as<ast_identifier>());
    case ast_instantiationT_item:
      return clone(v->as<ast_instantiationT_item>());
    case ast_instantiationT_list:
      return clone(v->as<ast_instantiationT_list>());
    case ast_parameter:
      return clone(v->as<ast_parameter>());
    case ast_parameter_list:
      return clone(v->as<ast_parameter_list>());
    default: {
      // this branch assumes `v` is an expression, so every statement / "other" vertex
      // must have its own case above, otherwise it would be reinterpreted wrongly here
      AnyExprV v_expr = reinterpret_cast<const ASTNodeExpressionBase*>(v);
      return clone(v_expr);
    }
  }
}
// Types are not duplicated here: the very same TypePtr is handed back.
TypePtr clone(TypePtr t) override {
  TypePtr same_type = t;
  return same_type;
}
public:
// Builds a new function declaration vertex from `v_function`:
// identifier, parameter list, body (taken as ast_sequence) and declared return type go through clone(),
// while `genericsT_list`, `method_id` and `flags` are passed to the new vertex unchanged.
virtual V<ast_function_declaration> clone_function_body(V<ast_function_declaration> v_function) {
  auto identifier_copy = clone(v_function->get_identifier());
  auto param_list_copy = clone(v_function->get_param_list());
  auto body_copy = clone(v_function->get_body()->as<ast_sequence>());
  auto return_type_copy = clone(v_function->declared_return_type);
  return createV<ast_function_declaration>(
    v_function->loc,
    identifier_copy,
    param_list_copy,
    body_copy,
    return_type_copy,
    v_function->genericsT_list,
    v_function->method_id,
    v_function->flags
  );
}
};
} // namespace tolk

View file

@ -20,6 +20,7 @@
#include "ast.h"
#include "ast-visitor.h"
#include "type-system.h"
#include <sstream>
/*
@ -31,47 +32,55 @@ namespace tolk {
class ASTStringifier final : public ASTVisitor {
constexpr static std::pair<ASTNodeType, const char*> name_pairs[] = {
{ast_empty_statement, "ast_empty_statement"},
{ast_identifier, "ast_identifier"},
// expressions
{ast_empty_expression, "ast_empty_expression"},
{ast_parenthesized_expression, "ast_parenthesized_expression"},
{ast_tensor, "ast_tensor"},
{ast_tensor_square, "ast_tensor_square"},
{ast_identifier, "ast_identifier"},
{ast_typed_tuple, "ast_typed_tuple"},
{ast_reference, "ast_reference"},
{ast_local_var_lhs, "ast_local_var_lhs"},
{ast_local_vars_declaration, "ast_local_vars_declaration"},
{ast_int_const, "ast_int_const"},
{ast_string_const, "ast_string_const"},
{ast_bool_const, "ast_bool_const"},
{ast_null_keyword, "ast_null_keyword"},
{ast_self_keyword, "ast_self_keyword"},
{ast_argument, "ast_argument"},
{ast_argument_list, "ast_argument_list"},
{ast_dot_access, "ast_dot_access"},
{ast_function_call, "ast_function_call"},
{ast_dot_method_call, "ast_dot_method_call"},
{ast_global_var_declaration, "ast_global_var_declaration"},
{ast_constant_declaration, "ast_constant_declaration"},
{ast_underscore, "ast_underscore"},
{ast_assign, "ast_assign"},
{ast_set_assign, "ast_set_assign"},
{ast_unary_operator, "ast_unary_operator"},
{ast_binary_operator, "ast_binary_operator"},
{ast_ternary_operator, "ast_ternary_operator"},
{ast_return_statement, "ast_return_statement"},
{ast_cast_as_operator, "ast_cast_as_operator"},
// statements
{ast_empty_statement, "ast_empty_statement"},
{ast_sequence, "ast_sequence"},
{ast_return_statement, "ast_return_statement"},
{ast_if_statement, "ast_if_statement"},
{ast_repeat_statement, "ast_repeat_statement"},
{ast_while_statement, "ast_while_statement"},
{ast_do_while_statement, "ast_do_while_statement"},
{ast_throw_statement, "ast_throw_statement"},
{ast_assert_statement, "ast_assert_statement"},
{ast_try_catch_statement, "ast_try_catch_statement"},
{ast_if_statement, "ast_if_statement"},
{ast_asm_body, "ast_asm_body"},
// other
{ast_genericsT_item, "ast_genericsT_item"},
{ast_genericsT_list, "ast_genericsT_list"},
{ast_instantiationT_item, "ast_instantiationT_item"},
{ast_instantiationT_list, "ast_instantiationT_list"},
{ast_parameter, "ast_parameter"},
{ast_parameter_list, "ast_parameter_list"},
{ast_asm_body, "ast_asm_body"},
{ast_annotation, "ast_annotation"},
{ast_function_declaration, "ast_function_declaration"},
{ast_local_var, "ast_local_var"},
{ast_local_vars_declaration, "ast_local_vars_declaration"},
{ast_global_var_declaration, "ast_global_var_declaration"},
{ast_constant_declaration, "ast_constant_declaration"},
{ast_tolk_required_version, "ast_tolk_required_version"},
{ast_import_statement, "ast_import_statement"},
{ast_import_directive, "ast_import_directive"},
{ast_tolk_file, "ast_tolk_file"},
};
@ -115,6 +124,13 @@ class ASTStringifier final : public ASTVisitor {
switch (v->type) {
case ast_identifier:
return static_cast<std::string>(v->as<ast_identifier>()->name);
case ast_reference: {
std::string result(v->as<ast_reference>()->get_name());
if (v->as<ast_reference>()->has_instantiationTs()) {
result += specific_str(v->as<ast_reference>()->get_instantiationTs());
}
return result;
}
case ast_int_const:
return static_cast<std::string>(v->as<ast_int_const>()->orig_str);
case ast_string_const:
@ -123,24 +139,40 @@ class ASTStringifier final : public ASTVisitor {
} else {
return "\"" + static_cast<std::string>(v->as<ast_string_const>()->str_val) + "\"";
}
case ast_bool_const:
return v->as<ast_bool_const>()->bool_val ? "true" : "false";
case ast_dot_access: {
std::string result = "." + static_cast<std::string>(v->as<ast_dot_access>()->get_field_name());
if (v->as<ast_dot_access>()->has_instantiationTs()) {
result += specific_str(v->as<ast_dot_access>()->get_instantiationTs());
}
return result;
}
case ast_function_call: {
if (auto v_lhs = v->as<ast_function_call>()->get_called_f()->try_as<ast_identifier>()) {
return static_cast<std::string>(v_lhs->name) + "()";
std::string inner = specific_str(v->as<ast_function_call>()->get_callee());
if (int n_args = v->as<ast_function_call>()->get_num_args()) {
return inner + "(..." + std::to_string(n_args) + ")";
}
return {};
return inner + "()";
}
case ast_dot_method_call:
return static_cast<std::string>(v->as<ast_dot_method_call>()->method_name);
case ast_global_var_declaration:
return static_cast<std::string>(v->as<ast_global_var_declaration>()->get_identifier()->name);
case ast_constant_declaration:
return static_cast<std::string>(v->as<ast_constant_declaration>()->get_identifier()->name);
case ast_assign:
return "=";
case ast_set_assign:
return static_cast<std::string>(v->as<ast_set_assign>()->operator_name) + "=";
case ast_unary_operator:
return static_cast<std::string>(v->as<ast_unary_operator>()->operator_name);
case ast_binary_operator:
return static_cast<std::string>(v->as<ast_binary_operator>()->operator_name);
case ast_cast_as_operator:
return v->as<ast_cast_as_operator>()->cast_to_type->as_human_readable();
case ast_sequence:
return "" + std::to_string(v->as<ast_sequence>()->get_items().size());
case ast_instantiationT_item:
return v->as<ast_instantiationT_item>()->substituted_type->as_human_readable();
case ast_if_statement:
return v->as<ast_if_statement>()->is_ifnot ? "ifnot" : "";
case ast_annotation:
@ -159,18 +191,27 @@ class ASTStringifier final : public ASTVisitor {
}
return "fun " + static_cast<std::string>(v->as<ast_function_declaration>()->get_identifier()->name) + "(" + param_names + ")";
}
case ast_local_var: {
case ast_local_var_lhs: {
std::ostringstream os;
os << (v->as<ast_local_var>()->inferred_type ? v->as<ast_local_var>()->inferred_type : v->as<ast_local_var>()->declared_type);
if (auto v_ident = v->as<ast_local_var>()->get_identifier()->try_as<ast_identifier>()) {
return static_cast<std::string>(v_ident->name) + ":" + os.str();
}
os << (v->as<ast_local_var_lhs>()->inferred_type ? v->as<ast_local_var_lhs>()->inferred_type : v->as<ast_local_var_lhs>()->declared_type);
if (v->as<ast_local_var_lhs>()->get_name().empty()) {
return "_: " + os.str();
}
return static_cast<std::string>(v->as<ast_local_var_lhs>()->get_name()) + ":" + os.str();
}
case ast_instantiationT_list: {
std::string result = "<";
for (AnyV item : v->as<ast_instantiationT_list>()->get_items()) {
if (result.size() > 1)
result += ",";
result += item->as<ast_instantiationT_item>()->substituted_type->as_human_readable();
}
return result + ">";
}
case ast_tolk_required_version:
return static_cast<std::string>(v->as<ast_tolk_required_version>()->semver);
case ast_import_statement:
return static_cast<std::string>(v->as<ast_import_statement>()->get_file_leaf()->str_val);
case ast_import_directive:
return static_cast<std::string>(v->as<ast_import_directive>()->get_file_leaf()->str_val);
case ast_tolk_file:
return v->as<ast_tolk_file>()->file->rel_filename;
default:
@ -203,47 +244,55 @@ public:
void visit(AnyV v) override {
switch (v->type) {
case ast_empty_statement: return handle_vertex(v->as<ast_empty_statement>());
case ast_identifier: return handle_vertex(v->as<ast_identifier>());
// expressions
case ast_empty_expression: return handle_vertex(v->as<ast_empty_expression>());
case ast_parenthesized_expression: return handle_vertex(v->as<ast_parenthesized_expression>());
case ast_tensor: return handle_vertex(v->as<ast_tensor>());
case ast_tensor_square: return handle_vertex(v->as<ast_tensor_square>());
case ast_identifier: return handle_vertex(v->as<ast_identifier>());
case ast_typed_tuple: return handle_vertex(v->as<ast_typed_tuple>());
case ast_reference: return handle_vertex(v->as<ast_reference>());
case ast_local_var_lhs: return handle_vertex(v->as<ast_local_var_lhs>());
case ast_local_vars_declaration: return handle_vertex(v->as<ast_local_vars_declaration>());
case ast_int_const: return handle_vertex(v->as<ast_int_const>());
case ast_string_const: return handle_vertex(v->as<ast_string_const>());
case ast_bool_const: return handle_vertex(v->as<ast_bool_const>());
case ast_null_keyword: return handle_vertex(v->as<ast_null_keyword>());
case ast_self_keyword: return handle_vertex(v->as<ast_self_keyword>());
case ast_argument: return handle_vertex(v->as<ast_argument>());
case ast_argument_list: return handle_vertex(v->as<ast_argument_list>());
case ast_dot_access: return handle_vertex(v->as<ast_dot_access>());
case ast_function_call: return handle_vertex(v->as<ast_function_call>());
case ast_dot_method_call: return handle_vertex(v->as<ast_dot_method_call>());
case ast_global_var_declaration: return handle_vertex(v->as<ast_global_var_declaration>());
case ast_constant_declaration: return handle_vertex(v->as<ast_constant_declaration>());
case ast_underscore: return handle_vertex(v->as<ast_underscore>());
case ast_assign: return handle_vertex(v->as<ast_assign>());
case ast_set_assign: return handle_vertex(v->as<ast_set_assign>());
case ast_unary_operator: return handle_vertex(v->as<ast_unary_operator>());
case ast_binary_operator: return handle_vertex(v->as<ast_binary_operator>());
case ast_ternary_operator: return handle_vertex(v->as<ast_ternary_operator>());
case ast_return_statement: return handle_vertex(v->as<ast_return_statement>());
case ast_cast_as_operator: return handle_vertex(v->as<ast_cast_as_operator>());
// statements
case ast_empty_statement: return handle_vertex(v->as<ast_empty_statement>());
case ast_sequence: return handle_vertex(v->as<ast_sequence>());
case ast_return_statement: return handle_vertex(v->as<ast_return_statement>());
case ast_if_statement: return handle_vertex(v->as<ast_if_statement>());
case ast_repeat_statement: return handle_vertex(v->as<ast_repeat_statement>());
case ast_while_statement: return handle_vertex(v->as<ast_while_statement>());
case ast_do_while_statement: return handle_vertex(v->as<ast_do_while_statement>());
case ast_throw_statement: return handle_vertex(v->as<ast_throw_statement>());
case ast_assert_statement: return handle_vertex(v->as<ast_assert_statement>());
case ast_try_catch_statement: return handle_vertex(v->as<ast_try_catch_statement>());
case ast_if_statement: return handle_vertex(v->as<ast_if_statement>());
case ast_asm_body: return handle_vertex(v->as<ast_asm_body>());
// other
case ast_genericsT_item: return handle_vertex(v->as<ast_genericsT_item>());
case ast_genericsT_list: return handle_vertex(v->as<ast_genericsT_list>());
case ast_instantiationT_item: return handle_vertex(v->as<ast_instantiationT_item>());
case ast_instantiationT_list: return handle_vertex(v->as<ast_instantiationT_list>());
case ast_parameter: return handle_vertex(v->as<ast_parameter>());
case ast_parameter_list: return handle_vertex(v->as<ast_parameter_list>());
case ast_asm_body: return handle_vertex(v->as<ast_asm_body>());
case ast_annotation: return handle_vertex(v->as<ast_annotation>());
case ast_function_declaration: return handle_vertex(v->as<ast_function_declaration>());
case ast_local_var: return handle_vertex(v->as<ast_local_var>());
case ast_local_vars_declaration: return handle_vertex(v->as<ast_local_vars_declaration>());
case ast_global_var_declaration: return handle_vertex(v->as<ast_global_var_declaration>());
case ast_constant_declaration: return handle_vertex(v->as<ast_constant_declaration>());
case ast_tolk_required_version: return handle_vertex(v->as<ast_tolk_required_version>());
case ast_import_statement: return handle_vertex(v->as<ast_import_statement>());
case ast_import_directive: return handle_vertex(v->as<ast_import_directive>());
case ast_tolk_file: return handle_vertex(v->as<ast_tolk_file>());
default:
throw UnexpectedASTNodeType(v, "ASTStringifier::visit");

View file

@ -86,92 +86,103 @@ class ASTVisitorFunctionBody : public ASTVisitor {
protected:
using parent = ASTVisitorFunctionBody;
virtual void visit(V<ast_empty_statement> v) { return visit_children(v); }
// expressions
virtual void visit(V<ast_empty_expression> v) { return visit_children(v); }
virtual void visit(V<ast_parenthesized_expression> v) { return visit_children(v); }
virtual void visit(V<ast_tensor> v) { return visit_children(v); }
virtual void visit(V<ast_tensor_square> v) { return visit_children(v); }
virtual void visit(V<ast_identifier> v) { return visit_children(v); }
virtual void visit(V<ast_typed_tuple> v) { return visit_children(v); }
virtual void visit(V<ast_reference> v) { return visit_children(v); }
virtual void visit(V<ast_local_var_lhs> v) { return visit_children(v); }
virtual void visit(V<ast_local_vars_declaration> v) { return visit_children(v); }
virtual void visit(V<ast_int_const> v) { return visit_children(v); }
virtual void visit(V<ast_string_const> v) { return visit_children(v); }
virtual void visit(V<ast_bool_const> v) { return visit_children(v); }
virtual void visit(V<ast_null_keyword> v) { return visit_children(v); }
virtual void visit(V<ast_self_keyword> v) { return visit_children(v); }
virtual void visit(V<ast_argument> v) { return visit_children(v); }
virtual void visit(V<ast_argument_list> v) { return visit_children(v); }
virtual void visit(V<ast_dot_access> v) { return visit_children(v); }
virtual void visit(V<ast_function_call> v) { return visit_children(v); }
virtual void visit(V<ast_dot_method_call> v) { return visit_children(v); }
virtual void visit(V<ast_underscore> v) { return visit_children(v); }
virtual void visit(V<ast_assign> v) { return visit_children(v); }
virtual void visit(V<ast_set_assign> v) { return visit_children(v); }
virtual void visit(V<ast_unary_operator> v) { return visit_children(v); }
virtual void visit(V<ast_binary_operator> v) { return visit_children(v); }
virtual void visit(V<ast_ternary_operator> v) { return visit_children(v); }
virtual void visit(V<ast_return_statement> v) { return visit_children(v); }
virtual void visit(V<ast_cast_as_operator> v) { return visit_children(v); }
// statements
virtual void visit(V<ast_empty_statement> v) { return visit_children(v); }
virtual void visit(V<ast_sequence> v) { return visit_children(v); }
virtual void visit(V<ast_return_statement> v) { return visit_children(v); }
virtual void visit(V<ast_if_statement> v) { return visit_children(v); }
virtual void visit(V<ast_repeat_statement> v) { return visit_children(v); }
virtual void visit(V<ast_while_statement> v) { return visit_children(v); }
virtual void visit(V<ast_do_while_statement> v) { return visit_children(v); }
virtual void visit(V<ast_throw_statement> v) { return visit_children(v); }
virtual void visit(V<ast_assert_statement> v) { return visit_children(v); }
virtual void visit(V<ast_try_catch_statement> v) { return visit_children(v); }
virtual void visit(V<ast_if_statement> v) { return visit_children(v); }
virtual void visit(V<ast_local_var> v) { return visit_children(v); }
virtual void visit(V<ast_local_vars_declaration> v) { return visit_children(v); }
virtual void visit(V<ast_asm_body> v) { return visit_children(v); }
void visit(AnyV v) final {
switch (v->type) {
case ast_empty_statement: return visit(v->as<ast_empty_statement>());
// expressions
case ast_empty_expression: return visit(v->as<ast_empty_expression>());
case ast_parenthesized_expression: return visit(v->as<ast_parenthesized_expression>());
case ast_tensor: return visit(v->as<ast_tensor>());
case ast_tensor_square: return visit(v->as<ast_tensor_square>());
case ast_identifier: return visit(v->as<ast_identifier>());
case ast_typed_tuple: return visit(v->as<ast_typed_tuple>());
case ast_reference: return visit(v->as<ast_reference>());
case ast_local_var_lhs: return visit(v->as<ast_local_var_lhs>());
case ast_local_vars_declaration: return visit(v->as<ast_local_vars_declaration>());
case ast_int_const: return visit(v->as<ast_int_const>());
case ast_string_const: return visit(v->as<ast_string_const>());
case ast_bool_const: return visit(v->as<ast_bool_const>());
case ast_null_keyword: return visit(v->as<ast_null_keyword>());
case ast_self_keyword: return visit(v->as<ast_self_keyword>());
case ast_argument: return visit(v->as<ast_argument>());
case ast_argument_list: return visit(v->as<ast_argument_list>());
case ast_dot_access: return visit(v->as<ast_dot_access>());
case ast_function_call: return visit(v->as<ast_function_call>());
case ast_dot_method_call: return visit(v->as<ast_dot_method_call>());
case ast_underscore: return visit(v->as<ast_underscore>());
case ast_assign: return visit(v->as<ast_assign>());
case ast_set_assign: return visit(v->as<ast_set_assign>());
case ast_unary_operator: return visit(v->as<ast_unary_operator>());
case ast_binary_operator: return visit(v->as<ast_binary_operator>());
case ast_ternary_operator: return visit(v->as<ast_ternary_operator>());
case ast_return_statement: return visit(v->as<ast_return_statement>());
case ast_cast_as_operator: return visit(v->as<ast_cast_as_operator>());
// statements
case ast_empty_statement: return visit(v->as<ast_empty_statement>());
case ast_sequence: return visit(v->as<ast_sequence>());
case ast_return_statement: return visit(v->as<ast_return_statement>());
case ast_if_statement: return visit(v->as<ast_if_statement>());
case ast_repeat_statement: return visit(v->as<ast_repeat_statement>());
case ast_while_statement: return visit(v->as<ast_while_statement>());
case ast_do_while_statement: return visit(v->as<ast_do_while_statement>());
case ast_throw_statement: return visit(v->as<ast_throw_statement>());
case ast_assert_statement: return visit(v->as<ast_assert_statement>());
case ast_try_catch_statement: return visit(v->as<ast_try_catch_statement>());
case ast_if_statement: return visit(v->as<ast_if_statement>());
case ast_local_var: return visit(v->as<ast_local_var>());
case ast_local_vars_declaration: return visit(v->as<ast_local_vars_declaration>());
case ast_asm_body: return visit(v->as<ast_asm_body>());
#ifdef TOLK_DEBUG
case ast_asm_body:
throw UnexpectedASTNodeType(v, "ASTVisitor; forgot to filter out asm functions in should_visit_function()?");
#endif
default:
throw UnexpectedASTNodeType(v, "ASTVisitorFunctionBody::visit");
}
}
public:
virtual void start_visiting_function(V<ast_function_declaration> v_function) {
virtual bool should_visit_function(const FunctionData* fun_ref) = 0;
virtual void start_visiting_function(const FunctionData* fun_ref, V<ast_function_declaration> v_function) {
visit(v_function->get_body());
}
};
const std::vector<const FunctionData*>& get_all_not_builtin_functions();
template<class BodyVisitorT>
void visit_ast_of_all_functions(const AllSrcFiles& all_files) {
for (const SrcFile* file : all_files) {
for (AnyV v : file->ast->as<ast_tolk_file>()->get_toplevel_declarations()) {
if (auto v_func = v->try_as<ast_function_declaration>()) {
if (v_func->is_regular_function()) {
void visit_ast_of_all_functions() {
BodyVisitorT visitor;
visitor.start_visiting_function(v_func);
}
}
for (const FunctionData* fun_ref : get_all_not_builtin_functions()) {
if (visitor.should_visit_function(fun_ref)) {
visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as<ast_function_declaration>());
}
}
}

View file

@ -15,8 +15,9 @@
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#include "ast.h"
#ifdef TOLK_DEBUG
#include "ast-stringifier.h"
#include <iostream>
#endif
namespace tolk {
@ -104,7 +105,7 @@ int Vertex<ast_parameter_list>::get_mutate_params_count() const {
// Therefore, there is a guarantee, that all AST mutations are done via these methods,
// easily searched by usages, and there is no another way to modify any other field.
void ASTNodeExpressionBase::assign_inferred_type(TypeExpr* type) {
void ASTNodeExpressionBase::assign_inferred_type(TypePtr type) {
this->inferred_type = type;
}
@ -116,43 +117,79 @@ void ASTNodeExpressionBase::assign_lvalue_true() {
this->is_lvalue = true;
}
void Vertex<ast_identifier>::assign_sym(const Symbol* sym) {
void Vertex<ast_reference>::assign_sym(const Symbol* sym) {
this->sym = sym;
}
void Vertex<ast_self_keyword>::assign_param_ref(const LocalVarData* self_param) {
this->param_ref = self_param;
}
void Vertex<ast_function_call>::assign_fun_ref(const FunctionData* fun_ref) {
this->fun_maybe = fun_ref;
}
void Vertex<ast_dot_method_call>::assign_fun_ref(const FunctionData* fun_ref) {
this->fun_ref = fun_ref;
void Vertex<ast_cast_as_operator>::assign_resolved_type(TypePtr cast_to_type) {
this->cast_to_type = cast_to_type;
}
void Vertex<ast_global_var_declaration>::assign_var_ref(const GlobalVarData* var_ref) {
this->var_ref = var_ref;
}
void Vertex<ast_global_var_declaration>::assign_resolved_type(TypePtr declared_type) {
this->declared_type = declared_type;
}
void Vertex<ast_constant_declaration>::assign_const_ref(const GlobalConstData* const_ref) {
this->const_ref = const_ref;
}
void Vertex<ast_constant_declaration>::assign_resolved_type(TypePtr declared_type) {
this->declared_type = declared_type;
}
void Vertex<ast_instantiationT_item>::assign_resolved_type(TypePtr substituted_type) {
this->substituted_type = substituted_type;
}
void Vertex<ast_parameter>::assign_param_ref(const LocalVarData* param_ref) {
this->param_ref = param_ref;
}
void Vertex<ast_parameter>::assign_resolved_type(TypePtr declared_type) {
this->declared_type = declared_type;
}
void Vertex<ast_set_assign>::assign_fun_ref(const FunctionData* fun_ref) {
this->fun_ref = fun_ref;
}
void Vertex<ast_unary_operator>::assign_fun_ref(const FunctionData* fun_ref) {
this->fun_ref = fun_ref;
}
void Vertex<ast_binary_operator>::assign_fun_ref(const FunctionData* fun_ref) {
this->fun_ref = fun_ref;
}
void Vertex<ast_dot_access>::assign_target(const DotTarget& target) {
this->target = target;
}
void Vertex<ast_function_declaration>::assign_fun_ref(const FunctionData* fun_ref) {
this->fun_ref = fun_ref;
}
void Vertex<ast_local_var>::assign_var_ref(const Symbol* var_ref) {
this->var_maybe = var_ref;
void Vertex<ast_function_declaration>::assign_resolved_type(TypePtr declared_return_type) {
this->declared_return_type = declared_return_type;
}
void Vertex<ast_import_statement>::assign_src_file(const SrcFile* file) {
void Vertex<ast_local_var_lhs>::assign_var_ref(const LocalVarData* var_ref) {
this->var_ref = var_ref;
}
void Vertex<ast_local_var_lhs>::assign_resolved_type(TypePtr declared_type) {
this->declared_type = declared_type;
}
void Vertex<ast_import_directive>::assign_src_file(const SrcFile* file) {
this->file = file;
}

View file

@ -20,7 +20,6 @@
#include "fwd-declarations.h"
#include "platform-utils.h"
#include "src-file.h"
#include "type-expr.h"
#include "lexer.h"
#include "symtable.h"
@ -65,47 +64,55 @@
namespace tolk {
enum ASTNodeType {
ast_empty_statement,
ast_identifier,
// expressions
ast_empty_expression,
ast_parenthesized_expression,
ast_tensor,
ast_tensor_square,
ast_identifier,
ast_typed_tuple,
ast_reference,
ast_local_var_lhs,
ast_local_vars_declaration,
ast_int_const,
ast_string_const,
ast_bool_const,
ast_null_keyword,
ast_self_keyword,
ast_argument,
ast_argument_list,
ast_dot_access,
ast_function_call,
ast_dot_method_call,
ast_global_var_declaration,
ast_constant_declaration,
ast_underscore,
ast_assign,
ast_set_assign,
ast_unary_operator,
ast_binary_operator,
ast_ternary_operator,
ast_return_statement,
ast_cast_as_operator,
// statements
ast_empty_statement,
ast_sequence,
ast_return_statement,
ast_if_statement,
ast_repeat_statement,
ast_while_statement,
ast_do_while_statement,
ast_throw_statement,
ast_assert_statement,
ast_try_catch_statement,
ast_if_statement,
ast_asm_body,
// other
ast_genericsT_item,
ast_genericsT_list,
ast_instantiationT_item,
ast_instantiationT_list,
ast_parameter,
ast_parameter_list,
ast_asm_body,
ast_annotation,
ast_function_declaration,
ast_local_var,
ast_local_vars_declaration,
ast_global_var_declaration,
ast_constant_declaration,
ast_tolk_required_version,
ast_import_statement,
ast_import_directive,
ast_tolk_file,
};
@ -144,6 +151,7 @@ struct ASTNodeBase {
const SrcLocation loc;
ASTNodeBase(ASTNodeType type, SrcLocation loc) : type(type), loc(loc) {}
ASTNodeBase(const ASTNodeBase&) = delete;
template<ASTNodeType node_type>
V<node_type> as() const {
@ -171,12 +179,14 @@ struct ASTNodeBase {
};
struct ASTNodeExpressionBase : ASTNodeBase {
TypeExpr* inferred_type = nullptr; // todo make it const
friend class ASTDuplicatorFunction;
TypePtr inferred_type = nullptr;
bool is_rvalue: 1 = false;
bool is_lvalue: 1 = false;
ASTNodeExpressionBase* mutate() const { return const_cast<ASTNodeExpressionBase*>(this); }
void assign_inferred_type(TypeExpr* type);
void assign_inferred_type(TypePtr type);
void assign_rvalue_true();
void assign_lvalue_true();
@ -226,6 +236,8 @@ struct ASTExprVararg : ASTNodeExpressionBase {
protected:
std::vector<AnyExprV> children;
AnyExprV child(int i) const { return children.at(i); }
ASTExprVararg(ASTNodeType type, SrcLocation loc, std::vector<AnyExprV> children)
: ASTNodeExpressionBase(type, loc), children(std::move(children)) {}
@ -254,7 +266,6 @@ struct ASTStatementVararg : ASTNodeStatementBase {
protected:
std::vector<AnyV> children;
AnyV child(int i) const { return children.at(i); }
AnyExprV child_as_expr(int i) const { return reinterpret_cast<AnyExprV>(children.at(i)); }
ASTStatementVararg(ASTNodeType type, SrcLocation loc, std::vector<AnyV> children)
@ -281,7 +292,7 @@ struct ASTOtherVararg : ASTNodeBase {
protected:
std::vector<AnyV> children;
AnyV child(int i) const { return children.at(i); }
AnyExprV child_as_expr(int i) const { return reinterpret_cast<AnyExprV>(children.at(i)); }
ASTOtherVararg(ASTNodeType type, SrcLocation loc, std::vector<AnyV> children)
: ASTNodeBase(type, loc), children(std::move(children)) {}
@ -291,21 +302,42 @@ public:
bool empty() const { return children.empty(); }
};
// ---------------------------------------------------------
template<>
struct Vertex<ast_empty_statement> final : ASTStatementVararg {
explicit Vertex(SrcLocation loc)
: ASTStatementVararg(ast_empty_statement, loc, {}) {}
// ast_identifier is "a name" in AST structure
// it's NOT a standalone expression, it's "implementation details" of other AST vertices
// example: `var x = 5` then "x" is identifier (inside local var declaration)
// example: `global g: int` then "g" is identifier
// example: `someF` is a reference, which contains identifier
// example: `someF<int>` is a reference which contains identifier and generics instantiation
// example: `fun f<T>()` then "f" is identifier, "<T>" is a generics declaration
struct Vertex<ast_identifier> final : ASTOtherLeaf {
std::string_view name; // empty for underscore
Vertex(SrcLocation loc, std::string_view name)
: ASTOtherLeaf(ast_identifier, loc)
, name(name) {}
};
//
// ---------------------------------------------------------
// expressions
//
template<>
// Placeholder vertex standing for "nothing" where an expression is expected; its type is "unknown".
// E.g. in `throw 123;` the "throw arg" slot holds an empty expression
// (as opposed to `throw (123, arg)`, where the slot holds `arg`).
struct Vertex<ast_empty_expression> final : ASTExprLeaf {
  explicit Vertex(SrcLocation loc)
    : ASTExprLeaf(ast_empty_expression, loc) {
  }
};
template<>
// ast_parenthesized_expression is an expression enclosed in (parentheses)
// example: `(1)`, `((f()))` (two nested)
struct Vertex<ast_parenthesized_expression> final : ASTExprUnary {
AnyExprV get_expr() const { return child; }
@ -314,37 +346,101 @@ struct Vertex<ast_parenthesized_expression> final : ASTExprUnary {
};
template<>
// ast_tensor is a set of expressions enclosed in (parentheses)
// in most languages, it's called "tuple", but in TVM, "tuple" is a TVM primitive, that's why "tensor"
// example: `(1, 2)`, `(1, (2, 3))` (nested), `()` (empty tensor)
// note, that `(1)` is not a tensor, it's a parenthesized expression
// a tensor of N elements occupies N slots on a stack (opposed to TVM tuple primitive, 1 slot)
struct Vertex<ast_tensor> final : ASTExprVararg {
// read-only view of all items (the `children` vector of the base class)
const std::vector<AnyExprV>& get_items() const { return children; }
// i-th item; both forms below are bounds-checked via `children.at(i)`
// NOTE(review): `get_item` is defined twice here — this looks like the old and the new side of a diff hunk
// rendered together; confirm which one is current (only one can stay for the struct to compile)
AnyExprV get_item(int i) const { return children.at(i); }
AnyExprV get_item(int i) const { return child(i); }
// `items` is taken by value and moved into the base class
Vertex(SrcLocation loc, std::vector<AnyExprV> items)
: ASTExprVararg(ast_tensor, loc, std::move(items)) {}
};
template<>
struct Vertex<ast_tensor_square> final : ASTExprVararg {
// ast_typed_tuple is a set of expressions in [square brackets]
// in TVM, it's a TVM tuple, that occupies 1 slot, but the compiler knows its "typed structure"
// example: `[1, x]`, `[[0]]` (nested)
// typed tuples can be assigned to N variables, like `[one, _, three] = [1,2,3]`
struct Vertex<ast_typed_tuple> final : ASTExprVararg {
const std::vector<AnyExprV>& get_items() const { return children; }
AnyExprV get_item(int i) const { return children.at(i); }
AnyExprV get_item(int i) const { return child(i); }
Vertex(SrcLocation loc, std::vector<AnyExprV> items)
: ASTExprVararg(ast_tensor_square, loc, std::move(items)) {}
: ASTExprVararg(ast_typed_tuple, loc, std::move(items)) {}
};
template<>
struct Vertex<ast_identifier> final : ASTExprLeaf {
const Symbol* sym = nullptr; // always filled (after resolved); points to local / global / function / constant
std::string_view name;
// ast_reference is "something that references a symbol"
// examples: `x` / `someF` / `someF<int>`
// it's a leaf expression from traversing point of view, but actually, has children (not expressions)
// note, that both `someF()` and `someF<int>()` are function calls, where a callee is just a reference
struct Vertex<ast_reference> final : ASTExprLeaf {
private:
V<ast_identifier> identifier; // its name, `x` / `someF`
V<ast_instantiationT_list> instantiationTs; // not null if `<int>`, otherwise nullptr
public:
const Symbol* sym = nullptr; // filled on resolve or type inferring; points to local / global / function / constant
auto get_identifier() const { return identifier; }
bool has_instantiationTs() const { return instantiationTs != nullptr; }
auto get_instantiationTs() const { return instantiationTs; }
std::string_view get_name() const { return identifier->name; }
Vertex* mutate() const { return const_cast<Vertex*>(this); }
void assign_sym(const Symbol* sym);
Vertex(SrcLocation loc, std::string_view name)
: ASTExprLeaf(ast_identifier, loc)
, name(name) {}
Vertex(SrcLocation loc, V<ast_identifier> name_identifier, V<ast_instantiationT_list> instantiationTs)
: ASTExprLeaf(ast_reference, loc)
, identifier(name_identifier), instantiationTs(instantiationTs) {}
};
template<>
// A single variable inside a `var` / `val` declaration.
//   `var x = 0;`                        -> "x" is a local var lhs
//   `val (x: int, [y redef], _) = rhs`  -> "x", "y" and "_" are local var lhs, one vertex each
// From the expression's point of view it is a leaf, though technically it owns an "identifier" child.
struct Vertex<ast_local_var_lhs> final : ASTExprLeaf {
private:
  V<ast_identifier> identifier;

public:
  // filled on resolve identifiers; for `redef` points to declared above; for underscore, name is empty
  const LocalVarData* var_ref = nullptr;
  // not null for `var x: int = rhs`, otherwise nullptr
  TypePtr declared_type;
  // declared via 'val', not 'var'
  bool is_immutable;
  // var (existing_var redef, new_var: int) = ...
  bool marked_as_redef;

  Vertex(SrcLocation loc, V<ast_identifier> identifier, TypePtr declared_type, bool is_immutable, bool marked_as_redef)
    : ASTExprLeaf(ast_local_var_lhs, loc)
    , identifier(identifier)
    , declared_type(declared_type)
    , is_immutable(is_immutable)
    , marked_as_redef(marked_as_redef) {
  }

  // variable name as written in source; empty for underscore
  std::string_view get_name() const {
    return identifier->name;
  }
  V<ast_identifier> get_identifier() const {
    return identifier;
  }

  // const-stripping accessor, used together with the assign_* setters below
  Vertex* mutate() const {
    return const_cast<Vertex*>(this);
  }
  void assign_resolved_type(TypePtr declared_type);
  void assign_var_ref(const LocalVarData* var_ref);
};
template<>
// Expression that declares local variables; it is what stands on the left side of an assignment.
//   `var x = 5`     -> this node is `var x`, the lhs of the assignment
//   `var (x, [y])`  -> its expr is "tensor (local var, typed tuple (local var))"
struct Vertex<ast_local_vars_declaration> final : ASTExprUnary {
  Vertex(SrcLocation loc, AnyExprV expr)
    : ASTExprUnary(ast_local_vars_declaration, loc, expr) {
  }

  // the declared shape: ast_local_var_lhs / ast_tensor / ast_typed_tuple
  AnyExprV get_expr() const {
    return child;
  }
};
template<>
// ast_int_const is an integer literal
// examples: `0` / `0xFF`
// note, that `-1` is unary minus of `1` int const
struct Vertex<ast_int_const> final : ASTExprLeaf {
td::RefInt256 intval; // parsed value, 255 for "0xFF"
std::string_view orig_str; // original "0xFF"; empty for nodes generated by compiler (e.g. in constant folding)
@ -356,6 +452,10 @@ struct Vertex<ast_int_const> final : ASTExprLeaf {
};
template<>
// ast_string_const is a string literal in double quotes or """ when multiline
// examples: "asdf" / "Ef8zMz..."a / "to_calc_crc32_from"c
// an optional modifier specifies how a string is parsed (probably, like an integer)
// note, that TVM doesn't have strings, it has only slices, so "hello" has type slice
struct Vertex<ast_string_const> final : ASTExprLeaf {
std::string_view str_val;
char modifier;
@ -375,6 +475,7 @@ struct Vertex<ast_string_const> final : ASTExprLeaf {
};
template<>
// ast_bool_const is either `true` or `false`
struct Vertex<ast_bool_const> final : ASTExprLeaf {
bool bool_val;
@ -384,25 +485,20 @@ struct Vertex<ast_bool_const> final : ASTExprLeaf {
};
template<>
// The `null` literal.
// Handle with care: for instance, `null` takes a special place in the type system.
struct Vertex<ast_null_keyword> final : ASTExprLeaf {
  explicit Vertex(SrcLocation loc)
    : ASTExprLeaf(ast_null_keyword, loc) {
  }
};
template<>
struct Vertex<ast_self_keyword> final : ASTExprLeaf {
const LocalVarData* param_ref = nullptr; // filled after resolve identifiers, points to `self` parameter
Vertex* mutate() const { return const_cast<Vertex*>(this); }
void assign_param_ref(const LocalVarData* self_param);
explicit Vertex(SrcLocation loc)
: ASTExprLeaf(ast_self_keyword, loc) {}
};
template<>
// ast_argument is an element of an argument list of a function/method call
// example: `f(1, x)` has 2 arguments, `t.tupleFirst()` has no arguments (though `t` is passed as `self`)
// example: `f(mutate arg)` has 1 argument with `passed_as_mutate` flag
// (without `mutate` keyword, the entity "argument" could be replaced just by "any expression")
struct Vertex<ast_argument> final : ASTExprUnary {
bool passed_as_mutate; // when called `f(mutate arg)`, not `f(arg)`
bool passed_as_mutate;
AnyExprV get_expr() const { return child; }
@ -412,19 +508,57 @@ struct Vertex<ast_argument> final : ASTExprUnary {
};
template<>
// ast_argument_list contains N arguments of a function/method call
struct Vertex<ast_argument_list> final : ASTExprVararg {
// read-only view of all arguments (the `children` vector of the base class)
const std::vector<AnyExprV>& get_arguments() const { return children; }
// i-th argument, converted to an ast_argument vertex via `as<ast_argument>()`
// NOTE(review): `get_arg` is defined twice here — this looks like the old and the new side of a diff hunk
// rendered together; confirm which one is current (only one can stay for the struct to compile)
auto get_arg(int i) const { return children.at(i)->as<ast_argument>(); }
auto get_arg(int i) const { return child(i)->as<ast_argument>(); }
// `arguments` is taken by value and moved into the base class
Vertex(SrcLocation loc, std::vector<AnyExprV> arguments)
: ASTExprVararg(ast_argument_list, loc, std::move(arguments)) {}
};
template<>
struct Vertex<ast_function_call> final : ASTExprBinary {
const FunctionData* fun_maybe = nullptr; // filled after resolve; remains nullptr for `localVar()` / `getF()()`
// ast_dot_access is "object before dot, identifier + optional <T> after dot"
// examples: `tensorVar.0` / `obj.field` / `getObj().method` / `t.tupleFirst<int>`
// from traversing point of view, it's an unary expression: only obj is expression, field name is not
// note, that `obj.method()` is a function call with "dot access `obj.method`" callee
struct Vertex<ast_dot_access> final : ASTExprUnary {
private:
V<ast_identifier> identifier; // `0` / `field` / `method`
V<ast_instantiationT_list> instantiationTs; // not null if `<int>`, otherwise nullptr
AnyExprV get_called_f() const { return lhs; }
public:
typedef const FunctionData* DotTarget; // for `t.tupleAt` target is `tupleAt` global function
DotTarget target = nullptr; // filled at type inferring
AnyExprV get_obj() const { return child; }
auto get_identifier() const { return identifier; }
bool has_instantiationTs() const { return instantiationTs != nullptr; }
auto get_instantiationTs() const { return instantiationTs; }
std::string_view get_field_name() const { return identifier->name; }
Vertex* mutate() const { return const_cast<Vertex*>(this); }
void assign_target(const DotTarget& target);
Vertex(SrcLocation loc, AnyExprV obj, V<ast_identifier> identifier, V<ast_instantiationT_list> instantiationTs)
: ASTExprUnary(ast_dot_access, loc, obj)
, identifier(identifier), instantiationTs(instantiationTs) {}
};
template<>
// ast_function_call is "calling some lhs with parenthesis", lhs is arbitrary expression (callee)
// example: `globalF()` then callee is reference
// example: `globalF<int>()` then callee is reference (with instantiation Ts filled)
// example: `local_var()` then callee is reference (points to local var, filled at resolve identifiers)
// example: `getF()()` then callee is another func call (which type is TypeDataFunCallable)
// example: `obj.method()` then callee is dot access (resolved while type inferring)
struct Vertex<ast_function_call> final : ASTExprBinary {
const FunctionData* fun_maybe = nullptr; // filled while type inferring for `globalF()` / `obj.f()`; remains nullptr for `local_var()` / `getF()()`
AnyExprV get_callee() const { return lhs; }
bool is_dot_call() const { return lhs->type == ast_dot_access; }
AnyExprV get_dot_obj() const { return lhs->as<ast_dot_access>()->get_obj(); }
auto get_arg_list() const { return rhs->as<ast_argument_list>(); }
int get_num_args() const { return rhs->as<ast_argument_list>()->size(); }
auto get_arg(int i) const { return rhs->as<ast_argument_list>()->get_arg(i); }
@ -437,90 +571,79 @@ struct Vertex<ast_function_call> final : ASTExprBinary {
};
template<>
// ast_dot_method_call is a binary vertex: lhs is the object, rhs is an ast_argument_list,
// and `method_name` is stored separately as a string
// NOTE(review): presumably represents `obj.method(args)`; this struct looks like the pre-change side
// of the diff (calls via dot seem to be modelled by ast_function_call + ast_dot_access afterwards) — confirm
struct Vertex<ast_dot_method_call> final : ASTExprBinary {
const FunctionData* fun_ref = nullptr; // points to global function (after resolve)
std::string_view method_name;
// the object the method is called on
AnyExprV get_obj() const { return lhs; }
// argument accessors, all delegating to the ast_argument_list stored in rhs
auto get_arg_list() const { return rhs->as<ast_argument_list>(); }
int get_num_args() const { return rhs->as<ast_argument_list>()->size(); }
auto get_arg(int i) const { return rhs->as<ast_argument_list>()->get_arg(i); }
// casts away constness so that assign_fun_ref can be called on a const vertex
Vertex* mutate() const { return const_cast<Vertex*>(this); }
void assign_fun_ref(const FunctionData* fun_ref);
Vertex(SrcLocation loc, std::string_view method_name, AnyExprV lhs, V<ast_argument_list> arguments)
: ASTExprBinary(ast_dot_method_call, loc, lhs, arguments)
, method_name(method_name) {}
};
template<>
// ast_global_var_declaration is a unary statement whose only child is the identifier (the variable name);
// the declared type is kept as a field, not as a child
// NOTE(review): still uses `TypeExpr*`, so this is presumably the pre-change side of the diff — confirm
struct Vertex<ast_global_var_declaration> final : ASTStatementUnary {
const GlobalVarData* var_ref = nullptr; // filled after register
TypeExpr* declared_type;
// the name of the global, as an ast_identifier vertex
auto get_identifier() const { return child->as<ast_identifier>(); }
// casts away constness so that assign_var_ref can be called on a const vertex
Vertex* mutate() const { return const_cast<Vertex*>(this); }
void assign_var_ref(const GlobalVarData* var_ref);
Vertex(SrcLocation loc, V<ast_identifier> name_identifier, TypeExpr* declared_type)
: ASTStatementUnary(ast_global_var_declaration, loc, name_identifier)
, declared_type(declared_type) {}
};
template<>
// ast_constant_declaration has two children: [0] the identifier (constant name), [1] the init value expression;
// the optional declared type is kept as a field, not as a child
// NOTE(review): still uses `TypeExpr*`, so this is presumably the pre-change side of the diff — confirm
struct Vertex<ast_constant_declaration> final : ASTStatementVararg {
const GlobalConstData* const_ref = nullptr; // filled after register
TypeExpr* declared_type; // may be nullptr
// child 0: the name of the constant, as an ast_identifier vertex
auto get_identifier() const { return child(0)->as<ast_identifier>(); }
// child 1: the expression the constant is initialized with
AnyExprV get_init_value() const { return child_as_expr(1); }
// casts away constness so that assign_const_ref can be called on a const vertex
Vertex* mutate() const { return const_cast<Vertex*>(this); }
void assign_const_ref(const GlobalConstData* const_ref);
Vertex(SrcLocation loc, V<ast_identifier> name_identifier, TypeExpr* declared_type, AnyExprV init_value)
: ASTStatementVararg(ast_constant_declaration, loc, {name_identifier, init_value})
, declared_type(declared_type) {}
};
template<>
// The `_` symbol used on the left side of an assignment, e.g. `(cs, _) = cs.loadAndReturn()`.
// That is its only correct usage, but `_` as an rvalue (`var x = _;`) is still valid from the AST point of view.
// In a declaration (`var _ = 1`), underscore is NOT this vertex: it's a regular declared local var with empty name,
// because `var _:int` is also correct. Only a plain `_ = 1` (not a declaration) produces ast_underscore.
struct Vertex<ast_underscore> final : ASTExprLeaf {
  explicit Vertex(SrcLocation loc)
    : ASTExprLeaf(ast_underscore, loc) {
  }
};
template<>
// Plain assignment "lhs = rhs".
//   `a = 4`      -> lhs is ast_reference
//   `var a = 4`  -> lhs is ast_local_vars_declaration
// Other valid shapes: `(cs, b, mode) = rhs` / `f() = g()`.
struct Vertex<ast_assign> final : ASTExprBinary {
  explicit Vertex(SrcLocation loc, AnyExprV lhs, AnyExprV rhs)
    : ASTExprBinary(ast_assign, loc, lhs, rhs) {
  }

  // the value being assigned
  AnyExprV get_rhs() const {
    return rhs;
  }
  // the assignment target
  AnyExprV get_lhs() const {
    return lhs;
  }
};
template<>
// Compound assignment "lhs <op>= rhs", e.g. `a += 4` or `b <<= c`.
struct Vertex<ast_set_assign> final : ASTExprBinary {
  // filled at type inferring, points to `_+_` built-in for +=
  const FunctionData* fun_ref = nullptr;
  // without equal sign, "+" for operator +=
  std::string_view operator_name;
  // tok_set_*
  TokenType tok;

  Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyExprV lhs, AnyExprV rhs)
    : ASTExprBinary(ast_set_assign, loc, lhs, rhs)
    , operator_name(operator_name)
    , tok(tok) {
  }

  AnyExprV get_rhs() const {
    return rhs;
  }
  AnyExprV get_lhs() const {
    return lhs;
  }

  // const-stripping accessor, used together with assign_fun_ref
  Vertex* mutate() const {
    return const_cast<Vertex*>(this);
  }
  void assign_fun_ref(const FunctionData* fun_ref);
};
template<>
// An operator applied to a single expression, e.g. `-1` or `~found`.
struct Vertex<ast_unary_operator> final : ASTExprUnary {
  // filled at type inferring, points to some built-in function
  const FunctionData* fun_ref = nullptr;
  std::string_view operator_name;
  TokenType tok;

  Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyExprV rhs)
    : ASTExprUnary(ast_unary_operator, loc, rhs)
    , operator_name(operator_name)
    , tok(tok) {
  }

  // the operand
  AnyExprV get_rhs() const {
    return child;
  }

  // const-stripping accessor, used together with assign_fun_ref
  Vertex* mutate() const {
    return const_cast<Vertex*>(this);
  }
  void assign_fun_ref(const FunctionData* fun_ref);
};
template<>
// ast_binary_operator is "some operator over two expressions"
// examples: `a + b` / `x & true` / `(a, b) << g()`
// note, that `a = b` is NOT a binary operator, it's ast_assign, also `a += b`, it's ast_set_assign
struct Vertex<ast_binary_operator> final : ASTExprBinary {
const FunctionData* fun_ref = nullptr; // filled at type inferring, points to some built-in function
std::string_view operator_name;
TokenType tok;
AnyExprV get_lhs() const { return lhs; }
AnyExprV get_rhs() const { return rhs; }
bool is_set_assign() const {
TokenType t = tok;
return t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div ||
t == tok_set_mod || t == tok_set_lshift || t == tok_set_rshift ||
t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor;
}
bool is_assign() const {
return tok == tok_assign;
}
Vertex* mutate() const { return const_cast<Vertex*>(this); }
void assign_fun_ref(const FunctionData* fun_ref);
Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyExprV lhs, AnyExprV rhs)
: ASTExprBinary(ast_binary_operator, loc, lhs, rhs)
@ -528,24 +651,53 @@ struct Vertex<ast_binary_operator> final : ASTExprBinary {
};
template<>
// ast_ternary_operator is a traditional ternary construction
// example: `cond ? a : b`
// children layout: [0] condition, [1] value when true, [2] value when false
struct Vertex<ast_ternary_operator> final : ASTExprVararg {
// NOTE(review): each getter appears twice (`children.at(i)` and `child(i)` forms) — this looks like the old
// and the new side of a diff hunk rendered together; confirm which set is current (only one can stay)
AnyExprV get_cond() const { return children.at(0); }
AnyExprV get_when_true() const { return children.at(1); }
AnyExprV get_when_false() const { return children.at(2); }
AnyExprV get_cond() const { return child(0); }
AnyExprV get_when_true() const { return child(1); }
AnyExprV get_when_false() const { return child(2); }
// the three operands are stored as children in the order cond, when_true, when_false
Vertex(SrcLocation loc, AnyExprV cond, AnyExprV when_true, AnyExprV when_false)
: ASTExprVararg(ast_ternary_operator, loc, {cond, when_true, when_false}) {}
};
template<>
struct Vertex<ast_return_statement> : ASTStatementUnary {
AnyExprV get_return_value() const { return child_as_expr(); }
// ast_cast_as_operator is explicit casting with "as" keyword
// examples: `arg as int` / `null as cell` / `t.tupleAt(2) as slice`
struct Vertex<ast_cast_as_operator> final : ASTExprUnary {
AnyExprV get_expr() const { return child; }
Vertex(SrcLocation loc, AnyExprV child)
: ASTStatementUnary(ast_return_statement, loc, child) {}
TypePtr cast_to_type;
Vertex* mutate() const { return const_cast<Vertex*>(this); }
void assign_resolved_type(TypePtr cast_to_type);
Vertex(SrcLocation loc, AnyExprV expr, TypePtr cast_to_type)
: ASTExprUnary(ast_cast_as_operator, loc, expr)
, cast_to_type(cast_to_type) {}
};
//
// ---------------------------------------------------------
// statements
//
template<>
// Very similar to an "empty sequence", but treated specially.
//   `;` (just a semicolon) is an empty statement
//   the body of a `builtin` function is an empty statement (not a zero-length sequence)
struct Vertex<ast_empty_statement> final : ASTStatementVararg {
  explicit Vertex(SrcLocation loc)
    : ASTStatementVararg(ast_empty_statement, loc, {}) {
  }
};
template<>
// ast_sequence is "some sequence of statements"
// example: function body is a sequence
// example: do while body is a sequence
struct Vertex<ast_sequence> final : ASTStatementVararg {
SrcLocation loc_end;
@ -558,26 +710,61 @@ struct Vertex<ast_sequence> final : ASTStatementVararg {
};
template<>
// "Return something from a function": `return a` / `return any_expr()()` / `return;`.
// A bare `return;` (no value, meaning "void") is stored in AST with an empty expression as its child.
struct Vertex<ast_return_statement> : ASTStatementUnary {
  Vertex(SrcLocation loc, AnyExprV child)
    : ASTStatementUnary(ast_return_statement, loc, child) {
  }

  // false for a bare `return;`, whose child is ast_empty_expression
  bool has_return_value() const {
    const bool is_bare_return = child->type == ast_empty_expression;
    return !is_bare_return;
  }
  // the returned expression (an empty expression for a bare `return;`)
  AnyExprV get_return_value() const {
    return child_as_expr();
  }
};
template<>
// Traditional if statement, optionally followed by an else branch:
//   `if (cond) { ... } else { ... }` / `if (cond) { ... }`
// A missing else branch is stored as an empty statement.
// "else if" is just an "if statement" placed inside the sequence of the else branch.
// Children layout: [0] condition, [1] if body, [2] else body.
struct Vertex<ast_if_statement> final : ASTStatementVararg {
  // if(!cond), to generate more optimal fift code
  bool is_ifnot;

  Vertex(SrcLocation loc, bool is_ifnot, AnyExprV cond, V<ast_sequence> if_body, V<ast_sequence> else_body)
    : ASTStatementVararg(ast_if_statement, loc, {cond, if_body, else_body})
    , is_ifnot(is_ifnot) {
  }

  AnyExprV get_cond() const {
    return child_as_expr(0);
  }
  auto get_if_body() const {
    return children.at(1)->as<ast_sequence>();
  }
  // always exists (when else omitted, it's empty)
  auto get_else_body() const {
    return children.at(2)->as<ast_sequence>();
  }
};
template<>
// ast_repeat_statement is "repeat something N times"
// example: `repeat (10) { ... }`
// children layout: [0] the expression in parentheses, [1] the body sequence
struct Vertex<ast_repeat_statement> final : ASTStatementVararg {
// child 0, returned as an expression
AnyExprV get_cond() const { return child_as_expr(0); }
// child 1, converted to an ast_sequence vertex
// NOTE(review): `get_body` is defined twice here — this looks like the old and the new side of a diff hunk
// rendered together; confirm which one is current (only one can stay for the struct to compile)
auto get_body() const { return child(1)->as<ast_sequence>(); }
auto get_body() const { return children.at(1)->as<ast_sequence>(); }
Vertex(SrcLocation loc, AnyExprV cond, V<ast_sequence> body)
: ASTStatementVararg(ast_repeat_statement, loc, {cond, body}) {}
};
template<>
// ast_while_statement is a standard "while" loop
// example: `while (x > 0) { ... }`
// children layout: [0] the loop condition, [1] the body sequence
struct Vertex<ast_while_statement> final : ASTStatementVararg {
// child 0, returned as an expression
AnyExprV get_cond() const { return child_as_expr(0); }
// child 1, converted to an ast_sequence vertex
// NOTE(review): `get_body` is defined twice here — this looks like the old and the new side of a diff hunk
// rendered together; confirm which one is current (only one can stay for the struct to compile)
auto get_body() const { return child(1)->as<ast_sequence>(); }
auto get_body() const { return children.at(1)->as<ast_sequence>(); }
Vertex(SrcLocation loc, AnyExprV cond, V<ast_sequence> body)
: ASTStatementVararg(ast_while_statement, loc, {cond, body}) {}
};
template<>
// ast_do_while_statement is a standard "do while" loop
// example: `do { ... } while (x > 0);`
struct Vertex<ast_do_while_statement> final : ASTStatementVararg {
auto get_body() const { return child(0)->as<ast_sequence>(); }
auto get_body() const { return children.at(0)->as<ast_sequence>(); }
AnyExprV get_cond() const { return child_as_expr(1); }
Vertex(SrcLocation loc, V<ast_sequence> body, AnyExprV cond)
@ -585,16 +772,21 @@ struct Vertex<ast_do_while_statement> final : ASTStatementVararg {
};
template<>
// ast_throw_statement is throwing an exception, it accepts excNo and optional arg
// examples: `throw 10` / `throw (ERR_LOW_BALANCE)` / `throw (1001, incomingAddr)`
// when thrown arg is missing, it's stored as empty expression
// children layout: [0] thrown code, [1] thrown arg (or an empty expression)
struct Vertex<ast_throw_statement> final : ASTStatementVararg {
AnyExprV get_thrown_code() const { return child_as_expr(0); }
// NOTE(review): `get_thrown_arg` is defined twice (before and after `has_thrown_arg`) — this looks like the old
// and the new side of a diff hunk rendered together; confirm which one is current (only one can stay)
AnyExprV get_thrown_arg() const { return child_as_expr(1); } // may be ast_empty
// true when child 1 is a real expression, i.e. not ast_empty_expression
bool has_thrown_arg() const { return child_as_expr(1)->type != ast_empty_expression; }
AnyExprV get_thrown_arg() const { return child_as_expr(1); }
Vertex(SrcLocation loc, AnyExprV thrown_code, AnyExprV thrown_arg)
: ASTStatementVararg(ast_throw_statement, loc, {thrown_code, thrown_arg}) {}
};
template<>
// ast_assert_statement is "assert that cond is true, otherwise throw an exception"
// examples: `assert (balance > 0, ERR_ZERO_BALANCE)` / `assert (balance > 0) throw (ERR_ZERO_BALANCE)`
struct Vertex<ast_assert_statement> final : ASTStatementVararg {
AnyExprV get_cond() const { return child_as_expr(0); }
AnyExprV get_thrown_code() const { return child_as_expr(1); }
@ -604,6 +796,10 @@ struct Vertex<ast_assert_statement> final : ASTStatementVararg {
};
template<>
// ast_try_catch_statement is a standard try catch (finally block doesn't exist)
// example: `try { ... } catch (excNo) { ... }`
// there are two formal "arguments" of catch: excNo and arg, but both can be omitted
// when omitted, they are stored as underscores, so len of a catch tensor is always 2
struct Vertex<ast_try_catch_statement> final : ASTStatementVararg {
auto get_try_body() const { return children.at(0)->as<ast_sequence>(); }
auto get_catch_expr() const { return children.at(1)->as<ast_tensor>(); } // (excNo, arg), always len 2
@ -614,29 +810,42 @@ struct Vertex<ast_try_catch_statement> final : ASTStatementVararg {
};
template<>
struct Vertex<ast_if_statement> final : ASTStatementVararg {
bool is_ifnot; // if(!cond), to generate more optimal fift code
// ast_asm_body is a body of `asm` function — a set of strings, and optionally stack order manipulations
// example: `fun skipMessageOp... asm "32 PUSHINT" "SDSKIPFIRST";`
// user can specify "arg order"; example: `fun store(self: builder, op: int) asm (op self)` then [1, 0]
// user can specify "ret order"; example: `fun modDiv... asm(-> 1 0) "DIVMOD";` then [1, 0]
struct Vertex<ast_asm_body> final : ASTStatementVararg {
std::vector<int> arg_order;
std::vector<int> ret_order;
AnyExprV get_cond() const { return child_as_expr(0); }
auto get_if_body() const { return child(1)->as<ast_sequence>(); }
auto get_else_body() const { return child(2)->as<ast_sequence>(); } // always exists (when else omitted, it's empty)
const std::vector<AnyV>& get_asm_commands() const { return children; } // ast_string_const[]
Vertex(SrcLocation loc, bool is_ifnot, AnyExprV cond, V<ast_sequence> if_body, V<ast_sequence> else_body)
: ASTStatementVararg(ast_if_statement, loc, {cond, if_body, else_body})
, is_ifnot(is_ifnot) {}
Vertex(SrcLocation loc, std::vector<int> arg_order, std::vector<int> ret_order, std::vector<AnyV> asm_commands)
: ASTStatementVararg(ast_asm_body, loc, std::move(asm_commands))
, arg_order(std::move(arg_order)), ret_order(std::move(ret_order)) {}
};
//
// ---------------------------------------------------------
// other
//
template<>
// ast_genericsT_item is generics T at declaration
// example: `fun f<T1, T2>` has a list of 2 generic Ts
struct Vertex<ast_genericsT_item> final : ASTOtherLeaf {
TypeExpr* created_type; // used to keep same pointer, since TypeExpr::new_var(i) always allocates
std::string_view nameT;
Vertex(SrcLocation loc, TypeExpr* created_type, std::string_view nameT)
Vertex(SrcLocation loc, std::string_view nameT)
: ASTOtherLeaf(ast_genericsT_item, loc)
, created_type(created_type), nameT(nameT) {}
, nameT(nameT) {}
};
template<>
// ast_genericsT_list is a container for generics T at declaration
// example: see above
struct Vertex<ast_genericsT_list> final : ASTOtherVararg {
std::vector<AnyV> get_items() const { return children; }
auto get_item(int i) const { return children.at(i)->as<ast_genericsT_item>(); }
@ -647,24 +856,55 @@ struct Vertex<ast_genericsT_list> final : ASTOtherVararg {
int lookup_idx(std::string_view nameT) const;
};
template<>
// One manual substitution of a generic T written in code, mostly for function calls:
//   `g<int>()` / `t.tupleFirst<slice>()` / `f<(int, slice), builder>()`
struct Vertex<ast_instantiationT_item> final : ASTOtherLeaf {
  // the type written between the angle brackets
  TypePtr substituted_type;

  Vertex(SrcLocation loc, TypePtr substituted_type)
    : ASTOtherLeaf(ast_instantiationT_item, loc)
    , substituted_type(substituted_type) {
  }

  // const-stripping accessor, used together with assign_resolved_type
  Vertex* mutate() const {
    return const_cast<Vertex*>(this);
  }
  void assign_resolved_type(TypePtr substituted_type);
};
template<>
// ast_instantiationT_list is a container for generic T substitutions used in code
// examples: `g<int>()` has one item, `f<(int, slice), builder>()` has two
struct Vertex<ast_instantiationT_list> final : ASTOtherVararg {
  // Read-only view of all substitutions.
  // Returned by const reference (like get_items() of ast_tensor and get_params() of ast_parameter_list),
  // so that iterating does not copy the whole vector on every call.
  // The reference stays valid as long as this vertex is alive; callers that need their own copy
  // still get one by assigning the result to a value.
  const std::vector<AnyV>& get_items() const { return children; }
  // i-th substitution, converted to an ast_instantiationT_item vertex (bounds-checked by `at`)
  auto get_item(int i) const { return children.at(i)->as<ast_instantiationT_item>(); }

  // `instantiationTs` is taken by value and moved into the base class
  Vertex(SrcLocation loc, std::vector<AnyV> instantiationTs)
    : ASTOtherVararg(ast_instantiationT_list, loc, std::move(instantiationTs)) {}
};
template<>
// ast_parameter is a parameter of a function in its declaration
// example: `fun f(a: int, mutate b: slice)` has 2 parameters
// NOTE(review): `param_ref`, `declared_type` and the constructor head each appear twice below (`TypeExpr*` vs
// `TypePtr` variants) — this looks like the old and the new side of a diff hunk rendered together;
// confirm which lines are current (only one of each pair can stay for the struct to compile)
struct Vertex<ast_parameter> final : ASTOtherLeaf {
const LocalVarData* param_ref = nullptr; // filled after resolved
const LocalVarData* param_ref = nullptr; // filled on resolve identifiers
std::string_view param_name;
TypeExpr* declared_type;
TypePtr declared_type;
bool declared_as_mutate; // declared as `mutate param_name`
// a parameter counts as "underscore" when its name is empty
bool is_underscore() const { return param_name.empty(); }
// casts away constness so that the assign_* setters can be called on a const vertex
Vertex* mutate() const { return const_cast<Vertex*>(this); }
void assign_param_ref(const LocalVarData* param_ref);
void assign_resolved_type(TypePtr declared_type);
Vertex(SrcLocation loc, std::string_view param_name, TypeExpr* declared_type, bool declared_as_mutate)
Vertex(SrcLocation loc, std::string_view param_name, TypePtr declared_type, bool declared_as_mutate)
: ASTOtherLeaf(ast_parameter, loc)
, param_name(param_name), declared_type(declared_type), declared_as_mutate(declared_as_mutate) {}
};
template<>
// ast_parameter_list is a container of parameters
// example: see above
struct Vertex<ast_parameter_list> final : ASTOtherVararg {
const std::vector<AnyV>& get_params() const { return children; }
auto get_param(int i) const { return children.at(i)->as<ast_parameter>(); }
@ -678,22 +918,12 @@ struct Vertex<ast_parameter_list> final : ASTOtherVararg {
};
template<>
struct Vertex<ast_asm_body> final : ASTStatementVararg {
std::vector<int> arg_order;
std::vector<int> ret_order;
const std::vector<AnyV>& get_asm_commands() const { return children; } // ast_string_const[]
Vertex(SrcLocation loc, std::vector<int> arg_order, std::vector<int> ret_order, std::vector<AnyV> asm_commands)
: ASTStatementVararg(ast_asm_body, loc, std::move(asm_commands))
, arg_order(std::move(arg_order)), ret_order(std::move(ret_order)) {}
};
template<>
// ast_annotation is @annotation above a declaration
// example: `@pure fun ...`
struct Vertex<ast_annotation> final : ASTOtherVararg {
AnnotationKind kind;
auto get_arg() const { return child(0)->as<ast_tensor>(); }
auto get_arg() const { return children.at(0)->as<ast_tensor>(); }
static AnnotationKind parse_kind(std::string_view name);
@ -703,63 +933,79 @@ struct Vertex<ast_annotation> final : ASTOtherVararg {
};
template<>
// ast_local_var is one declared variable; its only child is the name (ast_identifier or ast_underscore)
// NOTE(review): still uses `TypeExpr*`, and an ast_local_var_lhs vertex with the same flags exists elsewhere
// in this file, so this is presumably the pre-change side of the diff — confirm
struct Vertex<ast_local_var> final : ASTExprUnary {
const Symbol* var_maybe = nullptr; // typically local var; can be global var if `var g_v redef`; remains nullptr for underscore
TypeExpr* declared_type;
bool is_immutable; // declared via 'val', not 'var'
bool marked_as_redef; // var (existing_var redef, new_var: int) = ...
AnyExprV get_identifier() const { return child; } // ast_identifier / ast_underscore
// casts away constness so that assign_var_ref can be called on a const vertex
Vertex* mutate() const { return const_cast<Vertex*>(this); }
void assign_var_ref(const Symbol* var_ref);
Vertex(SrcLocation loc, AnyExprV name_identifier, TypeExpr* declared_type, bool is_immutable, bool marked_as_redef)
: ASTExprUnary(ast_local_var, loc, name_identifier), declared_type(declared_type), is_immutable(is_immutable), marked_as_redef(marked_as_redef) {}
};
template<>
// statement form of a local variables declaration; children layout: [0] lhs, [1] assigned value
// NOTE(review): an expression-based (ASTExprUnary) specialization for the same ast_local_vars_declaration kind
// exists elsewhere in this file, so this one is presumably the pre-change side of the diff — confirm
struct Vertex<ast_local_vars_declaration> final : ASTStatementVararg {
AnyExprV get_lhs() const { return child_as_expr(0); } // ast_local_var / ast_tensor / ast_tensor_square
// child 1: the expression on the right side of the declaration
AnyExprV get_assigned_val() const { return child_as_expr(1); }
Vertex(SrcLocation loc, AnyExprV lhs, AnyExprV assigned_val)
: ASTStatementVararg(ast_local_vars_declaration, loc, {lhs, assigned_val}) {}
};
template<>
// ast_function_declaration is declaring a function/method
// methods are still global functions, just accepting "self" first parameter
// example: `fun f() { ... }`
// functions can be generic, `fun f<T>(params) { ... }`
// their body is either sequence (regular code function), or `asm`, or `builtin`
// NOTE(review): as rendered here, removed and added lines of the commit are interleaved,
// so several accessors, fields and predicates below appear both in their old and in their new form
struct Vertex<ast_function_declaration> final : ASTOtherVararg {
// children layout, as passed by the constructors below: #0 name identifier, #1 parameter list, #2 body
auto get_identifier() const { return child(0)->as<ast_identifier>(); }
int get_num_params() const { return child(1)->as<ast_parameter_list>()->size(); }
auto get_param_list() const { return child(1)->as<ast_parameter_list>(); }
auto get_param(int i) const { return child(1)->as<ast_parameter_list>()->get_param(i); }
AnyV get_body() const { return child(2); } // ast_sequence / ast_asm_body
// same accessors, reading `children` directly with bounds-checked .at()
auto get_identifier() const { return children.at(0)->as<ast_identifier>(); }
int get_num_params() const { return children.at(1)->as<ast_parameter_list>()->size(); }
auto get_param_list() const { return children.at(1)->as<ast_parameter_list>(); }
auto get_param(int i) const { return children.at(1)->as<ast_parameter_list>()->get_param(i); }
AnyV get_body() const { return children.at(2); } // ast_sequence / ast_asm_body
const FunctionData* fun_ref = nullptr; // filled after register
TypeExpr* ret_type = nullptr;
V<ast_genericsT_list> genericsT_list = nullptr;
// one bool per property of a function
bool is_entrypoint = false;
bool marked_as_pure = false;
bool marked_as_builtin = false;
bool marked_as_get_method = false;
bool marked_as_inline = false;
bool marked_as_inline_ref = false;
bool accepts_self = false;
bool returns_self = false;
V<ast_int_const> method_id = nullptr;
TypePtr declared_return_type; // filled at ast parsing; if unspecified (nullptr), means "auto infer"
V<ast_genericsT_list> genericsT_list; // for non-generics it's nullptr
td::RefInt256 method_id; // specified via @method_id annotation
int flags; // from enum in FunctionData
// the kind of a function is told apart by the vertex type of child #2 (its body)
bool is_asm_function() const { return children.at(2)->type == ast_asm_body; }
bool is_regular_function() const { return children.at(2)->type == ast_sequence; }
bool is_builtin_function() const { return marked_as_builtin; }
bool is_code_function() const { return children.at(2)->type == ast_sequence; }
// in this form, a builtin is recognized by its body being ast_empty_statement
bool is_builtin_function() const { return children.at(2)->type == ast_empty_statement; }
// returns a non-const pointer to this vertex (the assign_* members below are non-const)
Vertex* mutate() const { return const_cast<Vertex*>(this); }
void assign_fun_ref(const FunctionData* fun_ref);
void assign_resolved_type(TypePtr declared_return_type);
// constructor taking only the three children
Vertex(SrcLocation loc, V<ast_identifier> name_identifier, V<ast_parameter_list> parameters, AnyV body)
: ASTOtherVararg(ast_function_declaration, loc, {name_identifier, parameters, body}) {}
// constructor taking the three children plus return type, generic Ts, method_id and flags
Vertex(SrcLocation loc, V<ast_identifier> name_identifier, V<ast_parameter_list> parameters, AnyV body, TypePtr declared_return_type, V<ast_genericsT_list> genericsT_list, td::RefInt256 method_id, int flags)
: ASTOtherVararg(ast_function_declaration, loc, {name_identifier, parameters, body})
, declared_return_type(declared_return_type), genericsT_list(genericsT_list), method_id(std::move(method_id)), flags(flags) {}
};
template<>
// ast_global_var_declaration: a global variable declared at file level, not inside a function
// example: `global g: int;`
// a global carries no default value, since a contract has no single "entrypoint" where it could be assigned
struct Vertex<ast_global_var_declaration> final : ASTOtherVararg {
  const GlobalVarData* var_ref = nullptr;   // nullptr until filled after register
  TypePtr declared_type;                    // always present: typing globals is mandatory

  Vertex(SrcLocation loc, V<ast_identifier> name_identifier, TypePtr type_of_global)
    : ASTOtherVararg(ast_global_var_declaration, loc, {name_identifier})
    , declared_type(type_of_global) {}

  // the only child is the identifier of the global
  auto get_identifier() const {
    return children.at(0)->as<ast_identifier>();
  }

  // returns a non-const pointer to this vertex (the assign_* members below are non-const)
  Vertex* mutate() const {
    return const_cast<Vertex*>(this);
  }

  void assign_var_ref(const GlobalVarData* var_ref);
  void assign_resolved_type(TypePtr declared_type);
};
template<>
// ast_constant_declaration: a global constant declared at file level, not inside a function
// example: `const op = 0x123;`
struct Vertex<ast_constant_declaration> final : ASTOtherVararg {
  const GlobalConstData* const_ref = nullptr;   // nullptr until filled after register
  TypePtr declared_type;                        // not null for `const op: int = ...`

  Vertex(SrcLocation loc, V<ast_identifier> name_identifier, TypePtr type_of_const, AnyExprV init_value)
    : ASTOtherVararg(ast_constant_declaration, loc, {name_identifier, init_value})
    , declared_type(type_of_const) {}

  // child #0: the identifier of the constant
  auto get_identifier() const {
    return children.at(0)->as<ast_identifier>();
  }

  // child #1: the initializer expression
  AnyExprV get_init_value() const {
    return child_as_expr(1);
  }

  // returns a non-const pointer to this vertex (the assign_* members below are non-const)
  Vertex* mutate() const {
    return const_cast<Vertex*>(this);
  }

  void assign_const_ref(const GlobalConstData* const_ref);
  void assign_resolved_type(TypePtr declared_type);
};
template<>
// ast_tolk_required_version is a preamble fixating compiler's version at the top of the file
// example: `tolk 0.6`
// when the compiler version mismatches, it means that another compiler version was used for these sources earlier; a warning is emitted
struct Vertex<ast_tolk_required_version> final : ASTOtherLeaf {
std::string_view semver;
@ -769,21 +1015,27 @@ struct Vertex<ast_tolk_required_version> final : ASTOtherLeaf {
};
template<>
// NOTE(review): as rendered here, removed and added lines of the commit are interleaved:
// the struct header, the accessors and the base initializer appear under both
// the `ast_import_statement` and the `ast_import_directive` vertex type
struct Vertex<ast_import_statement> final : ASTOtherVararg {
const SrcFile* file = nullptr; // assigned after imports have been resolved
// ast_import_directive is an import at the top of the file
// examples: `import "another.tolk"` / `import "@stdlib/tvm-dicts"`
struct Vertex<ast_import_directive> final : ASTOtherVararg {
const SrcFile* file = nullptr; // assigned after imports have been resolved, just after parsing a file to ast
// the only child is the string constant holding the imported file name
auto get_file_leaf() const { return child(0)->as<ast_string_const>(); }
auto get_file_leaf() const { return children.at(0)->as<ast_string_const>(); }
// the same file name, copied out of the leaf's str_val into an owning std::string
std::string get_file_name() const { return static_cast<std::string>(child(0)->as<ast_string_const>()->str_val); }
std::string get_file_name() const { return static_cast<std::string>(children.at(0)->as<ast_string_const>()->str_val); }
// returns a non-const pointer to this vertex (assign_src_file below is non-const)
Vertex* mutate() const { return const_cast<Vertex*>(this); }
void assign_src_file(const SrcFile* file);
Vertex(SrcLocation loc, V<ast_string_const> file_name)
: ASTOtherVararg(ast_import_statement, loc, {file_name}) {}
: ASTOtherVararg(ast_import_directive, loc, {file_name}) {}
};
template<>
// ast_tolk_file represents a whole parsed input .tolk file
// with functions, constants, etc.
// particularly, it contains imports that lead to loading other files
// a whole program consists of multiple parsed files, each of them has a parsed ast tree (stdlib is also parsed)
struct Vertex<ast_tolk_file> final : ASTOtherVararg {
const SrcFile* const file;

View file

@ -16,6 +16,8 @@
*/
#include "tolk.h"
#include "compiler-state.h"
#include "type-system.h"
#include "generics-helpers.h"
namespace tolk {
using namespace std::literals::string_literals;
@ -23,46 +25,33 @@ using namespace std::literals::string_literals;
// given func_type = `(slice, int) -> slice` and func flags, create SymLocalVarOrParameter
// currently (see at the bottom) parameters of built-in functions are unnamed:
// built-in functions are created using a resulting type
// builds the list of (unnamed) parameters for a built-in function
// NOTE(review): as rendered here, removed and added lines of the commit are interleaved:
// the TypeExpr-based variant unpacks parameters out of a function type,
// the TypePtr-based variant receives one type per parameter
static std::vector<LocalVarData> define_builtin_parameters(const TypeExpr* func_type, int func_flags) {
static std::vector<LocalVarData> define_builtin_parameters(const std::vector<TypePtr>& params_types, int func_flags) {
// `loadInt()`, `storeInt()`: they accept `self` and mutate it; no other options available in built-ins for now
bool is_mutate_self = func_flags & FunctionData::flagHasMutateParams;
// func_type a map (params_type -> ret_type), probably surrounded by forall (internal representation of <T>)
TypeExpr* params_type = func_type->constr == TypeExpr::te_ForAll ? func_type->args[0]->args[0] : func_type->args[0];
std::vector<LocalVarData> parameters;
parameters.reserve(params_types.size());
if (params_type->constr == TypeExpr::te_Tensor) { // multiple parameters: it's a tensor
parameters.reserve(params_type->args.size());
for (int i = 0; i < static_cast<int>(params_type->args.size()); ++i) {
LocalVarData p_sym("", {}, i, params_type->args[i]);
// only the first parameter (self) can be the mutated one
if (i == 0 && is_mutate_self) {
p_sym.flags |= LocalVarData::flagMutateParameter;
}
parameters.push_back(std::move(p_sym));
}
} else { // single parameter
LocalVarData p_sym("", {}, 0, params_type);
if (is_mutate_self) {
p_sym.flags |= LocalVarData::flagMutateParameter;
}
for (int i = 0; i < static_cast<int>(params_types.size()); ++i) {
// the bool condition multiplied by the flag gives either flagMutateParameter (first parameter of a mutating built-in) or 0
LocalVarData p_sym("", {}, params_types[i], (i == 0 && is_mutate_self) * LocalVarData::flagMutateParameter, i);
parameters.push_back(std::move(p_sym));
}
return parameters;
}
// registers a built-in function whose body is given as a compile callback
// NOTE(review): as rendered here, the removed (TypeExpr-based) and the added (TypePtr-based)
// signature and FunctionData construction are interleaved
static void define_builtin_func(const std::string& name, TypeExpr* func_type, const simple_compile_func_t& func, int flags) {
auto* f_sym = new FunctionData(name, {}, func_type, define_builtin_parameters(func_type, flags), flags, new FunctionBodyBuiltin(func));
static void define_builtin_func(const std::string& name, const std::vector<TypePtr>& params_types, TypePtr return_type, const GenericsDeclaration* genericTs, const simple_compile_func_t& func, int flags) {
auto* f_sym = new FunctionData(name, {}, return_type, define_builtin_parameters(params_types, flags), flags, genericTs, nullptr, new FunctionBodyBuiltin(func), nullptr);
// the created symbol is handed over to the global symbol table
G.symtable.add_function(f_sym);
}
// registers a built-in function whose body is a single AsmOp macro,
// wrapped into a compile callback by make_simple_compile()
// NOTE(review): as rendered here, the removed (TypeExpr-based) and the added (TypePtr-based)
// signature and FunctionData construction are interleaved
static void define_builtin_func(const std::string& name, TypeExpr* func_type, const AsmOp& macro, int flags) {
auto* f_sym = new FunctionData(name, {}, func_type, define_builtin_parameters(func_type, flags), flags, new FunctionBodyBuiltin(make_simple_compile(macro)));
static void define_builtin_func(const std::string& name, const std::vector<TypePtr>& params_types, TypePtr return_type, const GenericsDeclaration* genericTs, const AsmOp& macro, int flags) {
auto* f_sym = new FunctionData(name, {}, return_type, define_builtin_parameters(params_types, flags), flags, genericTs, nullptr, new FunctionBodyBuiltin(make_simple_compile(macro)), nullptr);
// the created symbol is handed over to the global symbol table
G.symtable.add_function(f_sym);
}
static void define_builtin_func(const std::string& name, TypeExpr* func_type, const simple_compile_func_t& func, int flags,
static void define_builtin_func(const std::string& name, const std::vector<TypePtr>& params_types, TypePtr return_type, const GenericsDeclaration* genericTs, const simple_compile_func_t& func, int flags,
std::initializer_list<int> arg_order, std::initializer_list<int> ret_order) {
auto* f_sym = new FunctionData(name, {}, func_type, define_builtin_parameters(func_type, flags), flags, new FunctionBodyBuiltin(func));
auto* f_sym = new FunctionData(name, {}, return_type, define_builtin_parameters(params_types, flags), flags, genericTs, nullptr, new FunctionBodyBuiltin(func), nullptr);
f_sym->arg_order = arg_order;
f_sym->ret_order = ret_order;
G.symtable.add_function(f_sym);
@ -1080,128 +1069,169 @@ AsmOp compile_is_null(std::vector<VarDescr>& res, std::vector<VarDescr>& args, S
// registers all built-in operators and functions by calling define_builtin_func() for each of them
// NOTE(review): as rendered here, removed and added lines of the commit are interleaved:
// every registration appears first with a TypeExpr function type (old form),
// then with a list of parameter types, a return type and a generics declaration (new form)
void define_builtins() {
using namespace std::placeholders;
// old form: atomic types, tensors of them, and function types (maps) composed from them
TypeExpr* Unit = TypeExpr::new_unit();
TypeExpr* Int = TypeExpr::new_atomic(TypeExpr::_Int);
TypeExpr* Slice = TypeExpr::new_atomic(TypeExpr::_Slice);
TypeExpr* Builder = TypeExpr::new_atomic(TypeExpr::_Builder);
TypeExpr* Tuple = TypeExpr::new_atomic(TypeExpr::_Tuple);
TypeExpr* Int2 = TypeExpr::new_tensor({Int, Int});
TypeExpr* Int3 = TypeExpr::new_tensor({Int, Int, Int});
TypeExpr* TupleInt = TypeExpr::new_tensor({Tuple, Int});
TypeExpr* SliceInt = TypeExpr::new_tensor({Slice, Int});
TypeExpr* X = TypeExpr::new_var(0);
TypeExpr* arith_bin_op = TypeExpr::new_map(Int2, Int);
TypeExpr* arith_un_op = TypeExpr::new_map(Int, Int);
TypeExpr* impure_un_op = TypeExpr::new_map(Int, Unit);
TypeExpr* fetch_int_op_mutate = TypeExpr::new_map(SliceInt, SliceInt);
TypeExpr* prefetch_int_op = TypeExpr::new_map(SliceInt, Int);
TypeExpr* store_int_mutate = TypeExpr::new_map(TypeExpr::new_tensor({Builder, Int, Int}), TypeExpr::new_tensor({Builder, Unit}));
TypeExpr* fetch_slice_op_mutate = TypeExpr::new_map(SliceInt, TypeExpr::new_tensor({Slice, Slice}));
TypeExpr* prefetch_slice_op = TypeExpr::new_map(SliceInt, Slice);
TypeExpr* throw_arg_op = TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_tensor({X, Int}), Unit));
// new form: types obtained from TypeData*::create()
TypePtr Unit = TypeDataVoid::create();
TypePtr Int = TypeDataInt::create();
TypePtr Slice = TypeDataSlice::create();
TypePtr Builder = TypeDataBuilder::create();
TypePtr Tuple = TypeDataTuple::create();
define_builtin_func("_+_", arith_bin_op, compile_add,
// a generics declaration with a single item "T", passed to every generic built-in below
std::vector<GenericsDeclaration::GenericsItem> itemsT;
itemsT.emplace_back("T");
TypePtr typeT = TypeDataGenericT::create("T");
const GenericsDeclaration* declGenericT = new GenericsDeclaration(std::move(itemsT));
// frequently used parameter lists
std::vector ParamsInt1 = {Int};
std::vector ParamsInt2 = {Int, Int};
std::vector ParamsInt3 = {Int, Int, Int};
std::vector ParamsSliceInt = {Slice, Int};
// arithmetic, shift and bitwise operators: int parameters, int result, all marked as pure
define_builtin_func("_+_", ParamsInt2, Int, nullptr,
compile_add,
FunctionData::flagMarkedAsPure);
define_builtin_func("_-_", arith_bin_op, compile_sub,
define_builtin_func("_-_", ParamsInt2, Int, nullptr,
compile_sub,
FunctionData::flagMarkedAsPure);
define_builtin_func("-_", arith_un_op, compile_unary_minus,
define_builtin_func("-_", ParamsInt1, Int, nullptr,
compile_unary_minus,
FunctionData::flagMarkedAsPure);
define_builtin_func("+_", arith_un_op, compile_unary_plus,
define_builtin_func("+_", ParamsInt1, Int, nullptr,
compile_unary_plus,
FunctionData::flagMarkedAsPure);
define_builtin_func("_*_", arith_bin_op, compile_mul,
define_builtin_func("_*_", ParamsInt2, Int, nullptr,
compile_mul,
FunctionData::flagMarkedAsPure);
// the three division operators share compile_div and differ only in its last bound argument (-1 / 0 / 1)
define_builtin_func("_/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, -1),
define_builtin_func("_/_", ParamsInt2, Int, nullptr,
std::bind(compile_div, _1, _2, _3, -1),
FunctionData::flagMarkedAsPure);
define_builtin_func("_~/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 0),
define_builtin_func("_~/_", ParamsInt2, Int, nullptr,
std::bind(compile_div, _1, _2, _3, 0),
FunctionData::flagMarkedAsPure);
define_builtin_func("_^/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 1),
define_builtin_func("_^/_", ParamsInt2, Int, nullptr,
std::bind(compile_div, _1, _2, _3, 1),
FunctionData::flagMarkedAsPure);
define_builtin_func("_%_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, -1),
define_builtin_func("_%_", ParamsInt2, Int, nullptr,
std::bind(compile_mod, _1, _2, _3, -1),
FunctionData::flagMarkedAsPure);
define_builtin_func("_<<_", arith_bin_op, compile_lshift,
define_builtin_func("_<<_", ParamsInt2, Int, nullptr,
compile_lshift,
FunctionData::flagMarkedAsPure);
// the three right shifts share compile_rshift and differ only in its last bound argument (-1 / 0 / 1)
define_builtin_func("_>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1),
define_builtin_func("_>>_", ParamsInt2, Int, nullptr,
std::bind(compile_rshift, _1, _2, _3, -1),
FunctionData::flagMarkedAsPure);
define_builtin_func("_~>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 0),
define_builtin_func("_~>>_", ParamsInt2, Int, nullptr,
std::bind(compile_rshift, _1, _2, _3, 0),
FunctionData::flagMarkedAsPure);
define_builtin_func("_^>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 1),
define_builtin_func("_^>>_", ParamsInt2, Int, nullptr,
std::bind(compile_rshift, _1, _2, _3, 1),
FunctionData::flagMarkedAsPure);
define_builtin_func("!_", arith_un_op, compile_logical_not,
define_builtin_func("!_", ParamsInt1, Int, nullptr,
compile_logical_not,
FunctionData::flagMarkedAsPure);
define_builtin_func("~_", arith_un_op, compile_bitwise_not,
define_builtin_func("~_", ParamsInt1, Int, nullptr,
compile_bitwise_not,
FunctionData::flagMarkedAsPure);
define_builtin_func("_&_", arith_bin_op, compile_bitwise_and,
define_builtin_func("_&_", ParamsInt2, Int, nullptr,
compile_bitwise_and,
FunctionData::flagMarkedAsPure);
define_builtin_func("_|_", arith_bin_op, compile_bitwise_or,
define_builtin_func("_|_", ParamsInt2, Int, nullptr,
compile_bitwise_or,
FunctionData::flagMarkedAsPure);
define_builtin_func("_^_", arith_bin_op, compile_bitwise_xor,
define_builtin_func("_^_", ParamsInt2, Int, nullptr,
compile_bitwise_xor,
FunctionData::flagMarkedAsPure);
// comparison operators: all share compile_cmp_int, the bound integer selects the particular comparison
define_builtin_func("_==_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 2),
define_builtin_func("_==_", ParamsInt2, Int, nullptr,
std::bind(compile_cmp_int, _1, _2, 2),
FunctionData::flagMarkedAsPure);
define_builtin_func("_!=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 5),
define_builtin_func("_!=_", ParamsInt2, Int, nullptr,
std::bind(compile_cmp_int, _1, _2, 5),
FunctionData::flagMarkedAsPure);
define_builtin_func("_<_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 4),
define_builtin_func("_<_", ParamsInt2, Int, nullptr,
std::bind(compile_cmp_int, _1, _2, 4),
FunctionData::flagMarkedAsPure);
define_builtin_func("_>_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 1),
define_builtin_func("_>_", ParamsInt2, Int, nullptr,
std::bind(compile_cmp_int, _1, _2, 1),
FunctionData::flagMarkedAsPure);
define_builtin_func("_<=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 6),
define_builtin_func("_<=_", ParamsInt2, Int, nullptr,
std::bind(compile_cmp_int, _1, _2, 6),
FunctionData::flagMarkedAsPure);
define_builtin_func("_>=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 3),
define_builtin_func("_>=_", ParamsInt2, Int, nullptr,
std::bind(compile_cmp_int, _1, _2, 3),
FunctionData::flagMarkedAsPure);
define_builtin_func("_<=>_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 7),
define_builtin_func("_<=>_", ParamsInt2, Int, nullptr,
std::bind(compile_cmp_int, _1, _2, 7),
FunctionData::flagMarkedAsPure);
// mulDiv family: three int parameters; the first three share compile_muldiv with a different last bound argument
define_builtin_func("mulDivFloor", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, -1),
define_builtin_func("mulDivFloor", ParamsInt3, Int, nullptr,
std::bind(compile_muldiv, _1, _2, _3, -1),
FunctionData::flagMarkedAsPure);
define_builtin_func("mulDivRound", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 0),
define_builtin_func("mulDivRound", ParamsInt3, Int, nullptr,
std::bind(compile_muldiv, _1, _2, _3, 0),
FunctionData::flagMarkedAsPure);
define_builtin_func("mulDivCeil", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 1),
define_builtin_func("mulDivCeil", ParamsInt3, Int, nullptr,
std::bind(compile_muldiv, _1, _2, _3, 1),
FunctionData::flagMarkedAsPure);
// mulDivMod returns a tensor of two ints
define_builtin_func("mulDivMod", TypeExpr::new_map(Int3, Int2), AsmOp::Custom("MULDIVMOD", 3, 2),
define_builtin_func("mulDivMod", ParamsInt3, TypeDataTensor::create({Int, Int}), nullptr,
AsmOp::Custom("MULDIVMOD", 3, 2),
FunctionData::flagMarkedAsPure);
// parameterless built-ins: __true / __false return int, __null returns generic T
define_builtin_func("__true", TypeExpr::new_map(TypeExpr::new_unit(), Int), /* AsmOp::Const("TRUE") */ std::bind(compile_bool_const, _1, _2, true),
define_builtin_func("__true", {}, Int, nullptr, /* AsmOp::Const("TRUE") */
std::bind(compile_bool_const, _1, _2, true),
FunctionData::flagMarkedAsPure);
define_builtin_func("__false", TypeExpr::new_map(TypeExpr::new_unit(), Int), /* AsmOp::Const("FALSE") */ std::bind(compile_bool_const, _1, _2, false),
define_builtin_func("__false", {}, Int, nullptr, /* AsmOp::Const("FALSE") */
std::bind(compile_bool_const, _1, _2, false),
FunctionData::flagMarkedAsPure);
define_builtin_func("__null", TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_unit(), X)), AsmOp::Const("PUSHNULL"),
define_builtin_func("__null", {}, typeT, declGenericT,
AsmOp::Const("PUSHNULL"),
FunctionData::flagMarkedAsPure);
define_builtin_func("__isNull", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Int)), compile_is_null,
define_builtin_func("__isNull", {typeT}, Int, declGenericT,
compile_is_null,
FunctionData::flagMarkedAsPure);
// throwing built-ins are registered with flags = 0, i.e. not marked as pure
define_builtin_func("__throw", impure_un_op, compile_throw,
define_builtin_func("__throw", ParamsInt1, Unit, nullptr,
compile_throw,
0);
define_builtin_func("__throw_arg", throw_arg_op, compile_throw_arg,
define_builtin_func("__throw_arg", {typeT, Int}, Unit, declGenericT,
compile_throw_arg,
0);
define_builtin_func("__throw_if_unless", TypeExpr::new_map(Int3, Unit), compile_throw_if_unless,
define_builtin_func("__throw_if_unless", ParamsInt3, Unit, nullptr,
compile_throw_if_unless,
0);
// slice reading: load* are registered with flagHasMutateParams and a ret_order, preload* without them
define_builtin_func("loadInt", fetch_int_op_mutate, std::bind(compile_fetch_int, _1, _2, true, true),
define_builtin_func("loadInt", ParamsSliceInt, Int, nullptr,
std::bind(compile_fetch_int, _1, _2, true, true),
FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf,
{}, {1, 0});
define_builtin_func("loadUint", fetch_int_op_mutate, std::bind(compile_fetch_int, _1, _2, true, false),
define_builtin_func("loadUint", ParamsSliceInt, Int, nullptr,
std::bind(compile_fetch_int, _1, _2, true, false),
FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf,
{}, {1, 0});
define_builtin_func("loadBits", fetch_slice_op_mutate, std::bind(compile_fetch_slice, _1, _2, true),
define_builtin_func("loadBits", ParamsSliceInt, Slice, nullptr,
std::bind(compile_fetch_slice, _1, _2, true),
FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf,
{}, {1, 0});
define_builtin_func("preloadInt", prefetch_int_op, std::bind(compile_fetch_int, _1, _2, false, true),
define_builtin_func("preloadInt", ParamsSliceInt, Int, nullptr,
std::bind(compile_fetch_int, _1, _2, false, true),
FunctionData::flagMarkedAsPure | FunctionData::flagAcceptsSelf);
define_builtin_func("preloadUint", prefetch_int_op, std::bind(compile_fetch_int, _1, _2, false, false),
define_builtin_func("preloadUint", ParamsSliceInt, Int, nullptr,
std::bind(compile_fetch_int, _1, _2, false, false),
FunctionData::flagMarkedAsPure | FunctionData::flagAcceptsSelf);
define_builtin_func("preloadBits", prefetch_slice_op, std::bind(compile_fetch_slice, _1, _2, false),
define_builtin_func("preloadBits", ParamsSliceInt, Slice, nullptr,
std::bind(compile_fetch_slice, _1, _2, false),
FunctionData::flagMarkedAsPure | FunctionData::flagAcceptsSelf);
// builder writing: registered as mutating self and returning self, with an explicit arg_order
define_builtin_func("storeInt", store_int_mutate, std::bind(compile_store_int, _1, _2, true),
define_builtin_func("storeInt", {Builder, Int, Int}, Unit, nullptr,
std::bind(compile_store_int, _1, _2, true),
FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf | FunctionData::flagReturnsSelf,
{1, 0, 2}, {});
define_builtin_func("storeUint", store_int_mutate, std::bind(compile_store_int, _1, _2, false),
define_builtin_func("storeUint", {Builder, Int, Int}, Unit, nullptr,
std::bind(compile_store_int, _1, _2, false),
FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf | FunctionData::flagReturnsSelf,
{1, 0, 2}, {});
// tupleAt is generic in its return type
define_builtin_func("tupleAt", TypeExpr::new_forall({X}, TypeExpr::new_map(TupleInt, X)), compile_tuple_at,
define_builtin_func("tupleAt", {Tuple, Int}, typeT, declGenericT,
compile_tuple_at,
FunctionData::flagMarkedAsPure | FunctionData::flagAcceptsSelf);
// debug helpers are registered with flags = 0, i.e. not marked as pure
define_builtin_func("debugPrint", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Unit)),
define_builtin_func("debugPrint", {typeT}, Unit, declGenericT,
AsmOp::Custom("s0 DUMP DROP", 1, 1),
0);
define_builtin_func("debugPrintString", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Unit)),
define_builtin_func("debugPrintString", {typeT}, Unit, declGenericT,
AsmOp::Custom("STRDUMP DROP", 1, 1),
0);
define_builtin_func("debugDumpStack", TypeExpr::new_map(Unit, Unit),
define_builtin_func("debugDumpStack", {}, Unit, nullptr,
AsmOp::Custom("DUMPSTK", 0, 0),
0);
}

View file

@ -16,6 +16,7 @@
*/
#include "tolk.h"
#include "compiler-state.h"
#include "type-system.h"
namespace tolk {
@ -345,16 +346,16 @@ bool Op::generate_code_step(Stack& stack) {
if (f_sym->is_asm_function() || f_sym->is_builtin_function()) {
// TODO: create and compile a true lambda instead of this (so that arg_order and ret_order would work correctly)
std::vector<VarDescr> args0, res;
TypeExpr* func_type = f_sym->full_type;
TypeExpr::remove_indirect(func_type);
tolk_assert(func_type->is_map());
auto wr = func_type->args.at(0)->get_width();
auto wl = func_type->args.at(1)->get_width();
tolk_assert(wl >= 0 && wr >= 0);
for (int i = 0; i < wl; i++) {
int w_arg = 0;
for (const LocalVarData& param : f_sym->parameters) {
w_arg += param.declared_type->calc_width_on_stack();
}
int w_ret = f_sym->inferred_return_type->calc_width_on_stack();
tolk_assert(w_ret >= 0 && w_arg >= 0);
for (int i = 0; i < w_ret; i++) {
res.emplace_back(0);
}
for (int i = 0; i < wr; i++) {
for (int i = 0; i < w_arg; i++) {
args0.emplace_back(0);
}
if (f_sym->is_asm_function()) {
@ -456,14 +457,12 @@ bool Op::generate_code_step(Stack& stack) {
right1.push_back(arg.idx);
}
}
} else if (arg_order) {
for (int i = 0; i < (int)right.size(); i++) {
right1.push_back(right.at(arg_order->at(i)));
}
} else {
tolk_assert(!arg_order);
right1 = right;
}
std::vector<bool> last;
last.reserve(right1.size());
for (var_idx_t x : right1) {
last.push_back(var_info[x] && var_info[x]->is_last());
}
@ -489,7 +488,7 @@ bool Op::generate_code_step(Stack& stack) {
};
if (cl == _CallInd) {
exec_callxargs((int)right.size() - 1, (int)left.size());
} else if (!f_sym->is_regular_function()) {
} else if (!f_sym->is_code_function()) {
std::vector<VarDescr> res;
res.reserve(left.size());
for (var_idx_t i : left) {
@ -503,7 +502,7 @@ bool Op::generate_code_step(Stack& stack) {
} else {
if (f_sym->is_inline() || f_sym->is_inline_ref()) {
stack.o << AsmOp::Custom(f_sym->name + " INLINECALLDICT", (int)right.size(), (int)left.size());
} else if (f_sym->is_regular_function() && std::get<FunctionBodyCode*>(f_sym->body)->code->require_callxargs) {
} else if (f_sym->is_code_function() && std::get<FunctionBodyCode*>(f_sym->body)->code->require_callxargs) {
stack.o << AsmOp::Custom(f_sym->name + (" PREPAREDICT"), 0, 2);
exec_callxargs((int)right.size() + 1, (int)left.size());
} else {

View file

@ -66,4 +66,8 @@ void CompilerSettings::parse_experimental_options_cmd_arg(const std::string& cmd
}
}
// gives read-only access to the list of functions kept in the global compiler state
// (G.all_functions: user-defined functions, built-in ones are not listed there)
const std::vector<const FunctionData*>& get_all_not_builtin_functions() {
  const std::vector<const FunctionData*>& user_functions = G.all_functions;
  return user_functions;
}
} // namespace tolk

View file

@ -95,7 +95,7 @@ struct CompilerState {
GlobalSymbolTable symtable;
PersistentHeapAllocator persistent_mem;
std::vector<const FunctionData*> all_code_functions;
std::vector<const FunctionData*> all_functions; // all user-defined (not built-in) functions, with generic instantiations
std::vector<const FunctionData*> all_get_methods;
std::vector<const GlobalVarData*> all_global_vars;
std::vector<const GlobalConstData*> all_constants;

View file

@ -248,15 +248,19 @@ struct ConstantEvaluator {
return ConstantValue::from_int(std::move(intval));
}
// evaluates a reference to a global constant inside a constant expression:
// the name is looked up among global symbols, must resolve to GlobalConstData,
// and the value stored in that constant is returned
// NOTE(review): as rendered here, removed (`handle_identifier`, using v->name) and added
// (`handle_reference`, using v->get_name()) lines of the commit are interleaved
static ConstantValue handle_identifier(V<ast_identifier> v) {
static ConstantValue handle_reference(V<ast_reference> v) {
// todo better handle "appears, directly or indirectly, in its own initializer"
const Symbol* sym = lookup_global_symbol(v->name);
std::string_view name = v->get_name();
const Symbol* sym = lookup_global_symbol(name);
// presumably v->error() does not return (sym is dereferenced right after this check) - TODO confirm
if (!sym) {
v->error("undefined symbol `" + static_cast<std::string>(v->name) + "`");
v->error("undefined symbol `" + static_cast<std::string>(name) + "`");
}
// a symbol was found, but only constants are accepted here
const GlobalConstData* const_ref = sym->try_as<GlobalConstData>();
if (!const_ref) {
v->error("symbol `" + static_cast<std::string>(v->name) + "` is not a constant");
v->error("symbol `" + static_cast<std::string>(name) + "` is not a constant");
}
if (v->has_instantiationTs()) { // SOME_CONST<int>
v->error("constant is not a generic");
}
return {const_ref->value};
}
@ -274,8 +278,8 @@ struct ConstantEvaluator {
if (auto v_binop = v->try_as<ast_binary_operator>()) {
return handle_binary_operator(v_binop, visit(v_binop->get_lhs()), visit(v_binop->get_rhs()));
}
if (auto v_ident = v->try_as<ast_identifier>()) {
return handle_identifier(v_ident);
if (auto v_ref = v->try_as<ast_reference>()) {
return handle_reference(v_ref);
}
if (auto v_par = v->try_as<ast_parenthesized_expression>()) {
return visit(v_par->get_expr());

View file

@ -32,7 +32,8 @@ struct FunctionData;
struct GlobalVarData;
struct GlobalConstData;
struct TypeExpr;
class TypeData;
using TypePtr = const TypeData*;
struct SrcFile;

272
tolk/generics-helpers.cpp Normal file
View file

@ -0,0 +1,272 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#include "generics-helpers.h"
#include "tolk.h"
#include "ast.h"
#include "ast-replicator.h"
#include "type-system.h"
#include "compiler-state.h"
#include "pipeline.h"
namespace tolk {
// given orig = "(int, T)" and substitutions = [slice], return "(int, slice)"
// - orig: a type that may mention generic Ts; nullptr and types without any T inside are returned as is
// - genericTs: declaration of Ts; the position of a name in it is the index into substitutionTs
// - substitutionTs: one concrete type per declared T (equal sizes are asserted)
// throws Fatal if orig mentions a T that is not declared in genericTs
static TypePtr replace_genericT_with_deduced(TypePtr orig, const GenericsDeclaration* genericTs, const std::vector<TypePtr>& substitutionTs) {
  if (!orig || !orig->has_genericT_inside()) {
    return orig;
  }
  tolk_assert(genericTs->size() == substitutionTs.size());

  // substitutionTs is captured by reference, so the vector is not copied on every call;
  // this assumes replace_children_custom() invokes the callback only during this call
  // (it returns the resulting type right away) - the referenced vector outlives it
  return orig->replace_children_custom([genericTs, &substitutionTs](TypePtr child) {
    if (const TypeDataGenericT* asT = child->try_as<TypeDataGenericT>()) {
      int idx = genericTs->find_nameT(asT->nameT);
      if (idx == -1) {
        throw Fatal("can not replace generic " + asT->nameT);
      }
      return substitutionTs[idx];
    }
    // not a generic T: left untouched
    return child;
  });
}
// purpose: having `f<T>(value: T)` and call `f(5)`, deduce T = int
// generally, there may be many generic Ts for declaration, and many arguments
// for every argument, `consider_next_condition()` is called
// example: `f<T1, T2>(a: int, b: T1, c: (T1, T2))` and call `f(6, 7, (8, cs))`
// - `a` does not affect, it doesn't depend on generic Ts
// - next condition: param_type = `T1`, arg_type = `int`, deduce T1 = int
// - next condition: param_type = `(T1, T2)`, arg_type = `(int, slice)`, deduce T1 = int, T2 = slice
// for call `f(6, cs, (8, cs))` T1 will be both `slice` and `int`, fired an error
class GenericSubstitutionsDeduceForFunctionCall final {
const FunctionData* fun_ref;
std::vector<TypePtr> substitutions;
void provideDeducedT(const std::string& nameT, TypePtr deduced) {
if (deduced == TypeDataNullLiteral::create() || deduced->has_unknown_inside()) {
return; // just 'null' doesn't give sensible info
}
int idx = fun_ref->genericTs->find_nameT(nameT);
if (substitutions[idx] == nullptr) {
substitutions[idx] = deduced;
} else if (substitutions[idx] != deduced) {
throw std::runtime_error(nameT + " is both " + substitutions[idx]->as_human_readable() + " and " + deduced->as_human_readable());
}
}
public:
explicit GenericSubstitutionsDeduceForFunctionCall(const FunctionData* fun_ref)
: fun_ref(fun_ref) {
substitutions.resize(fun_ref->genericTs->size()); // filled with nullptr (nothing deduced)
}
void consider_next_condition(TypePtr param_type, TypePtr arg_type) {
if (const auto* asT = param_type->try_as<TypeDataGenericT>()) {
// `(arg: T)` called as `f([1, 2])` => T is [int, int]
provideDeducedT(asT->nameT, arg_type);
} else if (const auto* p_tensor = param_type->try_as<TypeDataTensor>()) {
// `arg: (int, T)` called as `f((5, cs))` => T is slice
if (const auto* a_tensor = arg_type->try_as<TypeDataTensor>(); a_tensor && a_tensor->size() == p_tensor->size()) {
for (int i = 0; i < a_tensor->size(); ++i) {
consider_next_condition(p_tensor->items[i], a_tensor->items[i]);
}
}
} else if (const auto* p_tuple = param_type->try_as<TypeDataTypedTuple>()) {
// `arg: [int, T]` called as `f([5, cs])` => T is slice
if (const auto* a_tuple = arg_type->try_as<TypeDataTypedTuple>(); a_tuple && a_tuple->size() == p_tuple->size()) {
for (int i = 0; i < a_tuple->size(); ++i) {
consider_next_condition(p_tuple->items[i], a_tuple->items[i]);
}
}
} else if (const auto* p_callable = param_type->try_as<TypeDataFunCallable>()) {
// `arg: fun(TArg) -> TResult` called as `f(calcTupleLen)` => TArg is tuple, TResult is int
if (const auto* a_callable = arg_type->try_as<TypeDataFunCallable>(); a_callable && a_callable->params_size() == p_callable->params_size()) {
for (int i = 0; i < a_callable->params_size(); ++i) {
consider_next_condition(p_callable->params_types[i], a_callable->params_types[i]);
}
consider_next_condition(p_callable->return_type, a_callable->return_type);
}
}
}
int get_first_not_deduced_idx() const {
for (int i = 0; i < static_cast<int>(substitutions.size()); ++i) {
if (substitutions[i] == nullptr) {
return i;
}
}
return -1;
}
std::vector<TypePtr> flush() {
return {std::move(substitutions)};
}
};
// Produces a copy of the body of `f<T>` in which every T is replaced with its substitution.
// before: `fun f<T>(v: T) { var cp: [T] = [v]; }`
// after:  `fun f<int>(v: int) { var cp: [int] = [v]; }`
// The instantiated function is a deep copy: all AST nodes are copied, no previous pointers are left.
class GenericFunctionReplicator final : public ASTReplicatorFunction {
  const GenericsDeclaration* genericTs;
  const std::vector<TypePtr>& substitutionTs;   // stored by reference: the vector must outlive the replicator

protected:
  using ASTReplicatorFunction::clone;

  // type-level clone hook: generic Ts inside `t` are replaced with the deduced types
  TypePtr clone(TypePtr t) override {
    TypePtr substituted = replace_genericT_with_deduced(t, genericTs, substitutionTs);
    return substituted;
  }

public:
  GenericFunctionReplicator(const GenericsDeclaration* generics_decl, const std::vector<TypePtr>& deduced_types)
    : genericTs(generics_decl)
    , substitutionTs(deduced_types) {
  }

  // Builds a fresh function declaration vertex from cloned identifier, parameters, body and return type;
  // location, method_id and flags are taken from the original as is.
  V<ast_function_declaration> clone_function_body(V<ast_function_declaration> v_function) override {
    return createV<ast_function_declaration>(
      v_function->loc,
      clone(v_function->get_identifier()),
      clone(v_function->get_param_list()),
      clone(v_function->get_body()),
      clone(v_function->declared_return_type),
      nullptr,    // a newly-created function is not generic
      v_function->method_id,
      v_function->flags
    );
  }
};
std::string GenericsDeclaration::as_human_readable() const {
std::string result = "<";
for (const GenericsItem& item : itemsT) {
if (result.size() > 1) {
result += ",";
}
result += item.nameT;
}
result += ">";
return result;
}
int GenericsDeclaration::find_nameT(std::string_view nameT) const {
for (int i = 0; i < static_cast<int>(itemsT.size()); ++i) {
if (itemsT[i].nameT == nameT) {
return i;
}
}
return -1;
}
// After `f<T>` has been deep-copied into an instantiation like `f<int>`, the fresh function
// has not passed through any of the earlier compilation pipes yet, so they are run for it here
// (for example, all local vars need to be registered as symbols, etc.).
// The pipes are invoked for this single function only, in the order listed below.
static void run_pipeline_for_instantiated_function(const FunctionData* inst_fun_ref) {
// these pipes are exactly the same as in tolk.cpp: everything up to and including type inferring
pipeline_resolve_identifiers_and_assign_symbols(inst_fun_ref);
pipeline_calculate_rvalue_lvalue(inst_fun_ref);
pipeline_detect_unreachable_statements(inst_fun_ref);
pipeline_infer_types_and_calls_and_fields(inst_fun_ref);
}
std::string generate_instantiated_name(const std::string& orig_name, const std::vector<TypePtr>& substitutions) {
// an instantiated function name will be "{orig_name}<{T1,T2,...}>"
std::string name = orig_name;
name += "<";
for (TypePtr subs : substitutions) {
if (name.size() > orig_name.size() + 1) {
name += ",";
}
name += subs->as_human_readable();
}
name.erase(std::remove(name.begin(), name.end(), ' '), name.end());
name += ">";
return name;
}
// Deduces every generic T of `called_fun` from the types of the passed arguments and,
// only if some T is still unknown after that, from `return_hint` (when it is provided).
// Returns the substitutions on success; otherwise an error status that either names
// the first T left undeduced or carries the message of a std::runtime_error caught while deducing.
td::Result<std::vector<TypePtr>> deduce_substitutionTs_on_generic_func_call(const FunctionData* called_fun, std::vector<TypePtr>&& arg_types, TypePtr return_hint) {
  try {
    GenericSubstitutionsDeduceForFunctionCall deducer(called_fun);
    const int n_passed_args = static_cast<int>(arg_types.size());

    // step 1: parameters that mention a generic T and have a passed argument
    for (const LocalVarData& param : called_fun->parameters) {
      if (!param.declared_type->has_genericT_inside() || param.idx >= n_passed_args) {
        continue;
      }
      deducer.consider_next_condition(param.declared_type, arg_types[param.idx]);
    }

    // step 2: the declared return type against the hint, only when something is still missing
    int missing_idx = deducer.get_first_not_deduced_idx();
    if (missing_idx != -1 && return_hint && called_fun->declared_return_type->has_genericT_inside()) {
      deducer.consider_next_condition(called_fun->declared_return_type, return_hint);
      missing_idx = deducer.get_first_not_deduced_idx();
    }

    if (missing_idx == -1) {
      return deducer.flush();
    }
    return td::Status::Error(td::Slice{"can not deduce " + called_fun->genericTs->get_nameT(missing_idx)});
  } catch (const std::runtime_error& ex) {
    return td::Status::Error(td::Slice{ex.what()});
  }
}
// Returns the concrete instantiation of generic function `fun_ref` registered under `inst_name`,
// creating and registering it on first request.
// - loc: stored in the created GenericsInstantiation
// - fun_ref: the generic function; asserted to have genericTs
// - inst_name: symbol name of the instantiation, used both for lookup and for the new FunctionData
// - substitutionTs: concrete types for the generic Ts; moved into the created GenericsInstantiation
// A new instantiation copies flags, arg_order and ret_order from `fun_ref`; how its body is produced
// depends on whether `fun_ref` is an asm function, a builtin, or a regular function (see the branches below).
const FunctionData* instantiate_generic_function(SrcLocation loc, const FunctionData* fun_ref, const std::string& inst_name, std::vector<TypePtr>&& substitutionTs) {
tolk_assert(fun_ref->genericTs);
// if `f<int>` was earlier instantiated, return it
// (a symbol with this name is asserted to be a function)
if (const auto* existing = lookup_global_symbol(inst_name)) {
const FunctionData* inst_ref = existing->try_as<FunctionData>();
tolk_assert(inst_ref);
return inst_ref;
}
// parameters of the instantiation: same name/loc/flags/idx, declared types with Ts replaced
std::vector<LocalVarData> parameters;
parameters.reserve(fun_ref->get_num_params());
for (const LocalVarData& orig_p : fun_ref->parameters) {
parameters.emplace_back(orig_p.name, orig_p.loc, replace_genericT_with_deduced(orig_p.declared_type, fun_ref->genericTs, substitutionTs), orig_p.flags, orig_p.idx);
}
TypePtr declared_return_type = replace_genericT_with_deduced(fun_ref->declared_return_type, fun_ref->genericTs, substitutionTs);
// substitutionTs is moved from here; below, the types are reachable only via instantiationTs->substitutions
const GenericsInstantiation* instantiationTs = new GenericsInstantiation(loc, std::move(substitutionTs));
// asm function: gets a new FunctionBodyAsm and keeps the original ast_root;
// it is added to the symbol table and to G.all_functions, then the pipes are run for it
if (fun_ref->is_asm_function()) {
FunctionData* inst_ref = new FunctionData(inst_name, fun_ref->loc, declared_return_type, std::move(parameters), fun_ref->flags, nullptr, instantiationTs, new FunctionBodyAsm, fun_ref->ast_root);
inst_ref->arg_order = fun_ref->arg_order;
inst_ref->ret_order = fun_ref->ret_order;
G.symtable.add_function(inst_ref);
G.all_functions.push_back(inst_ref);
run_pipeline_for_instantiated_function(inst_ref);
return inst_ref;
}
// builtin function: reuses the body and ast_root of the original;
// it is only added to the symbol table (not to G.all_functions), and no pipes are run
if (fun_ref->is_builtin_function()) {
FunctionData* inst_ref = new FunctionData(inst_name, fun_ref->loc, declared_return_type, std::move(parameters), fun_ref->flags, nullptr, instantiationTs, fun_ref->body, fun_ref->ast_root);
inst_ref->arg_order = fun_ref->arg_order;
inst_ref->ret_order = fun_ref->ret_order;
G.symtable.add_function(inst_ref);
return inst_ref;
}
// regular function: its AST is cloned with Ts replaced, and the clone becomes ast_root of the instantiation
GenericFunctionReplicator replicator(fun_ref->genericTs, instantiationTs->substitutions);
V<ast_function_declaration> inst_root = replicator.clone_function_body(fun_ref->ast_root->as<ast_function_declaration>());
FunctionData* inst_ref = new FunctionData(inst_name, fun_ref->loc, declared_return_type, std::move(parameters), fun_ref->flags, nullptr, instantiationTs, new FunctionBodyCode, inst_root);
inst_ref->arg_order = fun_ref->arg_order;
inst_ref->ret_order = fun_ref->ret_order;
// link the cloned declaration vertex back to the newly created function
inst_root->mutate()->assign_fun_ref(inst_ref);
G.symtable.add_function(inst_ref);
G.all_functions.push_back(inst_ref);
run_pipeline_for_instantiated_function(inst_ref);
return inst_ref;
}
} // namespace tolk

64
tolk/generics-helpers.h Normal file
View file

@ -0,0 +1,64 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "src-file.h"
#include "fwd-declarations.h"
#include "td/utils/Status.h"
#include <vector>
namespace tolk {
// The "<T>" part of a function declared as `f<T>`
// (kept at symbol storage level, not at AST level).
struct GenericsDeclaration {
  // a single declared generic parameter
  struct GenericsItem {
    std::string_view nameT;   // non-owning view; NOTE(review): the viewed characters must outlive this item, confirm at creation sites

    explicit GenericsItem(std::string_view nameT)
      : nameT(nameT) {}
  };

  explicit GenericsDeclaration(std::vector<GenericsItem>&& itemsT)
    : itemsT(std::move(itemsT)) {}

  const std::vector<GenericsItem> itemsT;

  // "<T1,T2,...>"
  std::string as_human_readable() const;

  // number of declared generic parameters
  size_t size() const {
    return itemsT.size();
  }

  // whether a generic parameter with this name is declared
  bool has_nameT(std::string_view nameT) const {
    return find_nameT(nameT) >= 0;
  }

  // index of the generic parameter with this name, or -1
  int find_nameT(std::string_view nameT) const;

  // name of the idx-th generic parameter as an owning string; idx is not range-checked
  std::string get_nameT(int idx) const {
    return std::string(itemsT[idx].nameT);
  }
};
// The "<int>" part of a call like `f<int>()`: the concrete types chosen for the generic Ts of a function.
// Both fields are const, so an instance never changes after construction.
struct GenericsInstantiation {
const std::vector<TypePtr> substitutions; // <SomeStruct, int> for genericTs <T1, T2>
const SrcLocation loc; // first instantiation location
// `substitutions` is taken by rvalue reference and moved into the field
explicit GenericsInstantiation(SrcLocation loc, std::vector<TypePtr>&& substitutions)
: substitutions(std::move(substitutions))
, loc(loc) {
}
};
std::string generate_instantiated_name(const std::string& orig_name, const std::vector<TypePtr>& substitutions);
td::Result<std::vector<TypePtr>> deduce_substitutionTs_on_generic_func_call(const FunctionData* called_fun, std::vector<TypePtr>&& arg_types, TypePtr return_hint);
const FunctionData* instantiate_generic_function(SrcLocation loc, const FunctionData* fun_ref, const std::string& inst_name, std::vector<TypePtr>&& substitutionTs);
} // namespace tolk

View file

@ -328,6 +328,7 @@ struct ChunkIdentifierOrKeyword final : ChunkLexerBase {
case 2:
if (str == "do") return tok_do;
if (str == "if") return tok_if;
if (str == "as") return tok_as;
break;
case 3:
if (str == "int") return tok_int;
@ -345,7 +346,6 @@ struct ChunkIdentifierOrKeyword final : ChunkLexerBase {
if (str == "null") return tok_null;
if (str == "void") return tok_void;
if (str == "bool") return tok_bool;
if (str == "auto") return tok_auto;
if (str == "self") return tok_self;
if (str == "tolk") return tok_tolk;
if (str == "type") return tok_type;
@ -578,6 +578,16 @@ void Lexer::next_special(TokenType parse_next_as, const char* str_expected) {
cur_token = tokens_circularbuf[++cur_token_idx & 7];
}
// Captures the current p_next, cur_token_idx and cur_token so that restore_position() can return to them.
Lexer::SavedPositionForLookahead Lexer::save_parsing_position() const {
  return SavedPositionForLookahead{p_next, cur_token_idx, cur_token};
}
// Puts the lexer back into a state captured earlier by save_parsing_position():
// p_next and cur_token are restored, and both token indices are reset to the saved cur_token_idx.
void Lexer::restore_position(SavedPositionForLookahead saved) {
  p_next = saved.p_next;
  last_token_idx = saved.cur_token_idx;
  cur_token_idx = last_token_idx;
  cur_token = saved.cur_token;
}
void Lexer::error(const std::string& err_msg) const {
throw ParseError(cur_location(), err_msg);
}
@ -595,7 +605,7 @@ void lexer_init() {
// Hence, it's difficult to measure Lexer performance separately.
// This function can be called just to tick Lexer performance, it just scans all input files.
// It makes no sense to use it in production, but it's useful when refactoring and optimizing the Lexer.
void lexer_measure_performance(const AllSrcFiles& files_to_just_parse) {
void lexer_measure_performance(const AllRegisteredSrcFiles& files_to_just_parse) {
for (const SrcFile* file : files_to_just_parse) {
Lexer lex(file);
while (!lex.is_eof()) {

View file

@ -125,9 +125,9 @@ enum TokenType {
tok_builder,
tok_continuation,
tok_tuple,
tok_auto,
tok_void,
tok_arrow,
tok_as,
tok_tolk,
tok_semver,
@ -166,6 +166,12 @@ class Lexer {
public:
// Snapshot of the lexer state: produced by save_parsing_position(), consumed by restore_position().
struct SavedPositionForLookahead {
const char* p_next = nullptr; // Lexer::p_next at the moment of saving
int cur_token_idx = 0; // Lexer::cur_token_idx at the moment of saving
Token cur_token; // copy of Lexer::cur_token at the moment of saving
};
explicit Lexer(const SrcFile* file);
Lexer(const Lexer&) = delete;
Lexer &operator=(const Lexer&) = delete;
@ -209,6 +215,9 @@ public:
void next();
void next_special(TokenType parse_next_as, const char* str_expected);
SavedPositionForLookahead save_parsing_position() const;
void restore_position(SavedPositionForLookahead saved);
void check(TokenType next_tok, const char* str_expected) const {
if (cur_token.type != next_tok) {
unexpected(str_expected); // unlikely path, not inlined
@ -230,6 +239,6 @@ public:
void lexer_init();
// todo #ifdef TOLK_PROFILING
void lexer_measure_performance(const AllSrcFiles& files_to_just_parse);
void lexer_measure_performance(const AllRegisteredSrcFiles& files_to_just_parse);
} // namespace tolk

View file

@ -17,7 +17,8 @@
#include "tolk.h"
#include "src-file.h"
#include "ast.h"
#include "compiler-state.h"
#include "ast-visitor.h"
#include "type-system.h"
#include "common/refint.h"
#include "constant-evaluator.h"
@ -48,7 +49,7 @@ struct LValGlobs {
};
std::vector<var_idx_t> pre_compile_expr(AnyExprV v, CodeBlob& code, LValGlobs* lval_globs = nullptr);
void process_statement(AnyV v, CodeBlob& code);
void process_any_statement(AnyV v, CodeBlob& code);
static std::vector<std::vector<var_idx_t>> pre_compile_tensor_inner(CodeBlob& code, const std::vector<AnyExprV>& args,
@ -128,25 +129,24 @@ static std::vector<var_idx_t> pre_compile_tensor(CodeBlob& code, const std::vect
static std::vector<var_idx_t> pre_compile_let(CodeBlob& code, AnyExprV lhs, AnyExprV rhs, SrcLocation loc) {
// [lhs] = [rhs]; since type checking is ok, it's the same as "lhs = rhs"
if (lhs->type == ast_tensor_square && rhs->type == ast_tensor_square) {
std::vector<var_idx_t> right = pre_compile_tensor(code, rhs->as<ast_tensor_square>()->get_items());
if (lhs->type == ast_typed_tuple && rhs->type == ast_typed_tuple) {
std::vector<var_idx_t> right = pre_compile_tensor(code, rhs->as<ast_typed_tuple>()->get_items());
LValGlobs globs;
std::vector<var_idx_t> left = pre_compile_tensor(code, lhs->as<ast_tensor_square>()->get_items(), &globs);
std::vector<var_idx_t> left = pre_compile_tensor(code, lhs->as<ast_typed_tuple>()->get_items(), &globs);
code.on_var_modification(left, loc);
code.emplace_back(loc, Op::_Let, std::move(left), right);
globs.gen_ops_set_globs(code, loc);
return right;
}
// [lhs] = rhs; it's un-tuple to N left vars
if (lhs->type == ast_tensor_square) {
if (lhs->type == ast_typed_tuple) {
std::vector<var_idx_t> right = pre_compile_expr(rhs, code);
TypeExpr* rhs_type = rhs->inferred_type;
TypeExpr::remove_indirect(rhs_type);
TypeExpr* unpacked_type = rhs_type->args.at(0); // rhs->inferred_type is tuple<tensor<...>>
std::vector<var_idx_t> rvect = {code.create_tmp_var(unpacked_type, rhs->loc)};
const TypeDataTypedTuple* inferred_tuple = rhs->inferred_type->try_as<TypeDataTypedTuple>();
std::vector<TypePtr> types_list = inferred_tuple->items;
std::vector<var_idx_t> rvect = {code.create_tmp_var(TypeDataTensor::create(std::move(types_list)), rhs->loc)};
code.emplace_back(lhs->loc, Op::_UnTuple, rvect, std::move(right));
LValGlobs globs;
std::vector<var_idx_t> left = pre_compile_tensor(code, lhs->as<ast_tensor_square>()->get_items(), &globs);
std::vector<var_idx_t> left = pre_compile_tensor(code, lhs->as<ast_typed_tuple>()->get_items(), &globs);
code.on_var_modification(left, loc);
code.emplace_back(loc, Op::_Let, std::move(left), rvect);
globs.gen_ops_set_globs(code, loc);
@ -162,7 +162,7 @@ static std::vector<var_idx_t> pre_compile_let(CodeBlob& code, AnyExprV lhs, AnyE
return right;
}
static std::vector<var_idx_t> gen_op_call(CodeBlob& code, TypeExpr* ret_type, SrcLocation here,
static std::vector<var_idx_t> gen_op_call(CodeBlob& code, TypePtr ret_type, SrcLocation here,
std::vector<var_idx_t>&& args_vars, const FunctionData* fun_ref) {
std::vector<var_idx_t> rvect = {code.create_tmp_var(ret_type, here)};
Op& op = code.emplace_back(here, Op::_Call, rvect, std::move(args_vars), fun_ref);
@ -173,38 +173,75 @@ static std::vector<var_idx_t> gen_op_call(CodeBlob& code, TypeExpr* ret_type, Sr
}
static std::vector<var_idx_t> process_binary_operator(V<ast_binary_operator> v, CodeBlob& code) {
TokenType t = v->tok;
std::string operator_name = static_cast<std::string>(v->operator_name);
if (v->is_set_assign()) {
std::string_view calc_operator = std::string_view{operator_name}.substr(0, operator_name.size() - 1);
auto v_apply = createV<ast_binary_operator>(v->loc, calc_operator, static_cast<TokenType>(t - 1), v->get_lhs(), v->get_rhs());
v_apply->assign_inferred_type(v->inferred_type);
return pre_compile_let(code, v->get_lhs(), v_apply, v->loc);
static std::vector<var_idx_t> process_symbol(SrcLocation loc, const Symbol* sym, CodeBlob& code, LValGlobs* lval_globs) {
if (const auto* glob_ref = sym->try_as<GlobalVarData>()) {
std::vector<var_idx_t> rvect = {code.create_tmp_var(glob_ref->declared_type, loc)};
if (lval_globs) {
lval_globs->add_modified_glob(glob_ref, rvect[0]);
return rvect;
} else {
code.emplace_back(loc, Op::_GlobVar, rvect, std::vector<var_idx_t>{}, glob_ref);
return rvect;
}
if (v->is_assign()) {
}
if (const auto* const_ref = sym->try_as<GlobalConstData>()) {
if (const_ref->is_int_const()) {
std::vector<var_idx_t> rvect = {code.create_tmp_var(TypeDataInt::create(), loc)};
code.emplace_back(loc, Op::_IntConst, rvect, const_ref->as_int_const());
return rvect;
} else {
std::vector<var_idx_t> rvect = {code.create_tmp_var(TypeDataSlice::create(), loc)};
code.emplace_back(loc, Op::_SliceConst, rvect, const_ref->as_slice_const());
return rvect;
}
}
if (const auto* fun_ref = sym->try_as<FunctionData>()) {
std::vector<var_idx_t> rvect = {code.create_tmp_var(fun_ref->inferred_full_type, loc)};
code.emplace_back(loc, Op::_GlobVar, rvect, std::vector<var_idx_t>{}, fun_ref);
return rvect;
}
if (const auto* var_ref = sym->try_as<LocalVarData>()) {
return {var_ref->idx};
}
throw Fatal("process_symbol");
}
static std::vector<var_idx_t> process_assign(V<ast_assign> v, CodeBlob& code) {
if (auto lhs_decl = v->get_lhs()->try_as<ast_local_vars_declaration>()) {
return pre_compile_let(code, lhs_decl->get_expr(), v->get_rhs(), v->loc);
} else {
return pre_compile_let(code, v->get_lhs(), v->get_rhs(), v->loc);
}
if (t == tok_minus || t == tok_plus ||
t == tok_bitwise_and || t == tok_bitwise_or || t == tok_bitwise_xor ||
t == tok_eq || t == tok_lt || t == tok_gt || t == tok_leq || t == tok_geq || t == tok_neq || t == tok_spaceship ||
t == tok_lshift || t == tok_rshift || t == tok_rshiftC || t == tok_rshiftR ||
t == tok_mul || t == tok_div || t == tok_mod || t == tok_divC || t == tok_divR) {
const FunctionData* fun_ref = lookup_global_symbol("_" + operator_name + "_")->as<FunctionData>();
}
// Compiles a set-assignment like `a += b` by emulating `a = a + b`:
// a temporary binary-operator vertex is built from the same lhs and rhs, given the inferred type
// and fun_ref of the original vertex, and then assigned to lhs as an ordinary "let".
// (not beautiful, but it works; probably this transformation should be done at AST level in advance)
static std::vector<var_idx_t> process_set_assign(V<ast_set_assign> v, CodeBlob& code) {
  std::string_view binary_name = v->operator_name;    // "+" for operator +=
  // the binary operator's token is taken as the one preceding the set-assign token in TokenType
  const TokenType binary_tok = static_cast<TokenType>(v->tok - 1);

  auto v_binary = createV<ast_binary_operator>(v->loc, binary_name, binary_tok, v->get_lhs(), v->get_rhs());
  v_binary->assign_inferred_type(v->inferred_type);
  v_binary->assign_fun_ref(v->fun_ref);

  return pre_compile_let(code, v->get_lhs(), v_binary, v->loc);
}
static std::vector<var_idx_t> process_binary_operator(V<ast_binary_operator> v, CodeBlob& code) {
TokenType t = v->tok;
if (v->fun_ref) { // almost all operators, fun_ref was assigned at type inferring
std::vector<var_idx_t> args_vars = pre_compile_tensor(code, {v->get_lhs(), v->get_rhs()});
return gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), fun_ref);
return gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), v->fun_ref);
}
if (t == tok_logical_and || t == tok_logical_or) {
// do the following transformations:
// a && b -> a ? (b != 0) : 0
// a || b -> a ? 1 : (b != 0)
AnyExprV v_0 = createV<ast_int_const>(v->loc, td::make_refint(0), "0");
v_0->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int));
v_0->mutate()->assign_inferred_type(TypeDataInt::create());
AnyExprV v_1 = createV<ast_int_const>(v->loc, td::make_refint(-1), "-1");
v_1->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int));
AnyExprV v_b_ne_0 = createV<ast_binary_operator>(v->loc, "!=", tok_neq, v->get_rhs(), v_0);
v_b_ne_0->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int));
v_1->mutate()->assign_inferred_type(TypeDataInt::create());
auto v_b_ne_0 = createV<ast_binary_operator>(v->loc, "!=", tok_neq, v->get_rhs(), v_0);
v_b_ne_0->mutate()->assign_inferred_type(TypeDataInt::create());
v_b_ne_0->mutate()->assign_fun_ref(lookup_global_symbol("_!=_")->as<FunctionData>());
std::vector<var_idx_t> cond = pre_compile_expr(v->get_lhs(), code);
tolk_assert(cond.size() == 1);
std::vector<var_idx_t> rvect = {code.create_tmp_var(v->inferred_type, v->loc)};
@ -222,9 +259,8 @@ static std::vector<var_idx_t> process_binary_operator(V<ast_binary_operator> v,
}
static std::vector<var_idx_t> process_unary_operator(V<ast_unary_operator> v, CodeBlob& code) {
const FunctionData* fun_ref = lookup_global_symbol(static_cast<std::string>(v->operator_name) + "_")->as<FunctionData>();
std::vector<var_idx_t> args_vars = pre_compile_tensor(code, {v->get_rhs()});
return gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), fun_ref);
return gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), v->fun_ref);
}
static std::vector<var_idx_t> process_ternary_operator(V<ast_ternary_operator> v, CodeBlob& code) {
@ -241,8 +277,17 @@ static std::vector<var_idx_t> process_ternary_operator(V<ast_ternary_operator> v
return rvect;
}
// Compiles a dot access that is not a method call.
// A call like `t.tupleSize()` never gets here (such cases are handled by process_function_call);
// this handles `t.0`, `getUser().id`, and `t.tupleSize` (as a reference, not as a call).
static std::vector<var_idx_t> process_dot_access(V<ast_dot_access> v, CodeBlob& code, LValGlobs* lval_globs) {
  // currently, nothing except a global function can be a target of dot access
  const FunctionData* target_fun = v->target;
  tolk_assert(target_fun);
  return process_symbol(v->loc, target_fun, code, lval_globs);
}
static std::vector<var_idx_t> process_function_call(V<ast_function_call> v, CodeBlob& code) {
// most likely it's a global function, but also may be `some_var(args)` or even `getF()(args)`
// v is `globalF(args)` / `globalF<int>(args)` / `obj.method(args)` / `local_var(args)` / `getF()(args)`
const FunctionData* fun_ref = v->fun_maybe;
if (!fun_ref) {
std::vector<AnyExprV> args;
@ -251,7 +296,7 @@ static std::vector<var_idx_t> process_function_call(V<ast_function_call> v, Code
args.push_back(v->get_arg(i)->get_expr());
}
std::vector<var_idx_t> args_vars = pre_compile_tensor(code, args);
std::vector<var_idx_t> tfunc = pre_compile_expr(v->get_called_f(), code);
std::vector<var_idx_t> tfunc = pre_compile_expr(v->get_callee(), code);
tolk_assert(tfunc.size() == 1);
args_vars.push_back(tfunc[0]);
std::vector<var_idx_t> rvect = {code.create_tmp_var(v->inferred_type, v->loc)};
@ -260,95 +305,54 @@ static std::vector<var_idx_t> process_function_call(V<ast_function_call> v, Code
return rvect;
}
int delta_self = v->is_dot_call();
AnyExprV obj_leftmost = nullptr;
std::vector<AnyExprV> args;
args.reserve(v->get_num_args());
for (int i = 0; i < v->get_num_args(); ++i) {
args.push_back(v->get_arg(i)->get_expr());
}
std::vector<var_idx_t> args_vars = pre_compile_tensor(code, args);
TypeExpr* op_call_type = v->inferred_type;
if (fun_ref->has_mutate_params()) {
std::vector<TypeExpr*> types_list;
for (int i = 0; i < v->get_num_args(); ++i) {
if (fun_ref->parameters[i].is_mutate_parameter()) {
types_list.push_back(args[i]->inferred_type);
args.reserve(delta_self + v->get_num_args());
if (delta_self) {
args.push_back(v->get_dot_obj());
obj_leftmost = v->get_dot_obj();
while (obj_leftmost->type == ast_function_call && obj_leftmost->as<ast_function_call>()->is_dot_call() && obj_leftmost->as<ast_function_call>()->fun_maybe && obj_leftmost->as<ast_function_call>()->fun_maybe->does_return_self()) {
obj_leftmost = obj_leftmost->as<ast_function_call>()->get_dot_obj();
}
}
types_list.push_back(v->inferred_type);
op_call_type = TypeExpr::new_tensor(std::move(types_list));
}
std::vector<var_idx_t> rvect_apply = gen_op_call(code, op_call_type, v->loc, std::move(args_vars), fun_ref);
if (fun_ref->has_mutate_params()) {
LValGlobs local_globs;
std::vector<var_idx_t> left;
for (int i = 0; i < v->get_num_args(); ++i) {
if (fun_ref->parameters[i].is_mutate_parameter()) {
AnyExprV arg_i = v->get_arg(i)->get_expr();
tolk_assert(arg_i->is_lvalue);
std::vector<var_idx_t> ith_var_idx = pre_compile_expr(arg_i, code, &local_globs);
left.insert(left.end(), ith_var_idx.begin(), ith_var_idx.end());
}
}
std::vector<var_idx_t> rvect = {code.create_tmp_var(v->inferred_type, v->loc)};
left.push_back(rvect[0]);
code.on_var_modification(left, v->loc);
code.emplace_back(v->loc, Op::_Let, std::move(left), rvect_apply);
local_globs.gen_ops_set_globs(code, v->loc);
return rvect;
}
return rvect_apply;
}
static std::vector<var_idx_t> process_dot_method_call(V<ast_dot_method_call> v, CodeBlob& code) {
std::vector<AnyExprV> args;
args.reserve(1 + v->get_num_args());
args.push_back(v->get_obj());
for (int i = 0; i < v->get_num_args(); ++i) {
args.push_back(v->get_arg(i)->get_expr());
}
std::vector<std::vector<var_idx_t>> vars_per_arg = pre_compile_tensor_inner(code, args, nullptr);
TypeExpr* op_call_type = v->inferred_type;
TypeExpr* real_ret_type = v->inferred_type;
if (v->fun_ref->does_return_self()) {
real_ret_type = TypeExpr::new_unit();
if (!v->fun_ref->parameters[0].is_mutate_parameter()) {
op_call_type = TypeExpr::new_unit();
TypePtr op_call_type = v->inferred_type;
TypePtr real_ret_type = v->inferred_type;
if (delta_self && fun_ref->does_return_self()) {
real_ret_type = TypeDataVoid::create();
if (!fun_ref->parameters[0].is_mutate_parameter()) {
op_call_type = TypeDataVoid::create();
}
}
if (v->fun_ref->has_mutate_params()) {
std::vector<TypeExpr*> types_list;
for (int i = 0; i < 1 + v->get_num_args(); ++i) {
if (v->fun_ref->parameters[i].is_mutate_parameter()) {
if (fun_ref->has_mutate_params()) {
std::vector<TypePtr> types_list;
for (int i = 0; i < delta_self + v->get_num_args(); ++i) {
if (fun_ref->parameters[i].is_mutate_parameter()) {
types_list.push_back(args[i]->inferred_type);
}
}
types_list.push_back(real_ret_type);
op_call_type = TypeExpr::new_tensor(std::move(types_list));
op_call_type = TypeDataTensor::create(std::move(types_list));
}
std::vector<var_idx_t> args_vars;
for (const std::vector<var_idx_t>& list : vars_per_arg) {
args_vars.insert(args_vars.end(), list.cbegin(), list.cend());
}
std::vector<var_idx_t> rvect_apply = gen_op_call(code, op_call_type, v->loc, std::move(args_vars), v->fun_ref);
std::vector<var_idx_t> rvect_apply = gen_op_call(code, op_call_type, v->loc, std::move(args_vars), fun_ref);
AnyExprV obj_leftmost = args[0];
while (obj_leftmost->type == ast_dot_method_call && obj_leftmost->as<ast_dot_method_call>()->fun_ref->does_return_self()) {
obj_leftmost = obj_leftmost->as<ast_dot_method_call>()->get_obj();
}
if (v->fun_ref->has_mutate_params()) {
if (fun_ref->has_mutate_params()) {
LValGlobs local_globs;
std::vector<var_idx_t> left;
for (int i = 0; i < 1 + v->get_num_args(); ++i) {
if (v->fun_ref->parameters[i].is_mutate_parameter()) {
AnyExprV arg_i = i == 0 ? obj_leftmost : args[i];
tolk_assert (arg_i->is_lvalue || i == 0);
for (int i = 0; i < delta_self + v->get_num_args(); ++i) {
if (fun_ref->parameters[i].is_mutate_parameter()) {
AnyExprV arg_i = obj_leftmost && i == 0 ? obj_leftmost : args[i];
tolk_assert(arg_i->is_lvalue || i == 0);
if (arg_i->is_lvalue) {
std::vector<var_idx_t> ith_var_idx = pre_compile_expr(arg_i, code, &local_globs);
left.insert(left.end(), ith_var_idx.begin(), ith_var_idx.end());
@ -365,7 +369,7 @@ static std::vector<var_idx_t> process_dot_method_call(V<ast_dot_method_call> v,
rvect_apply = rvect;
}
if (v->fun_ref->does_return_self()) {
if (obj_leftmost && fun_ref->does_return_self()) {
if (obj_leftmost->is_lvalue) { // to handle if obj is global var, potentially re-assigned inside a chain
rvect_apply = pre_compile_expr(obj_leftmost, code);
} else { // temporary object, not lvalue, pre_compile_expr
@ -380,7 +384,7 @@ static std::vector<var_idx_t> process_tensor(V<ast_tensor> v, CodeBlob& code, LV
return pre_compile_tensor(code, v->get_items(), lval_globs);
}
static std::vector<var_idx_t> process_tensor_square(V<ast_tensor_square> v, CodeBlob& code, LValGlobs* lval_globs) {
static std::vector<var_idx_t> process_typed_tuple(V<ast_typed_tuple> v, CodeBlob& code, LValGlobs* lval_globs) {
if (lval_globs) { // todo some time, make "var (a, [b,c]) = (1, [2,3])" work
v->error("[...] can not be used as lvalue here");
}
@ -417,82 +421,53 @@ static std::vector<var_idx_t> process_null_keyword(V<ast_null_keyword> v, CodeBl
return gen_op_call(code, v->inferred_type, v->loc, {}, builtin_sym);
}
static std::vector<var_idx_t> process_self_keyword(V<ast_self_keyword> v, CodeBlob& code) {
tolk_assert(code.fun_ref->does_accept_self() && v->param_ref);
tolk_assert(v->param_ref->idx == 0);
return {0};
}
static std::vector<var_idx_t> process_identifier(V<ast_identifier> v, CodeBlob& code, LValGlobs* lval_globs) {
const Symbol* sym = v->sym;
if (const auto* glob_ref = sym->try_as<GlobalVarData>()) {
std::vector<var_idx_t> rvect = {code.create_tmp_var(v->inferred_type, v->loc)};
if (lval_globs) {
lval_globs->add_modified_glob(glob_ref, rvect[0]);
return rvect;
} else {
code.emplace_back(v->loc, Op::_GlobVar, rvect, std::vector<var_idx_t>{}, glob_ref);
return rvect;
}
}
if (const auto* const_ref = sym->try_as<GlobalConstData>()) {
std::vector<var_idx_t> rvect = {code.create_tmp_var(v->inferred_type, v->loc)};
if (const_ref->is_int_const()) {
code.emplace_back(v->loc, Op::_IntConst, rvect, const_ref->as_int_const());
} else {
code.emplace_back(v->loc, Op::_SliceConst, rvect, const_ref->as_slice_const());
}
return rvect;
}
if (const auto* fun_ref = sym->try_as<FunctionData>()) {
std::vector<var_idx_t> rvect = {code.create_tmp_var(v->inferred_type, v->loc)};
code.emplace_back(v->loc, Op::_GlobVar, rvect, std::vector<var_idx_t>{}, fun_ref);
return rvect;
}
if (const auto* var_ref = sym->try_as<LocalVarData>()) {
#ifdef TOLK_DEBUG
tolk_assert(var_ref->idx != -1);
#endif
return {var_ref->idx};
}
throw UnexpectedASTNodeType(v, "process_identifier");
}
static std::vector<var_idx_t> process_local_var(V<ast_local_var> v, CodeBlob& code, LValGlobs* lval_globs) {
static std::vector<var_idx_t> process_local_var(V<ast_local_var_lhs> v, CodeBlob& code) {
if (v->marked_as_redef) {
return process_identifier(v->get_identifier()->as<ast_identifier>(), code, lval_globs);
return process_symbol(v->loc, v->var_ref, code, nullptr);
}
if (v->get_identifier()->try_as<ast_identifier>()) {
const LocalVarData* var_ref = v->var_maybe->as<LocalVarData>();
tolk_assert(var_ref->idx == -1);
var_ref->mutate()->assign_idx(code.create_var(v->inferred_type, var_ref, v->loc));
return {var_ref->idx};
}
return {code.create_tmp_var(v->inferred_type, v->loc)}; // underscore
tolk_assert(v->var_ref->idx == -1);
v->var_ref->mutate()->assign_idx(code.create_var(v->inferred_type, v->var_ref, v->loc));
return {v->var_ref->idx};
}
// Expression-level handler for a `var ...` declaration vertex; reaching it is treated as an internal error.
static std::vector<var_idx_t> process_local_vars_declaration(V<ast_local_vars_declaration>, CodeBlob&) {
// it can not appear as a standalone expression
// `var ... = rhs` is handled by ast_assign
tolk_assert(false);
}
// Compiles `_` by allocating a fresh temporary variable of the inferred type.
static std::vector<var_idx_t> process_underscore(V<ast_underscore> v, CodeBlob& code) {
  // when _ is used as left side of assignment, like `(cs, _) = cs.loadAndReturn()`
  const auto tmp_idx = code.create_tmp_var(v->inferred_type, v->loc);
  return {tmp_idx};
}
std::vector<var_idx_t> pre_compile_expr(AnyExprV v, CodeBlob& code, LValGlobs* lval_globs) {
switch (v->type) {
case ast_reference:
return process_symbol(v->loc, v->as<ast_reference>()->sym, code, lval_globs);
case ast_assign:
return process_assign(v->as<ast_assign>(), code);
case ast_set_assign:
return process_set_assign(v->as<ast_set_assign>(), code);
case ast_binary_operator:
return process_binary_operator(v->as<ast_binary_operator>(), code);
case ast_unary_operator:
return process_unary_operator(v->as<ast_unary_operator>(), code);
case ast_ternary_operator:
return process_ternary_operator(v->as<ast_ternary_operator>(), code);
case ast_cast_as_operator:
return pre_compile_expr(v->as<ast_cast_as_operator>()->get_expr(), code, lval_globs);
case ast_dot_access:
return process_dot_access(v->as<ast_dot_access>(), code, lval_globs);
case ast_function_call:
return process_function_call(v->as<ast_function_call>(), code);
case ast_dot_method_call:
return process_dot_method_call(v->as<ast_dot_method_call>(), code);
case ast_parenthesized_expression:
return pre_compile_expr(v->as<ast_parenthesized_expression>()->get_expr(), code, lval_globs);
case ast_tensor:
return process_tensor(v->as<ast_tensor>(), code, lval_globs);
case ast_tensor_square:
return process_tensor_square(v->as<ast_tensor_square>(), code, lval_globs);
case ast_typed_tuple:
return process_typed_tuple(v->as<ast_typed_tuple>(), code, lval_globs);
case ast_int_const:
return process_int_const(v->as<ast_int_const>(), code);
case ast_string_const:
@ -501,12 +476,10 @@ std::vector<var_idx_t> pre_compile_expr(AnyExprV v, CodeBlob& code, LValGlobs* l
return process_bool_const(v->as<ast_bool_const>(), code);
case ast_null_keyword:
return process_null_keyword(v->as<ast_null_keyword>(), code);
case ast_self_keyword:
return process_self_keyword(v->as<ast_self_keyword>(), code);
case ast_identifier:
return process_identifier(v->as<ast_identifier>(), code, lval_globs);
case ast_local_var:
return process_local_var(v->as<ast_local_var>(), code, lval_globs);
case ast_local_var_lhs:
return process_local_var(v->as<ast_local_var_lhs>(), code);
case ast_local_vars_declaration:
return process_local_vars_declaration(v->as<ast_local_vars_declaration>(), code);
case ast_underscore:
return process_underscore(v->as<ast_underscore>(), code);
default:
@ -515,39 +488,34 @@ std::vector<var_idx_t> pre_compile_expr(AnyExprV v, CodeBlob& code, LValGlobs* l
}
// Handles a local variables declaration statement by delegating to pre_compile_let,
// passing the declaration's left side, its assigned value and its source location.
static void process_local_vars_declaration(V<ast_local_vars_declaration> v, CodeBlob& code) {
pre_compile_let(code, v->get_lhs(), v->get_assigned_val(), v->loc);
}
static void process_sequence(V<ast_sequence> v, CodeBlob& code) {
for (AnyV item : v->get_items()) {
process_statement(item, code);
process_any_statement(item, code);
}
}
static void process_assert_statement(V<ast_assert_statement> v, CodeBlob& code) {
std::vector<AnyExprV> args(3);
if (auto v_not = v->get_cond()->try_as<ast_unary_operator>(); v_not && v_not->tok == tok_logical_not) {
args[0] = v->get_thrown_code();
args[1] = v->get_cond()->as<ast_unary_operator>()->get_rhs();
args[2] = createV<ast_bool_const>(v->loc, true);
args[2]->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int));
args[2]->mutate()->assign_inferred_type(TypeDataInt::create());
} else {
args[0] = v->get_thrown_code();
args[1] = v->get_cond();
args[2] = createV<ast_bool_const>(v->loc, false);
args[2]->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int));
args[2]->mutate()->assign_inferred_type(TypeDataInt::create());
}
const FunctionData* builtin_sym = lookup_global_symbol("__throw_if_unless")->as<FunctionData>();
std::vector<var_idx_t> args_vars = pre_compile_tensor(code, args);
gen_op_call(code, TypeExpr::new_unit(), v->loc, std::move(args_vars), builtin_sym);
gen_op_call(code, TypeDataVoid::create(), v->loc, std::move(args_vars), builtin_sym);
}
static void process_catch_variable(AnyExprV v_catch_var, CodeBlob& code) {
if (auto v_ident = v_catch_var->try_as<ast_identifier>()) {
const LocalVarData* var_ref = v_ident->sym->as<LocalVarData>();
if (auto v_ref = v_catch_var->try_as<ast_reference>(); v_ref && v_ref->sym) { // not underscore
const LocalVarData* var_ref = v_ref->sym->as<LocalVarData>();
tolk_assert(var_ref->idx == -1);
var_ref->mutate()->assign_idx(code.create_var(v_catch_var->inferred_type, var_ref, v_catch_var->loc));
}
@ -557,7 +525,7 @@ static void process_try_catch_statement(V<ast_try_catch_statement> v, CodeBlob&
code.require_callxargs = true;
Op& try_catch_op = code.emplace_back(v->loc, Op::_TryCatch);
code.push_set_cur(try_catch_op.block0);
process_statement(v->get_try_body(), code);
process_any_statement(v->get_try_body(), code);
code.close_pop_cur(v->get_try_body()->loc_end);
code.push_set_cur(try_catch_op.block1);
@ -567,7 +535,7 @@ static void process_try_catch_statement(V<ast_try_catch_statement> v, CodeBlob&
process_catch_variable(catch_vars[0], code);
process_catch_variable(catch_vars[1], code);
try_catch_op.left = pre_compile_tensor(code, {catch_vars[1], catch_vars[0]});
process_statement(v->get_catch_body(), code);
process_any_statement(v->get_catch_body(), code);
code.close_pop_cur(v->get_catch_body()->loc_end);
}
@ -575,7 +543,7 @@ static void process_repeat_statement(V<ast_repeat_statement> v, CodeBlob& code)
std::vector<var_idx_t> tmp_vars = pre_compile_expr(v->get_cond(), code);
Op& repeat_op = code.emplace_back(v->loc, Op::_Repeat, tmp_vars);
code.push_set_cur(repeat_op.block0);
process_statement(v->get_body(), code);
process_any_statement(v->get_body(), code);
code.close_pop_cur(v->get_body()->loc_end);
}
@ -583,10 +551,10 @@ static void process_if_statement(V<ast_if_statement> v, CodeBlob& code) {
std::vector<var_idx_t> tmp_vars = pre_compile_expr(v->get_cond(), code);
Op& if_op = code.emplace_back(v->loc, Op::_If, std::move(tmp_vars));
code.push_set_cur(if_op.block0);
process_statement(v->get_if_body(), code);
process_any_statement(v->get_if_body(), code);
code.close_pop_cur(v->get_if_body()->loc_end);
code.push_set_cur(if_op.block1);
process_statement(v->get_else_body(), code);
process_any_statement(v->get_else_body(), code);
code.close_pop_cur(v->get_else_body()->loc_end);
if (v->is_ifnot) {
std::swap(if_op.block0, if_op.block1);
@ -596,7 +564,7 @@ static void process_if_statement(V<ast_if_statement> v, CodeBlob& code) {
static void process_do_while_statement(V<ast_do_while_statement> v, CodeBlob& code) {
Op& until_op = code.emplace_back(v->loc, Op::_Until);
code.push_set_cur(until_op.block0);
process_statement(v->get_body(), code);
process_any_statement(v->get_body(), code);
// in TVM, there is only "do until", but in Tolk, we want "do while"
// here we negate condition to pass it forward to legacy to Op::_Until
@ -621,7 +589,12 @@ static void process_do_while_statement(V<ast_do_while_statement> v, CodeBlob& co
} else {
until_cond = createV<ast_unary_operator>(cond->loc, "!", tok_logical_not, cond);
}
until_cond->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int));
until_cond->mutate()->assign_inferred_type(TypeDataInt::create());
if (auto v_bin = until_cond->try_as<ast_binary_operator>(); v_bin && !v_bin->fun_ref) {
v_bin->mutate()->assign_fun_ref(lookup_global_symbol("_" + static_cast<std::string>(v_bin->operator_name) + "_")->as<FunctionData>());
} else if (auto v_un = until_cond->try_as<ast_unary_operator>(); v_un && !v_un->fun_ref) {
v_un->mutate()->assign_fun_ref(lookup_global_symbol(static_cast<std::string>(v_un->operator_name) + "_")->as<FunctionData>());
}
until_op.left = pre_compile_expr(until_cond, code);
code.close_pop_cur(v->get_body()->loc_end);
@ -633,7 +606,7 @@ static void process_while_statement(V<ast_while_statement> v, CodeBlob& code) {
while_op.left = pre_compile_expr(v->get_cond(), code);
code.close_pop_cur(v->get_body()->loc);
code.push_set_cur(while_op.block1);
process_statement(v->get_body(), code);
process_any_statement(v->get_body(), code);
code.close_pop_cur(v->get_body()->loc_end);
}
@ -641,16 +614,16 @@ static void process_throw_statement(V<ast_throw_statement> v, CodeBlob& code) {
if (v->has_thrown_arg()) {
const FunctionData* builtin_sym = lookup_global_symbol("__throw_arg")->as<FunctionData>();
std::vector<var_idx_t> args_vars = pre_compile_tensor(code, {v->get_thrown_arg(), v->get_thrown_code()});
gen_op_call(code, TypeExpr::new_unit(), v->loc, std::move(args_vars), builtin_sym);
gen_op_call(code, TypeDataVoid::create(), v->loc, std::move(args_vars), builtin_sym);
} else {
const FunctionData* builtin_sym = lookup_global_symbol("__throw")->as<FunctionData>();
std::vector<var_idx_t> args_vars = pre_compile_tensor(code, {v->get_thrown_code()});
gen_op_call(code, TypeExpr::new_unit(), v->loc, std::move(args_vars), builtin_sym);
gen_op_call(code, TypeDataVoid::create(), v->loc, std::move(args_vars), builtin_sym);
}
}
static void process_return_statement(V<ast_return_statement> v, CodeBlob& code) {
std::vector<var_idx_t> return_vars = pre_compile_expr(v->get_return_value(), code);
std::vector<var_idx_t> return_vars = v->has_return_value() ? pre_compile_expr(v->get_return_value(), code) : std::vector<var_idx_t>{};
if (code.fun_ref->does_return_self()) {
tolk_assert(return_vars.size() == 1);
return_vars = {};
@ -680,10 +653,8 @@ static void append_implicit_return_statement(SrcLocation loc_end, CodeBlob& code
}
void process_statement(AnyV v, CodeBlob& code) {
void process_any_statement(AnyV v, CodeBlob& code) {
switch (v->type) {
case ast_local_vars_declaration:
return process_local_vars_declaration(v->as<ast_local_vars_declaration>(), code);
case ast_sequence:
return process_sequence(v->as<ast_sequence>(), code);
case ast_return_statement:
@ -709,30 +680,31 @@ void process_statement(AnyV v, CodeBlob& code) {
}
}
static void convert_function_body_to_CodeBlob(V<ast_function_declaration> v, V<ast_sequence> v_body) {
CodeBlob* blob = new CodeBlob{static_cast<std::string>(v->get_identifier()->name), v->loc, v->fun_ref, v->ret_type};
static void convert_function_body_to_CodeBlob(const FunctionData* fun_ref, FunctionBodyCode* code_body) {
auto v_body = fun_ref->ast_root->as<ast_function_declaration>()->get_body()->as<ast_sequence>();
CodeBlob* blob = new CodeBlob{fun_ref->name, fun_ref->loc, fun_ref};
FormalArgList legacy_arg_list;
for (int i = 0; i < v->get_num_params(); ++i) {
legacy_arg_list.emplace_back(v->get_param(i)->declared_type, &v->fun_ref->parameters[i], v->loc);
for (const LocalVarData& param : fun_ref->parameters) {
legacy_arg_list.emplace_back(param.declared_type, &param, param.loc);
}
blob->import_params(std::move(legacy_arg_list));
for (AnyV item : v_body->get_items()) {
process_statement(item, *blob);
process_any_statement(item, *blob);
}
if (v->fun_ref->is_implicit_return()) {
if (fun_ref->is_implicit_return()) {
append_implicit_return_statement(v_body->loc_end, *blob);
}
blob->close_blk(v_body->loc_end);
std::get<FunctionBodyCode*>(v->fun_ref->body)->set_code(blob);
code_body->set_code(blob);
}
static void convert_asm_body_to_AsmOp(V<ast_function_declaration> v, V<ast_asm_body> v_body) {
int cnt = v->get_num_params();
int width = v->ret_type->get_width();
static void convert_asm_body_to_AsmOp(const FunctionData* fun_ref, FunctionBodyAsm* asm_body) {
int cnt = fun_ref->get_num_params();
int width = fun_ref->inferred_return_type->calc_width_on_stack();
std::vector<AsmOp> asm_ops;
for (AnyV v_child : v_body->get_asm_commands()) {
for (AnyV v_child : fun_ref->ast_root->as<ast_function_declaration>()->get_body()->as<ast_asm_body>()->get_asm_commands()) {
std::string_view ops = v_child->as<ast_string_const>()->str_val; // <op>\n<op>\n...
std::string op;
for (char c : ops) {
@ -756,21 +728,77 @@ static void convert_asm_body_to_AsmOp(V<ast_function_declaration> v, V<ast_asm_b
}
}
std::get<FunctionBodyAsm*>(v->fun_ref->body)->set_code(std::move(asm_ops));
asm_body->set_code(std::move(asm_ops));
}
void pipeline_convert_ast_to_legacy_Expr_Op(const AllSrcFiles& all_src_files) {
for (const SrcFile* file : all_src_files) {
for (AnyV v : file->ast->as<ast_tolk_file>()->get_toplevel_declarations()) {
if (auto v_func = v->try_as<ast_function_declaration>()) {
if (v_func->is_asm_function()) {
convert_asm_body_to_AsmOp(v_func, v_func->get_body()->as<ast_asm_body>());
} else if (!v_func->marked_as_builtin) {
convert_function_body_to_CodeBlob(v_func, v_func->get_body()->as<ast_sequence>());
class UpdateArgRetOrderConsideringStackWidth final {
public:
static bool should_visit_function(const FunctionData* fun_ref) {
return !fun_ref->is_generic_function() && (!fun_ref->ret_order.empty() || !fun_ref->arg_order.empty());
}
static void start_visiting_function(const FunctionData* fun_ref, V<ast_function_declaration> v_function) {
int total_arg_mutate_width = 0;
bool has_arg_width_not_1 = false;
for (const LocalVarData& param : fun_ref->parameters) {
int arg_width = param.declared_type->calc_width_on_stack();
has_arg_width_not_1 |= arg_width != 1;
total_arg_mutate_width += param.is_mutate_parameter() * arg_width;
}
// example: `fun f(a: int, b: (int, (int, int)), c: int)` with `asm (b a c)`
// current arg_order is [1 0 2]
// needs to be converted to [1 2 3 0 4] because b width is 3
if (has_arg_width_not_1) {
int total_arg_width = 0;
std::vector<int> cum_arg_width;
cum_arg_width.reserve(1 + fun_ref->get_num_params());
cum_arg_width.push_back(0);
for (const LocalVarData& param : fun_ref->parameters) {
cum_arg_width.push_back(total_arg_width += param.declared_type->calc_width_on_stack());
}
std::vector<int> arg_order;
for (int i = 0; i < fun_ref->get_num_params(); ++i) {
int j = fun_ref->arg_order[i];
int c1 = cum_arg_width[j], c2 = cum_arg_width[j + 1];
while (c1 < c2) {
arg_order.push_back(c1++);
}
}
fun_ref->mutate()->assign_arg_order(std::move(arg_order));
}
// example: `fun f(mutate self: slice): slice` with `asm(-> 1 0)`
// ret_order is a shuffled range 0...N
// validate N: a function should return value and mutated arguments onto a stack
if (!fun_ref->ret_order.empty()) {
size_t expected_width = fun_ref->inferred_return_type->calc_width_on_stack() + total_arg_mutate_width;
if (expected_width != fun_ref->ret_order.size()) {
v_function->get_body()->error("ret_order (after ->) expected to contain " + std::to_string(expected_width) + " numbers");
}
}
}
};
class ConvertASTToLegacyOpVisitor final {
public:
static bool should_visit_function(const FunctionData* fun_ref) {
return !fun_ref->is_generic_function();
}
static void start_visiting_function(const FunctionData* fun_ref, V<ast_function_declaration>) {
tolk_assert(fun_ref->is_type_inferring_done());
if (fun_ref->is_code_function()) {
convert_function_body_to_CodeBlob(fun_ref, std::get<FunctionBodyCode*>(fun_ref->body));
} else if (fun_ref->is_asm_function()) {
convert_asm_body_to_AsmOp(fun_ref, std::get<FunctionBodyAsm*>(fun_ref->body));
}
}
};
// Pipeline entry point: runs two passes over all functions, in this order.
// First UpdateArgRetOrderConsideringStackWidth, then ConvertASTToLegacyOpVisitor.
void pipeline_convert_ast_to_legacy_Expr_Op() {
visit_ast_of_all_functions<UpdateArgRetOrderConsideringStackWidth>();
visit_ast_of_all_functions<ConvertASTToLegacyOpVisitor>();
}
} // namespace tolk

View file

@ -74,12 +74,12 @@ class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody {
parent::visit(v);
}
void visit(V<ast_tensor_square> v) override {
void visit(V<ast_typed_tuple> v) override {
mark_vertex_cur_or_rvalue(v);
parent::visit(v);
}
void visit(V<ast_identifier> v) override {
void visit(V<ast_reference> v) override {
mark_vertex_cur_or_rvalue(v);
}
@ -99,10 +99,6 @@ class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody {
mark_vertex_cur_or_rvalue(v);
}
// `self` keyword: marks the vertex via mark_vertex_cur_or_rvalue and does not descend further.
void visit(V<ast_self_keyword> v) override {
mark_vertex_cur_or_rvalue(v);
}
void visit(V<ast_argument> v) override {
mark_vertex_cur_or_rvalue(v);
MarkingState saved = enter_state(v->passed_as_mutate ? MarkingState::LValueAndRValue : MarkingState::RValue);
@ -115,6 +111,13 @@ class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody {
parent::visit(v);
}
// Dot access: the vertex itself is marked with the current state,
// while its object is always visited in rvalue state.
void visit(V<ast_dot_access> v) override {
  mark_vertex_cur_or_rvalue(v);

  MarkingState state_on_entry = enter_state(MarkingState::RValue);
  parent::visit(v->get_obj());
  restore_state(state_on_entry);
}
void visit(V<ast_function_call> v) override {
mark_vertex_cur_or_rvalue(v);
MarkingState saved = enter_state(MarkingState::RValue);
@ -122,15 +125,6 @@ class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody {
restore_state(saved);
}
// Method call via dot: the call vertex is marked with the current state;
// both the object and the argument list are visited after entering rvalue state.
void visit(V<ast_dot_method_call> v) override {
  mark_vertex_cur_or_rvalue(v);

  MarkingState state_on_entry = enter_state(MarkingState::RValue);
  parent::visit(v->get_obj());

  enter_state(MarkingState::RValue);
  parent::visit(v->get_arg_list());

  restore_state(state_on_entry);
}
void visit(V<ast_underscore> v) override {
// underscore is a placeholder to ignore left side of assignment: `(a, _) = get2params()`
// so, if current state is "lvalue", `_` will be marked as lvalue, and ok
@ -139,6 +133,24 @@ class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody {
mark_vertex_cur_or_rvalue(v);
}
// Assignment: the left side is visited in lvalue state, the right side in rvalue state;
// the state that was active on entry is restored afterwards.
void visit(V<ast_assign> v) override {
  mark_vertex_cur_or_rvalue(v);

  MarkingState state_on_entry = enter_state(MarkingState::LValue);
  parent::visit(v->get_lhs());

  enter_state(MarkingState::RValue);
  parent::visit(v->get_rhs());

  restore_state(state_on_entry);
}
// Set-assignment: the left side is visited in "lvalue and rvalue" state,
// the right side in rvalue state; the state active on entry is restored afterwards.
void visit(V<ast_set_assign> v) override {
  mark_vertex_cur_or_rvalue(v);

  MarkingState state_on_entry = enter_state(MarkingState::LValueAndRValue);
  parent::visit(v->get_lhs());

  enter_state(MarkingState::RValue);
  parent::visit(v->get_rhs());

  restore_state(state_on_entry);
}
void visit(V<ast_unary_operator> v) override {
mark_vertex_cur_or_rvalue(v);
MarkingState saved = enter_state(MarkingState::RValue);
@ -148,10 +160,8 @@ class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody {
void visit(V<ast_binary_operator> v) override {
mark_vertex_cur_or_rvalue(v);
MarkingState saved = enter_state(v->is_set_assign() ? MarkingState::LValueAndRValue : v->is_assign() ? MarkingState::LValue : MarkingState::RValue);
parent::visit(v->get_lhs());
enter_state(MarkingState::RValue);
parent::visit(v->get_rhs());
MarkingState saved = enter_state(MarkingState::RValue);
parent::visit(v);
restore_state(saved);
}
@ -162,15 +172,18 @@ class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody {
restore_state(saved);
}
void visit(V<ast_local_vars_declaration> v) override {
MarkingState saved = enter_state(MarkingState::LValue);
parent::visit(v->get_lhs());
enter_state(MarkingState::RValue);
parent::visit(v->get_assigned_val());
restore_state(saved);
// `expr as T`: the marking state is intentionally left unchanged for the inner expression,
// for `mutate (t.0 as int)` both `t.0 as int` and `t.0` are lvalue
void visit(V<ast_cast_as_operator> v) override {
  mark_vertex_cur_or_rvalue(v);
  auto inner_expr = v->get_expr();
  parent::visit(inner_expr);
}
void visit(V<ast_local_var> v) override {
void visit(V<ast_local_var_lhs> v) override {
tolk_assert(cur_state == MarkingState::LValue);
mark_vertex_cur_or_rvalue(v);
parent::visit(v);
}
void visit(V<ast_local_vars_declaration> v) override {
tolk_assert(cur_state == MarkingState::LValue);
mark_vertex_cur_or_rvalue(v);
parent::visit(v);
@ -183,10 +196,22 @@ class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody {
restore_state(saved);
parent::visit(v->get_catch_body());
}
public:
// This visitor processes only code functions that are not generic.
bool should_visit_function(const FunctionData* fun_ref) override {
  if (!fun_ref->is_code_function()) {
    return false;
  }
  return !fun_ref->is_generic_function();
}
};
void pipeline_calculate_rvalue_lvalue(const AllSrcFiles& all_src_files) {
visit_ast_of_all_functions<CalculateRvalueLvalueVisitor>(all_src_files);
// Pipeline entry point: runs CalculateRvalueLvalueVisitor via visit_ast_of_all_functions.
void pipeline_calculate_rvalue_lvalue() {
visit_ast_of_all_functions<CalculateRvalueLvalueVisitor>();
}
// Single-function variant: runs CalculateRvalueLvalueVisitor on fun_ref only,
// and only if the visitor's own should_visit_function accepts it.
void pipeline_calculate_rvalue_lvalue(const FunctionData* fun_ref) {
  CalculateRvalueLvalueVisitor visitor;
  if (!visitor.should_visit_function(fun_ref)) {
    return;
  }
  auto v_function = fun_ref->ast_root->as<ast_function_declaration>();
  visitor.start_visiting_function(fun_ref, v_function);
}
} // namespace tolk

View file

@ -33,31 +33,27 @@ static void fire_error_impure_operation_inside_pure_function(AnyV v) {
class CheckImpureOperationsInPureFunctionVisitor final : public ASTVisitorFunctionBody {
static void fire_if_global_var(AnyExprV v) {
if (auto v_ident = v->try_as<ast_identifier>()) {
if (auto v_ident = v->try_as<ast_reference>()) {
if (v_ident->sym->try_as<GlobalVarData>()) {
fire_error_impure_operation_inside_pure_function(v);
}
}
}
void visit(V<ast_local_var> v) override {
if (v->marked_as_redef) {
fire_if_global_var(v->get_identifier());
}
}
void visit(V<ast_binary_operator> v) override {
if (v->is_set_assign() || v->is_assign()) {
// Assignment inside a pure function: fires an error if the left side refers to a global variable,
// then continues traversal into the children.
void visit(V<ast_assign> v) override {
  auto assigned_to = v->get_lhs();
  fire_if_global_var(assigned_to);

  parent::visit(v);
}
// Set-assignment inside a pure function: fires an error if the left side refers to a global variable,
// then continues traversal into the children.
void visit(V<ast_set_assign> v) override {
  auto modified_target = v->get_lhs();
  fire_if_global_var(modified_target);

  parent::visit(v);
}
void visit(V<ast_function_call> v) override {
// most likely it's a global function, but also may be `some_var(args)` or even `getF()(args)`
// v is `globalF(args)` / `globalF<int>(args)` / `obj.method(args)` / `local_var(args)` / `getF()(args)`
if (!v->fun_maybe) {
// calling variables is always impure, no considerations about what's there at runtime
// `local_var(args)` is always impure, no considerations about what's there at runtime
fire_error_impure_operation_inside_pure_function(v);
}
@ -68,14 +64,6 @@ class CheckImpureOperationsInPureFunctionVisitor final : public ASTVisitorFuncti
parent::visit(v);
}
// Method call inside a pure function: the called method must itself be marked as pure,
// otherwise an error is fired; children are visited afterwards.
void visit(V<ast_dot_method_call> v) override {
  const bool callee_is_pure = v->fun_ref->is_marked_as_pure();
  if (!callee_is_pure) {
    fire_error_impure_operation_inside_pure_function(v);
  }

  parent::visit(v);
}
void visit(V<ast_argument> v) override {
if (v->passed_as_mutate) {
fire_if_global_var(v->get_expr());
@ -93,15 +81,13 @@ class CheckImpureOperationsInPureFunctionVisitor final : public ASTVisitorFuncti
}
public:
void start_visiting_function(V<ast_function_declaration> v_function) override {
if (v_function->marked_as_pure) {
parent::visit(v_function->get_body());
}
// Only non-generic code functions marked as pure are checked by this visitor.
bool should_visit_function(const FunctionData* fun_ref) override {
  if (!fun_ref->is_code_function()) {
    return false;
  }
  if (fun_ref->is_generic_function()) {
    return false;
  }
  return fun_ref->is_marked_as_pure();
}
};
void pipeline_check_pure_impure_operations(const AllSrcFiles& all_src_files) {
visit_ast_of_all_functions<CheckImpureOperationsInPureFunctionVisitor>(all_src_files);
// Pipeline entry point: runs CheckImpureOperationsInPureFunctionVisitor via visit_ast_of_all_functions.
void pipeline_check_pure_impure_operations() {
visit_ast_of_all_functions<CheckImpureOperationsInPureFunctionVisitor>();
}
} // namespace tolk

View file

@ -36,9 +36,18 @@ static void fire_error_cannot_be_used_as_lvalue(AnyV v, const std::string& detai
v->error(details + " can not be used as lvalue");
}
// handle when a function used as rvalue, like `var cb = f`
static void handle_function_used_as_noncall(AnyExprV v, const FunctionData* fun_ref) {
fun_ref->mutate()->assign_is_used_as_noncall();
// Reports an attempt to modify an immutable local variable at vertex v.
// A variable named `self` with idx 0 gets a dedicated message suggesting `mutate self`.
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
static void fire_error_modifying_immutable_variable(AnyExprV v, const LocalVarData* var_ref) {
  const bool refers_to_self = var_ref->idx == 0 && var_ref->name == "self";
  if (!refers_to_self) {
    v->error("modifying immutable variable `" + var_ref->name + "`");
  } else {
    v->error("modifying `self`, which is immutable by default; probably, you want to declare `mutate self`");
  }
}
// validate a function used as rvalue, like `var cb = f`
// it's not a generic function (ensured earlier at type inferring) and has some more restrictions
static void validate_function_used_as_noncall(AnyExprV v, const FunctionData* fun_ref) {
if (!fun_ref->arg_order.empty() || !fun_ref->ret_order.empty()) {
v->error("saving `" + fun_ref->name + "` into a variable will most likely lead to invalid usage, since it changes the order of variables on the stack");
}
@ -48,16 +57,30 @@ static void handle_function_used_as_noncall(AnyExprV v, const FunctionData* fun_
}
class CheckRValueLvalueVisitor final : public ASTVisitorFunctionBody {
// An assignment expression itself can not be used as lvalue; children are checked afterwards.
void visit(V<ast_assign> v) override {
  const bool used_as_lvalue = v->is_lvalue;
  if (used_as_lvalue) {
    fire_error_cannot_be_used_as_lvalue(v, "assignment");
  }

  parent::visit(v);
}
// A set-assignment expression itself can not be used as lvalue; children are checked afterwards.
void visit(V<ast_set_assign> v) override {
  const bool used_as_lvalue = v->is_lvalue;
  if (used_as_lvalue) {
    fire_error_cannot_be_used_as_lvalue(v, "assignment");
  }

  parent::visit(v);
}
void visit(V<ast_binary_operator> v) override {
if (v->is_lvalue) {
fire_error_cannot_be_used_as_lvalue(v, "operator `" + static_cast<std::string>(v->operator_name));
fire_error_cannot_be_used_as_lvalue(v, "operator " + static_cast<std::string>(v->operator_name));
}
parent::visit(v);
}
void visit(V<ast_unary_operator> v) override {
if (v->is_lvalue) {
fire_error_cannot_be_used_as_lvalue(v, "operator `" + static_cast<std::string>(v->operator_name));
fire_error_cannot_be_used_as_lvalue(v, "operator " + static_cast<std::string>(v->operator_name));
}
parent::visit(v);
}
@ -69,6 +92,11 @@ class CheckRValueLvalueVisitor final : public ASTVisitorFunctionBody {
parent::visit(v);
}
// Cast operator: no check is done on the cast vertex itself, only the inner expression is visited.
void visit(V<ast_cast_as_operator> v) override {
// if `x as int` is lvalue, then `x` is also lvalue, so check that `x` is ok
parent::visit(v->get_expr());
}
void visit(V<ast_int_const> v) override {
if (v->is_lvalue) {
fire_error_cannot_be_used_as_lvalue(v, "literal");
@ -93,46 +121,45 @@ class CheckRValueLvalueVisitor final : public ASTVisitorFunctionBody {
}
}
// Dot access that reaches this visitor as a standalone expression.
// If it is used as rvalue and its target function is known, that function is validated
// for usage as a non-call value.
void visit(V<ast_dot_access> v) override {
  // a reference to a method used as rvalue, like `var v = t.tupleAt`
  // the target is checked for null before use, the same way the function reference is checked
  // in visit(V<ast_reference>): validate_function_used_as_noncall dereferences it
  if (const FunctionData* fun_ref = v->target; fun_ref && v->is_rvalue) {
    validate_function_used_as_noncall(v, fun_ref);
  }
}
void visit(V<ast_function_call> v) override {
if (v->is_lvalue) {
fire_error_cannot_be_used_as_lvalue(v, "function call");
}
if (!v->fun_maybe) {
parent::visit(v->get_called_f());
parent::visit(v->get_callee());
}
// for `f()` don't visit ast_reference `f`, to detect `f` usage as non-call, like `var cb = f`
// same for `obj.method()`, don't visit ast_reference method, visit only obj
if (v->is_dot_call()) {
parent::visit(v->get_dot_obj());
}
// for `f(...)` don't visit identifier `f`, to detect `f` usage as non-call, like `var cb = f`
for (int i = 0; i < v->get_num_args(); ++i) {
parent::visit(v->get_arg(i));
}
}
// A method call can not be used as lvalue.
// The object and every argument are visited; nothing else of the call is visited here.
void visit(V<ast_dot_method_call> v) override {
  const bool used_as_lvalue = v->is_lvalue;
  if (used_as_lvalue) {
    fire_error_cannot_be_used_as_lvalue(v, "method call");
  }

  parent::visit(v->get_obj());

  for (int arg_idx = 0; arg_idx < v->get_num_args(); ++arg_idx) {
    parent::visit(v->get_arg(arg_idx));
  }
}
void visit(V<ast_local_var> v) override {
void visit(V<ast_local_var_lhs> v) override {
if (v->marked_as_redef) {
tolk_assert(v->var_maybe); // always filled, but for `var g_var redef` might point not to a local
if (const LocalVarData* var_ref = v->var_maybe->try_as<LocalVarData>(); var_ref && var_ref->is_immutable()) {
tolk_assert(v->var_ref);
if (v->var_ref->is_immutable()) {
v->error("`redef` for immutable variable");
}
}
}
void visit(V<ast_identifier> v) override {
void visit(V<ast_reference> v) override {
if (v->is_lvalue) {
tolk_assert(v->sym);
if (const auto* var_ref = v->sym->try_as<LocalVarData>(); var_ref && var_ref->is_immutable()) {
v->error("modifying immutable variable `" + var_ref->name + "`");
fire_error_modifying_immutable_variable(v, var_ref);
} else if (v->sym->try_as<GlobalConstData>()) {
v->error("modifying immutable constant");
} else if (v->sym->try_as<FunctionData>()) {
@ -142,13 +169,7 @@ class CheckRValueLvalueVisitor final : public ASTVisitorFunctionBody {
// a reference to a function used as rvalue, like `var v = someFunction`
if (const FunctionData* fun_ref = v->sym->try_as<FunctionData>(); fun_ref && v->is_rvalue) {
handle_function_used_as_noncall(v, fun_ref);
}
}
void visit(V<ast_self_keyword> v) override {
if (v->is_lvalue && v->param_ref->is_immutable()) {
v->error("modifying `self`, which is immutable by default; probably, you want to declare `mutate self`");
validate_function_used_as_noncall(v, fun_ref);
}
}
@ -163,10 +184,15 @@ class CheckRValueLvalueVisitor final : public ASTVisitorFunctionBody {
// skip catch(_,excNo), there are always vars due to grammar, lvalue/rvalue aren't set to them
parent::visit(v->get_catch_body());
}
public:
bool should_visit_function(const FunctionData* fun_ref) override {
return fun_ref->is_code_function() && !fun_ref->is_generic_function();
}
};
void pipeline_check_rvalue_lvalue(const AllSrcFiles& all_src_files) {
visit_ast_of_all_functions<CheckRValueLvalueVisitor>(all_src_files);
void pipeline_check_rvalue_lvalue() {
visit_ast_of_all_functions<CheckRValueLvalueVisitor>();
}
} // namespace tolk

View file

@ -17,6 +17,7 @@
#include "tolk.h"
#include "ast.h"
#include "ast-replacer.h"
#include "type-system.h"
/*
* This pipe is supposed to do constant folding, like replacing `2 + 3` with `5`.
@ -33,7 +34,7 @@ namespace tolk {
class ConstantFoldingReplacer final : public ASTReplacerInFunctionBody {
static V<ast_int_const> create_int_const(SrcLocation loc, td::RefInt256&& intval) {
auto v_int = createV<ast_int_const>(loc, std::move(intval), {});
v_int->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int));
v_int->assign_inferred_type(TypeDataInt::create());
v_int->assign_rvalue_true();
return v_int;
}
@ -59,10 +60,15 @@ class ConstantFoldingReplacer final : public ASTReplacerInFunctionBody {
return v;
}
public:
// Constant folding is applied only to code functions that are not generic.
bool should_visit_function(const FunctionData* fun_ref) override {
  if (!fun_ref->is_code_function()) {
    return false;
  }
  return !fun_ref->is_generic_function();
}
};
void pipeline_constant_folding(const AllSrcFiles& all_src_files) {
replace_ast_of_all_functions<ConstantFoldingReplacer>(all_src_files);
// Pipeline entry point: runs ConstantFoldingReplacer via replace_ast_of_all_functions.
void pipeline_constant_folding() {
replace_ast_of_all_functions<ConstantFoldingReplacer>();
}
} // namespace tolk

View file

@ -111,17 +111,28 @@ class UnreachableStatementsDetectVisitor final {
}
public:
void start_visiting_function(V<ast_function_declaration> v_function) {
// Only code functions that are not generic are analyzed by this visitor.
static bool should_visit_function(const FunctionData* fun_ref) {
  if (!fun_ref->is_code_function()) {
    return false;
  }
  return !fun_ref->is_generic_function();
}
void start_visiting_function(const FunctionData* fun_ref, V<ast_function_declaration> v_function) {
bool control_flow_reaches_end = !always_returns(v_function->get_body()->as<ast_sequence>());
if (control_flow_reaches_end) {
v_function->fun_ref->mutate()->assign_is_implicit_return();
fun_ref->mutate()->assign_is_implicit_return();
}
}
};
void pipeline_detect_unreachable_statements(const AllSrcFiles& all_src_files) {
visit_ast_of_all_functions<UnreachableStatementsDetectVisitor>(all_src_files);
// Pipeline entry point: runs UnreachableStatementsDetectVisitor via visit_ast_of_all_functions.
void pipeline_detect_unreachable_statements() {
visit_ast_of_all_functions<UnreachableStatementsDetectVisitor>();
}
// Single-function variant: runs UnreachableStatementsDetectVisitor on fun_ref only,
// and only if should_visit_function accepts it.
void pipeline_detect_unreachable_statements(const FunctionData* fun_ref) {
  UnreachableStatementsDetectVisitor visitor;
  if (!UnreachableStatementsDetectVisitor::should_visit_function(fun_ref)) {
    return;
  }
  auto v_function = fun_ref->ast_root->as<ast_function_declaration>();
  visitor.start_visiting_function(fun_ref, v_function);
}
} // namespace tolk

View file

@ -38,7 +38,7 @@
namespace tolk {
AllSrcFiles pipeline_discover_and_parse_sources(const std::string& stdlib_filename, const std::string& entrypoint_filename) {
void pipeline_discover_and_parse_sources(const std::string& stdlib_filename, const std::string& entrypoint_filename) {
G.all_src_files.locate_and_register_source_file(stdlib_filename, {});
G.all_src_files.locate_and_register_source_file(entrypoint_filename, {});
@ -46,27 +46,25 @@ AllSrcFiles pipeline_discover_and_parse_sources(const std::string& stdlib_filena
tolk_assert(!file->ast);
file->ast = parse_src_file_to_ast(file);
// file->ast->debug_print();
// if (!file->is_stdlib_file()) file->ast->debug_print();
for (AnyV v_toplevel : file->ast->as<ast_tolk_file>()->get_toplevel_declarations()) {
if (auto v_import = v_toplevel->try_as<ast_import_statement>()) {
if (auto v_import = v_toplevel->try_as<ast_import_directive>()) {
std::string imported_str = v_import->get_file_name();
size_t cur_slash_pos = file->rel_filename.rfind('/');
std::string rel_filename = cur_slash_pos == std::string::npos || imported_str[0] == '@'
? std::move(imported_str)
: file->rel_filename.substr(0, cur_slash_pos + 1) + imported_str;
SrcFile* imported = G.all_src_files.locate_and_register_source_file(rel_filename, v_import->loc);
file->imports.push_back(SrcFile::ImportStatement{imported});
const SrcFile* imported = G.all_src_files.locate_and_register_source_file(rel_filename, v_import->loc);
file->imports.push_back(SrcFile::ImportDirective{imported});
v_import->mutate()->assign_src_file(imported);
}
}
}
// todo #ifdef TOLK_PROFILING
// lexer_measure_performance(G.all_src_files.get_all_files());
return G.all_src_files.get_all_files();
lexer_measure_performance(G.all_src_files);
}
} // namespace tolk

View file

@ -37,7 +37,7 @@ namespace tolk {
static void mark_function_used_dfs(const std::unique_ptr<Op>& op);
static void mark_function_used(const FunctionData* fun_ref) {
if (!fun_ref->is_regular_function() || fun_ref->is_really_used()) { // already handled
if (!fun_ref->is_code_function() || fun_ref->is_really_used()) { // already handled
return;
}
@ -66,7 +66,7 @@ static void mark_function_used_dfs(const std::unique_ptr<Op>& op) {
}
void pipeline_find_unused_symbols() {
for (const FunctionData* fun_ref : G.all_code_functions) {
for (const FunctionData* fun_ref : G.all_functions) {
if (fun_ref->is_method_id_not_empty()) { // get methods, main and other entrypoints, regular functions with @method_id
mark_function_used(fun_ref);
}

View file

@ -40,20 +40,15 @@ void FunctionBodyAsm::set_code(std::vector<AsmOp>&& code) {
static void generate_output_func(const FunctionData* fun_ref) {
tolk_assert(fun_ref->is_regular_function());
tolk_assert(fun_ref->is_code_function());
if (G.is_verbosity(2)) {
std::cerr << "\n\n=========================\nfunction " << fun_ref->name << " : " << fun_ref->full_type << std::endl;
std::cerr << "\n\n=========================\nfunction " << fun_ref->name << " : " << fun_ref->inferred_return_type << std::endl;
}
CodeBlob* code = std::get<FunctionBodyCode*>(fun_ref->body)->code;
if (G.is_verbosity(3)) {
code->print(std::cerr, 9);
}
code->simplify_var_types();
if (G.is_verbosity(5)) {
std::cerr << "after simplify_var_types: \n";
code->print(std::cerr, 0);
}
code->prune_unreachable_code();
if (G.is_verbosity(5)) {
std::cerr << "after prune_unreachable: \n";
@ -112,11 +107,11 @@ static void generate_output_func(const FunctionData* fun_ref) {
}
}
void pipeline_generate_fif_output_to_std_cout(const AllSrcFiles& all_src_files) {
void pipeline_generate_fif_output_to_std_cout() {
std::cout << "\"Asm.fif\" include\n";
std::cout << "// automatically generated from ";
bool need_comma = false;
for (const SrcFile* file : all_src_files) {
for (const SrcFile* file : G.all_src_files) {
if (!file->is_stdlib_file()) {
if (need_comma) {
std::cout << ", ";
@ -129,9 +124,9 @@ void pipeline_generate_fif_output_to_std_cout(const AllSrcFiles& all_src_files)
std::cout << "PROGRAM{\n";
bool has_main_procedure = false;
for (const FunctionData* fun_ref : G.all_code_functions) {
for (const FunctionData* fun_ref : G.all_functions) {
if (!fun_ref->does_need_codegen()) {
if (G.is_verbosity(2)) {
if (G.is_verbosity(2) && fun_ref->is_code_function()) {
std::cerr << fun_ref->name << ": code not generated, function does not need codegen\n";
}
continue;
@ -164,7 +159,7 @@ void pipeline_generate_fif_output_to_std_cout(const AllSrcFiles& all_src_files)
std::cout << std::string(2, ' ') << "DECLGLOBVAR " << var_ref->name << "\n";
}
for (const FunctionData* fun_ref : G.all_code_functions) {
for (const FunctionData* fun_ref : G.all_functions) {
if (!fun_ref->does_need_codegen()) {
continue;
}

View file

@ -1,524 +0,0 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tolk.h"
#include "src-file.h"
#include "ast.h"
#include "ast-visitor.h"
/*
* This pipe does type inferring.
* It will be fully rewritten, because current type system is based on Hindley-Milner (unifying usages),
* and I am going to introduce a static type system, drop TypeExpr completely, etc.
* Currently, after this inferring, lots of `te_Indirect` and partially complete types still exist,
* they are partially refined while converting AST to legacy.
*/
namespace tolk {
class InferAndCheckTypesInsideFunctionVisitor final : public ASTVisitorFunctionBody {
const FunctionData* current_function = nullptr;
// Checks that `inferred` can be unified with the atomic `int` type.
// Returns true on success; a UnifyError raised by `unify` is swallowed and reported as false.
static bool expect_integer(TypeExpr* inferred) {
  TypeExpr* int_type = TypeExpr::new_atomic(TypeExpr::_Int);
  bool unified = true;
  try {
    unify(inferred, int_type);
  } catch (UnifyError&) {
    unified = false;
  }
  return unified;
}
// Convenience overload: checks the already inferred type of an expression vertex.
static bool expect_integer(AnyExprV v_inferred) {
  TypeExpr* type_of_expr = v_inferred->inferred_type;
  return expect_integer(type_of_expr);
}
// Tells whether `return_expr` is acceptable as a returned value of a function returning `self`.
// Accepted shapes: `self`, `self.someMethod()` (the method must return self, recursively on the object),
// and `cond ? a : b` where both branches are accepted.
static bool is_expr_valid_as_return_self(AnyExprV return_expr) {
  // `return self.someMethod()`
  if (auto v_call = return_expr->try_as<ast_dot_method_call>()) {
    if (!v_call->fun_ref->does_return_self()) {
      return false;
    }
    return is_expr_valid_as_return_self(v_call->get_obj());
  }
  // `return cond ? ... : ...`
  if (auto v_ternary = return_expr->try_as<ast_ternary_operator>()) {
    if (!is_expr_valid_as_return_self(v_ternary->get_when_true())) {
      return false;
    }
    return is_expr_valid_as_return_self(v_ternary->get_when_false());
  }
  // `return self` is the only remaining valid form
  return return_expr->type == ast_self_keyword;
}
// `(expr)` has exactly the type of `expr`.
void visit(V<ast_parenthesized_expression> v) override {
  auto inner = v->get_expr();
  parent::visit(inner);
  v->mutate()->assign_inferred_type(inner->inferred_type);
}
// `(a, b, ...)`: an empty tensor is typed as unit, otherwise as a tensor of item types (in source order).
void visit(V<ast_tensor> v) override {
  TypeExpr* tensor_type;
  if (v->empty()) {
    tensor_type = TypeExpr::new_unit();
  } else {
    std::vector<TypeExpr*> item_types;
    item_types.reserve(v->get_items().size());
    for (AnyExprV item : v->get_items()) {
      parent::visit(item);    // infer the item first, then collect its type
      item_types.push_back(item->inferred_type);
    }
    tensor_type = TypeExpr::new_tensor(std::move(item_types));
  }
  v->mutate()->assign_inferred_type(tensor_type);
}
// `[a, b, ...]`: typed as a tuple wrapping either unit (when empty) or a tensor of item types.
void visit(V<ast_tensor_square> v) override {
  TypeExpr* tuple_type;
  if (v->empty()) {
    tuple_type = TypeExpr::new_tuple(TypeExpr::new_unit());
  } else {
    std::vector<TypeExpr*> item_types;
    item_types.reserve(v->get_items().size());
    for (AnyExprV item : v->get_items()) {
      parent::visit(item);    // infer the item first, then collect its type
      item_types.push_back(item->inferred_type);
    }
    tuple_type = TypeExpr::new_tuple(TypeExpr::new_tensor(std::move(item_types), false));
  }
  v->mutate()->assign_inferred_type(tuple_type);
}
// An identifier takes its type from the symbol it was resolved to:
// global variable / local variable -> declared type, constant -> inferred type, function -> full type.
// For any other kind of symbol, no type is assigned here.
void visit(V<ast_identifier> v) override {
  if (const auto* glob_ref = v->sym->try_as<GlobalVarData>()) {
    v->mutate()->assign_inferred_type(glob_ref->declared_type);
    return;
  }
  if (const auto* const_ref = v->sym->try_as<GlobalConstData>()) {
    v->mutate()->assign_inferred_type(const_ref->inferred_type);
    return;
  }
  if (const auto* fun_ref = v->sym->try_as<FunctionData>()) {
    v->mutate()->assign_inferred_type(fun_ref->full_type);
    return;
  }
  if (const auto* var_ref = v->sym->try_as<LocalVarData>()) {
    v->mutate()->assign_inferred_type(var_ref->declared_type);
  }
}
// An integer literal is always typed as `int`.
void visit(V<ast_int_const> v) override {
  TypeExpr* int_type = TypeExpr::new_atomic(TypeExpr::_Int);
  v->mutate()->assign_inferred_type(int_type);
}
// A string literal is typed by its modifier:
// no modifier, 's' or 'a' -> slice; 'u', 'h', 'H' or 'c' -> int; any other modifier leaves the type unassigned.
void visit(V<ast_string_const> v) override {
  const auto modifier = v->modifier;
  if (modifier == 0 || modifier == 's' || modifier == 'a') {
    v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Slice));
  } else if (modifier == 'u' || modifier == 'h' || modifier == 'H' || modifier == 'c') {
    v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int));
  }
}
// Boolean literals are typed as `int` in this (pre-rewrite) type system.
void visit(V<ast_bool_const> v) override {
  TypeExpr* int_type = TypeExpr::new_atomic(TypeExpr::_Int);
  v->mutate()->assign_inferred_type(int_type);
}
// `null` is typed like a call of the global symbol `__null` with no arguments:
// the symbol's full type is unified with `() -> hole`, and whatever the hole resolves to
// becomes the inferred type of the `null` expression.
void visit(V<ast_null_keyword> v) override {
const FunctionData* fun_ref = lookup_global_symbol("__null")->as<FunctionData>();
// expected shape of the callee: unit arguments, unknown (hole) result
TypeExpr* fun_type = TypeExpr::new_map(TypeExpr::new_unit(), TypeExpr::new_hole());
TypeExpr* sym_type = fun_ref->full_type;
try {
unify(fun_type, sym_type);
} catch (UnifyError& ue) {
std::ostringstream os;
os << "cannot apply function " << fun_ref->name << " : " << fun_ref->full_type << " to arguments of type "
<< fun_type->args[0] << ": " << ue;
v->error(os.str());
}
// args[1] of the map is its result part; it is read only after unification
TypeExpr* e_type = fun_type->args[1];
TypeExpr::remove_indirect(e_type);
v->mutate()->assign_inferred_type(e_type);
}
// `self` is typed exactly as the parameter it refers to was declared.
void visit(V<ast_self_keyword> v) override {
  TypeExpr* self_type = v->param_ref->declared_type;
  v->mutate()->assign_inferred_type(self_type);
}
// An argument wrapper has the type of the expression being passed.
void visit(V<ast_argument> v) override {
  auto passed_expr = v->get_expr();
  parent::visit(passed_expr);
  v->mutate()->assign_inferred_type(passed_expr->inferred_type);
}
// The argument list as a whole is typed as unit (no arguments) or as a tensor of argument types.
void visit(V<ast_argument_list> v) override {
  TypeExpr* args_type;
  if (v->empty()) {
    args_type = TypeExpr::new_unit();
  } else {
    std::vector<TypeExpr*> arg_types;
    arg_types.reserve(v->size());
    for (AnyExprV arg : v->get_arguments()) {
      parent::visit(arg);    // infer the argument first, then collect its type
      arg_types.push_back(arg->inferred_type);
    }
    args_type = TypeExpr::new_tensor(std::move(arg_types));
  }
  v->mutate()->assign_inferred_type(args_type);
}
// Infers the type of a call `callee(args)`.
// The callee and the argument list are inferred first; then `args -> hole` is unified with the callee's type,
// and the resolved hole becomes the type of the call expression.
// Two paths exist: a call of an arbitrary expression (no `fun_maybe`), and a call of a known function.
void visit(V<ast_function_call> v) override {
// special error for "null()" which is a FunC syntax
if (v->get_called_f()->type == ast_null_keyword) {
v->error("null is not a function: use `null`, not `null()`");
}
parent::visit(v->get_called_f());
visit(v->get_arg_list());
// most likely it's a global function, but also may be `some_var(args)` or even `getF()(args)`
const FunctionData* fun_ref = v->fun_maybe;
if (!fun_ref) {
// callee is not a known function: unify against the inferred type of the callee expression
TypeExpr* arg_tensor = v->get_arg_list()->inferred_type;
TypeExpr* lhs_type = v->get_called_f()->inferred_type;
TypeExpr* fun_type = TypeExpr::new_map(arg_tensor, TypeExpr::new_hole());
try {
unify(fun_type, lhs_type);
} catch (UnifyError& ue) {
std::ostringstream os;
os << "cannot apply expression of type " << lhs_type << " to an expression of type " << arg_tensor
<< ": " << ue;
v->error(os.str());
}
// args[1] of the map is its result part; it is read only after unification
TypeExpr* e_type = fun_type->args[1];
TypeExpr::remove_indirect(e_type);
v->mutate()->assign_inferred_type(e_type);
return;
}
// callee is a known function: unify against its full type
TypeExpr* arg_tensor = v->get_arg_list()->inferred_type;
TypeExpr* fun_type = TypeExpr::new_map(arg_tensor, TypeExpr::new_hole());
TypeExpr* sym_type = fun_ref->full_type;
try {
unify(fun_type, sym_type);
} catch (UnifyError& ue) {
std::ostringstream os;
os << "cannot apply function " << fun_ref->name << " : " << fun_ref->full_type << " to arguments of type "
<< fun_type->args[0] << ": " << ue;
v->error(os.str());
}
TypeExpr* e_type = fun_type->args[1];
TypeExpr::remove_indirect(e_type);
if (fun_ref->has_mutate_params()) {
// for functions with `mutate` parameters the result is asserted to be a tensor,
// and only its last component is the type of the call expression
// (presumably the preceding components are the mutated values — confirm against the function type builder)
tolk_assert(e_type->constr == TypeExpr::te_Tensor);
e_type = e_type->args[e_type->args.size() - 1];
}
v->mutate()->assign_inferred_type(e_type);
}
// Infers the type of `obj.method(args)`.
// The object is treated as the first argument: `(obj, args...) -> hole` is unified with the method's full type.
void visit(V<ast_dot_method_call> v) override {
parent::visit(v->get_obj());
visit(v->get_arg_list());
// build the effective argument list: the object followed by explicit arguments
std::vector<TypeExpr*> arg_types;
arg_types.reserve(1 + v->get_num_args());
arg_types.push_back(v->get_obj()->inferred_type);
for (int i = 0; i < v->get_num_args(); ++i) {
arg_types.push_back(v->get_arg(i)->inferred_type);
}
TypeExpr* arg_tensor = TypeExpr::new_tensor(std::move(arg_types));
TypeExpr* fun_type = TypeExpr::new_map(arg_tensor, TypeExpr::new_hole());
TypeExpr* sym_type = v->fun_ref->full_type;
try {
unify(fun_type, sym_type);
} catch (UnifyError& ue) {
std::ostringstream os;
os << "cannot apply function " << v->fun_ref->name << " : " << v->fun_ref->full_type << " to arguments of type "
<< fun_type->args[0] << ": " << ue;
v->error(os.str());
}
// args[1] of the map is its result part; it is read only after unification
TypeExpr* e_type = fun_type->args[1];
TypeExpr::remove_indirect(e_type);
if (v->fun_ref->has_mutate_params()) {
// with `mutate` parameters the result is asserted to be a tensor; its last component is the call's type
tolk_assert(e_type->constr == TypeExpr::te_Tensor);
e_type = e_type->args[e_type->args.size() - 1];
}
if (v->fun_ref->does_return_self()) {
// a method returning `self` yields the type of the object it was called on (overrides the above)
e_type = v->get_obj()->inferred_type;
TypeExpr::remove_indirect(e_type);
}
v->mutate()->assign_inferred_type(e_type);
}
// `_` gets a fresh hole as its type.
void visit(V<ast_underscore> v) override {
  TypeExpr* fresh_hole = TypeExpr::new_hole();
  v->mutate()->assign_inferred_type(fresh_hole);
}
// A unary operator requires an integer operand and always produces `int`.
void visit(V<ast_unary_operator> v) override {
  auto operand = v->get_rhs();
  parent::visit(operand);
  const bool operand_is_int = expect_integer(operand);
  if (!operand_is_int) {
    v->error("operator `" + static_cast<std::string>(v->operator_name) + "` expects integer operand");
  }
  v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int));
}
// Infers the type of a binary operator after inferring both operands.
// Assignment unifies lhs with rhs and takes the lhs type; every other operator requires
// integer operands and produces `int`.
void visit(V<ast_binary_operator> v) override {
parent::visit(v->get_lhs());
parent::visit(v->get_rhs());
switch (v->tok) {
case tok_assign: {
TypeExpr* lhs_type = v->get_lhs()->inferred_type;
TypeExpr* rhs_type = v->get_rhs()->inferred_type;
try {
unify(lhs_type, rhs_type);
} catch (UnifyError& ue) {
std::ostringstream os;
os << "cannot assign an expression of type " << rhs_type << " to a variable or pattern of type "
<< lhs_type << ": " << ue;
v->error(os.str());
}
// the assignment expression itself has the (unified) type of its left side
TypeExpr* e_type = lhs_type;
TypeExpr::remove_indirect(e_type);
v->mutate()->assign_inferred_type(e_type);
break;
}
case tok_eq:
case tok_neq:
case tok_spaceship: {
// NOTE(review): `<=>` lands here too, but the message below mentions only `== !=`
if (!expect_integer(v->get_lhs()) || !expect_integer(v->get_rhs())) {
v->error("comparison operators `== !=` can compare only integers");
}
v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int));
break;
}
case tok_logical_and:
case tok_logical_or: {
if (!expect_integer(v->get_lhs()) || !expect_integer(v->get_rhs())) {
v->error("logical operators `&& ||` expect integer operands");
}
v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int));
break;
}
default:
// all remaining operators: integer operands, integer result
if (!expect_integer(v->get_lhs()) || !expect_integer(v->get_rhs())) {
v->error("operator `" + static_cast<std::string>(v->operator_name) + "` expects integer operands");
}
v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int));
}
}
// Infers the type of `cond ? when_true : when_false`.
// The condition must be an integer; both branches are unified with a fresh hole,
// which becomes the type of the whole expression.
// If the branches cannot be unified, an error is fired at this vertex (with source location),
// the same way all sibling visitors report unification failures, instead of letting
// a bare UnifyError escape from the visitor.
void visit(V<ast_ternary_operator> v) override {
  parent::visit(v->get_cond());
  if (!expect_integer(v->get_cond())) {
    v->get_cond()->error("condition of ternary ?: operator must be an integer");
  }
  parent::visit(v->get_when_true());
  parent::visit(v->get_when_false());

  TypeExpr* res = TypeExpr::new_hole();
  TypeExpr* ttrue = v->get_when_true()->inferred_type;
  TypeExpr* tfals = v->get_when_false()->inferred_type;
  try {
    unify(res, ttrue);    // binds the hole to the type of the first branch
    unify(res, tfals);    // the second branch must agree with it
  } catch (UnifyError& ue) {
    std::ostringstream os;
    os << "incompatible types " << ttrue << " and " << tfals << " for alternatives of ternary operator: " << ue;
    v->error(os.str());
  }
  v->mutate()->assign_inferred_type(res);
}
// `if (cond) ... else ...`: visits the condition and both bodies, then requires the condition to be an integer.
// The (possibly refined by unification) condition type is written back to the condition vertex.
void visit(V<ast_if_statement> v) override {
  auto cond = v->get_cond();
  parent::visit(cond);
  parent::visit(v->get_if_body());
  parent::visit(v->get_else_body());

  TypeExpr* int_type = TypeExpr::new_atomic(TypeExpr::_Int);
  TypeExpr* type_of_cond = cond->inferred_type;
  try {
    unify(type_of_cond, int_type);
  } catch (UnifyError& ue) {
    std::ostringstream msg;
    msg << "`if` condition value of type " << type_of_cond << " is not an integer: " << ue;
    cond->error(msg.str());
  }
  cond->mutate()->assign_inferred_type(type_of_cond);
}
// `repeat (n) ...`: visits the counter expression and the body, then requires the counter to be an integer.
// The (possibly refined by unification) counter type is written back to its vertex.
void visit(V<ast_repeat_statement> v) override {
  auto cond = v->get_cond();
  parent::visit(cond);
  parent::visit(v->get_body());

  TypeExpr* int_type = TypeExpr::new_atomic(TypeExpr::_Int);
  TypeExpr* type_of_cond = cond->inferred_type;
  try {
    unify(type_of_cond, int_type);
  } catch (UnifyError& ue) {
    std::ostringstream msg;
    msg << "repeat count value of type " << type_of_cond << " is not an integer: " << ue;
    cond->error(msg.str());
  }
  cond->mutate()->assign_inferred_type(type_of_cond);
}
// `while (cond) ...`: visits the condition and the body, then requires the condition to be an integer.
// The (possibly refined by unification) condition type is written back to the condition vertex.
void visit(V<ast_while_statement> v) override {
  auto cond = v->get_cond();
  parent::visit(cond);
  parent::visit(v->get_body());

  TypeExpr* int_type = TypeExpr::new_atomic(TypeExpr::_Int);
  TypeExpr* type_of_cond = cond->inferred_type;
  try {
    unify(type_of_cond, int_type);
  } catch (UnifyError& ue) {
    std::ostringstream msg;
    msg << "`while` condition value of type " << type_of_cond << " is not an integer: " << ue;
    cond->error(msg.str());
  }
  cond->mutate()->assign_inferred_type(type_of_cond);
}
// `do ... while (cond)`: the body is visited before the condition (source order),
// then the condition is required to be an integer and its refined type is written back.
void visit(V<ast_do_while_statement> v) override {
  parent::visit(v->get_body());
  auto cond = v->get_cond();
  parent::visit(cond);

  TypeExpr* int_type = TypeExpr::new_atomic(TypeExpr::_Int);
  TypeExpr* type_of_cond = cond->inferred_type;
  try {
    unify(type_of_cond, int_type);
  } catch (UnifyError& ue) {
    std::ostringstream msg;
    msg << "`while` condition value of type " << type_of_cond << " is not an integer: " << ue;
    cond->error(msg.str());
  }
  cond->mutate()->assign_inferred_type(type_of_cond);
}
// Checks a `return` statement against the return type of the function being visited.
// For functions returning `self`, only the shape of the returned expression is validated;
// otherwise the expression type is unified with the declared/inferred return type.
void visit(V<ast_return_statement> v) override {
parent::visit(v->get_return_value());
if (current_function->does_return_self()) {
if (!is_expr_valid_as_return_self(v->get_return_value())) {
v->error("invalid return from `self` function");
}
return;
}
TypeExpr* expr_type = v->get_return_value()->inferred_type;
// dig the return type out of the function's full type: [forall] (params -> ret)
TypeExpr* ret_type = current_function->full_type;
if (ret_type->constr == TypeExpr::te_ForAll) {
ret_type = ret_type->args[0];
}
tolk_assert(ret_type->constr == TypeExpr::te_Map);
ret_type = ret_type->args[1];
if (current_function->has_mutate_params()) {
// with `mutate` parameters the result is asserted to be a tensor; its last component is what `return` provides
tolk_assert(ret_type->constr == TypeExpr::te_Tensor);
ret_type = ret_type->args[ret_type->args.size() - 1];
}
try {
unify(expr_type, ret_type);
} catch (UnifyError& ue) {
std::ostringstream os;
os << "previous function return type " << ret_type
<< " cannot be unified with return statement expression type " << expr_type << ": " << ue;
v->error(os.str());
}
}
// Assigns a type to a variable occurring on the left side of a declaration (`var x` / `var x: T` / `_`).
// The same type is then copied onto the nested identifier vertex.
void visit(V<ast_local_var> v) override {
if (v->var_maybe) { // not underscore
// the vertex refers either to a local variable or to a global one; anything else is an internal error
if (const auto* var_ref = v->var_maybe->try_as<LocalVarData>()) {
v->mutate()->assign_inferred_type(var_ref->declared_type);
} else if (const auto* glob_ref = v->var_maybe->try_as<GlobalVarData>()) {
v->mutate()->assign_inferred_type(glob_ref->declared_type);
} else {
tolk_assert(0);
}
} else if (v->declared_type) { // underscore with type
v->mutate()->assign_inferred_type(v->declared_type);
} else { // just underscore
v->mutate()->assign_inferred_type(TypeExpr::new_hole());
}
v->get_identifier()->mutate()->assign_inferred_type(v->inferred_type);
}
// `var <lhs> = <rhs>`: infers both sides, then unifies the pattern type with the assigned value type.
void visit(V<ast_local_vars_declaration> v) override {
  auto lhs_node = v->get_lhs();
  auto rhs_node = v->get_assigned_val();
  parent::visit(lhs_node);
  parent::visit(rhs_node);

  TypeExpr* pattern_type = lhs_node->inferred_type;
  TypeExpr* value_type = rhs_node->inferred_type;
  try {
    unify(pattern_type, value_type);
  } catch (UnifyError& ue) {
    std::ostringstream msg;
    msg << "cannot assign an expression of type " << value_type << " to a variable or pattern of type " << pattern_type << ": " << ue;
    v->error(msg.str());
  }
}
// `try { ... } catch (a, b) { ... }`: visits the try body and the catch expression,
// types the two catch items, and only then visits the catch body (which may use those items).
void visit(V<ast_try_catch_statement> v) override {
parent::visit(v->get_try_body());
parent::visit(v->get_catch_expr());
// a tensor of (fresh type variable, int) is used as the source of the two catch item types
TypeExpr* tvm_error_type = TypeExpr::new_tensor(TypeExpr::new_var(), TypeExpr::new_atomic(TypeExpr::_Int));
tolk_assert(v->get_catch_expr()->size() == 2);
// first catch item is unified with the int component, second with the type variable
TypeExpr* type1 = v->get_catch_expr()->get_item(0)->inferred_type;
unify(type1, tvm_error_type->args[1]);
TypeExpr* type2 = v->get_catch_expr()->get_item(1)->inferred_type;
unify(type2, tvm_error_type->args[0]);
parent::visit(v->get_catch_body());
}
// `throw code` / `throw (code, arg)`: the code must be an integer; the optional argument is only inferred.
void visit(V<ast_throw_statement> v) override {
  auto thrown_code = v->get_thrown_code();
  parent::visit(thrown_code);
  const bool code_is_int = expect_integer(thrown_code);
  if (!code_is_int) {
    thrown_code->error("excNo of `throw` must be an integer");
  }
  if (!v->has_thrown_arg()) {
    return;
  }
  parent::visit(v->get_thrown_arg());
}
// `assert(cond, code)`: the condition must be an integer; the thrown code is only inferred here.
void visit(V<ast_assert_statement> v) override {
  auto cond = v->get_cond();
  parent::visit(cond);
  const bool cond_is_int = expect_integer(cond);
  if (!cond_is_int) {
    cond->error("condition of `assert` must be an integer");
  }
  parent::visit(v->get_thrown_code());
}
public:
// Entry point for one function: remembers it as `current_function`, infers types inside its body,
// and, if the function may end without an explicit `return`, checks that an implicit unit return
// is compatible with its return type.
// A function returning `self` is not allowed to end implicitly: an error is thrown at the end of its body.
void start_visiting_function(V<ast_function_declaration> v_function) override {
  current_function = v_function->fun_ref;
  parent::visit(v_function->get_body());
  if (current_function->is_implicit_return()) {
    if (current_function->does_return_self()) {
      throw ParseError(v_function->get_body()->as<ast_sequence>()->loc_end, "missing return; forgot `return self`?");
    }
    TypeExpr* expr_type = TypeExpr::new_unit();
    // dig the return type out of the function's full type: [forall] (params -> ret)
    TypeExpr* ret_type = current_function->full_type;
    if (ret_type->constr == TypeExpr::te_ForAll) {
      ret_type = ret_type->args[0];
    }
    tolk_assert(ret_type->constr == TypeExpr::te_Map);
    ret_type = ret_type->args[1];
    if (current_function->has_mutate_params()) {
      // same invariant as checked for explicit `return` statements:
      // verify the shape before indexing the last component
      tolk_assert(ret_type->constr == TypeExpr::te_Tensor);
      ret_type = ret_type->args[ret_type->args.size() - 1];
    }
    try {
      unify(expr_type, ret_type);
    } catch (UnifyError& ue) {
      std::ostringstream os;
      os << "implicit function return type " << expr_type
         << " cannot be unified with inferred return type " << ret_type << ": " << ue;
      v_function->error(os.str());
    }
  }
}
};
// Pipeline step: runs InferAndCheckTypesInsideFunctionVisitor over the functions of all given source files.
void pipeline_infer_and_check_types(const AllSrcFiles& all_src_files) {
visit_ast_of_all_functions<InferAndCheckTypesInsideFunctionVisitor>(all_src_files);
}
} // namespace tolk

File diff suppressed because it is too large Load diff

View file

@ -34,8 +34,8 @@
namespace tolk {
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
static void fire_error_invalid_mutate_arg_passed(AnyV v, const FunctionData* fun_ref, const LocalVarData& p_sym, bool called_as_method, bool arg_passed_as_mutate, AnyV arg_expr) {
std::string arg_str(arg_expr->type == ast_identifier ? arg_expr->as<ast_identifier>()->name : "obj");
static void fire_error_invalid_mutate_arg_passed(AnyExprV v, const FunctionData* fun_ref, const LocalVarData& p_sym, bool called_as_method, bool arg_passed_as_mutate, AnyV arg_expr) {
std::string arg_str(arg_expr->type == ast_reference ? arg_expr->as<ast_reference>()->get_name() : "obj");
// case: `loadInt(cs, 32)`; suggest: `cs.loadInt(32)`
if (p_sym.is_mutate_parameter() && !arg_passed_as_mutate && !called_as_method && p_sym.idx == 0 && fun_ref->does_accept_self()) {
@ -59,7 +59,7 @@ static void fire_error_invalid_mutate_arg_passed(AnyV v, const FunctionData* fun
class RefineLvalueForMutateArgumentsVisitor final : public ASTVisitorFunctionBody {
void visit(V<ast_function_call> v) override {
// most likely it's a global function, but also may be `some_var(args)` or even `getF()(args)`
// v is `globalF(args)` / `globalF<int>(args)` / `obj.method(args)` / `local_var(args)` / `getF()(args)`
const FunctionData* fun_ref = v->fun_maybe;
if (!fun_ref) {
parent::visit(v);
@ -72,47 +72,55 @@ class RefineLvalueForMutateArgumentsVisitor final : public ASTVisitorFunctionBod
return;
}
tolk_assert(static_cast<int>(fun_ref->parameters.size()) == v->get_num_args());
int delta_self = v->is_dot_call();
tolk_assert(fun_ref->get_num_params() == delta_self + v->get_num_args());
if (v->is_dot_call()) {
if (fun_ref->does_mutate_self()) {
// for `b.storeInt()`, `b` should become lvalue, since `storeInt` is a method mutating self
// but: `beginCell().storeInt()`, then `beginCell()` is not lvalue
// (it will be extracted as tmp var when transforming AST to IR)
AnyExprV leftmost_obj = v->get_dot_obj();
while (true) {
if (auto as_par = leftmost_obj->try_as<ast_parenthesized_expression>()) {
leftmost_obj = as_par->get_expr();
} else if (auto as_cast = leftmost_obj->try_as<ast_cast_as_operator>()) {
leftmost_obj = as_cast->get_expr();
} else {
break;
}
}
bool will_be_extracted_as_tmp_var = leftmost_obj->type == ast_function_call;
if (!will_be_extracted_as_tmp_var) {
leftmost_obj->mutate()->assign_lvalue_true();
v->get_dot_obj()->mutate()->assign_lvalue_true();
}
}
if (!fun_ref->does_accept_self() && fun_ref->parameters[0].is_mutate_parameter()) {
fire_error_invalid_mutate_arg_passed(v, fun_ref, fun_ref->parameters[0], true, false, v->get_dot_obj());
}
}
for (int i = 0; i < v->get_num_args(); ++i) {
const LocalVarData& p_sym = fun_ref->parameters[i];
const LocalVarData& p_sym = fun_ref->parameters[delta_self + i];
auto arg_i = v->get_arg(i);
if (p_sym.is_mutate_parameter() != arg_i->passed_as_mutate) {
fire_error_invalid_mutate_arg_passed(arg_i, fun_ref, p_sym, false, arg_i->passed_as_mutate, arg_i->get_expr());
}
parent::visit(arg_i);
}
parent::visit(v->get_callee());
}
void visit(V<ast_dot_method_call> v) override {
parent::visit(v);
const FunctionData* fun_ref = v->fun_ref;
tolk_assert(static_cast<int>(fun_ref->parameters.size()) == 1 + v->get_num_args());
if (fun_ref->does_mutate_self()) {
bool will_be_extracted_as_tmp_var = v->get_obj()->type == ast_function_call || v->get_obj()->type == ast_dot_method_call;
if (!will_be_extracted_as_tmp_var) {
v->get_obj()->mutate()->assign_lvalue_true();
}
}
if (!fun_ref->does_accept_self() && fun_ref->parameters[0].is_mutate_parameter()) {
fire_error_invalid_mutate_arg_passed(v, fun_ref, fun_ref->parameters[0], true, false, v->get_obj());
}
for (int i = 0; i < v->get_num_args(); ++i) {
const LocalVarData& p_sym = fun_ref->parameters[1 + i];
auto arg_i = v->get_arg(i);
if (p_sym.is_mutate_parameter() != arg_i->passed_as_mutate) {
fire_error_invalid_mutate_arg_passed(arg_i, fun_ref, p_sym, false, arg_i->passed_as_mutate, arg_i->get_expr());
}
}
public:
bool should_visit_function(const FunctionData* fun_ref) override {
return fun_ref->is_code_function() && !fun_ref->is_generic_function();
}
};
void pipeline_refine_lvalue_for_mutate_arguments(const AllSrcFiles& all_src_files) {
visit_ast_of_all_functions<RefineLvalueForMutateArgumentsVisitor>(all_src_files);
void pipeline_refine_lvalue_for_mutate_arguments() {
visit_ast_of_all_functions<RefineLvalueForMutateArgumentsVisitor>();
}
} // namespace tolk

View file

@ -20,7 +20,9 @@
#include "ast.h"
#include "compiler-state.h"
#include "constant-evaluator.h"
#include "generics-helpers.h"
#include "td/utils/crypto.h"
#include "type-system.h"
#include <unordered_set>
/*
@ -59,68 +61,70 @@ static int calculate_method_id_by_func_name(std::string_view func_name) {
return static_cast<int>(crc & 0xffff) | 0x10000;
}
static void calc_arg_ret_order_of_asm_function(V<ast_asm_body> v_body, V<ast_parameter_list> param_list, TypeExpr* ret_type,
std::vector<int>& arg_order, std::vector<int>& ret_order) {
int cnt = param_list->size();
int width = ret_type->get_width();
if (width < 0 || width > 16) {
v_body->error("return type of an assembler built-in function must have a well-defined fixed width");
static void validate_arg_ret_order_of_asm_function(V<ast_asm_body> v_body, int n_params, TypePtr ret_type) {
if (!ret_type) {
v_body->error("asm function must declare return type (before asm instructions)");
}
if (cnt > 16) {
v_body->error("assembler built-in function must have at most 16 arguments");
}
std::vector<int> cum_arg_width;
cum_arg_width.push_back(0);
int tot_width = 0;
for (int i = 0; i < cnt; ++i) {
V<ast_parameter> v_param = param_list->get_param(i);
int arg_width = v_param->declared_type->get_width();
if (arg_width < 0 || arg_width > 16) {
v_param->error("parameters of an assembler built-in function must have a well-defined fixed width");
}
cum_arg_width.push_back(tot_width += arg_width);
if (n_params > 16) {
v_body->error("asm function can have at most 16 parameters");
}
// asm(param1 ... paramN), param names were previously mapped into indices
if (!v_body->arg_order.empty()) {
if (static_cast<int>(v_body->arg_order.size()) != cnt) {
if (static_cast<int>(v_body->arg_order.size()) != n_params) {
v_body->error("arg_order of asm function must specify all parameters");
}
std::vector<bool> visited(cnt, false);
for (int i = 0; i < cnt; ++i) {
int j = v_body->arg_order[i];
std::vector<bool> visited(v_body->arg_order.size(), false);
for (int j : v_body->arg_order) {
if (visited[j]) {
v_body->error("arg_order of asm function contains duplicates");
}
visited[j] = true;
int c1 = cum_arg_width[j], c2 = cum_arg_width[j + 1];
while (c1 < c2) {
arg_order.push_back(c1++);
}
}
tolk_assert(arg_order.size() == (unsigned)tot_width);
}
// asm(-> 0 2 1 3), check for a shuffled range 0...N
// correctness of N (actual return width onto a stack) will be checked after type inferring and generics instantiation
if (!v_body->ret_order.empty()) {
if (static_cast<int>(v_body->ret_order.size()) != width) {
v_body->error("ret_order of this asm function expected to be width = " + std::to_string(width));
}
std::vector<bool> visited(width, false);
for (int i = 0; i < width; ++i) {
int j = v_body->ret_order[i];
if (j < 0 || j >= width || visited[j]) {
v_body->error("ret_order contains invalid integer, not in range 0 .. width-1");
std::vector<bool> visited(v_body->ret_order.size(), false);
for (int j : v_body->ret_order) {
if (j < 0 || j >= static_cast<int>(v_body->ret_order.size()) || visited[j]) {
v_body->error("ret_order contains invalid integer, not in range 0 .. N");
}
visited[j] = true;
}
ret_order = v_body->ret_order;
}
}
// Builds a GenericsDeclaration from the `<T1, T2, ...>` list of a function declaration.
// Names are collected in source order; a repeated name fires an error on the repeating item.
static const GenericsDeclaration* construct_genericTs(V<ast_genericsT_list> v_list) {
  const int n_items = v_list->size();
  std::vector<GenericsDeclaration::GenericsItem> itemsT;
  itemsT.reserve(n_items);

  for (int i = 0; i < n_items; ++i) {
    auto v_item = v_list->get_item(i);
    // compare against every name collected so far
    for (const GenericsDeclaration::GenericsItem& prev : itemsT) {
      if (prev.nameT == v_item->nameT) {
        v_item->error("duplicate generic parameter `" + static_cast<std::string>(v_item->nameT) + "`");
        break;
      }
    }
    itemsT.emplace_back(v_item->nameT);
  }

  return new GenericsDeclaration(std::move(itemsT));
}
static void register_constant(V<ast_constant_declaration> v) {
ConstantValue init_value = eval_const_init_value(v->get_init_value());
GlobalConstData* c_sym = new GlobalConstData(static_cast<std::string>(v->get_identifier()->name), v->loc, std::move(init_value));
GlobalConstData* c_sym = new GlobalConstData(static_cast<std::string>(v->get_identifier()->name), v->loc, v->declared_type, std::move(init_value));
if (v->declared_type && !v->declared_type->equals_to(c_sym->inferred_type)) {
if (v->declared_type) {
bool ok = (c_sym->is_int_const() && (v->declared_type == TypeDataInt::create()))
|| (c_sym->is_slice_const() && (v->declared_type == TypeDataSlice::create()));
if (!ok) {
v->error("expression type does not match declared type");
}
}
G.symtable.add_global_const(c_sym);
G.all_constants.push_back(c_sym);
@ -137,124 +141,82 @@ static void register_global_var(V<ast_global_var_declaration> v) {
static LocalVarData register_parameter(V<ast_parameter> v, int idx) {
if (v->is_underscore()) {
return {"", v->loc, idx, v->declared_type};
return {"", v->loc, v->declared_type, 0, idx};
}
LocalVarData p_sym(static_cast<std::string>(v->param_name), v->loc, idx, v->declared_type);
int flags = 0;
if (v->declared_as_mutate) {
p_sym.flags |= LocalVarData::flagMutateParameter;
flags |= LocalVarData::flagMutateParameter;
}
if (!v->declared_as_mutate && idx == 0 && v->param_name == "self") {
p_sym.flags |= LocalVarData::flagImmutable;
flags |= LocalVarData::flagImmutable;
}
return p_sym;
return LocalVarData(static_cast<std::string>(v->param_name), v->loc, v->declared_type, flags, idx);
}
static void register_function(V<ast_function_declaration> v) {
std::string_view func_name = v->get_identifier()->name;
// calculate TypeExpr of a function: it's a map (params -> ret), probably surrounded by forall
TypeExpr* params_tensor_type = nullptr;
// calculate TypeData of a function
std::vector<TypePtr> arg_types;
std::vector<LocalVarData> parameters;
int n_params = v->get_num_params();
int n_mutate_params = 0;
std::vector<LocalVarData> parameters;
if (n_params) {
std::vector<TypeExpr*> param_tensor_items;
param_tensor_items.reserve(n_params);
arg_types.reserve(n_params);
parameters.reserve(n_params);
for (int i = 0; i < n_params; ++i) {
auto v_param = v->get_param(i);
n_mutate_params += static_cast<int>(v_param->declared_as_mutate);
param_tensor_items.emplace_back(v_param->declared_type);
arg_types.emplace_back(v_param->declared_type);
parameters.emplace_back(register_parameter(v_param, i));
}
params_tensor_type = TypeExpr::new_tensor(std::move(param_tensor_items));
} else {
params_tensor_type = TypeExpr::new_unit();
n_mutate_params += static_cast<int>(v_param->declared_as_mutate);
}
TypeExpr* function_type = TypeExpr::new_map(params_tensor_type, v->ret_type);
const GenericsDeclaration* genericTs = nullptr;
if (v->genericsT_list) {
std::vector<TypeExpr*> type_vars;
type_vars.reserve(v->genericsT_list->size());
for (int idx = 0; idx < v->genericsT_list->size(); ++idx) {
type_vars.emplace_back(v->genericsT_list->get_item(idx)->created_type);
genericTs = construct_genericTs(v->genericsT_list);
}
function_type = TypeExpr::new_forall(std::move(type_vars), function_type);
}
if (v->marked_as_builtin) {
if (v->is_builtin_function()) {
const Symbol* builtin_func = lookup_global_symbol(func_name);
const FunctionData* func_val = builtin_func ? builtin_func->as<FunctionData>() : nullptr;
if (!func_val || !func_val->is_builtin_function()) {
const FunctionData* fun_ref = builtin_func ? builtin_func->as<FunctionData>() : nullptr;
if (!fun_ref || !fun_ref->is_builtin_function()) {
v->error("`builtin` used for non-builtin function");
}
#ifdef TOLK_DEBUG
// in release, we don't need this check, since `builtin` is used only in stdlib, which is our responsibility
if (!func_val->full_type->equals_to(function_type) || func_val->is_marked_as_pure() != v->marked_as_pure) {
v->error("declaration for `builtin` function doesn't match an actual one");
}
#endif
v->mutate()->assign_fun_ref(fun_ref);
return;
}
if (G.is_verbosity(1)) {
std::cerr << "fun " << func_name << " : " << function_type << std::endl;
}
if (v->marked_as_pure && v->ret_type->get_width() == 0) {
v->error("a pure function should return something, otherwise it will be optimized out anyway");
if (G.is_verbosity(1) && v->is_code_function()) {
std::cerr << "fun " << func_name << " : " << v->declared_return_type << std::endl;
}
FunctionBody f_body = v->get_body()->type == ast_sequence ? static_cast<FunctionBody>(new FunctionBodyCode) : static_cast<FunctionBody>(new FunctionBodyAsm);
FunctionData* f_sym = new FunctionData(static_cast<std::string>(func_name), v->loc, function_type, std::move(parameters), 0, f_body);
FunctionData* f_sym = new FunctionData(static_cast<std::string>(func_name), v->loc, v->declared_return_type, std::move(parameters), 0, genericTs, nullptr, f_body, v);
if (const auto* v_asm = v->get_body()->try_as<ast_asm_body>()) {
calc_arg_ret_order_of_asm_function(v_asm, v->get_param_list(), v->ret_type, f_sym->arg_order, f_sym->ret_order);
validate_arg_ret_order_of_asm_function(v_asm, v->get_num_params(), v->declared_return_type);
f_sym->arg_order = v_asm->arg_order;
f_sym->ret_order = v_asm->ret_order;
}
if (v->method_id) {
if (v->method_id->intval.is_null() || !v->method_id->intval->signed_fits_bits(32)) {
v->method_id->error("invalid integer constant");
}
f_sym->method_id = static_cast<int>(v->method_id->intval->to_long());
} else if (v->marked_as_get_method) {
if (v->method_id.not_null()) {
f_sym->method_id = static_cast<int>(v->method_id->to_long());
} else if (v->flags & FunctionData::flagGetMethod) {
f_sym->method_id = calculate_method_id_by_func_name(func_name);
for (const FunctionData* other : G.all_get_methods) {
if (other->method_id == f_sym->method_id) {
v->error(PSTRING() << "GET methods hash collision: `" << other->name << "` and `" << f_sym->name << "` produce the same hash. Consider renaming one of these functions.");
}
}
} else if (v->is_entrypoint) {
} else if (v->flags & FunctionData::flagIsEntrypoint) {
f_sym->method_id = calculate_method_id_for_entrypoint(func_name);
}
if (v->marked_as_pure) {
f_sym->flags |= FunctionData::flagMarkedAsPure;
}
if (v->marked_as_inline) {
f_sym->flags |= FunctionData::flagInline;
}
if (v->marked_as_inline_ref) {
f_sym->flags |= FunctionData::flagInlineRef;
}
if (v->marked_as_get_method) {
f_sym->flags |= FunctionData::flagGetMethod;
}
if (v->is_entrypoint) {
f_sym->flags |= FunctionData::flagIsEntrypoint;
}
f_sym->flags |= v->flags;
if (n_mutate_params) {
f_sym->flags |= FunctionData::flagHasMutateParams;
}
if (v->accepts_self) {
f_sym->flags |= FunctionData::flagAcceptsSelf;
}
if (v->returns_self) {
f_sym->flags |= FunctionData::flagReturnsSelf;
}
G.symtable.add_function(f_sym);
if (f_sym->is_regular_function()) {
G.all_code_functions.push_back(f_sym);
}
G.all_functions.push_back(f_sym);
if (f_sym->is_get_method()) {
G.all_get_methods.push_back(f_sym);
}
@ -270,10 +232,10 @@ static void iterate_through_file_symbols(const SrcFile* file) {
for (AnyV v : file->ast->as<ast_tolk_file>()->get_toplevel_declarations()) {
switch (v->type) {
case ast_import_statement:
case ast_import_directive:
// on `import "another-file.tolk"`, register symbols from that file at first
// (for instance, it can calculate constants, which are used in init_val of constants in current file below import)
iterate_through_file_symbols(v->as<ast_import_statement>()->file);
iterate_through_file_symbols(v->as<ast_import_directive>()->file);
break;
case ast_constant_declaration:
@ -291,8 +253,8 @@ static void iterate_through_file_symbols(const SrcFile* file) {
}
}
void pipeline_register_global_symbols(const AllSrcFiles& all_src_files) {
for (const SrcFile* file : all_src_files) {
void pipeline_register_global_symbols() {
for (const SrcFile* file : G.all_src_files) {
iterate_through_file_symbols(file);
}
}

View file

@ -0,0 +1,350 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tolk.h"
#include "platform-utils.h"
#include "compiler-state.h"
#include "src-file.h"
#include "generics-helpers.h"
#include "ast.h"
#include "ast-visitor.h"
#include "type-system.h"
#include <unordered_map>
/*
* This pipe resolves identifiers (local variables and types) in all functions bodies.
* It happens before type inferring, but after all global symbols are registered.
* It means, that for any symbol `x` we can look up whether it's a global name or not.
*
* About resolving variables.
* Example: `var x = 10; x = 20;` both `x` point to one LocalVarData.
* Example: `x = 20` undefined symbol `x` is also here (unless it's a global)
* Variables scoping and redeclaration are also here.
* Note, that `x` is stored as `ast_reference (ast_identifier "x")`. More formally, "references" are resolved.
* "Reference" in AST, besides the identifier, stores optional generics instantiation. `x<int>` is grammar-valid.
*
* About resolving types. At the moment of parsing, `int`, `cell` and other predefined are parsed as TypeDataInt, etc.
* All the others are stored as TypeDataUnresolved, to be resolved here, after global symtable is filled.
* Example: `var x: T = 0` unresolved "T" is replaced by TypeDataGenericT inside `f<T>`.
* Example: `f<MyAlias>()` unresolved "MyAlias" is replaced by TypeDataAlias inside the reference.
* Example: `fun f(): KKK` unresolved "KKK" fires an error "unknown type name".
* When structures and type aliases are implemented, their resolving will also be done here.
* See finalize_type_data().
*
* Note, that functions/methods binding is NOT here.
* In other words, for ast_function_call `beginCell()` and `t.tupleAt(0)`, their fun_ref is NOT filled here.
* Functions/methods binding is done later, simultaneously with type inferring and generics instantiation.
* For instance, to call a generic function `t.tuplePush(1)`, we need types of `t` and `1` to be inferred,
* as well as `tuplePush<int>` to be instantiated, and fun_ref to point at that exact instantiation.
*
* As a result of this step,
* * every V<ast_reference>::sym is filled, pointing either to a local var/parameter, or to a global symbol
* (except for function calls and methods: their references are bound later)
* * all TypeData in all symbols is ready for analyzing, TypeDataUnresolved won't occur later in pipeline
*/
namespace tolk {
// Reports an identifier that resolved neither to a local variable nor to a global symbol.
// The name `self` gets a dedicated message, since it can only be missing when the
// enclosing function does not declare `self` as its first parameter.
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
static void fire_error_undefined_symbol(V<ast_identifier> v) {
  const bool is_ordinary_name = v->name != "self";
  if (is_ordinary_name) {
    v->error("undefined symbol `" + static_cast<std::string>(v->name) + "`");
  } else {
    v->error("using `self` in a non-member function (it does not accept the first `self` parameter)");
  }
}
// Throws a ParseError at `loc` saying that `text` does not name any known type.
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
static void fire_error_unknown_type_name(SrcLocation loc, const std::string &text) {
  throw ParseError(loc, std::string("unknown type name `") + text + "`");
}
// Enforces import strictness: a symbol declared in another (non-builtin) file may be used
// only if the file containing the usage lists the declaring file among its imports.
// Fires an error at `v_usage` otherwise.
static void check_import_exists_when_using_sym(AnyV v_usage, const Symbol* used_sym) {
  SrcLocation declared_at = used_sym->loc;
  if (v_usage->loc.is_symbol_from_same_or_builtin_file(declared_at)) {
    return;   // same file or builtin: nothing to import
  }

  const SrcFile* declaring_file = declared_at.get_src_file();
  for (const SrcFile::ImportDirective& directive : v_usage->loc.get_src_file()->imports) {
    if (directive.imported_file == declaring_file) {
      return; // the import is present
    }
  }

  v_usage->error("Using a non-imported symbol `" + used_sym->name + "`. Forgot to import \"" + declaring_file->rel_filename + "\"?");
}
// Stack of lexical scopes used while resolving identifiers inside a function body.
// Each scope maps the hash of a variable name to its symbol; names that are not found in
// any open scope are looked up in the global symbol table.
// Note: scopes are keyed by the hash of the name, not by the name itself, so two distinct
// names with equal hashes would be treated as the same entry.
struct NameAndScopeResolver {
  std::vector<std::unordered_map<uint64_t, const Symbol*>> scopes;

  // Key under which a name is stored in a scope.
  static uint64_t key_hash(std::string_view name_key) {
    std::hash<std::string_view> hasher;
    return hasher(name_key);
  }

  // Pushes a new innermost scope (`loc` is kept only for debug tracing).
  void open_scope([[maybe_unused]] SrcLocation loc) {
    // std::cerr << "open_scope " << scopes.size() + 1 << " at " << loc << std::endl;
    scopes.emplace_back();
  }

  // Pops the innermost scope; closing when nothing is open is an internal error.
  void close_scope([[maybe_unused]] SrcLocation loc) {
    // std::cerr << "close_scope " << scopes.size() << " at " << loc << std::endl;
    if (UNLIKELY(scopes.empty())) {
      throw Fatal{"cannot close the outer scope"};
    }
    scopes.pop_back();
  }

  // Searches scopes from innermost to outermost, then falls back to the global symtable.
  const Symbol* lookup_symbol(std::string_view name) const {
    const uint64_t name_hash = key_hash(name);
    for (auto depth = scopes.size(); depth > 0; --depth) {
      const auto& frame = scopes[depth - 1];
      auto found = frame.find(name_hash);
      if (found != frame.end()) {
        return found->second;
      }
    }
    return G.symtable.lookup(name);
  }

  // Registers a local variable in the innermost scope.
  // Variables with an empty name (underscore) are not registered at all.
  // Throws ParseError if the innermost scope already has an entry for this name.
  void add_local_var(const LocalVarData* v_sym) {
    if (UNLIKELY(scopes.empty())) {
      throw Fatal("unexpected scope_level = 0");
    }
    if (v_sym->name.empty()) {    // underscore
      return;
    }
    const bool was_inserted = scopes.back().emplace(key_hash(v_sym->name), v_sym).second;
    if (UNLIKELY(!was_inserted)) {
      throw ParseError(v_sym->loc, "redeclaration of local variable `" + v_sym->name + "`");
    }
  }
};
// Replaces every TypeDataUnresolved found inside a type with its resolved counterpart.
struct TypeDataResolver {
  // Walks `type_data` via replace_children_custom(). For each unresolved child:
  //  - if its text is one of the generic names of `genericTs`, it becomes a TypeDataGenericT;
  //  - the texts "auto", "self" and "bool" produce dedicated ParseErrors;
  //  - anything else fires "unknown type name".
  // Children that are not unresolved are returned unchanged.
  GNU_ATTRIBUTE_NOINLINE
  static TypePtr resolve_identifiers_in_type_data(TypePtr type_data, const GenericsDeclaration* genericTs) {
    return type_data->replace_children_custom([genericTs](TypePtr child) {
      if (const TypeDataUnresolved* unresolved = child->try_as<TypeDataUnresolved>()) {
        const auto& type_name = unresolved->text;

        // generic names are checked first, before any of the reserved words below
        const bool is_generic_name = genericTs && genericTs->has_nameT(type_name);
        if (is_generic_name) {
          return TypeDataGenericT::create(std::string(type_name));
        }

        if (type_name == "bool") {
          throw ParseError(unresolved->loc, "bool type is not supported yet");
        }
        if (type_name == "self") {
          throw ParseError(unresolved->loc, "`self` type can be used only as a return type of a function (enforcing it to be chainable)");
        }
        if (type_name == "auto") {
          throw ParseError(unresolved->loc, "`auto` type does not exist; just omit a type for local variable (will be inferred from assignment); parameters should always be typed");
        }
        fire_error_unknown_type_name(unresolved->loc, type_name);
      }
      return child;
    });
  }
};
// Returns `type_data` with all unresolved identifiers replaced (see TypeDataResolver).
// A null type, or a type without anything unresolved inside, is returned as is.
static TypePtr finalize_type_data(TypePtr type_data, const GenericsDeclaration* genericTs) {
  const bool needs_resolving = type_data && type_data->has_unresolved_inside();
  if (needs_resolving) {
    return TypeDataResolver::resolve_identifiers_in_type_data(type_data, genericTs);
  }
  return type_data;
}
// Visitor that, for one function at a time, binds every reference to a symbol (local variable,
// parameter or global) and finalizes all declared types (parameters, return type, local variable
// declarations, `as` casts and generic instantiation lists).
// Scopes are tracked in the static `current_scope`; the function being processed is kept in the
// static `current_function` so that its generic names are available while resolving types.
class AssignSymInsideFunctionVisitor final : public ASTVisitorFunctionBody {
// more correctly this field shouldn't be static, but currently there is no need to make it a part of state
static NameAndScopeResolver current_scope;
// set at the beginning of start_visiting_function() and reset to nullptr at its end
static const FunctionData* current_function;
// Allocates a new LocalVarData and registers it in the innermost open scope.
// add_local_var() throws on redeclaration and does not register empty (underscore) names.
// `immutable * LocalVarData::flagImmutable` evaluates to flagImmutable for true and to 0 for false.
// NOTE(review): the trailing -1 is presumably the not-yet-assigned variable index — confirm against the LocalVarData constructor.
static const LocalVarData* create_local_var_sym(std::string_view name, SrcLocation loc, TypePtr declared_type, bool immutable) {
LocalVarData* v_sym = new LocalVarData(static_cast<std::string>(name), loc, declared_type, immutable * LocalVarData::flagImmutable, -1);
current_scope.add_local_var(v_sym);
return v_sym;
}
// Declares one variable of a `catch` clause as an immutable local without a declared type.
// Catch items that are not ast_reference are skipped.
static void process_catch_variable(AnyExprV catch_var) {
if (auto v_ref = catch_var->try_as<ast_reference>()) {
const LocalVarData* var_ref = create_local_var_sym(v_ref->get_name(), catch_var->loc, nullptr, true);
v_ref->mutate()->assign_sym(var_ref);
}
}
protected:
// Left side of a variable declaration.
// With `redef`, the name must already resolve to a local variable, which is reused.
// Otherwise the declared type is finalized and a new local variable is registered in the current scope.
// NOTE(review): v->error() is presumably non-returning (sym is dereferenced right after the nullptr check) — confirm.
void visit(V<ast_local_var_lhs> v) override {
if (v->marked_as_redef) {
const Symbol* sym = current_scope.lookup_symbol(v->get_name());
if (sym == nullptr) {
v->error("`redef` for unknown variable");
}
const LocalVarData* var_ref = sym->try_as<LocalVarData>();
if (!var_ref) {
v->error("`redef` for unknown variable");
}
v->mutate()->assign_var_ref(var_ref);
} else {
TypePtr declared_type = finalize_type_data(v->declared_type, current_function->genericTs);
const LocalVarData* var_ref = create_local_var_sym(v->get_name(), v->loc, declared_type, v->is_immutable);
v->mutate()->assign_resolved_type(declared_type);
v->mutate()->assign_var_ref(var_ref);
}
}
// Assignment: the right side is visited before the left side.
void visit(V<ast_assign> v) override {
parent::visit(v->get_rhs()); // in this order, so that `var x = x` is invalid, "x" on the right unknown
parent::visit(v->get_lhs());
}
// Reference to a name: looks it up (scopes first, then globals), stores the symbol,
// checks imports for non-local symbols and finalizes types of an optional `<...>` list.
void visit(V<ast_reference> v) override {
const Symbol* sym = current_scope.lookup_symbol(v->get_name());
if (!sym) {
fire_error_undefined_symbol(v->get_identifier());
}
v->mutate()->assign_sym(sym);
// for global functions, global vars and constants, `import` must exist
if (!sym->try_as<LocalVarData>()) {
check_import_exists_when_using_sym(v, sym);
}
// for `f<int, MyAlias>` / `f<T>`, resolve "MyAlias" and "T"
// (for function call `f<T>()`, this v (ast_reference `f<T>`) is callee)
if (auto v_instantiationTs = v->get_instantiationTs()) {
for (int i = 0; i < v_instantiationTs->size(); ++i) {
TypePtr substituted_type = finalize_type_data(v_instantiationTs->get_item(i)->substituted_type, current_function->genericTs);
v_instantiationTs->get_item(i)->mutate()->assign_resolved_type(substituted_type);
}
}
}
// Dot access: only the `<...>` list (if any) and the object expression are processed here;
// the member name itself is not looked up in this visitor.
void visit(V<ast_dot_access> v) override {
// for `t.tupleAt<MyAlias>` / `obj.method<T>`, resolve "MyAlias" and "T"
// (for function call `t.tupleAt<MyAlias>()`, this v (ast_dot_access `t.tupleAt<MyAlias>`) is callee)
if (auto v_instantiationTs = v->get_instantiationTs()) {
for (int i = 0; i < v_instantiationTs->size(); ++i) {
TypePtr substituted_type = finalize_type_data(v_instantiationTs->get_item(i)->substituted_type, current_function->genericTs);
v_instantiationTs->get_item(i)->mutate()->assign_resolved_type(substituted_type);
}
}
parent::visit(v->get_obj());
}
// `expr as T`: finalizes the target type, then visits the casted expression.
void visit(V<ast_cast_as_operator> v) override {
TypePtr cast_to_type = finalize_type_data(v->cast_to_type, current_function->genericTs);
v->mutate()->assign_resolved_type(cast_to_type);
parent::visit(v->get_expr());
}
// A non-empty sequence gets its own scope for the duration of the traversal of its items.
void visit(V<ast_sequence> v) override {
if (v->empty()) {
return;
}
current_scope.open_scope(v->loc);
parent::visit(v);
current_scope.close_scope(v->loc_end);
}
// do-while: a single scope is kept open across both the body and the condition.
// NOTE(review): this relies on parent::visit(body) not re-entering visit(V<ast_sequence>) above — confirm against ASTVisitorFunctionBody.
void visit(V<ast_do_while_statement> v) override {
current_scope.open_scope(v->loc);
parent::visit(v->get_body());
parent::visit(v->get_cond()); // in 'while' condition it's ok to use variables declared inside do
current_scope.close_scope(v->get_body()->loc_end);
}
// try-catch: the try body is visited first; then a scope is opened for the catch part,
// both catch variables are declared in it (item 1, then item 0) and the catch body is visited.
void visit(V<ast_try_catch_statement> v) override {
visit(v->get_try_body());
current_scope.open_scope(v->get_catch_body()->loc);
const std::vector<AnyExprV>& catch_items = v->get_catch_expr()->get_items();
tolk_assert(catch_items.size() == 2);
process_catch_variable(catch_items[1]);
process_catch_variable(catch_items[0]);
parent::visit(v->get_catch_body());
current_scope.close_scope(v->get_catch_body()->loc_end);
}
public:
// Every function is visited by this pipe.
bool should_visit_function(const FunctionData* fun_ref) override {
// this pipe is done just after parsing
// visit both asm and code functions, resolve identifiers in parameter/return types everywhere
// for generic functions, unresolved "T" will be replaced by TypeDataGenericT
return true;
}
// Entry point for one function:
// 1) finalizes parameter types and the return type, storing them both in the AST and in the symbols;
// 2) for code functions, opens the function scope, registers all parameters as locals and traverses the body.
// After the traversal no scope may remain open.
void start_visiting_function(const FunctionData* fun_ref, V<ast_function_declaration> v) override {
current_function = fun_ref;
for (int i = 0; i < v->get_num_params(); ++i) {
const LocalVarData& param_var = fun_ref->parameters[i];
TypePtr declared_type = finalize_type_data(param_var.declared_type, fun_ref->genericTs);
v->get_param(i)->mutate()->assign_param_ref(&param_var);
v->get_param(i)->mutate()->assign_resolved_type(declared_type);
param_var.mutate()->assign_resolved_type(declared_type);
}
TypePtr return_type = finalize_type_data(fun_ref->declared_return_type, fun_ref->genericTs);
v->mutate()->assign_resolved_type(return_type);
fun_ref->mutate()->assign_resolved_type(return_type);
if (fun_ref->is_code_function()) {
auto v_seq = v->get_body()->as<ast_sequence>();
current_scope.open_scope(v->loc);
for (int i = 0; i < v->get_num_params(); ++i) {
current_scope.add_local_var(&fun_ref->parameters[i]);
}
parent::visit(v_seq);
current_scope.close_scope(v_seq->loc_end);
tolk_assert(current_scope.scopes.empty());
}
current_function = nullptr;
}
};
// out-of-class definitions of the visitor's static state (shared by all visitor instances)
NameAndScopeResolver AssignSymInsideFunctionVisitor::current_scope;
const FunctionData* AssignSymInsideFunctionVisitor::current_function = nullptr;
// Runs the pipe over all source files:
//  - every function declaration is handed to the visitor (its fun_ref must already exist);
//  - declared types of global variables are finalized;
//  - declared types of constants are finalized (only for constants that have a declared type).
// Globals and constants have no generic names, hence nullptr is passed as genericTs.
void pipeline_resolve_identifiers_and_assign_symbols() {
  AssignSymInsideFunctionVisitor visitor;

  for (const SrcFile* src_file : G.all_src_files) {
    for (AnyV decl : src_file->ast->as<ast_tolk_file>()->get_toplevel_declarations()) {
      if (auto v_func = decl->try_as<ast_function_declaration>()) {
        tolk_assert(v_func->fun_ref);
        visitor.start_visiting_function(v_func->fun_ref, v_func);
        continue;
      }

      if (auto v_global = decl->try_as<ast_global_var_declaration>()) {
        TypePtr resolved = finalize_type_data(v_global->var_ref->declared_type, nullptr);
        v_global->mutate()->assign_resolved_type(resolved);
        v_global->var_ref->mutate()->assign_resolved_type(resolved);
        continue;
      }

      auto v_const = decl->try_as<ast_constant_declaration>();
      if (v_const && v_const->declared_type) {
        TypePtr resolved = finalize_type_data(v_const->const_ref->declared_type, nullptr);
        v_const->mutate()->assign_resolved_type(resolved);
        v_const->const_ref->mutate()->assign_resolved_type(resolved);
      }
    }
  }
}
// Runs the pipe for a single function, using the declaration stored in fun_ref->ast_root.
void pipeline_resolve_identifiers_and_assign_symbols(const FunctionData* fun_ref) {
  AssignSymInsideFunctionVisitor visitor;
  if (!visitor.should_visit_function(fun_ref)) {
    return;
  }
  auto v_function = fun_ref->ast_root->as<ast_function_declaration>();
  visitor.start_visiting_function(fun_ref, v_function);
}
} // namespace tolk

View file

@ -1,272 +0,0 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tolk.h"
#include "platform-utils.h"
#include "src-file.h"
#include "ast.h"
#include "ast-visitor.h"
#include "compiler-state.h"
#include <unordered_map>
/*
* This pipe resolves identifiers (local variables) in all functions bodies.
* It happens before type inferring, but after all global symbols are registered.
* It means, that for any symbol `x` we can look up whether it's a global name or not.
*
* Example: `var x = 10; x = 20;` both `x` point to one LocalVarData.
* Example: `x = 20` undefined symbol `x` is also here (unless it's a global)
* Variables scoping and redeclaration are also here.
*
* As a result of this step, every V<ast_identifier>::sym is filled, pointing either to a local var/parameter,
* or to a global var / constant / function.
*/
namespace tolk {
// Enforces import strictness: a symbol declared in another (non-builtin) file may be used
// only if the file containing the usage lists the declaring file among its imports.
// Fires an error at `v_usage` otherwise.
static void check_import_exists_when_using_sym(AnyV v_usage, const Symbol* used_sym) {
  SrcLocation declared_at = used_sym->loc;
  if (v_usage->loc.is_symbol_from_same_or_builtin_file(declared_at)) {
    return;   // same file or builtin: nothing to import
  }

  const SrcFile* declaring_file = declared_at.get_src_file();
  for (const SrcFile::ImportStatement& stmt : v_usage->loc.get_src_file()->imports) {
    if (stmt.imported_file == declaring_file) {
      return; // the import is present
    }
  }

  v_usage->error("Using a non-imported symbol `" + used_sym->name + "`. Forgot to import \"" + declaring_file->rel_filename + "\"?");
}
// Stack of lexical scopes used while resolving identifiers inside a function body.
// Each scope maps the hash of a variable name to its symbol; names that are not found in
// any open scope are looked up in the global symbol table.
// Note: scopes are keyed by the hash of the name, not by the name itself, so two distinct
// names with equal hashes would be treated as the same entry.
struct NameAndScopeResolver {
  std::vector<std::unordered_map<uint64_t, const Symbol*>> scopes;

  // Key under which a name is stored in a scope.
  static uint64_t key_hash(std::string_view name_key) {
    std::hash<std::string_view> hasher;
    return hasher(name_key);
  }

  // Pushes a new innermost scope (`loc` is kept only for debug tracing).
  void open_scope([[maybe_unused]] SrcLocation loc) {
    // std::cerr << "open_scope " << scopes.size() + 1 << " at " << loc << std::endl;
    scopes.emplace_back();
  }

  // Pops the innermost scope; closing when nothing is open is an internal error.
  void close_scope([[maybe_unused]] SrcLocation loc) {
    // std::cerr << "close_scope " << scopes.size() << " at " << loc << std::endl;
    if (UNLIKELY(scopes.empty())) {
      throw Fatal{"cannot close the outer scope"};
    }
    scopes.pop_back();
  }

  // Searches scopes from innermost to outermost, then falls back to the global symtable.
  const Symbol* lookup_symbol(std::string_view name) const {
    const uint64_t name_hash = key_hash(name);
    for (auto depth = scopes.size(); depth > 0; --depth) {
      const auto& frame = scopes[depth - 1];
      auto found = frame.find(name_hash);
      if (found != frame.end()) {
        return found->second;
      }
    }
    return G.symtable.lookup(name);
  }

  // Registers a local variable in the innermost scope and returns it.
  // Variables with an empty name (underscore) are returned without being registered.
  // Throws ParseError if the innermost scope already has an entry for this name.
  const Symbol* add_local_var(const LocalVarData* v_sym) {
    if (UNLIKELY(scopes.empty())) {
      throw Fatal("unexpected scope_level = 0");
    }
    if (!v_sym->name.empty()) {   // empty name means underscore
      const bool was_inserted = scopes.back().emplace(key_hash(v_sym->name), v_sym).second;
      if (UNLIKELY(!was_inserted)) {
        throw ParseError(v_sym->loc, "redeclaration of local variable `" + v_sym->name + "`");
      }
    }
    return v_sym;
  }
};
// Visitor that binds identifiers inside a function body to symbols (local variables, parameters,
// globals) and also binds function calls / dot-method calls to the called FunctionData,
// checking the number of passed arguments.
// Scopes are tracked in the static `current_scope`.
class AssignSymInsideFunctionVisitor final : public ASTVisitorFunctionBody {
// more correctly this field shouldn't be static, but currently there is no need to make it a part of state
static NameAndScopeResolver current_scope;
// Allocates a new LocalVarData (flagged immutable if requested) and registers it in the innermost scope.
// NOTE(review): the -1 argument is presumably the not-yet-assigned variable index — confirm against the LocalVarData constructor.
static const Symbol* create_local_var_sym(std::string_view name, SrcLocation loc, TypeExpr* var_type, bool immutable) {
LocalVarData* v_sym = new LocalVarData(static_cast<std::string>(name), loc, -1, var_type);
if (immutable) {
v_sym->flags |= LocalVarData::flagImmutable;
}
return current_scope.add_local_var(v_sym);
}
// Declares one variable of a `catch` clause as an immutable local with a type hole.
// Catch items that are not ast_identifier are skipped.
static void process_catch_variable(AnyV catch_var) {
if (auto v_ident = catch_var->try_as<ast_identifier>()) {
const Symbol* sym = create_local_var_sym(v_ident->name, catch_var->loc, TypeExpr::new_hole(), true);
v_ident->mutate()->assign_sym(sym);
}
}
// Compares the number of arguments with the number of parameters of `fun_ref`.
// For a dot call, the object on the left counts as one extra argument, but the numbers
// printed in the error message exclude it.
static void process_function_arguments(const FunctionData* fun_ref, V<ast_argument_list> v, AnyExprV lhs_of_dot_call) {
int delta_self = lhs_of_dot_call ? 1 : 0;
int n_arguments = static_cast<int>(v->get_arguments().size()) + delta_self;
int n_parameters = static_cast<int>(fun_ref->parameters.size());
// Tolk doesn't have optional parameters currently, so just compare counts
if (n_parameters < n_arguments) {
v->error("too many arguments in call to `" + fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self));
}
if (n_arguments < n_parameters) {
v->error("too few arguments in call to `" + fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self));
}
}
// Variable declaration.
// With `redef`, the name must resolve to an existing local or global variable, which is reused.
// Otherwise, if the declared name is an identifier, a new local variable is created
// (with the declared type, or a type hole if the type is omitted).
// NOTE(review): v->error() is presumably non-returning (sym is dereferenced right after the nullptr check) — confirm.
void visit(V<ast_local_var> v) override {
if (v->marked_as_redef) {
auto v_ident = v->get_identifier()->as<ast_identifier>();
const Symbol* sym = current_scope.lookup_symbol(v_ident->name);
if (sym == nullptr) {
v->error("`redef` for unknown variable");
}
if (!sym->try_as<LocalVarData>() && !sym->try_as<GlobalVarData>()) {
v->error("`redef` for unknown variable");
}
v->mutate()->assign_var_ref(sym);
v_ident->mutate()->assign_sym(sym);
} else if (auto v_ident = v->get_identifier()->try_as<ast_identifier>()) {
TypeExpr* var_type = v->declared_type ? v->declared_type : TypeExpr::new_hole();
const Symbol* sym = create_local_var_sym(v_ident->name, v->loc, var_type, v->is_immutable);
v->mutate()->assign_var_ref(sym);
v_ident->mutate()->assign_sym(sym);
} else {
// underscore, do nothing, v->sym remains nullptr
}
}
// Declaration with assignment: the assigned value is visited before the declared variables,
// so names declared on the left are not yet registered while the right side is resolved.
void visit(V<ast_local_vars_declaration> v) override {
parent::visit(v->get_assigned_val());
parent::visit(v->get_lhs());
}
// Identifier: looks it up (scopes first, then globals), stores the symbol
// and checks imports for non-local symbols.
void visit(V<ast_identifier> v) override {
const Symbol* sym = current_scope.lookup_symbol(v->name);
if (!sym) {
v->error("undefined symbol `" + static_cast<std::string>(v->name) + "`");
}
v->mutate()->assign_sym(sym);
// for global functions, global vars and constants, `import` must exist
if (!sym->try_as<LocalVarData>()) {
check_import_exists_when_using_sym(v, sym);
}
}
// Function call: visits callee and arguments; if the callee is an identifier bound to a
// FunctionData, stores it as fun_ref and checks the arguments count.
void visit(V<ast_function_call> v) override {
parent::visit(v->get_called_f());
parent::visit(v->get_arg_list());
// most likely it's a global function, but also may be `some_var(args)` or even `getF()(args)`
// for such corner cases, sym remains nullptr
if (auto v_ident = v->get_called_f()->try_as<ast_identifier>()) {
if (const auto* fun_ref = v_ident->sym->try_as<FunctionData>()) {
v->mutate()->assign_fun_ref(fun_ref);
process_function_arguments(fun_ref, v->get_arg_list(), nullptr);
}
}
// for `some_var(args)`, if it's called with wrong arguments count, the error is not here
// it will be fired later, it's a type checking error
}
// Dot-method call: the method name is looked up among global symbols and must be a function
// with at least one parameter; the object on the left counts as the first argument.
void visit(V<ast_dot_method_call> v) override {
const Symbol* sym = lookup_global_symbol(v->method_name);
if (!sym) {
v->error("undefined symbol `" + static_cast<std::string>(v->method_name) + "`");
}
const auto* fun_ref = sym->try_as<FunctionData>();
if (!fun_ref) {
v->error("`" + static_cast<std::string>(v->method_name) + "` is not a method");
}
if (fun_ref->parameters.empty()) {
v->error("`" + static_cast<std::string>(v->method_name) + "` has no parameters and can not be called as method");
}
v->mutate()->assign_fun_ref(fun_ref);
parent::visit(v);
process_function_arguments(fun_ref, v->get_arg_list(), v->get_obj());
}
// `self` keyword: resolved through the regular lookup of the name "self".
void visit(V<ast_self_keyword> v) override {
const Symbol* sym = current_scope.lookup_symbol("self");
if (!sym) {
v->error("using `self` in a non-member function (it does not accept the first `self` parameter)");
}
v->mutate()->assign_param_ref(sym->as<LocalVarData>());
}
// A non-empty sequence gets its own scope for the duration of the traversal of its items.
void visit(V<ast_sequence> v) override {
if (v->empty()) {
return;
}
current_scope.open_scope(v->loc);
parent::visit(v);
current_scope.close_scope(v->loc_end);
}
// do-while: a single scope is kept open across both the body and the condition.
// NOTE(review): this relies on parent::visit(body) not re-entering visit(V<ast_sequence>) above — confirm against ASTVisitorFunctionBody.
void visit(V<ast_do_while_statement> v) override {
current_scope.open_scope(v->loc);
parent::visit(v->get_body());
parent::visit(v->get_cond()); // in 'while' condition it's ok to use variables declared inside do
current_scope.close_scope(v->get_body()->loc_end);
}
// try-catch: the try body is visited first; then a scope is opened for the catch part,
// both catch variables are declared in it (item 1, then item 0) and the catch body is visited.
void visit(V<ast_try_catch_statement> v) override {
visit(v->get_try_body());
current_scope.open_scope(v->get_catch_body()->loc);
const std::vector<AnyExprV>& catch_items = v->get_catch_expr()->get_items();
tolk_assert(catch_items.size() == 2);
process_catch_variable(catch_items[1]);
process_catch_variable(catch_items[0]);
parent::visit(v->get_catch_body());
current_scope.close_scope(v->get_catch_body()->loc_end);
}
public:
// Entry point for one function: opens the function scope, registers all parameters as locals
// (also storing param_ref in the AST), traverses the body and closes the scope.
// The body must be an ast_sequence; after the traversal no scope may remain open.
void start_visiting_function(V<ast_function_declaration> v_function) override {
auto v_seq = v_function->get_body()->try_as<ast_sequence>();
tolk_assert(v_seq != nullptr);
current_scope.open_scope(v_function->loc);
for (int i = 0; i < v_function->get_num_params(); ++i) {
current_scope.add_local_var(&v_function->fun_ref->parameters[i]);
v_function->get_param(i)->mutate()->assign_param_ref(&v_function->fun_ref->parameters[i]);
}
parent::visit(v_seq);
current_scope.close_scope(v_seq->loc_end);
tolk_assert(current_scope.scopes.empty());
}
};
// out-of-class definition of the visitor's static scope stack (shared by all visitor instances)
NameAndScopeResolver AssignSymInsideFunctionVisitor::current_scope;
// Runs AssignSymInsideFunctionVisitor over the functions of all given source files
// (the iteration itself is delegated to visit_ast_of_all_functions).
void pipeline_resolve_identifiers_and_assign_symbols(const AllSrcFiles& all_src_files) {
visit_ast_of_all_functions<AssignSymInsideFunctionVisitor>(all_src_files);
}
} // namespace tolk

View file

@ -25,25 +25,33 @@
*/
#pragma once
#include "src-file.h"
#include "fwd-declarations.h"
#include <string>
namespace tolk {
AllSrcFiles pipeline_discover_and_parse_sources(const std::string& stdlib_filename, const std::string& entrypoint_filename);
void pipeline_discover_and_parse_sources(const std::string& stdlib_filename, const std::string& entrypoint_filename);
void pipeline_register_global_symbols(const AllSrcFiles&);
void pipeline_resolve_identifiers_and_assign_symbols(const AllSrcFiles&);
void pipeline_calculate_rvalue_lvalue(const AllSrcFiles&);
void pipeline_detect_unreachable_statements(const AllSrcFiles&);
void pipeline_infer_and_check_types(const AllSrcFiles&);
void pipeline_refine_lvalue_for_mutate_arguments(const AllSrcFiles&);
void pipeline_check_rvalue_lvalue(const AllSrcFiles&);
void pipeline_check_pure_impure_operations(const AllSrcFiles&);
void pipeline_constant_folding(const AllSrcFiles&);
void pipeline_convert_ast_to_legacy_Expr_Op(const AllSrcFiles&);
void pipeline_register_global_symbols();
void pipeline_resolve_identifiers_and_assign_symbols();
void pipeline_calculate_rvalue_lvalue();
void pipeline_detect_unreachable_statements();
void pipeline_infer_types_and_calls_and_fields();
void pipeline_refine_lvalue_for_mutate_arguments();
void pipeline_check_rvalue_lvalue();
void pipeline_check_pure_impure_operations();
void pipeline_constant_folding();
void pipeline_convert_ast_to_legacy_Expr_Op();
void pipeline_find_unused_symbols();
void pipeline_generate_fif_output_to_std_cout(const AllSrcFiles&);
void pipeline_generate_fif_output_to_std_cout();
// these pipes also can be called per-function individually
// they are called for instantiated generics functions, when `f<T>` is deeply cloned as `f<int>`
void pipeline_resolve_identifiers_and_assign_symbols(const FunctionData*);
void pipeline_calculate_rvalue_lvalue(const FunctionData*);
void pipeline_detect_unreachable_statements(const FunctionData*);
void pipeline_infer_types_and_calls_and_fields(const FunctionData*);
} // namespace tolk

View file

@ -27,11 +27,15 @@
// Function attribute macros.
// With GNU-compatible compilers they expand to the corresponding [[gnu::...]] attributes.
// Elsewhere, pure optimization hints (cold / flatten / noinline / always_inline) expand to nothing,
// and only NORETURN maps to the standard [[noreturn]] attribute.
// Note: there is no standard `[[noinline]]` attribute, so the fallback for NOINLINE must be empty;
// otherwise non-GNU compilers diagnose an unknown attribute at every usage.
#if __GNUC__
#define GNU_ATTRIBUTE_COLD [[gnu::cold]]
#define GNU_ATTRIBUTE_FLATTEN [[gnu::flatten]]
#define GNU_ATTRIBUTE_NORETURN [[gnu::noreturn]]
#define GNU_ATTRIBUTE_NOINLINE [[gnu::noinline]]
#define GNU_ATTRIBUTE_ALWAYS_INLINE [[gnu::always_inline]]
#else
#define GNU_ATTRIBUTE_COLD
#define GNU_ATTRIBUTE_FLATTEN
#define GNU_ATTRIBUTE_NORETURN [[noreturn]]
#define GNU_ATTRIBUTE_NOINLINE
#define GNU_ATTRIBUTE_ALWAYS_INLINE
#endif

View file

@ -23,8 +23,8 @@ namespace tolk {
static_assert(sizeof(SrcLocation) == 8);
SrcFile* AllRegisteredSrcFiles::find_file(int file_id) const {
for (SrcFile* file : all_src_files) {
const SrcFile* AllRegisteredSrcFiles::find_file(int file_id) const {
for (const SrcFile* file : all_src_files) {
if (file->file_id == file_id) {
return file;
}
@ -32,8 +32,8 @@ SrcFile* AllRegisteredSrcFiles::find_file(int file_id) const {
return nullptr;
}
SrcFile* AllRegisteredSrcFiles::find_file(const std::string& abs_filename) const {
for (SrcFile* file : all_src_files) {
const SrcFile* AllRegisteredSrcFiles::find_file(const std::string& abs_filename) const {
for (const SrcFile* file : all_src_files) {
if (file->abs_filename == abs_filename) {
return file;
}
@ -41,7 +41,7 @@ SrcFile* AllRegisteredSrcFiles::find_file(const std::string& abs_filename) const
return nullptr;
}
SrcFile* AllRegisteredSrcFiles::locate_and_register_source_file(const std::string& rel_filename, SrcLocation included_from) {
const SrcFile* AllRegisteredSrcFiles::locate_and_register_source_file(const std::string& rel_filename, SrcLocation included_from) {
td::Result<std::string> path = G.settings.read_callback(CompilerSettings::FsReadCallbackKind::Realpath, rel_filename.c_str());
if (path.is_error()) {
if (included_from.is_defined()) {
@ -51,7 +51,7 @@ SrcFile* AllRegisteredSrcFiles::locate_and_register_source_file(const std::strin
}
std::string abs_filename = path.move_as_ok();
if (SrcFile* file = find_file(abs_filename)) {
if (const SrcFile* file = find_file(abs_filename)) {
return file;
}
@ -75,16 +75,7 @@ SrcFile* AllRegisteredSrcFiles::get_next_unparsed_file() {
if (last_parsed_file_id >= last_registered_file_id) {
return nullptr;
}
return all_src_files[++last_parsed_file_id];
}
AllSrcFiles AllRegisteredSrcFiles::get_all_files() const {
AllSrcFiles src_files_immutable;
src_files_immutable.reserve(all_src_files.size());
for (const SrcFile* file : all_src_files) {
src_files_immutable.push_back(file);
}
return src_files_immutable;
return const_cast<SrcFile*>(all_src_files[++last_parsed_file_id]);
}
bool SrcFile::is_stdlib_file() const {

View file

@ -30,7 +30,7 @@ struct SrcFile {
std::string_view line_str;
};
struct ImportStatement {
struct ImportDirective {
const SrcFile* imported_file;
};
@ -39,7 +39,7 @@ struct SrcFile {
std::string abs_filename; // absolute from root
std::string text; // file contents loaded into memory, every Token::str_val points inside it
AnyV ast = nullptr; // when a file has been parsed, its ast_tolk_file is kept here
std::vector<ImportStatement> imports; // to check strictness (can't use a symbol without importing its file)
std::vector<ImportDirective> imports; // to check strictness (can't use a symbol without importing its file)
SrcFile(int file_id, std::string rel_filename, std::string abs_filename, std::string&& text)
: file_id(file_id)
@ -95,21 +95,20 @@ public:
std::ostream& operator<<(std::ostream& os, SrcLocation loc);
using AllSrcFiles = std::vector<const SrcFile*>;
class AllRegisteredSrcFiles {
std::vector<SrcFile*> all_src_files;
std::vector<const SrcFile*> all_src_files;
int last_registered_file_id = -1;
int last_parsed_file_id = -1;
public:
SrcFile *find_file(int file_id) const;
SrcFile* find_file(const std::string& abs_filename) const;
const SrcFile* find_file(int file_id) const;
const SrcFile* find_file(const std::string& abs_filename) const;
SrcFile* locate_and_register_source_file(const std::string& rel_filename, SrcLocation included_from);
const SrcFile* locate_and_register_source_file(const std::string& rel_filename, SrcLocation included_from);
SrcFile* get_next_unparsed_file();
AllSrcFiles get_all_files() const;
auto begin() const { return all_src_files.begin(); }
auto end() const { return all_src_files.end(); }
};
struct Fatal final : std::exception {

View file

@ -17,28 +17,48 @@
#include "symtable.h"
#include "compiler-state.h"
#include "platform-utils.h"
#include <sstream>
#include <cassert>
#include "generics-helpers.h"
namespace tolk {
// Name of the function for messages: for a generic function, the list of its generic
// parameters (as rendered by genericTs) is appended to the name.
std::string FunctionData::as_human_readable() const {
  // if it's generic instantiation like `f<int>`, its name is "f<int>", not "f"
  return genericTs ? name + genericTs->as_human_readable() : name;
}
// Tells whether a body should be code-generated for this function.
bool FunctionData::does_need_codegen() const {
  // when a function is declared, but not referenced from code in any way, don't generate its body
  const bool unused_and_removable = !is_really_used() && G.settings.remove_unused_functions;
  // functions with asm body don't need code generation
  // (even if used as non-call: `var a = beginCell;` inserts TVM continuation inline)
  if (unused_and_removable || is_asm_function() || is_builtin_function()) {
    return false;
  }
  // when a function is referenced like `var a = some_fn;` (or in some other non-call way), its continuation should exist;
  // otherwise, generic functions don't need code generation, only generic instantiations do
  //
  // currently, there is no inlining, all functions are codegenerated
  // (but actually, unused ones are later removed by Fift)
  // in the future, we may want to implement a true AST inlining for "simple" functions
  return is_used_as_noncall() || !is_generic_function();
}
void FunctionData::assign_is_really_used() {
this->flags |= flagReallyUsed;
void FunctionData::assign_resolved_type(TypePtr declared_return_type) {
this->declared_return_type = declared_return_type;
}
// Stores both inferred types on the function: the return type and the full callable type.
// The parameters shadow the members of the same names, hence the explicit `this->`.
void FunctionData::assign_inferred_type(TypePtr inferred_return_type, TypePtr inferred_full_type) {
  this->inferred_full_type = inferred_full_type;
  this->inferred_return_type = inferred_return_type;
}
void FunctionData::assign_is_used_as_noncall() {
@ -49,14 +69,45 @@ void FunctionData::assign_is_implicit_return() {
this->flags |= flagImplicitReturn;
}
void FunctionData::assign_is_type_inferring_done() {
this->flags |= flagTypeInferringDone;
}
void FunctionData::assign_is_really_used() {
this->flags |= flagReallyUsed;
}
// Takes ownership of the given vector by moving it into the `arg_order` member
// (the parameter shadows the member, hence the explicit `this->`).
void FunctionData::assign_arg_order(std::vector<int>&& arg_order) {
this->arg_order = std::move(arg_order);
}
// Stores the given type into the `declared_type` member of a global variable
// (the parameter shadows the member, hence the explicit `this->`).
void GlobalVarData::assign_resolved_type(TypePtr declared_type) {
this->declared_type = declared_type;
}
void GlobalVarData::assign_is_really_used() {
this->flags |= flagReallyUsed;
}
// Stores the given type into the `declared_type` member of a global constant
// (the parameter shadows the member, hence the explicit `this->`).
void GlobalConstData::assign_resolved_type(TypePtr declared_type) {
this->declared_type = declared_type;
}
// Overwrites the `idx` member of a local variable with the given value
// (the parameter shadows the member, hence the explicit `this->`).
void LocalVarData::assign_idx(int idx) {
this->idx = idx;
}
// Stores the given type into the `declared_type` member of a local variable
// (the parameter shadows the member, hence the explicit `this->`).
void LocalVarData::assign_resolved_type(TypePtr declared_type) {
this->declared_type = declared_type;
}
// Records an inferred type in the `declared_type` slot of a local variable.
// In TOLK_DEBUG builds, it is asserted that no type has been stored there before.
void LocalVarData::assign_inferred_type(TypePtr inferred_type) {
#ifdef TOLK_DEBUG
  // called when type declaration omitted, inferred from assigned value
  assert(declared_type == nullptr);
#endif
  declared_type = inferred_type;
}
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
static void fire_error_redefinition_of_symbol(SrcLocation loc, const Symbol* previous) {
SrcLocation prev_loc = previous->loc;

View file

@ -17,7 +17,7 @@
#pragma once
#include "src-file.h"
#include "type-expr.h"
#include "fwd-declarations.h"
#include "constant-evaluator.h"
#include "crypto/common/refint.h"
#include <unordered_map>
@ -57,27 +57,31 @@ struct LocalVarData final : Symbol {
flagImmutable = 2, // variable was declared via `val` (not `var`)
};
TypeExpr* declared_type;
int flags = 0;
TypePtr declared_type; // either at declaration `var x:int`, or if omitted, from assigned value `var x=2`
int flags;
int idx;
LocalVarData(std::string name, SrcLocation loc, int idx, TypeExpr* declared_type)
LocalVarData(std::string name, SrcLocation loc, TypePtr declared_type, int flags, int idx)
: Symbol(std::move(name), loc)
, declared_type(declared_type)
, flags(flags)
, idx(idx) {
}
bool is_underscore() const { return name.empty(); }
bool is_immutable() const { return flags & flagImmutable; }
bool is_mutate_parameter() const { return flags & flagMutateParameter; }
LocalVarData* mutate() const { return const_cast<LocalVarData*>(this); }
void assign_idx(int idx);
void assign_resolved_type(TypePtr declared_type);
void assign_inferred_type(TypePtr inferred_type);
};
struct FunctionBodyCode;
struct FunctionBodyAsm;
struct FunctionBodyBuiltin;
struct GenericsDeclaration;
struct GenericsInstantiation;
typedef std::variant<
FunctionBodyCode*,
@ -91,7 +95,7 @@ struct FunctionData final : Symbol {
enum {
flagInline = 1, // marked `@inline`
flagInlineRef = 2, // marked `@inline_ref`
flagReallyUsed = 4, // calculated via dfs from used functions; declared but unused functions are not codegenerated
flagTypeInferringDone = 4, // type inferring step of function's body (all AST nodes assigning v->inferred_type) is done
flagUsedAsNonCall = 8, // used not only as `f()`, but as a 1-st class function (assigned to var, pushed to tuple, etc.)
flagMarkedAsPure = 16, // declared as `pure`, can't call impure and access globals, unused invocations are optimized out
flagImplicitReturn = 32, // control flow reaches end of function, so it needs implicit return at the end
@ -100,25 +104,36 @@ struct FunctionData final : Symbol {
flagHasMutateParams = 256, // has parameters declared as `mutate`
flagAcceptsSelf = 512, // is a member function (has `self` first parameter)
flagReturnsSelf = 1024, // return type is `self` (returns the mutated 1st argument), calls can be chainable
flagReallyUsed = 2048, // calculated via dfs from used functions; declared but unused functions are not codegenerated
};
int method_id = EMPTY_METHOD_ID;
int flags;
TypeExpr* full_type; // currently, TypeExpr::_Map, probably wrapped with forall
std::vector<LocalVarData> parameters;
std::vector<int> arg_order, ret_order;
TypePtr declared_return_type; // may be nullptr, meaning "auto infer"
TypePtr inferred_return_type = nullptr; // assigned on type inferring
TypePtr inferred_full_type = nullptr; // assigned on type inferring, it's TypeDataFunCallable(params -> return)
const GenericsDeclaration* genericTs;
const GenericsInstantiation* instantiationTs;
FunctionBody body;
AnyV ast_root; // V<ast_function_declaration> for user-defined (not builtin)
FunctionData(std::string name, SrcLocation loc, TypeExpr* full_type, std::vector<LocalVarData> parameters, int initial_flags, FunctionBody body)
FunctionData(std::string name, SrcLocation loc, TypePtr declared_return_type, std::vector<LocalVarData> parameters, int initial_flags, const GenericsDeclaration* genericTs, const GenericsInstantiation* instantiationTs, FunctionBody body, AnyV ast_root)
: Symbol(std::move(name), loc)
, flags(initial_flags)
, full_type(full_type)
, parameters(std::move(parameters))
, body(body) {
, declared_return_type(declared_return_type)
, genericTs(genericTs)
, instantiationTs(instantiationTs)
, body(body)
, ast_root(ast_root) {
}
std::string as_human_readable() const;
const std::vector<int>* get_arg_order() const {
return arg_order.empty() ? nullptr : &arg_order;
}
@ -126,13 +141,19 @@ struct FunctionData final : Symbol {
return ret_order.empty() ? nullptr : &ret_order;
}
bool is_regular_function() const { return std::holds_alternative<FunctionBodyCode*>(body); }
int get_num_params() const { return static_cast<int>(parameters.size()); }
const LocalVarData& get_param(int idx) const { return parameters[idx]; }
bool is_code_function() const { return std::holds_alternative<FunctionBodyCode*>(body); }
bool is_asm_function() const { return std::holds_alternative<FunctionBodyAsm*>(body); }
bool is_builtin_function() const { return std::holds_alternative<FunctionBodyBuiltin*>(body); }
bool is_builtin_function() const { return ast_root == nullptr; }
bool is_generic_function() const { return genericTs != nullptr; }
bool is_instantiation_of_generic_function() const { return instantiationTs != nullptr; }
bool is_inline() const { return flags & flagInline; }
bool is_inline_ref() const { return flags & flagInlineRef; }
bool is_really_used() const { return flags & flagReallyUsed; }
bool is_type_inferring_done() const { return flags & flagTypeInferringDone; }
bool is_used_as_noncall() const { return flags & flagUsedAsNonCall; }
bool is_marked_as_pure() const { return flags & flagMarkedAsPure; }
bool is_implicit_return() const { return flags & flagImplicitReturn; }
@ -143,13 +164,18 @@ struct FunctionData final : Symbol {
bool does_accept_self() const { return flags & flagAcceptsSelf; }
bool does_return_self() const { return flags & flagReturnsSelf; }
bool does_mutate_self() const { return (flags & flagAcceptsSelf) && parameters[0].is_mutate_parameter(); }
bool is_really_used() const { return flags & flagReallyUsed; }
bool does_need_codegen() const;
FunctionData* mutate() const { return const_cast<FunctionData*>(this); }
void assign_is_really_used();
void assign_resolved_type(TypePtr declared_return_type);
void assign_inferred_type(TypePtr inferred_return_type, TypePtr inferred_full_type);
void assign_is_used_as_noncall();
void assign_is_implicit_return();
void assign_is_type_inferring_done();
void assign_is_really_used();
void assign_arg_order(std::vector<int>&& arg_order);
};
struct GlobalVarData final : Symbol {
@ -157,10 +183,10 @@ struct GlobalVarData final : Symbol {
flagReallyUsed = 1, // calculated via dfs from used functions; unused globals are not codegenerated
};
TypeExpr* declared_type;
TypePtr declared_type; // always exists, declaring globals without type is prohibited
int flags = 0;
GlobalVarData(std::string name, SrcLocation loc, TypeExpr* declared_type)
GlobalVarData(std::string name, SrcLocation loc, TypePtr declared_type)
: Symbol(std::move(name), loc)
, declared_type(declared_type) {
}
@ -168,17 +194,18 @@ struct GlobalVarData final : Symbol {
bool is_really_used() const { return flags & flagReallyUsed; }
GlobalVarData* mutate() const { return const_cast<GlobalVarData*>(this); }
void assign_resolved_type(TypePtr declared_type);
void assign_is_really_used();
};
struct GlobalConstData final : Symbol {
ConstantValue value;
TypeExpr* inferred_type;
TypePtr declared_type; // may be nullptr
GlobalConstData(std::string name, SrcLocation loc, ConstantValue&& value)
GlobalConstData(std::string name, SrcLocation loc, TypePtr declared_type, ConstantValue&& value)
: Symbol(std::move(name), loc)
, value(std::move(value))
, inferred_type(TypeExpr::new_atomic(this->value.is_int() ? TypeExpr::_Int : TypeExpr::_Slice)) {
, declared_type(declared_type) {
}
bool is_int_const() const { return value.is_int(); }
@ -186,6 +213,9 @@ struct GlobalConstData final : Symbol {
td::RefInt256 as_int_const() const { return value.as_int(); }
const std::string& as_slice_const() const { return value.as_slice(); }
GlobalConstData* mutate() const { return const_cast<GlobalConstData*>(this); }
void assign_resolved_type(TypePtr declared_type);
};
class GlobalSymbolTable {

View file

@ -28,6 +28,7 @@
#include "compiler-state.h"
#include "lexer.h"
#include "ast.h"
#include "type-system.h"
namespace tolk {
@ -45,27 +46,28 @@ void on_assertion_failed(const char *description, const char *file_name, int lin
}
int tolk_proceed(const std::string &entrypoint_filename) {
type_system_init();
define_builtins();
lexer_init();
// on any error, an exception is thrown, and the message is printed out below
// (currently, only a single error can be printed)
try {
AllSrcFiles all_files = pipeline_discover_and_parse_sources("@stdlib/common.tolk", entrypoint_filename);
pipeline_discover_and_parse_sources("@stdlib/common.tolk", entrypoint_filename);
pipeline_register_global_symbols(all_files);
pipeline_resolve_identifiers_and_assign_symbols(all_files);
pipeline_calculate_rvalue_lvalue(all_files);
pipeline_detect_unreachable_statements(all_files);
pipeline_infer_and_check_types(all_files);
pipeline_refine_lvalue_for_mutate_arguments(all_files);
pipeline_check_rvalue_lvalue(all_files);
pipeline_check_pure_impure_operations(all_files);
pipeline_constant_folding(all_files);
pipeline_convert_ast_to_legacy_Expr_Op(all_files);
pipeline_register_global_symbols();
pipeline_resolve_identifiers_and_assign_symbols();
pipeline_calculate_rvalue_lvalue();
pipeline_detect_unreachable_statements();
pipeline_infer_types_and_calls_and_fields();
pipeline_refine_lvalue_for_mutate_arguments();
pipeline_check_rvalue_lvalue();
pipeline_check_pure_impure_operations();
pipeline_constant_folding();
pipeline_convert_ast_to_legacy_Expr_Op();
pipeline_find_unused_symbols();
pipeline_generate_fif_output_to_std_cout(all_files);
pipeline_generate_fif_output_to_std_cout();
return 0;
} catch (Fatal& fatal) {
@ -74,11 +76,6 @@ int tolk_proceed(const std::string &entrypoint_filename) {
} catch (ParseError& error) {
std::cerr << error << std::endl;
return 2;
} catch (UnifyError& unif_err) {
std::cerr << "fatal: ";
unif_err.print_message(std::cerr);
std::cerr << std::endl;
return 2;
} catch (UnexpectedASTNodeType& error) {
std::cerr << "fatal: " << error.what() << std::endl;
std::cerr << "It's a compiler bug, please report to developers" << std::endl;

View file

@ -18,7 +18,6 @@
#include "platform-utils.h"
#include "src-file.h"
#include "type-expr.h"
#include "symtable.h"
#include "crypto/common/refint.h"
#include "td/utils/Status.h"
@ -35,30 +34,6 @@ namespace tolk {
GNU_ATTRIBUTE_COLD GNU_ATTRIBUTE_NORETURN
void on_assertion_failed(const char *description, const char *file_name, int line_number);
/*
*
* TYPE EXPRESSIONS
*
*/
// Exception carrying two type expressions and a message.
// NOTE(review): presumably thrown by unify() when te1 and te2 can not be unified — confirm at throw sites.
struct UnifyError : std::exception {
TypeExpr* te1;
TypeExpr* te2;
std::string msg;
// the message is optional and defaults to an empty string
UnifyError(TypeExpr* _te1, TypeExpr* _te2, std::string _msg = "") : te1(_te1), te2(_te2), msg(std::move(_msg)) {
}
void print_message(std::ostream& os) const;
// returns the stored message; the pointer refers to `msg` owned by this object
const char* what() const noexcept override {
return msg.c_str();
}
};
std::ostream& operator<<(std::ostream& os, const UnifyError& ue);
void unify(TypeExpr*& te1, TypeExpr*& te2);
/*
*
* ABSTRACT CODE
@ -69,15 +44,15 @@ typedef int var_idx_t;
typedef int const_idx_t;
struct TmpVar {
TypeExpr* v_type;
TypePtr v_type;
var_idx_t idx;
const LocalVarData* v_sym; // points to var defined in code; nullptr for implicitly created tmp vars
int coord;
SrcLocation where;
std::vector<std::function<void(SrcLocation)>> on_modification;
TmpVar(var_idx_t _idx, TypeExpr* _type, const LocalVarData* v_sym, SrcLocation loc)
: v_type(_type)
TmpVar(var_idx_t _idx, TypePtr type, const LocalVarData* v_sym, SrcLocation loc)
: v_type(type)
, idx(_idx)
, v_sym(v_sym)
, coord(0)
@ -410,13 +385,13 @@ inline ListIterator<const Op> end(const Op* op_list) {
return ListIterator<const Op>{};
}
typedef std::tuple<TypeExpr*, const LocalVarData*, SrcLocation> FormalArg;
typedef std::tuple<TypePtr, const LocalVarData*, SrcLocation> FormalArg;
typedef std::vector<FormalArg> FormalArgList;
struct AsmOpList;
struct FunctionBodyCode {
CodeBlob* code;
CodeBlob* code = nullptr;
void set_code(CodeBlob* code);
};
@ -597,6 +572,7 @@ inline std::ostream& operator<<(std::ostream& os, const AsmOp& op) {
}
std::ostream& operator<<(std::ostream& os, AsmOp::SReg stack_reg);
std::ostream& operator<<(std::ostream& os, TypePtr type_data);
struct AsmOpList {
std::vector<AsmOp> list_;
@ -1116,7 +1092,6 @@ struct FunctionBodyAsm {
struct CodeBlob {
int var_cnt, in_var_cnt;
TypeExpr* ret_type;
const FunctionData* fun_ref;
std::string name;
SrcLocation loc;
@ -1128,8 +1103,8 @@ struct CodeBlob {
#endif
std::stack<std::unique_ptr<Op>*> cur_ops_stack;
bool require_callxargs = false;
CodeBlob(std::string name, SrcLocation loc, const FunctionData* fun_ref, TypeExpr* ret_type)
: var_cnt(0), in_var_cnt(0), ret_type(ret_type), fun_ref(fun_ref), name(std::move(name)), loc(loc), cur_ops(&ops) {
CodeBlob(std::string name, SrcLocation loc, const FunctionData* fun_ref)
: var_cnt(0), in_var_cnt(0), fun_ref(fun_ref), name(std::move(name)), loc(loc), cur_ops(&ops) {
}
template <typename... Args>
Op& emplace_back(Args&&... args) {
@ -1141,8 +1116,8 @@ struct CodeBlob {
return res;
}
bool import_params(FormalArgList&& arg_list);
var_idx_t create_var(TypeExpr* var_type, const LocalVarData* v_sym, SrcLocation loc);
var_idx_t create_tmp_var(TypeExpr* var_type, SrcLocation loc) {
var_idx_t create_var(TypePtr var_type, const LocalVarData* v_sym, SrcLocation loc);
var_idx_t create_tmp_var(TypePtr var_type, SrcLocation loc) {
return create_var(var_type, nullptr, loc);
}
int split_vars(bool strict = false);
@ -1164,7 +1139,6 @@ struct CodeBlob {
close_blk(location);
pop_cur();
}
void simplify_var_types();
void prune_unreachable_code();
void fwd_analyze();
void mark_noreturn();

View file

@ -1,131 +0,0 @@
#pragma once
#include <vector>
#include <iostream>
namespace tolk {
// A node of a type expression: a kind (`constr`), an integer payload (`value`),
// width bounds (`minw`..`maxw`) and child expressions (`args`).
// Instances are created via the static new_*() factories below, which allocate with `new`.
struct TypeExpr {
enum Kind { te_Unknown, te_Var, te_Indirect, te_Atomic, te_Tensor, te_Tuple, te_Map, te_ForAll };
enum AtomicType { _Int, _Cell, _Slice, _Builder, _Continutaion, _Tuple };
Kind constr;
// meaning depends on the constructor used: an explicit number (hole / var index, atomic type)
// or the number of children for list-based constructors
int value;
// width bounds; the width is "fixed" when minw == maxw (see has_fixed_width)
int minw, maxw;
// upper width bound used when no explicit width is given
static constexpr int w_inf = 1023;
std::vector<TypeExpr*> args;
bool was_forall_var = false;
// no width given: bounds are [0, w_inf]
explicit TypeExpr(Kind _constr, int _val = 0) : constr(_constr), value(_val), minw(0), maxw(w_inf) {
}
// explicit fixed width: minw == maxw == width
TypeExpr(Kind _constr, int _val, int width) : constr(_constr), value(_val), minw(width), maxw(width) {
}
// list-based constructors: `value` is the number of children, width is computed by compute_width()
TypeExpr(Kind _constr, std::vector<TypeExpr*> list)
: constr(_constr), value((int)list.size()), args(std::move(list)) {
compute_width();
}
TypeExpr(Kind _constr, std::initializer_list<TypeExpr*> list)
: constr(_constr), value((int)list.size()), args(std::move(list)) {
compute_width();
}
TypeExpr(Kind _constr, TypeExpr* elem0) : constr(_constr), value(1), args{elem0} {
compute_width();
}
// elem0 becomes the first child, followed by all elements of `list`
TypeExpr(Kind _constr, TypeExpr* elem0, std::vector<TypeExpr*> list)
: constr(_constr), value((int)list.size() + 1), args{elem0} {
args.insert(args.end(), list.begin(), list.end());
compute_width();
}
TypeExpr(Kind _constr, TypeExpr* elem0, std::initializer_list<TypeExpr*> list)
: constr(_constr), value((int)list.size() + 1), args{elem0} {
args.insert(args.end(), list.begin(), list.end());
compute_width();
}
bool is_atomic() const {
return constr == te_Atomic;
}
// atomic of a particular kind: `v` is compared with `value`
bool is_atomic(int v) const {
return constr == te_Atomic && value == v;
}
bool is_int() const {
return is_atomic(_Int);
}
bool is_var() const {
return constr == te_Var;
}
bool is_map() const {
return constr == te_Map;
}
bool is_tuple() const {
return constr == te_Tuple;
}
bool has_fixed_width() const {
return minw == maxw;
}
// returns the width if it is fixed, -1 otherwise
int get_width() const {
return has_fixed_width() ? minw : -1;
}
void compute_width();
bool recompute_width();
void show_width(std::ostream& os);
std::ostream& print(std::ostream& os, int prio = 0) const;
void replace_with(TypeExpr* te2);
int extract_components(std::vector<TypeExpr*>& comp_list);
bool equals_to(const TypeExpr* rhs) const;
bool has_unknown_inside() const;
// global counters: `holes` is incremented by new_hole(), `type_vars` is decremented by new_var()
static int holes, type_vars;
static TypeExpr* new_hole() {
return new TypeExpr{te_Unknown, ++holes};
}
static TypeExpr* new_hole(int width) {
return new TypeExpr{te_Unknown, ++holes, width};
}
// a te_Tensor with value 0 and fixed width 0
static TypeExpr* new_unit() {
return new TypeExpr{te_Tensor, 0, 0};
}
// a te_Atomic of the given kind with fixed width 1
static TypeExpr* new_atomic(int value) {
return new TypeExpr{te_Atomic, value, 1};
}
static TypeExpr* new_map(TypeExpr* from, TypeExpr* to);
static TypeExpr* new_func() {
return new_map(new_hole(), new_hole());
}
// with `red` set, a single-element list is not wrapped: the element itself is returned
static TypeExpr* new_tensor(std::vector<TypeExpr*> list, bool red = true) {
return red && list.size() == 1 ? list[0] : new TypeExpr{te_Tensor, std::move(list)};
}
static TypeExpr* new_tensor(std::initializer_list<TypeExpr*> list) {
return new TypeExpr{te_Tensor, std::move(list)};
}
static TypeExpr* new_tensor(TypeExpr* te1, TypeExpr* te2) {
return new_tensor({te1, te2});
}
static TypeExpr* new_tensor(TypeExpr* te1, TypeExpr* te2, TypeExpr* te3) {
return new_tensor({te1, te2, te3});
}
// a te_Tuple has exactly one child (the list overloads below wrap their items into a tensor first)
static TypeExpr* new_tuple(TypeExpr* arg0) {
return new TypeExpr{te_Tuple, arg0};
}
static TypeExpr* new_tuple(std::vector<TypeExpr*> list, bool red = false) {
return new_tuple(new_tensor(std::move(list), red));
}
static TypeExpr* new_tuple(std::initializer_list<TypeExpr*> list) {
return new_tuple(new_tensor(list));
}
// a fresh te_Var: its index is the pre-decremented `type_vars` counter, width is fixed to 1
static TypeExpr* new_var() {
return new TypeExpr{te_Var, --type_vars, 1};
}
static TypeExpr* new_var(int idx) {
return new TypeExpr{te_Var, idx, 1};
}
// a te_ForAll: `body` is the first child, followed by the elements of `list`
static TypeExpr* new_forall(std::vector<TypeExpr*> list, TypeExpr* body) {
return new TypeExpr{te_ForAll, body, std::move(list)};
}
static bool remove_indirect(TypeExpr*& te, TypeExpr* forbidden = nullptr);
static std::vector<TypeExpr*> remove_forall(TypeExpr*& te);
static bool remove_forall_in(TypeExpr*& te, TypeExpr* te2, const std::vector<TypeExpr*>& new_vars);
};
std::ostream& operator<<(std::ostream& os, TypeExpr* type_expr);
} // namespace tolk

684
tolk/type-system.cpp Normal file
View file

@ -0,0 +1,684 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#include "type-system.h"
#include "lexer.h"
#include "platform-utils.h"
#include "compiler-state.h"
#include <unordered_map>
namespace tolk {
/*
* This class stores a big hashtable [hash => TypeData]
* Every non-trivial TypeData*::create() method at first looks here, and allocates an object only if not found.
* That's why all allocated TypeData objects are unique, storing unique type_id.
*/
/*
 * Incrementally calculates a type_id (a 64-bit hash) of a TypeData being created,
 * accumulates flags of its children, and looks up / registers objects
 * in the global table of unique types keyed by that hash.
 */
class TypeDataTypeIdCalculation {
  uint64_t cur_hash;
  int children_flags_mask = 0;

  // [type_id => TypeData], shared by all calculations
  static std::unordered_map<uint64_t, TypePtr> all_unique_occurred_types;

public:
  explicit TypeDataTypeIdCalculation(uint64_t initial_arbitrary_unique_number)
    : cur_hash(initial_arbitrary_unique_number) {}

  // mixes a number into the hash: multiply by a constant, then add (unsigned 64-bit arithmetic)
  void feed_hash(uint64_t val) {
    cur_hash *= 56235515617499ULL;
    cur_hash += val;
  }

  // mixes std::hash of a string into the hash
  void feed_string(const std::string& s) {
    const uint64_t string_hash = std::hash<std::string>{}(s);
    feed_hash(string_hash);
  }

  // mixes a child's type_id into the hash and ORs the child's flags into the mask
  void feed_child(TypePtr inner) {
    feed_hash(inner->type_id);
    children_flags_mask = children_flags_mask | inner->flags;
  }

  // the hash accumulated so far
  uint64_t type_id() const {
    return cur_hash;
  }

  // OR of the flags of all children fed so far
  int children_flags() const {
    return children_flags_mask;
  }

  // returns an object already registered under the current hash, or nullptr if there is none
  GNU_ATTRIBUTE_FLATTEN
  TypePtr get_existing() const {
    auto found = all_unique_occurred_types.find(cur_hash);
    if (found == all_unique_occurred_types.end()) {
      return nullptr;
    }
    return found->second;
  }

  // stores the object under the current hash and returns it back
  GNU_ATTRIBUTE_NOINLINE
  TypePtr register_unique(TypePtr newly_created) const {
#ifdef TOLK_DEBUG
    assert(newly_created->type_id == cur_hash);
#endif
    all_unique_occurred_types[cur_hash] = newly_created;
    return newly_created;
  }
};
// storage for the global [type_id => TypeData] table declared inside TypeDataTypeIdCalculation
std::unordered_map<uint64_t, TypePtr> TypeDataTypeIdCalculation::all_unique_occurred_types;
// storage for per-type singletons; they are assigned in type_system_init()
TypePtr TypeDataInt::singleton;
TypePtr TypeDataCell::singleton;
TypePtr TypeDataSlice::singleton;
TypePtr TypeDataBuilder::singleton;
TypePtr TypeDataTuple::singleton;
TypePtr TypeDataContinuation::singleton;
TypePtr TypeDataNullLiteral::singleton;
TypePtr TypeDataUnknown::singleton;
TypePtr TypeDataVoid::singleton;
// Allocates one object per childless type and stores it into that type's `singleton`.
// NOTE(review): each call allocates anew and overwrites the pointers, so this looks intended
// to be called exactly once at startup — confirm against callers.
void type_system_init() {
TypeDataInt::singleton = new TypeDataInt;
TypeDataCell::singleton = new TypeDataCell;
TypeDataSlice::singleton = new TypeDataSlice;
TypeDataBuilder::singleton = new TypeDataBuilder;
TypeDataTuple::singleton = new TypeDataTuple;
TypeDataContinuation::singleton = new TypeDataContinuation;
TypeDataNullLiteral::singleton = new TypeDataNullLiteral;
TypeDataUnknown::singleton = new TypeDataUnknown;
TypeDataVoid::singleton = new TypeDataVoid;
}
// --------------------------------------------
// create()
//
// all constructors of TypeData classes are private, only TypeData*::create() is allowed
// each non-trivial create() method calculates hash (type_id)
// and creates an object only if it isn't found in a global hashtable
//
// Returns the unique callable type for the given parameter types and return type.
// The type_id is fed with every parameter and then the return type, each followed by its own marker;
// an object is allocated and registered only if nothing is registered under that type_id yet.
TypePtr TypeDataFunCallable::create(std::vector<TypePtr>&& params_types, TypePtr return_type) {
  TypeDataTypeIdCalculation id_calc(3184039965511020991ULL);
  for (TypePtr param_type : params_types) {
    id_calc.feed_child(param_type);
    id_calc.feed_hash(767721);
  }
  id_calc.feed_child(return_type);
  id_calc.feed_hash(767722);

  TypePtr found = id_calc.get_existing();
  if (found != nullptr) {
    return found;
  }
  TypePtr created = new TypeDataFunCallable(id_calc.type_id(), id_calc.children_flags(), std::move(params_types), return_type);
  return id_calc.register_unique(created);
}
// Returns the unique generic-T type for the given name.
// The type_id depends on the name only; an object is allocated and registered
// only if nothing is registered under that type_id yet.
TypePtr TypeDataGenericT::create(std::string&& nameT) {
  TypeDataTypeIdCalculation id_calc(9145033724911680012ULL);
  id_calc.feed_string(nameT);

  TypePtr found = id_calc.get_existing();
  if (found != nullptr) {
    return found;
  }
  TypePtr created = new TypeDataGenericT(id_calc.type_id(), std::move(nameT));
  return id_calc.register_unique(created);
}
// Returns the unique tensor type for the given items.
// The type_id is fed with every item followed by a marker; an object is allocated
// and registered only if nothing is registered under that type_id yet.
TypePtr TypeDataTensor::create(std::vector<TypePtr>&& items) {
  TypeDataTypeIdCalculation id_calc(3159238551239480381ULL);
  for (TypePtr tensor_item : items) {
    id_calc.feed_child(tensor_item);
    id_calc.feed_hash(819613);
  }

  TypePtr found = id_calc.get_existing();
  if (found != nullptr) {
    return found;
  }
  TypePtr created = new TypeDataTensor(id_calc.type_id(), id_calc.children_flags(), std::move(items));
  return id_calc.register_unique(created);
}
// Returns the unique typed tuple type for the given items.
// The type_id is fed with every item followed by a marker; an object is allocated
// and registered only if nothing is registered under that type_id yet.
TypePtr TypeDataTypedTuple::create(std::vector<TypePtr>&& items) {
  TypeDataTypeIdCalculation id_calc(9189266157349499320ULL);
  for (TypePtr tuple_item : items) {
    id_calc.feed_child(tuple_item);
    id_calc.feed_hash(735911);
  }

  TypePtr found = id_calc.get_existing();
  if (found != nullptr) {
    return found;
  }
  TypePtr created = new TypeDataTypedTuple(id_calc.type_id(), id_calc.children_flags(), std::move(items));
  return id_calc.register_unique(created);
}
// Returns the unique unresolved type for the given text.
// Only the text is fed into the type_id; `loc` is not (the line feeding it is disabled below),
// so when an object for the same text is already registered, it is returned with the location it was created with.
TypePtr TypeDataUnresolved::create(std::string&& text, SrcLocation loc) {
  TypeDataTypeIdCalculation id_calc(3680147223540048162ULL);
  id_calc.feed_string(text);
  // id_calc.feed_hash(*reinterpret_cast<uint64_t*>(&loc));

  TypePtr found = id_calc.get_existing();
  if (found != nullptr) {
    return found;
  }
  TypePtr created = new TypeDataUnresolved(id_calc.type_id(), std::move(text), loc);
  return id_calc.register_unique(created);
}
// --------------------------------------------
// as_human_readable()
//
// is used only for error messages and debugging, therefore no optimizations for simplicity
// only non-trivial implementations are here; trivial are defined in .h file
//
std::string TypeDataFunCallable::as_human_readable() const {
std::string result = "(";
for (TypePtr param : params_types) {
if (result.size() > 1) {
result += ", ";
}
result += param->as_human_readable();
}
result += ") -> ";
result += return_type->as_human_readable();
return result;
}
std::string TypeDataTensor::as_human_readable() const {
std::string result = "(";
for (TypePtr item : items) {
if (result.size() > 1) {
result += ", ";
}
result += item->as_human_readable();
}
result += ')';
return result;
}
std::string TypeDataTypedTuple::as_human_readable() const {
std::string result = "[";
for (TypePtr item : items) {
if (result.size() > 1) {
result += ", ";
}
result += item->as_human_readable();
}
result += ']';
return result;
}
// --------------------------------------------
// traverse()
//
// invokes a callback for TypeData itself and all its children
// only non-trivial implementations are here; by default (no children), `callback(this)` is executed
//
void TypeDataFunCallable::traverse(const TraverserCallbackT& callback) const {
callback(this);
for (TypePtr param : params_types) {
param->traverse(callback);
}
return_type->traverse(callback);
}
void TypeDataTensor::traverse(const TraverserCallbackT& callback) const {
callback(this);
for (TypePtr item : items) {
item->traverse(callback);
}
}
void TypeDataTypedTuple::traverse(const TraverserCallbackT& callback) const {
callback(this);
for (TypePtr item : items) {
item->traverse(callback);
}
}
// --------------------------------------------
// replace_children_custom()
//
// returns new TypeData with children replaced by a custom callback
// used to replace generic T on generics expansion — to convert `f<T>` to `f<int>`
// only non-trivial implementations are here; by default (no children), `return callback(this)` is executed
//
// Rebuilds the callable bottom-up: parameters are replaced first (in order), then the return type,
// then a callable is created from the replaced parts, and finally the callback is applied to it.
TypePtr TypeDataFunCallable::replace_children_custom(const ReplacerCallbackT& callback) const {
  std::vector<TypePtr> replaced_params;
  replaced_params.reserve(params_types.size());
  for (auto it = params_types.begin(); it != params_types.end(); ++it) {
    replaced_params.push_back((*it)->replace_children_custom(callback));
  }
  TypePtr replaced_return = return_type->replace_children_custom(callback);
  TypePtr rebuilt = create(std::move(replaced_params), replaced_return);
  return callback(rebuilt);
}
// Rebuilds the tensor bottom-up: every item is replaced first (in order),
// then a tensor is created from the replaced items, and finally the callback is applied to it.
TypePtr TypeDataTensor::replace_children_custom(const ReplacerCallbackT& callback) const {
  std::vector<TypePtr> replaced_items;
  replaced_items.reserve(items.size());
  for (auto it = items.begin(); it != items.end(); ++it) {
    replaced_items.push_back((*it)->replace_children_custom(callback));
  }
  TypePtr rebuilt = create(std::move(replaced_items));
  return callback(rebuilt);
}
// Rebuilds the typed tuple bottom-up: every item is replaced first (in order),
// then a typed tuple is created from the replaced items, and finally the callback is applied to it.
TypePtr TypeDataTypedTuple::replace_children_custom(const ReplacerCallbackT& callback) const {
  std::vector<TypePtr> replaced_items;
  replaced_items.reserve(items.size());
  for (auto it = items.begin(); it != items.end(); ++it) {
    replaced_items.push_back((*it)->replace_children_custom(callback));
  }
  TypePtr rebuilt = create(std::move(replaced_items));
  return callback(rebuilt);
}
// --------------------------------------------
// calc_width_on_stack()
//
// returns the number of stack slots occupied by a variable of this type
// only non-trivial implementations are here; by default (most types) occupy 1 stack slot
//
// Not expected to be reached for a generic T: the body asserts unconditionally.
int TypeDataGenericT::calc_width_on_stack() const {
// this function is invoked only in functions with generics already instantiated
assert(false);
// a sentinel value; matters only if the assertion above does not abort
return -999999;
}
int TypeDataTensor::calc_width_on_stack() const {
int sum = 0;
for (TypePtr item : items) {
sum += item->calc_width_on_stack();
}
return sum;
}
// Not expected to be reached for an unresolved type: the body asserts unconditionally.
int TypeDataUnresolved::calc_width_on_stack() const {
// since early pipeline stages, no unresolved types left
assert(false);
// a sentinel value; matters only if the assertion above does not abort
return -999999;
}
// `void` occupies no stack slots.
int TypeDataVoid::calc_width_on_stack() const {
return 0;
}
// --------------------------------------------
// can_rhs_be_assigned()
//
// on `var lhs: <lhs_type> = rhs`, having inferred rhs_type, check that it can be assigned without any casts
// the same goes for passing arguments, returning values, etc. — where the "receiver" (lhs) checks "applier" (rhs)
// for now, `null` can be assigned to any TVM primitive, but later we'll have T? types and null safety
//
// An int receiver accepts an rhs of this very type, and also the null literal.
bool TypeDataInt::can_rhs_be_assigned(TypePtr rhs) const {
  return rhs == this || rhs == TypeDataNullLiteral::create();
}
// A cell receiver accepts an rhs of this very type, and also the null literal.
bool TypeDataCell::can_rhs_be_assigned(TypePtr rhs) const {
  return rhs == this || rhs == TypeDataNullLiteral::create();
}
// A slice receiver accepts an rhs of this very type, and also the null literal.
bool TypeDataSlice::can_rhs_be_assigned(TypePtr rhs) const {
  return rhs == this || rhs == TypeDataNullLiteral::create();
}
// A builder receiver accepts an rhs of this very type, and also the null literal.
bool TypeDataBuilder::can_rhs_be_assigned(TypePtr rhs) const {
  return rhs == this || rhs == TypeDataNullLiteral::create();
}
// An (untyped) tuple receiver accepts an rhs of this very type, and also the null literal.
bool TypeDataTuple::can_rhs_be_assigned(TypePtr rhs) const {
  return rhs == this || rhs == TypeDataNullLiteral::create();
}
// A continuation receiver accepts an rhs of this very type, and also the null literal.
bool TypeDataContinuation::can_rhs_be_assigned(TypePtr rhs) const {
  return rhs == this || rhs == TypeDataNullLiteral::create();
}
// A null-literal receiver accepts only an rhs that is this very type object.
bool TypeDataNullLiteral::can_rhs_be_assigned(TypePtr rhs) const {
return rhs == this;
}
// A callable receiver accepts only an rhs that is this very type object
// (pointer comparison; note that null is not accepted here).
bool TypeDataFunCallable::can_rhs_be_assigned(TypePtr rhs) const {
return rhs == this;
}
// Not expected to be called for a generic T: the body asserts unconditionally.
bool TypeDataGenericT::can_rhs_be_assigned(TypePtr rhs) const {
assert(false);
// matters only if the assertion above does not abort
return false;
}
// A tensor receiver accepts only a tensor of the same size,
// every item of which can be assigned to the corresponding item of this tensor.
bool TypeDataTensor::can_rhs_be_assigned(TypePtr rhs) const {
  const auto* rhs_tensor = rhs->try_as<TypeDataTensor>();
  // note, that tensors can not accept null
  if (rhs_tensor == nullptr || rhs_tensor->size() != size()) {
    return false;
  }
  for (int idx = 0; idx < size(); ++idx) {
    const bool item_fits = items[idx]->can_rhs_be_assigned(rhs_tensor->items[idx]);
    if (!item_fits) {
      return false;
    }
  }
  return true;
}
// A typed tuple receiver accepts a typed tuple of the same size, every item of which can be assigned
// to the corresponding item of this tuple; besides that, it accepts the null literal.
bool TypeDataTypedTuple::can_rhs_be_assigned(TypePtr rhs) const {
  const auto* rhs_tuple = rhs->try_as<TypeDataTypedTuple>();
  if (rhs_tuple == nullptr || rhs_tuple->size() != size()) {
    // not a tuple of a matching size: the only remaining option is null
    return rhs == TypeDataNullLiteral::create();
  }
  for (int idx = 0; idx < size(); ++idx) {
    const bool item_fits = items[idx]->can_rhs_be_assigned(rhs_tuple->items[idx]);
    if (!item_fits) {
      return false;
    }
  }
  return true;
}
// An `unknown` receiver accepts any rhs: the argument is not inspected.
bool TypeDataUnknown::can_rhs_be_assigned(TypePtr rhs) const {
return true;
}
// Not expected to be called for an unresolved type: the body asserts unconditionally.
bool TypeDataUnresolved::can_rhs_be_assigned(TypePtr rhs) const {
assert(false);
// matters only if the assertion above does not abort
return false;
}
// A void receiver accepts only an rhs that is this very type object.
bool TypeDataVoid::can_rhs_be_assigned(TypePtr rhs) const {
return rhs == this;
}
// --------------------------------------------
// can_be_casted_with_as_operator()
//
// on `expr as <cast_to>`, check whether casting is applicable
// note, that it's not auto-casts `var lhs: <lhs_type> = rhs`, it's an expression `rhs as <cast_to>`
//
// An int expression can be cast with `as` only to this very type.
bool TypeDataInt::can_be_casted_with_as_operator(TypePtr cast_to) const {
return cast_to == this;
}
// A cell expression can be cast with `as` only to this very type.
bool TypeDataCell::can_be_casted_with_as_operator(TypePtr cast_to) const {
return cast_to == this;
}
// A slice expression can be cast with `as` only to this very type.
bool TypeDataSlice::can_be_casted_with_as_operator(TypePtr cast_to) const {
return cast_to == this;
}
// A builder expression can be cast with `as` only to this very type.
bool TypeDataBuilder::can_be_casted_with_as_operator(TypePtr cast_to) const {
return cast_to == this;
}
// An (untyped) tuple expression can be cast with `as` only to this very type.
bool TypeDataTuple::can_be_casted_with_as_operator(TypePtr cast_to) const {
return cast_to == this;
}
// A continuation expression can be cast with `as` only to this very type.
bool TypeDataContinuation::can_be_casted_with_as_operator(TypePtr cast_to) const {
return cast_to == this;
}
// The null literal can be cast with `as` to itself, to int / cell / slice / builder / continuation / tuple,
// and to any typed tuple. Checks are performed in this order and stop at the first match.
bool TypeDataNullLiteral::can_be_casted_with_as_operator(TypePtr cast_to) const {
  if (cast_to == this) {
    return true;
  }
  if (cast_to == TypeDataInt::create() || cast_to == TypeDataCell::create() || cast_to == TypeDataSlice::create()) {
    return true;
  }
  if (cast_to == TypeDataBuilder::create() || cast_to == TypeDataContinuation::create() || cast_to == TypeDataTuple::create()) {
    return true;
  }
  return cast_to->try_as<TypeDataTypedTuple>() != nullptr;
}
// A callable type can be cast with `as` only to exactly the same callable type (pointer identity).
bool TypeDataFunCallable::can_be_casted_with_as_operator(TypePtr cast_to) const {
  return cast_to == this;
}
// A generic `T` may be cast with `as` to any type: the target is not inspected.
// NOTE(review): presumably the real check happens once T is instantiated — confirm against the inferring pass.
bool TypeDataGenericT::can_be_casted_with_as_operator(TypePtr cast_to) const {
return true;
}
// A tensor can be cast with `as` only to a tensor of the same size,
// and only if every item can be cast to the corresponding target item.
bool TypeDataTensor::can_be_casted_with_as_operator(TypePtr cast_to) const {
  const auto* target = cast_to->try_as<TypeDataTensor>();
  if (target == nullptr || target->size() != size()) {
    return false;
  }
  for (int idx = 0; idx < size(); ++idx) {
    if (!items[idx]->can_be_casted_with_as_operator(target->items[idx])) {
      return false;
    }
  }
  return true;
}
// A typed tuple can be cast with `as` only to a typed tuple of the same size,
// and only if every item can be cast to the corresponding target item.
bool TypeDataTypedTuple::can_be_casted_with_as_operator(TypePtr cast_to) const {
  const auto* target = cast_to->try_as<TypeDataTypedTuple>();
  if (target == nullptr || target->size() != size()) {
    return false;
  }
  for (int idx = 0; idx < size(); ++idx) {
    if (!items[idx]->can_be_casted_with_as_operator(target->items[idx])) {
      return false;
    }
  }
  return true;
}
// `unknown as <cast_to>` is always accepted; the target type is not inspected.
bool TypeDataUnknown::can_be_casted_with_as_operator(TypePtr cast_to) const {
// 'unknown' can be cast to any type
// (though it's not valid for exception arguments when casting them to non-1 stack width,
// but to ensure it, we need a special type "unknown TVM primitive", which is overwhelming I think)
return true;
}
// An unresolved type can never be cast: always returns false.
// Unlike can_rhs_be_assigned() for the same type, this does not assert.
bool TypeDataUnresolved::can_be_casted_with_as_operator(TypePtr cast_to) const {
return false;
}
// `void as <cast_to>` is allowed only when <cast_to> is `void` itself (pointer identity).
bool TypeDataVoid::can_be_casted_with_as_operator(TypePtr cast_to) const {
  return this == cast_to;
}
// --------------------------------------------
// extract_components()
//
// used in code generation (transforming Ops to other Ops)
// to be removed in the future
//
// Must never be called for a generic `T`: reaching this point trips the assertion.
// NOTE(review): presumably generics are already instantiated by the time code generation runs — confirm.
void TypeDataGenericT::extract_components(std::vector<TypePtr>& comp_types) const {
assert(false);
}
void TypeDataTensor::extract_components(std::vector<TypePtr>& comp_types) const {
for (TypePtr item : items) {
item->extract_components(comp_types);
}
}
// Must never be called for an unresolved type: reaching this point trips the assertion.
void TypeDataUnresolved::extract_components(std::vector<TypePtr>& comp_types) const {
assert(false);
}
// `void` contributes no components: `comp_types` is intentionally left untouched.
void TypeDataVoid::extract_components(std::vector<TypePtr>& comp_types) const {
}
// --------------------------------------------
// parsing type from tokens
//
// here we implement parsing types (mostly after colon) to TypeData
// example: `var v: int` is TypeDataInt
// example: `var v: (builder, [cell])` is TypeDataTensor(TypeDataBuilder, TypeDataTypedTuple(TypeDataCell))
// example: `fun f(): ()` is TypeDataTensor() (an empty one)
//
// note, that unrecognized type names (MyEnum, MyStruct, T) are parsed as TypeDataUnresolved,
// and later, when all files are parsed and all symbols registered, such identifiers are resolved
// example: `fun f<T>(v: T)` at first v is TypeDataUnresolved("T"), later becomes TypeDataGenericT
// see finalize_type_data()
//
// note, that `self` does not name a type, it can appear only as a return value of a function (parsed specially)
// when `self` appears as a type, it's parsed as TypeDataUnresolved, and later an error is emitted
//
static TypePtr parse_type_expression(Lexer& lex);
// Parses a comma-separated list of types enclosed in `tok_op` ... `tok_cl`.
// `s_op` is passed to lex.expect() for the opening token; `s_cl` is passed to
// lex.unexpected() when neither a comma nor the closing token follows an item.
// Empty lists and a trailing comma before the closing token are both accepted.
// On return, the closing token has been consumed.
std::vector<TypePtr> parse_nested_type_list(Lexer& lex, TokenType tok_op, const char* s_op, TokenType tok_cl, const char* s_cl) {
  lex.expect(tok_op, s_op);

  std::vector<TypePtr> parsed_types;
  while (lex.tok() != tok_cl) {
    parsed_types.emplace_back(parse_type_expression(lex));
    if (lex.tok() == tok_comma) {
      lex.next();
    } else if (lex.tok() != tok_cl) {
      lex.unexpected(s_cl);
    }
  }
  lex.next();   // skip the closing token

  return parsed_types;
}
// Shortcut for parse_nested_type_list() with `(` and `)` as delimiters: parses `(T1, T2, ...)`.
std::vector<TypePtr> parse_nested_type_list_in_parenthesis(Lexer& lex) {
return parse_nested_type_list(lex, tok_oppar, "`(`", tok_clpar, "`)` or `,`");
}
// Parses one "simple" type starting at the current token:
//  - a built-in keyword (`int`, `cell`, `builder`, `slice`, `tuple`, `continuation`, `null`, `void`),
//  - `bool`, `self` or any identifier: stored as TypeDataUnresolved together with its location,
//  - `(...)`: a tensor; a single parenthesized type is returned as that type itself, not a 1-item tensor,
//  - `[...]`: a typed tuple,
//  - `fun(...) -> R`: a callable type.
// Any other token is reported via lex.unexpected("<type>").
static TypePtr parse_simple_type(Lexer& lex) {
  switch (lex.tok()) {
    case tok_int:
      lex.next();
      return TypeDataInt::create();
    case tok_cell:
      lex.next();
      return TypeDataCell::create();
    case tok_builder:
      lex.next();
      return TypeDataBuilder::create();
    case tok_slice:
      lex.next();
      return TypeDataSlice::create();
    case tok_tuple:
      lex.next();
      return TypeDataTuple::create();
    case tok_continuation:
      lex.next();
      return TypeDataContinuation::create();
    case tok_null:
      lex.next();
      return TypeDataNullLiteral::create();
    case tok_void:
      lex.next();
      return TypeDataVoid::create();
    case tok_bool:
    case tok_self:
    case tok_identifier: {
      // not a known type keyword here: remember the text and the location
      SrcLocation loc = lex.cur_location();
      std::string text = static_cast<std::string>(lex.cur_str());
      lex.next();
      return TypeDataUnresolved::create(std::move(text), loc);
    }
    case tok_oppar: {
      std::vector<TypePtr> items = parse_nested_type_list_in_parenthesis(lex);
      if (items.size() == 1) {
        // `(T)` is just `T`
        return items.front();
      }
      return TypeDataTensor::create(std::move(items));
    }
    case tok_opbracket: {
      std::vector<TypePtr> items = parse_nested_type_list(lex, tok_opbracket, "`[`", tok_clbracket, "`]` or `,`");
      return TypeDataTypedTuple::create(std::move(items));
    }
    case tok_fun: {
      // `fun(T1, T2) -> R`
      // (previously this case fell through to `default` after `->`, so this syntax always
      // ended in an "unexpected" error and the parsed parameters were discarded)
      lex.next();
      std::vector<TypePtr> params_types = parse_nested_type_list_in_parenthesis(lex);
      lex.expect(tok_arrow, "`->`");
      TypePtr return_type = parse_type_expression(lex);
      return TypeDataFunCallable::create(std::move(params_types), return_type);
    }
    default:
      lex.unexpected("<type>");
  }
}
// Parses a simple type and rejects a following `?`:
// the `T?` syntax is recognized, but reported as not supported yet.
static TypePtr parse_type_nullable(Lexer& lex) {
  TypePtr parsed = parse_simple_type(lex);
  const bool has_nullable_suffix = lex.tok() == tok_question;
  if (has_nullable_suffix) {
    lex.error("nullable types are not supported yet");
  }
  return parsed;
}
// Parses a full type expression:
//  - `A -> R` becomes a callable type; when `A` is a tensor, its items become the parameters,
//    otherwise `A` is the only parameter (examples: `int -> int`, `(cell, slice) -> void`);
//    the return type is parsed recursively, so `->` groups to the right,
//  - `A | B` is recognized, but reported as not supported yet,
//  - otherwise the parsed type is returned as is.
static TypePtr parse_type_expression(Lexer& lex) {
  TypePtr lhs = parse_type_nullable(lex);

  if (lex.tok() == tok_arrow) {
    lex.next();
    TypePtr return_type = parse_type_expression(lex);

    std::vector<TypePtr> params_types;
    if (const auto* lhs_tensor = lhs->try_as<TypeDataTensor>()) {
      params_types = lhs_tensor->items;
    } else {
      params_types.push_back(lhs);
    }
    return TypeDataFunCallable::create(std::move(params_types), return_type);
  }

  if (lex.tok() == tok_bitwise_or) {
    lex.error("union types are not supported yet");
  }
  return lhs;
}
// Public entry point of the type parser: parses a type starting at the lexer's current token.
// It only forwards to the file-local parse_type_expression().
TypePtr parse_type_from_tokens(Lexer& lex) {
return parse_type_expression(lex);
}
// Prints a type in its human-readable form; a null pointer is printed as "(nullptr-type)".
std::ostream& operator<<(std::ostream& os, TypePtr type_data) {
  if (!type_data) {
    return os << std::string("(nullptr-type)");
  }
  return os << type_data->as_human_readable();
}
} // namespace tolk

405
tolk/type-system.h Normal file
View file

@ -0,0 +1,405 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "src-file.h"
#include <cstdint>
#include <string>
#include <functional>
namespace tolk {
/*
* TypeData is both a user-given and an inferred type representation.
* `int`, `cell`, `T`, `(int, [tuple])` are instances of TypeData.
* Every unique TypeData is created only once, so for example TypeDataTensor::create(int, int)
* returns one and the same pointer always. This "uniqueness" is called type_id, calculated before creation.
*
* In Tolk code, types after colon `var v: (int, T)` are parsed to TypeData.
* See parse_type_from_tokens().
* So, AST nodes which can have declared types (local/global variables and others) store a pointer to TypeData.
*
* Type inferring also creates TypeData for inferred expressions. All AST expression nodes have inferred_type.
* For example, `1 + 2`, both operands are TypeDataInt, its result is also TypeDataInt.
* Type checking also uses TypeData. For example, `var i: slice = 1 + 2`, at first rhs (TypeDataInt) is inferred,
* then lhs (TypeDataSlice from declaration) is checked whether rhs can be assigned.
* See can_rhs_be_assigned().
*
* Note, that while initial parsing Tolk files to AST, known types (`int`, `cell`, etc.) are created as-is,
* but user-defined types (`T`, `MyStruct`, `MyAlias`) are saved as TypeDataUnresolved.
* After all symbols have been registered, resolving identifiers step is executed, where particularly
* all TypeDataUnresolved instances are converted to a resolved type. At inferring, no unresolved remain.
* For instance, `fun f<T>(v: T)`, at first "T" of `v` is unresolved, and then converted to TypeDataGenericT.
*/
class TypeData {
  // every unique type has its own type_id; it serves both for allocating an instance only once
  // and for tagged unions
  const uint64_t type_id;
  // a combination of flag_mask bits: often-used properties, returned without traversing the tree
  const int flags;

  friend class TypeDataTypeIdCalculation;

protected:
  enum flag_mask {
    flag_contains_unknown_inside = 1 << 1,
    flag_contains_genericT_inside = 1 << 2,
    flag_contains_unresolved_inside = 1 << 3,
  };

  // `flags_with_children` is expected to already include the flags of nested types
  explicit TypeData(uint64_t type_id, int flags_with_children)
    : type_id(type_id)
    , flags(flags_with_children) {
  }

public:
  virtual ~TypeData() = default;

  // checked downcast: returns nullptr if this object is not a `Derived`
  template<class Derived>
  const Derived* try_as() const {
    return dynamic_cast<const Derived*>(this);
  }

  uint64_t get_type_id() const { return type_id; }

  bool has_unknown_inside() const { return (flags & flag_contains_unknown_inside) != 0; }
  bool has_genericT_inside() const { return (flags & flag_contains_genericT_inside) != 0; }
  bool has_unresolved_inside() const { return (flags & flag_contains_unresolved_inside) != 0; }

  using TraverserCallbackT = std::function<void(TypePtr child)>;
  using ReplacerCallbackT = std::function<TypePtr(TypePtr child)>;

  virtual std::string as_human_readable() const = 0;
  // `var lhs: <this> = rhs`: whether rhs is acceptable
  virtual bool can_rhs_be_assigned(TypePtr rhs) const = 0;
  // `<this> as cast_to`: whether the cast is applicable
  virtual bool can_be_casted_with_as_operator(TypePtr cast_to) const = 0;

  // by default, a type has no children: the callback is invoked for the type itself only
  virtual void traverse(const TraverserCallbackT& callback) const {
    callback(this);
  }

  // by default, a type has no children: the result is whatever the callback returns for the type itself
  virtual TypePtr replace_children_custom(const ReplacerCallbackT& callback) const {
    return callback(this);
  }

  // by default, a type takes 1 stack slot
  virtual int calc_width_on_stack() const {
    return 1;
  }

  // by default, a type is a single component: itself
  virtual void extract_components(std::vector<TypePtr>& comp_types) const {
    comp_types.push_back(this);
  }
};
/*
* `int` is TypeDataInt, representation of TVM int.
* It exists as a single shared instance, returned by create().
*/
class TypeDataInt final : public TypeData {
// private constructor: fixed type_id = 1, no flags
TypeDataInt() : TypeData(1ULL, 0) {}
// the shared instance; presumably assigned by type_system_init(), hence the friend declaration (TODO confirm)
static TypePtr singleton;
friend void type_system_init();
public:
static TypePtr create() { return singleton; }
std::string as_human_readable() const override { return "int"; }
bool can_rhs_be_assigned(TypePtr rhs) const override;
bool can_be_casted_with_as_operator(TypePtr cast_to) const override;
};
/*
* `cell` is TypeDataCell, representation of TVM cell.
* It exists as a single shared instance, returned by create().
*/
class TypeDataCell final : public TypeData {
// private constructor: fixed type_id = 3, no flags
TypeDataCell() : TypeData(3ULL, 0) {}
// the shared instance; presumably assigned by type_system_init(), hence the friend declaration (TODO confirm)
static TypePtr singleton;
friend void type_system_init();
public:
static TypePtr create() { return singleton; }
std::string as_human_readable() const override { return "cell"; }
bool can_rhs_be_assigned(TypePtr rhs) const override;
bool can_be_casted_with_as_operator(TypePtr cast_to) const override;
};
/*
* `slice` is TypeDataSlice, representation of TVM slice.
* It exists as a single shared instance, returned by create().
*/
class TypeDataSlice final : public TypeData {
// private constructor: fixed type_id = 4, no flags
TypeDataSlice() : TypeData(4ULL, 0) {}
// the shared instance; presumably assigned by type_system_init(), hence the friend declaration (TODO confirm)
static TypePtr singleton;
friend void type_system_init();
public:
static TypePtr create() { return singleton; }
std::string as_human_readable() const override { return "slice"; }
bool can_rhs_be_assigned(TypePtr rhs) const override;
bool can_be_casted_with_as_operator(TypePtr cast_to) const override;
};
/*
* `builder` is TypeDataBuilder, representation of TVM builder.
* It exists as a single shared instance, returned by create().
*/
class TypeDataBuilder final : public TypeData {
// private constructor: fixed type_id = 5, no flags
TypeDataBuilder() : TypeData(5ULL, 0) {}
// the shared instance; presumably assigned by type_system_init(), hence the friend declaration (TODO confirm)
static TypePtr singleton;
friend void type_system_init();
public:
static TypePtr create() { return singleton; }
std::string as_human_readable() const override { return "builder"; }
bool can_rhs_be_assigned(TypePtr rhs) const override;
bool can_be_casted_with_as_operator(TypePtr cast_to) const override;
};
/*
* `tuple` is TypeDataTuple, representation of TVM tuple.
* Note, that it's an UNTYPED tuple. It occupies 1 stack slot in TVM. Its elements are any TVM values at runtime,
* so getting its element results in TypeDataUnknown (which must be assigned/cast explicitly).
* It exists as a single shared instance, returned by create().
*/
class TypeDataTuple final : public TypeData {
// private constructor: fixed type_id = 6, no flags
TypeDataTuple() : TypeData(6ULL, 0) {}
// the shared instance; presumably assigned by type_system_init(), hence the friend declaration (TODO confirm)
static TypePtr singleton;
friend void type_system_init();
public:
static TypePtr create() { return singleton; }
std::string as_human_readable() const override { return "tuple"; }
bool can_rhs_be_assigned(TypePtr rhs) const override;
bool can_be_casted_with_as_operator(TypePtr cast_to) const override;
};
/*
* `continuation` is TypeDataContinuation, representation of TVM continuation.
* It's like "untyped callable", not compatible with other types.
* It exists as a single shared instance, returned by create().
*/
class TypeDataContinuation final : public TypeData {
// private constructor: fixed type_id = 7, no flags
TypeDataContinuation() : TypeData(7ULL, 0) {}
// the shared instance; presumably assigned by type_system_init(), hence the friend declaration (TODO confirm)
static TypePtr singleton;
friend void type_system_init();
public:
static TypePtr create() { return singleton; }
std::string as_human_readable() const override { return "continuation"; }
bool can_rhs_be_assigned(TypePtr rhs) const override;
bool can_be_casted_with_as_operator(TypePtr cast_to) const override;
};
/*
* `null` has TypeDataNullLiteral type.
* Currently, it can be assigned to int/slice/etc., but later Tolk will have T? types and null safety.
* Note, that `var i = null`, though valid (i would be constant null), fires an "always-null" compilation error
* (it's much better for the user to see an error here than when this variable is passed somewhere).
* It exists as a single shared instance, returned by create().
*/
class TypeDataNullLiteral final : public TypeData {
// private constructor: fixed type_id = 8, no flags
TypeDataNullLiteral() : TypeData(8ULL, 0) {}
// the shared instance; presumably assigned by type_system_init(), hence the friend declaration (TODO confirm)
static TypePtr singleton;
friend void type_system_init();
public:
static TypePtr create() { return singleton; }
std::string as_human_readable() const override { return "null"; }
bool can_rhs_be_assigned(TypePtr rhs) const override;
bool can_be_casted_with_as_operator(TypePtr cast_to) const override;
};
/*
* `fun(int, int) -> void` is TypeDataFunCallable, think of it as a typed continuation.
* A type of function `fun f(x: int) { return x; }` is actually `fun(int) -> int`.
* So, when assigning it to a variable `var cb = f`, this variable also has this type.
*/
class TypeDataFunCallable final : public TypeData {
// private constructor: instances are obtained through create()
TypeDataFunCallable(uint64_t type_id, int children_flags, std::vector<TypePtr>&& params_types, TypePtr return_type)
: TypeData(type_id, children_flags)
, params_types(std::move(params_types))
, return_type(return_type) {}
public:
// types of parameters, in declaration order
const std::vector<TypePtr> params_types;
const TypePtr return_type;
static TypePtr create(std::vector<TypePtr>&& params_types, TypePtr return_type);
// number of parameters
int params_size() const { return static_cast<int>(params_types.size()); }
std::string as_human_readable() const override;
bool can_rhs_be_assigned(TypePtr rhs) const override;
bool can_be_casted_with_as_operator(TypePtr cast_to) const override;
void traverse(const TraverserCallbackT& callback) const override;
TypePtr replace_children_custom(const ReplacerCallbackT& callback) const override;
};
/*
* `T` inside generic functions is TypeDataGenericT.
* Example: `fun f<X,Y>(a: X, b: Y): [X, Y]` (here X and Y are generic Ts).
* On instantiation like `f(1,"")`, a new function `f<int,slice>` is created with type `fun(int,slice)->[int,slice]`.
*/
class TypeDataGenericT final : public TypeData {
// private constructor: instances are obtained through create(); always carries flag_contains_genericT_inside
TypeDataGenericT(uint64_t type_id, std::string&& nameT)
: TypeData(type_id, flag_contains_genericT_inside)
, nameT(std::move(nameT)) {}
public:
// name of the generic parameter, also used as its human-readable form
const std::string nameT;
static TypePtr create(std::string&& nameT);
std::string as_human_readable() const override { return nameT; }
bool can_rhs_be_assigned(TypePtr rhs) const override;
bool can_be_casted_with_as_operator(TypePtr cast_to) const override;
int calc_width_on_stack() const override;
void extract_components(std::vector<TypePtr>& comp_types) const override;
};
/*
* `(int, slice)` is TypeDataTensor of 2 elements. Tensor of N elements occupies N stack slots.
* Of course, there may be nested tensors, like `(int, (int, slice), cell)`.
* Arguments, variables, globals, return values, etc. can be tensors.
* A tensor can be empty.
*/
class TypeDataTensor final : public TypeData {
// private constructor: instances are obtained through create()
TypeDataTensor(uint64_t type_id, int children_flags, std::vector<TypePtr>&& items)
: TypeData(type_id, children_flags)
, items(std::move(items)) {}
public:
// types of tensor elements, in order
const std::vector<TypePtr> items;
static TypePtr create(std::vector<TypePtr>&& items);
// number of (top-level) elements
int size() const { return static_cast<int>(items.size()); }
std::string as_human_readable() const override;
bool can_rhs_be_assigned(TypePtr rhs) const override;
bool can_be_casted_with_as_operator(TypePtr cast_to) const override;
void traverse(const TraverserCallbackT& callback) const override;
TypePtr replace_children_custom(const ReplacerCallbackT& callback) const override;
int calc_width_on_stack() const override;
void extract_components(std::vector<TypePtr>& comp_types) const override;
};
/*
* `[int, slice]` is TypeDataTypedTuple, a TVM 'tuple' under the hood, contained in 1 stack slot.
* Unlike TypeDataTuple (untyped tuples), it has a predefined inner structure and can be assigned as
* `var [i, cs] = [0, ""]` (where i and cs become two separate variables on a stack, int and slice).
*/
class TypeDataTypedTuple final : public TypeData {
// private constructor: instances are obtained through create()
TypeDataTypedTuple(uint64_t type_id, int children_flags, std::vector<TypePtr>&& items)
: TypeData(type_id, children_flags)
, items(std::move(items)) {}
public:
// types of tuple elements, in order
const std::vector<TypePtr> items;
static TypePtr create(std::vector<TypePtr>&& items);
// number of elements
int size() const { return static_cast<int>(items.size()); }
std::string as_human_readable() const override;
bool can_rhs_be_assigned(TypePtr rhs) const override;
bool can_be_casted_with_as_operator(TypePtr cast_to) const override;
void traverse(const TraverserCallbackT& callback) const override;
TypePtr replace_children_custom(const ReplacerCallbackT& callback) const override;
};
/*
* `unknown` is a special type, which can appear in corner cases.
* The type of exception argument (which can hold any TVM value at runtime) is unknown.
* The type of `_` used as rvalue is unknown.
* The only thing available to do with unknown is to cast it: `catch (excNo, arg) { var i = arg as int; }`
* It exists as a single shared instance, returned by create().
*/
class TypeDataUnknown final : public TypeData {
// private constructor: fixed type_id = 20, always carries flag_contains_unknown_inside
TypeDataUnknown() : TypeData(20ULL, flag_contains_unknown_inside) {}
// the shared instance; presumably assigned by type_system_init(), hence the friend declaration (TODO confirm)
static TypePtr singleton;
friend void type_system_init();
public:
static TypePtr create() { return singleton; }
std::string as_human_readable() const override { return "unknown"; }
bool can_rhs_be_assigned(TypePtr rhs) const override;
bool can_be_casted_with_as_operator(TypePtr cast_to) const override;
};
/*
* "Unresolved" is not actually a type; it's an intermediate state between parsing and resolving.
* At parsing to AST, unrecognized type names (MyEnum, MyStruct, T) are parsed as TypeDataUnresolved,
* and after all source files are parsed and global symbols registered, they are replaced by actual ones.
* Example: `fun f<T>(v: T)` at first v is TypeDataUnresolved("T"), later becomes TypeDataGenericT.
*/
class TypeDataUnresolved final : public TypeData {
// private constructor: instances are obtained through create(); always carries flag_contains_unresolved_inside
TypeDataUnresolved(uint64_t type_id, std::string&& text, SrcLocation loc)
: TypeData(type_id, flag_contains_unresolved_inside)
, text(std::move(text))
, loc(loc) {}
public:
// the identifier as written in source code
const std::string text;
// where that identifier was met
const SrcLocation loc;
static TypePtr create(std::string&& text, SrcLocation loc);
// the name followed by `*`, which distinguishes unresolved names in output
std::string as_human_readable() const override { return text + "*"; }
bool can_rhs_be_assigned(TypePtr rhs) const override;
bool can_be_casted_with_as_operator(TypePtr cast_to) const override;
int calc_width_on_stack() const override;
void extract_components(std::vector<TypePtr>& comp_types) const override;
};
/*
* `void` is TypeDataVoid.
* From the type system point of view, `void` functions return nothing.
* Empty tensor is not compatible with void, although at IR level they are similar, 0 stack slots.
* It exists as a single shared instance, returned by create().
*/
class TypeDataVoid final : public TypeData {
// private constructor: fixed type_id = 10, no flags
TypeDataVoid() : TypeData(10ULL, 0) {}
// the shared instance; presumably assigned by type_system_init(), hence the friend declaration (TODO confirm)
static TypePtr singleton;
friend void type_system_init();
public:
static TypePtr create() { return singleton; }
std::string as_human_readable() const override { return "void"; }
bool can_rhs_be_assigned(TypePtr rhs) const override;
bool can_be_casted_with_as_operator(TypePtr cast_to) const override;
// overridden, since the base implementation returns 1 (the definition is not in this header)
int calc_width_on_stack() const override;
void extract_components(std::vector<TypePtr>& comp_types) const override;
};
// --------------------------------------------
class Lexer;
TypePtr parse_type_from_tokens(Lexer& lex);
void type_system_init();
} // namespace tolk

Some files were not shown because too many files have changed in this diff Show more