2024-10-31 07:03:33 +00:00
/*
This file is part of TON Blockchain Library .
TON Blockchain Library is free software : you can redistribute it and / or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation , either version 2 of the License , or
( at your option ) any later version .
TON Blockchain Library is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
GNU Lesser General Public License for more details .
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library . If not , see < http : //www.gnu.org/licenses/>.
*/
# include "ast-from-tokens.h"
# include "ast.h"
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
# include "type-system.h"
2024-10-31 07:03:33 +00:00
# include "platform-utils.h"
2024-10-31 07:11:41 +00:00
# include "tolk-version.h"
2024-10-31 07:03:33 +00:00
/*
* Here we construct AST for a tolk file .
* While constructing , no global state is modified .
* Historically , in FunC , there was no AST : while lexing , symbols were registered , types were inferred , and so on .
* There was no way to perform any more or less semantic analysis .
* Implementing AST gives a giant advance for future modifications and stability .
*/
namespace tolk {
// Tells whether `tok` is a comparison operator token
// (lt, gt, leq, geq, eq, neq or spaceship).
static bool is_comparison_binary_op(TokenType tok) {
  switch (tok) {
    case tok_lt:
    case tok_gt:
    case tok_leq:
    case tok_geq:
    case tok_eq:
    case tok_neq:
    case tok_spaceship:
      return true;
    default:
      return false;
  }
}
// Tells whether `tok` is a bitwise binary operator token: & | ^
static bool is_bitwise_binary_op(TokenType tok) {
  switch (tok) {
    case tok_bitwise_and:
    case tok_bitwise_or:
    case tok_bitwise_xor:
      return true;
    default:
      return false;
  }
}
2024-10-31 07:11:41 +00:00
// Tells whether `tok` is a logical binary operator token: && ||
static bool is_logical_binary_op(TokenType tok) {
  if (tok == tok_logical_and) {
    return true;
  }
  return tok == tok_logical_or;
}
2024-10-31 07:03:33 +00:00
// Tells whether `tok` is the addition or the subtraction operator token.
static bool is_add_or_sub_binary_op(TokenType tok) {
  if (tok == tok_plus) {
    return true;
  }
  return tok == tok_minus;
}
// Throws a ParseError telling that operator `op_lower` has lower precedence than `op_higher`,
// for a case like "flags & 0xFF != 0" (equivalent to "flags & 1", probably unexpected).
// It would be better as a warning, but we decided to make it a strict error.
//   loc       - source location attached to the thrown ParseError
//   op_lower  - name of the operator with lower precedence, inserted into the message
//   op_higher - name of the operator with higher precedence, inserted into the message
// Never returns normally: the body unconditionally throws.
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
static void fire_error_lower_precedence ( SrcLocation loc , std : : string_view op_lower , std : : string_view op_higher ) {
// string_view can't be concatenated with operator+, so both names are copied into std::string first
std : : string name_lower = static_cast < std : : string > ( op_lower ) ;
std : : string name_higher = static_cast < std : : string > ( op_higher ) ;
throw ParseError ( loc , name_lower + " has lower precedence than " + name_higher +
" , probably this code won't work as you expected. "
" Use parenthesis: either (... " + name_lower + " ...) to evaluate it first, or (... " + name_higher + " ...) to suppress this error. " ) ;
}
// Throws a ParseError for two different operators mixed without parenthesis, like "arg1 & arg2 | arg3".
//   loc - source location attached to the thrown ParseError
//   op1 - name of the first operator, inserted into the message
//   op2 - name of the second operator, inserted into the message
// Never returns normally: the body unconditionally throws.
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
2024-10-31 07:11:41 +00:00
static void fire_error_mix_and_or_no_parenthesis ( SrcLocation loc , std : : string_view op1 , std : : string_view op2 ) {
2024-10-31 07:03:33 +00:00
// string_view can't be concatenated with operator+, so both names are copied into std::string first
std : : string name1 = static_cast < std : : string > ( op1 ) ;
std : : string name2 = static_cast < std : : string > ( op2 ) ;
2024-10-31 07:11:41 +00:00
throw ParseError ( loc , " mixing " + name1 + " with " + name2 + " without parenthesis may lead to accidental errors. "
2024-10-31 07:03:33 +00:00
" Use parenthesis to emphasize operator precedence. " ) ;
}
// diagnose when bitwise operators are used in a probably wrong way due to tricky precedence
// example: "flags & 0xFF != 0" is equivalent to "flags & 1", most likely it's unexpected
// the only way to suppress this error for the programmer is to use parenthesis
// (how do we detect presence of parenthesis? simple: (0!=1) is ast_parenthesized_expr{ast_binary_operator},
// that's why if rhs->type == ast_binary_operator, it's not surrounded by parenthesis)
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Noteable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
static void diagnose_bitwise_precedence ( SrcLocation loc , std : : string_view operator_name , AnyExprV lhs , AnyExprV rhs ) {
2024-10-31 07:03:33 +00:00
// handle "flags & 0xFF != 0" (rhs = "0xFF != 0")
if ( rhs - > type = = ast_binary_operator & & is_comparison_binary_op ( rhs - > as < ast_binary_operator > ( ) - > tok ) ) {
fire_error_lower_precedence ( loc , operator_name , rhs - > as < ast_binary_operator > ( ) - > operator_name ) ;
}
// handle "0 != flags & 0xFF" (lhs = "0 != flags")
if ( lhs - > type = = ast_binary_operator & & is_comparison_binary_op ( lhs - > as < ast_binary_operator > ( ) - > tok ) ) {
fire_error_lower_precedence ( loc , operator_name , lhs - > as < ast_binary_operator > ( ) - > operator_name ) ;
}
2024-10-31 07:11:41 +00:00
}
// similar to above, but detects potentially confusing mixes of different bitwise operators (& | ^)
// and of different logical operators (&& ||) within the same expression without parenthesis;
// parenthesis are required in such cases, since && and || have equal operator precedence in Tolk
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Noteable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
static void diagnose_and_or_precedence ( SrcLocation loc , AnyExprV lhs , TokenType rhs_tok , std : : string_view rhs_operator_name ) {
2024-10-31 07:11:41 +00:00
if ( auto lhs_op = lhs - > try_as < ast_binary_operator > ( ) ) {
// handle "arg1 & arg2 | arg3" (lhs = "arg1 & arg2")
if ( is_bitwise_binary_op ( lhs_op - > tok ) & & is_bitwise_binary_op ( rhs_tok ) & & lhs_op - > tok ! = rhs_tok ) {
fire_error_mix_and_or_no_parenthesis ( loc , lhs_op - > operator_name , rhs_operator_name ) ;
}
2024-10-31 07:03:33 +00:00
2024-10-31 07:11:41 +00:00
// handle "arg1 && arg2 || arg3" (lhs = "arg1 && arg2")
if ( is_logical_binary_op ( lhs_op - > tok ) & & is_logical_binary_op ( rhs_tok ) & & lhs_op - > tok ! = rhs_tok ) {
fire_error_mix_and_or_no_parenthesis ( loc , lhs_op - > operator_name , rhs_operator_name ) ;
}
2024-10-31 07:03:33 +00:00
}
}
// diagnose "a << 8 + 1" (equivalent to "a << 9", probably unexpected)
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Noteable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
static void diagnose_addition_in_bitshift ( SrcLocation loc , std : : string_view bitshift_operator_name , AnyExprV rhs ) {
2024-10-31 07:03:33 +00:00
if ( rhs - > type = = ast_binary_operator & & is_add_or_sub_binary_op ( rhs - > as < ast_binary_operator > ( ) - > tok ) ) {
fire_error_lower_precedence ( loc , bitshift_operator_name , rhs - > as < ast_binary_operator > ( ) - > operator_name ) ;
}
}
2024-10-31 07:11:41 +00:00
// replace (a == null) and similar to isNull(a) (call of a built-in function)
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Noteable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
// Rewrites a comparison with the `null` keyword into a call of the built-in isNull function:
// `a == null` becomes a function call vertex, `a != null` becomes that call wrapped into a logical not.
// Returns `v` itself when the operator is not == / != or when neither operand is the `null` keyword.
// All created vertices reuse the location of `v`.
static AnyExprV maybe_replace_eq_null_with_isNull_call ( V < ast_binary_operator > v ) {
2024-10-31 07:11:41 +00:00
// the replacement happens only if `null` is on either side AND the operator is == or !=
bool has_null = v - > get_lhs ( ) - > type = = ast_null_keyword | | v - > get_rhs ( ) - > type = = ast_null_keyword ;
bool replace = has_null & & ( v - > tok = = tok_eq | | v - > tok = = tok_neq ) ;
if ( ! replace ) {
return v ;
}
auto v_ident = createV < ast_identifier > ( v - > loc , " __isNull " ) ; // built-in function
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
auto v_ref = createV < ast_reference > ( v - > loc , v_ident , nullptr ) ;
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Noteable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
// despite its name, v_null is the operand compared against null (the side opposite to the `null` keyword);
// if both sides are `null`, the right one is taken
AnyExprV v_null = v - > get_lhs ( ) - > type = = ast_null_keyword ? v - > get_rhs ( ) : v - > get_lhs ( ) ;
AnyExprV v_arg = createV < ast_argument > ( v - > loc , v_null , false ) ;
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
AnyExprV v_isNull = createV < ast_function_call > ( v - > loc , v_ref , createV < ast_argument_list > ( v - > loc , { v_arg } ) ) ;
2024-10-31 07:11:41 +00:00
// for `!=`, the call is negated
if ( v - > tok = = tok_neq ) {
v_isNull = createV < ast_unary_operator > ( v - > loc , " ! " , tok_logical_not , v_isNull ) ;
}
return v_isNull ;
}
2024-10-31 07:03:33 +00:00
/*
*
* PARSE SOURCE
*
*/
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Noteable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
// forward declaration: parse_constant_declaration() below calls parse_expr(); its definition is not part of this chunk
AnyExprV parse_expr ( Lexer & lex ) ;
2024-10-31 07:11:41 +00:00
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
// Parses a single parameter of a function declaration: `[mutate] <name>: <type>`,
// where <name> is an identifier, the `self` keyword, or an underscore for an unnamed parameter.
//   lex      - lexer positioned at the first token of the parameter
//   is_first - whether this parameter is the first one in the list (`self` is reported as an error otherwise)
// Returns an ast_parameter vertex located at the first token of the parameter.
static AnyV parse_parameter ( Lexer & lex , bool is_first ) {
2024-10-31 07:03:33 +00:00
SrcLocation loc = lex . cur_location ( ) ;
2024-10-31 07:11:41 +00:00
2024-10-31 07:18:54 +00:00
// optional keyword `mutate` meaning that a function will mutate a passed argument (like passed by reference)
bool declared_as_mutate = false ;
if ( lex . tok ( ) = = tok_mutate ) {
lex . next ( ) ;
declared_as_mutate = true ;
}
// parameter name (for an underscore, i.e. an unnamed parameter, param_name is left empty)
2024-10-31 07:11:41 +00:00
std : : string_view param_name ;
if ( lex . tok ( ) = = tok_identifier ) {
param_name = lex . cur_str ( ) ;
2024-10-31 07:18:54 +00:00
} else if ( lex . tok ( ) = = tok_self ) {
if ( ! is_first ) {
lex . error ( " `self` can only be the first parameter " ) ;
}
param_name = " self " ;
2024-10-31 07:11:41 +00:00
} else if ( lex . tok ( ) ! = tok_underscore ) {
// NOTE(review): presumably lex.unexpected() does not return (throws) - confirm in Lexer
lex . unexpected ( " parameter name " ) ;
2024-10-31 07:03:33 +00:00
}
// skip the name token (identifier / `self` / underscore)
lex . next ( ) ;
2024-10-31 07:11:41 +00:00
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
// the colon and the parameter type after it are mandatory
2024-10-31 07:11:41 +00:00
lex . expect ( tok_colon , " `: <parameter_type>` " ) ;
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
TypePtr param_type = parse_type_from_tokens ( lex ) ;
2024-10-31 07:11:41 +00:00
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Noteable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
return createV < ast_parameter > ( loc , param_name , param_type , declared_as_mutate ) ;
2024-10-31 07:03:33 +00:00
}
2024-10-31 07:11:41 +00:00
// Parses a global variable declaration: `global <name>: <type>;`
//   lex         - lexer positioned at the `global` keyword
//   annotations - annotations collected before the declaration; any annotation is reported as an error
// Returns an ast_global_var_declaration vertex located at the `global` keyword.
static AnyV parse_global_var_declaration ( Lexer & lex , const std : : vector < V < ast_annotation > > & annotations ) {
if ( ! annotations . empty ( ) ) {
lex . error ( " @annotations are not applicable to global var declaration " ) ;
2024-10-31 07:03:33 +00:00
}
2024-10-31 07:11:41 +00:00
// the location is captured before `global` is consumed, so it points at the keyword
SrcLocation loc = lex . cur_location ( ) ;
lex . expect ( tok_global , " `global` " ) ;
2024-10-31 07:03:33 +00:00
lex . check ( tok_identifier , " global variable name " ) ;
2024-10-31 07:04:58 +00:00
// the identifier vertex gets its own location (at the name, not at the keyword)
auto v_ident = createV < ast_identifier > ( lex . cur_location ( ) , lex . cur_str ( ) ) ;
2024-10-31 07:03:33 +00:00
lex . next ( ) ;
2024-10-31 07:11:41 +00:00
// the type of a global is mandatory
lex . expect ( tok_colon , " `:` " ) ;
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
TypePtr declared_type = parse_type_from_tokens ( lex ) ;
2024-10-31 07:11:41 +00:00
// dedicated messages for `global a: int, b: int` and `global a: int = 0` instead of a generic "expected `;`"
if ( lex . tok ( ) = = tok_comma ) {
lex . error ( " multiple declarations are not allowed, split globals on separate lines " ) ;
}
if ( lex . tok ( ) = = tok_assign ) {
lex . error ( " assigning to a global is not allowed at declaration " ) ;
}
lex . expect ( tok_semicolon , " `;` " ) ;
2024-10-31 07:04:58 +00:00
return createV < ast_global_var_declaration > ( loc , v_ident , declared_type ) ;
2024-10-31 07:03:33 +00:00
}
2024-10-31 07:11:41 +00:00
// Parses a constant declaration: `const <name> [: <type>] = <init_value>;`
//   lex         - lexer positioned at the `const` keyword
//   annotations - annotations collected before the declaration; any annotation is reported as an error
// Returns an ast_constant_declaration vertex located at the `const` keyword;
// its declared type is nullptr when the type is omitted.
static AnyV parse_constant_declaration ( Lexer & lex , const std : : vector < V < ast_annotation > > & annotations ) {
if ( ! annotations . empty ( ) ) {
// the message names constants (it used to say "global var declaration", copy-pasted from the globals parser)
lex . error ( " @annotations are not applicable to constant declaration " ) ;
2024-10-31 07:03:33 +00:00
}
2024-10-31 07:11:41 +00:00
// the location is captured before `const` is consumed, so it points at the keyword
SrcLocation loc = lex . cur_location ( ) ;
lex . expect ( tok_const , " `const` " ) ;
2024-10-31 07:03:33 +00:00
lex . check ( tok_identifier , " constant name " ) ;
2024-10-31 07:04:58 +00:00
// the identifier vertex gets its own location (at the name, not at the keyword)
auto v_ident = createV < ast_identifier > ( lex . cur_location ( ) , lex . cur_str ( ) ) ;
2024-10-31 07:03:33 +00:00
lex . next ( ) ;
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
// unlike globals, the type of a constant is optional: it stays nullptr when `: <type>` is omitted
TypePtr declared_type = nullptr ;
2024-10-31 07:11:41 +00:00
if ( lex . tok ( ) = = tok_colon ) {
lex . next ( ) ;
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
declared_type = parse_type_from_tokens ( lex ) ;
2024-10-31 07:11:41 +00:00
}
// the initializer is mandatory
lex . expect ( tok_assign , " `=` " ) ;
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Noteable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
AnyExprV init_value = parse_expr ( lex ) ;
2024-10-31 07:11:41 +00:00
// dedicated message for `const a = 1, b = 2` instead of a generic "expected `;`"
if ( lex . tok ( ) = = tok_comma ) {
lex . error ( " multiple declarations are not allowed, split constants on separate lines " ) ;
}
lex . expect ( tok_semicolon , " `;` " ) ;
2024-10-31 07:04:58 +00:00
return createV < ast_constant_declaration > ( loc , v_ident , declared_type , init_value ) ;
2024-10-31 07:03:33 +00:00
}
2024-10-31 07:18:54 +00:00
// "parameters" are at function declaration: `fun f(param1: int, mutate param2: slice)`
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
// Parses a parenthesized, comma-separated list of parameters in a function declaration.
// The returned vertex carries the location of the opening `(`.
// parse_parameter() receives `true` only for the very first parameter and `false` for
// every following one (NOTE(review): presumably a "first parameter" flag — confirm in parse_parameter).
static V<ast_parameter_list> parse_parameter_list(Lexer& lex) {
  SrcLocation loc = lex.cur_location();
  std::vector<AnyV> params;
  lex.expect(tok_oppar, " parameter list ");
  if (lex.tok() != tok_clpar) {
    // one iteration per parameter; the loop ends as soon as no comma follows
    for (bool is_first = true;; is_first = false) {
      params.push_back(parse_parameter(lex, is_first));
      if (lex.tok() != tok_comma) {
        break;
      }
      lex.next();
    }
  }
  lex.expect(tok_clpar, " `)` ");
  return createV<ast_parameter_list>(loc, std::move(params));
}
2024-10-31 07:18:54 +00:00
// "arguments" are at function call: `f(arg1, mutate arg2)`
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Notable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
// Parses a single call argument: an optional `mutate` keyword followed by an expression.
// The resulting ast_argument is located at the first token of the argument
// (the `mutate` keyword, if it is present) and remembers whether `mutate` was written.
static AnyExprV parse_argument(Lexer& lex) {
  SrcLocation loc = lex.cur_location();

  // keyword `mutate` is necessary when a parameter is declared `mutate` (to make mutation obvious for the reader)
  const bool passed_as_mutate = lex.tok() == tok_mutate;
  if (passed_as_mutate) {
    lex.next();
  }

  AnyExprV expr = parse_expr(lex);
  return createV<ast_argument>(loc, expr, passed_as_mutate);
}
// Parses a parenthesized, comma-separated list of call arguments (possibly empty).
// Every item is parsed by parse_argument(); the returned vertex carries the location of `(`.
static V<ast_argument_list> parse_argument_list(Lexer& lex) {
  SrcLocation loc = lex.cur_location();
  std::vector<AnyExprV> args;
  lex.expect(tok_oppar, " `(` ");
  if (lex.tok() != tok_clpar) {
    // at least one argument is present; keep going while a comma follows
    for (;;) {
      args.push_back(parse_argument(lex));
      if (lex.tok() != tok_comma) {
        break;
      }
      lex.next();
    }
  }
  lex.expect(tok_clpar, " `)` ");
  return createV<ast_argument_list>(loc, std::move(args));
}
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
// Speculatively parses explicit instantiation arguments `<T1, T2, ...>` right after an identifier.
// The lexer must be positioned at `<` (this is verified by lex.check).
// The parsing position is saved before anything is consumed; if a ParseError is thrown while
// parsing the types or expecting the closing `>`, the lexer is rewound to the saved position
// and nullptr is returned, so the caller can continue as if nothing was consumed
// (NOTE(review): presumably because `<` may also be a comparison operator here).
// On success, returns a list vertex located at `<`, with one ast_instantiationT_item per type.
static V<ast_instantiationT_list> parse_maybe_instantiationTs_after_identifier(Lexer& lex) {
  lex.check(tok_lt, " `<` ");
  Lexer::SavedPositionForLookahead backup = lex.save_parsing_position();
  try {
    SrcLocation loc = lex.cur_location();
    lex.next();
    std::vector<AnyV> instantiationTs;
    for (;;) {
      // The location must be read BEFORE the type is parsed. Passing both
      // `lex.cur_location()` and `parse_type_from_tokens(lex)` as arguments of one call
      // leaves their evaluation order unspecified, so the item could get the location
      // of the token following the type instead of the type itself.
      SrcLocation item_loc = lex.cur_location();
      instantiationTs.push_back(createV<ast_instantiationT_item>(item_loc, parse_type_from_tokens(lex)));
      if (lex.tok() != tok_comma) {
        break;
      }
      lex.next();
    }
    lex.expect(tok_gt, " `>` ");
    return createV<ast_instantiationT_list>(loc, std::move(instantiationTs));
  } catch (const ParseError&) {
    // not an instantiation list: undo everything consumed since `<`
    lex.restore_position(backup);
    return nullptr;
  }
}
2024-10-31 07:11:41 +00:00
// parse (expr) / [expr] / identifier / number
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Notable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
static AnyExprV parse_expr100 ( Lexer & lex ) {
2024-10-31 07:03:33 +00:00
SrcLocation loc = lex . cur_location ( ) ;
2024-10-31 07:11:41 +00:00
switch ( lex . tok ( ) ) {
case tok_oppar : {
2024-10-31 07:03:33 +00:00
lex . next ( ) ;
2024-10-31 07:11:41 +00:00
if ( lex . tok ( ) = = tok_clpar ) {
lex . next ( ) ;
return createV < ast_tensor > ( loc , { } ) ;
}
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Noteable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
AnyExprV first = parse_expr ( lex ) ;
2024-10-31 07:11:41 +00:00
if ( lex . tok ( ) = = tok_clpar ) {
lex . next ( ) ;
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Noteable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
return createV < ast_parenthesized_expression > ( loc , first ) ;
2024-10-31 07:11:41 +00:00
}
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Noteable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
std : : vector < AnyExprV > items ( 1 , first ) ;
2024-10-31 07:11:41 +00:00
while ( lex . tok ( ) = = tok_comma ) {
lex . next ( ) ;
items . emplace_back ( parse_expr ( lex ) ) ;
}
lex . expect ( tok_clpar , " `)` " ) ;
return createV < ast_tensor > ( loc , std : : move ( items ) ) ;
2024-10-31 07:03:33 +00:00
}
2024-10-31 07:11:41 +00:00
case tok_opbracket : {
2024-10-31 07:03:33 +00:00
lex . next ( ) ;
2024-10-31 07:11:41 +00:00
if ( lex . tok ( ) = = tok_clbracket ) {
lex . next ( ) ;
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
return createV < ast_typed_tuple > ( loc , { } ) ;
2024-10-31 07:03:33 +00:00
}
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Noteable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
std : : vector < AnyExprV > items ( 1 , parse_expr ( lex ) ) ;
2024-10-31 07:11:41 +00:00
while ( lex . tok ( ) = = tok_comma ) {
lex . next ( ) ;
items . emplace_back ( parse_expr ( lex ) ) ;
2024-10-31 07:03:33 +00:00
}
2024-10-31 07:11:41 +00:00
lex . expect ( tok_clbracket , " `]` " ) ;
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
return createV < ast_typed_tuple > ( loc , std : : move ( items ) ) ;
2024-10-31 07:03:33 +00:00
}
2024-10-31 07:11:41 +00:00
case tok_int_const : {
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Noteable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
std : : string_view orig_str = lex . cur_str ( ) ;
td : : RefInt256 intval = td : : string_to_int256 ( static_cast < std : : string > ( orig_str ) ) ;
if ( intval . is_null ( ) | | ! intval - > signed_fits_bits ( 257 ) ) {
lex . error ( " invalid integer constant " ) ;
}
2024-10-31 07:03:33 +00:00
lex . next ( ) ;
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Noteable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
return createV < ast_int_const > ( loc , std : : move ( intval ) , orig_str ) ;
2024-10-31 07:03:33 +00:00
}
2024-10-31 07:11:41 +00:00
case tok_string_const : {
std : : string_view str_val = lex . cur_str ( ) ;
2024-10-31 07:03:33 +00:00
lex . next ( ) ;
2024-10-31 07:11:41 +00:00
char modifier = 0 ;
if ( lex . tok ( ) = = tok_string_modifier ) {
modifier = lex . cur_str ( ) [ 0 ] ;
lex . next ( ) ;
2024-10-31 07:03:33 +00:00
}
2024-10-31 07:11:41 +00:00
return createV < ast_string_const > ( loc , str_val , modifier ) ;
2024-10-31 07:03:33 +00:00
}
2024-10-31 07:11:41 +00:00
case tok_underscore : {
2024-10-31 07:03:33 +00:00
lex . next ( ) ;
2024-10-31 07:11:41 +00:00
return createV < ast_underscore > ( loc ) ;
}
case tok_true : {
lex . next ( ) ;
return createV < ast_bool_const > ( loc , true ) ;
}
case tok_false : {
lex . next ( ) ;
return createV < ast_bool_const > ( loc , false ) ;
}
case tok_null : {
lex . next ( ) ;
return createV < ast_null_keyword > ( loc ) ;
}
2024-10-31 07:18:54 +00:00
case tok_self : {
lex . next ( ) ;
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
auto v_ident = createV < ast_identifier > ( loc , " self " ) ;
return createV < ast_reference > ( loc , v_ident , nullptr ) ;
2024-10-31 07:18:54 +00:00
}
2024-10-31 07:11:41 +00:00
case tok_identifier : {
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
auto v_ident = createV < ast_identifier > ( loc , lex . cur_str ( ) ) ;
V < ast_instantiationT_list > v_instantiationTs = nullptr ;
2024-10-31 07:11:41 +00:00
lex . next ( ) ;
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
if ( lex . tok ( ) = = tok_lt ) {
v_instantiationTs = parse_maybe_instantiationTs_after_identifier ( lex ) ;
}
return createV < ast_reference > ( loc , v_ident , v_instantiationTs ) ;
2024-10-31 07:11:41 +00:00
}
2025-01-27 07:33:24 +00:00
default :
2024-10-31 07:11:41 +00:00
lex . unexpected ( " <expression> " ) ;
2024-10-31 07:03:33 +00:00
}
}
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
// parse E(...) (left-to-right)
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Notable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
// Parses a chain of calls on one callee: `E(...)`, `E(...)(...)`, and so on.
// The callee comes from parse_expr100; every `(` that follows wraps the expression
// built so far into an ast_function_call carrying the location of the wrapped expression.
static AnyExprV parse_expr90(Lexer& lex) {
  AnyExprV callee = parse_expr100(lex);
  for (;;) {
    if (lex.tok() != tok_oppar) {
      return callee;
    }
    // the previous result becomes the callee of the next call (left-to-right)
    callee = createV<ast_function_call>(callee->loc, callee, parse_argument_list(lex));
  }
}
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
// parse E.field and E.method(...) (left-to-right)
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Notable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
// Parses a left-to-right chain of dot accesses with optional calls:
// `E.field`, `E.0`, `E.method(...)`, `E.method<T>(...)`.
// The base expression comes from parse_expr90. After every `.` there must be either
// an identifier (optionally followed by `<...>` instantiation types) or an integer
// constant (indexed access); any other token is reported through lex.unexpected().
// Each dot access may be followed by any number of `(...)` argument lists.
static AnyExprV parse_expr80(Lexer& lex) {
  AnyExprV lhs = parse_expr90(lex);
  while (lex.tok() == tok_dot) {
    SrcLocation loc = lex.cur_location();   // location of the `.` itself
    lex.next();
    V<ast_identifier> v_ident = nullptr;
    V<ast_instantiationT_list> v_instantiationTs = nullptr;
    if (lex.tok() == tok_identifier) {    // obj.field / obj.method
      v_ident = createV<ast_identifier>(lex.cur_location(), lex.cur_str());
      lex.next();
      if (lex.tok() == tok_lt) {
        v_instantiationTs = parse_maybe_instantiationTs_after_identifier(lex);
      }
    } else if (lex.tok() == tok_int_const) {    // obj.0 (indexed access)
      // the index is stored as an identifier node holding the constant's text
      v_ident = createV<ast_identifier>(lex.cur_location(), lex.cur_str());
      lex.next();
    } else {
      // NOTE(review): presumably unexpected() does not return; otherwise v_ident stays null below
      lex.unexpected("method name");
    }
    lhs = createV<ast_dot_access>(loc, lhs, v_ident, v_instantiationTs);
    while (lex.tok() == tok_oppar) {
      // Read the location of `(` BEFORE parsing the arguments: parse_argument_list()
      // advances the lexer, and the evaluation order of function arguments is unspecified,
      // so reading it inside the createV call could yield the position after the `)`.
      SrcLocation call_loc = lex.cur_location();
      lhs = createV<ast_function_call>(call_loc, lhs, parse_argument_list(lex));
    }
  }
  return lhs;
}
2024-10-31 07:11:41 +00:00
// parse ! ~ - + E (unary)
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Notable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
// Parses a prefix operator applied to an expression: `!E`, `~E`, `-E`, `+E`.
// The operand is parsed by a recursive call, so it may itself start with a prefix operator.
// When the current token is not a prefix operator, parsing is delegated to parse_expr80.
static AnyExprV parse_expr75(Lexer& lex) {
  TokenType op_tok = lex.tok();
  const bool is_prefix_op = op_tok == tok_logical_not || op_tok == tok_bitwise_not ||
                            op_tok == tok_minus || op_tok == tok_plus;
  if (!is_prefix_op) {
    return parse_expr80(lex);
  }

  // remember where the operator is and how it is spelled before moving past it
  SrcLocation op_loc = lex.cur_location();
  std::string_view op_name = lex.cur_str();
  lex.next();
  AnyExprV operand = parse_expr75(lex);
  return createV<ast_unary_operator>(op_loc, op_name, op_tok, operand);
}
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
// parse E as <type>
// The operand comes from parse_expr75. At most one `as` is consumed here: once the target
// type has been read, the cast node is returned without looking for another `as`.
static AnyExprV parse_expr40(Lexer& lex) {
  AnyExprV operand = parse_expr75(lex);
  if (lex.tok() != tok_as) {
    return operand;
  }

  SrcLocation as_loc = lex.cur_location();    // location of the `as` keyword
  lex.next();
  TypePtr target_type = parse_type_from_tokens(lex);
  return createV<ast_cast_as_operator>(as_loc, operand, target_type);
}
2024-10-31 07:11:41 +00:00
// parse E * / % ^/ ~/ E (left-to-right)
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Notable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
// Parses a left-associative chain of multiplicative-level binary operators
// (tok_mul, tok_div, tok_mod, tok_divC, tok_divR) whose operands come from parse_expr40.
// Every operator folds the tree built so far (left) with the next operand (right).
static AnyExprV parse_expr30(Lexer& lex) {
  AnyExprV tree = parse_expr40(lex);
  for (TokenType op_tok = lex.tok();
       op_tok == tok_mul || op_tok == tok_div || op_tok == tok_mod || op_tok == tok_divC || op_tok == tok_divR;
       op_tok = lex.tok()) {
    SrcLocation op_loc = lex.cur_location();
    std::string_view op_name = lex.cur_str();
    lex.next();
    AnyExprV operand = parse_expr40(lex);
    tree = createV<ast_binary_operator>(op_loc, op_name, op_tok, tree, operand);
  }
  return tree;
}
2024-10-31 07:11:41 +00:00
// parse E + - E (left-to-right)
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Notable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
// Parses a left-associative chain of binary `+` and `-` whose operands come from parse_expr30.
// The loop stops at the first token that is neither tok_plus nor tok_minus.
static AnyExprV parse_expr20(Lexer& lex) {
  AnyExprV tree = parse_expr30(lex);
  for (;;) {
    TokenType op_tok = lex.tok();
    const bool is_additive = op_tok == tok_minus || op_tok == tok_plus;
    if (!is_additive) {
      break;
    }
    SrcLocation op_loc = lex.cur_location();
    std::string_view op_name = lex.cur_str();
    lex.next();
    AnyExprV operand = parse_expr30(lex);
    // the tree built so far becomes the left operand of the new node
    tree = createV<ast_binary_operator>(op_loc, op_name, op_tok, tree, operand);
  }
  return tree;
}
2024-10-31 07:11:41 +00:00
// parse E << >> ~>> ^>> E (left-to-right)
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Notable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
// Parses a left-associative chain of shift operators
// (tok_lshift, tok_rshift, tok_rshiftC, tok_rshiftR) whose operands come from parse_expr20.
// Before a node is created, the right operand is passed to diagnose_addition_in_bitshift()
// together with the operator's location and spelling.
static AnyExprV parse_expr17(Lexer& lex) {
  AnyExprV tree = parse_expr20(lex);
  for (TokenType op_tok = lex.tok();
       op_tok == tok_lshift || op_tok == tok_rshift || op_tok == tok_rshiftC || op_tok == tok_rshiftR;
       op_tok = lex.tok()) {
    SrcLocation op_loc = lex.cur_location();
    std::string_view op_name = lex.cur_str();
    lex.next();
    AnyExprV operand = parse_expr20(lex);
    diagnose_addition_in_bitshift(op_loc, op_name, operand);
    tree = createV<ast_binary_operator>(op_loc, op_name, op_tok, tree, operand);
  }
  return tree;
}
2024-10-31 07:11:41 +00:00
// parse E == < > <= >= != <=> E (at most one comparison; these operators are not chained)
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Notable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
// Parses an optional single comparison between two parse_expr17 operands:
// `==`, `<`, `>`, `<=`, `>=`, `!=`, `<=>`. There is no loop here, so a second
// comparison operator following the right operand is not consumed by this function.
// For `==` and `!=` the freshly built node is handed to maybe_replace_eq_null_with_isNull_call(),
// and whatever that returns becomes the result.
static AnyExprV parse_expr15(Lexer& lex) {
  AnyExprV lhs = parse_expr17(lex);
  TokenType cmp_tok = lex.tok();
  const bool is_equality = cmp_tok == tok_eq || cmp_tok == tok_neq;
  const bool is_ordering = cmp_tok == tok_lt || cmp_tok == tok_gt || cmp_tok == tok_leq ||
                           cmp_tok == tok_geq || cmp_tok == tok_spaceship;
  if (!is_equality && !is_ordering) {
    return lhs;
  }

  SrcLocation cmp_loc = lex.cur_location();
  std::string_view cmp_name = lex.cur_str();
  lex.next();
  AnyExprV rhs = parse_expr17(lex);
  lhs = createV<ast_binary_operator>(cmp_loc, cmp_name, cmp_tok, lhs, rhs);
  if (is_equality) {
    lhs = maybe_replace_eq_null_with_isNull_call(lhs->as<ast_binary_operator>());
  }
  return lhs;
}
2024-10-31 07:11:41 +00:00
// parse E & | ^ E (left-to-right)
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Notable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
// Parses a left-associative chain of bitwise binary operators
// (tok_bitwise_and, tok_bitwise_or, tok_bitwise_xor) whose operands come from parse_expr15.
// Two precedence diagnostics run on the operands before each node is created:
// diagnose_bitwise_precedence() and then diagnose_and_or_precedence().
static AnyExprV parse_expr14(Lexer& lex) {
  AnyExprV tree = parse_expr15(lex);
  for (;;) {
    TokenType op_tok = lex.tok();
    const bool is_bitwise = op_tok == tok_bitwise_and || op_tok == tok_bitwise_or || op_tok == tok_bitwise_xor;
    if (!is_bitwise) {
      break;
    }
    SrcLocation op_loc = lex.cur_location();
    std::string_view op_name = lex.cur_str();
    lex.next();
    AnyExprV operand = parse_expr15(lex);
    // both checks see the left side as it was before the new node is built
    diagnose_bitwise_precedence(op_loc, op_name, tree, operand);
    diagnose_and_or_precedence(op_loc, tree, op_tok, op_name);
    tree = createV<ast_binary_operator>(op_loc, op_name, op_tok, tree, operand);
  }
  return tree;
}
2024-10-31 07:11:41 +00:00
// parse E && || E (left-to-right)
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Notable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
// Parses a chain of logical operators: E && E || E ... (left-to-right).
// Operands are parsed by parse_expr14. Each `&&` / `||` folds the tree built so far
// (as the left side) with the next operand into an ast_binary_operator vertex,
// after diagnose_and_or_precedence has been invoked for that operator.
// Returns the single operand unchanged when no logical operator follows it.
static AnyExprV parse_expr13(Lexer& lex) {
  AnyExprV result = parse_expr14(lex);
  for (TokenType op_tok = lex.tok(); op_tok == tok_logical_and || op_tok == tok_logical_or; op_tok = lex.tok()) {
    // location and spelling of the operator are captured before the lexer advances past it
    SrcLocation op_loc = lex.cur_location();
    std::string_view op_text = lex.cur_str();
    lex.next();
    AnyExprV operand = parse_expr14(lex);
    diagnose_and_or_precedence(op_loc, result, op_tok, op_text);
    result = createV<ast_binary_operator>(op_loc, op_text, op_tok, result, operand);
  }
  return result;
}
2024-10-31 07:11:41 +00:00
// parse E = += -= E and E ? E : E (right-to-left)
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Notable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
static AnyExprV parse_expr10 ( Lexer & lex ) {
AnyExprV lhs = parse_expr13 ( lex ) ;
2024-10-31 07:03:33 +00:00
TokenType t = lex . tok ( ) ;
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
if ( t = = tok_assign ) {
SrcLocation loc = lex . cur_location ( ) ;
lex . next ( ) ;
AnyExprV rhs = parse_expr10 ( lex ) ;
return createV < ast_assign > ( loc , lhs , rhs ) ;
}
2024-10-31 07:11:41 +00:00
if ( t = = tok_set_plus | | t = = tok_set_minus | | t = = tok_set_mul | | t = = tok_set_div | |
t = = tok_set_mod | | t = = tok_set_lshift | | t = = tok_set_rshift | |
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
t = = tok_set_bitwise_and | | t = = tok_set_bitwise_or | | t = = tok_set_bitwise_xor ) {
2024-10-31 07:03:33 +00:00
SrcLocation loc = lex . cur_location ( ) ;
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
std : : string_view operator_name = lex . cur_str ( ) . substr ( 0 , lex . cur_str ( ) . size ( ) - 1 ) ; // "+" for +=
2024-10-31 07:03:33 +00:00
lex . next ( ) ;
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Noteable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
AnyExprV rhs = parse_expr10 ( lex ) ;
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
return createV < ast_set_assign > ( loc , operator_name , t , lhs , rhs ) ;
2024-10-31 07:03:33 +00:00
}
2024-10-31 07:11:41 +00:00
if ( t = = tok_question ) {
SrcLocation loc = lex . cur_location ( ) ;
lex . next ( ) ;
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Noteable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
AnyExprV when_true = parse_expr10 ( lex ) ;
2024-10-31 07:11:41 +00:00
lex . expect ( tok_colon , " `:` " ) ;
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Noteable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
AnyExprV when_false = parse_expr10 ( lex ) ;
2024-10-31 07:11:41 +00:00
return createV < ast_ternary_operator > ( loc , lhs , when_true , when_false ) ;
}
2024-10-31 07:03:33 +00:00
return lhs ;
}
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Notable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
// Non-static entry point for parsing a whole expression.
// Simply delegates to parse_expr10 and hands back whatever it produced.
AnyExprV parse_expr(Lexer& lex) {
  AnyExprV whole_expr = parse_expr10(lex);
  return whole_expr;
}
2024-10-31 07:11:41 +00:00
// Forward declaration of the statement parser; its definition is not within this part of the file.
AnyV parse_statement ( Lexer & lex ) ;
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Notable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
static AnyExprV parse_var_declaration_lhs ( Lexer & lex , bool is_immutable ) {
2024-10-31 07:03:33 +00:00
SrcLocation loc = lex . cur_location ( ) ;
2024-10-31 07:11:41 +00:00
if ( lex . tok ( ) = = tok_oppar ) {
lex . next ( ) ;
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Noteable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
AnyExprV first = parse_var_declaration_lhs ( lex , is_immutable ) ;
2024-10-31 07:11:41 +00:00
if ( lex . tok ( ) = = tok_clpar ) {
lex . next ( ) ;
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
return first ;
2024-10-31 07:11:41 +00:00
}
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Noteable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
std : : vector < AnyExprV > args ( 1 , first ) ;
2024-10-31 07:11:41 +00:00
while ( lex . tok ( ) = = tok_comma ) {
lex . next ( ) ;
2024-10-31 07:18:54 +00:00
args . push_back ( parse_var_declaration_lhs ( lex , is_immutable ) ) ;
2024-10-31 07:11:41 +00:00
}
lex . expect ( tok_clpar , " `)` " ) ;
return createV < ast_tensor > ( loc , std : : move ( args ) ) ;
}
if ( lex . tok ( ) = = tok_opbracket ) {
lex . next ( ) ;
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Noteable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
std : : vector < AnyExprV > args ( 1 , parse_var_declaration_lhs ( lex , is_immutable ) ) ;
2024-10-31 07:11:41 +00:00
while ( lex . tok ( ) = = tok_comma ) {
lex . next ( ) ;
2024-10-31 07:18:54 +00:00
args . push_back ( parse_var_declaration_lhs ( lex , is_immutable ) ) ;
2024-10-31 07:11:41 +00:00
}
lex . expect ( tok_clbracket , " `]` " ) ;
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
return createV < ast_typed_tuple > ( loc , std : : move ( args ) ) ;
2024-10-31 07:11:41 +00:00
}
if ( lex . tok ( ) = = tok_identifier ) {
auto v_ident = createV < ast_identifier > ( loc , lex . cur_str ( ) ) ;
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
TypePtr declared_type = nullptr ;
2024-10-31 07:11:41 +00:00
bool marked_as_redef = false ;
lex . next ( ) ;
if ( lex . tok ( ) = = tok_colon ) {
lex . next ( ) ;
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
declared_type = parse_type_from_tokens ( lex ) ;
2024-10-31 07:11:41 +00:00
} else if ( lex . tok ( ) = = tok_redef ) {
lex . next ( ) ;
marked_as_redef = true ;
}
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
return createV < ast_local_var_lhs > ( loc , v_ident , declared_type , is_immutable , marked_as_redef ) ;
2024-10-31 07:11:41 +00:00
}
if ( lex . tok ( ) = = tok_underscore ) {
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
TypePtr declared_type = nullptr ;
2024-10-31 07:11:41 +00:00
lex . next ( ) ;
if ( lex . tok ( ) = = tok_colon ) {
lex . next ( ) ;
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
declared_type = parse_type_from_tokens ( lex ) ;
2024-10-31 07:11:41 +00:00
}
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
return createV < ast_local_var_lhs > ( loc , createV < ast_identifier > ( loc , " " ) , declared_type , true , false ) ;
2024-10-31 07:11:41 +00:00
}
lex . unexpected ( " variable name " ) ;
2024-10-31 07:03:33 +00:00
}
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
// Parses a local variable declaration together with its mandatory initializer:
// `<keyword> <lhs> = <expr>;`. The current token is the declaration keyword; whether it is
// `tok_val` decides if the declared variables are immutable.
// Returns an `ast_assign` vertex whose left side is an `ast_local_vars_declaration`.
static AnyV parse_local_vars_declaration_assignment(Lexer& lex) {
  SrcLocation loc = lex.cur_location();
  const bool declared_with_val = lex.tok() == tok_val;
  lex.next();  // step over the declaration keyword

  auto declared_lhs = parse_var_declaration_lhs(lex, declared_with_val);
  AnyExprV lhs = createV<ast_local_vars_declaration>(loc, declared_lhs);

  // a declaration without an initializer is rejected
  if (lex.tok() != tok_assign) {
    lex.error(" variables declaration must be followed by assignment: `var xxx = ...` ");
  }
  lex.next();

  AnyExprV rhs = parse_expr(lex);

  // a comma right after the initializer is reported with a dedicated message
  if (lex.tok() == tok_comma) {
    lex.error(" multiple declarations are not allowed, split variables on separate lines ");
  }
  lex.expect(tok_semicolon, " `;` ");

  return createV<ast_assign>(loc, lhs, rhs);
}
2024-10-31 07:03:33 +00:00
2024-10-31 07:04:58 +00:00
// Parses a brace-delimited block `{ statement* }`.
// Returns an `ast_sequence` carrying the location of the opening brace, the location of the
// closing brace and the parsed statements in source order.
static V<ast_sequence> parse_sequence(Lexer& lex) {
  SrcLocation loc_begin = lex.cur_location();
  lex.expect(tok_opbrace, " `{` ");

  std::vector<AnyV> statements;
  for (;;) {
    if (lex.tok() == tok_clbrace) {
      break;
    }
    statements.push_back(parse_statement(lex));
  }

  // remember where the closing brace is before consuming it
  SrcLocation loc_end = lex.cur_location();
  lex.expect(tok_clbrace, " `}` ");
  return createV<ast_sequence>(loc_begin, loc_end, statements);
}
2024-10-31 07:11:41 +00:00
// Parses `return;` or `return <expr>;` and returns an `ast_return_statement`.
static AnyV parse_return_statement(Lexer& lex) {
  SrcLocation loc = lex.cur_location();
  lex.expect(tok_return, " `return` ");

  AnyExprV returned_value;
  if (lex.tok() == tok_semicolon) {
    // `return;` actually means "nothing" (inferred as void)
    returned_value = createV<ast_empty_expression>(lex.cur_location());
  } else {
    returned_value = parse_expr(lex);
  }

  lex.expect(tok_semicolon, " `;` ");
  return createV<ast_return_statement>(loc, returned_value);
}
2025-01-13 08:21:24 +00:00
// Parses `if (cond) { ... }` with an optional `else { ... }` or `else if (...) ...` tail.
// The resulting `ast_if_statement` always has an else-body:
//  - no `else`      -> an empty sequence located at the current position
//  - `else { ... }` -> that block
//  - `else if ...`  -> a sequence wrapping the recursively parsed inner `if`
static AnyV parse_if_statement(Lexer& lex) {
  SrcLocation loc = lex.cur_location();
  lex.expect(tok_if, " `if` ");
  lex.expect(tok_oppar, " `(` ");
  AnyExprV condition = parse_expr(lex);
  lex.expect(tok_clpar, " `)` ");
  V<ast_sequence> then_block = parse_sequence(lex);

  V<ast_sequence> else_block = nullptr;
  if (lex.tok() != tok_else) {
    // no 'else', create empty block
    else_block = createV<ast_sequence>(lex.cur_location(), lex.cur_location(), {});
  } else {
    lex.next();  // step over `else`
    if (lex.tok() != tok_if) {
      else_block = parse_sequence(lex);
    } else {
      // `else if (e) { }` is stored as an else-block containing a single nested `if`
      AnyV nested_if = parse_if_statement(lex);
      else_block = createV<ast_sequence>(nested_if->loc, lex.cur_location(), {nested_if});
    }
  }

  // NOTE(review): the meaning of the literal `false` argument is not visible in this function
  return createV<ast_if_statement>(loc, false, condition, then_block, else_block);
}
2024-10-31 07:04:58 +00:00
// Parses `repeat (expr) { ... }` and returns an `ast_repeat_statement`
// built from the parenthesized expression and the block.
static AnyV parse_repeat_statement(Lexer& lex) {
  SrcLocation loc = lex.cur_location();

  lex.expect(tok_repeat, " `repeat` ");
  lex.expect(tok_oppar, " `(` ");
  AnyExprV count_expr = parse_expr(lex);
  lex.expect(tok_clpar, " `)` ");

  V<ast_sequence> loop_body = parse_sequence(lex);
  return createV<ast_repeat_statement>(loc, count_expr, loop_body);
}
2024-10-31 07:04:58 +00:00
// Parses `while (cond) { ... }` and returns an `ast_while_statement`
// built from the condition and the block.
static AnyV parse_while_statement(Lexer& lex) {
  SrcLocation loc = lex.cur_location();

  lex.expect(tok_while, " `while` ");
  lex.expect(tok_oppar, " `(` ");
  AnyExprV condition = parse_expr(lex);
  lex.expect(tok_clpar, " `)` ");

  V<ast_sequence> loop_body = parse_sequence(lex);
  return createV<ast_while_statement>(loc, condition, loop_body);
}
2024-10-31 07:11:41 +00:00
// Parses `do { ... } while (cond);` and returns an `ast_do_while_statement`.
// Note the argument order of the vertex: the body comes first, then the condition.
static AnyV parse_do_while_statement(Lexer& lex) {
  SrcLocation loc = lex.cur_location();

  lex.expect(tok_do, " `do` ");
  V<ast_sequence> loop_body = parse_sequence(lex);

  lex.expect(tok_while, " `while` ");
  lex.expect(tok_oppar, " `(` ");
  AnyExprV condition = parse_expr(lex);
  lex.expect(tok_clpar, " `)` ");
  lex.expect(tok_semicolon, " `;` ");

  return createV<ast_do_while_statement>(loc, loop_body, condition);
}
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Notable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
// Parses one variable of a `catch (...)` clause: either an identifier or `_`.
// Returns an `ast_reference` wrapping an `ast_identifier`; for `_` the identifier gets the
// placeholder name literal instead of the token text. Any other token is reported through
// `lex.unexpected`.
static AnyExprV parse_catch_variable(Lexer& lex) {
  SrcLocation loc = lex.cur_location();

  const bool is_named = lex.tok() == tok_identifier;
  if (is_named || lex.tok() == tok_underscore) {
    // NOTE(review): placeholder literal kept verbatim; confirm its exact contents are intended
    std::string_view var_name = " ";
    if (is_named) {
      var_name = lex.cur_str();  // must be read before advancing the lexer
    }
    lex.next();
    return createV<ast_reference>(loc, createV<ast_identifier>(loc, var_name), nullptr);
  }

  lex.unexpected(" identifier ");
}
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
// Builds the same reference vertex that `_` produces in a catch clause, located at the
// lexer's current position. The lexer is only queried, never advanced.
// NOTE(review): placeholder literal kept verbatim; confirm its exact contents are intended.
static AnyExprV create_catch_underscore_variable(const Lexer& lex) {
  return createV<ast_reference>(lex.cur_location(), createV<ast_identifier>(lex.cur_location(), " "), nullptr);
}
2024-10-31 07:11:41 +00:00
// Parses a throw statement in one of three forms:
//   throw code;    throw (code);    throw (code, arg);
// Returns an `ast_throw_statement`; when no argument is written, an `ast_empty_expression`
// located at the start of the statement is used in its place.
static AnyV parse_throw_statement(Lexer& lex) {
  SrcLocation loc = lex.cur_location();
  lex.expect(tok_throw, " `throw` ");

  // an opening parenthesis right after `throw` selects the (code) / (code, arg) forms
  const bool parenthesized = lex.tok() == tok_oppar;
  if (parenthesized) {
    lex.next();
  }

  AnyExprV thrown_code = parse_expr(lex);

  AnyExprV thrown_arg;
  if (parenthesized && lex.tok() == tok_comma) {
    lex.next();
    thrown_arg = parse_expr(lex);
  } else {
    thrown_arg = createV<ast_empty_expression>(loc);
  }

  if (parenthesized) {
    lex.expect(tok_clpar, " `)` ");
  }
  lex.expect(tok_semicolon, " `;` ");
  return createV<ast_throw_statement>(loc, thrown_code, thrown_arg);
}
// Parses an assert statement in one of two forms:
//   assert(cond, code);    assert(cond) throw code;
// Returns an `ast_assert_statement` built from the condition and the thrown code.
static AnyV parse_assert_statement(Lexer& lex) {
  SrcLocation loc = lex.cur_location();
  lex.expect(tok_assert, " `assert` ");
  lex.expect(tok_oppar, " `(` ");
  AnyExprV condition = parse_expr(lex);

  // a comma after the condition means the code is written inside the parentheses
  const bool code_inside_parens = lex.tok() == tok_comma;
  if (code_inside_parens) {
    lex.next();
  } else {
    // assert(cond) throw code
    lex.expect(tok_clpar, " `)` ");
    lex.expect(tok_throw, " `throw excNo` after assert ");
  }

  AnyExprV thrown_code = parse_expr(lex);

  if (code_inside_parens) {
    lex.expect(tok_clpar, " `)` ");
  }
  lex.expect(tok_semicolon, " `;` ");
  return createV<ast_assert_statement>(loc, condition, thrown_code);
}
// Parses `try { ... } catch { ... }`.
// The catch clause always ends up with exactly two variables:
//   catch (excNo, arg)  -> both parsed from source
//   catch (excNo)       -> second one is an underscore placeholder
//   catch               -> both are underscore placeholders
static AnyV parse_try_catch_statement(Lexer& lex) {
  SrcLocation try_loc = lex.cur_location();
  lex.expect(tok_try, " `try` ");
  V<ast_sequence> body_of_try = parse_sequence(lex);

  lex.expect(tok_catch, " `catch` ");
  // the tensor of catch variables is located at the token right after `catch`
  SrcLocation vars_loc = lex.cur_location();
  std::vector<AnyExprV> catch_vars;

  if (lex.tok() != tok_oppar) {
    // catch -> catch (_, _)
    catch_vars.push_back(create_catch_underscore_variable(lex));
    catch_vars.push_back(create_catch_underscore_variable(lex));
  } else {
    lex.next();
    catch_vars.push_back(parse_catch_variable(lex));
    if (lex.tok() == tok_comma) {
      // catch (excNo, arg)
      lex.next();
      catch_vars.push_back(parse_catch_variable(lex));
    } else {
      // catch (excNo) -> catch (excNo, _)
      catch_vars.push_back(create_catch_underscore_variable(lex));
    }
    lex.expect(tok_clpar, " `)` ");
  }

  V<ast_tensor> vars_tensor = createV<ast_tensor>(vars_loc, std::move(catch_vars));
  V<ast_sequence> body_of_catch = parse_sequence(lex);
  return createV<ast_try_catch_statement>(try_loc, body_of_try, vars_tensor, body_of_catch);
}
// Parses a single statement, dispatching on the current token.
// A token that has no dedicated case is parsed as an expression followed by `;`.
AnyV parse_statement(Lexer& lex) {
  switch (lex.tok()) {
    // `var x = 0` is technically an expression, but can not appear in "any place",
    // only as a separate declaration
    case tok_val:
    case tok_var:
      return parse_local_vars_declaration_assignment(lex);

    // nested `{ ... }` block
    case tok_opbrace:
      return parse_sequence(lex);

    // control flow
    case tok_return:
      return parse_return_statement(lex);
    case tok_if:
      return parse_if_statement(lex);
    case tok_repeat:
      return parse_repeat_statement(lex);
    case tok_do:
      return parse_do_while_statement(lex);
    case tok_while:
      return parse_while_statement(lex);

    // exceptions
    case tok_throw:
      return parse_throw_statement(lex);
    case tok_assert:
      return parse_assert_statement(lex);
    case tok_try:
      return parse_try_catch_statement(lex);

    // a lone `;` becomes an empty statement located at that semicolon
    case tok_semicolon: {
      SrcLocation semicolon_loc = lex.cur_location();
      lex.next();
      return createV<ast_empty_statement>(semicolon_loc);
    }

    case tok_continue:
    case tok_break:
      // NOTE(review): lex.error() is presumably non-returning; if it ever returned,
      // control would continue into `default` below - confirm against Lexer
      lex.error(" break/continue from loops are not supported yet ");
    default: {
      AnyExprV expression = parse_expr(lex);
      lex.expect(tok_semicolon, " `;` ");
      return expression;
    }
  }
}
2024-10-31 07:04:58 +00:00
// Parses the body of a regular (non-asm, non-builtin) function;
// it is handled exactly like any other statement sequence.
static AnyV parse_func_body(Lexer& lex) {
  AnyV body = parse_sequence(lex);
  return body;
}
2024-10-31 07:11:41 +00:00
// Parses the body of an assembler function:
//   asm [ ( param_names... [ -> ret_indexes... ] ) ] "CMD" ["CMD" ...] ;
// `param_list` is used to check the parameter count (16 at most) and to turn
// parameter names listed in parentheses into their indexes.
static AnyV parse_asm_func_body(Lexer& lex, V<ast_parameter_list> param_list) {
  SrcLocation asm_loc = lex.cur_location();
  lex.expect(tok_asm, " `asm` ");

  size_t params_count = param_list->size();
  if (params_count > 16) {
    throw ParseError{asm_loc, " assembler built-in function can have at most 16 arguments "};
  }

  std::vector<int> arg_order, ret_order;
  if (lex.tok() == tok_oppar) {
    lex.next();

    // parameter names (or `self`), stored as indexes into param_list
    for (; lex.tok() == tok_identifier || lex.tok() == tok_self; lex.next()) {
      int param_idx = param_list->lookup_idx(lex.cur_str());
      if (param_idx == -1) {
        lex.unexpected(" parameter name ");
      }
      arg_order.push_back(param_idx);
    }

    // optional `-> i j k ...` part: integer constants are stored as given
    if (lex.tok() == tok_arrow) {
      lex.next();
      for (; lex.tok() == tok_int_const; lex.next()) {
        std::string idx_text = static_cast<std::string>(lex.cur_str());
        int ret_idx = std::atoi(idx_text.c_str());
        ret_order.push_back(ret_idx);
      }
    }

    lex.expect(tok_clpar, " `)` ");
  }

  // at least one string constant is required, then all consecutive ones are collected
  std::vector<AnyV> asm_commands;
  lex.check(tok_string_const, " \" ASM COMMAND \" ");
  for (; lex.tok() == tok_string_const; lex.next()) {
    std::string_view command_text = lex.cur_str();
    asm_commands.push_back(createV<ast_string_const>(lex.cur_location(), command_text, 0));
  }

  lex.expect(tok_semicolon, " `;` ");
  return createV<ast_asm_body>(asm_loc, std::move(arg_order), std::move(ret_order), std::move(asm_commands));
}
2024-10-31 07:11:41 +00:00
// Parses a list of generic type names `<T1, T2, ...>` (at least one name is required).
// Every name becomes an ast_genericsT_item located at the identifier itself;
// the list vertex is located at the position where parsing started.
static AnyV parse_genericsT_list(Lexer& lex) {
  SrcLocation loc = lex.cur_location();
  std::vector<AnyV> genericsT_items;
  lex.expect(tok_lt, " `<` ");

  bool expect_next_name = true;
  while (expect_next_name) {
    lex.check(tok_identifier, " T ");
    std::string_view type_name = lex.cur_str();
    genericsT_items.emplace_back(createV<ast_genericsT_item>(lex.cur_location(), type_name));
    lex.next();

    // a comma means one more name follows; anything else ends the list
    expect_next_name = lex.tok() == tok_comma;
    if (expect_next_name) {
      lex.next();
    }
  }

  lex.expect(tok_gt, " `>` ");
  return createV<ast_genericsT_list>{loc, std::move(genericsT_items)};
}
2024-10-31 07:11:41 +00:00
// Parses one annotation: `@name` optionally followed by `(expr, expr, ...)`.
// Validation depends on the annotation kind:
//   - unknown names are rejected;
//   - inline_simple / inline_ref / pure / deprecated accept no arguments
//     (an empty tensor is stored for them);
//   - method_id requires exactly one argument, which must be an integer constant.
// Note that the optional argument list is parsed before the kind is validated.
static V<ast_annotation> parse_annotation(Lexer& lex) {
  SrcLocation loc = lex.cur_location();
  lex.check(tok_annotation_at, " `@` ");
  std::string_view name = lex.cur_str();
  AnnotationKind kind = Vertex<ast_annotation>::parse_kind(name);
  lex.next();

  V<ast_tensor> v_arg = nullptr;
  if (lex.tok() == tok_oppar) {
    SrcLocation loc_args = lex.cur_location();
    lex.next();
    // at least one expression, then any number of comma-separated ones
    std::vector<AnyExprV> arg_exprs;
    arg_exprs.push_back(parse_expr(lex));
    for (; lex.tok() == tok_comma; ) {
      lex.next();
      arg_exprs.push_back(parse_expr(lex));
    }
    lex.expect(tok_clpar, " `)` ");
    v_arg = createV<ast_tensor>(loc_args, std::move(arg_exprs));
  }

  if (kind == AnnotationKind::unknown) {
    throw ParseError(loc, " unknown annotation " + std::string(name));
  }

  bool is_argless_kind = kind == AnnotationKind::inline_simple || kind == AnnotationKind::inline_ref ||
                         kind == AnnotationKind::pure || kind == AnnotationKind::deprecated;
  if (is_argless_kind) {
    if (v_arg) {
      throw ParseError(v_arg->loc, " arguments aren't allowed for " + std::string(name));
    }
    v_arg = createV<ast_tensor>(loc, {});
  } else if (kind == AnnotationKind::method_id) {
    bool is_single_int = v_arg && v_arg->size() == 1 && v_arg->get_item(0)->type == ast_int_const;
    if (!is_single_int) {
      throw ParseError(loc, " expecting `(number) ` after " + std::string(name));
    }
  }

  return createV<ast_annotation>(loc, kind, v_arg);
}
static AnyV parse_function_declaration ( Lexer & lex , const std : : vector < V < ast_annotation > > & annotations ) {
SrcLocation loc = lex . cur_location ( ) ;
bool is_get_method = lex . tok ( ) = = tok_get ;
2024-10-31 07:03:33 +00:00
lex . next ( ) ;
2024-10-31 07:11:41 +00:00
if ( is_get_method & & lex . tok ( ) = = tok_fun ) {
lex . next ( ) ; // 'get f()' and 'get fun f()' both correct
2024-10-31 07:03:33 +00:00
}
2024-10-31 07:11:41 +00:00
lex . check ( tok_identifier , " function name identifier " ) ;
std : : string_view f_name = lex . cur_str ( ) ;
bool is_entrypoint =
f_name = = " main " | | f_name = = " onInternalMessage " | | f_name = = " onExternalMessage " | |
f_name = = " onRunTickTock " | | f_name = = " onSplitPrepare " | | f_name = = " onSplitInstall " ;
bool is_FunC_entrypoint =
f_name = = " recv_internal " | | f_name = = " recv_external " | |
f_name = = " run_ticktock " | | f_name = = " split_prepare " | | f_name = = " split_install " ;
if ( is_FunC_entrypoint ) {
lex . error ( " this is a reserved FunC/Fift identifier; you need `onInternalMessage` " ) ;
2024-10-31 07:03:33 +00:00
}
2024-10-31 07:11:41 +00:00
auto v_ident = createV < ast_identifier > ( lex . cur_location ( ) , f_name ) ;
lex . next ( ) ;
V < ast_genericsT_list > genericsT_list = nullptr ;
if ( lex . tok ( ) = = tok_lt ) { // 'fun f<T1,T2>'
genericsT_list = parse_genericsT_list ( lex ) - > as < ast_genericsT_list > ( ) ;
}
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
V < ast_parameter_list > v_param_list = parse_parameter_list ( lex ) - > as < ast_parameter_list > ( ) ;
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Notable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
bool accepts_self = ! v_param_list - > empty ( ) & & v_param_list - > get_param ( 0 ) - > param_name = = " self " ;
2024-10-31 07:18:54 +00:00
int n_mutate_params = v_param_list - > get_mutate_params_count ( ) ;
2024-10-31 07:11:41 +00:00
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
TypePtr ret_type = nullptr ;
2024-10-31 07:18:54 +00:00
bool returns_self = false ;
2024-10-31 07:11:41 +00:00
if ( lex . tok ( ) = = tok_colon ) { // : <ret_type> (if absent, it means "auto infer", not void)
2024-10-31 07:03:33 +00:00
lex . next ( ) ;
2024-10-31 07:18:54 +00:00
if ( lex . tok ( ) = = tok_self ) {
if ( ! accepts_self ) {
lex . error ( " only a member function can return `self` (which accepts `self` first parameter) " ) ;
}
lex . next ( ) ;
returns_self = true ;
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
ret_type = TypeDataVoid : : create ( ) ;
2024-10-31 07:18:54 +00:00
} else {
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
ret_type = parse_type_from_tokens ( lex ) ;
2024-10-31 07:18:54 +00:00
}
2024-10-31 07:03:33 +00:00
}
2024-10-31 07:18:54 +00:00
if ( is_entrypoint & & ( is_get_method | | genericsT_list | | n_mutate_params | | accepts_self ) ) {
2024-10-31 07:11:41 +00:00
throw ParseError ( loc , " invalid declaration of a reserved function " ) ;
}
2024-10-31 07:18:54 +00:00
if ( is_get_method & & ( genericsT_list | | n_mutate_params | | accepts_self ) ) {
throw ParseError ( loc , " get methods can't have `mutate` and `self` params " ) ;
}
2024-10-31 07:11:41 +00:00
AnyV v_body = nullptr ;
2024-10-31 07:03:33 +00:00
if ( lex . tok ( ) = = tok_builtin ) {
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Notable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
v_body = createV < ast_empty_statement > ( lex . cur_location ( ) ) ;
2024-10-31 07:03:33 +00:00
lex . next ( ) ;
2024-10-31 07:11:41 +00:00
lex . expect ( tok_semicolon , " `;` " ) ;
2024-10-31 07:03:33 +00:00
} else if ( lex . tok ( ) = = tok_opbrace ) {
2024-10-31 07:11:41 +00:00
v_body = parse_func_body ( lex ) ;
2024-10-31 07:03:33 +00:00
} else if ( lex . tok ( ) = = tok_asm ) {
2024-10-31 07:11:41 +00:00
if ( ! ret_type ) {
lex . error ( " asm function must specify return type " ) ;
}
2024-10-31 07:18:54 +00:00
v_body = parse_asm_func_body ( lex , v_param_list ) ;
2024-10-31 07:03:33 +00:00
} else {
2024-10-31 07:11:41 +00:00
lex . unexpected ( " { function body } " ) ;
2024-10-31 07:03:33 +00:00
}
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
int flags = 0 ;
if ( is_entrypoint ) {
flags | = FunctionData : : flagIsEntrypoint ;
}
if ( is_get_method ) {
flags | = FunctionData : : flagGetMethod ;
}
if ( accepts_self ) {
flags | = FunctionData : : flagAcceptsSelf ;
}
if ( returns_self ) {
flags | = FunctionData : : flagReturnsSelf ;
}
2024-10-31 07:11:41 +00:00
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
td : : RefInt256 method_id ;
2024-10-31 07:11:41 +00:00
for ( auto v_annotation : annotations ) {
switch ( v_annotation - > kind ) {
case AnnotationKind : : inline_simple :
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
flags | = FunctionData : : flagInline ;
2024-10-31 07:11:41 +00:00
break ;
case AnnotationKind : : inline_ref :
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
flags | = FunctionData : : flagInlineRef ;
2024-10-31 07:11:41 +00:00
break ;
case AnnotationKind : : pure :
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
flags | = FunctionData : : flagMarkedAsPure ;
2024-10-31 07:11:41 +00:00
break ;
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
case AnnotationKind : : method_id : {
2024-10-31 07:18:54 +00:00
if ( is_get_method | | genericsT_list | | is_entrypoint | | n_mutate_params | | accepts_self ) {
2024-10-31 07:11:41 +00:00
v_annotation - > error ( " @method_id can be specified only for regular functions " ) ;
}
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
auto v_int = v_annotation - > get_arg ( ) - > get_item ( 0 ) - > as < ast_int_const > ( ) ;
if ( v_int - > intval . is_null ( ) | | ! v_int - > intval - > signed_fits_bits ( 32 ) ) {
v_int - > error ( " invalid integer constant " ) ;
}
method_id = v_int - > intval ;
2024-10-31 07:11:41 +00:00
break ;
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
}
2024-10-31 07:11:41 +00:00
case AnnotationKind : : deprecated :
// no special handling
break ;
default :
v_annotation - > error ( " this annotation is not applicable to functions " ) ;
}
}
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
return createV < ast_function_declaration > ( loc , v_ident , v_param_list , v_body , ret_type , genericsT_list , std : : move ( method_id ) , flags ) ;
2024-10-31 07:03:33 +00:00
}
2024-10-31 07:11:41 +00:00
// Parses the compiler-version directive that follows the `tolk` keyword.
// Remembers the location of the current token, reads the following token as a
// semver string, and emits a warning (not an error) when that string matches
// neither TOLK_VERSION nor TOLK_VERSION with the trailing ".0" omitted.
// Returns an ast_tolk_required_version vertex built from the saved location
// and the semver text. No trailing semicolon is consumed here.
static AnyV parse_tolk_required_version ( Lexer & lex ) {
2024-10-31 07:03:33 +00:00
// location is captured before advancing, so the warning below points at the directive start
SrcLocation loc = lex . cur_location ( ) ;
2024-10-31 07:11:41 +00:00
lex . next_special ( tok_semver , " semver " ) ; // syntax: "tolk 0.6"
// copy the token text into an owning string before moving past the token
std : : string semver = static_cast < std : : string > ( lex . cur_str ( ) ) ;
2024-10-31 07:03:33 +00:00
lex . next ( ) ;
2024-10-31 07:11:41 +00:00
// for simplicity, there is no syntax ">= version" and so on, just strict compare
if ( TOLK_VERSION ! = semver & & TOLK_VERSION ! = semver + " .0 " ) { // 0.6 = 0.6.0
loc . show_warning ( " the contract is written in Tolk v " + semver + " , but you use Tolk compiler v " + TOLK_VERSION + " ; probably, it will lead to compilation errors or hash changes " ) ;
}
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Noteable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
return createV < ast_tolk_required_version > ( loc , semver ) ; // semicolon is not necessary
2024-10-31 07:03:33 +00:00
}
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
// Parses an `import "<file>"` directive.
// Requires the `import` keyword, then a string constant holding the file name;
// an empty file name is reported via lex.error(). The name is wrapped into an
// ast_string_const vertex located at the string token, and the result is an
// ast_import_directive vertex located at the `import` keyword.
// No trailing semicolon is consumed here.
static AnyV parse_import_directive ( Lexer & lex ) {
2024-10-31 07:03:33 +00:00
SrcLocation loc = lex . cur_location ( ) ;
2024-10-31 07:11:41 +00:00
lex . expect ( tok_import , " `import` " ) ;
2024-10-31 07:03:33 +00:00
// check() is used instead of expect() because the token text is read below before advancing
// NOTE(review): presumably check() only validates the current token without consuming it — confirm in Lexer
lex . check ( tok_string_const , " source file name " ) ;
std : : string_view rel_filename = lex . cur_str ( ) ;
if ( rel_filename . empty ( ) ) {
lex . error ( " imported file name is an empty string " ) ;
}
2024-10-31 07:04:58 +00:00
// NOTE(review): the meaning of the trailing 0 argument is not visible here — see ast_string_const
auto v_str = createV < ast_string_const > ( lex . cur_location ( ) , rel_filename , 0 ) ;
2024-10-31 07:03:33 +00:00
lex . next ( ) ;
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
return createV < ast_import_directive > ( loc , v_str ) ; // semicolon is not necessary
2024-10-31 07:03:33 +00:00
}
// the main (exported) function
// Lexes the given source file and parses its top-level declarations until EOF,
// returning an ast_tolk_file vertex that owns the collected declarations.
// Annotations (`@...`) are accumulated as they appear and handed to the next
// global / const / function declaration, after which the list is cleared.
// A version directive, an import or a stray semicolon directly after
// annotations is rejected via lex.unexpected().
2024-10-31 07:04:58 +00:00
AnyV parse_src_file_to_ast ( const SrcFile * file ) {
2024-10-31 07:03:33 +00:00
std : : vector < AnyV > toplevel_declarations ;
2024-10-31 07:11:41 +00:00
// annotations seen so far that have not yet been attached to a declaration
std : : vector < V < ast_annotation > > annotations ;
2024-10-31 07:03:33 +00:00
Lexer lex ( file ) ;
2024-10-31 07:11:41 +00:00
2024-10-31 07:03:33 +00:00
while ( ! lex . is_eof ( ) ) {
2024-10-31 07:11:41 +00:00
switch ( lex . tok ( ) ) {
case tok_tolk :
if ( ! annotations . empty ( ) ) {
lex . unexpected ( " declaration after @annotations " ) ;
}
toplevel_declarations . push_back ( parse_tolk_required_version ( lex ) ) ;
break ;
case tok_import :
if ( ! annotations . empty ( ) ) {
lex . unexpected ( " declaration after @annotations " ) ;
}
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
toplevel_declarations . push_back ( parse_import_directive ( lex ) ) ;
2024-10-31 07:11:41 +00:00
break ;
case tok_semicolon :
if ( ! annotations . empty ( ) ) {
lex . unexpected ( " declaration after @annotations " ) ;
}
lex . next ( ) ; // don't add ast_empty, no need
break ;
case tok_annotation_at :
// only collected here; consumed by the next global/const/function declaration
annotations . push_back ( parse_annotation ( lex ) ) ;
break ;
case tok_global :
toplevel_declarations . push_back ( parse_global_var_declaration ( lex , annotations ) ) ;
annotations . clear ( ) ;
break ;
case tok_const :
toplevel_declarations . push_back ( parse_constant_declaration ( lex , annotations ) ) ;
annotations . clear ( ) ;
break ;
// both `fun` and `get` start a function declaration
case tok_fun :
case tok_get :
toplevel_declarations . push_back ( parse_function_declaration ( lex , annotations ) ) ;
annotations . clear ( ) ;
break ;
// keywords that are recognized but have no parser yet
case tok_export :
case tok_struct :
case tok_enum :
case tok_operator :
case tok_infix :
// NOTE(review): no `break` after this call; presumably lex.error() never returns — confirm,
// otherwise control falls through into `default` below
lex . error ( " ` " + static_cast < std : : string > ( lex . cur_str ( ) ) + " ` is not supported yet " ) ;
default :
lex . unexpected ( " fun or get " ) ;
2024-10-31 07:03:33 +00:00
}
}
2024-10-31 07:11:41 +00:00
2024-10-31 07:03:33 +00:00
return createV < ast_tolk_file > ( file , std : : move ( toplevel_declarations ) ) ;
}
} // namespace tolk