2024-10-31 06:51:07 +00:00
/*
    This file is part of TON Blockchain Library.
    TON Blockchain Library is free software: you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    (at your option) any later version.
    TON Blockchain Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Lesser General Public License for more details.
    You should have received a copy of the GNU Lesser General Public License
    along with TON Blockchain Library.  If not, see <http://www.gnu.org/licenses/>.
*/
# include "tolk.h"
2024-10-31 07:02:01 +00:00
# include "compiler-state.h"
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
# include "type-system.h"
2024-10-31 06:51:07 +00:00
namespace tolk {
/*
*
* ABSTRACT CODE
*
*/
2025-01-27 07:29:17 +00:00
void TmpVar : : show_as_stack_comment ( std : : ostream & os ) const {
if ( ! name . empty ( ) ) {
os < < name ;
} else {
os < < ' \' ' < < ir_idx ;
}
# ifdef TOLK_DEBUG
// uncomment for detailed stack output, like `'15(binary-op) '16(glob-var)`
// if (desc) os << desc;
# endif
2024-10-31 06:51:07 +00:00
}
2025-01-27 07:29:17 +00:00
void TmpVar : : show ( std : : ostream & os ) const {
os < < ' \' ' < < ir_idx ; // vars are printed out as `'1 '2` (in stack comments, debug info, etc.)
if ( ! name . empty ( ) ) {
os < < ' _ ' < < name ;
}
# ifdef TOLK_DEBUG
if ( desc ) {
os < < ' ' < < desc ; // "origin" of implicitly created tmp var, like `'15 (binary-op) '16 (glob-var)`
2024-10-31 06:51:07 +00:00
}
2025-01-27 07:29:17 +00:00
# endif
2024-10-31 06:51:07 +00:00
}
std : : ostream & operator < < ( std : : ostream & os , const TmpVar & var ) {
var . show ( os ) ;
return os ;
}
void VarDescr : : show_value ( std : : ostream & os ) const {
if ( val & _Int ) {
os < < ' i ' ;
}
if ( val & _Const ) {
os < < ' c ' ;
}
if ( val & _Zero ) {
os < < ' 0 ' ;
}
if ( val & _NonZero ) {
os < < ' ! ' ;
}
if ( val & _Pos ) {
os < < ' > ' ;
}
if ( val & _Neg ) {
os < < ' < ' ;
}
if ( val & _Even ) {
os < < ' E ' ;
}
if ( val & _Odd ) {
os < < ' O ' ;
}
if ( val & _Finite ) {
os < < ' f ' ;
}
if ( val & _Nan ) {
os < < ' N ' ;
}
if ( int_const . not_null ( ) ) {
os < < ' = ' < < int_const ;
}
}
void VarDescr : : show ( std : : ostream & os , const char * name ) const {
if ( flags & _Last ) {
os < < ' * ' ;
}
if ( flags & _Unused ) {
os < < ' ? ' ;
}
if ( name ) {
os < < name ;
}
2025-01-27 07:29:17 +00:00
os < < ' \' ' < < idx ;
2024-10-31 06:51:07 +00:00
show_value ( os ) ;
}
void VarDescr : : set_const ( long long value ) {
return set_const ( td : : make_refint ( value ) ) ;
}
void VarDescr : : set_const ( td : : RefInt256 value ) {
int_const = std : : move ( value ) ;
if ( ! int_const - > signed_fits_bits ( 257 ) ) {
int_const . write ( ) . invalidate ( ) ;
}
val = _Const | _Int ;
int s = sgn ( int_const ) ;
if ( s < - 1 ) {
val | = _Nan | _NonZero ;
} else if ( s < 0 ) {
val | = _NonZero | _Neg | _Finite ;
} else if ( s > 0 ) {
val | = _NonZero | _Pos | _Finite ;
2024-10-31 07:11:41 +00:00
} else {
val | = _Zero | _Neg | _Pos | _Finite ;
2024-10-31 06:51:07 +00:00
}
if ( val & _Finite ) {
val | = int_const - > get_bit ( 0 ) ? _Odd : _Even ;
}
}
void VarDescr : : set_const ( std : : string value ) {
str_const = value ;
val = _Const ;
}
void VarDescr : : operator | = ( const VarDescr & y ) {
val & = y . val ;
if ( is_int_const ( ) & & y . is_int_const ( ) & & cmp ( int_const , y . int_const ) ! = 0 ) {
val & = ~ _Const ;
}
if ( ! ( val & _Const ) ) {
int_const . clear ( ) ;
}
}
void VarDescr : : operator & = ( const VarDescr & y ) {
val | = y . val ;
if ( y . int_const . not_null ( ) & & int_const . is_null ( ) ) {
int_const = y . int_const ;
}
}
void VarDescr : : set_value ( const VarDescr & y ) {
val = y . val ;
int_const = y . int_const ;
}
void VarDescr : : set_value ( VarDescr & & y ) {
val = y . val ;
int_const = std : : move ( y . int_const ) ;
}
void VarDescr : : clear_value ( ) {
val = 0 ;
int_const . clear ( ) ;
}
void VarDescrList : : show ( std : : ostream & os ) const {
if ( unreachable ) {
os < < " <unreachable> " ;
}
os < < " [ " ;
for ( const auto & v : list ) {
os < < ' ' < < v ;
}
os < < " ] \n " ;
}
void Op : : show ( std : : ostream & os , const std : : vector < TmpVar > & vars , std : : string pfx , int mode ) const {
if ( mode & 2 ) {
os < < pfx < < " [ " ;
for ( const auto & v : var_info . list ) {
os < < ' ' ;
if ( v . flags & VarDescr : : _Last ) {
os < < ' * ' ;
}
if ( v . flags & VarDescr : : _Unused ) {
os < < ' ? ' ;
}
os < < vars [ v . idx ] ;
if ( mode & 4 ) {
os < < ' : ' ;
v . show_value ( os ) ;
}
}
os < < " ] \n " ;
}
std : : string dis = disabled ( ) ? " <disabled> " : " " ;
if ( noreturn ( ) ) {
dis + = " <noret> " ;
}
2024-10-31 06:54:05 +00:00
if ( impure ( ) ) {
2024-10-31 06:51:07 +00:00
dis + = " <impure> " ;
}
switch ( cl ) {
case _Undef :
os < < pfx < < dis < < " ??? \n " ;
break ;
case _Nop :
os < < pfx < < dis < < " NOP \n " ;
break ;
case _Call :
os < < pfx < < dis < < " CALL: " ;
show_var_list ( os , left , vars ) ;
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Noteable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
os < < " := " < < ( f_sym ? f_sym - > name : " (null) " ) < < " " ;
2024-10-31 06:51:07 +00:00
if ( ( mode & 4 ) & & args . size ( ) = = right . size ( ) ) {
show_var_list ( os , args , vars ) ;
} else {
show_var_list ( os , right , vars ) ;
}
os < < std : : endl ;
break ;
case _CallInd :
os < < pfx < < dis < < " CALLIND: " ;
show_var_list ( os , left , vars ) ;
os < < " := EXEC " ;
show_var_list ( os , right , vars ) ;
os < < std : : endl ;
break ;
case _Let :
os < < pfx < < dis < < " LET " ;
show_var_list ( os , left , vars ) ;
os < < " := " ;
show_var_list ( os , right , vars ) ;
os < < std : : endl ;
break ;
case _Tuple :
os < < pfx < < dis < < " MKTUPLE " ;
show_var_list ( os , left , vars ) ;
os < < " := " ;
show_var_list ( os , right , vars ) ;
os < < std : : endl ;
break ;
case _UnTuple :
os < < pfx < < dis < < " UNTUPLE " ;
show_var_list ( os , left , vars ) ;
os < < " := " ;
show_var_list ( os , right , vars ) ;
os < < std : : endl ;
break ;
case _IntConst :
os < < pfx < < dis < < " CONST " ;
show_var_list ( os , left , vars ) ;
os < < " := " < < int_const < < std : : endl ;
break ;
case _SliceConst :
os < < pfx < < dis < < " SCONST " ;
show_var_list ( os , left , vars ) ;
os < < " := " < < str_const < < std : : endl ;
break ;
case _Import :
os < < pfx < < dis < < " IMPORT " ;
show_var_list ( os , left , vars ) ;
os < < std : : endl ;
break ;
case _Return :
os < < pfx < < dis < < " RETURN " ;
show_var_list ( os , left , vars ) ;
os < < std : : endl ;
break ;
case _GlobVar :
os < < pfx < < dis < < " GLOBVAR " ;
show_var_list ( os , left , vars ) ;
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Noteable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
os < < " := " < < ( g_sym ? g_sym - > name : " (null) " ) < < std : : endl ;
2024-10-31 06:51:07 +00:00
break ;
case _SetGlob :
os < < pfx < < dis < < " SETGLOB " ;
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Noteable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
os < < ( g_sym ? g_sym - > name : " (null) " ) < < " := " ;
2024-10-31 06:51:07 +00:00
show_var_list ( os , right , vars ) ;
os < < std : : endl ;
break ;
case _Repeat :
os < < pfx < < dis < < " REPEAT " ;
show_var_list ( os , left , vars ) ;
os < < ' ' ;
show_block ( os , block0 . get ( ) , vars , pfx , mode ) ;
os < < std : : endl ;
break ;
case _If :
os < < pfx < < dis < < " IF " ;
show_var_list ( os , left , vars ) ;
os < < ' ' ;
show_block ( os , block0 . get ( ) , vars , pfx , mode ) ;
os < < " ELSE " ;
show_block ( os , block1 . get ( ) , vars , pfx , mode ) ;
os < < std : : endl ;
break ;
case _While :
os < < pfx < < dis < < " WHILE " ;
show_var_list ( os , left , vars ) ;
os < < ' ' ;
show_block ( os , block0 . get ( ) , vars , pfx , mode ) ;
os < < " DO " ;
show_block ( os , block1 . get ( ) , vars , pfx , mode ) ;
os < < std : : endl ;
break ;
case _Until :
os < < pfx < < dis < < " UNTIL " ;
show_var_list ( os , left , vars ) ;
os < < ' ' ;
show_block ( os , block0 . get ( ) , vars , pfx , mode ) ;
os < < std : : endl ;
break ;
case _Again :
os < < pfx < < dis < < " AGAIN " ;
show_var_list ( os , left , vars ) ;
os < < ' ' ;
show_block ( os , block0 . get ( ) , vars , pfx , mode ) ;
os < < std : : endl ;
break ;
default :
os < < pfx < < dis < < " <??? " < < cl < < " > " ;
show_var_list ( os , left , vars ) ;
os < < " -- " ;
show_var_list ( os , right , vars ) ;
os < < std : : endl ;
break ;
}
}
void Op : : show_var_list ( std : : ostream & os , const std : : vector < var_idx_t > & idx_list ,
const std : : vector < TmpVar > & vars ) const {
if ( ! idx_list . size ( ) ) {
os < < " () " ;
} else if ( idx_list . size ( ) = = 1 ) {
os < < vars . at ( idx_list [ 0 ] ) ;
} else {
os < < " ( " < < vars . at ( idx_list [ 0 ] ) ;
for ( std : : size_t i = 1 ; i < idx_list . size ( ) ; i + + ) {
2025-01-27 07:29:17 +00:00
os < < " , " < < vars . at ( idx_list [ i ] ) ;
2024-10-31 06:51:07 +00:00
}
os < < " ) " ;
}
}
void Op : : show_var_list ( std : : ostream & os , const std : : vector < VarDescr > & list , const std : : vector < TmpVar > & vars ) const {
auto n = list . size ( ) ;
if ( ! n ) {
os < < " () " ;
} else {
os < < " ( " ;
for ( std : : size_t i = 0 ; i < list . size ( ) ; i + + ) {
if ( i ) {
os < < " , " ;
}
if ( list [ i ] . is_unused ( ) ) {
os < < ' ? ' ;
}
os < < vars . at ( list [ i ] . idx ) < < ' : ' ;
list [ i ] . show_value ( os ) ;
}
os < < " ) " ;
}
}
void Op : : show_block ( std : : ostream & os , const Op * block , const std : : vector < TmpVar > & vars , std : : string pfx , int mode ) {
os < < " { " < < std : : endl ;
std : : string pfx2 = pfx + " " ;
for ( const Op & op : block ) {
op . show ( os , vars , pfx2 , mode ) ;
}
os < < pfx < < " } " ;
}
std : : ostream & operator < < ( std : : ostream & os , const CodeBlob & code ) {
code . print ( os ) ;
return os ;
}
// flags: +1 = show variable definition locations; +2 = show vars after each op; +4 = show var abstract value info after each op; +8 = show all variables at start
void CodeBlob : : print ( std : : ostream & os , int flags ) const {
os < < " CODE BLOB: " < < var_cnt < < " variables, " < < in_var_cnt < < " input \n " ;
if ( ( flags & 8 ) ! = 0 ) {
for ( const auto & var : vars ) {
2025-01-27 07:29:17 +00:00
var . show ( os ) ;
os < < " : " < < var . v_type < < std : : endl ;
if ( var . loc . is_defined ( ) & & ( flags & 1 ) ! = 0 ) {
var . loc . show ( os ) ;
2024-10-31 06:51:07 +00:00
os < < " defined here: \n " ;
2025-01-27 07:29:17 +00:00
var . loc . show_context ( os ) ;
2024-10-31 06:51:07 +00:00
}
}
}
os < < " ------- BEGIN -------- \n " ;
for ( const auto & op : ops ) {
op . show ( os , vars , " " , flags ) ;
}
os < < " -------- END --------- \n \n " ;
}
2025-01-27 07:29:17 +00:00
std : : vector < var_idx_t > CodeBlob : : create_var ( TypePtr var_type , SrcLocation loc , std : : string name ) {
2024-12-18 16:26:26 +00:00
std : : vector < var_idx_t > ir_idx ;
2025-01-27 07:29:17 +00:00
int stack_w = var_type - > calc_width_on_stack ( ) ;
ir_idx . reserve ( stack_w ) ;
2024-12-18 16:26:26 +00:00
if ( const TypeDataTensor * t_tensor = var_type - > try_as < TypeDataTensor > ( ) ) {
2025-01-27 07:29:17 +00:00
for ( int i = 0 ; i < t_tensor - > size ( ) ; + + i ) {
std : : string sub_name = name . empty ( ) ? name : name + " . " + std : : to_string ( i ) ;
std : : vector < var_idx_t > nested = create_var ( t_tensor - > items [ i ] , loc , std : : move ( sub_name ) ) ;
2024-12-18 16:26:26 +00:00
ir_idx . insert ( ir_idx . end ( ) , nested . begin ( ) , nested . end ( ) ) ;
}
} else if ( var_type ! = TypeDataVoid : : create ( ) ) {
2025-01-27 07:29:17 +00:00
# ifdef TOLK_DEBUG
tolk_assert ( stack_w = = 1 ) ;
# endif
vars . emplace_back ( var_cnt , var_type , std : : move ( name ) , loc ) ;
2024-12-18 16:26:26 +00:00
ir_idx . emplace_back ( var_cnt ) ;
var_cnt + + ;
}
2025-01-27 07:29:17 +00:00
tolk_assert ( static_cast < int > ( ir_idx . size ( ) ) = = stack_w ) ;
2024-12-18 16:26:26 +00:00
return ir_idx ;
2024-10-31 06:51:07 +00:00
}
} // namespace tolk