2024-10-31 07:03:33 +00:00
/*
This file is part of TON Blockchain Library .
TON Blockchain Library is free software : you can redistribute it and / or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation , either version 2 of the License , or
( at your option ) any later version .
TON Blockchain Library is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
GNU Lesser General Public License for more details .
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
# include "ast-from-tokens.h"
# include "ast.h"
# include "platform-utils.h"
# include "type-expr.h"
2024-10-31 07:11:41 +00:00
# include "tolk-version.h"
2024-10-31 07:03:33 +00:00
/*
* Here we construct AST for a tolk file .
* While constructing , no global state is modified .
* Historically , in FunC , there was no AST : while lexing , symbols were registered , types were inferred , and so on .
* There was no way to perform any more or less semantic analysis .
* Implementing AST gives a giant advance for future modifications and stability .
*/
namespace tolk {
// Tells whether `tok` is one of the comparison operators (the set accepted by parse_expr15):
// == != < > <= >= <=>
static bool is_comparison_binary_op(TokenType tok) {
  switch (tok) {
    case tok_lt:
    case tok_gt:
    case tok_leq:
    case tok_geq:
    case tok_eq:
    case tok_neq:
    case tok_spaceship:
      return true;
    default:
      return false;
  }
}
// Tells whether `tok` is one of the bitwise binary operators: & | ^
static bool is_bitwise_binary_op(TokenType tok) {
  switch (tok) {
    case tok_bitwise_and:
    case tok_bitwise_or:
    case tok_bitwise_xor:
      return true;
    default:
      return false;
  }
}
2024-10-31 07:11:41 +00:00
// Tells whether `tok` is one of the logical binary operators: && ||
static bool is_logical_binary_op(TokenType tok) {
  if (tok == tok_logical_and) {
    return true;
  }
  return tok == tok_logical_or;
}
2024-10-31 07:03:33 +00:00
// Tells whether `tok` is binary addition or subtraction: + -
static bool is_add_or_sub_binary_op(TokenType tok) {
  if (tok == tok_plus) {
    return true;
  }
  return tok == tok_minus;
}
// Reports a case like "flags & 0xFF != 0" (equivalent to "flags & 1", probably unexpected):
// `op_lower` binds weaker than `op_higher`, so the code most likely does not do what was meant.
// It could have been a warning, but it was decided to make it a strict error.
// Never returns: always throws ParseError pointing at `loc`.
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
static void fire_error_lower_precedence(SrcLocation loc, std::string_view op_lower, std::string_view op_higher) {
  const std::string lower(op_lower);
  const std::string higher(op_higher);

  // first half: what is wrong; second half: how to fix or silence it
  const std::string what_is_wrong = lower + " has lower precedence than " + higher +
                                    " , probably this code won't work as you expected. ";
  const std::string how_to_fix = " Use parenthesis: either (... " + lower +
                                 " ...) to evaluate it first, or (... " + higher +
                                 " ...) to suppress this error. ";
  throw ParseError(loc, what_is_wrong + how_to_fix);
}
// Reports a case like "arg1 & arg2 | arg3": two different operators of the same family
// are chained without parenthesis.
// Never returns: always throws ParseError pointing at `loc`.
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
static void fire_error_mix_and_or_no_parenthesis(SrcLocation loc, std::string_view op1, std::string_view op2) {
  std::string message = " mixing ";
  message.append(op1).append(" with ").append(op2);
  message += " without parenthesis may lead to accidental errors. ";
  message += " Use parenthesis to emphasize operator precedence. ";
  throw ParseError(loc, std::move(message));
}
// diagnose when bitwise operators are used in a probably wrong way due to tricky precedence
// example: "flags & 0xFF != 0" is equivalent to "flags & 1", most likely it's unexpected
// the only way to suppress this error for the programmer is to use parenthesis
// (how do we detect presence of parenthesis? simple: (0!=1) is ast_parenthesized_expr{ast_binary_operator},
// that's why if rhs->type == ast_binary_operator, it's not surrounded by parenthesis)
static void diagnose_bitwise_precedence ( SrcLocation loc , std : : string_view operator_name , AnyV lhs , AnyV rhs ) {
// handle "flags & 0xFF != 0" (rhs = "0xFF != 0")
if ( rhs - > type = = ast_binary_operator & & is_comparison_binary_op ( rhs - > as < ast_binary_operator > ( ) - > tok ) ) {
fire_error_lower_precedence ( loc , operator_name , rhs - > as < ast_binary_operator > ( ) - > operator_name ) ;
}
// handle "0 != flags & 0xFF" (lhs = "0 != flags")
if ( lhs - > type = = ast_binary_operator & & is_comparison_binary_op ( lhs - > as < ast_binary_operator > ( ) - > tok ) ) {
fire_error_lower_precedence ( loc , operator_name , lhs - > as < ast_binary_operator > ( ) - > operator_name ) ;
}
2024-10-31 07:11:41 +00:00
}
// similar to above, but detect potentially invalid usage of && and ||
// since anyway, using parenthesis when both && and || occur in the same expression,
// && and || have equal operator precedence in Tolk
static void diagnose_and_or_precedence ( SrcLocation loc , AnyV lhs , TokenType rhs_tok , std : : string_view rhs_operator_name ) {
if ( auto lhs_op = lhs - > try_as < ast_binary_operator > ( ) ) {
// handle "arg1 & arg2 | arg3" (lhs = "arg1 & arg2")
if ( is_bitwise_binary_op ( lhs_op - > tok ) & & is_bitwise_binary_op ( rhs_tok ) & & lhs_op - > tok ! = rhs_tok ) {
fire_error_mix_and_or_no_parenthesis ( loc , lhs_op - > operator_name , rhs_operator_name ) ;
}
2024-10-31 07:03:33 +00:00
2024-10-31 07:11:41 +00:00
// handle "arg1 && arg2 || arg3" (lhs = "arg1 && arg2")
if ( is_logical_binary_op ( lhs_op - > tok ) & & is_logical_binary_op ( rhs_tok ) & & lhs_op - > tok ! = rhs_tok ) {
fire_error_mix_and_or_no_parenthesis ( loc , lhs_op - > operator_name , rhs_operator_name ) ;
}
2024-10-31 07:03:33 +00:00
}
}
// diagnose "a << 8 + 1" (equivalent to "a << 9", probably unexpected)
static void diagnose_addition_in_bitshift ( SrcLocation loc , std : : string_view bitshift_operator_name , AnyV rhs ) {
if ( rhs - > type = = ast_binary_operator & & is_add_or_sub_binary_op ( rhs - > as < ast_binary_operator > ( ) - > tok ) ) {
fire_error_lower_precedence ( loc , bitshift_operator_name , rhs - > as < ast_binary_operator > ( ) - > operator_name ) ;
}
}
2024-10-31 07:11:41 +00:00
// Reports a FunC-style variable declaration like "int i" and suggests the valid syntax.
// Expects the lexer to stand on the type keyword; peeks at the next token to pick up
// the variable name for the hint (a placeholder is used if it isn't an identifier).
// Never returns: always throws ParseError located at the type keyword.
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
static void fire_error_FunC_style_var_declaration(Lexer& lex) {
  SrcLocation loc = lex.cur_location();
  const std::string type_str(lex.cur_str());  // int / slice / etc.
  lex.next();

  std::string var_name = " name ";
  if (lex.tok() == tok_identifier) {
    var_name = static_cast<std::string>(lex.cur_str());
  }
  throw ParseError(loc, " can't parse; probably, you use FunC-like declarations; valid syntax is `var " + var_name + " : " + type_str + " = ...` ");
}
// Replaces (a == null) and similar with a call of the built-in function: __isNull(a);
// (a != null) becomes !__isNull(a). `null` may stand on either side of the operator.
// Any other binary operator is returned as is.
static AnyV maybe_replace_eq_null_with_isNull_call(V<ast_binary_operator> v) {
  const bool is_eq_or_neq = v->tok == tok_eq || v->tok == tok_neq;
  const bool lhs_is_null = v->get_lhs()->type == ast_null_keyword;
  const bool rhs_is_null = v->get_rhs()->type == ast_null_keyword;
  if (!is_eq_or_neq || !(lhs_is_null || rhs_is_null)) {
    return v;
  }

  // the names below must be exact, without any padding: "__isNull" becomes an identifier
  // (presumably resolved by name later), "!" is the spelling of the unary operator
  auto v_ident = createV<ast_identifier>(v->loc, "__isNull");  // built-in function
  // the operand being tested is the one on the opposite side of `null`
  AnyV v_tested = lhs_is_null ? v->get_rhs() : v->get_lhs();
  AnyV v_isNull = createV<ast_function_call>(v->loc, v_ident, createV<ast_tensor>(v->loc, {v_tested}));
  if (v->tok == tok_neq) {
    v_isNull = createV<ast_unary_operator>(v->loc, "!", tok_logical_not, v_isNull);
  }
  return v_isNull;
}
2024-10-31 07:03:33 +00:00
/*
*
* PARSE SOURCE
*
*/
// TE ::= TA | TA -> TE
// TA ::= int | ... | cont | var | _ | () | ( TE { , TE } ) | [ TE { , TE } ]
2024-10-31 07:11:41 +00:00
static TypeExpr * parse_type ( Lexer & lex , V < ast_genericsT_list > genericsT_list ) ;
2024-10-31 07:03:33 +00:00
2024-10-31 07:11:41 +00:00
// Parses a single non-arrow type (TA in the grammar): an atomic keyword type, `auto` (a hole),
// `void` (an empty tensor), a generic type name found in `genericsT_list` (which may be null),
// a parenthesized tensor `( TE { , TE } )` or a bracketed tuple `[ TE { , TE } ]`.
// Anything else is reported through lex.unexpected().
static TypeExpr* parse_type1(Lexer& lex, V<ast_genericsT_list> genericsT_list) {
  // parses `TE { , TE }`; the opening bracket is already consumed, the closing one is left to the caller
  const auto parse_comma_separated_types = [&]() {
    std::vector<TypeExpr*> items;
    items.push_back(parse_type(lex, genericsT_list));
    while (lex.tok() == tok_comma) {
      lex.next();
      items.push_back(parse_type(lex, genericsT_list));
    }
    return items;
  };

  switch (lex.tok()) {
    case tok_int:
      lex.next();
      return TypeExpr::new_atomic(TypeExpr::_Int);
    case tok_cell:
      lex.next();
      return TypeExpr::new_atomic(TypeExpr::_Cell);
    case tok_slice:
      lex.next();
      return TypeExpr::new_atomic(TypeExpr::_Slice);
    case tok_builder:
      lex.next();
      return TypeExpr::new_atomic(TypeExpr::_Builder);
    case tok_continuation:
      lex.next();
      return TypeExpr::new_atomic(TypeExpr::_Cont);
    case tok_tuple:
      lex.next();
      return TypeExpr::new_atomic(TypeExpr::_Tuple);
    case tok_auto:
      lex.next();
      return TypeExpr::new_hole();
    case tok_void:
      lex.next();
      return TypeExpr::new_tensor({});
    case tok_bool:
      // NOTE(review): no break here, lex.error() is presumably noreturn — confirm
      lex.error(" bool type is not supported yet ");
    case tok_identifier: {
      // only names of generic parameters are accepted as identifier types
      if (!genericsT_list) {
        break;
      }
      const int idx = genericsT_list->lookup_idx(lex.cur_str());
      if (idx == -1) {
        break;
      }
      lex.next();
      return genericsT_list->get_item(idx)->created_type;
    }
    case tok_oppar: {
      lex.next();
      if (lex.tok() == tok_clpar) {  // `()`
        lex.next();
        return TypeExpr::new_unit();
      }
      std::vector<TypeExpr*> sub = parse_comma_separated_types();
      lex.expect(tok_clpar, " `)` ");
      return TypeExpr::new_tensor(std::move(sub));
    }
    case tok_opbracket: {
      lex.next();
      if (lex.tok() == tok_clbracket) {  // `[]`
        lex.next();
        return TypeExpr::new_tuple({});
      }
      std::vector<TypeExpr*> sub = parse_comma_separated_types();
      lex.expect(tok_clbracket, " `]` ");
      return TypeExpr::new_tuple(std::move(sub));
    }
    default:
      break;
  }
  lex.unexpected(" <type> ");
}
2024-10-31 07:11:41 +00:00
// Parses a type expression: TE ::= TA | TA -> TE.
// The arrow is right-associative: the right-hand side is parsed recursively as a full TE.
// `genericsT_list` (may be null) is passed through for resolving generic type names.
static TypeExpr* parse_type(Lexer& lex, V<ast_genericsT_list> genericsT_list) {
  TypeExpr* from = parse_type1(lex, genericsT_list);
  if (lex.tok() != tok_arrow) {
    return from;
  }
  lex.next();
  TypeExpr* to = parse_type(lex, genericsT_list);
  return TypeExpr::new_map(from, to);
}
2024-10-31 07:11:41 +00:00
AnyV parse_expr ( Lexer & lex ) ;
// Parses one function parameter: `name: type` or `_: type`.
// For an underscore (unnamed parameter) the identifier vertex is created with an empty name.
// The type after the colon is mandatory (even an explicit ":auto").
static AnyV parse_parameter(Lexer& lex, V<ast_genericsT_list> genericsT_list) {
  SrcLocation loc = lex.cur_location();

  const TokenType name_tok = lex.tok();
  if (name_tok != tok_identifier && name_tok != tok_underscore) {
    lex.unexpected(" parameter name ");
  }
  std::string_view param_name;  // stays empty for an underscore
  if (name_tok == tok_identifier) {
    param_name = lex.cur_str();
  }
  auto v_ident = createV<ast_identifier>(lex.cur_location(), param_name);
  lex.next();

  lex.expect(tok_colon, " `: <parameter_type>` ");
  TypeExpr* param_type = parse_type(lex, genericsT_list);
  return createV<ast_parameter>(loc, v_ident, param_type);
}
2024-10-31 07:11:41 +00:00
// Parses `global name: type;`.
// Annotations are rejected; several names in one declaration and an initializer are rejected too.
static AnyV parse_global_var_declaration(Lexer& lex, const std::vector<V<ast_annotation>>& annotations) {
  if (!annotations.empty()) {
    lex.error(" @annotations are not applicable to global var declaration ");
  }

  SrcLocation loc = lex.cur_location();
  lex.expect(tok_global, " `global` ");

  lex.check(tok_identifier, " global variable name ");
  auto v_ident = createV<ast_identifier>(lex.cur_location(), lex.cur_str());
  lex.next();

  lex.expect(tok_colon, " `:` ");
  TypeExpr* declared_type = parse_type(lex, nullptr);

  // give a dedicated message for the two most probable mistakes before demanding `;`
  switch (lex.tok()) {
    case tok_comma:
      lex.error(" multiple declarations are not allowed, split globals on separate lines ");
      break;
    case tok_assign:
      lex.error(" assigning to a global is not allowed at declaration ");
      break;
    default:
      break;
  }
  lex.expect(tok_semicolon, " `;` ");
  return createV<ast_global_var_declaration>(loc, v_ident, declared_type);
}
2024-10-31 07:11:41 +00:00
// Parses `const name = expr;` or `const name: int = expr;` / `const name: slice = expr;`.
// The declared type is optional (nullptr when omitted) and can only be int or slice.
// Annotations are rejected; several names in one declaration are rejected too.
static AnyV parse_constant_declaration(Lexer& lex, const std::vector<V<ast_annotation>>& annotations) {
  if (!annotations.empty()) {
    // the message names constants (it used to be a copy-paste of the one for globals)
    lex.error(" @annotations are not applicable to constant declaration ");
  }

  SrcLocation loc = lex.cur_location();
  lex.expect(tok_const, " `const` ");

  lex.check(tok_identifier, " constant name ");
  auto v_ident = createV<ast_identifier>(lex.cur_location(), lex.cur_str());
  lex.next();

  TypeExpr* declared_type = nullptr;
  if (lex.tok() == tok_colon) {
    lex.next();
    if (lex.tok() == tok_int) {
      declared_type = TypeExpr::new_atomic(TypeExpr::_Int);
      lex.next();
    } else if (lex.tok() == tok_slice) {
      declared_type = TypeExpr::new_atomic(TypeExpr::_Slice);
      lex.next();
    } else {
      lex.error(" a constant can be int or slice only ");
    }
  }

  lex.expect(tok_assign, " `=` ");
  AnyV init_value = parse_expr(lex);
  if (lex.tok() == tok_comma) {
    lex.error(" multiple declarations are not allowed, split constants on separate lines ");
  }
  lex.expect(tok_semicolon, " `;` ");
  return createV<ast_constant_declaration>(loc, v_ident, declared_type, init_value);
}
2024-10-31 07:11:41 +00:00
// Parses a parenthesized, comma-separated (possibly empty) list of parameters: `(p1: T1, p2: T2)`.
// `genericsT_list` (may be null) is forwarded to every parameter for resolving generic type names.
static AnyV parse_parameter_list(Lexer& lex, V<ast_genericsT_list> genericsT_list) {
  SrcLocation loc = lex.cur_location();
  std::vector<AnyV> params;

  lex.expect(tok_oppar, " parameter list ");
  if (lex.tok() != tok_clpar) {
    for (;;) {
      params.push_back(parse_parameter(lex, genericsT_list));
      if (lex.tok() != tok_comma) {
        break;
      }
      lex.next();
    }
  }
  lex.expect(tok_clpar, " `)` ");

  return createV<ast_parameter_list>(loc, std::move(params));
}
2024-10-31 07:11:41 +00:00
// parse (expr) / [expr] / identifier / number
2024-10-31 07:04:58 +00:00
static AnyV parse_expr100 ( Lexer & lex ) {
2024-10-31 07:03:33 +00:00
SrcLocation loc = lex . cur_location ( ) ;
2024-10-31 07:11:41 +00:00
switch ( lex . tok ( ) ) {
case tok_oppar : {
2024-10-31 07:03:33 +00:00
lex . next ( ) ;
2024-10-31 07:11:41 +00:00
if ( lex . tok ( ) = = tok_clpar ) {
lex . next ( ) ;
return createV < ast_tensor > ( loc , { } ) ;
}
AnyV first = parse_expr ( lex ) ;
if ( lex . tok ( ) = = tok_clpar ) {
lex . next ( ) ;
return createV < ast_parenthesized_expr > ( loc , first ) ;
}
std : : vector < AnyV > items ( 1 , first ) ;
while ( lex . tok ( ) = = tok_comma ) {
lex . next ( ) ;
items . emplace_back ( parse_expr ( lex ) ) ;
}
lex . expect ( tok_clpar , " `)` " ) ;
return createV < ast_tensor > ( loc , std : : move ( items ) ) ;
2024-10-31 07:03:33 +00:00
}
2024-10-31 07:11:41 +00:00
case tok_opbracket : {
2024-10-31 07:03:33 +00:00
lex . next ( ) ;
2024-10-31 07:11:41 +00:00
if ( lex . tok ( ) = = tok_clbracket ) {
lex . next ( ) ;
return createV < ast_tensor_square > ( loc , { } ) ;
2024-10-31 07:03:33 +00:00
}
2024-10-31 07:11:41 +00:00
std : : vector < AnyV > items ( 1 , parse_expr ( lex ) ) ;
while ( lex . tok ( ) = = tok_comma ) {
lex . next ( ) ;
items . emplace_back ( parse_expr ( lex ) ) ;
2024-10-31 07:03:33 +00:00
}
2024-10-31 07:11:41 +00:00
lex . expect ( tok_clbracket , " `]` " ) ;
return createV < ast_tensor_square > ( loc , std : : move ( items ) ) ;
2024-10-31 07:03:33 +00:00
}
2024-10-31 07:11:41 +00:00
case tok_int_const : {
std : : string_view int_val = lex . cur_str ( ) ;
2024-10-31 07:03:33 +00:00
lex . next ( ) ;
2024-10-31 07:11:41 +00:00
return createV < ast_int_const > ( loc , int_val ) ;
2024-10-31 07:03:33 +00:00
}
2024-10-31 07:11:41 +00:00
case tok_string_const : {
std : : string_view str_val = lex . cur_str ( ) ;
2024-10-31 07:03:33 +00:00
lex . next ( ) ;
2024-10-31 07:11:41 +00:00
char modifier = 0 ;
if ( lex . tok ( ) = = tok_string_modifier ) {
modifier = lex . cur_str ( ) [ 0 ] ;
lex . next ( ) ;
2024-10-31 07:03:33 +00:00
}
2024-10-31 07:11:41 +00:00
return createV < ast_string_const > ( loc , str_val , modifier ) ;
2024-10-31 07:03:33 +00:00
}
2024-10-31 07:11:41 +00:00
case tok_underscore : {
2024-10-31 07:03:33 +00:00
lex . next ( ) ;
2024-10-31 07:11:41 +00:00
return createV < ast_underscore > ( loc ) ;
}
case tok_true : {
lex . next ( ) ;
return createV < ast_bool_const > ( loc , true ) ;
}
case tok_false : {
lex . next ( ) ;
return createV < ast_bool_const > ( loc , false ) ;
}
case tok_null : {
lex . next ( ) ;
return createV < ast_null_keyword > ( loc ) ;
}
case tok_identifier : {
std : : string_view str_val = lex . cur_str ( ) ;
lex . next ( ) ;
return createV < ast_identifier > ( loc , str_val ) ;
}
default : {
// show a proper error for `int i` (FunC-style declarations)
TokenType t = lex . tok ( ) ;
if ( t = = tok_int | | t = = tok_cell | | t = = tok_slice | | t = = tok_builder | | t = = tok_tuple ) {
fire_error_FunC_style_var_declaration ( lex ) ;
}
lex . unexpected ( " <expression> " ) ;
2024-10-31 07:03:33 +00:00
}
}
}
2024-10-31 07:11:41 +00:00
// parse E(expr)
2024-10-31 07:04:58 +00:00
static AnyV parse_expr90 ( Lexer & lex ) {
2024-10-31 07:03:33 +00:00
AnyV res = parse_expr100 ( lex ) ;
2024-10-31 07:11:41 +00:00
if ( lex . tok ( ) = = tok_oppar ) {
lex . next ( ) ;
SrcLocation loc = lex . cur_location ( ) ;
std : : vector < AnyV > args ;
if ( lex . tok ( ) ! = tok_clpar ) {
args . push_back ( parse_expr ( lex ) ) ;
while ( lex . tok ( ) = = tok_comma ) {
lex . next ( ) ;
args . push_back ( parse_expr ( lex ) ) ;
}
2024-10-31 07:03:33 +00:00
}
2024-10-31 07:11:41 +00:00
lex . expect ( tok_clpar , " `)` " ) ;
return createV < ast_function_call > ( res - > loc , res , createV < ast_tensor > ( loc , std : : move ( args ) ) ) ;
2024-10-31 07:03:33 +00:00
}
return res ;
}
2024-10-31 07:11:41 +00:00
// parse E .method ~method E (left-to-right)
2024-10-31 07:04:58 +00:00
static AnyV parse_expr80 ( Lexer & lex ) {
2024-10-31 07:03:33 +00:00
AnyV lhs = parse_expr90 ( lex ) ;
while ( lex . tok ( ) = = tok_identifier & & ( lex . cur_str ( ) [ 0 ] = = ' . ' | | lex . cur_str ( ) [ 0 ] = = ' ~ ' ) ) {
std : : string_view method_name = lex . cur_str ( ) ;
lex . next ( ) ;
2024-10-31 07:11:41 +00:00
SrcLocation loc = lex . cur_location ( ) ;
std : : vector < AnyV > args ;
lex . expect ( tok_oppar , " `(` " ) ;
if ( lex . tok ( ) ! = tok_clpar ) {
args . push_back ( parse_expr ( lex ) ) ;
while ( lex . tok ( ) = = tok_comma ) {
lex . next ( ) ;
args . push_back ( parse_expr ( lex ) ) ;
}
}
lex . expect ( tok_clpar , " `)` " ) ;
lhs = createV < ast_dot_tilde_call > ( lhs - > loc , method_name , lhs , createV < ast_tensor > ( loc , std : : move ( args ) ) ) ;
2024-10-31 07:03:33 +00:00
}
return lhs ;
}
2024-10-31 07:11:41 +00:00
// parse ! ~ - + E (unary)
2024-10-31 07:04:58 +00:00
static AnyV parse_expr75 ( Lexer & lex ) {
2024-10-31 07:03:33 +00:00
TokenType t = lex . tok ( ) ;
2024-10-31 07:11:41 +00:00
if ( t = = tok_logical_not | | t = = tok_bitwise_not | | t = = tok_minus | | t = = tok_plus ) {
2024-10-31 07:03:33 +00:00
SrcLocation loc = lex . cur_location ( ) ;
std : : string_view operator_name = lex . cur_str ( ) ;
lex . next ( ) ;
AnyV rhs = parse_expr75 ( lex ) ;
return createV < ast_unary_operator > ( loc , operator_name , t , rhs ) ;
}
2024-10-31 07:11:41 +00:00
return parse_expr80 ( lex ) ;
2024-10-31 07:03:33 +00:00
}
2024-10-31 07:11:41 +00:00
// parse E * / % ^/ ~/ E (left-to-right)
2024-10-31 07:04:58 +00:00
static AnyV parse_expr30 ( Lexer & lex ) {
2024-10-31 07:03:33 +00:00
AnyV lhs = parse_expr75 ( lex ) ;
TokenType t = lex . tok ( ) ;
2024-10-31 07:11:41 +00:00
while ( t = = tok_mul | | t = = tok_div | | t = = tok_mod | | t = = tok_divC | | t = = tok_divR ) {
2024-10-31 07:03:33 +00:00
SrcLocation loc = lex . cur_location ( ) ;
std : : string_view operator_name = lex . cur_str ( ) ;
lex . next ( ) ;
AnyV rhs = parse_expr75 ( lex ) ;
lhs = createV < ast_binary_operator > ( loc , operator_name , t , lhs , rhs ) ;
t = lex . tok ( ) ;
}
return lhs ;
}
2024-10-31 07:11:41 +00:00
// parse E + - E (left-to-right)
2024-10-31 07:04:58 +00:00
static AnyV parse_expr20 ( Lexer & lex ) {
2024-10-31 07:03:33 +00:00
AnyV lhs = parse_expr30 ( lex ) ;
TokenType t = lex . tok ( ) ;
while ( t = = tok_minus | | t = = tok_plus ) {
SrcLocation loc = lex . cur_location ( ) ;
std : : string_view operator_name = lex . cur_str ( ) ;
lex . next ( ) ;
AnyV rhs = parse_expr30 ( lex ) ;
lhs = createV < ast_binary_operator > ( loc , operator_name , t , lhs , rhs ) ;
t = lex . tok ( ) ;
}
return lhs ;
}
2024-10-31 07:11:41 +00:00
// parse E << >> ~>> ^>> E (left-to-right)
2024-10-31 07:04:58 +00:00
static AnyV parse_expr17 ( Lexer & lex ) {
2024-10-31 07:03:33 +00:00
AnyV lhs = parse_expr20 ( lex ) ;
TokenType t = lex . tok ( ) ;
while ( t = = tok_lshift | | t = = tok_rshift | | t = = tok_rshiftC | | t = = tok_rshiftR ) {
SrcLocation loc = lex . cur_location ( ) ;
std : : string_view operator_name = lex . cur_str ( ) ;
lex . next ( ) ;
AnyV rhs = parse_expr20 ( lex ) ;
diagnose_addition_in_bitshift ( loc , operator_name , rhs ) ;
lhs = createV < ast_binary_operator > ( loc , operator_name , t , lhs , rhs ) ;
t = lex . tok ( ) ;
}
return lhs ;
}
2024-10-31 07:11:41 +00:00
// parse E == < > <= >= != <=> E (left-to-right)
2024-10-31 07:04:58 +00:00
static AnyV parse_expr15 ( Lexer & lex ) {
2024-10-31 07:03:33 +00:00
AnyV lhs = parse_expr17 ( lex ) ;
TokenType t = lex . tok ( ) ;
if ( t = = tok_eq | | t = = tok_lt | | t = = tok_gt | | t = = tok_leq | | t = = tok_geq | | t = = tok_neq | | t = = tok_spaceship ) {
SrcLocation loc = lex . cur_location ( ) ;
std : : string_view operator_name = lex . cur_str ( ) ;
lex . next ( ) ;
AnyV rhs = parse_expr17 ( lex ) ;
lhs = createV < ast_binary_operator > ( loc , operator_name , t , lhs , rhs ) ;
2024-10-31 07:11:41 +00:00
if ( t = = tok_eq | | t = = tok_neq ) {
lhs = maybe_replace_eq_null_with_isNull_call ( lhs - > as < ast_binary_operator > ( ) ) ;
}
2024-10-31 07:03:33 +00:00
}
return lhs ;
}
2024-10-31 07:11:41 +00:00
// parse E & | ^ E (left-to-right)
2024-10-31 07:04:58 +00:00
static AnyV parse_expr14 ( Lexer & lex ) {
2024-10-31 07:03:33 +00:00
AnyV lhs = parse_expr15 ( lex ) ;
TokenType t = lex . tok ( ) ;
while ( t = = tok_bitwise_and | | t = = tok_bitwise_or | | t = = tok_bitwise_xor ) {
SrcLocation loc = lex . cur_location ( ) ;
std : : string_view operator_name = lex . cur_str ( ) ;
lex . next ( ) ;
AnyV rhs = parse_expr15 ( lex ) ;
diagnose_bitwise_precedence ( loc , operator_name , lhs , rhs ) ;
2024-10-31 07:11:41 +00:00
diagnose_and_or_precedence ( loc , lhs , t , operator_name ) ;
2024-10-31 07:03:33 +00:00
lhs = createV < ast_binary_operator > ( loc , operator_name , t , lhs , rhs ) ;
t = lex . tok ( ) ;
}
return lhs ;
}
2024-10-31 07:11:41 +00:00
// parse E && || E (left-to-right)
2024-10-31 07:04:58 +00:00
static AnyV parse_expr13 ( Lexer & lex ) {
2024-10-31 07:11:41 +00:00
AnyV lhs = parse_expr14 ( lex ) ;
TokenType t = lex . tok ( ) ;
while ( t = = tok_logical_and | | t = = tok_logical_or ) {
2024-10-31 07:03:33 +00:00
SrcLocation loc = lex . cur_location ( ) ;
2024-10-31 07:11:41 +00:00
std : : string_view operator_name = lex . cur_str ( ) ;
2024-10-31 07:03:33 +00:00
lex . next ( ) ;
2024-10-31 07:11:41 +00:00
AnyV rhs = parse_expr14 ( lex ) ;
diagnose_and_or_precedence ( loc , lhs , t , operator_name ) ;
lhs = createV < ast_binary_operator > ( loc , operator_name , t , lhs , rhs ) ;
t = lex . tok ( ) ;
2024-10-31 07:03:33 +00:00
}
2024-10-31 07:11:41 +00:00
return lhs ;
2024-10-31 07:03:33 +00:00
}
2024-10-31 07:11:41 +00:00
// parse E = += -= E and E ? E : E (right-to-left)
2024-10-31 07:04:58 +00:00
static AnyV parse_expr10 ( Lexer & lex ) {
2024-10-31 07:03:33 +00:00
AnyV lhs = parse_expr13 ( lex ) ;
TokenType t = lex . tok ( ) ;
2024-10-31 07:11:41 +00:00
if ( t = = tok_set_plus | | t = = tok_set_minus | | t = = tok_set_mul | | t = = tok_set_div | |
t = = tok_set_mod | | t = = tok_set_lshift | | t = = tok_set_rshift | |
t = = tok_set_bitwise_and | | t = = tok_set_bitwise_or | | t = = tok_set_bitwise_xor | |
2024-10-31 07:03:33 +00:00
t = = tok_assign ) {
SrcLocation loc = lex . cur_location ( ) ;
std : : string_view operator_name = lex . cur_str ( ) ;
lex . next ( ) ;
AnyV rhs = parse_expr10 ( lex ) ;
return createV < ast_binary_operator > ( loc , operator_name , t , lhs , rhs ) ;
}
2024-10-31 07:11:41 +00:00
if ( t = = tok_question ) {
SrcLocation loc = lex . cur_location ( ) ;
lex . next ( ) ;
AnyV when_true = parse_expr10 ( lex ) ;
lex . expect ( tok_colon , " `:` " ) ;
AnyV when_false = parse_expr10 ( lex ) ;
return createV < ast_ternary_operator > ( loc , lhs , when_true , when_false ) ;
}
2024-10-31 07:03:33 +00:00
return lhs ;
}
// Entry point for parsing any expression: starts from the lowest-precedence level
// (assignments and the ternary operator).
AnyV parse_expr(Lexer& lex) {
  AnyV v_expr = parse_expr10(lex);
  return v_expr;
}
2024-10-31 07:11:41 +00:00
AnyV parse_statement ( Lexer & lex ) ;
// Parses the left side of a variable declaration (recursively):
//   (lhs)             a parenthesized single item
//   (lhs, lhs, ...)   a tensor
//   [lhs, lhs, ...]   a square-bracket list (at least one item is required)
//   name              optionally followed by `: type` or by the redef keyword
//   _                 optionally followed by `: type`
static AnyV parse_var_declaration_lhs(Lexer& lex) {
  SrcLocation loc = lex.cur_location();

  switch (lex.tok()) {
    case tok_oppar: {
      lex.next();
      AnyV first = parse_var_declaration_lhs(lex);
      if (lex.tok() == tok_clpar) {
        lex.next();
        return createV<ast_parenthesized_expr>(loc, first);
      }
      std::vector<AnyV> args;
      args.push_back(first);
      while (lex.tok() == tok_comma) {
        lex.next();
        args.push_back(parse_var_declaration_lhs(lex));
      }
      lex.expect(tok_clpar, " `)` ");
      return createV<ast_tensor>(loc, std::move(args));
    }

    case tok_opbracket: {
      lex.next();
      std::vector<AnyV> args;
      args.push_back(parse_var_declaration_lhs(lex));
      while (lex.tok() == tok_comma) {
        lex.next();
        args.push_back(parse_var_declaration_lhs(lex));
      }
      lex.expect(tok_clbracket, " `]` ");
      return createV<ast_tensor_square>(loc, std::move(args));
    }

    case tok_identifier: {
      auto v_ident = createV<ast_identifier>(loc, lex.cur_str());
      lex.next();
      // a declared type and the redef mark are mutually exclusive here
      TypeExpr* declared_type = nullptr;
      bool marked_as_redef = false;
      if (lex.tok() == tok_colon) {
        lex.next();
        declared_type = parse_type(lex, nullptr);
      } else if (lex.tok() == tok_redef) {
        lex.next();
        marked_as_redef = true;
      }
      return createV<ast_local_var>(loc, v_ident, declared_type, marked_as_redef);
    }

    case tok_underscore: {
      lex.next();
      TypeExpr* declared_type = nullptr;
      if (lex.tok() == tok_colon) {
        lex.next();
        declared_type = parse_type(lex, nullptr);
      }
      return createV<ast_local_var>(loc, createV<ast_underscore>(loc), declared_type, false);
    }

    default:
      break;
  }
  lex.unexpected(" variable name ");
}
2024-10-31 07:11:41 +00:00
// Parses a local variables declaration with a mandatory initializer: `<keyword> lhs = expr;`.
// The lexer is expected to stand on the declaring keyword, which is consumed unconditionally;
// if that keyword is tok_val (an immutable variable), an error is fired since it's not supported yet.
static AnyV parse_local_vars_declaration(Lexer& lex) {
  SrcLocation loc = lex.cur_location();

  const bool declared_with_val = (lex.tok() == tok_val);
  lex.next();
  if (declared_with_val) {
    lex.error(" immutable variables are not supported yet ");
  }

  AnyV lhs = parse_var_declaration_lhs(lex);
  if (lex.tok() != tok_assign) {
    lex.error(" variables declaration must be followed by assignment: `var xxx = ...` ");
  }
  lex.next();

  AnyV assigned_val = parse_expr(lex);
  if (lex.tok() == tok_comma) {
    lex.error(" multiple declarations are not allowed, split variables on separate lines ");
  }
  lex.expect(tok_semicolon, " `;` ");
  return createV<ast_local_vars_declaration>(loc, lhs, assigned_val);
}
2024-10-31 07:03:33 +00:00
2024-10-31 07:04:58 +00:00
// Parses a block of statements in braces: `{ stmt; stmt; ... }` (possibly empty).
// The resulting vertex stores both the location of `{` and the location of `}`.
static V<ast_sequence> parse_sequence(Lexer& lex) {
  SrcLocation loc = lex.cur_location();
  lex.expect(tok_opbrace, " `{` ");
  std::vector<AnyV> items;
  while (lex.tok() != tok_clbrace) {
    items.push_back(parse_statement(lex));
  }
  SrcLocation loc_end = lex.cur_location();
  lex.expect(tok_clbrace, " `}` ");
  // `items` is not used afterwards: hand it over instead of copying the whole vector
  return createV<ast_sequence>(loc, loc_end, std::move(items));
}
2024-10-31 07:11:41 +00:00
// Parses `return expr;` or `return;`.
// `return;` actually means `return ();` (which is void): an empty tensor is used as the returned value.
static AnyV parse_return_statement(Lexer& lex) {
  SrcLocation loc = lex.cur_location();
  lex.expect(tok_return, " `return` ");

  AnyV child = nullptr;
  if (lex.tok() == tok_semicolon) {
    child = createV<ast_tensor>(lex.cur_location(), {});
  } else {
    child = parse_expr(lex);
  }
  lex.expect(tok_semicolon, " `;` ");
  return createV<ast_return_statement>(loc, child);
}
// Parses `if (cond) { ... }` optionally followed by `else { ... }` or `else if (...) ...`.
// `is_ifnot` is the initial "negated" flag of the statement; it is flipped when the condition is `!expr`.
// The else-branch is always present in the result: an empty block is created when there is no `else`,
// and `else if` is wrapped into a block consisting of the nested if statement only.
static AnyV parse_if_statement(Lexer& lex, bool is_ifnot) {
  SrcLocation loc = lex.cur_location();
  lex.expect(tok_if, " `if` ");

  lex.expect(tok_oppar, " `(` ");
  AnyV cond = parse_expr(lex);
  lex.expect(tok_clpar, " `)` ");
  // replace if(!expr) with ifnot(expr) (this should be done later, but for now, let this be right at parsing time)
  if (auto v_not = cond->try_as<ast_unary_operator>(); v_not && v_not->tok == tok_logical_not) {
    cond = v_not->get_rhs();
    is_ifnot = !is_ifnot;
  }

  V<ast_sequence> if_body = parse_sequence(lex);

  V<ast_sequence> else_body = nullptr;
  if (lex.tok() != tok_else) {
    // no 'else', create empty block
    else_body = createV<ast_sequence>(lex.cur_location(), lex.cur_location(), {});
  } else {
    lex.next();
    if (lex.tok() != tok_if) {  // else { }
      else_body = parse_sequence(lex);
    } else {                    // else if(e) { }
      AnyV v_inner_if = parse_if_statement(lex, false);
      else_body = createV<ast_sequence>(v_inner_if->loc, lex.cur_location(), {v_inner_if});
    }
  }

  return createV<ast_if_statement>(loc, is_ifnot, cond, if_body, else_body);
}
2024-10-31 07:04:58 +00:00
// Parses `repeat (expr) { ... }`.
static AnyV parse_repeat_statement(Lexer& lex) {
  SrcLocation loc = lex.cur_location();
  lex.expect(tok_repeat, " `repeat` ");

  lex.expect(tok_oppar, " `(` ");
  AnyV times_expr = parse_expr(lex);
  lex.expect(tok_clpar, " `)` ");

  V<ast_sequence> loop_body = parse_sequence(lex);
  return createV<ast_repeat_statement>(loc, times_expr, loop_body);
}
2024-10-31 07:04:58 +00:00
// Parses `while (cond) <sequence>` into an ast_while_statement.
static AnyV parse_while_statement(Lexer& lex) {
  SrcLocation start_loc = lex.cur_location();
  lex.expect(tok_while, " `while` ");

  // the loop condition is always wrapped into parentheses
  lex.expect(tok_oppar, " `(` ");
  AnyV v_cond = parse_expr(lex);
  lex.expect(tok_clpar, " `)` ");

  V<ast_sequence> v_block = parse_sequence(lex);
  return createV<ast_while_statement>(start_loc, v_cond, v_block);
}
2024-10-31 07:11:41 +00:00
// Parses `do <sequence> while (cond);` into an ast_do_while_statement.
// Unlike `while`, the trailing semicolon is mandatory here.
static AnyV parse_do_while_statement(Lexer& lex) {
  SrcLocation start_loc = lex.cur_location();
  lex.expect(tok_do, " `do` ");
  V<ast_sequence> v_block = parse_sequence(lex);

  // the condition goes after the body
  lex.expect(tok_while, " `while` ");
  lex.expect(tok_oppar, " `(` ");
  AnyV v_cond = parse_expr(lex);
  lex.expect(tok_clpar, " `)` ");
  lex.expect(tok_semicolon, " `;` ");

  return createV<ast_do_while_statement>(start_loc, v_block, v_cond);
}
2024-10-31 07:11:41 +00:00
// Parses one variable of a `catch (...)` clause: either an identifier or `_`.
// Any other token is reported via lex.unexpected().
static AnyV parse_catch_variable(Lexer& lex) {
  SrcLocation var_loc = lex.cur_location();
  switch (lex.tok()) {
    case tok_identifier: {
      std::string_view name = lex.cur_str();
      lex.next();
      return createV<ast_identifier>(var_loc, name);
    }
    case tok_underscore: {
      lex.next();
      return createV<ast_underscore>(var_loc);
    }
    default:
      lex.unexpected(" identifier ");
  }
}
2024-10-31 07:11:41 +00:00
// Parses a throw statement in one of its three forms:
// `throw code;`, `throw (code);` or `throw (code, arg);`.
// When no argument is given, an ast_empty vertex is stored in its place.
static AnyV parse_throw_statement(Lexer& lex) {
  SrcLocation throw_loc = lex.cur_location();
  lex.expect(tok_throw, " `throw` ");

  // an opening parenthesis right after `throw` selects the (code) / (code, arg) forms
  const bool in_parens = lex.tok() == tok_oppar;
  if (in_parens) {
    lex.next();
  }
  AnyV v_code = parse_expr(lex);

  AnyV v_arg = nullptr;
  if (in_parens && lex.tok() == tok_comma) {
    lex.next();
    v_arg = parse_expr(lex);
  } else {
    v_arg = createV<ast_empty>(throw_loc);
  }

  if (in_parens) {
    lex.expect(tok_clpar, " `)` ");
  }
  lex.expect(tok_semicolon, " `;` ");
  return createV<ast_throw_statement>(throw_loc, v_code, v_arg);
}
// Parses an assert statement in one of its two forms:
// `assert(cond, code);` or `assert(cond) throw code;`.
static AnyV parse_assert_statement(Lexer& lex) {
  SrcLocation assert_loc = lex.cur_location();
  lex.expect(tok_assert, " `assert` ");
  lex.expect(tok_oppar, " `(` ");
  AnyV v_cond = parse_expr(lex);

  // a comma after the condition means the code is inside the parentheses
  const bool code_in_parens = lex.tok() == tok_comma;
  if (code_in_parens) {
    lex.next();
  } else {
    lex.expect(tok_clpar, " `)` ");
    lex.expect(tok_throw, " `throw excNo` after assert ");
  }
  AnyV v_code = parse_expr(lex);
  if (code_in_parens) {
    lex.expect(tok_clpar, " `)` ");
  }

  lex.expect(tok_semicolon, " `;` ");
  return createV<ast_assert_statement>(assert_loc, v_cond, v_code);
}
// Parses `try <sequence> catch <sequence>`, where `catch` may be followed by `(excNo)` or `(excNo, arg)`.
// The catch expression is always stored as a tensor of exactly two items:
// omitted variables are filled with ast_underscore vertices.
static AnyV parse_try_catch_statement(Lexer& lex) {
  SrcLocation try_loc = lex.cur_location();
  lex.expect(tok_try, " `try` ");
  V<ast_sequence> v_try_block = parse_sequence(lex);

  lex.expect(tok_catch, " `catch` ");
  SrcLocation catch_loc = lex.cur_location();

  AnyV v_exc_no = nullptr;
  AnyV v_exc_arg = nullptr;
  if (lex.tok() != tok_oppar) {       // catch -> catch (_, _)
    v_exc_no = createV<ast_underscore>(catch_loc);
    v_exc_arg = createV<ast_underscore>(catch_loc);
  } else {
    lex.next();
    v_exc_no = parse_catch_variable(lex);
    if (lex.tok() == tok_comma) {     // catch (excNo, arg)
      lex.next();
      v_exc_arg = parse_catch_variable(lex);
    } else {                          // catch (excNo) -> catch (excNo, _)
      v_exc_arg = createV<ast_underscore>(catch_loc);
    }
    lex.expect(tok_clpar, " `)` ");
  }

  std::vector<AnyV> catch_vars;
  catch_vars.push_back(v_exc_no);
  catch_vars.push_back(v_exc_arg);
  V<ast_tensor> v_catch_expr = createV<ast_tensor>(catch_loc, std::move(catch_vars));

  V<ast_sequence> v_catch_block = parse_sequence(lex);
  return createV<ast_try_catch_statement>(try_loc, v_try_block, v_catch_expr, v_catch_block);
}
// Parses a single statement, dispatching on the current token.
// Whatever does not start with one of the tokens handled below is parsed as an expression followed by `;`.
AnyV parse_statement(Lexer& lex) {
  switch (lex.tok()) {
    // nested block and empty statement
    case tok_opbrace:
      return parse_sequence(lex);
    case tok_semicolon: {
      AnyV v_empty = createV<ast_empty>(lex.cur_location());
      lex.next();
      return v_empty;
    }

    // local variables
    case tok_var:
    case tok_val:
      return parse_local_vars_declaration(lex);

    // branching and returning
    case tok_if:
      return parse_if_statement(lex, false);
    case tok_return:
      return parse_return_statement(lex);

    // loops
    case tok_repeat:
      return parse_repeat_statement(lex);
    case tok_while:
      return parse_while_statement(lex);
    case tok_do:
      return parse_do_while_statement(lex);

    // exceptions
    case tok_throw:
      return parse_throw_statement(lex);
    case tok_assert:
      return parse_assert_statement(lex);
    case tok_try:
      return parse_try_catch_statement(lex);

    // recognized but rejected; as in the original layout, no `break` separates this from `default`
    case tok_break:
    case tok_continue:
      lex.error(" break/continue from loops are not supported yet ");
    default: {
      AnyV v_expr = parse_expr(lex);
      lex.expect(tok_semicolon, " `;` ");
      return v_expr;
    }
  }
}
2024-10-31 07:04:58 +00:00
// A regular (non-asm, non-builtin) function body is parsed as an ordinary statement sequence.
static AnyV parse_func_body(Lexer& lex) {
  V<ast_sequence> v_block = parse_sequence(lex);
  return v_block;
}
2024-10-31 07:11:41 +00:00
// Parses the body of an assembler function:
//   `asm` [ `(` names... [ `->` integers... ] `)` ] "COMMAND"... `;`
// `param_list` is used to check the parameters count (at most 16) and to translate
// the names listed in parentheses into parameter indices (arg_order);
// integers after `->` are collected as-is (ret_order).
static AnyV parse_asm_func_body(Lexer& lex, V<ast_parameter_list> param_list) {
  SrcLocation asm_loc = lex.cur_location();
  lex.expect(tok_asm, " `asm` ");

  size_t n_params = param_list->size();
  if (n_params > 16) {
    throw ParseError{asm_loc, " assembler built-in function can have at most 16 arguments "};
  }

  std::vector<int> arg_order;
  std::vector<int> ret_order;
  if (lex.tok() == tok_oppar) {
    lex.next();
    // every token before `->` must resolve to a parameter of the function
    for (; lex.tok() == tok_identifier || lex.tok() == tok_int_const; lex.next()) {
      int param_idx = param_list->lookup_idx(lex.cur_str());
      if (param_idx == -1) {
        lex.unexpected(" argument name ");
      }
      arg_order.push_back(param_idx);
    }
    if (lex.tok() == tok_arrow) {
      lex.next();
      for (; lex.tok() == tok_int_const; lex.next()) {
        std::string digits(lex.cur_str());
        ret_order.push_back(std::atoi(digits.c_str()));
      }
    }
    lex.expect(tok_clpar, " `)` ");
  }

  // at least one string constant is required, then all consecutive ones are taken
  lex.check(tok_string_const, " \" ASM COMMAND \" ");
  std::vector<AnyV> asm_commands;
  for (; lex.tok() == tok_string_const; lex.next()) {
    std::string_view command = lex.cur_str();
    asm_commands.push_back(createV<ast_string_const>(lex.cur_location(), command, 0));
  }
  lex.expect(tok_semicolon, " `;` ");

  return createV<ast_asm_body>(asm_loc, std::move(arg_order), std::move(ret_order), std::move(asm_commands));
}
2024-10-31 07:11:41 +00:00
// Parses `<T1, T2, ...>` (at least one identifier is required).
// The i-th name (counting from 0) is paired with TypeExpr::new_var(i).
static AnyV parse_genericsT_list(Lexer& lex) {
  SrcLocation list_loc = lex.cur_location();
  lex.expect(tok_lt, " `<` ");

  std::vector<AnyV> v_items;
  for (int next_idx = 0; ; ++next_idx) {
    lex.check(tok_identifier, " T ");
    std::string_view nameT = lex.cur_str();
    TypeExpr* type_var = TypeExpr::new_var(next_idx);
    v_items.push_back(createV<ast_genericsT_item>(lex.cur_location(), type_var, nameT));
    lex.next();
    // a comma continues the list, anything else ends it
    if (lex.tok() != tok_comma) {
      break;
    }
    lex.next();
  }

  lex.expect(tok_gt, " `>` ");
  return createV<ast_genericsT_list>(list_loc, std::move(v_items));
}
2024-10-31 07:11:41 +00:00
// Parses one `@annotation` with optional parenthesized arguments, e.g. `@name` or `@name(expr, ...)`.
// Arguments are validated against the annotation kind:
// - method_id requires exactly one argument, which must be an integer constant;
// - inline_simple / inline_ref / pure / deprecated accept no arguments (an empty tensor is stored);
// - an unknown annotation name is an error.
static V<ast_annotation> parse_annotation(Lexer& lex) {
  SrcLocation at_loc = lex.cur_location();
  lex.check(tok_annotation_at, " `@` ");
  std::string_view name = lex.cur_str();
  AnnotationKind kind = Vertex<ast_annotation>::parse_kind(name);
  lex.next();

  V<ast_tensor> v_arg = nullptr;
  if (lex.tok() == tok_oppar) {
    SrcLocation args_loc = lex.cur_location();
    lex.next();
    // one or more comma-separated expressions
    std::vector<AnyV> v_items;
    for (;;) {
      v_items.push_back(parse_expr(lex));
      if (lex.tok() != tok_comma) {
        break;
      }
      lex.next();
    }
    lex.expect(tok_clpar, " `)` ");
    v_arg = createV<ast_tensor>(args_loc, std::move(v_items));
  }

  switch (kind) {
    case AnnotationKind::method_id: {
      const bool is_single_int = v_arg && v_arg->size() == 1 && v_arg->get_item(0)->type == ast_int_const;
      if (!is_single_int) {
        throw ParseError(at_loc, " expecting `(number) ` after " + std::string(name));
      }
      break;
    }
    case AnnotationKind::deprecated:
    case AnnotationKind::pure:
    case AnnotationKind::inline_ref:
    case AnnotationKind::inline_simple:
      if (v_arg) {
        throw ParseError(v_arg->loc, " arguments aren't allowed for " + std::string(name));
      }
      v_arg = createV<ast_tensor>(at_loc, {});
      break;
    case AnnotationKind::unknown:
      throw ParseError(at_loc, " unknown annotation " + std::string(name));
  }

  return createV<ast_annotation>(at_loc, kind, v_arg);
}
// Parses one function declaration; the current token is its leading keyword (`fun` or `get`).
// Consumes: the name, an optional generics list `<T1,T2>`, the parameter list, an optional `: ret_type`
// and one of three bodies: `builtin;`, a braced block or an `asm` body.
// `annotations` are the `@annotations` collected before the declaration; every one of them
// is applied to the created vertex (or reported as not applicable to functions).
static AnyV parse_function_declaration(Lexer& lex, const std::vector<V<ast_annotation>>& annotations) {
  SrcLocation decl_loc = lex.cur_location();
  bool is_get_method = lex.tok() == tok_get;
  lex.next();
  if (is_get_method && lex.tok() == tok_fun) {
    lex.next();     // 'get f()' and 'get fun f()' both correct
  }

  lex.check(tok_identifier, " function name identifier ");
  std::string_view f_name = lex.cur_str();

  // whether the function name equals any of the listed names
  auto name_is_one_of = [f_name](auto... candidates) {
    return ((f_name == candidates) || ...);
  };
  bool is_entrypoint = name_is_one_of(
      " main ", " onInternalMessage ", " onExternalMessage ",
      " onRunTickTock ", " onSplitPrepare ", " onSplitInstall ");
  bool is_FunC_entrypoint = name_is_one_of(
      " recv_internal ", " recv_external ",
      " run_ticktock ", " split_prepare ", " split_install ");
  if (is_FunC_entrypoint) {
    lex.error(" this is a reserved FunC/Fift identifier; you need `onInternalMessage` ");
  }

  auto v_ident = createV<ast_identifier>(lex.cur_location(), f_name);
  lex.next();

  V<ast_genericsT_list> genericsT_list = nullptr;
  if (lex.tok() == tok_lt) {    // 'fun f<T1,T2>'
    genericsT_list = parse_genericsT_list(lex)->as<ast_genericsT_list>();
  }

  V<ast_parameter_list> param_list = parse_parameter_list(lex, genericsT_list)->as<ast_parameter_list>();

  TypeExpr* ret_type = nullptr;
  if (lex.tok() == tok_colon) { // : <ret_type> (if absent, it means "auto infer", not void)
    lex.next();
    ret_type = parse_type(lex, genericsT_list);
  }

  // entrypoints can be neither get methods, nor generic, nor annotated
  const bool has_forbidden_extras = is_get_method || genericsT_list || !annotations.empty();
  if (is_entrypoint && has_forbidden_extras) {
    throw ParseError(decl_loc, " invalid declaration of a reserved function ");
  }

  AnyV v_body = nullptr;
  switch (lex.tok()) {
    case tok_builtin:
      // a builtin has no body at all: it's represented by ast_empty
      v_body = createV<ast_empty>(lex.cur_location());
      lex.next();
      lex.expect(tok_semicolon, " `;` ");
      break;
    case tok_opbrace:
      v_body = parse_func_body(lex);
      break;
    case tok_asm:
      if (!ret_type) {
        lex.error(" asm function must specify return type ");
      }
      v_body = parse_asm_func_body(lex, param_list);
      break;
    default:
      lex.unexpected(" { function body } ");
  }

  // a missing return type is replaced by a hole
  if (!ret_type) {
    ret_type = TypeExpr::new_hole();
  }

  auto f_declaration = createV<ast_function_declaration>(decl_loc, v_ident, param_list, v_body);
  f_declaration->ret_type = ret_type;
  f_declaration->is_entrypoint = is_entrypoint;
  f_declaration->genericsT_list = genericsT_list;
  f_declaration->marked_as_get_method = is_get_method;
  f_declaration->marked_as_builtin = v_body->type == ast_empty;

  for (auto v_annot : annotations) {
    switch (v_annot->kind) {
      case AnnotationKind::inline_simple:
        f_declaration->marked_as_inline = true;
        break;
      case AnnotationKind::inline_ref:
        f_declaration->marked_as_inline_ref = true;
        break;
      case AnnotationKind::pure:
        f_declaration->marked_as_pure = true;
        break;
      case AnnotationKind::deprecated:
        // no special handling
        break;
      case AnnotationKind::method_id: {
        const bool is_regular_function = !(is_get_method || genericsT_list || is_entrypoint);
        if (!is_regular_function) {
          v_annot->error(" @method_id can be specified only for regular functions ");
        }
        f_declaration->method_id = v_annot->get_arg()->get_item(0)->as<ast_int_const>();
        break;
      }
      default:
        v_annot->error(" this annotation is not applicable to functions ");
    }
  }

  return f_declaration;
}
2024-10-31 07:11:41 +00:00
// Parses the required compiler version directive and shows a warning
// when the requested version differs from TOLK_VERSION of this compiler.
static AnyV parse_tolk_required_version(Lexer& lex) {
  SrcLocation loc = lex.cur_location();
  lex.next_special(tok_semver, " semver ");   // syntax: "tolk 0.6"
  std::string semver(lex.cur_str());
  lex.next();

  // for simplicity, there is no syntax ">= version" and so on, just strict compare
  const bool is_same_version = TOLK_VERSION == semver || TOLK_VERSION == semver + " .0 ";    // 0.6 = 0.6.0
  if (!is_same_version) {
    std::string message = " the contract is written in Tolk v ";
    message += semver;
    message += " , but you use Tolk compiler v ";
    message += TOLK_VERSION;
    message += " ; probably, it will lead to compilation errors or hash changes ";
    loc.show_warning(message);
  }

  return createV<ast_tolk_required_version>(loc, tok_eq, semver);  // semicolon is not necessary
}
2024-10-31 07:11:41 +00:00
// Parses `import "file name"`; the file name must be a non-empty string constant.
static AnyV parse_import_statement(Lexer& lex) {
  SrcLocation import_loc = lex.cur_location();
  lex.expect(tok_import, " `import` ");

  lex.check(tok_string_const, " source file name ");
  std::string_view file_name = lex.cur_str();
  if (file_name.empty()) {
    lex.error(" imported file name is an empty string ");
  }

  // the string vertex is located at the string itself, not at the `import` keyword
  auto v_file_name = createV<ast_string_const>(lex.cur_location(), file_name, 0);
  lex.next();

  return createV<ast_import_statement>(import_loc, v_file_name);   // semicolon is not necessary
}
// the main (exported) function
2024-10-31 07:04:58 +00:00
AnyV parse_src_file_to_ast ( const SrcFile * file ) {
2024-10-31 07:03:33 +00:00
std : : vector < AnyV > toplevel_declarations ;
2024-10-31 07:11:41 +00:00
std : : vector < V < ast_annotation > > annotations ;
2024-10-31 07:03:33 +00:00
Lexer lex ( file ) ;
2024-10-31 07:11:41 +00:00
2024-10-31 07:03:33 +00:00
while ( ! lex . is_eof ( ) ) {
2024-10-31 07:11:41 +00:00
switch ( lex . tok ( ) ) {
case tok_tolk :
if ( ! annotations . empty ( ) ) {
lex . unexpected ( " declaration after @annotations " ) ;
}
toplevel_declarations . push_back ( parse_tolk_required_version ( lex ) ) ;
break ;
case tok_import :
if ( ! annotations . empty ( ) ) {
lex . unexpected ( " declaration after @annotations " ) ;
}
toplevel_declarations . push_back ( parse_import_statement ( lex ) ) ;
break ;
case tok_semicolon :
if ( ! annotations . empty ( ) ) {
lex . unexpected ( " declaration after @annotations " ) ;
}
lex . next ( ) ; // don't add ast_empty, no need
break ;
case tok_annotation_at :
annotations . push_back ( parse_annotation ( lex ) ) ;
break ;
case tok_global :
toplevel_declarations . push_back ( parse_global_var_declaration ( lex , annotations ) ) ;
annotations . clear ( ) ;
break ;
case tok_const :
toplevel_declarations . push_back ( parse_constant_declaration ( lex , annotations ) ) ;
annotations . clear ( ) ;
break ;
case tok_fun :
case tok_get :
toplevel_declarations . push_back ( parse_function_declaration ( lex , annotations ) ) ;
annotations . clear ( ) ;
break ;
case tok_export :
case tok_struct :
case tok_enum :
case tok_operator :
case tok_infix :
lex . error ( " ` " + static_cast < std : : string > ( lex . cur_str ( ) ) + " ` is not supported yet " ) ;
default :
lex . unexpected ( " fun or get " ) ;
2024-10-31 07:03:33 +00:00
}
}
2024-10-31 07:11:41 +00:00
2024-10-31 07:03:33 +00:00
return createV < ast_tolk_file > ( file , std : : move ( toplevel_declarations ) ) ;
}
} // namespace tolk