mirror of
https://github.com/ton-blockchain/ton
synced 2025-02-12 11:12:16 +00:00
799e2d1265
The type system of FunC (and of Tolk before this PR) is based on Hindley-Milner. This is a common approach for functional languages, where types are inferred from usage through unification. As a result, type declarations are not necessary: () f(a,b) { return a+b; } // a and b are now int, since `+` is (int, int). While this approach works for now, problems arise with the introduction of new types like bool, where `!x` must handle both int and bool. It will also become incompatible with int32 and other strict integers. It will clash with structure methods, struggle with proper generics, and become entirely impractical for union types. This PR completely rewrites the type system, targeting the future. 1) the type of any expression is inferred once and never changed 2) this is possible because the expressions it depends on have already been inferred 3) `forall` is completely removed and generic functions are introduced (they actually work like template functions, instantiated while inferring) 4) instantiation `<...>` syntax, for example: `t.tupleAt<int>(0)` 5) the `as` keyword, for example: `t.tupleAt(0) as int` 6) method binding is done along with type inference, not before it (doing it "before", as it worked previously, was always the wrong approach)
244 lines
5 KiB
C++
244 lines
5 KiB
C++
/*
    This file is part of TON Blockchain Library.

    TON Blockchain Library is free software: you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    (at your option) any later version.

    TON Blockchain Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public License
    along with TON Blockchain Library.  If not, see <http://www.gnu.org/licenses/>.
*/
|
|
#pragma once
|
|
|
|
#include "platform-utils.h"
#include "src-file.h"
#include <cctype>
#include <string>
#include <string_view>
|
|
|
|
namespace tolk {
|
|
|
|
// Kinds of tokens produced by the lexer.
// The enumerators keep their original order, so their implicit numeric values
// are unchanged (tok_empty == 0 ... tok_eof == 99).
// Group labels below are only a reading aid inferred from the enumerator names;
// the groups themselves follow the blank-line separation of the original list.
enum TokenType {
  // default type of a default-constructed Token
  tok_empty,

  // declaration-level keywords
  tok_fun,
  tok_get,
  tok_type,
  tok_enum,
  tok_struct,
  tok_operator,
  tok_infix,

  // variables / constants / parameter modifiers
  tok_global,
  tok_const,
  tok_var,
  tok_val,
  tok_redef,
  tok_mutate,
  tok_self,

  // annotations and function body specifiers
  tok_annotation_at,
  tok_colon,
  tok_asm,
  tok_builtin,

  // literals
  tok_int_const,
  tok_string_const,
  tok_string_modifier,
  tok_true,
  tok_false,
  tok_null,

  // identifiers and member access
  tok_identifier,
  tok_dot,

  // arithmetic / bitwise operators and their `set_` (assignment) forms
  tok_plus,
  tok_set_plus,
  tok_minus,
  tok_set_minus,
  tok_mul,
  tok_set_mul,
  tok_div,
  tok_set_div,
  tok_mod,
  tok_set_mod,
  tok_lshift,
  tok_set_lshift,
  tok_rshift,
  tok_set_rshift,
  tok_rshiftR,
  tok_rshiftC,
  tok_bitwise_and,
  tok_set_bitwise_and,
  tok_bitwise_or,
  tok_set_bitwise_or,
  tok_bitwise_xor,
  tok_set_bitwise_xor,
  tok_bitwise_not,

  // punctuation, brackets, assignment and logical operators
  tok_question,
  tok_comma,
  tok_semicolon,
  tok_oppar,
  tok_clpar,
  tok_opbracket,
  tok_clbracket,
  tok_opbrace,
  tok_clbrace,
  tok_assign,
  tok_underscore,
  tok_lt,
  tok_gt,
  tok_logical_not,
  tok_logical_and,
  tok_logical_or,

  // comparison operators and rounding divisions
  tok_eq,
  tok_neq,
  tok_leq,
  tok_geq,
  tok_spaceship,
  tok_divR,
  tok_divC,

  // statement keywords
  tok_return,
  tok_repeat,
  tok_do,
  tok_while,
  tok_break,
  tok_continue,
  tok_try,
  tok_catch,
  tok_throw,
  tok_assert,
  tok_if,
  tok_else,

  // type keywords and type-related syntax
  tok_int,
  tok_cell,
  tok_bool,
  tok_slice,
  tok_builder,
  tok_continuation,
  tok_tuple,
  tok_void,
  tok_arrow,
  tok_as,

  // file-level directives
  tok_tolk,
  tok_semver,
  tok_import,
  tok_export,

  // last enumerator
  tok_eof
};
|
|
|
|
// All tolk language is parsed into tokens.
|
|
// Lexer::next() returns a Token.
|
|
struct Token {
|
|
TokenType type = tok_empty;
|
|
std::string_view str_val;
|
|
|
|
Token() = default;
|
|
Token(TokenType type, std::string_view str_val): type(type), str_val(str_val) {}
|
|
};
|
|
|
|
// Lexer::next() is a method to be used externally (while parsing tolk file to AST).
|
|
// It's streaming: `next()` parses a token on demand.
|
|
// For comments, see lexer.cpp, a comment above Lexer constructor.
|
|
class Lexer {
|
|
Token tokens_circularbuf[8]{};
|
|
int last_token_idx = -1;
|
|
int cur_token_idx = -1;
|
|
Token cur_token; // = tokens_circularbuf[cur_token_idx & 7]
|
|
|
|
const SrcFile* file;
|
|
const char *p_start, *p_end, *p_next;
|
|
SrcLocation location;
|
|
|
|
void update_location() {
|
|
location.char_offset = static_cast<int>(p_next - p_start);
|
|
}
|
|
|
|
public:
|
|
|
|
struct SavedPositionForLookahead {
|
|
const char* p_next = nullptr;
|
|
int cur_token_idx = 0;
|
|
Token cur_token;
|
|
};
|
|
|
|
explicit Lexer(const SrcFile* file);
|
|
Lexer(const Lexer&) = delete;
|
|
Lexer &operator=(const Lexer&) = delete;
|
|
|
|
void add_token(TokenType type, std::string_view str) {
|
|
tokens_circularbuf[++last_token_idx & 7] = Token(type, str);
|
|
}
|
|
|
|
void skip_spaces() {
|
|
while (std::isspace(*p_next)) {
|
|
++p_next;
|
|
}
|
|
}
|
|
|
|
void skip_line() {
|
|
while (p_next < p_end && *p_next != '\n' && *p_next != '\r') {
|
|
++p_next;
|
|
}
|
|
while (*p_next == '\n' || *p_next == '\r') {
|
|
++p_next;
|
|
}
|
|
}
|
|
|
|
void skip_chars(int n) {
|
|
p_next += n;
|
|
}
|
|
|
|
bool is_eof() const {
|
|
return p_next >= p_end;
|
|
}
|
|
|
|
char char_at() const { return *p_next; }
|
|
char char_at(int shift) const { return *(p_next + shift); }
|
|
const char* c_str() const { return p_next; }
|
|
|
|
TokenType tok() const { return cur_token.type; }
|
|
std::string_view cur_str() const { return cur_token.str_val; }
|
|
SrcLocation cur_location() const { return location; }
|
|
const SrcFile* cur_file() const { return file; }
|
|
|
|
void next();
|
|
void next_special(TokenType parse_next_as, const char* str_expected);
|
|
|
|
SavedPositionForLookahead save_parsing_position() const;
|
|
void restore_position(SavedPositionForLookahead saved);
|
|
|
|
void check(TokenType next_tok, const char* str_expected) const {
|
|
if (cur_token.type != next_tok) {
|
|
unexpected(str_expected); // unlikely path, not inlined
|
|
}
|
|
}
|
|
void expect(TokenType next_tok, const char* str_expected) {
|
|
if (cur_token.type != next_tok) {
|
|
unexpected(str_expected);
|
|
}
|
|
next();
|
|
}
|
|
|
|
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
|
|
void unexpected(const char* str_expected) const;
|
|
GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
|
|
void error(const std::string& err_msg) const;
|
|
};
|
|
|
|
void lexer_init();
|
|
|
|
// todo #ifdef TOLK_PROFILING
|
|
void lexer_measure_performance(const AllRegisteredSrcFiles& files_to_just_parse);
|
|
|
|
} // namespace tolk
|