ton/tolk/lexer.h
tolk-vm 5b44e01455
[Tolk] Allow cell and slice to be valid identifiers
They are no longer keywords, so this is valid:
> var cell = ...;
> var cell: cell = ...;
Motivation: in the future, when structures are implemented, this should obviously be valid:
> struct a { ... }
> var a = ...;
Struct fields will also be allowed to have the names int/slice/cell.
2025-01-27 15:30:21 +03:00

/*
    This file is part of TON Blockchain Library.

    TON Blockchain Library is free software: you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    (at your option) any later version.

    TON Blockchain Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public License
    along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once

#include "platform-utils.h"
#include "src-file.h"

#include <cctype>        // std::isspace, used by skip_spaces() below
#include <string>
#include <string_view>   // std::string_view, used by Token

namespace tolk {

enum TokenType {
  tok_empty,

  tok_fun,
  tok_get,
  tok_type,
  tok_enum,
  tok_struct,
  tok_operator,
  tok_infix,

  tok_global,
  tok_const,
  tok_var,
  tok_val,
  tok_redef,
  tok_mutate,
  tok_self,

  tok_annotation_at,
  tok_colon,
  tok_asm,
  tok_builtin,

  tok_int_const,
  tok_string_const,
  tok_string_modifier,
  tok_true,
  tok_false,
  tok_null,
  tok_identifier,

  tok_dot,
  tok_plus,
  tok_set_plus,
  tok_minus,
  tok_set_minus,
  tok_mul,
  tok_set_mul,
  tok_div,
  tok_set_div,
  tok_mod,
  tok_set_mod,
  tok_lshift,
  tok_set_lshift,
  tok_rshift,
  tok_set_rshift,
  tok_rshiftR,
  tok_rshiftC,
  tok_bitwise_and,
  tok_set_bitwise_and,
  tok_bitwise_or,
  tok_set_bitwise_or,
  tok_bitwise_xor,
  tok_set_bitwise_xor,
  tok_bitwise_not,

  tok_question,
  tok_comma,
  tok_semicolon,
  tok_oppar,
  tok_clpar,
  tok_opbracket,
  tok_clbracket,
  tok_opbrace,
  tok_clbrace,
  tok_assign,
  tok_underscore,
  tok_lt,
  tok_gt,
  tok_logical_not,
  tok_logical_and,
  tok_logical_or,
  tok_eq,
  tok_neq,
  tok_leq,
  tok_geq,
  tok_spaceship,
  tok_divR,
  tok_divC,

  tok_return,
  tok_repeat,
  tok_do,
  tok_while,
  tok_break,
  tok_continue,
  tok_try,
  tok_catch,
  tok_throw,
  tok_assert,
  tok_if,
  tok_else,

  tok_arrow,
  tok_as,

  tok_tolk,
  tok_semver,
  tok_import,
  tok_export,

  tok_eof
};

// All Tolk source code is parsed into tokens.
// Lexer::next() returns the next Token.
struct Token {
  TokenType type = tok_empty;
  std::string_view str_val;

  Token() = default;
  Token(TokenType type, std::string_view str_val): type(type), str_val(str_val) {}
};

// Lexer::next() is the method used externally (while parsing a Tolk file into an AST).
// It's streaming: `next()` parses one token on demand.
// For details, see the comment above the Lexer constructor in lexer.cpp.
class Lexer {
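  // Produced tokens are kept in a small ring buffer of 8 entries; indices are wrapped
  // with `& 7` (see add_token() and the cur_token comment below), which presumably
  // leaves room for limited lookahead/rollback via SavedPositionForLookahead.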
  Token tokens_circularbuf[8]{};
  int last_token_idx = -1;
  int cur_token_idx = -1;
  Token cur_token;            // = tokens_circularbuf[cur_token_idx & 7]

  const SrcFile* file;
  const char *p_start, *p_end, *p_next;
  SrcLocation location;

  void update_location() {
    location.char_offset = static_cast<int>(p_next - p_start);
  }

public:
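  // A snapshot of the lexing state; the parser can take one via save_parsing_position()
  // and later roll back with restore_position(), which allows lookahead during parsing.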
  struct SavedPositionForLookahead {
    const char* p_next = nullptr;
    int cur_token_idx = 0;
    Token cur_token;
  };

  explicit Lexer(const SrcFile* file);
  explicit Lexer(std::string_view text);
  Lexer(const Lexer&) = delete;
  Lexer &operator=(const Lexer&) = delete;

  void add_token(TokenType type, std::string_view str) {
    tokens_circularbuf[++last_token_idx & 7] = Token(type, str);
  }

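  // Low-level helpers that advance the raw character cursor p_next while producing tokens.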
  void skip_spaces() {
    while (std::isspace(*p_next)) {
      ++p_next;
    }
  }

  void skip_line() {
    while (p_next < p_end && *p_next != '\n' && *p_next != '\r') {
      ++p_next;
    }
    while (*p_next == '\n' || *p_next == '\r') {
      ++p_next;
    }
  }

  void skip_chars(int n) {
    p_next += n;
  }

  bool is_eof() const {
    return p_next >= p_end;
  }

  char char_at() const { return *p_next; }
  char char_at(int shift) const { return *(p_next + shift); }
  const char* c_str() const { return p_next; }

  TokenType tok() const { return cur_token.type; }
  std::string_view cur_str() const { return cur_token.str_val; }
  SrcLocation cur_location() const { return location; }
  const SrcFile* cur_file() const { return file; }

  void next();
  void next_special(TokenType parse_next_as, const char* str_expected);

  SavedPositionForLookahead save_parsing_position() const;
  void restore_position(SavedPositionForLookahead saved);

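  // check() asserts the current token's type without consuming it;
  // expect() does the same check and then advances to the next token.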
  void check(TokenType next_tok, const char* str_expected) const {
    if (cur_token.type != next_tok) {
      unexpected(str_expected);   // unlikely path, not inlined
    }
  }
  void expect(TokenType next_tok, const char* str_expected) {
    if (cur_token.type != next_tok) {
      unexpected(str_expected);
    }
    next();
  }

  GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
  void unexpected(const char* str_expected) const;
  GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
  void error(const std::string& err_msg) const;
};
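
// Example of how a parser might drive this Lexer, based only on the declarations above.
// It's an illustrative sketch, not part of this header; see lexer.cpp and the parser
// sources for the authoritative flow (in particular, whether the constructor already
// produces the first token; if it does not, call next() once before the loop).
//
//   Lexer lex(file);
//   while (lex.tok() != tok_eof) {
//     if (lex.tok() == tok_import) {
//       lex.next();                                         // consume `import`
//       lex.check(tok_string_const, "imported file name");  // fails via unexpected() on mismatch
//       std::string_view rel_path = lex.cur_str();          // inspect without consuming
//       lex.next();                                         // consume the string literal
//     } else {
//       lex.next();
//     }
//   }
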
void lexer_init();
// todo #ifdef TOLK_PROFILING
void lexer_measure_performance(const AllRegisteredSrcFiles& files_to_just_parse);
} // namespace tolk