mirror of
				https://github.com/ton-blockchain/ton
				synced 2025-03-09 15:40:10 +00:00 
			
		
		
		
	They are not keywords anymore.
> var cell = ...;
> var cell: cell = ...;
Motivation: in the future, when structures are implemented, this obviously should be valid:
> struct a { ... }
> var a = ...;
Struct fields will also be allowed to have names int/slice/cell.
		
	
			
		
			
				
	
	
		
			237 lines
		
	
	
	
		
			5 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			237 lines
		
	
	
	
		
			5 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/*
    This file is part of TON Blockchain Library.

    TON Blockchain Library is free software: you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    (at your option) any later version.

    TON Blockchain Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public License
    along with TON Blockchain Library.  If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once

#include "platform-utils.h"
#include "src-file.h"
#include <cctype>
#include <string>
#include <string_view>

namespace tolk {

// Kinds of tokens the lexer can produce; stored in Token::type.
// This is an unscoped enum with implicit values: tok_empty is 0 and every next
// enumerator is the previous one plus 1, so inserting or reordering entries
// changes the numeric values of everything after the edit.
// The blank-line grouping below is kept from the original layout; group captions
// are derived from the enumerator names only.
enum TokenType {
  tok_empty,  // value 0; also the default of Token::type

  // declaration-level words (by name)
  tok_fun,
  tok_get,
  tok_type,
  tok_enum,
  tok_struct,
  tok_operator,
  tok_infix,

  // variable / constant / parameter related words (by name)
  tok_global,
  tok_const,
  tok_var,
  tok_val,
  tok_redef,
  tok_mutate,
  tok_self,

  tok_annotation_at,
  tok_colon,
  tok_asm,
  tok_builtin,

  // literals (by name)
  tok_int_const,
  tok_string_const,
  tok_string_modifier,
  tok_true,
  tok_false,
  tok_null,

  tok_identifier,
  tok_dot,

  // operators; each `tok_set_*` sits right after its base operator
  tok_plus,
  tok_set_plus,
  tok_minus,
  tok_set_minus,
  tok_mul,
  tok_set_mul,
  tok_div,
  tok_set_div,
  tok_mod,
  tok_set_mod,
  tok_lshift,
  tok_set_lshift,
  tok_rshift,
  tok_set_rshift,
  tok_rshiftR,
  tok_rshiftC,
  tok_bitwise_and,
  tok_set_bitwise_and,
  tok_bitwise_or,
  tok_set_bitwise_or,
  tok_bitwise_xor,
  tok_set_bitwise_xor,
  tok_bitwise_not,

  // punctuation, assignment and simple comparison / logical operators (by name)
  tok_question,
  tok_comma,
  tok_semicolon,
  tok_oppar,
  tok_clpar,
  tok_opbracket,
  tok_clbracket,
  tok_opbrace,
  tok_clbrace,
  tok_assign,
  tok_underscore,
  tok_lt,
  tok_gt,
  tok_logical_not,
  tok_logical_and,
  tok_logical_or,

  tok_eq,
  tok_neq,
  tok_leq,
  tok_geq,
  tok_spaceship,
  tok_divR,
  tok_divC,

  // statement words (by name)
  tok_return,
  tok_repeat,
  tok_do,
  tok_while,
  tok_break,
  tok_continue,
  tok_try,
  tok_catch,
  tok_throw,
  tok_assert,
  tok_if,
  tok_else,

  tok_arrow,
  tok_as,

  tok_tolk,
  tok_semver,
  tok_import,
  tok_export,

  tok_eof  // last enumerator
};

// All tolk language is parsed into tokens.
 | 
						|
// Lexer::next() returns a Token.
 | 
						|
struct Token {
 | 
						|
  TokenType type = tok_empty;
 | 
						|
  std::string_view str_val;
 | 
						|
 | 
						|
  Token() = default;
 | 
						|
  Token(TokenType type, std::string_view str_val): type(type), str_val(str_val) {}
 | 
						|
};
 | 
						|
 | 
						|
// Lexer::next() is a method to be used externally (while parsing tolk file to AST).
 | 
						|
// It's streaming: `next()` parses a token on demand.
 | 
						|
// For comments, see lexer.cpp, a comment above Lexer constructor.
 | 
						|
class Lexer {
 | 
						|
  Token tokens_circularbuf[8]{};
 | 
						|
  int last_token_idx = -1;
 | 
						|
  int cur_token_idx = -1;
 | 
						|
  Token cur_token;  // = tokens_circularbuf[cur_token_idx & 7]
 | 
						|
 | 
						|
  const SrcFile* file;
 | 
						|
  const char *p_start, *p_end, *p_next;
 | 
						|
  SrcLocation location;
 | 
						|
 | 
						|
  void update_location() {
 | 
						|
    location.char_offset = static_cast<int>(p_next - p_start);
 | 
						|
  }
 | 
						|
 | 
						|
public:
 | 
						|
 | 
						|
  struct SavedPositionForLookahead {
 | 
						|
    const char* p_next = nullptr;
 | 
						|
    int cur_token_idx = 0;
 | 
						|
    Token cur_token;
 | 
						|
  };
 | 
						|
 | 
						|
  explicit Lexer(const SrcFile* file);
 | 
						|
  explicit Lexer(std::string_view text);
 | 
						|
  Lexer(const Lexer&) = delete;
 | 
						|
  Lexer &operator=(const Lexer&) = delete;
 | 
						|
 | 
						|
  void add_token(TokenType type, std::string_view str) {
 | 
						|
    tokens_circularbuf[++last_token_idx & 7] = Token(type, str);
 | 
						|
  }
 | 
						|
 | 
						|
  void skip_spaces() {
 | 
						|
    while (std::isspace(*p_next)) {
 | 
						|
      ++p_next;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  void skip_line() {
 | 
						|
    while (p_next < p_end && *p_next != '\n' && *p_next != '\r') {
 | 
						|
      ++p_next;
 | 
						|
    }
 | 
						|
    while (*p_next == '\n' || *p_next == '\r') {
 | 
						|
      ++p_next;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  void skip_chars(int n) {
 | 
						|
    p_next += n;
 | 
						|
  }
 | 
						|
 | 
						|
  bool is_eof() const {
 | 
						|
    return p_next >= p_end;
 | 
						|
  }
 | 
						|
 | 
						|
  char char_at() const { return *p_next; }
 | 
						|
  char char_at(int shift) const { return *(p_next + shift); }
 | 
						|
  const char* c_str() const { return p_next; }
 | 
						|
 | 
						|
  TokenType tok() const { return cur_token.type; }
 | 
						|
  std::string_view cur_str() const { return cur_token.str_val; }
 | 
						|
  SrcLocation cur_location() const { return location; }
 | 
						|
  const SrcFile* cur_file() const { return file; }
 | 
						|
 | 
						|
  void next();
 | 
						|
  void next_special(TokenType parse_next_as, const char* str_expected);
 | 
						|
 | 
						|
  SavedPositionForLookahead save_parsing_position() const;
 | 
						|
  void restore_position(SavedPositionForLookahead saved);
 | 
						|
 | 
						|
  void check(TokenType next_tok, const char* str_expected) const {
 | 
						|
    if (cur_token.type != next_tok) {
 | 
						|
      unexpected(str_expected); // unlikely path, not inlined
 | 
						|
    }
 | 
						|
  }
 | 
						|
  void expect(TokenType next_tok, const char* str_expected) {
 | 
						|
    if (cur_token.type != next_tok) {
 | 
						|
      unexpected(str_expected);
 | 
						|
    }
 | 
						|
    next();
 | 
						|
  }
 | 
						|
 | 
						|
  GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
 | 
						|
  void unexpected(const char* str_expected) const;
 | 
						|
  GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD
 | 
						|
  void error(const std::string& err_msg) const;
 | 
						|
};
 | 
						|
 | 
						|
void lexer_init();
 | 
						|
 | 
						|
// todo #ifdef TOLK_PROFILING
 | 
						|
void lexer_measure_performance(const AllRegisteredSrcFiles& files_to_just_parse);
 | 
						|
 | 
						|
}  // namespace tolk