1
0
Fork 0
mirror of https://github.com/ton-blockchain/ton synced 2025-02-13 03:32:22 +00:00
ton/tolk/lexer.h
tolk-vm ebbab54cda
[Tolk] Tolk v0.5.0 as FunC v0.5.0 could have been like
All changes from PR "FunC v0.5.0":
https://github.com/ton-blockchain/ton/pull/1026

Instead of developing FunC, we decided to fork it.
BTW, the first Tolk release will be v0.6,
a metaphor of FunC v0.5 that missed a chance to occur.
2024-11-02 01:33:08 +04:00

118 lines
3.5 KiB
C++

/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "srcread.h"
#include <array>
#include <memory>
#include <cstring>
namespace tolk {
/*
*
* LEXER
*
*/
struct Lexem {
enum { Undefined = -2, Eof = -1, Unknown = 0, Ident = 0, Number = 1, Special = 2, String = 3 };
int tp;
int val;
std::string str;
SrcLocation loc;
int classify();
Lexem(std::string _str = "", const SrcLocation& _loc = {}, int _tp = Unknown, int _val = 0)
: tp(_tp), val(_val), str(_str), loc(_loc) {
classify();
}
int set(std::string _str = "", const SrcLocation& _loc = {}, int _tp = Unknown, int _val = 0);
Lexem& clear(const SrcLocation& _loc = {}, int _tp = Unknown, int _val = 0) {
tp = _tp;
val = _val;
loc = _loc;
str = "";
return *this;
}
bool valid() const {
return tp != Undefined;
}
std::string name_str() const;
void error(std::string _str) const {
throw ParseError{loc, _str};
}
void error_at(std::string str1, std::string str2) const {
error(str1 + str + str2);
}
static std::string lexem_name_str(int idx);
};
class Lexer {
SourceReader& src;
bool eof;
Lexem lexem, peek_lexem;
unsigned char char_class[128];
std::array<int, 3> eol_cmt, cmt_op, cmt_cl; // for ;; {- -}
std::array<int, 3> eol_cmt2, cmt_op2, cmt_cl2; // for // /* */
std::string multiline_quote;
enum cc { left_active = 2, right_active = 1, active = 3, allow_repeat = 4, quote_char = 8 };
public:
bool eof_found() const {
return eof;
}
explicit Lexer(SourceReader& _src, std::string active_chars = ";,() ~.",
std::string quote_chars = "\"", std::string multiline_quote = "\"\"\"");
void set_comment_tokens(const std::string &eol_cmts, const std::string &open_cmts, const std::string &close_cmts);
void set_comment2_tokens(const std::string &eol_cmts2, const std::string &open_cmts2, const std::string &close_cmts2);
void start_parsing();
const Lexem& next();
const Lexem& cur() const {
return lexem;
}
const Lexem& peek();
int tp() const {
return lexem.tp;
}
void expect(int exp_tp, const char* msg = 0);
int classify_char(unsigned c) const {
return c < 0x80 ? char_class[c] : 0;
}
bool is_active(int c) const {
return (classify_char(c) & cc::active) == cc::active;
}
bool is_left_active(int c) const {
return (classify_char(c) & cc::left_active);
}
bool is_right_active(int c) const {
return (classify_char(c) & cc::right_active);
}
bool is_repeatable(int c) const {
return (classify_char(c) & cc::allow_repeat);
}
bool is_quote_char(int c) const {
return (classify_char(c) & cc::quote_char);
}
private:
void set_spec(std::array<int, 3>& arr, std::string setup);
bool is_multiline_quote(const char* begin, const char* end);
};
} // namespace tolk