mirror of
https://github.com/ton-blockchain/ton
synced 2025-03-09 15:40:10 +00:00
initial commit
This commit is contained in:
commit
c2da007f40
1610 changed files with 398047 additions and 0 deletions
288
crypto/parser/lexer.cpp
Normal file
288
crypto/parser/lexer.cpp
Normal file
|
@ -0,0 +1,288 @@
|
|||
/*
|
||||
This file is part of TON Blockchain Library.
|
||||
|
||||
TON Blockchain Library is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
TON Blockchain Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Copyright 2017-2019 Telegram Systems LLP
|
||||
*/
|
||||
#include "lexer.h"
|
||||
#include "symtable.h"
|
||||
#include <sstream>
|
||||
#include <cassert>
|
||||
|
||||
namespace src {
|
||||
|
||||
/*
|
||||
*
|
||||
* LEXER
|
||||
*
|
||||
*/
|
||||
|
||||
std::string Lexem::lexem_name_str(int idx) {
|
||||
if (idx == Eof) {
|
||||
return "end of file";
|
||||
} else if (idx == Ident) {
|
||||
return "identifier";
|
||||
} else if (idx == Number) {
|
||||
return "number";
|
||||
} else if (idx == String) {
|
||||
return "string";
|
||||
} else if (idx == Special) {
|
||||
return "special";
|
||||
} else if (sym::symbols.get_keyword(idx)) {
|
||||
return "`" + sym::symbols.get_keyword(idx)->str + "`";
|
||||
} else {
|
||||
std::ostringstream os{"<unknown lexem of type "};
|
||||
os << idx << ">";
|
||||
return os.str();
|
||||
}
|
||||
}
|
||||
|
||||
std::string Lexem::name_str() const {
|
||||
if (tp == Ident) {
|
||||
return std::string{"identifier `"} + sym::symbols.get_name(val) + "`";
|
||||
} else if (tp == String) {
|
||||
return std::string{"string \""} + str + '"';
|
||||
} else {
|
||||
return lexem_name_str(tp);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Tells whether `str` is an integer literal.
 *
 * Accepted forms: an optional leading '-', then either one or more decimal
 * digits, or the lowercase prefix "0x" followed by one or more hexadecimal
 * digits (either case). The empty string, a lone "-" and a bare "0x" are
 * rejected.
 */
bool is_number(std::string str) {
  const std::size_t len = str.size();
  if (len == 0) {
    return false;
  }
  // Skip the optional sign.
  std::size_t pos = (str[0] == '-') ? 1 : 0;
  bool hex = false;
  if (pos + 1 < len && str[pos] == '0' && str[pos + 1] == 'x') {
    pos += 2;
    hex = true;
  }
  // At least one digit must follow the sign/prefix.
  if (pos == len) {
    return false;
  }
  for (; pos < len; ++pos) {
    const int ch = str[pos];
    if (ch >= '0' && ch <= '9') {
      continue;
    }
    if (!hex) {
      return false;
    }
    // Setting bit 0x20 folds 'A'..'F' onto 'a'..'f'.
    const int lowered = ch | 0x20;
    if (lowered < 'a' || lowered > 'f') {
      return false;
    }
  }
  return true;
}
|
||||
|
||||
int Lexem::classify() {
|
||||
if (tp != Unknown) {
|
||||
return tp;
|
||||
}
|
||||
sym::sym_idx_t i = sym::symbols.lookup(str);
|
||||
if (i) {
|
||||
assert(str == sym::symbols[i]->str);
|
||||
str = sym::symbols[i]->str;
|
||||
sym::sym_idx_t idx = sym::symbols[i]->idx;
|
||||
tp = (idx < 0 ? -idx : Ident);
|
||||
val = i;
|
||||
} else if (is_number(str)) {
|
||||
tp = Number;
|
||||
} else {
|
||||
tp = lexem_is_special(str);
|
||||
}
|
||||
if (tp == Unknown) {
|
||||
tp = Ident;
|
||||
val = sym::symbols.lookup(str, 1);
|
||||
}
|
||||
return tp;
|
||||
}
|
||||
|
||||
int Lexem::set(std::string _str, const SrcLocation& _loc, int _tp, int _val) {
|
||||
str = _str;
|
||||
loc = _loc;
|
||||
tp = _tp;
|
||||
val = _val;
|
||||
return classify();
|
||||
}
|
||||
|
||||
/**
 * Builds a lexer over `_src` and configures its character classes.
 *
 * @param _src          source reader the lexer pulls characters from
 * @param init          when true, the first lexem is read immediately
 * @param active_chars  space-separated groups of ASCII characters; the first
 *                      group gets class `active`, and every space lowers the
 *                      class value by one for the following group (a value
 *                      that reaches zero becomes `allow_repeat`)
 * @param eol_cmts      end-of-line comment markers (see set_spec)
 * @param open_cmts     block comment openers (see set_spec)
 * @param close_cmts    block comment closers (see set_spec)
 * @param quote_chars   printable ASCII characters that delimit string literals
 */
Lexer::Lexer(SourceReader& _src, bool init, std::string active_chars, std::string eol_cmts, std::string open_cmts,
             std::string close_cmts, std::string quote_chars)
    : src(_src), eof(false), lexem("", src.here(), Lexem::Undefined), peek_lexem("", {}, Lexem::Undefined) {
  std::memset(char_class, 0, sizeof(char_class));
  unsigned char group_class = cc::active;
  for (std::size_t k = 0; k < active_chars.size(); ++k) {
    const char ch = active_chars[k];
    if (ch != ' ') {
      // Only 7-bit characters have a slot in char_class.
      if ((unsigned)ch < 0x80) {
        char_class[(unsigned)ch] |= group_class;
      }
      continue;
    }
    // A space starts the next group with the next lower class value.
    --group_class;
    if (!group_class) {
      group_class = cc::allow_repeat;
    }
  }
  set_spec(eol_cmt, eol_cmts);
  set_spec(cmt_op, open_cmts);
  set_spec(cmt_cl, close_cmts);
  for (const char ch : quote_chars) {
    const int code = ch;
    if (code <= ' ' || code > 0x7f) {
      continue;
    }
    char_class[(unsigned)code] |= cc::quote_char;
  }
  if (init) {
    next();
  }
}
|
||||
|
||||
/**
 * Parses a space-separated list of comment markers into `arr`.
 *
 * A one-character token is stored in arr[0]; a two-character token is stored
 * in arr[1] and arr[2]; longer tokens are ignored. When several tokens of the
 * same length appear, the last one wins. Slots with no marker hold -0x100,
 * a value no character compares equal to.
 */
void Lexer::set_spec(std::array<int, 3>& arr, std::string setup) {
  arr.fill(-0x100);
  const std::size_t total = setup.size();
  std::size_t pos = 0;
  while (pos < total) {
    if (setup[pos] == ' ') {
      ++pos;
      continue;
    }
    // Find the end of the current token.
    std::size_t stop = pos;
    while (stop < total && setup[stop] != ' ') {
      ++stop;
    }
    const std::size_t width = stop - pos;
    if (width == 1) {
      arr[0] = setup[pos];
    } else if (width == 2) {
      arr[1] = setup[pos];
      arr[2] = setup[pos + 1];
    }
    pos = stop;
  }
}
|
||||
|
||||
void Lexer::expect(int exp_tp, const char* msg) {
|
||||
if (tp() != exp_tp) {
|
||||
throw ParseError{lexem.loc, (msg ? std::string{msg} : Lexem::lexem_name_str(exp_tp)) + " expected instead of " +
|
||||
cur().name_str()};
|
||||
}
|
||||
next();
|
||||
}
|
||||
|
||||
/**
 * Advances to the next lexem and returns it.
 *
 * If a lexem was buffered by peek(), it becomes the current one. Otherwise
 * whitespace and comments are skipped and one token is scanned from the
 * current line: either a quoted/back-quoted token, or a run of characters
 * delimited by blanks and "active" characters. At end of input an Eof lexem
 * is returned (repeatedly, on further calls).
 *
 * Nested block comments are tracked in `comm` as a bit stack below a
 * sentinel 1 bit: each opener pushes one bit (1 for the two-character
 * opener, 0 for the one-character opener) and each closer pops one.
 *
 * @throws ParseError (via src.error) on mismatched or unterminated comments,
 *         on too deep comment nesting, and on unterminated quoted tokens
 */
const Lexem& Lexer::next() {
  if (peek_lexem.valid()) {
    lexem = std::move(peek_lexem);
    peek_lexem.clear({}, Lexem::Undefined);
    eof = (lexem.tp == Lexem::Eof);
    return lexem;
  }
  if (eof) {
    return lexem.clear(src.here(), Lexem::Eof);
  }
  long long comm = 1;  // sentinel only: not inside any block comment
  while (!src.seek_eof()) {
    int cc = src.cur_char(), nc = src.next_char();
    if (cc == eol_cmt[0] || (cc == eol_cmt[1] && nc == eol_cmt[2])) {
      // End-of-line comment: drop the rest of the line.
      src.load_line();
    } else if (cc == cmt_op[1] && nc == cmt_op[2]) {
      src.advance(2);
      comm = comm * 2 + 1;
    } else if (cc == cmt_op[0]) {
      src.advance(1);
      comm *= 2;
    } else if (comm == 1) {
      // Outside any comment and looking at a real token.
      break;
    } else if (cc == cmt_cl[1] && nc == cmt_cl[2]) {
      if (!(comm & 1)) {
        src.error(std::string{"a `"} + (char)cmt_op[0] + "` comment closed by `" + (char)cmt_cl[1] + (char)cmt_cl[2] +
                  "`");
      }
      comm >>= 1;
      src.advance(2);
    } else if (cc == cmt_cl[0]) {
      // A one-character closer is a mismatch only when the innermost comment
      // was opened by the two-character opener (low bit set), which is what
      // the message reports. The original tested the opposite condition and
      // so rejected every correctly matched one-character comment.
      if (comm & 1) {
        src.error(std::string{"a `"} + (char)cmt_op[1] + (char)cmt_op[2] + "` comment closed by `" + (char)cmt_cl[0] +
                  "`");
      }
      comm >>= 1;
      src.advance(1);
    } else {
      // Ordinary character inside a comment.
      src.advance(1);
    }
    if (comm < 0) {
      src.error("too many nested comments");
    }
  }
  if (src.seek_eof()) {
    eof = true;
    if (comm > 1) {
      if (comm & 1) {
        src.error(std::string{"`"} + (char)cmt_op[1] + (char)cmt_op[2] + "` comment extends past end of file");
      } else {
        src.error(std::string{"`"} + (char)cmt_op[0] + "` comment extends past end of file");
      }
    }
    return lexem.clear(src.here(), Lexem::Eof);
  }
  int c = src.cur_char();
  const char* end = src.get_ptr();
  if (is_quote_char(c) || c == '`') {
    // Quoted token: runs up to the matching quote on the same line.
    int qc = c;
    ++end;
    while (end < src.get_end_ptr() && *end != qc) {
      ++end;
    }
    if (*end != qc) {
      src.error(qc == '`' ? "a `back-quoted` token extends past end of line" : "string extends past end of line");
    }
    // Back-quoted text is classified like a bare token; other quotes give a String.
    lexem.set(std::string{src.get_ptr() + 1, end}, src.here(), qc == '`' ? Lexem::Unknown : Lexem::String);
    src.set_ptr(end + 1);
    // std::cerr << lexem.name_str() << ' ' << lexem.str << std::endl;
    return lexem;
  }
  int len = 0, pc = -0x100;
  while (end < src.get_end_ptr()) {
    c = *end;
    // A repeatable character directly following itself does not split the token.
    bool repeated = (c == pc && is_repeatable(c));
    if (c == ' ' || c == 9 || (len && is_left_active(c) && !repeated)) {
      break;
    }
    ++len;
    ++end;
    if (is_right_active(c) && !repeated) {
      break;
    }
    pc = c;
  }
  lexem.set(std::string{src.get_ptr(), end}, src.here());
  src.set_ptr(end);
  // std::cerr << lexem.name_str() << ' ' << lexem.str << std::endl;
  return lexem;
}
|
||||
|
||||
/**
 * Returns the lexem that follows the current one without consuming it.
 *
 * The looked-ahead lexem is cached in `peek_lexem`; the next call to next()
 * hands it out. Once end of input has been reached the Eof lexem is returned.
 */
const Lexem& Lexer::peek() {
  if (!peek_lexem.valid()) {
    if (eof) {
      return lexem.clear(src.here(), Lexem::Eof);
    }
    // Scan one lexem ahead, then put the current one back in place.
    Lexem current = std::move(lexem);
    next();
    peek_lexem = std::move(lexem);
    lexem = std::move(current);
    // The current lexem is not Eof even if the scan just reached the end.
    eof = false;
  }
  return peek_lexem;
}
|
||||
|
||||
} // namespace src
|
114
crypto/parser/lexer.h
Normal file
114
crypto/parser/lexer.h
Normal file
|
@ -0,0 +1,114 @@
|
|||
/*
|
||||
This file is part of TON Blockchain Library.
|
||||
|
||||
TON Blockchain Library is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
TON Blockchain Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Copyright 2017-2019 Telegram Systems LLP
|
||||
*/
|
||||
#pragma once
|
||||
#include "srcread.h"
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <cstring>
|
||||
|
||||
namespace src {
|
||||
|
||||
/*
|
||||
*
|
||||
* LEXER
|
||||
*
|
||||
*/
|
||||
|
||||
int lexem_is_special(std::string str); // return 0 if no special lexems are needed
|
||||
|
||||
// A single lexical token: its type, value, text and source location.
struct Lexem {
  // Built-in lexem types. Unknown and Ident deliberately share the value 0:
  // classify() treats 0 as "not yet classified" and resolves it to Ident at the latest.
  // Undefined marks an empty slot (see valid()).
  enum { Undefined = -2, Eof = -1, Unknown = 0, Ident = 0, Number = 1, Special = 2, String = 3 };
  int tp;           // lexem type: one of the enum values above or a keyword code
  int val;          // symbol table index for symbols found or added by classify()
  std::string str;  // lexem text
  SrcLocation loc;  // where the lexem was read
  // Derives `tp` (and `val`) from `str` when `tp` is Unknown; returns `tp`.
  int classify();
  // Constructs a lexem and classifies it immediately.
  Lexem(std::string _str = "", const SrcLocation& _loc = {}, int _tp = Unknown, int _val = 0)
      : tp(_tp), val(_val), str(_str), loc(_loc) {
    classify();
  }
  // Overwrites all fields and classifies the lexem; returns the resulting type.
  int set(std::string _str = "", const SrcLocation& _loc = {}, int _tp = Unknown, int _val = 0);
  // Resets the lexem to an empty text with the given location, type and value (no classification).
  Lexem& clear(const SrcLocation& _loc = {}, int _tp = Unknown, int _val = 0) {
    tp = _tp;
    val = _val;
    loc = _loc;
    str = "";
    return *this;
  }
  // True unless the lexem is an empty (Undefined) slot.
  bool valid() const {
    return tp != Undefined;
  }
  // Description of this lexem for error messages.
  std::string name_str() const;
  // Throws a ParseError with message `_str` located at this lexem.
  void error(std::string _str) const {
    throw ParseError{loc, _str};
  }
  // Throws a ParseError whose message is str1 + <lexem text> + str2.
  void error_at(std::string str1, std::string str2) const {
    error(str1 + str + str2);
  }

  // Description of a lexem type code for error messages.
  static std::string lexem_name_str(int idx);
};
|
||||
|
||||
// Tokenizer over a SourceReader with one lexem of look-ahead.
class Lexer {
  SourceReader& src;                           // character source (not owned)
  bool eof;                                    // true once the current lexem is Eof
  Lexem lexem, peek_lexem;                     // current lexem and the look-ahead slot
  unsigned char char_class[128];               // bit mask of cc flags per 7-bit character
  std::array<int, 3> eol_cmt, cmt_op, cmt_cl;  // comment markers: [0] one-char form, [1],[2] two-char form
  // Character class flags stored in char_class.
  enum cc { left_active = 2, right_active = 1, active = 3, allow_repeat = 4, quote_char = 8 };

 public:
  // True once end of input has been reached by next().
  bool eof_found() const {
    return eof;
  }
  // See the definition for the meaning of the configuration strings; when `init` is true
  // the first lexem is read immediately.
  Lexer(SourceReader& _src, bool init = false, std::string active_chars = ";,() ~.", std::string eol_cmts = ";;",
        std::string open_cmts = "{-", std::string close_cmts = "-}", std::string quote_chars = "\"");
  // Advances to the next lexem and returns it.
  const Lexem& next();
  // Current lexem.
  const Lexem& cur() const {
    return lexem;
  }
  // Next lexem, without consuming the current one.
  const Lexem& peek();
  // Type of the current lexem.
  int tp() const {
    return lexem.tp;
  }
  // Consumes the current lexem if its type is exp_tp, otherwise throws ParseError.
  void expect(int exp_tp, const char* msg = 0);
  // Class flags of character c; characters outside 0..0x7f have no flags.
  int classify_char(unsigned c) const {
    return c < 0x80 ? char_class[c] : 0;
  }
  // True if c is both left- and right-active.
  bool is_active(int c) const {
    return (classify_char(c) & cc::active) == cc::active;
  }
  // True if c ends the token scanned so far (c itself starts the next token).
  bool is_left_active(int c) const {
    return (classify_char(c) & cc::left_active);
  }
  // True if the token ends right after c.
  bool is_right_active(int c) const {
    return (classify_char(c) & cc::right_active);
  }
  // True if consecutive repetitions of c stay within one token.
  bool is_repeatable(int c) const {
    return (classify_char(c) & cc::allow_repeat);
  }
  // True if c opens and closes a string literal.
  bool is_quote_char(int c) const {
    return (classify_char(c) & cc::quote_char);
  }

 private:
  // Parses a space-separated marker list into arr (one- and two-character markers).
  void set_spec(std::array<int, 3>& arr, std::string setup);
};
|
||||
|
||||
} // namespace src
|
180
crypto/parser/srcread.cpp
Normal file
180
crypto/parser/srcread.cpp
Normal file
|
@ -0,0 +1,180 @@
|
|||
/*
|
||||
This file is part of TON Blockchain Library.
|
||||
|
||||
TON Blockchain Library is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
TON Blockchain Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Copyright 2017-2019 Telegram Systems LLP
|
||||
*/
|
||||
#include "srcread.h"
|
||||
|
||||
namespace src {
|
||||
|
||||
/*
|
||||
*
|
||||
* SOURCE FILE READER
|
||||
*
|
||||
*/
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const FileDescr* fdescr) {
|
||||
return os << (fdescr ? (fdescr->is_stdin ? "stdin" : fdescr->filename) : "unknown-location");
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const Fatal& fatal) {
|
||||
return os << fatal.get_msg();
|
||||
}
|
||||
|
||||
void SrcLocation::show(std::ostream& os) const {
|
||||
os << fdescr;
|
||||
if (line_no > 0) {
|
||||
os << ':' << line_no;
|
||||
if (line_pos >= 0) {
|
||||
os << ':' << (line_pos + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool SrcLocation::show_context(std::ostream& os) const {
|
||||
if (text.empty() || line_pos < 0 || (unsigned)line_pos > text.size()) {
|
||||
return false;
|
||||
}
|
||||
bool skip_left = (line_pos > 200), skip_right = (line_pos + 200u < text.size());
|
||||
const char* start = skip_left ? text.c_str() + line_pos - 100 : text.c_str();
|
||||
const char* end = skip_right ? text.c_str() + line_pos + 100 : text.c_str() + text.size();
|
||||
const char* here = text.c_str() + line_pos;
|
||||
os << " ";
|
||||
if (skip_left) {
|
||||
os << "... ";
|
||||
}
|
||||
for (const char* ptr = start; ptr < end; ptr++) {
|
||||
os << (char)*ptr;
|
||||
}
|
||||
if (skip_right) {
|
||||
os << " ...";
|
||||
}
|
||||
os << std::endl;
|
||||
os << " ";
|
||||
if (skip_left) {
|
||||
os << "... ";
|
||||
}
|
||||
for (const char* ptr = start; ptr < here; ptr++) {
|
||||
char c = *ptr;
|
||||
os << (c == 9 || c == 10 ? c : ' ');
|
||||
}
|
||||
os << '^' << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const SrcLocation& loc) {
|
||||
loc.show(os);
|
||||
return os;
|
||||
}
|
||||
|
||||
// Prints a diagnostic "<location>[: <err_type>]: <message>" followed by the
// source line context; the error type part is omitted when err_type is empty.
void SrcLocation::show_gen_error(std::ostream& os, std::string message, std::string err_type) const {
  show(os);
  if (err_type.size() > 0) {
    os << ": ";
    os << err_type;
  }
  os << ": ";
  os << message;
  os << std::endl;
  show_context(os);
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const Error& error) {
|
||||
error.show(os);
|
||||
return os;
|
||||
}
|
||||
|
||||
// Prints "<location>: error: <message>" followed by the source line context.
void ParseError::show(std::ostream& os) const {
  where.show(os);
  os << ": error: ";
  os << message;
  os << std::endl;
  where.show_context(os);
}
|
||||
|
||||
// Attaches the reader to an input stream and file descriptor (neither is
// owned) and loads the first line.
SourceReader::SourceReader(std::istream* _is, const FileDescr* _fdescr)
    : ifs(_is), loc(_fdescr), eof(false), cur_line_len(0), start(nullptr), cur(nullptr), end(nullptr) {
  load_line();
}
|
||||
|
||||
void SourceReader::set_eof() {
|
||||
if (!eof) {
|
||||
eof = true;
|
||||
start = cur = end = 0;
|
||||
}
|
||||
}
|
||||
|
||||
int SourceReader::skip_spc() {
|
||||
if (!cur) {
|
||||
return 0;
|
||||
}
|
||||
const char* ptr = cur;
|
||||
int res = 0;
|
||||
while (*ptr == ' ' || *ptr == 9) {
|
||||
++ptr;
|
||||
++res;
|
||||
}
|
||||
set_ptr(ptr);
|
||||
return res;
|
||||
}
|
||||
|
||||
bool SourceReader::seek_eof() {
|
||||
while (seek_eoln()) {
|
||||
if (!load_line()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
const char* SourceReader::set_ptr(const char* ptr) {
|
||||
if (ptr != cur) {
|
||||
if (ptr < cur || ptr > end) {
|
||||
error("parsing position went outside of line");
|
||||
}
|
||||
loc.line_pos = (int)(ptr - start);
|
||||
cur = ptr;
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// Reads the next line from the input stream into cur_line and resets the
// read position to its beginning.
// Returns true if a line was loaded, false at end of input.
// Throws ParseError (via error()) if the stream fails for a reason other than
// end of file, or if the line is longer than 0xffffff characters.
bool SourceReader::load_line() {
  if (eof) {
    return false;
  }
  if (ifs->eof()) {
    set_eof();
    return false;
  }
  // The location is advanced before reading, so a read error is reported at
  // the new line with no column.
  ++loc.line_no;
  loc.line_pos = -1;
  std::getline(*ifs, cur_line);
  if (ifs->fail()) {
    set_eof();
    // failbit together with eofbit just means there was nothing left to read.
    if (!ifs->eof()) {
      error("cannot read line from source stream");
    }
    return false;
  }
  std::size_t len = cur_line.size();
  if (len > 0xffffff) {
    set_eof();
    error("line too long");
    return false;
  }
  // Keep a copy of the line in the location so diagnostics can show it later.
  loc.text = cur_line;
  cur_line_len = (int)len;
  loc.line_pos = 0;
  cur = start = cur_line.c_str();
  end = start + cur_line_len;
  return true;
}
|
||||
|
||||
} // namespace src
|
150
crypto/parser/srcread.h
Normal file
150
crypto/parser/srcread.h
Normal file
|
@ -0,0 +1,150 @@
|
|||
/*
|
||||
This file is part of TON Blockchain Library.
|
||||
|
||||
TON Blockchain Library is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
TON Blockchain Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Copyright 2017-2019 Telegram Systems LLP
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
|
||||
namespace src {
|
||||
|
||||
/*
|
||||
*
|
||||
* SOURCE FILE READER
|
||||
*
|
||||
*/
|
||||
|
||||
// Identifies an input source: a file name plus a flag telling whether the
// text actually comes from standard input.
struct FileDescr {
  std::string filename;
  bool is_stdin;
  FileDescr(std::string _fname, bool _stdin = false)
      : filename(std::move(_fname))
      , is_stdin(_stdin) {
  }
};
|
||||
|
||||
// Unrecoverable error carrying only a message (no source location).
struct Fatal {
  std::string message;
  Fatal(std::string _msg)
      : message(std::move(_msg)) {
  }
  // Returns a copy of the stored message.
  std::string get_msg() const {
    std::string copy = message;
    return copy;
  }
};
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const Fatal& fatal);
|
||||
|
||||
// A position in a source file, together with a copy of the source line so
// that diagnostics can show context.
struct SrcLocation {
  const FileDescr* fdescr;  // source file (not owned); null when unknown
  int line_no;              // line number; show() prints it only when positive
  int line_pos;             // 0-based offset within the line; negative when unknown
  std::string text;         // text of the line (may be empty)
  SrcLocation() : fdescr(nullptr), line_no(0), line_pos(-1) {
  }
  SrcLocation(const FileDescr* _fdescr, int line = 0, int pos = -1) : fdescr(_fdescr), line_no(line), line_pos(pos) {
  }
  // True if the location refers to some file descriptor.
  bool defined() const {
    return fdescr;
  }
  // Prints "<file>[:<line>[:<column>]]".
  void show(std::ostream& os) const;
  // Prints the source line with a caret under the position; returns false if not possible.
  bool show_context(std::ostream& os) const;
  // Prints "<location>[: <err_type>]: <message>" followed by the line context.
  void show_gen_error(std::ostream& os, std::string message, std::string err_type = "") const;
  // The helpers below print a diagnostic of the given kind to std::cerr.
  void show_note(std::string err_msg) const {
    show_gen_error(std::cerr, err_msg, "note");
  }
  void show_warning(std::string err_msg) const {
    show_gen_error(std::cerr, err_msg, "warning");
  }
  void show_error(std::string err_msg) const {
    show_gen_error(std::cerr, err_msg, "error");
  }
};
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const SrcLocation& loc);
|
||||
|
||||
// Abstract base of errors that know how to print themselves.
struct Error {
  virtual ~Error() = default;
  // Writes a description of the error to `os`.
  virtual void show(std::ostream& os) const = 0;
};
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const Error& error);
|
||||
|
||||
// Error tied to a source location, thrown by the reader and the lexer.
struct ParseError : Error {
  SrcLocation where;    // where the error occurred
  std::string message;  // what went wrong
  ParseError(const SrcLocation& _where, std::string _msg) : where(_where), message(_msg) {
  }
  // A null `_where` leaves the location default-constructed (unknown).
  ParseError(const SrcLocation* _where, std::string _msg) : message(_msg) {
    if (_where) {
      where = *_where;
    }
  }
  ~ParseError() override = default;
  // Prints "<location>: error: <message>" plus the source line context.
  void show(std::ostream& os) const override;
};
|
||||
|
||||
// Line-oriented reader over an input stream. It keeps one line in memory and
// exposes raw pointers into it.
class SourceReader {
  std::istream* ifs;     // input stream (not owned)
  SrcLocation loc;       // location of the current read position
  bool eof;              // set once the input is exhausted
  std::string cur_line;  // current line; start/cur/end point into it
  int cur_line_len;      // length of cur_line
  // Marks the reader as exhausted and nulls start/cur/end.
  void set_eof();
  const char *start, *cur, *end;  // line start, read position, line end

 public:
  // Attaches to the stream and loads the first line.
  SourceReader(std::istream* _is, const FileDescr* _fdescr);
  // Loads the next line; returns false at end of input.
  bool load_line();
  bool is_eof() const {
    return eof;
  }
  // Non-zero when the read position is at the end of the current line.
  int is_eoln() const {
    return cur == end;
  }
  // Skips spaces and tabs; returns the number of characters skipped.
  int skip_spc();
  // Skips blanks, then reports whether the end of the line was reached.
  bool seek_eoln() {
    skip_spc();
    return is_eoln();
  }
  // Skips blanks and empty lines; returns true if the input ended.
  bool seek_eof();
  const char* cur_line_cstr() const {
    return cur_line.c_str();
  }
  // Location of the current read position.
  const SrcLocation& here() const {
    return loc;
  }
  // Character at the read position. NOTE(review): dereferences `cur`, which
  // set_eof() resets to null, so this must not be called after end of input.
  char cur_char() const {
    return *cur;
  }
  // Character after the read position (same precondition as cur_char()).
  char next_char() const {
    return cur[1];
  }
  const char* get_ptr() const {
    return cur;
  }
  const char* get_end_ptr() const {
    return end;
  }
  // Moves the read position forward to `ptr`; throws if it is outside the rest of the line.
  const char* set_ptr(const char* ptr);
  // Moves the read position forward by n characters.
  void advance(int n) {
    set_ptr(get_ptr() + n);
  }
  // Throws a ParseError located at the current read position.
  void error(std::string err_msg) {
    throw ParseError{loc, err_msg};
  }
};
|
||||
|
||||
} // namespace src
|
181
crypto/parser/symtable.cpp
Normal file
181
crypto/parser/symtable.cpp
Normal file
|
@ -0,0 +1,181 @@
|
|||
/*
|
||||
This file is part of TON Blockchain Library.
|
||||
|
||||
TON Blockchain Library is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
TON Blockchain Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Copyright 2017-2019 Telegram Systems LLP
|
||||
*/
|
||||
#include "symtable.h"
|
||||
#include <sstream>
|
||||
#include <cassert>
|
||||
|
||||
namespace sym {
|
||||
|
||||
/*
|
||||
*
|
||||
* SYMBOL VALUES (DECLARED)
|
||||
*
|
||||
*/
|
||||
|
||||
// Current scope nesting depth; 0 is the outermost (global) scope.
int scope_level;

// The global symbol table.
SymTable<100003> symbols;

// Per-symbol definitions, indexed by symbol table index: sym_def holds the
// definition visible in the current local scopes, global_sym_def the global one.
// NOTE(review): SymTableBase::gen_lookup can return indices 1..p inclusive and
// p == symbols.hprime here, so index hprime would be one past the end of both
// arrays. Sizing them hprime + 1 needs a matching change to the extern
// declarations in symtable.h.
SymDef* sym_def[symbols.hprime];
SymDef* global_sym_def[symbols.hprime];
// Definitions shadowed by inner scopes, as (scope level, saved definition)
// pairs; close_scope() restores them.
std::vector<std::pair<int, SymDef>> symbol_stack;
// Location at which each currently open scope was opened.
std::vector<src::SrcLocation> scope_opened_at;
|
||||
|
||||
// Builds a placeholder name for a symbol index that has no table entry:
// "_" for index 0, "SYM#<index>" otherwise.
std::string Symbol::unknown_symbol_name(sym_idx_t i) {
  if (!i) {
    return "_";
  }
  std::ostringstream buf;
  buf << "SYM#";
  buf << i;
  return buf.str();
}
|
||||
|
||||
// Looks up `str` in the open-addressing hash table, optionally inserting it.
//   mode & 1: insert the string if it is absent
//   mode & 2: report an existing entry as not_found (used to detect redefinition)
//   idx > 0 : store the new entry as a keyword, with Symbol::idx == -idx;
//             otherwise Symbol::idx is the entry's own table index
// Returns the table index (in 1..p) of the entry found or created, or not_found.
// Throws SymTableOverflow if an insertion would be made while the table
// already holds at least 3/4 * p symbols.
sym_idx_t SymTableBase::gen_lookup(std::string str, int mode, sym_idx_t idx) {
  // h1 is the probe position, h2 the probe step (double hashing).
  unsigned long long h1 = 1, h2 = 1;
  for (char c : str) {
    h1 = ((h1 * 239) + (unsigned char)(c)) % p;
    h2 = ((h2 * 17) + (unsigned char)(c)) % (p - 1);
  }
  // Shift into 1..p-1 (step) and 1..p (position); slot 0 is never used, since 0 means not_found.
  ++h2;
  ++h1;
  while (true) {
    if (sym_table[h1]) {
      if (sym_table[h1]->str == str) {
        return (mode & 2) ? not_found : sym_idx_t(h1);
      }
      // Occupied by another string: step to the next probe position, wrapping within 1..p.
      h1 += h2;
      if (h1 > p) {
        h1 -= p;
      }
    } else {
      // Free slot: the string is not in the table.
      if (!(mode & 1)) {
        return not_found;
      }
      if (def_sym >= ((long)p * 3) / 4) {
        throw SymTableOverflow{def_sym};
      }
      sym_table[h1] = std::make_unique<Symbol>(str, idx <= 0 ? sym_idx_t(h1) : -idx);
      ++def_sym;
      return sym_idx_t(h1);
    }
  }
}
|
||||
|
||||
// Registers `str` as a keyword with code `idx`; a non-positive `idx` requests
// the next automatically assigned code. Codes below max_kw_idx are also
// recorded in the `keywords` table for get_keyword().
// Throws SymTableKwRedef if `str` is already present in the symbol table.
SymTableBase& SymTableBase::add_keyword(std::string str, sym_idx_t idx) {
  const sym_idx_t kw_code = (idx > 0) ? idx : ++def_kw;
  // Mode -1 has both bits set: insert if absent, report failure if present.
  const sym_idx_t slot = gen_lookup(str, -1, kw_code);
  if (!slot) {
    throw SymTableKwRedef{str};
  }
  if (kw_code < max_kw_idx) {
    keywords[kw_code] = slot;
  }
  return *this;
}
|
||||
|
||||
// Enters a new nested scope and remembers where it was opened (the location
// of the lexer's current lexem).
void open_scope(src::Lexer& lex) {
  scope_level += 1;
  const src::SrcLocation& opened_here = lex.cur().loc;
  scope_opened_at.push_back(opened_here);
}
|
||||
|
||||
// Leaves the innermost scope: every symbol defined or shadowed in it is
// restored to its previous definition or removed, and the scope level is
// decremented.
// Throws src::Fatal if called at the outermost scope. `lex` is not used.
void close_scope(src::Lexer& lex) {
  if (!scope_level) {
    throw src::Fatal{"cannot close the outer scope"};
  }
  // Entries for the current level sit on top of the stack.
  while (!symbol_stack.empty() && symbol_stack.back().first == scope_level) {
    SymDef old_def = symbol_stack.back().second;
    auto idx = old_def.sym_idx;
    symbol_stack.pop_back();
    SymDef* cur_def = sym_def[idx];
    assert(cur_def);
    assert(cur_def->level == scope_level && cur_def->sym_idx == idx);
    //std::cerr << "restoring local symbol `" << old_def.name << "` of level " << scope_level << " to its previous level " << old_def.level << std::endl;
    if (cur_def->value) {
      //std::cerr << "deleting value of symbol " << old_def.name << ":" << old_def.level << " at " << (const void*) it->second.value << std::endl;
      // The value belonging to the scope being closed is owned here.
      delete cur_def->value;
    }
    if (!old_def.level && !old_def.value) {
      // Nothing was saved for outer scopes (placeholder with level 0, no value): drop the definition.
      delete cur_def;  // ??? keep the definition always?
      sym_def[idx] = nullptr;
    } else {
      // Restore the shadowed definition; the value pointer is handed back to cur_def.
      cur_def->value = std::move(old_def.value);
      cur_def->level = old_def.level;
    }
    old_def.value = nullptr;
  }
  --scope_level;
  scope_opened_at.pop_back();
}
|
||||
|
||||
// Finds the definition of the symbol with table index `idx`.
//   flags & 1: consider local definitions (checked first)
//   flags & 2: consider global definitions
// Returns nullptr for index 0 or when no matching definition exists.
SymDef* lookup_symbol(sym_idx_t idx, int flags) {
  if (!idx) {
    return nullptr;
  }
  const bool want_local = (flags & 1) != 0;
  const bool want_global = (flags & 2) != 0;
  if (want_local) {
    if (SymDef* local = sym_def[idx]) {
      return local;
    }
  }
  if (want_global) {
    if (SymDef* global = global_sym_def[idx]) {
      return global;
    }
  }
  return nullptr;
}
|
||||
|
||||
// Finds the definition of the symbol called `name`. The name is only looked
// up, never added; an unknown name gives nullptr.
SymDef* lookup_symbol(std::string name, int flags) {
  const sym_idx_t name_idx = symbols.lookup(name);
  return lookup_symbol(name_idx, flags);
}
|
||||
|
||||
// Returns the global definition record for symbol `name_idx`, creating an
// empty one (located at `loc`) if there is none yet.
// Returns nullptr for index 0, and also when `force_new` is set but the
// symbol already has a value.
SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new, const src::SrcLocation& loc) {
  if (!name_idx) {
    return nullptr;
  }
  SymDef*& slot = global_sym_def[name_idx];
  if (!slot) {
    slot = new SymDef(0, name_idx, loc);
    return slot;
  }
  const bool clash = force_new && slot->value;
  return clash ? nullptr : slot;
}
|
||||
|
||||
// Defines symbol `name_idx` in the current scope and returns its record with
// an empty value; at the outermost scope this is define_global_symbol().
// A definition coming from an outer scope is saved on symbol_stack so that
// close_scope() can restore it.
// Returns nullptr for index 0, and also when `force_new` is set but the
// symbol already has a value in the current scope.
SymDef* define_symbol(sym_idx_t name_idx, bool force_new, const src::SrcLocation& loc) {
  if (!name_idx) {
    return nullptr;
  }
  if (!scope_level) {
    return define_global_symbol(name_idx, force_new, loc);
  }
  SymDef* existing = sym_def[name_idx];
  if (!existing) {
    // First local definition: push a placeholder (level 0, no value) so that
    // close_scope() knows to remove the definition altogether.
    SymDef* fresh = new SymDef(scope_level, name_idx, loc);
    sym_def[name_idx] = fresh;
    symbol_stack.push_back(std::make_pair(scope_level, SymDef{0, name_idx}));
    return fresh;
  }
  if (existing->level < scope_level) {
    // Shadowing an outer definition: save a copy before reusing the record.
    symbol_stack.push_back(std::make_pair(scope_level, *existing));
    existing->level = scope_level;
  } else if (existing->value && force_new) {
    return nullptr;
  }
  existing->value = 0;
  existing->loc = loc;
  return existing;
}
|
||||
|
||||
} // namespace sym
|
177
crypto/parser/symtable.h
Normal file
177
crypto/parser/symtable.h
Normal file
|
@ -0,0 +1,177 @@
|
|||
/*
|
||||
This file is part of TON Blockchain Library.
|
||||
|
||||
TON Blockchain Library is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
TON Blockchain Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Copyright 2017-2019 Telegram Systems LLP
|
||||
*/
|
||||
#pragma once
|
||||
#include "srcread.h"
|
||||
#include "lexer.h"
|
||||
#include <vector>
|
||||
|
||||
namespace sym {
|
||||
|
||||
/*
|
||||
*
|
||||
* SYMBOL VALUES (DECLARED)
|
||||
*
|
||||
*/
|
||||
|
||||
typedef int var_idx_t;
|
||||
|
||||
// Polymorphic base for the value attached to a symbol definition.
struct SymValBase {
  // Kinds of symbol values.
  enum { _Param = 0, _Var = 1, _Func = 2, _Typename = 3 };
  int type;  // kind code supplied by the creator of the value
  int idx;   // index supplied by the creator of the value
  SymValBase(int _type, int _idx)
      : type(_type)
      , idx(_idx) {
  }
  virtual ~SymValBase() = default;
};
|
||||
|
||||
/*
|
||||
*
|
||||
* SYMBOL TABLE
|
||||
*
|
||||
*/
|
||||
|
||||
// defined outside this module (by the end user)
|
||||
int compute_symbol_subclass(std::string str); // return 0 if unneeded
|
||||
|
||||
typedef int sym_idx_t;
|
||||
|
||||
// One entry of the symbol table.
struct Symbol {
  std::string str;  // symbol text
  sym_idx_t idx;    // own table index for ordinary symbols, negated keyword code for keywords
  int subclass;     // user-defined subclass, see compute_symbol_subclass()
  Symbol(std::string _str, sym_idx_t _idx, int _sc) : str(_str), idx(_idx), subclass(_sc) {
  }
  // Computes the subclass from the text via compute_symbol_subclass().
  Symbol(std::string _str, sym_idx_t _idx) : str(_str), idx(_idx) {
    subclass = compute_symbol_subclass(std::move(_str));
  }
  // Placeholder name for an index without a table entry ("_" or "SYM#<i>").
  static std::string unknown_symbol_name(sym_idx_t i);
};
|
||||
|
||||
// Size-independent part of the symbol table: an open-addressing hash table of
// Symbol entries plus a keyword-code -> table-index map. The storage itself
// is provided by the derived SymTable.
class SymTableBase {
  unsigned p;                               // hash modulus; valid table indices are 1..p
  std::unique_ptr<Symbol>* sym_table;       // entry storage (owned by the derived class)
  sym_idx_t def_kw, def_sym;                // last auto-assigned keyword code; number of stored symbols
  static constexpr int max_kw_idx = 10000;  // keyword codes below this are kept in `keywords`
  sym_idx_t keywords[max_kw_idx];           // keyword code -> table index (0 if unset)

 public:
  // Auto-assigned keyword codes start after 0x100 (def_kw is pre-incremented before use).
  SymTableBase(unsigned p_, std::unique_ptr<Symbol>* sym_table_)
      : p(p_), sym_table(sym_table_), def_kw(0x100), def_sym(0) {
    std::memset(keywords, 0, sizeof(keywords));
  }
  static constexpr sym_idx_t not_found = 0;
  // Registers a keyword; idx <= 0 requests an automatically assigned code.
  SymTableBase& add_keyword(std::string str, sym_idx_t idx = 0);
  // Registers a one-character keyword whose code is the character itself.
  SymTableBase& add_kw_char(char c) {
    return add_keyword(std::string{c}, c);
  }
  // Looks up a string; see gen_lookup() for `mode` (0 = find only, 1 = add if absent).
  sym_idx_t lookup(std::string str, int mode = 0) {
    return gen_lookup(str, mode);
  }
  // Looks up a string, adding it if absent.
  sym_idx_t lookup_add(std::string str) {
    return gen_lookup(str, 1);
  }
  // Entry at table index i, or null if the slot is empty.
  Symbol* operator[](sym_idx_t i) const {
    return sym_table[i].get();
  }
  // True if slot i holds a keyword (keywords have a negative Symbol::idx).
  bool is_keyword(sym_idx_t i) const {
    return sym_table[i] && sym_table[i]->idx < 0;
  }
  // Text of the symbol at index i, or a placeholder name for an empty slot.
  std::string get_name(sym_idx_t i) const {
    return sym_table[i] ? sym_table[i]->str : Symbol::unknown_symbol_name(i);
  }
  // Subclass of the symbol at index i, or 0 for an empty slot.
  int get_subclass(sym_idx_t i) const {
    return sym_table[i] ? sym_table[i]->subclass : 0;
  }
  // Entry of the keyword with code i, or null if i is out of range or no such keyword was
  // recorded (unset codes map to slot 0, which is never filled).
  Symbol* get_keyword(int i) const {
    return ((unsigned)i < (unsigned)max_kw_idx) ? sym_table[keywords[i]].get() : nullptr;
  }

 protected:
  // General lookup/insert primitive behind lookup(), lookup_add() and add_keyword().
  sym_idx_t gen_lookup(std::string str, int mode = 0, sym_idx_t idx = 0);
};
|
||||
|
||||
// Symbol table with statically sized storage; `pp` is the hash modulus passed to SymTableBase.
template <unsigned pp>
class SymTable : public SymTableBase {
 public:
  static constexpr int hprime = pp;
  // Number of slots in the table (slot 0 plus indices 1..pp).
  static int size() {
    return pp + 1;
  }

 private:
  std::unique_ptr<Symbol> sym[pp + 1];

 public:
  // The base class receives a pointer to `sym` before the array is constructed; its
  // constructor only stores the pointer.
  SymTable() : SymTableBase(pp, sym) {
  }
  // Same as SymTableBase::add_keyword, but returns the derived type for chaining.
  SymTable& add_keyword(std::string str, sym_idx_t idx = 0) {
    SymTableBase::add_keyword(str, idx);
    return *this;
  }
  // Registers a one-character keyword whose code is the character itself.
  SymTable& add_kw_char(char c) {
    return add_keyword(std::string{c}, c);
  }
};
|
||||
|
||||
// Thrown when the symbol table is too full to accept another symbol; carries
// the number of symbols defined at that moment.
struct SymTableOverflow {
  int sym_def;
  SymTableOverflow(int x)
      : sym_def(x) {
  }
};
|
||||
|
||||
// Thrown when a keyword is registered under a text that is already in the
// symbol table; carries that text.
struct SymTableKwRedef {
  std::string kw;
  SymTableKwRedef(std::string _kw)
      : kw(_kw) {
  }
};
|
||||
|
||||
extern SymTable<100003> symbols;
|
||||
|
||||
extern int scope_level;
|
||||
|
||||
// Definition record of a symbol in some scope.
struct SymDef {
  int level;             // scope level of the definition (0 = global)
  sym_idx_t sym_idx;     // symbol table index of the name
  SymValBase* value;     // attached value, or null; close_scope() deletes values of closed scopes
  src::SrcLocation loc;  // where the symbol was defined
  SymDef(int lvl, sym_idx_t idx, const src::SrcLocation& _loc = {}, SymValBase* val = 0)
      : level(lvl), sym_idx(idx), value(val), loc(_loc) {
  }
  // True if the definition refers to a real symbol (non-zero index).
  bool has_name() const {
    return sym_idx;
  }
  // Name of the symbol as stored in the global symbol table.
  std::string name() const {
    return symbols.get_name(sym_idx);
  }
};
|
||||
|
||||
extern SymDef* sym_def[symbols.hprime];
|
||||
extern SymDef* global_sym_def[symbols.hprime];
|
||||
extern std::vector<std::pair<int, SymDef>> symbol_stack;
|
||||
extern std::vector<src::SrcLocation> scope_opened_at;
|
||||
|
||||
void open_scope(src::Lexer& lex);
|
||||
void close_scope(src::Lexer& lex);
|
||||
SymDef* lookup_symbol(sym_idx_t idx, int flags = 3);
|
||||
SymDef* lookup_symbol(std::string name, int flags = 3);
|
||||
|
||||
SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new = false, const src::SrcLocation& loc = {});
|
||||
SymDef* define_symbol(sym_idx_t name_idx, bool force_new = false, const src::SrcLocation& loc = {});
|
||||
|
||||
} // namespace sym
|
Loading…
Add table
Add a link
Reference in a new issue