1
0
Fork 0
mirror of https://github.com/ton-blockchain/ton synced 2025-03-09 15:40:10 +00:00

[Tolk] Initial commit of TOLK Language: fork all sources from FunC

The Tolk Language will be positioned as "next-generation FunC".
It's literally a fork of a FunC compiler,
introducing familiar syntax similar to TypeScript,
but leaving all low-level optimizations untouched.

Note that FunC sources are partially stored
in the parser/ folder (shared with TL/B).
In Tolk, nothing is shared.
Everything from parser/ is copied into tolk/ folder.
This commit is contained in:
tolk-vm 2024-10-31 10:51:07 +04:00
parent eed3153ace
commit 82648ebd6a
No known key found for this signature in database
GPG key ID: 7905DD7FE0324B12
43 changed files with 13674 additions and 18 deletions

51
tolk/CMakeLists.txt Normal file
View file

@ -0,0 +1,51 @@
# Build script for the Tolk compiler: a native `tolk` executable and,
# when USE_EMSCRIPTEN is set, an additional `tolkfiftlib` target.
cmake_minimum_required(VERSION 3.5 FATAL_ERROR)
# Compiler sources compiled into both targets below (each target adds its own entry file).
set(TOLK_SOURCE
srcread.cpp
lexer.cpp
symtable.cpp
keywords.cpp
unify-types.cpp
parse-tolk.cpp
abscode.cpp
gen-abscode.cpp
analyzer.cpp
asmops.cpp
builtins.cpp
stack-transform.cpp
optimize.cpp
codegen.cpp
tolk.cpp
)
# Native command-line compiler; tolk-main.cpp is its entry file.
add_executable(tolk tolk-main.cpp ${TOLK_SOURCE})
target_include_directories(tolk PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>)
target_link_libraries(tolk PUBLIC git ton_crypto) # todo replace with ton_crypto_core in the future
# wingetopt is linked only when the enclosing project reports it as found
# (presumably a getopt replacement for Windows builds - confirm in the top-level CMake).
if (WINGETOPT_FOUND)
target_link_libraries_system(tolk wingetopt)
endif ()
# Emscripten-only target: same sources, but with tolk-wasm.cpp as the entry file and fift-lib linked in.
if (USE_EMSCRIPTEN)
add_executable(tolkfiftlib tolk-wasm.cpp ${TOLK_SOURCE})
target_include_directories(tolkfiftlib PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>)
target_link_libraries(tolkfiftlib PUBLIC fift-lib git)
# Linker settings for the Emscripten output; the exported function list names the
# symbols (_tolk_compile, _version, ...) that must stay reachable from the host side.
target_link_options(tolkfiftlib PRIVATE
-sEXPORTED_RUNTIME_METHODS=FS,ccall,cwrap,UTF8ToString,stringToUTF8,lengthBytesUTF8,addFunction,removeFunction,setValue
-sEXPORTED_FUNCTIONS=_tolk_compile,_version,_malloc,_free,_setThrew
-sEXPORT_NAME=CompilerModule
-sERROR_ON_UNDEFINED_SYMBOLS=0
-sFILESYSTEM=1 -lnodefs.js
-Oz
-sIGNORE_MISSING_MAIN=1
-sAUTO_NATIVE_LIBRARIES=0
-sMODULARIZE=1
-sTOTAL_MEMORY=33554432
-sALLOW_MEMORY_GROWTH=1
-sALLOW_TABLE_GROWTH=1
# embeds ../crypto/fift/lib from the source tree, mapped to /fiftlib inside the module
--embed-file ${CMAKE_CURRENT_SOURCE_DIR}/../crypto/fift/lib@/fiftlib
-fexceptions
)
target_compile_options(tolkfiftlib PRIVATE -fexceptions -fno-stack-protector)
endif ()
# Only the native executable is installed.
install(TARGETS tolk RUNTIME DESTINATION bin)

526
tolk/abscode.cpp Normal file
View file

@ -0,0 +1,526 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tolk.h"
namespace tolk {
/*
*
* ABSTRACT CODE
*
*/
// Creates temporary variable number `_idx` of class `_cls`.
//  - a null `_type` is replaced by a fresh type hole;
//  - a non-null `loc` is copied, so the variable owns its own source location;
//  - a non-null `sym` gives the variable its name and is pointed back at `_idx`;
//  - a variable whose class is exactly `_Named` starts out marked as undefined.
TmpVar::TmpVar(var_idx_t _idx, int _cls, TypeExpr* _type, SymDef* sym, const SrcLocation* loc)
    : v_type(_type), idx(_idx), cls(_cls), coord(0) {
  if (sym != nullptr) {
    name = sym->sym_idx;
    sym->value->idx = _idx;
  }
  if (loc != nullptr) {
    where = std::make_unique<SrcLocation>(*loc);
  }
  if (_type == nullptr) {
    v_type = TypeExpr::new_hole();
  }
  if (cls == _Named) {
    undefined = true;
  }
}
// Stores a copy of `loc` as this variable's source location,
// reusing the already allocated location object when there is one.
void TmpVar::set_location(const SrcLocation& loc) {
  if (!where) {
    where = std::make_unique<SrcLocation>(loc);
    return;
  }
  *where = loc;
}
// Prints one line describing the variable: name/index, type, width and, if set, its `coord`.
// A positive coord is printed as "_<coord >> 8>.<coord & 255>";
// a negative coord encodes (in ~coord) a start index and a count, printed as a range of variables.
void TmpVar::dump(std::ostream& os) const {
  show(os);
  os << " : " << v_type << " (width ";
  v_type->show_width(os);
  os << ")";
  if (coord > 0) {
    os << " = _" << (coord >> 8) << '.' << (coord & 255);
  } else if (coord < 0) {
    const int first = (~coord >> 8);
    const int count = (~coord & 0xff);
    if (count == 0) {
      os << " = ()";
    } else {
      os << " = (_" << first << ".._" << (first + count - 1) << ")";
    }
  }
  os << std::endl;
}
void TmpVar::show(std::ostream& os, int omit_idx) const {
if (cls & _Named) {
os << symbols.get_name(name);
if (omit_idx && (omit_idx >= 2 || (cls & _UniqueName))) {
return;
}
}
os << '_' << idx;
}
std::ostream& operator<<(std::ostream& os, const TmpVar& var) {
var.show(os);
return os;
}
// Prints the abstract-value flags of this variable as a compact string of one-character tags
// (in a fixed order), followed by "=<constant>" when an integer constant is attached.
void VarDescr::show_value(std::ostream& os) const {
  struct FlagTag {
    int mask;
    char tag;
  };
  // the order of this table is the order of the tags in the output
  const FlagTag tags[] = {
      {_Int, 'i'},  {_Const, 'c'}, {_Zero, '0'}, {_NonZero, '!'}, {_Pos, '>'},    {_Neg, '<'},
      {_Bool, 'B'}, {_Bit, 'b'},   {_Even, 'E'}, {_Odd, 'O'},     {_Finite, 'f'}, {_Nan, 'N'},
  };
  for (const FlagTag& entry : tags) {
    if (val & entry.mask) {
      os << entry.tag;
    }
  }
  if (int_const.not_null()) {
    os << '=' << int_const;
  }
}
void VarDescr::show(std::ostream& os, const char* name) const {
if (flags & _Last) {
os << '*';
}
if (flags & _Unused) {
os << '?';
}
if (name) {
os << name;
}
os << '_' << idx;
show_value(os);
}
// Convenience overload: wraps a native integer into a big-integer reference
// and forwards to the td::RefInt256 overload.
void VarDescr::set_const(long long value) {
  set_const(td::make_refint(value));
}
// Makes this variable a known integer constant and derives all abstract flags from the value.
// A value that does not fit into 257 signed bits is invalidated before classification.
void VarDescr::set_const(td::RefInt256 value) {
  int_const = std::move(value);
  if (!int_const->signed_fits_bits(257)) {
    int_const.write().invalidate();
  }
  val = _Const | _Int;
  const int sign = sgn(int_const);
  if (sign < -1) {
    // a sign code below -1 is classified as not-a-number
    val |= _Nan | _NonZero;
  } else if (sign < 0) {
    val |= _NonZero | _Neg | _Finite;
    if (*int_const == -1) {
      val |= _Bool;
    }
  } else if (sign > 0) {
    // note: the constant 1 is deliberately not marked as _Bit here (that check is disabled upstream)
    val |= _NonZero | _Pos | _Finite;
  } else {
    // sign == 0: zero is marked both non-positive and non-negative
    val |= _Zero | _Neg | _Pos | _Finite | _Bool | _Bit;
  }
  if (val & _Finite) {
    val |= int_const->get_bit(0) ? _Odd : _Even;
  }
}
// Makes this variable a known string (slice) constant.
// `value` is taken by value, so it is moved into place instead of being copied a second time.
// Only the _Const flag is set; the integer constant field is not touched here.
void VarDescr::set_const(std::string value) {
  str_const = std::move(value);
  val = _Const;
}
// Sets the constant to a default-constructed big integer produced by td::make_refint()
// (NOTE(review): presumably an invalid/NaN value, as the function name says - confirm in td).
void VarDescr::set_const_nan() {
  auto nan_value = td::make_refint();
  set_const(std::move(nan_value));
}
// Merges knowledge from two alternative control-flow paths: only facts true on both survive.
// Flags are intersected; if both sides are integer constants with different values,
// the result is no longer a constant, and a non-constant result drops the stored value.
void VarDescr::operator|=(const VarDescr& y) {
  val &= y.val;
  const bool both_int_const = is_int_const() && y.is_int_const();
  if (both_int_const && cmp(int_const, y.int_const) != 0) {
    val &= ~_Const;
  }
  const bool still_const = (val & _Const) != 0;
  if (!still_const) {
    int_const.clear();
  }
}
// Combines two sets of facts known about the same value: flags are united,
// and the integer constant of `y` is adopted when this descriptor has none yet.
void VarDescr::operator&=(const VarDescr& y) {
  val |= y.val;
  const bool take_const = y.int_const.not_null() && int_const.is_null();
  if (take_const) {
    int_const = y.int_const;
  }
}
void VarDescr::set_value(const VarDescr& y) {
val = y.val;
int_const = y.int_const;
}
// Move flavour of set_value(): takes the flags of `y` and steals its integer constant.
void VarDescr::set_value(VarDescr&& y) {
  VarDescr& source = y;
  val = source.val;
  int_const = std::move(source.int_const);
}
// Forgets everything known about the value: no flags, no integer constant.
void VarDescr::clear_value() {
  int_const.clear();
  val = 0;
}
// Prints the whole list on one line as "[ v1 v2 ... ]", prefixed by "<unreachable> " when the flag is set.
void VarDescrList::show(std::ostream& os) const {
  if (unreachable) {
    os << "<unreachable> ";
  }
  os << "[";
  for (const auto& descr : list) {
    os << ' ';
    os << descr;
  }
  os << " ]\n";
}
// Sets the bits of `set` and then clears the bits of `clear` in this operation's flags,
// and does the same for every operation of both nested blocks.
void Op::flags_set_clear(int set, int clear) {
  flags |= set;
  flags &= ~clear;
  for (auto& nested : block0) {
    nested.flags_set_clear(set, clear);
  }
  for (auto& nested : block1) {
    nested.flags_set_clear(set, clear);
  }
}
// Rewrites both operand lists of this operation (and of all nested operations)
// so that every variable that has been split is replaced by its components.
void Op::split_vars(const std::vector<TmpVar>& vars) {
  split_var_list(left, vars);
  split_var_list(right, vars);
  for (auto& nested : block0) {
    nested.split_vars(vars);
  }
  for (auto& nested : block1) {
    nested.split_vars(vars);
  }
}
void Op::split_var_list(std::vector<var_idx_t>& var_list, const std::vector<TmpVar>& vars) {
int new_size = 0, changes = 0;
for (var_idx_t v : var_list) {
int c = vars.at(v).coord;
if (c < 0) {
++changes;
new_size += (~c & 0xff);
} else {
++new_size;
}
}
if (!changes) {
return;
}
std::vector<var_idx_t> new_var_list;
new_var_list.reserve(new_size);
for (var_idx_t v : var_list) {
int c = vars.at(v).coord;
if (c < 0) {
int n = (~c >> 8), k = (~c & 0xff);
while (k-- > 0) {
new_var_list.push_back(n++);
}
} else {
new_var_list.push_back(v);
}
}
var_list = std::move(new_var_list);
}
// Prints this single operation (including its nested blocks) in a human-readable form.
//   os   - output stream
//   vars - variable table used to resolve indices into names
//   pfx  - indentation prefix written before the operation
//   mode - bit flags: +2 prints the variable info list before the operation,
//          +4 additionally prints abstract values (and call arguments with values).
void Op::show(std::ostream& os, const std::vector<TmpVar>& vars, std::string pfx, int mode) const {
  if (mode & 2) {
    os << pfx << " [";
    for (const auto& descr : var_info.list) {
      os << ' ';
      if (descr.flags & VarDescr::_Last) {
        os << '*';
      }
      if (descr.flags & VarDescr::_Unused) {
        os << '?';
      }
      os << vars[descr.idx];
      if (mode & 4) {
        os << ':';
        descr.show_value(os);
      }
    }
    os << " ]\n";
  }

  // markers printed right before the operation name
  std::string dis;
  if (disabled()) {
    dis += "<disabled> ";
  }
  if (noreturn()) {
    dis += "<noret> ";
  }
  if (!is_pure()) {
    dis += "<impure> ";
  }

  // small printing helpers shared by the cases below
  const auto title = [&](const char* text) { os << pfx << dis << text; };
  const auto lhs = [&]() { show_var_list(os, left, vars); };
  const auto rhs = [&]() { show_var_list(os, right, vars); };
  // "<TITLE><left><sep><right>"
  const auto assignment = [&](const char* text, const char* sep) {
    title(text);
    lhs();
    os << sep;
    rhs();
    os << std::endl;
  };
  // "<TITLE><left> { block0 }"
  const auto one_block = [&](const char* text) {
    title(text);
    lhs();
    os << ' ';
    show_block(os, block0.get(), vars, pfx, mode);
    os << std::endl;
  };
  // "<TITLE><left> { block0 }<mid>{ block1 }"
  const auto two_blocks = [&](const char* text, const char* mid) {
    title(text);
    lhs();
    os << ' ';
    show_block(os, block0.get(), vars, pfx, mode);
    os << mid;
    show_block(os, block1.get(), vars, pfx, mode);
    os << std::endl;
  };

  switch (cl) {
    case _Undef:
      title("???\n");
      break;
    case _Nop:
      title("NOP\n");
      break;
    case _Call:
      title("CALL: ");
      lhs();
      os << " := " << (fun_ref ? fun_ref->name() : "(null)") << " ";
      if ((mode & 4) && args.size() == right.size()) {
        show_var_list(os, args, vars);
      } else {
        rhs();
      }
      os << std::endl;
      break;
    case _CallInd:
      assignment("CALLIND: ", " := EXEC ");
      break;
    case _Let:
      assignment("LET ", " := ");
      break;
    case _Tuple:
      assignment("MKTUPLE ", " := ");
      break;
    case _UnTuple:
      assignment("UNTUPLE ", " := ");
      break;
    case _IntConst:
      title("CONST ");
      lhs();
      os << " := " << int_const << std::endl;
      break;
    case _SliceConst:
      title("SCONST ");
      lhs();
      os << " := " << str_const << std::endl;
      break;
    case _Import:
      title("IMPORT ");
      lhs();
      os << std::endl;
      break;
    case _Return:
      title("RETURN ");
      lhs();
      os << std::endl;
      break;
    case _GlobVar:
      title("GLOBVAR ");
      lhs();
      os << " := " << (fun_ref ? fun_ref->name() : "(null)") << std::endl;
      break;
    case _SetGlob:
      title("SETGLOB ");
      os << (fun_ref ? fun_ref->name() : "(null)") << " := ";
      rhs();
      os << std::endl;
      break;
    case _Repeat:
      one_block("REPEAT ");
      break;
    case _If:
      two_blocks("IF ", " ELSE ");
      break;
    case _While:
      two_blocks("WHILE ", " DO ");
      break;
    case _Until:
      one_block("UNTIL ");
      break;
    case _Again:
      one_block("AGAIN ");
      break;
    default:
      os << pfx << dis << "<???" << cl << "> ";
      lhs();
      os << " -- ";
      rhs();
      os << std::endl;
      break;
  }
}
// Prints a list of variable indices: "()" when empty, the bare variable when there is exactly one,
// and "(a,b,...)" otherwise.
void Op::show_var_list(std::ostream& os, const std::vector<var_idx_t>& idx_list,
                       const std::vector<TmpVar>& vars) const {
  const std::size_t total = idx_list.size();
  if (total == 0) {
    os << "()";
    return;
  }
  if (total == 1) {
    os << vars.at(idx_list[0]);
    return;
  }
  os << "(" << vars.at(idx_list[0]);
  for (std::size_t pos = 1; pos < idx_list.size(); pos++) {
    os << "," << vars.at(idx_list[pos]);
  }
  os << ")";
}
void Op::show_var_list(std::ostream& os, const std::vector<VarDescr>& list, const std::vector<TmpVar>& vars) const {
auto n = list.size();
if (!n) {
os << "()";
} else {
os << "( ";
for (std::size_t i = 0; i < list.size(); i++) {
if (i) {
os << ", ";
}
if (list[i].is_unused()) {
os << '?';
}
os << vars.at(list[i].idx) << ':';
list[i].show_value(os);
}
os << " )";
}
}
// Prints a nested block of operations in braces; the operations inside get a longer prefix,
// while the closing brace is aligned with the caller's prefix.
void Op::show_block(std::ostream& os, const Op* block, const std::vector<TmpVar>& vars, std::string pfx, int mode) {
  const std::string inner_pfx = pfx + " ";
  os << "{" << std::endl;
  for (const Op& nested : block) {
    nested.show(os, vars, inner_pfx, mode);
  }
  os << pfx << "}";
}
void CodeBlob::flags_set_clear(int set, int clear) {
for (auto& op : ops) {
op.flags_set_clear(set, clear);
}
}
std::ostream& operator<<(std::ostream& os, const CodeBlob& code) {
code.print(os);
return os;
}
// Dumps the whole code blob.
// flags: +1 = show variable definition locations (only together with +8);
//        +2 = show vars after each op;
//        +4 = show var abstract value info after each op;
//        +8 = show all variables at start
void CodeBlob::print(std::ostream& os, int flags) const {
  os << "CODE BLOB: " << var_cnt << " variables, " << in_var_cnt << " input\n";
  const bool list_all_vars = (flags & 8) != 0;
  const bool with_locations = (flags & 1) != 0;
  if (list_all_vars) {
    for (const auto& var : vars) {
      var.dump(os);
      if (var.where && with_locations) {
        var.where->show(os);
        os << " defined here:\n";
        var.where->show_context(os);
      }
    }
  }
  os << "------- BEGIN --------\n";
  for (const auto& op : ops) {
    op.show(os, vars, "", flags);
  }
  os << "-------- END ---------\n\n";
}
// Appends a new temporary variable to the table and returns its index.
// The index is the current variable counter, which is then advanced by one;
// when `sym` is given, its value is pointed at the new index.
var_idx_t CodeBlob::create_var(int cls, TypeExpr* var_type, SymDef* sym, const SrcLocation* location) {
  vars.emplace_back(var_cnt, cls, var_type, sym, location);
  if (sym != nullptr) {
    sym->value->idx = var_cnt;
  }
  const var_idx_t created = var_cnt;
  ++var_cnt;
  return created;
}
// Declares the formal parameters of the function: one input variable per parameter
// (a named one when the parameter has a symbol), followed by a single _Import operation.
// Must be called on a blob with no variables and no operations yet; returns false otherwise.
bool CodeBlob::import_params(FormalArgList arg_list) {
  const bool already_populated = var_cnt || in_var_cnt || op_cnt;
  if (already_populated) {
    return false;
  }
  std::vector<var_idx_t> list;
  for (const auto& par : arg_list) {
    TypeExpr* arg_type;
    SymDef* arg_sym;
    SrcLocation arg_loc;
    std::tie(arg_type, arg_sym, arg_loc) = par;
    int var_cls = TmpVar::_In;
    if (arg_sym) {
      var_cls = (TmpVar::_In | TmpVar::_Named);
    }
    // create_var() copies the location, so pointing at the local is fine
    list.push_back(create_var(var_cls, arg_type, arg_sym, &arg_loc));
  }
  emplace_back(loc, Op::_Import, list);
  in_var_cnt = var_cnt;
  return true;
}
} // namespace tolk

916
tolk/analyzer.cpp Normal file
View file

@ -0,0 +1,916 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tolk.h"
namespace tolk {
/*
*
* ANALYZE AND PREPROCESS ABSTRACT CODE
*
*/
void CodeBlob::simplify_var_types() {
for (TmpVar& var : vars) {
TypeExpr::remove_indirect(var.v_type);
var.v_type->recompute_width();
}
}
// Splits every variable whose type has a component count other than one into separate
// component variables, then rewrites all operations to refer to the components.
// With `strict` set, a variable of non-fixed width, or a single-component variable whose
// width is not one, is reported as a ParseError.
// Returns the number of variables that were split (0 if nothing changed).
int CodeBlob::split_vars(bool strict) {
// n tracks the index the next created component is expected to get
int n = var_cnt, changes = 0;
// NB: var_cnt grows inside the loop (create_var), so freshly created components are visited too
for (int j = 0; j < var_cnt; j++) {
// this reference is only used before the create_var() calls below, which may reallocate `vars`
TmpVar& var = vars[j];
if (strict && var.v_type->minw != var.v_type->maxw) {
throw ParseError{var.where.get(), "variable does not have fixed width, cannot manipulate it"};
}
std::vector<TypeExpr*> comp_types;
int k = var.v_type->extract_components(comp_types);
// k must fit into the low 8 bits of coord, n into the remaining bits
tolk_assert(k <= 254 && n <= 0x7fff00);
tolk_assert((unsigned)k == comp_types.size());
if (k != 1) {
// negative coord (stored as bitwise complement): first component index n and component count k
var.coord = ~((n << 8) + k);
for (int i = 0; i < k; i++) {
// vars[j] is re-indexed on purpose: `var` may dangle once the vector has grown
auto v = create_var(vars[j].cls, comp_types[i], 0, vars[j].where.get());
tolk_assert(v == n + i);
tolk_assert(vars[v].idx == v);
vars[v].name = vars[j].name;
// positive coord: parent variable j and 1-based component number
vars[v].coord = ((int)j << 8) + i + 1;
}
n += k;
++changes;
} else if (strict && var.v_type->minw != 1) {
throw ParseError{var.where.get(),
"cannot work with variable or variable component of width greater than one"};
}
}
if (!changes) {
return 0;
}
// replace split variables by their components in every operation
for (auto& op : ops) {
op.split_vars(vars);
}
return changes;
}
bool CodeBlob::compute_used_code_vars() {
VarDescrList empty_var_info;
return compute_used_code_vars(ops, empty_var_info, true);
}
// Backward pass of the used-variables analysis over the operation list starting at `ops_ptr`.
//   var_info - variable info that holds after the last operation of the list
//   edit     - forwarded to Op::compute_used_vars()
// The list must end with a _Nop operation, which simply receives `var_info`.
// Returns true if the variable info of any operation in the list changed.
bool CodeBlob::compute_used_code_vars(std::unique_ptr<Op>& ops_ptr, const VarDescrList& var_info, bool edit) const {
  tolk_assert(ops_ptr);
  if (!ops_ptr->next) {
    tolk_assert(ops_ptr->cl == Op::_Nop);
    return ops_ptr->set_var_info(var_info);
  }
  // Both parts must always run (no short-circuit), and the tail must run first:
  // the head operation derives its info from next->var_info, which the tail pass updates.
  // The operands of a bitwise | have no guaranteed evaluation order, so sequence them explicitly.
  const bool tail_changed = compute_used_code_vars(ops_ptr->next, var_info, edit);
  const bool head_changed = ops_ptr->compute_used_vars(*this, edit);
  return tail_changed || head_changed;
}
// Two lists are equal when they have the same length and agree, position by position,
// on variable index and flags. Abstract values are NOT compared here.
bool operator==(const VarDescrList& x, const VarDescrList& y) {
  if (x.size() != y.size()) {
    return false;
  }
  for (std::size_t pos = 0; pos < x.size(); pos++) {
    const VarDescr& a = x.list[pos];
    const VarDescr& b = y.list[pos];
    const bool same_entry = (a.idx == b.idx) && (a.flags == b.flags);
    if (!same_entry) {
      return false;
    }
  }
  return true;
}
// True when both descriptors carry the same abstract value:
// identical flags, and either no integer constant on both sides or equal constants.
bool same_values(const VarDescr& x, const VarDescr& y) {
  if (x.val != y.val) {
    return false;
  }
  if (x.int_const.is_null() != y.int_const.is_null()) {
    return false;
  }
  if (x.int_const.not_null()) {
    return cmp(x.int_const, y.int_const) == 0;
  }
  return true;
}
bool same_values(const VarDescrList& x, const VarDescrList& y) {
if (x.size() != y.size()) {
return false;
}
for (std::size_t i = 0; i < x.size(); i++) {
if (x.list[i].idx != y.list[i].idx || !same_values(x.list[i], y.list[i])) {
return false;
}
}
return true;
}
// Replaces the variable info of this operation by a copy of `new_var_info`.
// Returns true if the info actually changed (compared by index and flags), false if it was already equal.
bool Op::set_var_info(const VarDescrList& new_var_info) {
  const bool unchanged = (var_info == new_var_info);
  if (!unchanged) {
    var_info = new_var_info;
  }
  return !unchanged;
}
// Move flavour of set_var_info(): takes over `new_var_info` when it differs from the current info.
// Returns true if the info changed; on false the argument is left untouched.
bool Op::set_var_info(VarDescrList&& new_var_info) {
  const bool unchanged = (var_info == new_var_info);
  if (!unchanged) {
    var_info = std::move(new_var_info);
  }
  return !unchanged;
}
bool Op::set_var_info_except(const VarDescrList& new_var_info, const std::vector<var_idx_t>& var_list) {
if (!var_list.size()) {
return set_var_info(new_var_info);
}
VarDescrList tmp_info{new_var_info};
tmp_info -= var_list;
return set_var_info(tmp_info);
}
// Move flavour of set_var_info_except(): removes the variables of `var_list` from `new_var_info`
// in place and then hands it over. Returns true if the stored info changed.
bool Op::set_var_info_except(VarDescrList&& new_var_info, const std::vector<var_idx_t>& var_list) {
  const bool something_to_remove = var_list.size() != 0;
  if (something_to_remove) {
    new_var_info -= var_list;
  }
  return set_var_info(std::move(new_var_info));
}
// Returns the variable indices of `var_list` sorted in ascending order with duplicates removed.
// The input is left untouched.
std::vector<var_idx_t> sort_unique_vars(const std::vector<var_idx_t>& var_list) {
  std::vector<var_idx_t> vars{var_list};
  std::sort(vars.begin(), vars.end());
  vars.erase(std::unique(vars.begin(), vars.end()), vars.end());
  return vars;
}
// Looks up the descriptor of variable `idx` by binary search (the list is kept sorted by index).
// Returns nullptr when the variable is not in the list.
VarDescr* VarDescrList::operator[](var_idx_t idx) {
  auto pos = std::lower_bound(list.begin(), list.end(), idx);
  if (pos == list.end() || pos->idx != idx) {
    return nullptr;
  }
  return &*pos;
}
// Read-only lookup of the descriptor of variable `idx` by binary search;
// returns nullptr when the variable is not in the list.
const VarDescr* VarDescrList::operator[](var_idx_t idx) const {
  auto pos = std::lower_bound(list.begin(), list.end(), idx);
  if (pos == list.end() || pos->idx != idx) {
    return nullptr;
  }
  return &*pos;
}
// Counts how many entries of `idx_list` are present in this list
// (an index repeated in `idx_list` is counted once per occurrence).
std::size_t VarDescrList::count(const std::vector<var_idx_t> idx_list) const {
  std::size_t present = 0;
  for (var_idx_t idx : idx_list) {
    const VarDescr* found = operator[](idx);
    if (found != nullptr) {
      ++present;
    }
  }
  return present;
}
// Counts how many entries of `idx_list` are present in this list and not marked as unused.
std::size_t VarDescrList::count_used(const std::vector<var_idx_t> idx_list) const {
  std::size_t used = 0;
  for (var_idx_t idx : idx_list) {
    const VarDescr* found = operator[](idx);
    if (found == nullptr) {
      continue;
    }
    if (!found->is_unused()) {
      ++used;
    }
  }
  return used;
}
// Removes variable `idx` from the list; does nothing when it is not there.
VarDescrList& VarDescrList::operator-=(var_idx_t idx) {
  auto pos = std::lower_bound(list.begin(), list.end(), idx);
  const bool found = (pos != list.end()) && (pos->idx == idx);
  if (found) {
    list.erase(pos);
  }
  return *this;
}
// Removes every variable of `idx_list` from the list, one at a time.
VarDescrList& VarDescrList::operator-=(const std::vector<var_idx_t>& idx_list) {
  for (var_idx_t to_remove : idx_list) {
    operator-=(to_remove);
  }
  return *this;
}
// Makes sure variable `idx` is in the list.
// A new entry is inserted at its sorted position with the _Last flag (plus _Unused if requested);
// an existing entry only loses its _Unused mark when the variable is now added as used.
VarDescrList& VarDescrList::add_var(var_idx_t idx, bool unused) {
  auto pos = std::lower_bound(list.begin(), list.end(), idx);
  const bool present = (pos != list.end()) && (pos->idx == idx);
  if (!present) {
    list.emplace(pos, idx, VarDescr::_Last | (unused ? VarDescr::_Unused : 0));
  } else if (pos->is_unused() && !unused) {
    pos->clear_unused();
  }
  return *this;
}
// Adds every variable of `idx_list` via add_var(), all with the same `unused` setting.
VarDescrList& VarDescrList::add_vars(const std::vector<var_idx_t>& idx_list, bool unused) {
  for (var_idx_t to_add : idx_list) {
    add_var(to_add, unused);
  }
  return *this;
}
// Returns the descriptor of variable `idx`, inserting a fresh one at its sorted position if needed.
// An existing descriptor is returned unchanged.
VarDescr& VarDescrList::add(var_idx_t idx) {
  auto pos = std::lower_bound(list.begin(), list.end(), idx);
  const bool present = (pos != list.end()) && (pos->idx == idx);
  if (!present) {
    pos = list.emplace(pos, idx);
  }
  return *pos;
}
// Returns the descriptor of variable `idx` with no known value:
// either a freshly inserted descriptor, or the existing one with its value cleared.
VarDescr& VarDescrList::add_newval(var_idx_t idx) {
  auto pos = std::lower_bound(list.begin(), list.end(), idx);
  const bool present = (pos != list.end()) && (pos->idx == idx);
  if (present) {
    pos->clear_value();
    return *pos;
  }
  return *list.emplace(pos, idx);
}
// Removes the _Last flag from every descriptor in the list.
VarDescrList& VarDescrList::clear_last() {
  for (auto& descr : list) {
    // clearing an already clear bit is a no-op, so no test is needed
    descr.flags &= ~VarDescr::_Last;
  }
  return *this;
}
// Union of two sorted lists: every variable present in either list appears once in the result.
// A variable present in both is taken from this list and combined with the other one via VarDescr::operator+=.
VarDescrList VarDescrList::operator+(const VarDescrList& y) const {
  VarDescrList merged;
  auto mine = list.cbegin();
  auto theirs = y.list.cbegin();
  const auto mine_end = list.cend();
  const auto theirs_end = y.list.cend();
  // classic two-pointer merge over lists sorted by index
  while (mine != mine_end && theirs != theirs_end) {
    if (mine->idx < theirs->idx) {
      merged.list.push_back(*mine);
      ++mine;
    } else if (mine->idx > theirs->idx) {
      merged.list.push_back(*theirs);
      ++theirs;
    } else {
      merged.list.push_back(*mine);
      merged.list.back() += *theirs;
      ++mine;
      ++theirs;
    }
  }
  // at most one of the two tails is non-empty
  for (; mine != mine_end; ++mine) {
    merged.list.push_back(*mine);
  }
  for (; theirs != theirs_end; ++theirs) {
    merged.list.push_back(*theirs);
  }
  return merged;
}
// In-place union: replaces this list by (*this + y).
VarDescrList& VarDescrList::operator+=(const VarDescrList& y) {
  *this = *this + y;
  return *this;
}
// Merge of the states of two alternative control-flow paths.
// An unreachable side contributes nothing, so the other side is returned as is.
// Otherwise only variables present in both lists are kept, each combined via VarDescr::operator|=.
VarDescrList VarDescrList::operator|(const VarDescrList& y) const {
  if (y.unreachable) {
    return *this;
  }
  if (unreachable) {
    return y;
  }
  VarDescrList common;
  auto mine = list.cbegin();
  auto theirs = y.list.cbegin();
  const auto mine_end = list.cend();
  const auto theirs_end = y.list.cend();
  // two-pointer intersection over lists sorted by index
  while (mine != mine_end && theirs != theirs_end) {
    if (mine->idx < theirs->idx) {
      ++mine;
    } else if (mine->idx > theirs->idx) {
      ++theirs;
    } else {
      common.list.push_back(*mine);
      common.list.back() |= *theirs;
      ++mine;
      ++theirs;
    }
  }
  return common;
}
// In-place version of operator|: an unreachable `y` leaves this list untouched.
VarDescrList& VarDescrList::operator|=(const VarDescrList& y) {
  if (!y.unreachable) {
    *this = *this | y;
  }
  return *this;
}
// Adds the facts of `values` to the variables already present in this list
// (variables that appear only in `values` are ignored), and becomes unreachable if `values` is.
VarDescrList& VarDescrList::operator&=(const VarDescrList& values) {
  for (const VarDescr& extra : values.list) {
    VarDescr* target = operator[](extra.idx);
    if (target == nullptr) {
      continue;
    }
    *target &= extra;
  }
  unreachable |= values.unreachable;
  return *this;
}
// Overwrites the abstract value of every variable in this list with the one recorded in `values`;
// variables unknown to `values` get their value cleared.
// If `values` is unreachable, this list is only marked unreachable and no value is touched.
VarDescrList& VarDescrList::import_values(const VarDescrList& values) {
  if (values.unreachable) {
    set_unreachable();
    return *this;
  }
  for (auto& descr : list) {
    const VarDescr* source = values[descr.idx];
    if (source != nullptr) {
      descr.set_value(*source);
    } else {
      descr.clear_value();
    }
  }
  return *this;
}
bool Op::std_compute_used_vars(bool disabled) {
// left = OP right
// var_info := (var_info - left) + right
VarDescrList new_var_info{next->var_info};
new_var_info -= left;
new_var_info.clear_last();
if (args.size() == right.size() && !disabled) {
for (const VarDescr& arg : args) {
new_var_info.add_var(arg.idx, arg.is_unused());
}
} else {
new_var_info.add_vars(right, disabled);
}
return set_var_info(std::move(new_var_info));
}
// Backward "used variables" step for a single operation.
// Computes var_info (the variables needed before this operation) from next->var_info,
// recursing into nested blocks through `code`. With `edit` set, operations whose results
// are not needed may be disabled. Returns true if var_info was (re)assigned with a different
// content; the _Import case always reports true.
// Throws ParseError for an unknown operation class.
bool Op::compute_used_vars(const CodeBlob& code, bool edit) {
tolk_assert(next);
const VarDescrList& next_var_info = next->var_info;
if (cl == _Nop) {
return set_var_info_except(next_var_info, left);
}
switch (cl) {
case _IntConst:
case _SliceConst:
case _GlobVar:
case _Call:
case _CallInd:
case _Tuple:
case _UnTuple: {
// left = EXEC right;
if (!next_var_info.count_used(left) && is_pure()) {
// all variables in `left` are not needed
if (edit) {
disable();
}
return std_compute_used_vars(true);
}
return std_compute_used_vars();
}
case _SetGlob: {
// GLOB = right
if (right.empty() && edit) {
disable();
}
return std_compute_used_vars(right.empty());
}
case _Let: {
// left = right
std::size_t cnt = next_var_info.count_used(left);
tolk_assert(left.size() == right.size());
auto l_it = left.cbegin(), r_it = right.cbegin();
VarDescrList new_var_info{next_var_info};
new_var_info -= left;
new_var_info.clear_last();
std::vector<var_idx_t> new_left, new_right;
for (; l_it < left.cend(); ++l_it, ++r_it) {
// keep only the last assignment to each left variable (earlier duplicates are dropped)
if (std::find(l_it + 1, left.cend(), *l_it) == left.cend()) {
auto p = next_var_info[*l_it];
// in edit mode the source is marked unused when its target is absent or unused afterwards
new_var_info.add_var(*r_it, edit && (!p || p->is_unused()));
new_left.push_back(*l_it);
new_right.push_back(*r_it);
}
}
if (new_left.size() < left.size()) {
left = std::move(new_left);
right = std::move(new_right);
}
if (!cnt && edit) {
// all variables in `left` are not needed
disable();
}
return set_var_info(std::move(new_var_info));
}
case _Return: {
// return left
// nothing to do if every returned variable is already recorded
if (var_info.count(left) == left.size()) {
return false;
}
std::vector<var_idx_t> unique_vars = sort_unique_vars(left);
var_info.list.clear();
for (var_idx_t i : unique_vars) {
var_info.list.emplace_back(i, VarDescr::_Last);
}
return true;
}
case _Import: {
// import left
std::vector<var_idx_t> unique_vars = sort_unique_vars(left);
var_info.list.clear();
for (var_idx_t i : unique_vars) {
// an imported variable that is absent from next_var_info gets the _Last flag right away
var_info.list.emplace_back(i, next_var_info[i] ? 0 : VarDescr::_Last);
}
return true;
}
case _If: {
// if (left) then block0 else block1
// VarDescrList nx_var_info = next_var_info;
// nx_var_info.clear_last();
code.compute_used_code_vars(block0, next_var_info, edit);
VarDescrList merge_info;
// block1 may be absent; then the "else" path is just the continuation
if (block1) {
code.compute_used_code_vars(block1, next_var_info, edit);
merge_info = block0->var_info + block1->var_info;
} else {
merge_info = block0->var_info + next_var_info;
}
merge_info.clear_last();
merge_info += left;
return set_var_info(std::move(merge_info));
}
case _While: {
// while (block0 || left) block1;
// ... block0 left { block1 block0 left } next
VarDescrList new_var_info{next_var_info};
bool changes = false;
// iterate until the set of variables stops growing; `changes` doubles as the edit flag
// passed to the nested passes and as the loop-exit state
do {
VarDescrList after_cond{new_var_info};
after_cond += left;
code.compute_used_code_vars(block0, after_cond, changes);
code.compute_used_code_vars(block1, block0->var_info, changes);
std::size_t n = new_var_info.size();
new_var_info += block1->var_info;
new_var_info.clear_last();
if (changes) {
break;
}
changes = (new_var_info.size() == n);
} while (changes <= edit);
new_var_info += left;
code.compute_used_code_vars(block0, new_var_info, edit);
return set_var_info(block0->var_info);
}
case _Until: {
// until (block0 || left);
// .. { block0 left } block0 left next
VarDescrList after_cond_first{next_var_info};
after_cond_first += left;
code.compute_used_code_vars(block0, after_cond_first, false);
VarDescrList new_var_info{block0->var_info};
bool changes = false;
// same fixed-point scheme as in the _While case
do {
VarDescrList after_cond{new_var_info};
after_cond += next_var_info;
after_cond += left;
code.compute_used_code_vars(block0, after_cond, changes);
std::size_t n = new_var_info.size();
new_var_info += block0->var_info;
new_var_info.clear_last();
if (changes) {
break;
}
changes = (new_var_info.size() == n);
} while (changes <= edit);
return set_var_info(std::move(new_var_info) + next_var_info);
}
case _Repeat: {
// repeat (left) block0
// left { block0 } next
VarDescrList new_var_info{next_var_info};
bool changes = false;
// same fixed-point scheme as in the _While case
do {
code.compute_used_code_vars(block0, new_var_info, changes);
std::size_t n = new_var_info.size();
new_var_info += block0->var_info;
new_var_info.clear_last();
if (changes) {
break;
}
changes = (new_var_info.size() == n);
} while (changes <= edit);
tolk_assert(left.size() == 1);
// the counter is marked _Last when nothing after the loop header uses it
bool last = new_var_info.count_used(left) == 0;
new_var_info += left;
if (last) {
new_var_info[left[0]]->flags |= VarDescr::_Last;
}
return set_var_info(std::move(new_var_info));
}
case _Again: {
// for(;;) block0
// { block0 }
// starts from an empty set: next_var_info is not consulted for this loop
VarDescrList new_var_info;
bool changes = false;
// same fixed-point scheme as in the _While case
do {
code.compute_used_code_vars(block0, new_var_info, changes);
std::size_t n = new_var_info.size();
new_var_info += block0->var_info;
new_var_info.clear_last();
if (changes) {
break;
}
changes = (new_var_info.size() == n);
} while (changes <= edit);
return set_var_info(std::move(new_var_info));
}
case _TryCatch: {
// both blocks are analyzed against the continuation; `left` is removed from the merged result
code.compute_used_code_vars(block0, next_var_info, edit);
code.compute_used_code_vars(block1, next_var_info, edit);
VarDescrList merge_info = block0->var_info + block1->var_info + next_var_info;
merge_info -= left;
merge_info.clear_last();
return set_var_info(std::move(merge_info));
}
default:
std::cerr << "fatal: unknown operation <??" << cl << "> in compute_used_vars()\n";
throw ParseError{where, "unknown operation"};
}
}
// Removes code that can never be executed from the operation list `ops`, and simplifies
// conditionals and loops whose condition is statically known (using the var_info of the operations).
// May replace `ops` itself. Returns true if control can reach the end of the list,
// false if the list always ends by returning (or never terminates).
// Throws ParseError for an unknown operation class.
bool prune_unreachable(std::unique_ptr<Op>& ops) {
  if (!ops) {
    return true;
  }
  Op& op = *ops;
  if (op.cl == Op::_Nop) {
    // a Nop in the middle of the list is dropped; the final Nop stays
    if (op.next) {
      ops = std::move(op.next);
      return prune_unreachable(ops);
    }
    return true;
  }
  bool reach;
  switch (op.cl) {
    case Op::_IntConst:
    case Op::_SliceConst:
    case Op::_GlobVar:
    case Op::_SetGlob:
    case Op::_Call:
    case Op::_CallInd:
    case Op::_Tuple:
    case Op::_UnTuple:
    case Op::_Import:
      reach = true;
      break;
    case Op::_Let: {
      reach = true;
      break;
    }
    case Op::_Return:
      reach = false;
      break;
    case Op::_If: {
      // if left then block0 else block1; ...
      VarDescr* c_var = op.var_info[op.left[0]];
      if (c_var && c_var->always_true()) {
        // only block0 can run: splice it in place of the whole `if`
        op.block0->last().next = std::move(op.next);
        ops = std::move(op.block0);
        return prune_unreachable(ops);
      } else if (c_var && c_var->always_false()) {
        if (!op.block1) {
          // there is no else-block at all (other passes treat this as legal),
          // so the whole `if` does nothing: drop it instead of dereferencing a null block
          ops = std::move(op.next);
          return prune_unreachable(ops);
        }
        // only block1 can run: splice it in place of the whole `if`
        op.block1->last().next = std::move(op.next);
        ops = std::move(op.block1);
        return prune_unreachable(ops);
      } else {
        // bitwise | on purpose: both branches must be pruned
        reach = static_cast<int>(prune_unreachable(op.block0)) | static_cast<int>(prune_unreachable(op.block1));
      }
      break;
    }
    case Op::_While: {
      // while (block0 || left) block1;
      if (!prune_unreachable(op.block0)) {
        // computation of block0 never returns
        ops = std::move(op.block0);
        return prune_unreachable(ops);
      }
      VarDescr* c_var = op.block0->last().var_info[op.left[0]];
      if (c_var && c_var->always_false()) {
        // block1 never executed
        op.block0->last().next = std::move(op.next);
        ops = std::move(op.block0);
        return prune_unreachable(ops);
      } else if (c_var && c_var->always_true()) {
        if (!prune_unreachable(op.block1)) {
          // block1 never returns
          op.block0->last().next = std::move(op.block1);
          ops = std::move(op.block0);
          return false;
        }
        // infinite loop
        op.cl = Op::_Again;
        op.block0->last().next = std::move(op.block1);
        op.left.clear();
        reach = false;
      } else {
        if (!prune_unreachable(op.block1)) {
          // block1 never returns, while equivalent to block0 ; if left then block1 else next
          op.cl = Op::_If;
          std::unique_ptr<Op> new_op = std::move(op.block0);
          op.block0 = std::move(op.block1);
          op.block1 = std::make_unique<Op>(op.next->where, Op::_Nop);
          // `op` stays valid: the object is now owned by the tail of new_op
          new_op->last().next = std::move(ops);
          ops = std::move(new_op);
        }
        reach = true;  // block1 may be never executed
      }
      break;
    }
    case Op::_Repeat: {
      // repeat (left) block0
      VarDescr* c_var = op.var_info[op.left[0]];
      if (c_var && c_var->always_nonpos()) {
        // loop never executed
        ops = std::move(op.next);
        return prune_unreachable(ops);
      }
      if (c_var && c_var->always_pos()) {
        if (!prune_unreachable(op.block0)) {
          // block0 executed at least once, and it never returns
          // replace code with block0
          ops = std::move(op.block0);
          return false;
        }
      } else {
        prune_unreachable(op.block0);
      }
      reach = true;
      break;
    }
    case Op::_Until:
    case Op::_Again: {
      // do block0 until left; ...
      if (!prune_unreachable(op.block0)) {
        // block0 never returns, replace loop by block0
        ops = std::move(op.block0);
        return false;
      }
      // an endless loop never falls through to the next operation
      reach = (op.cl != Op::_Again);
      break;
    }
    case Op::_TryCatch: {
      // bitwise | on purpose: both blocks must be pruned
      reach = static_cast<int>(prune_unreachable(op.block0)) | static_cast<int>(prune_unreachable(op.block1));
      break;
    }
    default:
      std::cerr << "fatal: unknown operation <??" << op.cl << ">\n";
      throw ParseError{op.where, "unknown operation in prune_unreachable()"};
  }
  if (reach) {
    return prune_unreachable(op.next);
  } else {
    // everything after this operation is dead: keep only the terminating operation of the list
    while (op.next->next) {
      op.next = std::move(op.next->next);
    }
    return false;
  }
}
// Prunes unreachable code from the whole function body.
// Throws ParseError when control can fall off the end of the function.
void CodeBlob::prune_unreachable_code() {
  const bool end_is_reachable = prune_unreachable(ops);
  if (end_is_reachable) {
    throw ParseError{loc, "control reaches end of function"};
  }
}
// Entry point of the forward (abstract value) analysis for the whole function.
// The first operation must be _Import; every imported variable starts with an unknown value,
// except that variables of integer type are already marked as integers.
void CodeBlob::fwd_analyze() {
  tolk_assert(ops && ops->cl == Op::_Import);
  VarDescrList values;
  for (var_idx_t param : ops->left) {
    values += param;
    const bool is_integer = vars[param].v_type->is_int();
    if (is_integer) {
      values[param]->val |= VarDescr::_Int;
    }
  }
  ops->fwd_analyze(values);
}
// Fills `args` with one descriptor per entry of `right`, carrying the abstract value
// currently known for that variable in `values` (or no value when it is unknown).
// `args` is rebuilt from scratch when its size does not match `right`;
// every argument ends up with its "unused" mark cleared.
void Op::prepare_args(VarDescrList values) {
  if (args.size() != right.size()) {
    args.clear();
    for (var_idx_t var : right) {
      args.emplace_back(var);
    }
  }
  for (std::size_t pos = 0; pos < right.size(); pos++) {
    VarDescr& arg = args[pos];
    const VarDescr* known = values[right[pos]];
    if (known == nullptr) {
      arg.clear_value();
    } else {
      arg.set_value(*known);
    }
    arg.clear_unused();
  }
}
// Forward value analysis of this operation and of everything chained after it.
// `values` is what is known about the variables on entry. It is first imported into
// this op's var_info, then updated according to the op class, and finally handed to
// `next`. Returns the value list obtained at the end of the chain (or right after
// this op when there is no `next`).
VarDescrList Op::fwd_analyze(VarDescrList values) {
var_info.import_values(values);
switch (cl) {
case _Nop:
case _Import:
break;
case _Return:
// mark the state following a return as unreachable
values.set_unreachable();
break;
case _IntConst: {
values.add_newval(left[0]).set_const(int_const);
break;
}
case _SliceConst: {
values.add_newval(left[0]).set_const(str_const);
break;
}
case _Call: {
prepare_args(values);
auto func = dynamic_cast<const SymValAsmFunc*>(fun_ref->value);
if (func) {
// asm function: compile it into a throw-away list only to learn what it
// says about its results
std::vector<VarDescr> res;
res.reserve(left.size());
for (var_idx_t i : left) {
res.emplace_back(i);
}
AsmOpList tmp;
func->compile(tmp, res, args, where); // abstract interpretation of res := f (args)
int j = 0;
for (var_idx_t i : left) {
values.add_newval(i).set_value(res[j++]);
}
} else {
// any other callee: results are fresh values with nothing known about them
for (var_idx_t i : left) {
values.add_newval(i);
}
}
break;
}
case _Tuple:
case _UnTuple:
case _GlobVar:
case _CallInd: {
// results are fresh values with nothing known about them
for (var_idx_t i : left) {
values.add_newval(i);
}
break;
}
case _SetGlob:
break;
case _Let: {
// snapshot all right-hand values first, then assign, so that a variable appearing
// on both sides is read before it is overwritten
std::vector<VarDescr> old_val;
tolk_assert(left.size() == right.size());
for (std::size_t i = 0; i < right.size(); i++) {
const VarDescr* ov = values[right[i]];
if (!ov && verbosity >= 5) {
// diagnostic only: a missing source value is reported at high verbosity but is
// not treated as an error (the assertion below is disabled)
std::cerr << "FATAL: error in assignment at right component #" << i << " (no value for _" << right[i] << ")"
<< std::endl;
for (auto x : left) {
std::cerr << '_' << x << " ";
}
std::cerr << "= ";
for (auto x : right) {
std::cerr << '_' << x << " ";
}
std::cerr << std::endl;
}
// tolk_assert(ov);
if (ov) {
old_val.push_back(*ov);
} else {
old_val.emplace_back();
}
}
for (std::size_t i = 0; i < left.size(); i++) {
values.add_newval(left[i]).set_value(std::move(old_val[i]));
}
break;
}
case _If: {
// analyse both branches from the same entry state and merge the results;
// a missing else-branch contributes the entry state unchanged
VarDescrList val1 = block0->fwd_analyze(values);
VarDescrList val2 = block1 ? block1->fwd_analyze(std::move(values)) : std::move(values);
values = val1 | val2;
break;
}
case _Repeat: {
// atl1: the repeat counter is known to be always positive on entry
bool atl1 = (values[left[0]] && values[left[0]]->always_pos());
VarDescrList next_values = block0->fwd_analyze(values);
// iterate until merging the body's exit state into the entry state changes nothing
while (true) {
VarDescrList new_values = values | next_values;
if (same_values(new_values, values)) {
break;
}
values = std::move(new_values);
next_values = block0->fwd_analyze(values);
}
if (atl1) {
// with a counter known positive, continue with the body's exit state alone
values = std::move(next_values);
}
break;
}
case _While: {
// values0 keeps the state before the first evaluation of block0
auto values0 = values;
values = block0->fwd_analyze(values);
if (values[left[0]] && values[left[0]]->always_false()) {
// block1 never executed
block1->fwd_analyze(values);
break;
}
// iterate block1 followed by block0 until the merged state stops changing
while (true) {
VarDescrList next_values = values | block0->fwd_analyze(values0 | block1->fwd_analyze(values));
if (same_values(next_values, values)) {
break;
}
values = std::move(next_values);
}
break;
}
case _Until:
case _Again: {
// find a stable entry state for the body, then run the body once more from it
// to obtain the state that leaves the loop
while (true) {
VarDescrList next_values = values | block0->fwd_analyze(values);
if (same_values(next_values, values)) {
break;
}
values = std::move(next_values);
}
values = block0->fwd_analyze(values);
break;
}
case _TryCatch: {
// both blocks are analysed from the same entry state and their results merged
VarDescrList val1 = block0->fwd_analyze(values);
VarDescrList val2 = block1->fwd_analyze(std::move(values));
values = val1 | val2;
break;
}
default:
std::cerr << "fatal: unknown operation <??" << cl << ">\n";
throw ParseError{where, "unknown operation in fwd_analyze()"};
}
if (next) {
return next->fwd_analyze(std::move(values));
} else {
return values;
}
}
// Sets or clears the _NoReturn bit in `flags` according to `nr` and returns
// `nr` unchanged so the call can be used directly in a return statement.
bool Op::set_noreturn(bool nr) {
  if (!nr) {
    flags &= ~_NoReturn;
    return false;
  }
  flags |= _NoReturn;
  return true;
}
// Computes and stores the _NoReturn flag for this op, all nested blocks and
// every op chained after it. Returns the flag computed for this op.
// Nested blocks are always visited, even when their result does not
// influence this op's flag, because the visit sets their own flags.
bool Op::mark_noreturn() {
  switch (cl) {
    case _Nop:
      if (!next) {
        // the terminating Nop of a chain does return
        return set_noreturn(false);
      }
      // fallthrough
    case _Import:
    case _IntConst:
    case _SliceConst:
    case _Let:
    case _Tuple:
    case _UnTuple:
    case _SetGlob:
    case _GlobVar:
    case _CallInd:
    case _Call:
      // straight-line ops inherit the flag from what follows
      return set_noreturn(next->mark_noreturn());
    case _Return:
      return set_noreturn(true);
    case _If:
    case _TryCatch: {
      const bool first_nr = block0->mark_noreturn();
      const bool second_nr = block1 && block1->mark_noreturn();
      const bool tail_nr = next->mark_noreturn();
      return set_noreturn((first_nr && second_nr) || tail_nr);
    }
    case _Again:
      block0->mark_noreturn();
      return set_noreturn(true);
    case _Until: {
      const bool body_nr = block0->mark_noreturn();
      const bool tail_nr = next->mark_noreturn();
      return set_noreturn(body_nr || tail_nr);
    }
    case _While: {
      block1->mark_noreturn();
      const bool cond_nr = block0->mark_noreturn();
      const bool tail_nr = next->mark_noreturn();
      return set_noreturn(cond_nr || tail_nr);
    }
    case _Repeat:
      block0->mark_noreturn();
      return set_noreturn(next->mark_noreturn());
    default:
      std::cerr << "fatal: unknown operation <??" << cl << ">\n";
      throw ParseError{where, "unknown operation in mark_noreturn()"};
  }
}
void CodeBlob::mark_noreturn() {
ops->mark_noreturn();
}
} // namespace tolk

372
tolk/asmops.cpp Normal file
View file

@ -0,0 +1,372 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tolk.h"
#include <iostream>
namespace tolk {
/*
*
* ASM-OP LIST FUNCTIONS
*
*/
// Returns k when x is strictly positive and has a single bit set (x == 2^k),
// and -1 otherwise.
int is_pos_pow2(td::RefInt256 x) {
  const bool positive = sgn(x) > 0;
  if (!positive || sgn(x & (x - 1)) != 0) {
    return -1;
  }
  return x->bit_size(false) - 1;
}
// Returns k when x == -2^k (k >= 0), and -1 otherwise.
// The "not a negated power of two" sentinel is -1, exactly as in is_pos_pow2().
// Returning 0 for a non-negative x (as the previous code did) is indistinguishable
// from the valid answer for x == -1 == -2^0.
int is_neg_pow2(td::RefInt256 x) {
  if (sgn(x) >= 0) {
    return -1;
  }
  return is_pos_pow2(-x);
}
std::ostream& operator<<(std::ostream& os, AsmOp::SReg stack_reg) {
int i = stack_reg.idx;
if (i >= 0) {
if (i < 16) {
return os << 's' << i;
} else {
return os << i << " s()";
}
} else if (i >= -2) {
return os << "s(" << i << ')';
} else {
return os << i << " s()";
}
}
// Builds a constant-pushing op whose text is "<arg> <push_op>"; `origin` is
// forwarded unchanged to the string-based Const() overload.
AsmOp AsmOp::Const(int arg, std::string push_op, td::RefInt256 origin) {
  std::ostringstream text;
  text << arg;
  text << ' ';
  text << push_op;
  return AsmOp::Const(text.str(), origin);
}
// Builds a custom op "<s(a)> <s(b)> <str>". The two counts passed to Custom()
// are max(a, b) + 1 and that value plus `delta`.
AsmOp AsmOp::make_stk2(int a, int b, const char* str, int delta) {
  const int deepest = std::max(a, b) + 1;
  std::ostringstream text;
  text << SReg(a) << ' ' << SReg(b) << ' ' << str;
  return AsmOp::Custom(text.str(), deepest, deepest + delta);
}
// Builds a custom op "<s(a)> <s(b)> <s(c)> <str>". The two counts passed to
// Custom() are max(a, b, c) + 1 and that value plus `delta`.
AsmOp AsmOp::make_stk3(int a, int b, int c, const char* str, int delta) {
  const int deepest = std::max(a, std::max(b, c)) + 1;
  std::ostringstream text;
  text << SReg(a) << ' ' << SReg(b) << ' ' << SReg(c) << ' ' << str;
  return AsmOp::Custom(text.str(), deepest, deepest + delta);
}
// Builds a block-swap op for block sizes a and b, choosing the shortest
// mnemonic: Xchg(0, 1) for 1/1, ROT / "<b> ROLL" when a == 1,
// -ROT / "<a> -ROLL" when b == 1, and "<a> <b> BLKSWAP" otherwise.
AsmOp AsmOp::BlkSwap(int a, int b) {
  if (a == 1 && b == 1) {
    return AsmOp::Xchg(0, 1);
  }
  std::ostringstream text;
  if (a == 1) {
    if (b != 2) {
      text << b << " ROLL";
    } else {
      text << "ROT";
    }
  } else if (b == 1) {
    if (a != 2) {
      text << a << " -ROLL";
    } else {
      text << "-ROT";
    }
  } else {
    text << a << " " << b << " BLKSWAP";
  }
  const int total = a + b;
  return AsmOp::Custom(text.str(), total, total);
}
// Builds a block-push op: Push(b) when a == 1, "2DUP" for a == 2 and b == 1,
// and "<a> <b> BLKPUSH" otherwise.
AsmOp AsmOp::BlkPush(int a, int b) {
  if (a == 1) {
    return AsmOp::Push(b);
  }
  std::ostringstream text;
  const bool is_2dup = (a == 2 && b == 1);
  if (is_2dup) {
    text << "2DUP";
  } else {
    text << a << " " << b << " BLKPUSH";
  }
  return AsmOp::Custom(text.str(), b + 1, a + b + 1);
}
// Builds an op dropping the top `a` entries: Pop() for one entry, "2DROP" for
// two, and "<a> BLKDROP" otherwise.
AsmOp AsmOp::BlkDrop(int a) {
  if (a == 1) {
    return AsmOp::Pop();
  }
  std::ostringstream text;
  if (a != 2) {
    text << a << " BLKDROP";
  } else {
    text << "2DROP";
  }
  return AsmOp::Custom(text.str(), a, 0);
}
// Builds "<a> <b> BLKDROP2" (a custom op taking a + b entries and leaving b);
// with b == 0 this degenerates to BlkDrop(a).
AsmOp AsmOp::BlkDrop2(int a, int b) {
  if (b == 0) {
    return BlkDrop(a);
  }
  std::ostringstream text;
  text << a << " " << b << " BLKDROP2";
  return AsmOp::Custom(text.str(), a + b, b);
}
// Builds "<a> <b> REVERSE" as a custom op that takes and leaves a + b entries.
AsmOp AsmOp::BlkReverse(int a, int b) {
  const int total = a + b;
  std::ostringstream text;
  text << a << " " << b << " REVERSE";
  return AsmOp::Custom(text.str(), total, total);
}
// Builds an op packing the top `a` entries into one tuple, using the
// dedicated mnemonics SINGLE / PAIR / TRIPLE for sizes 1..3 and "<a> TUPLE"
// for every other size.
AsmOp AsmOp::Tuple(int a) {
  if (a == 1) {
    return AsmOp::Custom("SINGLE", 1, 1);
  }
  if (a == 2) {
    return AsmOp::Custom("PAIR", 2, 1);
  }
  if (a == 3) {
    return AsmOp::Custom("TRIPLE", 3, 1);
  }
  std::ostringstream text;
  text << a << " TUPLE";
  return AsmOp::Custom(text.str(), a, 1);
}
// Builds an op unpacking one tuple into `a` entries, using the dedicated
// mnemonics UNSINGLE / UNPAIR / UNTRIPLE for sizes 1..3 and "<a> UNTUPLE"
// for every other size.
AsmOp AsmOp::UnTuple(int a) {
  if (a == 1) {
    return AsmOp::Custom("UNSINGLE", 1, 1);
  }
  if (a == 2) {
    return AsmOp::Custom("UNPAIR", 1, 2);
  }
  if (a == 3) {
    return AsmOp::Custom("UNTRIPLE", 1, 3);
  }
  std::ostringstream text;
  text << a << " UNTUPLE";
  return AsmOp::Custom(text.str(), 1, a);
}
// Chooses the op used to push the integer constant x. Checks are made in
// this order: fits in 8 signed bits -> PUSHINT; invalid value -> PUSHNAN;
// x == 2^k -> PUSHPOW2; x + 1 == 2^k -> PUSHPOW2DEC; -x == 2^k -> PUSHNEGPOW2;
// x->mod_pow2_short(23) == 0 -> PUSHINTX; anything else -> PUSHINT.
AsmOp AsmOp::IntConst(td::RefInt256 x) {
  if (x->signed_fits_bits(8)) {
    return AsmOp::Const(dec_string(x) + " PUSHINT", x);
  }
  if (!x->is_valid()) {
    return AsmOp::Const("PUSHNAN", x);
  }
  const int exp_pos = is_pos_pow2(x);
  if (exp_pos >= 0) {
    return AsmOp::Const(exp_pos, "PUSHPOW2", x);
  }
  const int exp_dec = is_pos_pow2(x + 1);
  if (exp_dec >= 0) {
    return AsmOp::Const(exp_dec, "PUSHPOW2DEC", x);
  }
  const int exp_neg = is_pos_pow2(-x);
  if (exp_neg >= 0) {
    return AsmOp::Const(exp_neg, "PUSHNEGPOW2", x);
  }
  if (x->mod_pow2_short(23)) {
    return AsmOp::Const(dec_string(x) + " PUSHINT", x);
  }
  return AsmOp::Const(dec_string(x) + " PUSHINTX", x);
}
// Builds the constant op "TRUE" or "FALSE" for the given boolean.
AsmOp AsmOp::BoolConst(bool f) {
  const char* mnemonic = "FALSE";
  if (f) {
    mnemonic = "TRUE";
  }
  return AsmOp::Const(mnemonic);
}
// Turns an assembler mnemonic into an AsmOp. The stack primitives NOP, SWAP,
// DROP, NIP, DUP and OVER map to their structured ops; any other text becomes
// a custom op.
AsmOp AsmOp::Parse(std::string custom_op) {
  if (custom_op == "NOP") {
    return AsmOp::Nop();
  }
  if (custom_op == "SWAP") {
    return AsmOp::Xchg(1);
  }
  if (custom_op == "DROP") {
    return AsmOp::Pop(0);
  }
  if (custom_op == "NIP") {
    return AsmOp::Pop(1);
  }
  if (custom_op == "DUP") {
    return AsmOp::Push(0);
  }
  if (custom_op == "OVER") {
    return AsmOp::Push(1);
  }
  return AsmOp::Custom(custom_op);
}
// Same as Parse(custom_op), but when the result is a custom op its `a` and
// `b` fields are set to `args` and `retv`. Recognised stack primitives are
// returned untouched.
AsmOp AsmOp::Parse(std::string custom_op, int args, int retv) {
  AsmOp parsed = Parse(custom_op);
  if (!parsed.is_custom()) {
    return parsed;
  }
  parsed.a = args;
  parsed.b = retv;
  return parsed;
}
// Writes the textual form of this op (no indentation, no newline).
// A non-empty `op` string is printed verbatim. Otherwise the text is derived
// from the op kind, using the short mnemonics NOP/SWAP, DUP/OVER and DROP/NIP
// where the register indices allow it. Kinds that cannot be printed without
// an `op` string raise Fatal.
void AsmOp::out(std::ostream& os) const {
  if (!op.empty()) {
    os << op;
    return;
  }
  switch (t) {
    case a_none:
      return;
    case a_xchg:
      if (a == 0 && (b & -2) == 0) {
        // s0 <-> s0 is a no-op, s0 <-> s1 is SWAP
        os << (b == 0 ? "NOP" : "SWAP");
      } else {
        os << SReg(a) << ' ' << SReg(b) << " XCHG";
      }
      return;
    case a_push:
      if ((a & -2) == 0) {
        os << (a == 0 ? "DUP" : "OVER");
      } else {
        os << SReg(a) << " PUSH";
      }
      return;
    case a_pop:
      if ((a & -2) == 0) {
        os << (a == 0 ? "DROP" : "NIP");
      } else {
        os << SReg(a) << " POP";
      }
      return;
    default:
      throw Fatal{"unknown assembler operation"};
  }
}
// Writes the op preceded by one indentation unit per `indent` level and,
// unless `no_eol` is set, followed by a flushed end-of-line.
void AsmOp::out_indent_nl(std::ostream& os, bool no_eol) const {
  int remaining = indent;
  while (remaining > 0) {
    os << " ";
    --remaining;
  }
  out(os);
  if (no_eol) {
    return;
  }
  os << std::endl;
}
// Returns the textual form of this op: the stored `op` string when present,
// otherwise whatever out() prints.
std::string AsmOp::to_string() const {
  if (op.empty()) {
    std::ostringstream text;
    out(text);
    return text.str();
  }
  return op;
}
bool AsmOpList::append(const std::vector<AsmOp>& ops) {
for (const auto& op : ops) {
if (!append(op)) {
return false;
}
}
return true;
}
// Returns the index of `new_const` in the constant table, adding it when no
// equal constant (per td::cmp) is registered yet. A null constant is never
// stored and yields `not_const`.
const_idx_t AsmOpList::register_const(Const new_const) {
  if (new_const.is_null()) {
    return not_const;
  }
  unsigned pos = 0;
  while (pos < constants_.size()) {
    if (!td::cmp(new_const, constants_[pos])) {
      return pos;
    }
    ++pos;
  }
  constants_.push_back(std::move(new_const));
  return (const_idx_t)pos;
}
// Returns the registered constant with the given index, or a default
// constructed Const when the index is outside the table (negative indices
// included, thanks to the unsigned comparison).
Const AsmOpList::get_const(const_idx_t idx) {
  const bool out_of_range = (unsigned)idx >= constants_.size();
  if (out_of_range) {
    return {};
  }
  return constants_[idx];
}
// Prints a variable: via its entry in the name table when one is available
// for `idx`, otherwise as "_<idx>".
void AsmOpList::show_var(std::ostream& os, var_idx_t idx) const {
  const bool has_entry = var_names_ && (unsigned)idx < var_names_->size();
  if (has_entry) {
    var_names_->at(idx).show(os, 2);
    return;
  }
  os << '_' << idx;
}
// Prints a (variable, constant) stack entry: the variable as in show_var(),
// followed by "=<constant>" when the constant index refers to a non-null
// registered constant.
void AsmOpList::show_var_ext(std::ostream& os, std::pair<var_idx_t, const_idx_t> idx_pair) const {
  const auto var = idx_pair.first;
  const auto cst = idx_pair.second;
  const bool has_entry = var_names_ && (unsigned)var < var_names_->size();
  if (has_entry) {
    var_names_->at(var).show(os, 2);
  } else {
    os << '_' << var;
  }
  const bool has_const = (unsigned)cst < constants_.size() && constants_[cst].not_null();
  if (has_const) {
    os << '=' << constants_[cst];
  }
}
// Prints the whole list, one op per line.
// Without bit 2 of `mode`, every op goes on its own line. With it, a
// non-comment op that is followed by a run of comment ops is printed on one
// line together with a tab and the LAST comment of that run; the earlier
// comments of the run are not printed.
void AsmOpList::out(std::ostream& os, int mode) const {
  const bool join_comments = (mode & 2) != 0;
  if (!join_comments) {
    for (const auto& op : list_) {
      op.out_indent_nl(os);
    }
    return;
  }
  const std::size_t total = list_.size();
  std::size_t pos = 0;
  while (pos < total) {
    const auto& current = list_[pos];
    const bool has_trailing_comment =
        !current.is_comment() && pos + 1 < total && list_[pos + 1].is_comment();
    if (!has_trailing_comment) {
      current.out_indent_nl(os, false);
      ++pos;
      continue;
    }
    current.out_indent_nl(os, true);
    os << '\t';
    // advance to the last comment of the run
    do {
      ++pos;
    } while (pos + 1 < total && list_[pos + 1].is_comment());
    list_[pos].out(os);
    os << std::endl;
    ++pos;
  }
}
// Applies the stack effect of `op` to `trans`. Returns false when `trans` is
// already invalid, when the op's effect cannot be expressed as a stack
// transform, or when the underlying apply_* call fails.
bool apply_op(StackTransform& trans, const AsmOp& op) {
  if (trans.is_valid()) {
    switch (op.t) {
      case AsmOp::a_none:
        return true;
      case AsmOp::a_xchg:
        return trans.apply_xchg(op.a, op.b, true);
      case AsmOp::a_push:
        return trans.apply_push(op.a);
      case AsmOp::a_pop:
        return trans.apply_pop(op.a);
      case AsmOp::a_const:
        // only a constant op with a == 0 and b == 1 counts as a push of a new constant
        if (op.a || op.b != 1) {
          return false;
        }
        return trans.apply_push_newconst();
      case AsmOp::a_custom:
        if (!op.is_gconst()) {
          return false;
        }
        return trans.apply_push_newconst();
      default:
        break;
    }
  }
  return false;
}
} // namespace tolk

1231
tolk/builtins.cpp Normal file

File diff suppressed because it is too large Load diff

910
tolk/codegen.cpp Normal file
View file

@ -0,0 +1,910 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tolk.h"
namespace tolk {
/*
*
* GENERATE TVM STACK CODE
*
*/
// Returns the variable indices currently on the stack, in storage order of
// `s`, with the constant annotations dropped.
StackLayout Stack::vars() const {
  StackLayout layout;
  layout.reserve(s.size());
  for (const auto& entry : s) {
    layout.push_back(entry.first);
  }
  return layout;
}
// Returns the first position i >= from with at(i) holding `var`, or -1 when
// there is none.
int Stack::find(var_idx_t var, int from) const {
  int pos = from;
  while (pos < depth()) {
    if (at(pos).first == var) {
      return pos;
    }
    ++pos;
  }
  return -1;
}
// Returns the first position of `var` within [from .. to), clipped to the
// current stack depth, or -1 when it is not there.
int Stack::find(var_idx_t var, int from, int to) const {
  int pos = from;
  while (pos < depth() && pos < to) {
    if (at(pos).first == var) {
      return pos;
    }
    ++pos;
  }
  return -1;
}
// Returns a position of `var` that lies outside [from .. to), or -1.
// A negative `from` is treated as 0. An empty range degenerates to a plain
// find(). Positions below the range are preferred over positions at or
// above `to`.
int Stack::find_outside(var_idx_t var, int from, int to) const {
  from = std::max(from, 0);
  if (from >= to) {
    return find(var);
  }
  const int below = find(var, 0, from);
  if (below >= 0) {
    return below;
  }
  return find(var, to);
}
// Returns the first position i >= from whose entry is annotated with the
// constant index `cst`, or -1 when there is none.
int Stack::find_const(const_idx_t cst, int from) const {
  int pos = from;
  while (pos < depth()) {
    if (at(pos).second == cst) {
      return pos;
    }
    ++pos;
  }
  return -1;
}
// Drops the constant annotation of every stack entry; the variables
// themselves stay where they are.
void Stack::forget_const() {
  for (auto& entry : s) {
    entry.second = not_const;
  }
}
// Emits Pop(i) (when output is enabled) and mirrors it on the simulated
// stack: entry i is overwritten with the top entry, and the top is removed.
void Stack::issue_pop(int i) {
  validate(i);
  const bool emit = output_enabled();
  if (emit) {
    o << AsmOp::Pop(i);
  }
  at(i) = get(0);
  s.pop_back();
  modified();
}
// Emits Push(i) (when output is enabled) and mirrors it on the simulated
// stack by appending a copy of entry i.
void Stack::issue_push(int i) {
  validate(i);
  const bool emit = output_enabled();
  if (emit) {
    o << AsmOp::Push(i);
  }
  s.push_back(get(i));
  modified();
}
// Exchanges entries i and j, emitting Xchg(i, j) when output is enabled.
// Nothing is emitted or changed when i == j or when both entries are equal.
void Stack::issue_xchg(int i, int j) {
  validate(i);
  validate(j);
  if (i == j) {
    return;
  }
  if (!(get(i) != get(j))) {
    return;
  }
  if (output_enabled()) {
    o << AsmOp::Xchg(i, j);
  }
  std::swap(at(i), at(j));
  modified();
}
int Stack::drop_vars_except(const VarDescrList& var_info, int excl_var) {
int dropped = 0, changes;
do {
changes = 0;
int n = depth();
for (int i = 0; i < n; i++) {
var_idx_t idx = at(i).first;
if (((!var_info[idx] || var_info[idx]->is_unused()) && idx != excl_var) || find(idx, 0, i - 1) >= 0) {
// unneeded
issue_pop(i);
changes = 1;
break;
}
}
dropped += changes;
} while (changes);
return dropped;
}
void Stack::show(int flags) {
std::ostringstream os;
for (auto i : s) {
os << ' ';
o.show_var_ext(os, i);
}
o << AsmOp::Comment(os.str());
mode |= _Shown;
}
// Replaces every stack entry holding variable `idx` with a garbage
// placeholder that carries no constant.
void Stack::forget_var(var_idx_t idx) {
  for (auto& entry : s) {
    if (entry.first != idx) {
      continue;
    }
    entry = std::make_pair(_Garbage, not_const);
    modified();
  }
}
// Records that a fresh value of variable `idx` is now on top of the stack,
// after invalidating any older copies. No code is emitted here.
void Stack::push_new_var(var_idx_t idx) {
  forget_var(idx);
  s.push_back(std::make_pair(idx, not_const));
  modified();
}
// Records that a fresh value of variable `idx`, known to equal constant
// `cidx`, is now on top of the stack, after invalidating any older copies of
// the variable. No code is emitted here.
void Stack::push_new_const(var_idx_t idx, const_idx_t cidx) {
  forget_var(idx);
  s.push_back(std::make_pair(idx, cidx));
  modified();
}
// Renames the first stack entry holding `old_idx` to `new_idx` without
// emitting any code. `old_idx` must be on the stack.
void Stack::assign_var(var_idx_t new_idx, var_idx_t old_idx) {
  int i = find(old_idx);
  tolk_assert(i >= 0 && "variable not found in stack");
  if (new_idx == old_idx) {
    return;
  }
  at(i).first = new_idx;
  modified();
}
// Makes `new_idx` a copy of `old_idx` while keeping `old_idx` available:
// when the stack holds only one copy of `old_idx` it is duplicated first,
// then one copy is renamed to `new_idx`.
void Stack::do_copy_var(var_idx_t new_idx, var_idx_t old_idx) {
  int i = find(old_idx);
  tolk_assert(i >= 0 && "variable not found in stack");
  const bool single_copy = find(old_idx, i + 1) < 0;
  if (single_copy) {
    issue_push(i);
    tolk_assert(at(0).first == old_idx);
  }
  assign_var(new_idx, old_idx);
}
// Emits pops, pushes and exchanges until the stack consists of exactly the variables
// listed in req_stack, in the same order as the underlying vector `s`
// (s[i] == req_stack[i] for every i). Entries not required are dropped.
void Stack::enforce_state(const StackLayout& req_stack) {
int k = (int)req_stack.size();
for (int i = 0; i < k; i++) {
var_idx_t x = req_stack[i];
// slot i already holds the required variable
if (i < depth() && s[i].first == x) {
continue;
}
while (depth() > 0 && std::find(req_stack.cbegin(), req_stack.cend(), get(0).first) == req_stack.cend()) {
// current TOS entry is unused in req_stack, drop it
issue_pop(0);
}
int j = find(x);
// the only copy found lies inside the part that is already arranged (slots below i):
// duplicate it instead of moving it away
if (j >= depth() - i) {
issue_push(j);
j = 0;
}
// move x into slot i; counted from the top, that slot is position depth() - i - 1
issue_xchg(j, depth() - i - 1);
tolk_assert(s[i].first == x);
}
// whatever remains above the first k slots is not wanted
while (depth() > k) {
issue_pop(0);
}
tolk_assert(depth() == k);
for (int i = 0; i < k; i++) {
tolk_assert(s[i].first == req_stack[i]);
}
}
// Keeps a constant annotation only where this stack and `req_stack` agree on
// it. Both stacks must hold the same variables in the same positions.
void Stack::merge_const(const Stack& req_stack) {
  tolk_assert(s.size() == req_stack.s.size());
  const std::size_t count = s.size();
  for (std::size_t i = 0; i != count; ++i) {
    tolk_assert(s[i].first == req_stack.s[i].first);
    const bool same_const = (s[i].second == req_stack.s[i].second);
    if (!same_const) {
      s[i].second = not_const;
    }
  }
}
void Stack::merge_state(const Stack& req_stack) {
enforce_state(req_stack.vars());
merge_const(req_stack);
}
// Arranges the stack so that the variables listed in `top` end up in the top-most
// top.size() positions (the assertions below check each placement).
// last[i] tells whether top[i] may be moved there (true) or whether a fresh copy
// has to be pushed (false). Missing entries of `last` default to false.
void Stack::rearrange_top(const StackLayout& top, std::vector<bool> last) {
while (last.size() < top.size()) {
last.push_back(false);
}
int k = (int)top.size();
// a variable occurring several times in `top` can be moved only for its final
// occurrence; all earlier occurrences are forced to be copies
for (int i = 0; i < k; i++) {
for (int j = i + 1; j < k; j++) {
if (top[i] == top[j]) {
last[i] = false;
break;
}
}
}
// ss = number of entries that will be moved rather than copied
int ss = 0;
for (int i = 0; i < k; i++) {
if (last[i]) {
++ss;
}
}
for (int i = 0; i < k; i++) {
var_idx_t x = top[i];
// find s(j) containing x with j not in [ss, ss+i)
int j = find_outside(x, ss, ss + i);
if (last[i]) {
// rearrange x to be at s(ss-1)
issue_xchg(--ss, j);
tolk_assert(get(ss).first == x);
} else {
// create a new copy of x
issue_push(j);
issue_xchg(0, ss);
tolk_assert(get(ss).first == x);
}
}
// every movable entry has been consumed
tolk_assert(!ss);
}
// Brings variable `top` to the top of the stack: by exchanging it with the
// current top when `last` is set, otherwise by pushing a copy.
void Stack::rearrange_top(var_idx_t top, bool last) {
  const int pos = find(top);
  if (!last) {
    issue_push(pos);
  } else {
    issue_xchg(0, pos);
  }
  tolk_assert(get(0).first == top);
}
// Generates stack code for this single operation into stack.o and updates the
// simulated stack accordingly.
// Returns true when the caller should go on with `next`, and false when it must not
// (the op ends the control flow, or it has already generated the code of `next` itself);
// see Op::generate_code_all().
bool Op::generate_code_step(Stack& stack) {
stack.opt_show();
// get rid of everything this op does not need any more
stack.drop_vars_except(var_info);
stack.opt_show();
bool inline_func = stack.mode & Stack::_InlineFunc;
switch (cl) {
case _Nop:
case _Import:
return true;
case _Return: {
// leave exactly the returned variables on the stack
stack.enforce_state(left);
if (stack.o.retalt_ && (stack.mode & Stack::_NeedRetAlt)) {
stack.o << "RETALT";
}
stack.opt_show();
return false;
}
case _IntConst: {
// nothing to emit when the result is not used afterwards
auto p = next->var_info[left[0]];
if (!p || p->is_unused()) {
return true;
}
auto cidx = stack.o.register_const(int_const);
int i = stack.find_const(cidx);
if (i < 0) {
stack.o << push_const(int_const);
stack.push_new_const(left[0], cidx);
} else {
// the same constant is already on the stack: reuse that entry instead of pushing again
tolk_assert(stack.at(i).second == cidx);
stack.do_copy_var(left[0], stack[i]);
}
return true;
}
case _SliceConst: {
// nothing to emit when the result is not used afterwards
auto p = next->var_info[left[0]];
if (!p || p->is_unused()) {
return true;
}
stack.o << AsmOp::Const("x{" + str_const + "} PUSHSLICE");
stack.push_new_var(left[0]);
return true;
}
case _GlobVar:
if (dynamic_cast<const SymValGlobVar*>(fun_ref->value)) {
// reading a global variable
bool used = false;
for (auto i : left) {
auto p = next->var_info[i];
if (p && !p->is_unused()) {
used = true;
}
}
if (!used || disabled()) {
return true;
}
std::string name = symbols.get_name(fun_ref->sym_idx);
stack.o << AsmOp::Custom(name + " GETGLOB", 0, 1);
// a global spanning several variables is unpacked after being read
if (left.size() != 1) {
tolk_assert(left.size() <= 15);
stack.o << AsmOp::UnTuple((int)left.size());
}
for (auto i : left) {
stack.push_new_var(i);
}
return true;
} else {
// fun_ref is not a global variable: emit a CONT:<{ ... }> block whose body invokes it
tolk_assert(left.size() == 1);
auto p = next->var_info[left[0]];
if (!p || p->is_unused() || disabled()) {
return true;
}
stack.o << "CONT:<{";
stack.o.indent();
auto func = dynamic_cast<SymValAsmFunc*>(fun_ref->value);
if (func) {
// TODO: create and compile a true lambda instead of this (so that arg_order and ret_order would work correctly)
std::vector<VarDescr> args0, res;
TypeExpr::remove_indirect(func->sym_type);
tolk_assert(func->get_type()->is_map());
auto wr = func->get_type()->args.at(0)->get_width();
auto wl = func->get_type()->args.at(1)->get_width();
tolk_assert(wl >= 0 && wr >= 0);
for (int i = 0; i < wl; i++) {
res.emplace_back(0);
}
for (int i = 0; i < wr; i++) {
args0.emplace_back(0);
}
func->compile(stack.o, res, args0, where); // compile res := f (args0)
} else {
std::string name = symbols.get_name(fun_ref->sym_idx);
stack.o << AsmOp::Custom(name + " CALLDICT", (int)right.size(), (int)left.size());
}
stack.o.undent();
stack.o << "}>";
stack.push_new_var(left.at(0));
return true;
}
case _Let: {
tolk_assert(left.size() == right.size());
int i = 0;
// active[k]: the k-th target is still used after this op
std::vector<bool> active;
active.reserve(left.size());
for (std::size_t k = 0; k < left.size(); k++) {
var_idx_t y = left[k]; // "y" = "x"
auto p = next->var_info[y];
active.push_back(p && !p->is_unused());
}
// first pass: bind every needed source value to a temporary negative index (-1, -2, ...),
// renaming the stack entry when this is its last use and copying it otherwise
for (std::size_t k = 0; k < left.size(); k++) {
if (!active[k]) {
continue;
}
var_idx_t x = right[k]; // "y" = "x"
bool is_last = true;
for (std::size_t l = k + 1; l < right.size(); l++) {
if (right[l] == x && active[l]) {
is_last = false;
}
}
if (is_last) {
auto info = var_info[x];
is_last = (info && info->is_last());
}
if (is_last) {
stack.assign_var(--i, x);
} else {
stack.do_copy_var(--i, x);
}
}
// second pass: rename the temporaries to the real targets
i = 0;
for (std::size_t k = 0; k < left.size(); k++) {
if (active[k]) {
stack.assign_var(left[k], --i);
}
}
return true;
}
case _Tuple:
case _UnTuple: {
if (disabled()) {
return true;
}
std::vector<bool> last;
for (var_idx_t x : right) {
last.push_back(var_info[x] && var_info[x]->is_last());
}
stack.rearrange_top(right, std::move(last));
stack.opt_show();
// k = stack depth below the operands
int k = (int)stack.depth() - (int)right.size();
tolk_assert(k >= 0);
if (cl == _Tuple) {
stack.o << AsmOp::Tuple((int)right.size());
tolk_assert(left.size() == 1);
} else {
stack.o << AsmOp::UnTuple((int)left.size());
tolk_assert(right.size() == 1);
}
// operands are consumed, results take their place
stack.s.resize(k);
for (int i = 0; i < (int)left.size(); i++) {
stack.push_new_var(left.at(i));
}
return true;
}
case _Call:
case _CallInd: {
if (disabled()) {
return true;
}
SymValFunc* func = (fun_ref ? dynamic_cast<SymValFunc*>(fun_ref->value) : nullptr);
auto arg_order = (func ? func->get_arg_order() : nullptr);
auto ret_order = (func ? func->get_ret_order() : nullptr);
tolk_assert(!arg_order || arg_order->size() == right.size());
tolk_assert(!ret_order || ret_order->size() == left.size());
// right1 = arguments in the order they have to appear on the stack
// (arguments whose descriptor is marked unused are left out)
std::vector<var_idx_t> right1;
if (args.size()) {
tolk_assert(args.size() == right.size());
for (int i = 0; i < (int)right.size(); i++) {
int j = arg_order ? arg_order->at(i) : i;
const VarDescr& arg = args.at(j);
if (!arg.is_unused()) {
tolk_assert(var_info[arg.idx] && !var_info[arg.idx]->is_unused());
right1.push_back(arg.idx);
}
}
} else if (arg_order) {
for (int i = 0; i < (int)right.size(); i++) {
right1.push_back(right.at(arg_order->at(i)));
}
} else {
right1 = right;
}
std::vector<bool> last;
for (var_idx_t x : right1) {
last.push_back(var_info[x] && var_info[x]->is_last());
}
stack.rearrange_top(right1, std::move(last));
stack.opt_show();
// k = stack depth below the arguments
int k = (int)stack.depth() - (int)right1.size();
tolk_assert(k >= 0);
for (int i = 0; i < (int)right1.size(); i++) {
if (stack.s[k + i].first != right1[i]) {
// dump the code generated so far before the assertion below fires
std::cerr << stack.o;
}
tolk_assert(stack.s[k + i].first == right1[i]);
}
// emits CALLXARGS when both counts are at most 15, otherwise pushes the counts
// and emits CALLXVARARGS
auto exec_callxargs = [&](int args, int ret) {
if (args <= 15 && ret <= 15) {
stack.o << exec_arg2_op("CALLXARGS", args, ret, args + 1, ret);
} else {
tolk_assert(args <= 254 && ret <= 254);
stack.o << AsmOp::Const(PSTRING() << args << " PUSHINT");
stack.o << AsmOp::Const(PSTRING() << ret << " PUSHINT");
stack.o << AsmOp::Custom("CALLXVARARGS", args + 3, ret);
}
};
if (cl == _CallInd) {
// the argument count passed on is right.size() - 1
exec_callxargs((int)right.size() - 1, (int)left.size());
} else {
auto func = dynamic_cast<const SymValAsmFunc*>(fun_ref->value);
if (func) {
std::vector<VarDescr> res;
res.reserve(left.size());
for (var_idx_t i : left) {
res.emplace_back(i);
}
func->compile(stack.o, res, args, where); // compile res := f (args)
} else {
auto fv = dynamic_cast<const SymValCodeFunc*>(fun_ref->value);
std::string name = symbols.get_name(fun_ref->sym_idx);
// NOTE(review): the two low bits of fv->flags presumably mark inline functions - confirm against SymValCodeFunc
bool is_inline = (fv && (fv->flags & 3));
if (is_inline) {
stack.o << AsmOp::Custom(name + " INLINECALLDICT", (int)right.size(), (int)left.size());
} else if (fv && fv->code && fv->code->require_callxargs) {
stack.o << AsmOp::Custom(name + (" PREPAREDICT"), 0, 2);
exec_callxargs((int)right.size() + 1, (int)left.size());
} else {
stack.o << AsmOp::Custom(name + " CALLDICT", (int)right.size(), (int)left.size());
}
}
}
// arguments are consumed, results take their place (in ret_order when one is given)
stack.s.resize(k);
for (int i = 0; i < (int)left.size(); i++) {
int j = ret_order ? ret_order->at(i) : i;
stack.push_new_var(left.at(j));
}
return true;
}
case _SetGlob: {
tolk_assert(fun_ref && dynamic_cast<const SymValGlobVar*>(fun_ref->value));
std::vector<bool> last;
for (var_idx_t x : right) {
last.push_back(var_info[x] && var_info[x]->is_last());
}
stack.rearrange_top(right, std::move(last));
stack.opt_show();
int k = (int)stack.depth() - (int)right.size();
tolk_assert(k >= 0);
for (int i = 0; i < (int)right.size(); i++) {
if (stack.s[k + i].first != right[i]) {
// dump the code generated so far before the assertion below fires
std::cerr << stack.o;
}
tolk_assert(stack.s[k + i].first == right[i]);
}
// several values are packed into one tuple before being stored
if (right.size() > 1) {
stack.o << AsmOp::Tuple((int)right.size());
}
if (!right.empty()) {
std::string name = symbols.get_name(fun_ref->sym_idx);
stack.o << AsmOp::Custom(name + " SETGLOB", 1, 0);
}
stack.s.resize(k);
return true;
}
case _If: {
if (block0->is_empty() && block1->is_empty()) {
return true;
}
if (!next->noreturn() && (block0->noreturn() != block1->noreturn())) {
stack.o.retalt_ = true;
}
// bring the condition to the top and consume it
var_idx_t x = left[0];
stack.rearrange_top(x, var_info[x] && var_info[x]->is_last());
tolk_assert(stack[0] == x);
stack.opt_show();
stack.s.pop_back();
stack.modified();
if (inline_func && (block0->noreturn() || block1->noreturn())) {
// inline-function mode with a non-returning branch: that branch goes into the first
// block, the other branch together with the code of `next` goes into the ELSE block
bool is0 = block0->noreturn();
Op* block_noreturn = is0 ? block0.get() : block1.get();
Op* block_other = is0 ? block1.get() : block0.get();
stack.mode &= ~Stack::_InlineFunc;
stack.o << (is0 ? "IF:<{" : "IFNOT:<{");
stack.o.indent();
Stack stack_copy{stack};
block_noreturn->generate_code_all(stack_copy);
stack.o.undent();
stack.o << "}>ELSE<{";
stack.o.indent();
block_other->generate_code_all(stack);
if (!block_other->noreturn()) {
next->generate_code_all(stack);
}
stack.o.undent();
stack.o << "}>";
// `next` has been generated above
return false;
}
if (block1->is_empty() || block0->is_empty()) {
// only one branch contains code
bool is0 = block1->is_empty();
Op* block = is0 ? block0.get() : block1.get();
// if (left) block0; ...
// if (!left) block1; ...
if (block->noreturn()) {
stack.o << (is0 ? "IFJMP:<{" : "IFNOTJMP:<{");
stack.o.indent();
Stack stack_copy{stack};
stack_copy.mode &= ~Stack::_InlineFunc;
stack_copy.mode |= next->noreturn() ? 0 : Stack::_NeedRetAlt;
block->generate_code_all(stack_copy);
stack.o.undent();
stack.o << "}>";
return true;
}
stack.o << (is0 ? "IF:<{" : "IFNOT:<{");
stack.o.indent();
// stack_target: the layout wanted after the `if`, computed with output disabled
Stack stack_copy{stack}, stack_target{stack};
stack_target.disable_output();
stack_target.drop_vars_except(next->var_info);
stack_copy.mode &= ~Stack::_InlineFunc;
block->generate_code_all(stack_copy);
stack_copy.drop_vars_except(var_info);
stack_copy.opt_show();
// the branch left the stack layout unchanged: no ELSE part is needed
if ((is0 && stack_copy == stack) || (!is0 && stack_copy.vars() == stack.vars())) {
stack.o.undent();
stack.o << "}>";
if (!is0) {
stack.merge_const(stack_copy);
}
return true;
}
// stack_copy.drop_vars_except(next->var_info);
stack_copy.enforce_state(stack_target.vars());
stack_copy.opt_show();
if (stack_copy.vars() == stack.vars()) {
stack.o.undent();
stack.o << "}>";
stack.merge_const(stack_copy);
return true;
}
// layouts still differ: the ELSE part only rearranges the stack to match the branch
stack.o.undent();
stack.o << "}>ELSE<{";
stack.o.indent();
stack.merge_state(stack_copy);
stack.opt_show();
stack.o.undent();
stack.o << "}>";
return true;
}
if (block0->noreturn() || block1->noreturn()) {
// both branches contain code and at least one never returns: that one becomes an
// IFJMP/IFNOTJMP body, the other one is emitted inline after it
bool is0 = block0->noreturn();
Op* block_noreturn = is0 ? block0.get() : block1.get();
Op* block_other = is0 ? block1.get() : block0.get();
stack.o << (is0 ? "IFJMP:<{" : "IFNOTJMP:<{");
stack.o.indent();
Stack stack_copy{stack};
stack_copy.mode &= ~Stack::_InlineFunc;
stack_copy.mode |= (block_other->noreturn() || next->noreturn()) ? 0 : Stack::_NeedRetAlt;
block_noreturn->generate_code_all(stack_copy);
stack.o.undent();
stack.o << "}>";
block_other->generate_code_all(stack);
return !block_other->noreturn();
}
// general case: both branches return; the ELSE branch is brought to the layout
// produced by the IF branch
stack.o << "IF:<{";
stack.o.indent();
Stack stack_copy{stack};
stack_copy.mode &= ~Stack::_InlineFunc;
block0->generate_code_all(stack_copy);
stack_copy.drop_vars_except(next->var_info);
stack_copy.opt_show();
stack.o.undent();
stack.o << "}>ELSE<{";
stack.o.indent();
stack.mode &= ~Stack::_InlineFunc;
block1->generate_code_all(stack);
stack.merge_state(stack_copy);
stack.opt_show();
stack.o.undent();
stack.o << "}>";
return true;
}
case _Repeat: {
// bring the counter to the top and consume it
var_idx_t x = left[0];
//stack.drop_vars_except(block0->var_info, x);
stack.rearrange_top(x, var_info[x] && var_info[x]->is_last());
tolk_assert(stack[0] == x);
stack.opt_show();
stack.s.pop_back();
stack.modified();
if (block0->noreturn()) {
stack.o.retalt_ = true;
}
// the condition is always true, so the REPEATEND branch below is never taken
if (true || !next->is_empty()) {
stack.o << "REPEAT:<{";
stack.o.indent();
stack.forget_const();
if (block0->noreturn()) {
Stack stack_copy{stack};
StackLayout layout1 = stack.vars();
stack_copy.mode &= ~Stack::_InlineFunc;
stack_copy.mode |= Stack::_NeedRetAlt;
block0->generate_code_all(stack_copy);
} else {
// the body must leave the stack in the layout it started with
StackLayout layout1 = stack.vars();
stack.mode &= ~Stack::_InlineFunc;
stack.mode |= Stack::_NeedRetAlt;
block0->generate_code_all(stack);
stack.enforce_state(std::move(layout1));
stack.opt_show();
}
stack.o.undent();
stack.o << "}>";
return true;
} else {
stack.o << "REPEATEND";
stack.forget_const();
StackLayout layout1 = stack.vars();
block0->generate_code_all(stack);
stack.enforce_state(std::move(layout1));
stack.opt_show();
return false;
}
}
case _Again: {
stack.drop_vars_except(block0->var_info);
stack.opt_show();
if (block0->noreturn()) {
stack.o.retalt_ = true;
}
if (!next->is_empty() || inline_func) {
stack.o << "AGAIN:<{";
stack.o.indent();
stack.forget_const();
// the body must leave the stack in the layout it started with
StackLayout layout1 = stack.vars();
stack.mode &= ~Stack::_InlineFunc;
stack.mode |= Stack::_NeedRetAlt;
block0->generate_code_all(stack);
stack.enforce_state(std::move(layout1));
stack.opt_show();
stack.o.undent();
stack.o << "}>";
return true;
} else {
// nothing follows the loop: use the AGAINEND form
stack.o << "AGAINEND";
stack.forget_const();
StackLayout layout1 = stack.vars();
block0->generate_code_all(stack);
stack.enforce_state(std::move(layout1));
stack.opt_show();
return false;
}
}
case _Until: {
// stack.drop_vars_except(block0->var_info);
// stack.opt_show();
if (block0->noreturn()) {
stack.o.retalt_ = true;
}
// the condition is always true, so the UNTILEND branch below is never taken
if (true || !next->is_empty()) {
stack.o << "UNTIL:<{";
stack.o.indent();
stack.forget_const();
auto layout1 = stack.vars();
stack.mode &= ~Stack::_InlineFunc;
stack.mode |= Stack::_NeedRetAlt;
block0->generate_code_all(stack);
// the body must end with the entry layout plus the loop condition on top
layout1.push_back(left[0]);
stack.enforce_state(std::move(layout1));
stack.opt_show();
stack.o.undent();
stack.o << "}>";
// the condition is consumed
stack.s.pop_back();
stack.modified();
return true;
} else {
stack.o << "UNTILEND";
stack.forget_const();
StackLayout layout1 = stack.vars();
block0->generate_code_all(stack);
layout1.push_back(left[0]);
stack.enforce_state(std::move(layout1));
stack.opt_show();
return false;
}
}
case _While: {
// while (block0 | left) block1; ...next
var_idx_t x = left[0];
stack.drop_vars_except(block0->var_info);
stack.opt_show();
// layout1 = layout at loop entry; the loop body has to restore it
StackLayout layout1 = stack.vars();
// next_empty is always false, so the "}>DO:" form below is never used
bool next_empty = false && next->is_empty();
if (block0->noreturn()) {
stack.o.retalt_ = true;
}
stack.o << "WHILE:<{";
stack.o.indent();
stack.forget_const();
stack.mode &= ~Stack::_InlineFunc;
stack.mode |= Stack::_NeedRetAlt;
block0->generate_code_all(stack);
// bring the condition to the top and consume it
stack.rearrange_top(x, !next->var_info[x] && !block1->var_info[x]);
stack.opt_show();
stack.s.pop_back();
stack.modified();
stack.o.undent();
// the body is generated on a copy; `stack` keeps the state after the condition
Stack stack_copy{stack};
stack.o << (next_empty ? "}>DO:" : "}>DO<{");
if (!next_empty) {
stack.o.indent();
}
stack_copy.opt_show();
block1->generate_code_all(stack_copy);
stack_copy.enforce_state(std::move(layout1));
stack_copy.opt_show();
if (!next_empty) {
stack.o.undent();
stack.o << "}>";
return true;
} else {
return false;
}
}
case _TryCatch: {
if (block0->is_empty() && block1->is_empty()) {
return true;
}
if (block0->noreturn() || block1->noreturn()) {
stack.o.retalt_ = true;
}
// the catch block (block1) gets its own stack: the variables it uses that are
// currently on the stack, followed by left[0] and left[1]
Stack catch_stack{stack.o};
std::vector<var_idx_t> catch_vars;
std::vector<bool> catch_last;
for (const VarDescr& var : block1->var_info.list) {
if (stack.find(var.idx) >= 0) {
catch_vars.push_back(var.idx);
catch_last.push_back(!block0->var_info[var.idx]);
}
}
// variables are handed over in chunks of at most block_size entries
const size_t block_size = 255;
for (size_t begin = catch_vars.size(), end = begin; end > 0; end = begin) {
begin = end >= block_size ? end - block_size : 0;
for (size_t i = begin; i < end; ++i) {
catch_stack.push_new_var(catch_vars[i]);
}
}
catch_stack.push_new_var(left[0]);
catch_stack.push_new_var(left[1]);
stack.rearrange_top(catch_vars, catch_last);
stack.opt_show();
// emit pushes of c4, c5 and c7; they are attached to the catch continuation
// by the SETCONT lines below
stack.o << "c4 PUSH";
stack.o << "c5 PUSH";
stack.o << "c7 PUSH";
stack.o << "<{";
stack.o.indent();
if (block1->noreturn()) {
catch_stack.mode |= Stack::_NeedRetAlt;
}
block1->generate_code_all(catch_stack);
catch_stack.drop_vars_except(next->var_info);
catch_stack.opt_show();
stack.o.undent();
stack.o << "}>CONT";
stack.o << "c7 SETCONT";
stack.o << "c5 SETCONT";
stack.o << "c4 SETCONT";
// attach the captured variables to the catch continuation, chunk by chunk
for (size_t begin = catch_vars.size(), end = begin; end > 0; end = begin) {
begin = end >= block_size ? end - block_size : 0;
stack.o << std::to_string(end - begin) + " PUSHINT";
stack.o << "-1 PUSHINT";
stack.o << "SETCONTVARARGS";
}
// the captured variables are no longer on the stack
stack.s.erase(stack.s.end() - catch_vars.size(), stack.s.end());
stack.modified();
stack.o << "<{";
stack.o.indent();
if (block0->noreturn()) {
stack.mode |= Stack::_NeedRetAlt;
}
block0->generate_code_all(stack);
if (block0->noreturn()) {
// only the catch block can fall through: continue with its stack
stack.s = std::move(catch_stack.s);
} else if (!block1->noreturn()) {
// both can fall through: make the try block end in the catch block's layout
stack.merge_state(catch_stack);
}
stack.opt_show();
stack.o.undent();
stack.o << "}>CONT";
stack.o << "c1 PUSH";
stack.o << "COMPOSALT";
stack.o << "SWAP";
stack.o << "TRY";
return true;
}
default:
std::cerr << "fatal: unknown operation <??" << cl << ">\n";
throw ParseError{where, "unknown operation in generate_code()"};
}
}
// Emits code for this operation and then, when the step asks to continue and a successor exists,
// for the remainder of the chain. The bits of stack.mode selected by Stack::_ModeSave are put back
// to their value on entry as soon as this operation's own step has finished.
void Op::generate_code_all(Stack& stack) {
  const int mode_on_entry = stack.mode;
  const auto proceed = generate_code_step(stack);
  const int restored_bits = mode_on_entry & Stack::_ModeSave;
  stack.mode = (stack.mode & ~Stack::_ModeSave) | restored_bits;
  if (!proceed || !next) {
    return;
  }
  next->generate_code_all(stack);
}
// Generates the assembler operation list for this code blob into `out`.
// The blob must start with an _Import operation; its left-hand variables form the initial stack.
// Unless Stack::_DisableOpt is set in `mode`, the resulting list is passed to optimize_code().
void CodeBlob::generate_code(AsmOpList& out, int mode) {
  Stack stack{out, mode};
  tolk_assert(ops && ops->cl == Op::_Import);
  const int arg_count = static_cast<int>(ops->left.size());
  for (var_idx_t arg_var : ops->left) {
    stack.push_new_var(arg_var);
  }
  ops->generate_code_all(stack);
  // The argument count is passed on only when callxargs is required and an inline mode is active;
  // otherwise -1 is passed.
  const bool pass_arg_count = require_callxargs && (mode & Stack::_InlineAny);
  stack.apply_wrappers(pass_arg_count ? arg_count : -1);
  if (mode & Stack::_DisableOpt) {
    return;
  }
  optimize_code(out);
}
void CodeBlob::generate_code(std::ostream& os, int mode, int indent) {
AsmOpList out_list(indent, &vars);
generate_code(out_list, mode);
out_list.out(os, mode);
}
} // namespace tolk

449
tolk/gen-abscode.cpp Normal file
View file

@ -0,0 +1,449 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#include <numeric>
#include "tolk.h"
using namespace std::literals::string_literals;
namespace tolk {
/*
*
* EXPRESSIONS
*
*/
// Returns a newly allocated deep copy of this expression: the node itself is copy-constructed
// and every argument sub-expression is replaced by its own recursive copy.
Expr* Expr::copy() const {
  Expr* duplicate = new Expr{*this};
  for (std::size_t k = 0; k < duplicate->args.size(); ++k) {
    duplicate->args[k] = duplicate->args[k]->copy();
  }
  return duplicate;
}
// Builds an expression of class `c` with the given arguments and binds `sym` to whatever
// lookup_symbol() returns for `name_idx` (the result is stored as-is, even when it is null).
Expr::Expr(int c, sym_idx_t name_idx, std::initializer_list<Expr*> _arglist)
    : cls(c), args(std::move(_arglist)) {
  sym = lookup_symbol(name_idx);
}
// Reports an error at `lem` unless this expression is an rvalue.
void Expr::chk_rvalue(const Lexem& lem) const {
  if (is_rvalue()) {
    return;
  }
  lem.error_at("rvalue expected before `", "`");
}
// Reports an error at `lem` unless this expression is an lvalue.
void Expr::chk_lvalue(const Lexem& lem) const {
  if (is_lvalue()) {
    return;
  }
  lem.error_at("lvalue expected before `", "`");
}
// Reports an error at `lem` unless this expression is a type expression.
void Expr::chk_type(const Lexem& lem) const {
  if (is_type()) {
    return;
  }
  lem.error_at("type expression expected before `", "`");
}
// Deduces the type of this expression from the types of its arguments and caches it in e_type.
// Returns true when e_type was already set or has been deduced here; returns false for expression
// classes not handled below, and for _Apply when there is no symbol or the symbol has no type.
// Type mismatches found during unification are reported through lem.error() with a message
// describing the conflicting types.
bool Expr::deduce_type(const Lexem& lem) {
// Type already known: nothing to deduce.
if (e_type) {
return true;
}
switch (cls) {
case _Apply: {
// Application of a named function: requires a symbol whose value carries a type.
if (!sym) {
return false;
}
SymVal* sym_val = dynamic_cast<SymVal*>(sym->value);
if (!sym_val || !sym_val->get_type()) {
return false;
}
// Build the expected function type (tensor of argument types) -> <hole> and unify it
// with the type stored for the symbol; the hole then stands for the result type.
std::vector<TypeExpr*> arg_types;
for (const auto& arg : args) {
arg_types.push_back(arg->e_type);
}
TypeExpr* fun_type = TypeExpr::new_map(TypeExpr::new_tensor(arg_types), TypeExpr::new_hole());
try {
unify(fun_type, sym_val->sym_type);
} catch (UnifyError& ue) {
std::ostringstream os;
os << "cannot apply function " << sym->name() << " : " << sym_val->get_type() << " to arguments of type "
<< fun_type->args[0] << ": " << ue;
lem.error(os.str());
}
e_type = fun_type->args[1];
// NOTE(review): remove_indirect presumably collapses indirections left by unification — confirm.
TypeExpr::remove_indirect(e_type);
return true;
}
case _VarApply: {
// Application of an expression (args[0]) to an argument (args[1]):
// args[0] must unify with (type of args[1]) -> <hole>.
tolk_assert(args.size() == 2);
TypeExpr* fun_type = TypeExpr::new_map(args[1]->e_type, TypeExpr::new_hole());
try {
unify(fun_type, args[0]->e_type);
} catch (UnifyError& ue) {
std::ostringstream os;
os << "cannot apply expression of type " << args[0]->e_type << " to an expression of type " << args[1]->e_type
<< ": " << ue;
lem.error(os.str());
}
e_type = fun_type->args[1];
TypeExpr::remove_indirect(e_type);
return true;
}
case _Letop: {
// Assignment: both sides must unify; the result has the type of the left-hand side.
tolk_assert(args.size() == 2);
try {
// std::cerr << "in assignment: " << args[0]->e_type << " from " << args[1]->e_type << std::endl;
unify(args[0]->e_type, args[1]->e_type);
} catch (UnifyError& ue) {
std::ostringstream os;
os << "cannot assign an expression of type " << args[1]->e_type << " to a variable or pattern of type "
<< args[0]->e_type << ": " << ue;
lem.error(os.str());
}
e_type = args[0]->e_type;
TypeExpr::remove_indirect(e_type);
return true;
}
case _LetFirst: {
// Implicit assignment by a modifying method: the right-hand side must have the tensor type
// (type of args[0], <hole>); the hole becomes the type of this expression.
tolk_assert(args.size() == 2);
TypeExpr* rhs_type = TypeExpr::new_tensor({args[0]->e_type, TypeExpr::new_hole()});
try {
// std::cerr << "in implicit assignment of a modifying method: " << rhs_type << " and " << args[1]->e_type << std::endl;
unify(rhs_type, args[1]->e_type);
} catch (UnifyError& ue) {
std::ostringstream os;
os << "cannot implicitly assign an expression of type " << args[1]->e_type
<< " to a variable or pattern of type " << rhs_type << " in modifying method `" << symbols.get_name(val)
<< "` : " << ue;
lem.error(os.str());
}
e_type = rhs_type->args[1];
TypeExpr::remove_indirect(e_type);
// std::cerr << "result type is " << e_type << std::endl;
return true;
}
case _CondExpr: {
// Conditional expression: args[0] is the condition, args[1] and args[2] are the two variants.
tolk_assert(args.size() == 3);
auto flag_type = TypeExpr::new_atomic(_Int);
// The condition must unify with the atomic integer type.
try {
unify(args[0]->e_type, flag_type);
} catch (UnifyError& ue) {
std::ostringstream os;
os << "condition in a conditional expression has non-integer type " << args[0]->e_type << ": " << ue;
lem.error(os.str());
}
// Both variants must unify with each other; the result takes the type of the first variant.
try {
unify(args[1]->e_type, args[2]->e_type);
} catch (UnifyError& ue) {
std::ostringstream os;
os << "the two variants in a conditional expression have different types " << args[1]->e_type << " and "
<< args[2]->e_type << " : " << ue;
lem.error(os.str());
}
e_type = args[1]->e_type;
TypeExpr::remove_indirect(e_type);
return true;
}
}
// Any other expression class is not handled here.
return false;
}
// Creates variables in `code` for the not-yet-defined (val < 0) _Var and _Hole nodes of this
// expression, recursing through _Tensor, _MkTuple and _TypeApply nodes.
// Returns the number of named (_Var) variables created; temporaries made for holes are not counted.
int Expr::define_new_vars(CodeBlob& code) {
  if (cls == _Tensor || cls == _MkTuple || cls == _TypeApply) {
    int created = 0;
    for (const auto& sub : args) {
      created += sub->define_new_vars(code);
    }
    return created;
  }
  if (cls == _Var && val < 0) {
    val = code.create_var(TmpVar::_Named, e_type, sym, &here);
    return 1;
  }
  if (cls == _Hole && val < 0) {
    val = code.create_var(TmpVar::_Tmp, e_type, nullptr, &here);
  }
  return 0;
}
// Declares symbols for the _Var nodes of this expression that have no symbol yet, recursing
// through _Tensor, _MkTuple and _TypeApply nodes. Returns the number of symbols declared.
// Throws ParseError when the name is in prohibited_var_names or define_symbol() fails.
int Expr::predefine_vars() {
  if (cls == _Tensor || cls == _MkTuple || cls == _TypeApply) {
    int declared = 0;
    for (const auto& sub : args) {
      declared += sub->predefine_vars();
    }
    return declared;
  }
  if (cls != _Var || sym) {
    return 0;
  }
  // For a not-yet-defined variable, `val` is negative and ~val is the index of its name.
  tolk_assert(val < 0 && here.defined());
  if (prohibited_var_names.count(symbols.get_name(~val))) {
    throw ParseError{
        here, PSTRING() << "symbol `" << symbols.get_name(~val) << "` cannot be redefined as a variable"};
  }
  sym = define_symbol(~val, false, here);
  if (!sym) {
    throw ParseError{here, std::string{"redefined variable `"} + symbols.get_name(~val) + "`"};
  }
  sym->value = new SymVal{SymVal::_Var, -1, e_type};
  return 1;
}
// Creates a temporary variable in `code` with this expression's type and source location
// and returns its index.
var_idx_t Expr::new_tmp(CodeBlob& code) const {
  const var_idx_t tmp_var = code.create_tmp_var(e_type, &here);
  return tmp_var;
}
// Appends one _SetGlob operation per (global symbol, variable) pair in `globs`; each operation
// has the variable as its only right-hand operand and is flagged as impure.
void add_set_globs(CodeBlob& code, std::vector<std::pair<SymDef*, var_idx_t>>& globs, const SrcLocation& here) {
  for (const auto& entry : globs) {
    SymDef* const glob_sym = entry.first;
    const var_idx_t src_var = entry.second;
    auto& set_op =
        code.emplace_back(here, Op::_SetGlob, std::vector<var_idx_t>{}, std::vector<var_idx_t>{src_var}, glob_sym);
    set_op.flags |= Op::_Impure;
  }
}
// Compiles the assignment `lhs = rhs` into `code` and returns the variables holding its value.
// Type-application wrappers on either side are skipped first. A tuple pattern on the left is
// matched either structurally against a tuple constructor on the right, or by unpacking the
// right-hand value into a temporary with an _UnTuple operation and assigning from that temporary.
std::vector<var_idx_t> Expr::pre_compile_let(CodeBlob& code, Expr* lhs, Expr* rhs, const SrcLocation& here) {
  for (; lhs->is_type_apply(); lhs = lhs->args.at(0)) {
  }
  for (; rhs->is_type_apply(); rhs = rhs->args.at(0)) {
  }
  if (!lhs->is_mktuple()) {
    // Plain assignment: the right side is compiled before the left side.
    auto rhs_vars = rhs->pre_compile(code);
    std::vector<std::pair<SymDef*, var_idx_t>> globs;
    auto lhs_vars = lhs->pre_compile(code, &globs);
    for (var_idx_t modified : lhs_vars) {
      code.on_var_modification(modified, here);
    }
    code.emplace_back(here, Op::_Let, std::move(lhs_vars), rhs_vars);
    add_set_globs(code, globs, here);
    return rhs_vars;
  }
  if (rhs->is_mktuple()) {
    // Tuple pattern against tuple constructor: assign the contents directly.
    return pre_compile_let(code, lhs->args.at(0), rhs->args.at(0), here);
  }
  // Tuple pattern against an arbitrary value: unpack it into a temporary, then assign from it.
  auto rhs_vars = rhs->pre_compile(code);
  TypeExpr::remove_indirect(rhs->e_type);
  auto inner_type = rhs->e_type->args.at(0);
  std::vector<var_idx_t> unpacked{code.create_tmp_var(inner_type, &rhs->here)};
  code.emplace_back(lhs->here, Op::_UnTuple, unpacked, std::move(rhs_vars));
  auto unpacked_expr = new Expr{_Var};
  unpacked_expr->set_val(unpacked[0]);
  unpacked_expr->set_location(rhs->here);
  unpacked_expr->e_type = inner_type;
  pre_compile_let(code, lhs->args.at(0), unpacked_expr, here);
  return unpacked;
}
// Pre-compiles every expression in `args` and returns the concatenation of their result variables,
// in argument order. The expressions are compiled in the order given by `arg_order`
// (indices into `args`); an empty `arg_order` means 0, 1, 2, ...
// While the arguments are being compiled, a callback is registered on every result variable so that
// a later modification of that variable within the same tensor is either rejected or compensated for
// (depending on CodeBlob::_AllowPostModification).
std::vector<var_idx_t> pre_compile_tensor(const std::vector<Expr *> args, CodeBlob &code,
std::vector<std::pair<SymDef*, var_idx_t>> *lval_globs,
std::vector<int> arg_order) {
// Default evaluation order: left to right.
if (arg_order.empty()) {
arg_order.resize(args.size());
std::iota(arg_order.begin(), arg_order.end(), 0);
}
tolk_assert(args.size() == arg_order.size());
// res_lists[i] holds the result variables of args[i].
std::vector<std::vector<var_idx_t>> res_lists(args.size());
// Records that result j of argument i was modified after being computed; `op` is the
// placeholder operation inserted right after the argument was compiled.
struct ModifiedVar {
size_t i, j;
Op* op;
};
// Shared with the callbacks below, which are stored inside the variables.
auto modified_vars = std::make_shared<std::vector<ModifiedVar>>();
for (size_t i : arg_order) {
res_lists[i] = args[i]->pre_compile(code, lval_globs);
for (size_t j = 0; j < res_lists[i].size(); ++j) {
TmpVar& var = code.vars.at(res_lists[i][j]);
if (code.flags & CodeBlob::_AllowPostModification) {
if (!lval_globs && (var.cls & TmpVar::_Named)) {
// Rvalue use of a named variable: insert a disabled placeholder _Let now; if the variable
// is modified later, the callback records it (once) so the placeholder can be enabled below.
Op *op = &code.emplace_back(nullptr, Op::_Let, std::vector<var_idx_t>(), std::vector<var_idx_t>());
op->flags |= Op::_Disabled;
var.on_modification.push_back([modified_vars, i, j, op, done = false](const SrcLocation &here) mutable {
if (!done) {
done = true;
modified_vars->push_back({i, j, op});
}
});
} else {
// Nothing to do on modification; a no-op callback keeps the push/pop below balanced.
var.on_modification.push_back([](const SrcLocation &) {
});
}
} else {
// Post-modification is not allowed: modifying the variable later is a compile error.
var.on_modification.push_back([name = var.to_string()](const SrcLocation &here) {
throw ParseError{here, PSTRING() << "Modifying local variable " << name
<< " after using it in the same expression"};
});
}
}
}
// Remove the callbacks registered above (exactly one per result variable occurrence).
for (const auto& list : res_lists) {
for (var_idx_t v : list) {
tolk_assert(!code.vars.at(v).on_modification.empty());
code.vars.at(v).on_modification.pop_back();
}
}
// For every variable that was modified after use: enable its placeholder as `tmp = var`
// and make the result refer to the fresh temporary instead of the variable itself.
for (const ModifiedVar &m : *modified_vars) {
var_idx_t& v = res_lists[m.i][m.j];
var_idx_t v2 = code.create_tmp_var(code.vars[v].v_type, code.vars[v].where.get());
m.op->left = {v2};
m.op->right = {v};
m.op->flags &= ~Op::_Disabled;
v = v2;
}
// Flatten the per-argument lists in argument order.
std::vector<var_idx_t> res;
for (const auto& list : res_lists) {
res.insert(res.end(), list.cbegin(), list.cend());
}
return res;
}
// Compiles this expression into operations appended to `code` and returns the indices of the
// variables that hold its value.
// When `lval_globs` is non-null the expression is compiled as an lvalue: only _Tensor, _Var, _Hole,
// _TypeApply and _GlobVar are accepted, and global variables are not read but recorded in
// `*lval_globs` as (symbol, temporary) pairs.
// Throws Fatal for expression classes that cannot be compiled and ParseError for a variable
// that has not been defined (val < 0).
std::vector<var_idx_t> Expr::pre_compile(CodeBlob& code, std::vector<std::pair<SymDef*, var_idx_t>>* lval_globs) const {
if (lval_globs && !(cls == _Tensor || cls == _Var || cls == _Hole || cls == _TypeApply || cls == _GlobVar)) {
std::cerr << "lvalue expression constructor is " << cls << std::endl;
throw Fatal{"cannot compile lvalue expression with unknown constructor"};
}
switch (cls) {
case _Tensor: {
return pre_compile_tensor(args, code, lval_globs, {});
}
case _Apply: {
// Function call: compile the arguments (in the function's own argument order when it defines one
// of matching size and _ComputeAsmLtr is not set), then emit a _Call into fresh temporaries.
tolk_assert(sym);
auto func = dynamic_cast<SymValFunc*>(sym->value);
std::vector<var_idx_t> res;
if (func && func->arg_order.size() == args.size() && !(code.flags & CodeBlob::_ComputeAsmLtr)) {
//std::cerr << "!!! reordering " << args.size() << " arguments of " << sym->name() << std::endl;
res = pre_compile_tensor(args, code, lval_globs, func->arg_order);
} else {
res = pre_compile_tensor(args, code, lval_globs, {});
}
auto rvect = new_tmp_vect(code);
auto& op = code.emplace_back(here, Op::_Call, rvect, std::move(res), sym);
if (flags & _IsImpure) {
op.flags |= Op::_Impure;
}
return rvect;
}
case _TypeApply:
// A type application compiles to its single argument.
return args[0]->pre_compile(code, lval_globs);
case _Var:
case _Hole:
// A variable or hole is just its variable index; it must already be defined.
if (val < 0) {
throw ParseError{here, "unexpected variable definition"};
}
return {val};
case _VarApply:
if (args[0]->cls == _Glob) {
// Applying a global symbol: emit a direct _Call to that symbol.
auto res = args[1]->pre_compile(code);
auto rvect = new_tmp_vect(code);
auto& op = code.emplace_back(here, Op::_Call, rvect, std::move(res), args[0]->sym);
if (args[0]->flags & _IsImpure) {
op.flags |= Op::_Impure;
}
return rvect;
} else {
// Applying a computed value: arguments are compiled first, then the callee, which is appended
// as the last operand of a _CallInd operation.
auto res = args[1]->pre_compile(code);
auto tfunc = args[0]->pre_compile(code);
if (tfunc.size() != 1) {
throw Fatal{"stack tuple used as a function"};
}
res.push_back(tfunc[0]);
auto rvect = new_tmp_vect(code);
code.emplace_back(here, Op::_CallInd, rvect, std::move(res));
return rvect;
}
case _Const: {
// Integer constant loaded into a fresh temporary.
auto rvect = new_tmp_vect(code);
code.emplace_back(here, Op::_IntConst, rvect, intval);
return rvect;
}
case _Glob:
case _GlobVar: {
// As an lvalue, only record (symbol, temporary) for a later _SetGlob; as an rvalue, emit _GlobVar.
auto rvect = new_tmp_vect(code);
if (lval_globs) {
lval_globs->push_back({ sym, rvect[0] });
return rvect;
} else {
code.emplace_back(here, Op::_GlobVar, rvect, std::vector<var_idx_t>{}, sym);
return rvect;
}
}
case _Letop: {
return pre_compile_let(code, args.at(0), args.at(1), here);
}
case _LetFirst: {
// The right side yields the new value(s) for args[0] followed by one extra value, which
// becomes the value of this expression (rvect).
auto rvect = new_tmp_vect(code);
auto right = args[1]->pre_compile(code);
std::vector<std::pair<SymDef*, var_idx_t>> local_globs;
// If the caller did not supply a list for globals, collect them locally and set them below.
if (!lval_globs) {
lval_globs = &local_globs;
}
auto left = args[0]->pre_compile(code, lval_globs);
left.push_back(rvect[0]);
for (var_idx_t v : left) {
code.on_var_modification(v, here);
}
code.emplace_back(here, Op::_Let, std::move(left), std::move(right));
add_set_globs(code, local_globs, here);
return rvect;
}
case _MkTuple: {
// Pack the values of args[0] into a tuple stored in a fresh temporary.
auto left = new_tmp_vect(code);
auto right = args[0]->pre_compile(code);
code.emplace_back(here, Op::_Tuple, left, std::move(right));
return left;
}
case _CondExpr: {
// Conditional expression: emit an _If whose two branches each assign their variant to the
// same result temporaries.
auto cond = args[0]->pre_compile(code);
tolk_assert(cond.size() == 1);
auto rvect = new_tmp_vect(code);
Op& if_op = code.emplace_back(here, Op::_If, cond);
code.push_set_cur(if_op.block0);
code.emplace_back(here, Op::_Let, rvect, args[1]->pre_compile(code));
code.close_pop_cur(args[1]->here);
code.push_set_cur(if_op.block1);
code.emplace_back(here, Op::_Let, rvect, args[2]->pre_compile(code));
code.close_pop_cur(args[2]->here);
return rvect;
}
case _SliceConst: {
// Slice constant (from strval) loaded into a fresh temporary.
auto rvect = new_tmp_vect(code);
code.emplace_back(here, Op::_SliceConst, rvect, strval);
return rvect;
}
default:
std::cerr << "expression constructor is " << cls << std::endl;
throw Fatal{"cannot compile expression with unknown constructor"};
}
}
} // namespace tolk

126
tolk/keywords.cpp Normal file
View file

@ -0,0 +1,126 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tolk.h"
namespace tolk {
/*
*
* KEYWORD DEFINITION
*
*/
// Registers all keywords of the language in the global symbol table `symbols`:
// single-character tokens first, then multi-character operators, statement keywords,
// type keywords, declaration specifiers and finally the '#'-directives.
void define_keywords() {
// Single-character tokens.
symbols.add_kw_char('+')
.add_kw_char('-')
.add_kw_char('*')
.add_kw_char('/')
.add_kw_char('%')
.add_kw_char('?')
.add_kw_char(':')
.add_kw_char(',')
.add_kw_char(';')
.add_kw_char('(')
.add_kw_char(')')
.add_kw_char('[')
.add_kw_char(']')
.add_kw_char('{')
.add_kw_char('}')
.add_kw_char('=')
.add_kw_char('_')
.add_kw_char('<')
.add_kw_char('>')
.add_kw_char('&')
.add_kw_char('|')
.add_kw_char('^')
.add_kw_char('~');
// Multi-character operators: comparisons, shifts, division/modulo variants and compound assignments.
symbols.add_keyword("==", Keyword::_Eq)
.add_keyword("!=", Keyword::_Neq)
.add_keyword("<=", Keyword::_Leq)
.add_keyword(">=", Keyword::_Geq)
.add_keyword("<=>", Keyword::_Spaceship)
.add_keyword("<<", Keyword::_Lshift)
.add_keyword(">>", Keyword::_Rshift)
.add_keyword("~>>", Keyword::_RshiftR)
.add_keyword("^>>", Keyword::_RshiftC)
.add_keyword("~/", Keyword::_DivR)
.add_keyword("^/", Keyword::_DivC)
.add_keyword("~%", Keyword::_ModR)
.add_keyword("^%", Keyword::_ModC)
.add_keyword("/%", Keyword::_DivMod)
.add_keyword("+=", Keyword::_PlusLet)
.add_keyword("-=", Keyword::_MinusLet)
.add_keyword("*=", Keyword::_TimesLet)
.add_keyword("/=", Keyword::_DivLet)
.add_keyword("~/=", Keyword::_DivRLet)
.add_keyword("^/=", Keyword::_DivCLet)
.add_keyword("%=", Keyword::_ModLet)
.add_keyword("~%=", Keyword::_ModRLet)
.add_keyword("^%=", Keyword::_ModCLet)
.add_keyword("<<=", Keyword::_LshiftLet)
.add_keyword(">>=", Keyword::_RshiftLet)
.add_keyword("~>>=", Keyword::_RshiftRLet)
.add_keyword("^>>=", Keyword::_RshiftCLet)
.add_keyword("&=", Keyword::_AndLet)
.add_keyword("|=", Keyword::_OrLet)
.add_keyword("^=", Keyword::_XorLet);
// Statement and control-flow keywords.
symbols.add_keyword("return", Keyword::_Return)
.add_keyword("var", Keyword::_Var)
.add_keyword("repeat", Keyword::_Repeat)
.add_keyword("do", Keyword::_Do)
.add_keyword("while", Keyword::_While)
.add_keyword("until", Keyword::_Until)
.add_keyword("try", Keyword::_Try)
.add_keyword("catch", Keyword::_Catch)
.add_keyword("if", Keyword::_If)
.add_keyword("ifnot", Keyword::_Ifnot)
.add_keyword("then", Keyword::_Then)
.add_keyword("else", Keyword::_Else)
.add_keyword("elseif", Keyword::_Elseif)
.add_keyword("elseifnot", Keyword::_Elseifnot);
// Type keywords.
symbols.add_keyword("int", Keyword::_Int)
.add_keyword("cell", Keyword::_Cell)
.add_keyword("slice", Keyword::_Slice)
.add_keyword("builder", Keyword::_Builder)
.add_keyword("cont", Keyword::_Cont)
.add_keyword("tuple", Keyword::_Tuple)
.add_keyword("type", Keyword::_Type)
.add_keyword("->", Keyword::_Mapsto)
.add_keyword("forall", Keyword::_Forall);
// Declaration specifiers and modifiers.
symbols.add_keyword("extern", Keyword::_Extern)
.add_keyword("global", Keyword::_Global)
.add_keyword("asm", Keyword::_Asm)
.add_keyword("impure", Keyword::_Impure)
.add_keyword("inline", Keyword::_Inline)
.add_keyword("inline_ref", Keyword::_InlineRef)
.add_keyword("auto_apply", Keyword::_AutoApply)
.add_keyword("method_id", Keyword::_MethodId)
.add_keyword("operator", Keyword::_Operator)
.add_keyword("infix", Keyword::_Infix)
.add_keyword("infixl", Keyword::_Infixl)
.add_keyword("infixr", Keyword::_Infixr)
.add_keyword("const", Keyword::_Const);
// '#'-prefixed directives.
symbols.add_keyword("#pragma", Keyword::_PragmaHashtag)
.add_keyword("#include", Keyword::_IncludeHashtag);
}
} // namespace tolk

335
tolk/lexer.cpp Normal file
View file

@ -0,0 +1,335 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#include "lexer.h"
#include "symtable.h"
#include <sstream>
#include <cassert>
namespace tolk {
/*
*
* LEXER
*
*/
// Returns a human-readable name for the lexem type `idx`, for use in diagnostics:
// a fixed word for the built-in classes, the back-quoted keyword text for keywords,
// and "<unknown lexem of type N>" for anything else.
std::string Lexem::lexem_name_str(int idx) {
  if (idx == Eof) {
    return "end of file";
  } else if (idx == Ident) {
    return "identifier";
  } else if (idx == Number) {
    return "number";
  } else if (idx == String) {
    return "string";
  } else if (idx == Special) {
    return "special";
  } else if (symbols.get_keyword(idx)) {
    return "`" + symbols.get_keyword(idx)->str + "`";
  } else {
    // The prefix must be written with operator<<: an ostringstream constructed from an initial
    // string starts writing at position 0 and would overwrite that text instead of appending to it.
    std::ostringstream os;
    os << "<unknown lexem of type " << idx << ">";
    return os.str();
  }
}
// Returns a description of this lexem for diagnostics: identifiers and strings include their
// text, every other lexem is described by its type via lexem_name_str().
std::string Lexem::name_str() const {
  if (tp == Ident) {
    std::string described{"identifier `"};
    described += symbols.get_name(val);
    described += "`";
    return described;
  }
  if (tp == String) {
    std::string described{"string \""};
    described += str;
    described += '"';
    return described;
  }
  return lexem_name_str(tp);
}
// Returns true when `str` is an integer literal: an optional leading '-', then either one or more
// decimal digits, or the prefix "0x" followed by one or more hexadecimal digits (either case).
// The empty string, a lone "-" and a bare "0x" are not numbers.
// The parameter is kept by value so the signature stays compatible with existing declarations.
bool is_number(std::string str) {
  const std::size_t len = str.size();
  std::size_t pos = 0;
  if (pos < len && str[pos] == '-') {
    ++pos;
  }
  bool hex = false;
  // Compare the remaining length instead of forming an iterator beyond end(), which would be
  // undefined behaviour for inputs such as "-".
  if (len - pos >= 2 && str[pos] == '0' && str[pos + 1] == 'x') {
    pos += 2;
    hex = true;
  }
  if (pos == len) {
    return false;  // no digits at all
  }
  for (; pos < len; ++pos) {
    int c = str[pos];
    if (c >= '0' && c <= '9') {
      continue;
    }
    if (!hex) {
      return false;
    }
    c |= 0x20;  // fold ASCII upper-case letters to lower-case
    if (c < 'a' || c > 'f') {
      return false;
    }
  }
  return true;
}
int Lexem::classify() {
if (tp != Unknown) {
return tp;
}
sym_idx_t i = symbols.lookup(str);
if (i) {
assert(str == symbols[i]->str);
str = symbols[i]->str;
sym_idx_t idx = symbols[i]->idx;
tp = (idx < 0 ? -idx : Ident);
val = i;
} else if (is_number(str)) {
tp = Number;
} else {
tp = 0;
}
if (tp == Unknown) {
tp = Ident;
val = symbols.lookup(str, 1);
}
return tp;
}
// Overwrites all fields of this lexem and classifies it; returns the resulting type.
int Lexem::set(std::string _str, const SrcLocation& _loc, int _tp, int _val) {
  tp = _tp;
  val = _val;
  loc = _loc;
  str = _str;
  return classify();
}
// Sets up the character-class table and comment delimiters, and optionally reads the first lexem.
// `active_chars` is a space-separated list of character groups: each space moves on to the next
// class in the sequence active -> left_active -> right_active -> allow_repeat -> active -> ...
// Only ASCII characters (< 0x80) are classified.
Lexer::Lexer(SourceReader& _src, bool init, std::string active_chars, std::string eol_cmts, std::string open_cmts,
             std::string close_cmts, std::string quote_chars, std::string multiline_quote)
    : src(_src), eof(false), lexem("", src.here(), Lexem::Undefined), peek_lexem("", {}, Lexem::Undefined),
      multiline_quote(std::move(multiline_quote)) {
  std::memset(char_class, 0, sizeof(char_class));
  unsigned char group_class = cc::active;
  for (char ch : active_chars) {
    if (ch != ' ') {
      if (static_cast<unsigned>(ch) < 0x80) {
        char_class[static_cast<unsigned>(ch)] |= group_class;
      }
      continue;
    }
    // A space switches to the next class; after right_active (1) comes allow_repeat.
    --group_class;
    if (group_class == 0) {
      group_class = cc::allow_repeat;
    }
  }
  set_spec(eol_cmt, eol_cmts);
  set_spec(cmt_op, open_cmts);
  set_spec(cmt_cl, close_cmts);
  for (int quote : quote_chars) {
    const bool printable_ascii = quote > ' ' && quote <= 0x7f;
    if (printable_ascii) {
      char_class[static_cast<unsigned>(quote)] |= cc::quote_char;
    }
  }
  if (init) {
    next();
  }
}
// Parses a space-separated delimiter specification into `arr`:
// a one-character word is stored in arr[0], a two-character word in arr[1] and arr[2];
// longer words are skipped. Entries that are not set keep the value -0x100.
// If several words of the same length are present, the last one wins.
void Lexer::set_spec(std::array<int, 3>& arr, std::string setup) {
arr[0] = arr[1] = arr[2] = -0x100;
std::size_t n = setup.size(), i;
for (i = 0; i < n; i++) {
if (setup[i] == ' ') {
continue;
}
// Here i is at the first character of a word.
if (i == n - 1 || setup[i + 1] == ' ') {
// Word of length 1.
arr[0] = setup[i];
} else if (i == n - 2 || (i < n - 2 && setup[i + 2] == ' ')) {
// Word of length 2; the extra ++i consumes its second character.
arr[1] = setup[i];
arr[2] = setup[++i];
} else {
// Word of length 3 or more: skip to the next space (or the end).
while (i < n && setup[i] != ' ') {
i++;
}
}
}
}
// Returns true when the character range [begin, end) starts with the configured multi-line quote
// sequence. Always false when no multi-line quote is configured.
bool Lexer::is_multiline_quote(const char* begin, const char* end) {
  if (multiline_quote.empty()) {
    return false;
  }
  const char* cursor = begin;
  for (std::size_t k = 0; k < multiline_quote.size(); ++k, ++cursor) {
    if (cursor == end || *cursor != multiline_quote[k]) {
      return false;
    }
  }
  return true;
}
void Lexer::expect(int exp_tp, const char* msg) {
if (tp() != exp_tp) {
throw ParseError{lexem.loc, (msg ? std::string{msg} : Lexem::lexem_name_str(exp_tp)) + " expected instead of " +
cur().name_str()};
}
next();
}
// Advances to the next lexem and returns it.
// Order of work: a previously peeked lexem is returned first; otherwise whitespace and comments
// are skipped, and then one of the following is read: a multi-line quoted string, a quoted string
// or back-quoted token, or an ordinary token delimited by blanks and "active" characters.
// At end of input a lexem of type Lexem::Eof is returned. Malformed input (unterminated strings
// or comments, mismatched comment delimiters) is reported through src.error().
const Lexem& Lexer::next() {
  // A lexem obtained by peek() is consumed first.
  if (peek_lexem.valid()) {
    lexem = std::move(peek_lexem);
    peek_lexem.clear({}, Lexem::Undefined);
    eof = (lexem.tp == Lexem::Eof);
    return lexem;
  }
  if (eof) {
    return lexem.clear(src.here(), Lexem::Eof);
  }
  // `comm` is a stack of open block comments packed into an integer: the value 1 means "no open
  // comment"; opening a comment shifts left and pushes 1 for the two-character opener or 0 for the
  // single-character opener; closing a comment pops by shifting right.
  long long comm = 1;
  while (!src.seek_eof()) {
    int cc = src.cur_char(), nc = src.next_char();
    if (cc == eol_cmt[0] || (cc == eol_cmt[1] && nc == eol_cmt[2])) {
      // End-of-line comment: drop the rest of the line.
      src.load_line();
    } else if (cc == cmt_op[1] && nc == cmt_op[2]) {
      src.advance(2);
      comm = comm * 2 + 1;
    } else if (cc == cmt_op[0]) {
      src.advance(1);
      comm *= 2;
    } else if (comm == 1) {
      // Not inside a comment: a real token starts here.
      break;
    } else if (cc == cmt_cl[1] && nc == cmt_cl[2]) {
      // Two-character closer: the innermost comment must have been opened by the two-character opener.
      if (!(comm & 1)) {
        src.error(std::string{"a `"} + (char)cmt_op[0] + "` comment closed by `" + (char)cmt_cl[1] + (char)cmt_cl[2] +
                  "`");
      }
      comm >>= 1;
      src.advance(2);
    } else if (cc == cmt_cl[0]) {
      // Single-character closer: it is an error when the innermost comment was opened by the
      // two-character opener (top bit set), which is exactly what the message below reports.
      if (comm & 1) {
        src.error(std::string{"a `"} + (char)cmt_op[1] + (char)cmt_op[2] + "` comment closed by `" + (char)cmt_cl[0] +
                  "`");
      }
      comm >>= 1;
      src.advance(1);
    } else {
      // Any other character inside a comment is skipped.
      src.advance(1);
    }
    if (comm < 0) {
      src.error("too many nested comments");
    }
  }
  if (src.seek_eof()) {
    eof = true;
    if (comm > 1) {
      // Input ended inside a comment; name the opener of the innermost one.
      if (comm & 1) {
        src.error(std::string{"`"} + (char)cmt_op[1] + (char)cmt_op[2] + "` comment extends past end of file");
      } else {
        src.error(std::string{"`"} + (char)cmt_op[0] + "` comment extends past end of file");
      }
    }
    return lexem.clear(src.here(), Lexem::Eof);
  }
  // Multi-line quoted string: collect everything up to the closing quote sequence,
  // inserting '\n' at every line break.
  if (is_multiline_quote(src.get_ptr(), src.get_end_ptr())) {
    src.advance(multiline_quote.size());
    const char* end = nullptr;
    SrcLocation here = src.here();
    std::string body;
    while (!src.is_eof()) {
      if (src.is_eoln()) {
        body.push_back('\n');
        src.load_line();
        continue;
      }
      if (is_multiline_quote(src.get_ptr(), src.get_end_ptr())) {
        end = src.get_ptr();
        src.advance(multiline_quote.size());
        break;
      }
      body.push_back(src.cur_char());
      src.advance(1);
    }
    if (!end) {
      src.error("string extends past end of file");
    }
    lexem.set(body, here, Lexem::String);
    // A single ASCII letter directly after the closing quote is consumed and stored in `val`.
    int c = src.cur_char();
    if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
      lexem.val = c;
      src.advance(1);
    }
    return lexem;
  }
  int c = src.cur_char();
  const char* end = src.get_ptr();
  // Single-line quoted string, or a back-quoted token (which is classified like an ordinary token).
  if (is_quote_char(c) || c == '`') {
    int qc = c;
    ++end;
    while (end < src.get_end_ptr() && *end != qc) {
      ++end;
    }
    // Reaching the end of the line means the closing quote is missing; test the pointer first so
    // that nothing is read at the end-of-line position.
    if (end >= src.get_end_ptr() || *end != qc) {
      src.error(qc == '`' ? "a `back-quoted` token extends past end of line" : "string extends past end of line");
    }
    lexem.set(std::string{src.get_ptr() + 1, end}, src.here(), qc == '`' ? Lexem::Unknown : Lexem::String);
    src.set_ptr(end + 1);
    // A single ASCII letter directly after the closing quote is consumed and stored in `val`.
    c = src.cur_char();
    if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
      lexem.val = c;
      src.set_ptr(end + 2);
    }
    // std::cerr << lexem.name_str() << ' ' << lexem.str << std::endl;
    return lexem;
  }
  // Ordinary token: extends up to a blank or tab, stops before a left-active character (unless it
  // is the first character) and after a right-active one; a repeated "repeatable" character does
  // not break the token.
  int len = 0, pc = -0x100;
  while (end < src.get_end_ptr()) {
    c = *end;
    bool repeated = (c == pc && is_repeatable(c));
    if (c == ' ' || c == 9 || (len && is_left_active(c) && !repeated)) {
      break;
    }
    ++len;
    ++end;
    if (is_right_active(c) && !repeated) {
      break;
    }
    pc = c;
  }
  lexem.set(std::string{src.get_ptr(), end}, src.here());
  src.set_ptr(end);
  // std::cerr << lexem.name_str() << ' ' << lexem.str << std::endl;
  return lexem;
}
// Returns the lexem that follows the current one without consuming it. The result is cached in
// peek_lexem and handed out by the following next() call; the current lexem is left unchanged.
const Lexem& Lexer::peek() {
  if (peek_lexem.valid()) {
    return peek_lexem;
  }
  if (eof) {
    return lexem.clear(src.here(), Lexem::Eof);
  }
  // Read ahead with next(), then put the current lexem back in place.
  Lexem current = std::move(lexem);
  next();
  peek_lexem = std::move(lexem);
  lexem = std::move(current);
  // Reading ahead may have hit end of input; that only becomes the lexer's state once the
  // peeked lexem is actually consumed by next().
  eof = false;
  return peek_lexem;
}
} // namespace tolk

113
tolk/lexer.h Normal file
View file

@ -0,0 +1,113 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "srcread.h"
#include <array>
#include <memory>
#include <cstring>
namespace tolk {
/*
*
* LEXER
*
*/
// A single lexical token: its type, an integer payload, its text and its source location.
struct Lexem {
// Built-in lexem types. Note that Unknown and Ident deliberately share the value 0.
enum { Undefined = -2, Eof = -1, Unknown = 0, Ident = 0, Number = 1, Special = 2, String = 3 };
int tp;           // lexem type: one of the values above or a keyword type
int val;          // integer payload (set by classify() or by the lexer)
std::string str;  // text of the lexem
SrcLocation loc;  // where the lexem was read
// Resolves an Unknown type from `str`; returns the resulting type.
int classify();
// Constructs the lexem and immediately classifies it.
Lexem(std::string _str = "", const SrcLocation& _loc = {}, int _tp = Unknown, int _val = 0)
: tp(_tp), val(_val), str(_str), loc(_loc) {
classify();
}
// Overwrites all fields and classifies the lexem; returns the resulting type.
int set(std::string _str = "", const SrcLocation& _loc = {}, int _tp = Unknown, int _val = 0);
// Resets the lexem to an empty text with the given location, type and value (no classification).
Lexem& clear(const SrcLocation& _loc = {}, int _tp = Unknown, int _val = 0) {
tp = _tp;
val = _val;
loc = _loc;
str = "";
return *this;
}
// A lexem is valid unless its type is Undefined.
bool valid() const {
return tp != Undefined;
}
// Description of this lexem for diagnostics.
std::string name_str() const;
// Throws a ParseError with message `_str` at this lexem's location.
void error(std::string _str) const {
throw ParseError{loc, _str};
}
// Throws a ParseError whose message is str1 + <text of this lexem> + str2.
void error_at(std::string str1, std::string str2) const {
error(str1 + str + str2);
}
// Description of a lexem type for diagnostics.
static std::string lexem_name_str(int idx);
};
// Splits the text supplied by a SourceReader into lexems, with one lexem of lookahead.
class Lexer {
SourceReader& src;             // input being read
bool eof;                      // set once the end of input has been reached
Lexem lexem, peek_lexem;       // current lexem and the cached lookahead (Undefined when empty)
unsigned char char_class[128]; // bit set of `cc` flags for every ASCII character
// Comment delimiters as filled in by set_spec(): [0] is the one-character form,
// [1] and [2] are the two characters of the two-character form.
std::array<int, 3> eol_cmt, cmt_op, cmt_cl;
std::string multiline_quote;   // delimiter of multi-line strings; empty disables them
// Character-class flags stored in char_class.
enum cc { left_active = 2, right_active = 1, active = 3, allow_repeat = 4, quote_char = 8 };
public:
bool eof_found() const {
return eof;
}
// See the constructor definition for the format of `active_chars`; the comment arguments are
// space-separated delimiter lists parsed by set_spec(). With init == true the first lexem is
// read immediately.
Lexer(SourceReader& _src, bool init = false, std::string active_chars = ";,() ~.", std::string eol_cmts = ";;",
std::string open_cmts = "{-", std::string close_cmts = "-}", std::string quote_chars = "\"",
std::string multiline_quote = "\"\"\"");
// Advances to the next lexem and returns it.
const Lexem& next();
// The current lexem.
const Lexem& cur() const {
return lexem;
}
// The lexem after the current one, without consuming it.
const Lexem& peek();
// Type of the current lexem.
int tp() const {
return lexem.tp;
}
// Consumes the current lexem if it has type exp_tp, otherwise throws a ParseError.
void expect(int exp_tp, const char* msg = 0);
// Class flags of character c; characters outside ASCII have no flags.
int classify_char(unsigned c) const {
return c < 0x80 ? char_class[c] : 0;
}
// True when c is both left- and right-active.
bool is_active(int c) const {
return (classify_char(c) & cc::active) == cc::active;
}
bool is_left_active(int c) const {
return (classify_char(c) & cc::left_active);
}
bool is_right_active(int c) const {
return (classify_char(c) & cc::right_active);
}
bool is_repeatable(int c) const {
return (classify_char(c) & cc::allow_repeat);
}
bool is_quote_char(int c) const {
return (classify_char(c) & cc::quote_char);
}
private:
// Parses a space-separated delimiter specification into arr.
void set_spec(std::array<int, 3>& arr, std::string setup);
// True when [begin, end) starts with the multi-line quote delimiter.
bool is_multiline_quote(const char* begin, const char* end);
};
} // namespace tolk

652
tolk/optimize.cpp Normal file
View file

@ -0,0 +1,652 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tolk.h"
namespace tolk {
/*
*
* PEEPHOLE OPTIMIZER
*
*/
// Takes ownership of the operation list `code` and loads its head into the working window
// by calling unpack().
void Optimizer::set_code(AsmOpConsList code) {
code_ = std::move(code);
unpack();
}
void Optimizer::unpack() {
int i = 0, j = 0;
for (AsmOpCons *p = code_.get(); p && i < n; p = p->cdr.get(), ++j) {
if (p->car->is_very_custom()) {
break;
}
if (p->car->is_comment()) {
continue;
}
op_cons_[i] = p;
op_[i] = std::move(p->car);
offs_[i] = j;
++i;
}
l_ = i;
indent_ = (i ? op_[0]->indent : 0);
}
// Reverses unpack(): moves every operation of the working window back into its list cell
// and empties the window.
void Optimizer::pack() {
  int k = 0;
  while (k < l_) {
    op_cons_[k]->car = std::move(op_[k]);
    op_cons_[k] = nullptr;
    ++k;
  }
  l_ = 0;
}
// Applies the pending rewrite: the first p_ operations of the window are replaced by the q_
// operations in oq_. Does nothing when both p_ and q_ are zero. The window is empty afterwards.
void Optimizer::apply() {
  if (!(p_ || q_)) {
    return;
  }
  tolk_assert(p_ > 0 && p_ <= l_ && q_ >= 0 && q_ <= n && l_ <= n);
  // Operations behind the rewritten prefix go back into their list cells unchanged.
  for (int kept = p_; kept < l_; ++kept) {
    tolk_assert(op_[kept]);
    op_cons_[kept]->car = std::move(op_[kept]);
    op_cons_[kept] = nullptr;
  }
  // Drop the list cells up to and including the last rewritten operation.
  int to_drop = offs_[p_ - 1];
  while (to_drop >= 0) {
    code_ = std::move(code_->cdr);
    --to_drop;
  }
  // Prepend the replacement operations, last one first, so they end up in order.
  int repl = q_ - 1;
  while (repl >= 0) {
    tolk_assert(oq_[repl]);
    oq_[repl]->indent = indent_;
    code_ = AsmOpCons::cons(std::move(oq_[repl]), std::move(code_));
    --repl;
  }
  l_ = 0;
}
// Puts the operations of the working window back into the list (pack()) and hands the whole
// list over to the caller; code_ is moved from.
AsmOpConsList Optimizer::extract_code() {
pack();
return std::move(code_);
}
// Debug output: prints the operations currently in the working window to std::cerr.
// Does nothing unless debug_ is set.
void Optimizer::show_head() const {
  if (debug_) {
    std::cerr << "optimizing";
    int k = 0;
    while (k < l_) {
      if (!op_[k]) {
        std::cerr << " (null) ";
      } else {
        std::cerr << ' ' << *op_[k] << ' ';
      }
      ++k;
    }
    std::cerr << std::endl;
  }
}
// Debug aid: prints the first p_ ops (the left-hand side of a rewrite) to
// stderr when debug_ is set. No newline is written; show_right() ends the line.
void Optimizer::show_left() const {
  if (debug_) {
    std::cerr << "// *** rewriting";
    for (int k = 0; k < p_; ++k) {
      if (!op_[k]) {
        std::cerr << " (null) ";
        continue;
      }
      std::cerr << ' ' << *op_[k] << ' ';
    }
  }
}
// Debug aid: prints the q_ replacement ops from oq_ (the right-hand side of a
// rewrite) followed by a newline, when debug_ is set.
void Optimizer::show_right() const {
  if (debug_) {
    std::cerr << "->";
    for (int k = 0; k < q_; ++k) {
      if (!oq_[k]) {
        std::cerr << " (null) ";
        continue;
      }
      std::cerr << ' ' << *oq_[k] << ' ';
    }
    std::cerr << std::endl;
  }
}
// Writes `str` to stderr when debug_ is set. Always returns true so the call
// can be chained inside boolean expressions.
bool Optimizer::say(std::string str) const {
  if (!debug_) {
    return true;
  }
  std::cerr << str << std::endl;
  return true;
}
// Searches the first l2_ ops for a constant-producing op (is_gconst()) whose
// accumulated transform has `cst` at position 0. Stores the index of the first
// match in *op_idx and returns true; returns false when there is none.
bool Optimizer::find_const_op(int* op_idx, int cst) {
  for (int pos = 0; pos < l2_; ++pos) {
    const bool hit = op_[pos]->is_gconst() && tr_[pos].get(0) == cst;
    if (!hit) {
      continue;
    }
    *op_idx = pos;
    return true;
  }
  return false;
}
// True when the prefix of exactly pb_ ops (3 <= pb_ <= l2_) has an accumulated
// transform accepted by StackTransform::is_push_const, which fills *i and *c.
bool Optimizer::is_push_const(int* i, int* c) const {
  if (pb_ < 3 || pb_ > l2_) {
    return false;
  }
  return tr_[pb_ - 1].is_push_const(i, c);
}
// PUSHCONST c ; PUSH s(i+1) ; SWAP -> PUSH s(i) ; PUSHCONST c
// Prepares a two-op replacement for the pb_-op prefix. The constant op found by
// find_const_op() becomes the second output; another op object from the prefix
// is reused as storage for the new PUSH. Returns false when no constant op for
// `c` is found inside the prefix.
bool Optimizer::rewrite_push_const(int i, int c) {
  p_ = pb_;
  q_ = 2;
  int const_at = -1;
  if (p_ < 2 || !find_const_op(&const_at, c) || const_at >= p_) {
    return false;
  }
  show_left();
  const int spare = (const_at == 0) ? 1 : 0;  // any prefix slot other than the constant
  oq_[1] = std::move(op_[const_at]);
  oq_[0] = std::move(op_[spare]);
  *oq_[0] = AsmOp::Push(i);
  show_right();
  return true;
}
// True when the prefix of exactly pb_ ops (3 <= pb_ <= l2_) has an accumulated
// transform accepted by StackTransform::is_const_rot, which fills *c.
bool Optimizer::is_const_rot(int* c) const {
  if (pb_ < 3 || pb_ > l2_) {
    return false;
  }
  return tr_[pb_ - 1].is_const_rot(c);
}
// Prepares the replacement "<const op> ; ROT" for the pb_-op prefix. The
// constant op found by find_const_op() is emitted first; another op object from
// the prefix is reused as storage for ROT. Returns false when no constant op
// for `c` is found inside the prefix.
bool Optimizer::rewrite_const_rot(int c) {
  p_ = pb_;
  q_ = 2;
  int const_at = -1;
  if (p_ < 2 || !find_const_op(&const_at, c) || const_at >= p_) {
    return false;
  }
  show_left();
  const int spare = (const_at == 0) ? 1 : 0;  // any prefix slot other than the constant
  oq_[0] = std::move(op_[const_at]);
  oq_[1] = std::move(op_[spare]);
  *oq_[1] = AsmOp::Custom("ROT", 3, 3);
  show_right();
  return true;
}
// True when the prefix of exactly pb_ ops (3 <= pb_ <= l2_) has an accumulated
// transform accepted by StackTransform::is_const_pop, which fills *c and *i.
bool Optimizer::is_const_pop(int* c, int* i) const {
  if (pb_ < 3 || pb_ > l2_) {
    return false;
  }
  return tr_[pb_ - 1].is_const_pop(c, i);
}
// Prepares the replacement "<const op> ; POP s(i)" for the pb_-op prefix. The
// constant op found by find_const_op() is emitted first; another op object from
// the prefix is reused as storage for the POP. Returns false when no constant
// op for `c` is found inside the prefix.
bool Optimizer::rewrite_const_pop(int c, int i) {
  p_ = pb_;
  q_ = 2;
  int const_at = -1;
  if (p_ < 2 || !find_const_op(&const_at, c) || const_at >= p_) {
    return false;
  }
  show_left();
  const int spare = (const_at == 0) ? 1 : 0;  // any prefix slot other than the constant
  oq_[0] = std::move(op_[const_at]);
  oq_[1] = std::move(op_[spare]);
  *oq_[1] = AsmOp::Pop(i);
  show_right();
  return true;
}
// Detects a prefix of pb_ ops that starts with a constant op (op_[0]) followed
// only by XCHG / PUSH ops, after which the constant is back on top of the stack.
// On success sets p_ to the prefix length and returns true; the matching
// rewrite is rewrite_const_push_xchgs().
bool Optimizer::is_const_push_xchgs() {
if (!(pb_ >= 2 && pb_ <= l2_ && op_[0]->is_gconst())) {
return false;
}
// `t` accumulates the effect of the XCHG/PUSH ops as if the constant were not
// on the stack; `pos` tracks the stack position currently holding the constant.
StackTransform t;
int pos = 0, i;
for (i = 1; i < pb_; i++) {
int a, b;
if (op_[i]->is_xchg(&a, &b)) {
if (pos == a) {
// the exchange moves the constant itself: only its position changes
pos = b;
} else if (pos == b) {
pos = a;
} else {
// indices above the constant shift down by one once the constant is removed
t.apply_xchg(a - (a > pos), b - (b > pos));
}
} else if (op_[i]->is_push(&a)) {
if (pos == a) {
// pushing a copy of the constant itself is not handled by this pattern
return false;
}
t.apply_push(a - (a > pos));
// the pushed value goes on top, so the constant sinks one position deeper
++pos;
} else {
return false;
}
}
// the constant must have ended up on top of the stack
if (pos) {
return false;
}
t.apply_push_newconst();
// NOTE(review): the exact meaning of `<=` on StackTransform is defined outside
// this file; presumably it checks that `t` is compatible with the recorded
// transform of the prefix -- confirm in stack-transform.cpp.
if (t <= tr_[i - 1]) {
p_ = i;
return true;
} else {
return false;
}
}
// Builds the replacement for a prefix accepted by is_const_push_xchgs(): the
// XCHG/PUSH ops are re-emitted with their stack indices adjusted as if the
// constant were absent, and the constant op is appended last. Returns false
// only when no prefix has been selected (p_ == 0).
bool Optimizer::rewrite_const_push_xchgs() {
if (!p_) {
return false;
}
show_left();
// take the constant op out of the window; it is re-inserted at the very end
auto c_op = std::move(op_[0]);
tolk_assert(c_op->is_gconst());
StackTransform t;
q_ = 0;
// stack position currently occupied by the constant (same walk as in is_const_push_xchgs)
int pos = 0;
for (int i = 1; i < p_; i++) {
int a, b;
if (op_[i]->is_xchg(&a, &b)) {
if (a == pos) {
// exchange involving the constant: dropped from the output, only `pos` moves
pos = b;
} else if (b == pos) {
pos = a;
} else {
oq_[q_] = std::move(op_[i]);
// operands located above the constant shift down by one
if (a > pos) {
oq_[q_]->a = a - 1;
}
if (b > pos) {
oq_[q_]->b = b - 1;
}
tolk_assert(apply_op(t, *oq_[q_]));
++q_;
}
} else {
tolk_assert(op_[i]->is_push(&a));
tolk_assert(a != pos);
oq_[q_] = std::move(op_[i]);
if (a > pos) {
oq_[q_]->a = a - 1;
}
tolk_assert(apply_op(t, *oq_[q_]));
++q_;
// the pushed value lands on top, pushing the constant one position deeper
++pos;
}
}
tolk_assert(!pos);
t.apply_push_newconst();
// re-check the condition that is_const_push_xchgs() used to accept the prefix
tolk_assert(t <= tr_[p_ - 1]);
oq_[q_++] = std::move(c_op);
show_right();
return true;
}
// Prepares the replacement of the first `p` windowed ops by the single op
// `new_op`. The AsmOp object held in op_[0] is reused as storage for the result.
// Always returns true so the call can be chained after a matcher with `&&`.
bool Optimizer::rewrite(int p, AsmOp&& new_op) {
  tolk_assert(p > 0 && p <= l_);
  p_ = p;
  q_ = 1;
  show_left();
  oq_[0] = std::move(op_[0]);
  // `new_op` is a named rvalue reference, i.e. an lvalue here: move explicitly,
  // otherwise the assignment copies it.
  *oq_[0] = std::move(new_op);
  show_right();
  return true;
}
// Prepares the replacement of the first `p` windowed ops (p >= 2) by the two
// ops `new_op1 ; new_op2`. The AsmOp objects held in op_[0] and op_[1] are
// reused as storage. Always returns true so the call can be chained with `&&`.
bool Optimizer::rewrite(int p, AsmOp&& new_op1, AsmOp&& new_op2) {
  tolk_assert(p > 1 && p <= l_);
  p_ = p;
  q_ = 2;
  show_left();
  // Named rvalue references are lvalues: move explicitly to avoid copies.
  oq_[0] = std::move(op_[0]);
  *oq_[0] = std::move(new_op1);
  oq_[1] = std::move(op_[1]);
  *oq_[1] = std::move(new_op2);
  show_right();
  return true;
}
// Prepares the replacement of the first `p` windowed ops (p >= 3) by the three
// ops `new_op1 ; new_op2 ; new_op3`. The AsmOp objects held in op_[0..2] are
// reused as storage. Always returns true so the call can be chained with `&&`.
bool Optimizer::rewrite(int p, AsmOp&& new_op1, AsmOp&& new_op2, AsmOp&& new_op3) {
  tolk_assert(p > 2 && p <= l_);
  p_ = p;
  q_ = 3;
  show_left();
  // Named rvalue references are lvalues: move explicitly to avoid copies.
  oq_[0] = std::move(op_[0]);
  *oq_[0] = std::move(new_op1);
  oq_[1] = std::move(op_[1]);
  *oq_[1] = std::move(new_op2);
  oq_[2] = std::move(op_[2]);
  *oq_[2] = std::move(new_op3);
  show_right();
  return true;
}
// Prepares the removal of the first p_ ops: the replacement list is empty
// (q_ = 0). p_ must already have been set by a matcher. Always returns true.
bool Optimizer::rewrite_nop() {
  tolk_assert(p_ > 0 && p_ <= l_);
  q_ = 0;
  show_left();
  show_right();
  return true;
}
bool Optimizer::is_pred(const std::function<bool(const StackTransform&)>& pred, int min_p) {
min_p = std::max(min_p, pb_);
for (int p = l2_; p >= min_p; p--) {
if (pred(tr_[p - 1])) {
p_ = p;
return true;
}
}
return false;
}
// is_pred() specialised to "accumulated transform >= trans"; the comparison
// itself is StackTransform's operator>=.
bool Optimizer::is_same_as(const StackTransform& trans, int min_p) {
  const auto covers = [&trans](const auto& candidate) { return candidate >= trans; };
  return is_pred(covers, min_p);
}
// s1 s3 XCHG ; s0 s2 XCHG -> 2SWAP
bool Optimizer::is_2swap() {
static const StackTransform t_2swap{2, 3, 0, 1, 4};
return is_same_as(t_2swap);
}
// s3 PUSH ; s3 PUSH -> 2OVER
bool Optimizer::is_2over() {
static const StackTransform t_2over{2, 3, 0};
return is_same_as(t_2over);
}
bool Optimizer::is_2dup() {
static const StackTransform t_2dup{0, 1, 0};
return is_same_as(t_2dup);
}
bool Optimizer::is_tuck() {
static const StackTransform t_tuck{0, 1, 0, 2};
return is_same_as(t_tuck);
}
bool Optimizer::is_2drop() {
static const StackTransform t_2drop{2};
return is_same_as(t_2drop);
}
// True when some prefix has a transform accepted by StackTransform::is_rot;
// is_pred() stores the prefix length in p_.
bool Optimizer::is_rot() {
  const auto rot_like = [](const auto& t) { return t.is_rot(); };
  return is_pred(rot_like);
}
// True when some prefix has a transform accepted by StackTransform::is_rotrev;
// is_pred() stores the prefix length in p_.
bool Optimizer::is_rotrev() {
  const auto rotrev_like = [](const auto& t) { return t.is_rotrev(); };
  return is_pred(rotrev_like);
}
// True when some prefix (minimum length 1, further bounded below by pb_) has an
// identity transform, i.e. the prefix can be removed by rewrite_nop().
bool Optimizer::is_nop() {
  const auto is_identity = [](const auto& t) { return t.is_id(); };
  return is_pred(is_identity, 1);
}
// True when some prefix acts as a single XCHG s(i),s(j) whose operands fit the
// accepted ranges (both < 16, or i == 0 and j < 256). Fills *i and *j.
bool Optimizer::is_xchg(int* i, int* j) {
  const auto single_xchg = [i, j](const auto& t) {
    return t.is_xchg(i, j) && ((*i < 16 && *j < 16) || (!*i && *j < 256));
  };
  return is_pred(single_xchg);
}
// True when some prefix acts as two consecutive XCHGs (i,j) and (k,l) within
// the accepted operand ranges, unless the prefix already is exactly those two
// XCHG ops (which would make the rewrite a no-op). Fills *i, *j, *k, *l.
bool Optimizer::is_xchg_xchg(int* i, int* j, int* k, int* l) {
  const auto two_xchgs = [i, j, k, l](const auto& t) {
    return t.is_xchg_xchg(i, j, k, l) && (*i < 2 && *j < (*i ? 16 : 256) && *k < 2 && *l < (*k ? 16 : 256));
  };
  if (!is_pred(two_xchgs)) {
    return false;
  }
  const bool already_in_this_form = p_ == 2 && op_[0]->is_xchg(*i, *j) && op_[1]->is_xchg(*k, *l);
  return !already_in_this_form;
}
// True when some prefix acts as a single PUSH s(i) with i < 256. Fills *i.
bool Optimizer::is_push(int* i) {
  const auto single_push = [i](const auto& t) { return t.is_push(i) && *i < 256; };
  return is_pred(single_push);
}
// True when some prefix acts as a single POP s(i) with i < 256. Fills *i.
bool Optimizer::is_pop(int* i) {
  const auto single_pop = [i](const auto& t) { return t.is_pop(i) && *i < 256; };
  return is_pred(single_pop);
}
// True when some prefix of at least 3 ops acts as POP s(i) ; POP s(j) with
// both indices < 256. Fills *i and *j.
bool Optimizer::is_pop_pop(int* i, int* j) {
  const auto two_pops = [i, j](const auto& t) { return t.is_pop_pop(i, j) && *i < 256 && *j < 256; };
  return is_pred(two_pops, 3);
}
// True when some prefix of at least 3 ops acts as PUSH s(i) ; ROT with i < 16.
// Fills *i.
bool Optimizer::is_push_rot(int* i) {
  const auto push_then_rot = [i](const auto& t) { return t.is_push_rot(i) && *i < 16; };
  return is_pred(push_then_rot, 3);
}
// True when some prefix of at least 3 ops acts as PUSH s(i) ; -ROT with i < 16.
// Fills *i.
bool Optimizer::is_push_rotrev(int* i) {
  const auto push_then_rotrev = [i](const auto& t) { return t.is_push_rotrev(i) && *i < 16; };
  return is_pred(push_then_rotrev, 3);
}
// True when some prefix acts as PUSH s(i) ; XCHG s(j),s(k) with all indices
// < 16, unless the prefix already consists of exactly a PUSH followed by an
// XCHG. Fills *i, *j, *k.
bool Optimizer::is_push_xchg(int* i, int* j, int* k) {
  const auto push_then_xchg = [i, j, k](const auto& t) {
    return t.is_push_xchg(i, j, k) && *i < 16 && *j < 16 && *k < 16;
  };
  if (!is_pred(push_then_xchg)) {
    return false;
  }
  const bool already_in_this_form = p_ == 2 && op_[0]->is_push() && op_[1]->is_xchg();
  return !already_in_this_form;
}
// True when some prefix is accepted by StackTransform::is_xchg2 with both
// operands < 16 (rewritten to XCHG2 by find_at_least()). Fills *i and *j.
bool Optimizer::is_xchg2(int* i, int* j) {
  const auto fits_xchg2 = [i, j](const auto& t) { return t.is_xchg2(i, j) && *i < 16 && *j < 16; };
  return is_pred(fits_xchg2);
}
// True when some prefix is accepted by StackTransform::is_xcpu with both
// operands < 16 (rewritten to XCPU by find_at_least()). Fills *i and *j.
bool Optimizer::is_xcpu(int* i, int* j) {
  const auto fits_xcpu = [i, j](const auto& t) { return t.is_xcpu(i, j) && *i < 16 && *j < 16; };
  return is_pred(fits_xcpu);
}
// True when some prefix is accepted by StackTransform::is_puxc with i < 16 and
// j < 15 (rewritten to PUXC by find_at_least()). Fills *i and *j.
bool Optimizer::is_puxc(int* i, int* j) {
  const auto fits_puxc = [i, j](const auto& t) { return t.is_puxc(i, j) && *i < 16 && *j < 15; };
  return is_pred(fits_puxc);
}
// True when some prefix is accepted by StackTransform::is_push2 with both
// operands < 16 (rewritten to PUSH2 by find_at_least()). Fills *i and *j.
bool Optimizer::is_push2(int* i, int* j) {
  const auto fits_push2 = [i, j](const auto& t) { return t.is_push2(i, j) && *i < 16 && *j < 16; };
  return is_pred(fits_push2);
}
// True when some prefix is accepted by StackTransform::is_xchg3 with all three
// operands < 16 (rewritten to XCHG3). Fills *i, *j, *k.
bool Optimizer::is_xchg3(int* i, int* j, int* k) {
  const auto fits_xchg3 = [i, j, k](const auto& t) {
    return t.is_xchg3(i, j, k) && *i < 16 && *j < 16 && *k < 16;
  };
  return is_pred(fits_xchg3);
}
// True when some prefix is accepted by StackTransform::is_xc2pu with all three
// operands < 16 (rewritten to XC2PU). Fills *i, *j, *k.
bool Optimizer::is_xc2pu(int* i, int* j, int* k) {
  const auto fits_xc2pu = [i, j, k](const auto& t) {
    return t.is_xc2pu(i, j, k) && *i < 16 && *j < 16 && *k < 16;
  };
  return is_pred(fits_xc2pu);
}
// True when some prefix is accepted by StackTransform::is_xcpuxc with i < 16,
// j < 16 and k < 15 (rewritten to XCPUXC). Fills *i, *j, *k.
bool Optimizer::is_xcpuxc(int* i, int* j, int* k) {
  const auto fits_xcpuxc = [i, j, k](const auto& t) {
    return t.is_xcpuxc(i, j, k) && *i < 16 && *j < 16 && *k < 15;
  };
  return is_pred(fits_xcpuxc);
}
// True when some prefix is accepted by StackTransform::is_xcpu2 with all three
// operands < 16 (rewritten to XCPU2). Fills *i, *j, *k.
bool Optimizer::is_xcpu2(int* i, int* j, int* k) {
  const auto fits_xcpu2 = [i, j, k](const auto& t) {
    return t.is_xcpu2(i, j, k) && *i < 16 && *j < 16 && *k < 16;
  };
  return is_pred(fits_xcpu2);
}
// True when some prefix is accepted by StackTransform::is_puxc2 with i < 16,
// j < 15, k < 15 and j + k != -1 (rewritten to PUXC2). Fills *i, *j, *k.
bool Optimizer::is_puxc2(int* i, int* j, int* k) {
  const auto fits_puxc2 = [i, j, k](const auto& t) {
    return t.is_puxc2(i, j, k) && *i < 16 && *j < 15 && *k < 15 && *j + *k != -1;
  };
  return is_pred(fits_puxc2);
}
// True when some prefix is accepted by StackTransform::is_puxcpu with i < 16,
// j < 15 and k < 15 (rewritten to PUXCPU). Fills *i, *j, *k.
bool Optimizer::is_puxcpu(int* i, int* j, int* k) {
  const auto fits_puxcpu = [i, j, k](const auto& t) {
    return t.is_puxcpu(i, j, k) && *i < 16 && *j < 15 && *k < 15;
  };
  return is_pred(fits_puxcpu);
}
// True when some prefix is accepted by StackTransform::is_pu2xc with i < 16,
// j < 15 and k < 14 (rewritten to PU2XC). Fills *i, *j, *k.
bool Optimizer::is_pu2xc(int* i, int* j, int* k) {
  const auto fits_pu2xc = [i, j, k](const auto& t) {
    return t.is_pu2xc(i, j, k) && *i < 16 && *j < 15 && *k < 14;
  };
  return is_pred(fits_pu2xc);
}
// True when some prefix is accepted by StackTransform::is_push3 with all three
// operands < 16 (rewritten to PUSH3). Fills *i, *j, *k.
bool Optimizer::is_push3(int* i, int* j, int* k) {
  const auto fits_push3 = [i, j, k](const auto& t) {
    return t.is_push3(i, j, k) && *i < 16 && *j < 16 && *k < 16;
  };
  return is_pred(fits_push3);
}
// True when some prefix is accepted by StackTransform::is_blkswap with both
// block sizes in 1..16 (rewritten to BLKSWAP). Fills *i and *j.
bool Optimizer::is_blkswap(int* i, int* j) {
  const auto fits_blkswap = [i, j](const auto& t) {
    return t.is_blkswap(i, j) && *i > 0 && *j > 0 && *i <= 16 && *j <= 16;
  };
  return is_pred(fits_blkswap);
}
// True when some prefix is accepted by StackTransform::is_blkpush with
// 0 < i < 16 and j < 16 (rewritten to BLKPUSH). Fills *i and *j.
bool Optimizer::is_blkpush(int* i, int* j) {
  const auto fits_blkpush = [i, j](const auto& t) {
    return t.is_blkpush(i, j) && *i > 0 && *i < 16 && *j < 16;
  };
  return is_pred(fits_blkpush);
}
// True when some prefix is accepted by StackTransform::is_blkdrop with
// 0 < i < 16 (rewritten to BLKDROP). Fills *i.
bool Optimizer::is_blkdrop(int* i) {
  const auto fits_blkdrop = [i](const auto& t) { return t.is_blkdrop(i) && *i > 0 && *i < 16; };
  return is_pred(fits_blkdrop);
}
// True when some prefix is accepted by StackTransform::is_blkdrop2 with
// 0 < i < 16 and 0 < j < 16 (rewritten to BLKDROP2). Fills *i and *j.
bool Optimizer::is_blkdrop2(int* i, int* j) {
  const auto fits_blkdrop2 = [i, j](const auto& t) {
    return t.is_blkdrop2(i, j) && *i > 0 && *i < 16 && *j > 0 && *j < 16;
  };
  return is_pred(fits_blkdrop2);
}
// True when some prefix is accepted by StackTransform::is_reverse with
// 2 <= i <= 17 and j < 16 (rewritten to a block reverse). Fills *i and *j.
bool Optimizer::is_reverse(int* i, int* j) {
  const auto fits_reverse = [i, j](const auto& t) {
    return t.is_reverse(i, j) && *i >= 2 && *i <= 17 && *j < 16;
  };
  return is_pred(fits_reverse);
}
// True when some prefix is accepted by StackTransform::is_nip_seq with
// 3 <= i <= 15 (rewritten to XCHG s(i),s(j) ; BLKDROP i). Fills *i and *j.
bool Optimizer::is_nip_seq(int* i, int* j) {
  const auto fits_nip_seq = [i, j](const auto& t) { return t.is_nip_seq(i, j) && *i >= 3 && *i <= 15; };
  return is_pred(fits_nip_seq);
}
// True when some prefix of at least 3 ops is accepted by
// StackTransform::is_pop_blkdrop with i >= k and 2 <= k <= 15 (rewritten to
// POP s(i) ; BLKDROP k). Fills *i and *k.
bool Optimizer::is_pop_blkdrop(int* i, int* k) {
  const auto fits_pop_blkdrop = [i, k](const auto& t) {
    return t.is_pop_blkdrop(i, k) && *i >= *k && *k >= 2 && *k <= 15;
  };
  return is_pred(fits_pop_blkdrop, 3);
}
// True when some prefix of at least 3 ops is accepted by
// StackTransform::is_2pop_blkdrop with i >= k, j >= k and 2 <= k <= 15.
// Fills *i, *j, *k.
bool Optimizer::is_2pop_blkdrop(int* i, int* j, int* k) {
  const auto fits_2pop_blkdrop = [i, j, k](const auto& t) {
    return t.is_2pop_blkdrop(i, j, k) && *i >= *k && *j >= *k && *k >= 2 && *k <= 15;
  };
  return is_pred(fits_2pop_blkdrop, 3);
}
bool Optimizer::compute_stack_transforms() {
StackTransform trans;
for (int i = 0; i < l_; i++) {
if (!apply_op(trans, *op_[i])) {
l2_ = i;
return true;
}
tr_[i] = trans;
}
l2_ = l_;
return true;
}
bool Optimizer::show_stack_transforms() const {
show_head();
// slow version
/*
StackTransform trans2;
std::cerr << "id = " << trans2 << std::endl;
for (int i = 0; i < l_; i++) {
StackTransform op;
if (!apply_op(op, *op_[i])) {
std::cerr << "* (" << *op_[i] << " = invalid)\n";
break;
}
trans2 *= op;
std::cerr << "* " << *op_[i] << " = " << op << " -> " << trans2 << std::endl;
}
*/
// fast version
StackTransform trans;
for (int i = 0; i < l_; i++) {
std::cerr << trans << std::endl << *op_[i] << " -> ";
if (!apply_op(trans, *op_[i])) {
std::cerr << " <not-applicable>" << std::endl;
return true;
}
}
std::cerr << trans << std::endl;
return true;
}
// Tries every known peephole pattern on the current window and prepares the
// first rewrite that applies. `pb` is stored in pb_: the tr_[pb_ - 1]-based
// matchers look at the prefix of exactly pb_ ops, while the is_pred()-based
// matchers try prefix lengths from l2_ down to max(min_p, pb_).
// p_ and q_ are reset first. Each alternative has the form
// `matcher && rewrite`; `||` short-circuits, so the order below is the
// priority order. Alternatives guarded by !(mode_ & 1) are skipped while bit 0
// of mode_ is set. Returns true when a rewrite has been prepared (p_, q_ and
// oq_ are set) and is ready for apply().
bool Optimizer::find_at_least(int pb) {
p_ = q_ = 0;
pb_ = pb;
// show_stack_transforms();
int i, j, k, l, c;
return (is_push_const(&i, &c) && rewrite_push_const(i, c)) || (is_nop() && rewrite_nop()) ||
(!(mode_ & 1) && is_const_rot(&c) && rewrite_const_rot(c)) ||
(is_const_push_xchgs() && rewrite_const_push_xchgs()) || (is_const_pop(&c, &i) && rewrite_const_pop(c, i)) ||
(is_xchg(&i, &j) && rewrite(AsmOp::Xchg(i, j))) || (is_push(&i) && rewrite(AsmOp::Push(i))) ||
(is_pop(&i) && rewrite(AsmOp::Pop(i))) || (is_pop_pop(&i, &j) && rewrite(AsmOp::Pop(i), AsmOp::Pop(j))) ||
(is_xchg_xchg(&i, &j, &k, &l) && rewrite(AsmOp::Xchg(i, j), AsmOp::Xchg(k, l))) ||
(!(mode_ & 1) &&
((is_rot() && rewrite(AsmOp::Custom("ROT", 3, 3))) || (is_rotrev() && rewrite(AsmOp::Custom("-ROT", 3, 3))) ||
(is_2dup() && rewrite(AsmOp::Custom("2DUP", 2, 4))) ||
(is_2swap() && rewrite(AsmOp::Custom("2SWAP", 2, 4))) ||
(is_2over() && rewrite(AsmOp::Custom("2OVER", 2, 4))) ||
(is_tuck() && rewrite(AsmOp::Custom("TUCK", 2, 3))) ||
(is_2drop() && rewrite(AsmOp::Custom("2DROP", 2, 0))) || (is_xchg2(&i, &j) && rewrite(AsmOp::Xchg2(i, j))) ||
(is_xcpu(&i, &j) && rewrite(AsmOp::XcPu(i, j))) || (is_puxc(&i, &j) && rewrite(AsmOp::PuXc(i, j))) ||
(is_push2(&i, &j) && rewrite(AsmOp::Push2(i, j))) || (is_blkswap(&i, &j) && rewrite(AsmOp::BlkSwap(i, j))) ||
(is_blkpush(&i, &j) && rewrite(AsmOp::BlkPush(i, j))) || (is_blkdrop(&i) && rewrite(AsmOp::BlkDrop(i))) ||
(is_push_rot(&i) && rewrite(AsmOp::Push(i), AsmOp::Custom("ROT"))) ||
(is_push_rotrev(&i) && rewrite(AsmOp::Push(i), AsmOp::Custom("-ROT"))) ||
(is_push_xchg(&i, &j, &k) && rewrite(AsmOp::Push(i), AsmOp::Xchg(j, k))) ||
(is_reverse(&i, &j) && rewrite(AsmOp::BlkReverse(i, j))) ||
(is_blkdrop2(&i, &j) && rewrite(AsmOp::BlkDrop2(i, j))) ||
(is_nip_seq(&i, &j) && rewrite(AsmOp::Xchg(i, j), AsmOp::BlkDrop(i))) ||
(is_pop_blkdrop(&i, &k) && rewrite(AsmOp::Pop(i), AsmOp::BlkDrop(k))) ||
(is_2pop_blkdrop(&i, &j, &k) && (k >= 3 && k <= 13 && i != j + 1 && i <= 15 && j <= 14
? rewrite(AsmOp::Xchg2(j + 1, i), AsmOp::BlkDrop(k + 2))
: rewrite(AsmOp::Pop(i), AsmOp::Pop(j), AsmOp::BlkDrop(k)))) ||
(is_xchg3(&i, &j, &k) && rewrite(AsmOp::Xchg3(i, j, k))) ||
(is_xc2pu(&i, &j, &k) && rewrite(AsmOp::Xc2Pu(i, j, k))) ||
(is_xcpuxc(&i, &j, &k) && rewrite(AsmOp::XcPuXc(i, j, k))) ||
(is_xcpu2(&i, &j, &k) && rewrite(AsmOp::XcPu2(i, j, k))) ||
(is_puxc2(&i, &j, &k) && rewrite(AsmOp::PuXc2(i, j, k))) ||
(is_puxcpu(&i, &j, &k) && rewrite(AsmOp::PuXcPu(i, j, k))) ||
(is_pu2xc(&i, &j, &k) && rewrite(AsmOp::Pu2Xc(i, j, k))) ||
(is_push3(&i, &j, &k) && rewrite(AsmOp::Push3(i, j, k)))));
}
// Recomputes the stack transforms of the window and searches for an applicable
// rewrite, trying the largest lower bound on the prefix length first
// (pb = l_, l_ - 1, ..., 1). Returns true when a rewrite has been prepared.
bool Optimizer::find() {
  if (!compute_stack_transforms()) {
    return false;
  }
  int pb = l_;
  while (pb > 0) {
    if (find_at_least(pb)) {
      return true;
    }
    --pb;
  }
  return false;
}
// Repeatedly finds and applies rewrites at the head of the code until none is
// applicable; the window is reloaded after each applied rewrite. Returns true
// if at least one rewrite was applied.
bool Optimizer::optimize() {
  bool changed = false;
  for (;;) {
    if (!find()) {
      break;
    }
    changed = true;
    apply();
    unpack();
  }
  return changed;
}
// Runs the peephole optimizer on the head of `op_list` only and returns the
// resulting list. Debug output is controlled by op_rewrite_comments.
AsmOpConsList optimize_code_head(AsmOpConsList op_list, int mode) {
  Optimizer head_optimizer(std::move(op_list), op_rewrite_comments, mode);
  head_optimizer.optimize();
  return head_optimizer.extract_code();
}
// One optimization pass over a whole list: the head is optimized (unless it is
// a comment), the first op is then set aside, and the process repeats on the
// remainder. The processed ops are finally relinked in their original order.
AsmOpConsList optimize_code(AsmOpConsList op_list, int mode) {
  std::vector<std::unique_ptr<AsmOp>> done;
  while (op_list) {
    const bool head_is_comment = op_list->car->is_comment();
    if (!head_is_comment) {
      op_list = optimize_code_head(std::move(op_list), mode);
    }
    if (!op_list) {
      break;  // the optimizer consumed everything that was left
    }
    done.push_back(std::move(op_list->car));
    op_list = std::move(op_list->cdr);
  }
  // Rebuild the list by consing from the last processed op backwards.
  while (!done.empty()) {
    op_list = AsmOpCons::cons(std::move(done.back()), std::move(op_list));
    done.pop_back();
  }
  return op_list;
}
// Optimizes `ops` in place: the ops are moved into a cons list, eight passes
// are run (four with mode 1, then four with mode 0), and the result is moved
// back into ops.list_.
void optimize_code(AsmOpList& ops) {
  AsmOpConsList chain;
  for (auto rit = ops.list_.rbegin(); rit != ops.list_.rend(); ++rit) {
    chain = AsmOpCons::cons(std::make_unique<AsmOp>(std::move(*rit)), std::move(chain));
  }
  for (int mode : {1, 0}) {
    for (int pass = 0; pass < 4; ++pass) {
      chain = optimize_code(std::move(chain), mode);
    }
  }
  ops.list_.clear();
  for (; chain; chain = std::move(chain->cdr)) {
    ops.list_.push_back(std::move(*(chain->car)));
  }
}
} // namespace tolk

1809
tolk/parse-tolk.cpp Normal file

File diff suppressed because it is too large Load diff

228
tolk/srcread.cpp Normal file
View file

@ -0,0 +1,228 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#include "srcread.h"
#include <algorithm>
namespace tolk {
/*
*
* SOURCE FILE READER
*
*/
// Prints a human-readable name for a source file: "unknown-location" for a
// null descriptor, "stdin" for standard input, otherwise the file name.
std::ostream& operator<<(std::ostream& os, const FileDescr* fdescr) {
  if (!fdescr) {
    return os << std::string{"unknown-location"};
  }
  if (fdescr->is_stdin) {
    return os << std::string{"stdin"};
  }
  return os << fdescr->filename;
}
// Streams the message carried by a Fatal error.
std::ostream& operator<<(std::ostream& os, const Fatal& fatal) {
  os << fatal.get_msg();
  return os;
}
// Translates a character offset inside `text` into line information.
//   *line_no   - 1-based line number; line_offs.size() when offset is outside the text
//   *line_pos  - offset within the line, or -1 when unknown
//   *line_size - distance between the line's start and the next recorded offset
//                (this includes the '\0' that push_line appends), or 0
// Each output pointer may be null. Returns a pointer to the start of the line,
// or nullptr when the offset cannot be mapped to a line.
const char* FileDescr::convert_offset(long offset, long* line_no, long* line_pos, long* line_size) const {
  long number = 0;
  long column = -1;
  long length = 0;
  const char* line_begin = nullptr;
  const bool inside_text = offset >= 0 && offset < (long)text.size();
  if (!inside_text) {
    number = (long)line_offs.size();
  } else {
    // first recorded offset strictly greater than `offset`, i.e. the start of the next line
    const auto next_line = std::upper_bound(line_offs.begin(), line_offs.end(), offset);
    number = next_line - line_offs.begin();
    if (number && next_line != line_offs.end()) {
      const long this_line_start = next_line[-1];
      length = next_line[0] - this_line_start;
      column = offset - this_line_start;
      line_begin = text.data() + this_line_start;
    }
  }
  if (line_no) {
    *line_no = number;
  }
  if (line_pos) {
    *line_pos = column;
  }
  if (line_size) {
    *line_size = length;
  }
  return line_begin;
}
// Appends one line (followed by a '\0') to `text`, records the offset of the
// end of the appended data in line_offs, and returns a pointer to the first
// character of the new line inside `text`. The pointer is only valid until
// `text` is modified again.
const char* FileDescr::push_line(std::string new_line) {
  if (line_offs.empty()) {
    line_offs.push_back(0);  // the first line starts at offset 0
  }
  const std::size_t line_start = text.size();
  text.append(new_line);
  text.push_back('\0');
  line_offs.push_back((long)text.size());
  return text.data() + line_start;
}
// Prints the location as "<file>[:<line>[:<column>]]"; the column is printed
// 1-based. Line/column are omitted when the position cannot be resolved.
void SrcLocation::show(std::ostream& os) const {
  os << fdescr;
  if (!fdescr) {
    return;
  }
  long line_no, line_pos;
  if (!convert_pos(&line_no, &line_pos)) {
    return;
  }
  os << ':' << line_no;
  if (line_pos >= 0) {
    os << ':' << (line_pos + 1);
  }
}
// Prints the source line containing this location, followed by a second line
// with a '^' under the offending character. Long lines are clipped to about
// 100 characters on each side of the location, with "..." marking a cut.
// Returns false when the location cannot be resolved to a line.
bool SrcLocation::show_context(std::ostream& os) const {
  long line_no, line_pos, line_size;
  if (!fdescr || !convert_pos(&line_no, &line_pos, &line_size)) {
    return false;
  }
  const bool clip_left = (line_pos > 200);
  const bool clip_right = (line_pos + 200u < line_size);
  const char* const caret_at = fdescr->text.data() + char_offs;
  const char* const line_begin = caret_at - line_pos;
  const char* const first = clip_left ? caret_at - 100 : line_begin;
  const char* const last = clip_right ? caret_at + 100 : line_begin + line_size;
  // the (possibly clipped) source line
  os << " ";
  if (clip_left) {
    os << "... ";
  }
  for (const char* p = first; p < last; ++p) {
    os << *p;
  }
  if (clip_right) {
    os << " ...";
  }
  os << std::endl;
  // the marker line: tabs/newlines are kept so the caret stays aligned
  os << " ";
  if (clip_left) {
    os << "... ";
  }
  for (const char* p = first; p < caret_at; ++p) {
    const char ch = *p;
    const bool keep = (ch == '\t' || ch == '\n');
    os << (keep ? ch : ' ');
  }
  os << '^' << std::endl;
  return true;
}
// Streams a location in the format produced by SrcLocation::show().
std::ostream& operator<<(std::ostream& os, const SrcLocation& loc) {
  loc.show(os);
  return os;
}
// Prints "<location>[: <err_type>]: <message>" followed by the source context
// of this location. `err_type` is omitted from the output when empty.
void SrcLocation::show_gen_error(std::ostream& os, std::string message, std::string err_type) const {
  show(os);
  const bool has_kind = !err_type.empty();
  if (has_kind) {
    os << ": " << err_type;
  }
  os << ": " << message << std::endl;
  show_context(os);
}
// Streams any Error through its virtual show() method.
std::ostream& operator<<(std::ostream& os, const Error& error) {
  error.show(os);
  return os;
}
void ParseError::show(std::ostream& os) const {
os << where << ": error: " << message << std::endl;
where.show_context(os);
}
// Binds the reader to an input stream and an optional file descriptor, then
// loads the first line.
SourceReader::SourceReader(std::istream* _is, FileDescr* _fdescr)
    : ifs(_is)
    , fdescr(_fdescr)
    , loc(_fdescr)
    , eof(false)
    , cur_line_len(0)
    , start(nullptr)
    , cur(nullptr)
    , end(nullptr) {
  load_line();
}
// Marks the reader as exhausted and clears the line pointers. Has no effect
// when the reader is already at end of input.
void SourceReader::set_eof() {
  if (eof) {
    return;
  }
  eof = true;
  start = cur = end = 0;
}
// Advances over spaces and tabs at the current position and returns how many
// characters were skipped (0 when no line is loaded). The scan stops at the
// first other character, which includes the '\0' that ends each loaded line.
int SourceReader::skip_spc() {
  if (!cur) {
    return 0;
  }
  const char* scan = cur;
  while (*scan == ' ' || *scan == '\t') {
    ++scan;
  }
  const int skipped = (int)(scan - cur);
  set_ptr(scan);
  return skipped;
}
// Skips whitespace and blank line remainders, loading further lines as needed.
// Returns true when the input is exhausted, false as soon as a non-blank
// character is found.
bool SourceReader::seek_eof() {
  for (;;) {
    if (!seek_eoln()) {
      return false;
    }
    if (!load_line()) {
      return true;
    }
  }
}
// Moves the read position forward to `ptr`, keeping loc.char_offs in sync.
// Raises a parse error (via error()) if `ptr` lies before the current position
// or beyond the end of the line. Returns `ptr`.
const char* SourceReader::set_ptr(const char* ptr) {
  if (ptr == cur) {
    return ptr;
  }
  if (ptr < cur || ptr > end) {
    error("parsing position went outside of line");
  }
  loc.char_offs += ptr - cur;
  cur = ptr;
  return ptr;
}
// Reads the next line from the stream and makes it the current line.
// Returns false at end of input; raises a parse error (via error()) on a read
// failure or when the line is longer than 0xffffff characters. When a file
// descriptor is attached, the line is stored in it and the line pointers refer
// to the descriptor's text; otherwise they refer to cur_line.
bool SourceReader::load_line() {
  if (eof) {
    return false;
  }
  loc.set_eof();
  if (ifs->eof()) {
    set_eof();
    return false;
  }
  std::getline(*ifs, cur_line);
  if (ifs->fail()) {
    set_eof();
    if (!ifs->eof()) {
      error("cannot read line from source stream");
    }
    return false;
  }
  if (cur_line.size() > 0xffffff) {
    set_eof();
    error("line too long");
    return false;
  }
  // strip one trailing CR so that CRLF-terminated lines are accepted
  if (!cur_line.empty() && cur_line.back() == '\r') {
    cur_line.pop_back();
  }
  const std::size_t len = cur_line.size();
  cur_line_len = (int)len;
  if (!fdescr) {
    cur = start = cur_line.c_str();
    end = start + cur_line_len;
    return true;
  }
  cur = start = fdescr->push_line(std::move(cur_line));
  end = start + len;
  loc.char_offs = (std::size_t)(cur - fdescr->text.data());
  cur_line.clear();
  return true;
}
} // namespace tolk

162
tolk/srcread.h Normal file
View file

@ -0,0 +1,162 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <string>
#include <vector>
#include <iostream>
namespace tolk {
/*
*
* SOURCE FILE READER
*
*/
// Describes one source file: its name, the text read so far, and an index of
// line boundaries used to translate character offsets into line/column.
struct FileDescr {
std::string filename;
// Lines pushed so far; push_line() stores each line followed by a '\0'.
std::string text;
// Offsets into `text`: 0 for the first line, then the end offset after each pushed line.
std::vector<long> line_offs;
// When true the file is printed as "stdin" instead of `filename`.
bool is_stdin;
bool is_main = false;
FileDescr(std::string _fname, bool _stdin = false) : filename(std::move(_fname)), is_stdin(_stdin) {
}
// Appends a line to `text` and returns a pointer to its first character.
const char* push_line(std::string new_line);
// Maps an offset in `text` to line number / position / size; returns the line start or nullptr.
const char* convert_offset(long offset, long* line_no, long* line_pos, long* line_size = nullptr) const;
};
// Unrecoverable error carrying only a text message.
struct Fatal {
  std::string message;

  Fatal(std::string _msg) : message{std::move(_msg)} {
  }

  // Returns a copy of the stored message.
  std::string get_msg() const {
    return message;
  }
};
std::ostream& operator<<(std::ostream& os, const Fatal& fatal);
// A position inside a source file: the file descriptor plus a character offset
// into its text. An offset of -1 means "end of file / no position".
struct SrcLocation {
  const FileDescr* fdescr;
  long char_offs;

  SrcLocation() : fdescr(nullptr), char_offs(-1) {
  }
  // `offs` is the character offset inside the file's text. The default of -1
  // keeps the previous behaviour for callers that pass only the descriptor.
  // (The offset argument used to be ignored and -1 was always stored.)
  SrcLocation(const FileDescr* _fdescr, long offs = -1) : fdescr(_fdescr), char_offs(offs) {
  }

  // True when the location refers to some file.
  bool defined() const {
    return fdescr;
  }
  bool eof() const {
    return char_offs == -1;
  }
  void set_eof() {
    char_offs = -1;
  }
  // Resolves the offset to line number / position / size through the file
  // descriptor; returns nullptr when no file is attached.
  const char* convert_pos(long* line_no, long* line_pos, long* line_size = nullptr) const {
    return defined() ? fdescr->convert_offset(char_offs, line_no, line_pos, line_size) : nullptr;
  }

  // Prints "<file>[:<line>[:<column>]]".
  void show(std::ostream& os) const;
  // Prints the source line and a caret marker; false if the line is unknown.
  bool show_context(std::ostream& os) const;
  // Prints "<location>[: <err_type>]: <message>" followed by the context.
  void show_gen_error(std::ostream& os, std::string message, std::string err_type = "") const;

  // Convenience wrappers that report to std::cerr with a fixed severity label.
  void show_note(std::string err_msg) const {
    show_gen_error(std::cerr, err_msg, "note");
  }
  void show_warning(std::string err_msg) const {
    show_gen_error(std::cerr, err_msg, "warning");
  }
  void show_error(std::string err_msg) const {
    show_gen_error(std::cerr, err_msg, "error");
  }
};
std::ostream& operator<<(std::ostream& os, const SrcLocation& loc);
// Abstract base of reportable errors: each error knows how to print itself.
struct Error {
  // Writes a description of the error to `os`.
  virtual void show(std::ostream& os) const = 0;

  virtual ~Error() = default;
};
std::ostream& operator<<(std::ostream& os, const Error& error);
// Error raised while reading or parsing source text: a message plus the
// location it refers to.
struct ParseError : Error {
  SrcLocation where;
  std::string message;

  // `_msg` is taken by value and moved into place, avoiding a second copy.
  ParseError(const SrcLocation& _where, std::string _msg) : where(_where), message(std::move(_msg)) {
  }
  // Variant for an optional location: a null pointer leaves `where`
  // default-constructed (no file, no position).
  ParseError(const SrcLocation* _where, std::string _msg) : message(std::move(_msg)) {
    if (_where) {
      where = *_where;
    }
  }
  ~ParseError() override = default;

  // Prints "<location>: error: <message>" followed by the source context.
  void show(std::ostream& os) const override;
};
// Line-oriented reader over an input stream. It keeps one current line and a
// cursor inside it, and tracks the corresponding SrcLocation for diagnostics.
class SourceReader {
std::istream* ifs;
// Optional descriptor; when set, every loaded line is also stored in it.
FileDescr* fdescr;
SrcLocation loc;
bool eof;
// Buffer for std::getline; emptied after the line has been handed to fdescr.
std::string cur_line;
int cur_line_len;
void set_eof();
// Bounds of the current line and the cursor within it; all null after end of input.
const char *start, *cur, *end;
public:
SourceReader(std::istream* _is, FileDescr* _fdescr);
// Loads the next line; returns false at end of input.
bool load_line();
bool is_eof() const {
return eof;
}
// Non-zero when the cursor is at the end of the current line.
int is_eoln() const {
return cur == end;
}
// Skips spaces and tabs; returns the number of characters skipped.
int skip_spc();
// Skips spaces and tabs, then reports whether the end of the line was reached.
bool seek_eoln() {
skip_spc();
return is_eoln();
}
// Skips blank input across lines; returns true when the input is exhausted.
bool seek_eof();
// NOTE(review): load_line() clears cur_line after moving it into fdescr, so
// this looks like it returns an empty string whenever a descriptor is attached -- confirm.
const char* cur_line_cstr() const {
return cur_line.c_str();
}
const SrcLocation& here() const {
return loc;
}
// NOTE(review): cur is null after end of input (see set_eof), so callers
// presumably must check is_eof() before calling cur_char()/next_char() -- confirm.
char cur_char() const {
return *cur;
}
char next_char() const {
return cur[1];
}
const char* get_ptr() const {
return cur;
}
const char* get_end_ptr() const {
return end;
}
// Moves the cursor forward to `ptr`; raises a parse error if it leaves the line.
const char* set_ptr(const char* ptr);
void advance(int n) {
set_ptr(get_ptr() + n);
}
// Throws a ParseError located at the current position.
void error(std::string err_msg) {
throw ParseError{loc, err_msg};
}
};
} // namespace tolk

1054
tolk/stack-transform.cpp Normal file

File diff suppressed because it is too large Load diff

179
tolk/symtable.cpp Normal file
View file

@ -0,0 +1,179 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#include "symtable.h"
#include <sstream>
#include <cassert>
namespace tolk {
/*
*
* SYMBOL VALUES (DECLARED)
*
*/
// Current scope nesting depth; 0 is the outer scope (see open_scope/close_scope).
int scope_level;
// Table of all symbols and keywords, indexed by sym_idx_t.
SymTable<100003> symbols;
// Definition currently visible for each symbol inside nested scopes (used when scope_level != 0).
SymDef* sym_def[symbols.hprime + 1];
// Definition of each symbol in the outer scope.
SymDef* global_sym_def[symbols.hprime + 1];
// Saved previous definitions, as (scope level, old SymDef) pairs pushed by
// define_symbol() and unwound by close_scope().
std::vector<std::pair<int, SymDef>> symbol_stack;
// Location at which each currently open scope was opened.
std::vector<SrcLocation> scope_opened_at;
// Placeholder name for a symbol index that has no table entry: "_" for index 0,
// otherwise "SYM#<index>".
std::string Symbol::unknown_symbol_name(sym_idx_t i) {
  if (i == 0) {
    return "_";
  }
  std::ostringstream label;
  label << "SYM#" << i;
  return label.str();
}
// Looks up `str` in the open-addressing hash table and optionally inserts it.
//   mode bit 0 (1): insert the string when it is not present
//   mode bit 1 (2): report an already present string as not_found
//   idx: when > 0 the new Symbol stores -idx (a negative idx marks a keyword,
//        see is_keyword); otherwise it stores its own table slot
// Returns the table slot (1..p) of the symbol, or not_found.
// Throws SymTableOverflow when an insertion is requested while def_sym has
// reached 3/4 of p.
sym_idx_t SymTableBase::gen_lookup(std::string str, int mode, sym_idx_t idx) {
// h1 selects the starting slot, h2 the probe step
unsigned long long h1 = 1, h2 = 1;
for (char c : str) {
h1 = ((h1 * 239) + (unsigned char)(c)) % p;
h2 = ((h2 * 17) + (unsigned char)(c)) % (p - 1);
}
// shift into 1-based ranges: h1 in 1..p (slot 0 is never used), h2 in 1..p-1 (never a zero step)
++h2;
++h1;
while (true) {
if (sym_table[h1]) {
if (sym_table[h1]->str == str) {
return (mode & 2) ? not_found : sym_idx_t(h1);
}
// slot taken by another string: advance by the step, wrapping back into 1..p
h1 += h2;
if (h1 > p) {
h1 -= p;
}
} else {
if (!(mode & 1)) {
return not_found;
}
if (def_sym >= ((long long)p * 3) / 4) {
throw SymTableOverflow{def_sym};
}
sym_table[h1] = std::make_unique<Symbol>(str, idx <= 0 ? sym_idx_t(h1) : -idx);
++def_sym;
return sym_idx_t(h1);
}
}
}
// Registers `str` as a keyword. A non-positive `idx` requests the next
// automatically assigned keyword number. Throws SymTableKwRedef when the
// string is already present in the table. Keyword numbers below max_kw_idx are
// also recorded in the `keywords` reverse map. Returns *this for chaining.
SymTableBase& SymTableBase::add_keyword(std::string str, sym_idx_t idx) {
  if (idx <= 0) {
    idx = ++def_kw;
  }
  // mode -1 sets both mode bits: insert if absent, report not_found if present
  const sym_idx_t slot = gen_lookup(str, -1, idx);
  if (slot == 0) {
    throw SymTableKwRedef{str};
  }
  if (idx < max_kw_idx) {
    keywords[idx] = slot;
  }
  return *this;
}
// Enters a new nested scope and remembers the lexer's current location as the
// place where the scope was opened.
void open_scope(Lexer& lex) {
  scope_level += 1;
  scope_opened_at.push_back(lex.cur().loc);
}
// Leaves the innermost scope: every symbol defined or shadowed at this level
// is restored to the definition saved on symbol_stack, or removed when there
// was none. Throws Fatal when called in the outer scope. `lex` is not used here.
void close_scope(Lexer& lex) {
if (!scope_level) {
throw Fatal{"cannot close the outer scope"};
}
// unwind all entries that define_symbol() pushed for the current level
while (!symbol_stack.empty() && symbol_stack.back().first == scope_level) {
SymDef old_def = symbol_stack.back().second;
auto idx = old_def.sym_idx;
symbol_stack.pop_back();
SymDef* cur_def = sym_def[idx];
assert(cur_def);
assert(cur_def->level == scope_level && cur_def->sym_idx == idx);
//std::cerr << "restoring local symbol `" << old_def.name << "` of level " << scope_level << " to its previous level " << old_def.level << std::endl;
// the value attached in this scope is owned by the definition and freed here
if (cur_def->value) {
//std::cerr << "deleting value of symbol " << old_def.name << ":" << old_def.level << " at " << (const void*) it->second.value << std::endl;
delete cur_def->value;
}
// a saved entry with level 0 and no value is the placeholder pushed for a
// symbol that did not exist before this scope: drop the definition entirely
if (!old_def.level && !old_def.value) {
delete cur_def; // ??? keep the definition always?
sym_def[idx] = nullptr;
} else {
// NOTE(review): value and level are restored, but `loc` (overwritten by
// define_symbol when shadowing) is not -- confirm whether that is intended.
cur_def->value = std::move(old_def.value);
cur_def->level = old_def.level;
}
old_def.value = nullptr;
}
--scope_level;
scope_opened_at.pop_back();
}
// Finds the definition of symbol `idx`. Bit 0 of `flags` enables the search in
// the scoped table (sym_def), bit 1 the search in the global table; the scoped
// table takes precedence. Returns nullptr for index 0 or when nothing is found.
SymDef* lookup_symbol(sym_idx_t idx, int flags) {
  if (idx == 0) {
    return nullptr;
  }
  SymDef* const scoped = (flags & 1) ? sym_def[idx] : nullptr;
  if (scoped) {
    return scoped;
  }
  return (flags & 2) ? global_sym_def[idx] : nullptr;
}
// Name-based variant: resolves `name` through the symbol table (without
// inserting it) and delegates to the index-based lookup.
SymDef* lookup_symbol(std::string name, int flags) {
  const sym_idx_t idx = symbols.lookup(name);
  return lookup_symbol(idx, flags);
}
// Returns the global definition for `name_idx`, creating it when missing.
// Returns nullptr for index 0, and also when `force_new` is set while an
// existing definition already has a value.
SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new, const SrcLocation& loc) {
  if (name_idx == 0) {
    return nullptr;
  }
  SymDef* const existing = global_sym_def[name_idx];
  if (!existing) {
    SymDef* const created = new SymDef(0, name_idx, loc);
    global_sym_def[name_idx] = created;
    return created;
  }
  const bool clash = force_new && existing->value;
  return clash ? nullptr : existing;
}
// Defines `name_idx` in the current scope. In the outer scope this delegates to
// define_global_symbol(). In a nested scope the previous definition (or a
// placeholder when there was none) is saved on symbol_stack so close_scope()
// can restore it. Returns nullptr for index 0, and when `force_new` is set
// while the symbol already has a value at the current level.
SymDef* define_symbol(sym_idx_t name_idx, bool force_new, const SrcLocation& loc) {
  if (name_idx == 0) {
    return nullptr;
  }
  if (scope_level == 0) {
    return define_global_symbol(name_idx, force_new, loc);
  }
  SymDef* existing = sym_def[name_idx];
  if (!existing) {
    // first definition of this symbol: remember that there was nothing before
    SymDef* created = new SymDef(scope_level, name_idx, loc);
    sym_def[name_idx] = created;
    symbol_stack.push_back(std::make_pair(scope_level, SymDef{0, name_idx}));
    return created;
  }
  if (existing->level < scope_level) {
    // shadowing a definition from an enclosing scope: save a copy of it
    symbol_stack.push_back(std::make_pair(scope_level, *existing));
    existing->level = scope_level;
  } else if (existing->value && force_new) {
    return nullptr;
  }
  existing->value = nullptr;
  existing->loc = loc;
  return existing;
}
} // namespace tolk

175
tolk/symtable.h Normal file
View file

@ -0,0 +1,175 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "srcread.h"
#include "lexer.h"
#include <vector>
namespace tolk {
/*
*
* SYMBOL VALUES (DECLARED)
*
*/
typedef int var_idx_t;
// Base class of the values attached to symbol definitions: a kind tag (one of
// the enumerators below) plus an integer index.
struct SymValBase {
  enum { _Param, _Var, _Func, _Typename, _GlobVar, _Const };

  int type;
  int idx;

  SymValBase(int _type, int _idx)
      : type(_type)
      , idx(_idx) {
  }

  virtual ~SymValBase() = default;
};
/*
*
* SYMBOL TABLE
*
*/
// defined outside this module (by the end user)
int compute_symbol_subclass(std::string str); // return 0 if unneeded
typedef int sym_idx_t;
// One entry of the symbol table: the text of the symbol, its index, and a
// user-defined subclass.
struct Symbol {
std::string str;
// Table slot of the symbol, or a negative number for keywords (see SymTableBase::is_keyword).
sym_idx_t idx;
int subclass;
Symbol(std::string _str, sym_idx_t _idx, int _sc) : str(_str), idx(_idx), subclass(_sc) {
}
// The subclass is computed from the text by the externally defined compute_symbol_subclass().
Symbol(std::string _str, sym_idx_t _idx) : str(_str), idx(_idx) {
subclass = compute_symbol_subclass(std::move(_str));
}
// Placeholder name ("_" or "SYM#<i>") for an index without a table entry.
static std::string unknown_symbol_name(sym_idx_t i);
};
// Size-independent part of the symbol table. The storage itself is supplied by
// the derived SymTable<pp>; this class only keeps a pointer to it.
// NOTE(review): std::memset and std::unique_ptr are used here although this
// header includes neither <cstring> nor <memory> directly -- presumably they
// arrive through "srcread.h"/"lexer.h"; confirm.
class SymTableBase {
// Table size parameter; valid slots are 1..p.
unsigned p;
std::unique_ptr<Symbol>* sym_table;
// def_kw: last automatically assigned keyword number; def_sym: number of stored symbols.
sym_idx_t def_kw, def_sym;
static constexpr int max_kw_idx = 10000;
// Maps a keyword number to its table slot (0 when unassigned).
sym_idx_t keywords[max_kw_idx];
public:
SymTableBase(unsigned p_, std::unique_ptr<Symbol>* sym_table_)
: p(p_), sym_table(sym_table_), def_kw(0x100), def_sym(0) {
std::memset(keywords, 0, sizeof(keywords));
}
static constexpr sym_idx_t not_found = 0;
// Registers a keyword; throws SymTableKwRedef if the string is already present.
SymTableBase& add_keyword(std::string str, sym_idx_t idx = 0);
// Registers a single-character keyword numbered by its character code.
SymTableBase& add_kw_char(char c) {
return add_keyword(std::string{c}, c);
}
// Finds a symbol; `mode` is passed to gen_lookup (bit 0: insert, bit 1: fail if present).
sym_idx_t lookup(std::string str, int mode = 0) {
return gen_lookup(str, mode);
}
// Finds a symbol, inserting it when absent.
sym_idx_t lookup_add(std::string str) {
return gen_lookup(str, 1);
}
Symbol* operator[](sym_idx_t i) const {
return sym_table[i].get();
}
// Keywords are stored with a negative idx.
bool is_keyword(sym_idx_t i) const {
return sym_table[i] && sym_table[i]->idx < 0;
}
std::string get_name(sym_idx_t i) const {
return sym_table[i] ? sym_table[i]->str : Symbol::unknown_symbol_name(i);
}
int get_subclass(sym_idx_t i) const {
return sym_table[i] ? sym_table[i]->subclass : 0;
}
// Returns the Symbol of keyword number `i`, or nullptr when `i` is out of
// range; an unassigned number maps to slot 0.
Symbol* get_keyword(int i) const {
return ((unsigned)i < (unsigned)max_kw_idx) ? sym_table[keywords[i]].get() : nullptr;
}
protected:
sym_idx_t gen_lookup(std::string str, int mode = 0, sym_idx_t idx = 0);
};
// Symbol table with storage for pp + 1 slots (slot 0 is unused by gen_lookup).
template <unsigned pp>
class SymTable : public SymTableBase {
public:
static constexpr int hprime = pp;
static int size() {
return pp + 1;
}
private:
std::unique_ptr<Symbol> sym[pp + 1];
public:
// The base constructor only stores the pointer to `sym`, so handing it over
// before `sym` itself has been constructed is fine.
SymTable() : SymTableBase(pp, sym) {
}
// Same as the base-class versions, but returning SymTable& for chaining.
SymTable& add_keyword(std::string str, sym_idx_t idx = 0) {
SymTableBase::add_keyword(str, idx);
return *this;
}
SymTable& add_kw_char(char c) {
return add_keyword(std::string{c}, c);
}
};
// Thrown by the symbol table when it is too full to accept another symbol;
// carries the number of symbols defined at that moment.
struct SymTableOverflow {
  int sym_def;

  SymTableOverflow(int x)
      : sym_def(x) {
  }
};
// Thrown when a keyword is registered for a string that is already present in
// the symbol table; carries the offending keyword text.
struct SymTableKwRedef {
  std::string kw;

  // `_kw` is taken by value and moved into place, avoiding a second copy.
  SymTableKwRedef(std::string _kw) : kw(std::move(_kw)) {
  }
};
extern SymTable<100003> symbols;
extern int scope_level;
// A definition of a symbol at a particular scope level.
struct SymDef {
// Scope level of the definition; global definitions are created with level 0.
int level;
sym_idx_t sym_idx;
// Attached value, or null. It is held as a raw pointer; close_scope() deletes
// the values of scoped definitions.
SymValBase* value;
SrcLocation loc;
SymDef(int lvl, sym_idx_t idx, const SrcLocation& _loc = {}, SymValBase* val = 0)
: level(lvl), sym_idx(idx), value(val), loc(_loc) {
}
// True when the definition refers to a real symbol (index != 0).
bool has_name() const {
return sym_idx;
}
std::string name() const {
return symbols.get_name(sym_idx);
}
};
// Per-symbol definition tables, sized to match the global symbol table
// (hprime + 1 entries each).
// NOTE(review): presumably indexed by sym_idx_t, with sym_def holding the definition
// visible in the current scope and global_sym_def the global one — confirm in the .cpp.
extern SymDef* sym_def[symbols.hprime + 1];
extern SymDef* global_sym_def[symbols.hprime + 1];
// Stack of (int, SymDef) pairs.
// NOTE(review): looks like the saved definitions shadowed by inner scopes — confirm.
extern std::vector<std::pair<int, SymDef>> symbol_stack;
// One source location per scope.
// NOTE(review): presumably where each currently open scope started — confirm.
extern std::vector<SrcLocation> scope_opened_at;
// Scope management; both take the lexer.
// NOTE(review): likely only for source positions — confirm.
void open_scope(Lexer& lex);
void close_scope(Lexer& lex);
// Symbol lookup by table index or by name.
// NOTE(review): the meaning of the `flags` bits (default 3) is not visible here — see the definition.
SymDef* lookup_symbol(sym_idx_t idx, int flags = 3);
SymDef* lookup_symbol(std::string name, int flags = 3);
// Symbol definition helpers.
// NOTE(review): `force_new` presumably controls whether an existing definition may be reused — confirm.
SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new = false, const SrcLocation& loc = {});
SymDef* define_symbol(sym_idx_t name_idx, bool force_new = false, const SrcLocation& loc = {});
} // namespace tolk

122
tolk/tolk-main.cpp Normal file
View file

@ -0,0 +1,122 @@
/*
This file is part of TON Blockchain source code.
TON Blockchain is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
TON Blockchain is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with TON Blockchain. If not, see <http://www.gnu.org/licenses/>.
In addition, as a special exception, the copyright holders give permission
to link the code of portions of this program with the OpenSSL library.
You must obey the GNU General Public License in all respects for all
of the code used other than OpenSSL. If you modify file(s) with this
exception, you may extend this exception to your version of the file(s),
but you are not obligated to do so. If you do not wish to do so, delete this
exception statement from your version. If you delete this exception statement
from all source files in the program, then also delete it here.
*/
#include "tolk.h"
#include <getopt.h>
#include <fstream>
#include "git.h"
/// Prints the command-line help to stderr and terminates the process with exit code 2.
///
/// @param progname  Name the program was invoked with (argv[0]); echoed in the synopsis.
///
/// The option list is kept in sync with the getopt string used by main():
/// `-V` takes no argument there, so it is shown without one, and `-h` is listed.
void usage(const char* progname) {
  std::cerr
      << "usage: " << progname
      << " [-vIAPSR][-O<level>][-i<indent-spc>][-o<output-filename>][-W<boc-filename>] {<filename.tolk> ...}\n"
         "\tGenerates Fift TVM assembler code from a Tolk source\n"
         "-I\tEnables interactive mode (parse stdin)\n"
         "-o<fift-output-filename>\tWrites generated code into specified file instead of stdout\n"
         "-v\tIncreases verbosity level (extra information output into stderr)\n"
         "-i<indent>\tSets indentation for the output code (in two-space units)\n"
         "-A\tPrefix code with `\"Asm.fif\" include` preamble\n"
         "-O<level>\tSets optimization level (2 by default)\n"
         "-P\tEnvelope code into PROGRAM{ ... }END>c\n"
         "-S\tInclude stack layout comments in the output code\n"
         "-R\tInclude operation rewrite comments in the output code\n"
         "-W<output-boc-file>\tInclude Fift code to serialize and save generated code into specified BoC file. Enables "
         "-A and -P.\n"
         "-s\tOutput semantic version of Tolk and exit\n"
         "-V\tShow Tolk build information and exit\n"
         "-h\tShow this help and exit\n";
  std::exit(2);
}
/// Command-line entry point: parses the options into the tolk:: globals,
/// selects the output stream (stdout or the -o file), collects the remaining
/// arguments as source file names and runs the compiler.
///
/// Returns the result of tolk::tolk_proceed(), or 2 when the output file
/// cannot be created. Unknown options and -h print the usage and exit.
int main(int argc, char* const argv[]) {
  std::string output_filename;

  int opt;
  while ((opt = getopt(argc, argv, "Ahi:Io:O:PRsSvW:V")) != -1) {
    switch (opt) {
      // Simple boolean switches.
      case 'A':
        tolk::asm_preamble = true;
        break;
      case 'I':
        tolk::interactive = true;
        break;
      case 'P':
        tolk::program_envelope = true;
        break;
      case 'R':
        tolk::op_rewrite_comments = true;
        break;
      case 'S':
        tolk::stack_layout_comments = true;
        break;
      case 'v':
        ++tolk::verbosity;
        break;

      // Options carrying a value; negative numbers are clamped to 0.
      case 'i':
        tolk::indent = std::max(0, atoi(optarg));
        break;
      case 'O':
        tolk::opt_level = std::max(0, atoi(optarg));
        break;
      case 'o':
        output_filename = optarg;
        break;
      case 'W':
        // Writing a BoC implies both the Asm.fif preamble and the PROGRAM envelope.
        tolk::boc_output_filename = optarg;
        tolk::program_envelope = true;
        tolk::asm_preamble = true;
        break;

      // Informational options: print and terminate successfully.
      case 's':
        std::cout << tolk::tolk_version << "\n";
        std::exit(0);
      case 'V':
        std::cout << "Tolk semantic version: v" << tolk::tolk_version << "\n";
        std::cout << "Build information: [ Commit: " << GitMetadata::CommitSHA1() << ", Date: " << GitMetadata::CommitDate() << "]\n";
        std::exit(0);

      case 'h':
      default:
        usage(argv[0]);
    }
  }

  // Generated code goes to stdout unless -o named a file.
  std::unique_ptr<std::fstream> fs;
  std::ostream* outs = &std::cout;
  if (!output_filename.empty()) {
    fs = std::make_unique<std::fstream>(output_filename, std::fstream::trunc | std::fstream::out);
    if (!fs->is_open()) {
      std::cerr << "failed to create output file " << output_filename << '\n';
      return 2;
    }
    outs = fs.get();
  }

  // Everything after the options is a source file name.
  std::vector<std::string> sources;
  for (; optind < argc; ++optind) {
    sources.push_back(std::string(argv[optind]));
  }

  tolk::read_callback = tolk::fs_read_callback;
  return tolk::tolk_proceed(sources, *outs, std::cerr);
}

148
tolk/tolk-wasm.cpp Normal file
View file

@ -0,0 +1,148 @@
/*
This file is part of TON Blockchain source code.
TON Blockchain is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
TON Blockchain is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with TON Blockchain. If not, see <http://www.gnu.org/licenses/>.
In addition, as a special exception, the copyright holders give permission
to link the code of portions of this program with the OpenSSL library.
You must obey the GNU General Public License in all respects for all
of the code used other than OpenSSL. If you modify file(s) with this
exception, you may extend this exception to your version of the file(s),
but you are not obligated to do so. If you do not wish to do so, delete this
exception statement from your version. If you delete this exception statement
from all source files in the program, then also delete it here.
*/
#include "tolk.h"
#include "git.h"
#include "td/utils/JsonBuilder.h"
#include "fift/utils.h"
#include "td/utils/base64.h"
#include "td/utils/Status.h"
#include <sstream>
#include <iomanip>
/// Compiles the sources described by a JSON configuration and returns a JSON
/// report as a string.
///
/// @param config_json  NUL-terminated JSON text; it is handed to td::json_decode
///                     as a mutable slice, so the buffer may be modified.
///                     Expected fields: "optLevel" (int) and "sources" (array of strings).
/// @return On success a JSON object with "status":"ok", "codeBoc" (base64),
///         "fiftCode" and "codeHashHex"; otherwise an error status (malformed
///         config, Tolk compilation error, Fift assembly or BoC serialization failure).
///
/// Side effects: overwrites tolk::opt_level, tolk::program_envelope,
/// tolk::verbosity and tolk::indent.
td::Result<std::string> compile_internal(char *config_json) {
  TRY_RESULT(input_json, td::json_decode(td::MutableSlice(config_json)))
  // Reject non-object configs explicitly instead of relying on get_object(),
  // which assumes the value already has the right type.
  if (input_json.type() != td::JsonValue::Type::Object) {
    return td::Status::Error("Tolk config must be a JSON object");
  }
  auto &obj = input_json.get_object();

  TRY_RESULT(opt_level, td::get_json_object_int_field(obj, "optLevel", false));

  TRY_RESULT(sources_obj, td::get_json_object_field(obj, "sources", td::JsonValue::Type::Array, false));

  auto &sources_arr = sources_obj.get_array();

  std::vector<std::string> sources;
  for (auto &item : sources_arr) {
    // Same reasoning as above: get_string() assumes a string value.
    if (item.type() != td::JsonValue::Type::String) {
      return td::Status::Error("Every entry of `sources` must be a string");
    }
    sources.push_back(item.get_string().str());
  }

  tolk::opt_level = std::max(0, opt_level);
  tolk::program_envelope = true;
  tolk::verbosity = 0;
  tolk::indent = 1;

  std::ostringstream outs, errs;
  auto compile_res = tolk::tolk_proceed(sources, outs, errs);

  if (compile_res != 0) {
    return td::Status::Error(std::string("Tolk compilation error: ") + errs.str());
  }

  TRY_RESULT(code_cell, fift::compile_asm(outs.str(), "/fiftlib/", false));
  TRY_RESULT(boc, vm::std_boc_serialize(code_cell));

  td::JsonBuilder result_json;
  auto result_obj = result_json.enter_object();
  result_obj("status", "ok");
  result_obj("codeBoc", td::base64_encode(boc));
  result_obj("fiftCode", outs.str());
  result_obj("codeHashHex", code_cell->get_hash().to_hex());
  result_obj.leave();

  // The former outs.clear()/errs.clear() calls were dropped: on a stream,
  // clear() only resets the error-state flags (it does not empty the buffer),
  // and both streams are destroyed on return anyway.
  return result_json.string_builder().as_cslice().str();
}
/// C-style hook through which the embedder supplies additional source files or data.
///
/// @param _kind       What is being requested (a string).
/// @param _data       The request payload (a string).
/// @param o_contents  Receives a pointer to the resulting contents, if any. Allocated via malloc().
/// @param o_error     Receives a pointer to an error message, if any. Allocated via malloc().
///
/// Memory contract: the implementor of the callback allocates contents or
/// error with malloc() and releases that storage with free() once
/// tolk_compile has returned.
///
/// A callback that does not support the request must set both *o_contents and *o_error to NULL.
using CStyleReadFileCallback = void (*)(char const* _kind, char const* _data, char** o_contents, char** o_error);
/// Adapts a C-style read callback to tolk::ReadCallback::Callback.
///
/// @param _readCallback  The C callback, or null.
/// @return An empty Callback when `_readCallback` is null; otherwise a
///         callable that forwards the request and converts the out-parameters:
///         contents (if set) win, then the error message, and when neither is
///         set the result is the error "Callback not supported".
tolk::ReadCallback::Callback wrapReadCallback(CStyleReadFileCallback _readCallback)
{
  if (!_readCallback) {
    return tolk::ReadCallback::Callback{};
  }

  return [_readCallback](tolk::ReadCallback::Kind _kind, char const* _data) -> td::Result<std::string> {
    char* contents_c = nullptr;
    char* error_c = nullptr;
    _readCallback(tolk::ReadCallback::kindString(_kind).data(), _data, &contents_c, &error_c);

    if (contents_c) {
      return contents_c;
    }
    if (error_c) {
      return td::Status::Error(std::string(error_c));
    }
    return td::Status::Error("Callback not supported");
  };
}
extern "C" {
const char* version() {
auto version_json = td::JsonBuilder();
auto obj = version_json.enter_object();
obj("tolkVersion", tolk::tolk_version);
obj("tolkFiftLibCommitHash", GitMetadata::CommitSHA1());
obj("tolkFiftLibCommitDate", GitMetadata::CommitDate());
obj.leave();
return strdup(version_json.string_builder().as_cslice().c_str());
}
const char *tolk_compile(char *config_json, CStyleReadFileCallback callback) {
if (callback) {
tolk::read_callback = wrapReadCallback(callback);
} else {
tolk::read_callback = tolk::fs_read_callback;
}
auto res = compile_internal(config_json);
if (res.is_error()) {
auto result = res.move_as_error();
auto error_res = td::JsonBuilder();
auto error_o = error_res.enter_object();
error_o("status", "error");
error_o("message", result.message().str());
error_o.leave();
return strdup(error_res.string_builder().as_cslice().c_str());
}
auto res_string = res.move_as_ok();
return strdup(res_string.c_str());
}
}

260
tolk/tolk.cpp Normal file
View file

@ -0,0 +1,260 @@
/*
This file is part of TON Blockchain source code.
TON Blockchain is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
TON Blockchain is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with TON Blockchain. If not, see <http://www.gnu.org/licenses/>.
In addition, as a special exception, the copyright holders give permission
to link the code of portions of this program with the OpenSSL library.
You must obey the GNU General Public License in all respects for all
of the code used other than OpenSSL. If you modify file(s) with this
exception, you may extend this exception to your version of the file(s),
but you are not obligated to do so. If you do not wish to do so, delete this
exception statement from your version. If you delete this exception statement
from all source files in the program, then also delete it here.
*/
#include "tolk.h"
#include "srcread.h"
#include "lexer.h"
#include <getopt.h>
#include "git.h"
#include <fstream>
#include "td/utils/port/path.h"
namespace tolk {
// Compiler-wide settings; main() and the wasm entry point fill them in
// before calling tolk_proceed().
int verbosity = 0;
int indent = 0;
int opt_level = 2;

bool stack_layout_comments = false;
bool op_rewrite_comments = false;
bool program_envelope = false;
bool asm_preamble = false;
bool interactive = false;

GlobalPragma pragma_allow_post_modification{"allow-post-modification"};
GlobalPragma pragma_compute_asm_ltr{"compute-asm-ltr"};

// `generated_from` is echoed in the header comment of the generated code;
// `boc_output_filename`, when non-empty, adds the Fift line that saves the BoC.
std::string generated_from;
std::string boc_output_filename;

// Hook used to read source files and resolve paths.
ReadCallback::Callback read_callback;
// Default read callback backed by the local file system.
//   ReadFile - returns the whole contents of the file named by `query`,
//              or an error if the file cannot be opened;
//   Realpath - returns td::realpath(query);
//   any other kind yields the error "Unknown query kind".
td::Result<std::string> fs_read_callback(ReadCallback::Kind kind, const char* query) {
  if (kind == ReadCallback::Kind::ReadFile) {
    std::ifstream input{query};
    if (input.fail()) {
      auto msg = std::string{"cannot open source file `"} + query + "`";
      return td::Status::Error(msg);
    }
    std::stringstream contents;
    contents << input.rdbuf();
    return contents.str();
  }

  if (kind == ReadCallback::Kind::Realpath) {
    return td::realpath(td::CSlice(query));
  }

  return td::Status::Error("Unknown query kind");
}
/*
*
* OUTPUT CODE GENERATOR
*
*/
// Runs the analysis passes over one function and writes its Fift code to `outs`.
// Diagnostics, whose amount depends on `verbosity`, go to `errs`.
// Throws ParseError when the function has been declared but has no body.
void generate_output_func(SymDef* func_sym, std::ostream &outs, std::ostream &errs) {
  SymValCodeFunc* func_val = dynamic_cast<SymValCodeFunc*>(func_sym->value);
  tolk_assert(func_val);
  std::string name = symbols.get_name(func_sym->sym_idx);
  if (verbosity >= 2) {
    errs << "\n\n=========================\nfunction " << name << " : " << func_val->get_type() << std::endl;
  }

  if (!func_val->code) {
    errs << "( function `" << name << "` undefined )\n";
    throw ParseError(func_sym->loc, name);
  }

  CodeBlob& code = *(func_val->code);

  // Dumps the current state of the code when the verbosity is high enough;
  // `banner` may be null for dumps that have no heading.
  auto dump = [&](int min_verbosity, const char* banner, int print_flags) {
    if (verbosity >= min_verbosity) {
      if (banner) {
        errs << banner;
      }
      code.print(errs, print_flags);
    }
  };

  dump(3, nullptr, 9);
  code.simplify_var_types();
  dump(5, "after simplify_var_types: \n", 0);
  code.prune_unreachable_code();
  dump(5, "after prune_unreachable: \n", 0);
  code.split_vars(true);
  dump(5, "after split_vars: \n", 0);

  // A fixed number of used-vars / forward-analysis / pruning rounds.
  for (int round = 0; round < 8; ++round) {
    code.compute_used_code_vars();
    dump(4, "after compute_used_vars: \n", 6);
    code.fwd_analyze();
    dump(5, "after fwd_analyze: \n", 6);
    code.prune_unreachable_code();
    dump(5, "after prune_unreachable: \n", 6);
  }

  code.mark_noreturn();
  dump(3, nullptr, 15);
  if (verbosity >= 2) {
    errs << "\n---------- resulting code for " << name << " -------------\n";
  }

  // Flags: 1 - inline, 2 - inline_ref
  const bool inline_func = (func_val->flags & 1);
  const bool inline_ref = (func_val->flags & 2);
  const char* modifier = inline_func ? "INLINE" : (inline_ref ? "REF" : "");

  const std::string pad(indent * 2, ' ');
  outs << pad << name << " PROC" << modifier << ":<{\n";

  int mode = 0;
  if (stack_layout_comments) {
    mode |= Stack::_StkCmt | Stack::_CptStkCmt;
  }
  if (opt_level < 2) {
    mode |= Stack::_DisableOpt;
  }
  if (inline_func && code.ops->noreturn()) {
    mode |= Stack::_InlineFunc;
  }
  if (inline_func || inline_ref) {
    mode |= Stack::_InlineAny;
  }

  code.generate_code(outs, mode, indent + 1);
  outs << pad << "}>\n";
  if (verbosity >= 2) {
    errs << "--------------\n";
  }
}
int generate_output(std::ostream &outs, std::ostream &errs) {
if (asm_preamble) {
outs << "\"Asm.fif\" include\n";
}
outs << "// automatically generated from " << generated_from << std::endl;
if (program_envelope) {
outs << "PROGRAM{\n";
}
for (SymDef* func_sym : glob_func) {
SymValCodeFunc* func_val = dynamic_cast<SymValCodeFunc*>(func_sym->value);
tolk_assert(func_val);
std::string name = symbols.get_name(func_sym->sym_idx);
outs << std::string(indent * 2, ' ');
if (func_val->method_id.is_null()) {
outs << "DECLPROC " << name << "\n";
} else {
outs << func_val->method_id << " DECLMETHOD " << name << "\n";
}
}
for (SymDef* gvar_sym : glob_vars) {
tolk_assert(dynamic_cast<SymValGlobVar*>(gvar_sym->value));
std::string name = symbols.get_name(gvar_sym->sym_idx);
outs << std::string(indent * 2, ' ') << "DECLGLOBVAR " << name << "\n";
}
int errors = 0;
for (SymDef* func_sym : glob_func) {
try {
generate_output_func(func_sym, outs, errs);
} catch (Error& err) {
errs << "cannot generate code for function `" << symbols.get_name(func_sym->sym_idx) << "`:\n"
<< err << std::endl;
++errors;
}
}
if (program_envelope) {
outs << "}END>c\n";
}
if (!boc_output_filename.empty()) {
outs << "2 boc+>B \"" << boc_output_filename << "\" B>file\n";
}
return errors;
}
void output_inclusion_stack(std::ostream &errs) {
while (!inclusion_locations.empty()) {
SrcLocation loc = inclusion_locations.top();
inclusion_locations.pop();
if (loc.fdescr) {
errs << "note: included from ";
loc.show(errs);
errs << std::endl;
}
}
}
// Top-level driver: registers keywords and builtins, parses every source file
// (plus stdin in interactive mode) and, if everything parsed, generates the output.
//
//   sources - paths of the source files to compile
//   outs    - receives the generated Fift code
//   errs    - receives diagnostics
//
// Returns the number of functions that failed code generation (0 on full
// success), or 2 when a Fatal, Error or UnifyError aborted the compilation.
int tolk_proceed(const std::vector<std::string> &sources, std::ostream &outs, std::ostream &errs) {
  // The PROGRAM{ ... } envelope is always emitted with at least one indent level.
  if (program_envelope && !indent) {
    indent = 1;
  }

  define_keywords();
  define_builtins();

  int ok = 0, proc = 0;
  try {
    // Iterate by const reference: the paths are only read, so copying each
    // string per iteration was unnecessary.
    for (const std::string& src : sources) {
      ok += parse_source_file(src.c_str(), {}, true);
      proc++;
    }
    if (interactive) {
      generated_from += "stdin ";
      ok += parse_source_stdin();
      proc++;
    }
    if (ok < proc) {
      throw Fatal{"output code generation omitted because of errors"};
    }
    if (!proc) {
      throw Fatal{"no source files, no output"};
    }
    pragma_allow_post_modification.check_enable_in_libs();
    pragma_compute_asm_ltr.check_enable_in_libs();
    return generate_output(outs, errs);
  } catch (Fatal& fatal) {
    errs << "fatal: " << fatal << std::endl;
    output_inclusion_stack(errs);
    return 2;
  } catch (Error& error) {
    errs << error << std::endl;
    output_inclusion_stack(errs);
    return 2;
  } catch (UnifyError& unif_err) {
    errs << "fatal: ";
    unif_err.print_message(errs);
    errs << std::endl;
    output_inclusion_stack(errs);
    return 2;
  }

  // Not reached: the try block and every handler return. Kept so that all
  // control paths visibly end in a return statement.
  return 0;
}
} // namespace tolk

1785
tolk/tolk.h Normal file

File diff suppressed because it is too large Load diff

429
tolk/unify-types.cpp Normal file
View file

@ -0,0 +1,429 @@
/*
This file is part of TON Blockchain Library.
TON Blockchain Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
TON Blockchain Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tolk.h"
namespace tolk {
/*
*
* TYPE EXPRESSIONS
*
*/
// Static counters of TypeExpr; not thread safe, but it is ok for now.
int TypeExpr::holes = 0;
int TypeExpr::type_vars = 0;
// Initializes [minw, maxw] from the constructor of the type:
//   atomic and map types are exactly 1 wide;
//   a tensor sums the current widths of its components, capped at w_inf;
//   a tuple is exactly 1 wide (its components get their own widths computed);
//   an indirection copies the width of its target;
//   anything else is unconstrained: 0 .. w_inf.
void TypeExpr::compute_width() {
  if (constr == te_Atomic || constr == te_Map) {
    minw = maxw = 1;
  } else if (constr == te_Tensor) {
    minw = maxw = 0;
    for (std::size_t i = 0; i < args.size(); ++i) {
      minw += args[i]->minw;
      maxw += args[i]->maxw;
    }
    if (minw > w_inf) {
      minw = w_inf;
    }
    if (maxw > w_inf) {
      maxw = w_inf;
    }
  } else if (constr == te_Tuple) {
    minw = maxw = 1;
    for (std::size_t i = 0; i < args.size(); ++i) {
      args[i]->compute_width();
    }
  } else if (constr == te_Indirect) {
    minw = args[0]->minw;
    maxw = args[0]->maxw;
  } else {
    minw = 0;
    maxw = w_inf;
  }
}
bool TypeExpr::recompute_width() {
switch (constr) {
case te_Tensor:
case te_Indirect: {
int min = 0, max = 0;
for (TypeExpr* arg : args) {
min += arg->minw;
max += arg->maxw;
}
if (min > maxw || max < minw) {
return false;
}
if (min > w_inf) {
min = w_inf;
}
if (max > w_inf) {
max = w_inf;
}
if (minw < min) {
minw = min;
}
if (maxw > max) {
maxw = max;
}
return true;
}
case te_Tuple: {
for (TypeExpr* arg : args) {
if (arg->minw > 1 || arg->maxw < 1 || arg->minw > arg->maxw) {
return false;
}
}
return true;
}
default:
return false;
}
}
// Flattens nested tensors and indirections: appends every leaf component to
// `comp_list` in order and returns how many were appended.
int TypeExpr::extract_components(std::vector<TypeExpr*>& comp_list) {
  const bool is_container = (constr == te_Indirect || constr == te_Tensor);
  if (!is_container) {
    comp_list.push_back(this);
    return 1;
  }
  int appended = 0;
  for (auto it = args.begin(); it != args.end(); ++it) {
    appended += (*it)->extract_components(comp_list);
  }
  return appended;
}
// Allocates a map type `from -> to`; the result is a raw, heap-allocated node.
TypeExpr* TypeExpr::new_map(TypeExpr* from, TypeExpr* to) {
  std::vector<TypeExpr*> endpoints{from, to};
  return new TypeExpr{te_Map, endpoints};
}
// Turns this node into an indirection pointing at `te2`, taking over its width.
// Replacing a node with itself is a no-op.
void TypeExpr::replace_with(TypeExpr* te2) {
  if (te2 != this) {
    minw = te2->minw;
    maxw = te2->maxw;
    constr = te_Indirect;
    value = 0;
    args.assign(1, te2);
  }
}
// Collapses chains of indirections in `te` and, recursively, in all of its
// components (the pointers are rewritten in place).
// Returns false if the unknown `forbidden` occurs anywhere inside `te`.
// Every component is always processed, even after a failure was found.
bool TypeExpr::remove_indirect(TypeExpr*& te, TypeExpr* forbidden) {
  tolk_assert(te);
  for (; te->constr == te_Indirect; te = te->args[0]) {
  }
  if (te->constr == te_Unknown) {
    return te != forbidden;
  }
  bool all_ok = true;
  for (std::size_t i = 0; i < te->args.size(); ++i) {
    if (!remove_indirect(te->args[i], forbidden)) {
      all_ok = false;
    }
  }
  return all_ok;
}
// Instantiates a universally quantified type: creates one fresh hole per
// bound variable (args[1..]), points `te` at the body (args[0]) and
// substitutes the holes for the variables in it.
// Returns the fresh holes, in the order of the bound variables.
std::vector<TypeExpr*> TypeExpr::remove_forall(TypeExpr*& te) {
  tolk_assert(te && te->constr == te_ForAll);
  tolk_assert(te->args.size() >= 1);

  const std::size_t bound_vars = te->args.size() - 1;
  std::vector<TypeExpr*> new_vars;
  while (new_vars.size() < bound_vars) {
    new_vars.push_back(new_hole(1));
  }

  TypeExpr* quantified = te;
  te = quantified->args[0];
  remove_forall_in(te, quantified, new_vars);
  return new_vars;
}
// Substitutes, inside `te`, the variables bound by the quantifier `te2` with
// the matching entries of `new_vars`. Nodes that contain a substituted
// variable are copied (copy-on-change); untouched nodes are left shared.
// Returns true if `te` was changed. Nested quantifiers are not entered.
bool TypeExpr::remove_forall_in(TypeExpr*& te, TypeExpr* te2, const std::vector<TypeExpr*>& new_vars) {
  tolk_assert(te);
  tolk_assert(te2 && te2->constr == te_ForAll);

  if (te->constr == te_Var) {
    // Bound variable number i lives in te2->args[i + 1].
    for (std::size_t i = 0; i < new_vars.size(); i++) {
      if (te2->args[i + 1] == te) {
        te = new_vars[i];
        return true;
      }
    }
    return false;
  }

  if (te->constr == te_ForAll || te->args.empty()) {
    return false;
  }

  // Work on a tentative copy; keep it only if something inside was replaced.
  TypeExpr* candidate = new TypeExpr(*te);
  bool changed = false;
  for (auto& component : candidate->args) {
    if (remove_forall_in(component, te2, new_vars)) {
      changed = true;
    }
  }
  if (!changed) {
    delete candidate;
    return false;
  }
  te = candidate;
  return true;
}
// Prints the width as "N" when exact, "N..M" for a bounded range and "N.."
// when the upper bound is w_inf or more.
void TypeExpr::show_width(std::ostream& os) {
  os << minw;
  if (maxw == minw) {
    return;
  }
  os << "..";
  if (maxw < w_inf) {
    os << maxw;
  }
}
// Streams a type expression; a null pointer is shown as "(null-type-ptr)".
std::ostream& operator<<(std::ostream& os, TypeExpr* type_expr) {
  if (type_expr) {
    return type_expr->print(os);
  }
  return os << "(null-type-ptr)";
}
std::ostream& TypeExpr::print(std::ostream& os, int lex_level) {
switch (constr) {
case te_Unknown:
return os << "??" << value;
case te_Var:
if (value >= -26 && value < 0) {
return os << "_" << (char)(91 + value);
} else if (value >= 0 && value < 26) {
return os << (char)(65 + value);
} else {
return os << "TVAR" << value;
}
case te_Indirect:
return os << args[0];
case te_Atomic: {
switch (value) {
case _Int:
return os << "int";
case _Cell:
return os << "cell";
case _Slice:
return os << "slice";
case _Builder:
return os << "builder";
case _Cont:
return os << "cont";
case _Tuple:
return os << "tuple";
case _Type:
return os << "type";
default:
return os << "atomic-type-" << value;
}
}
case te_Tensor: {
if (lex_level > -127) {
os << "(";
}
auto c = args.size();
if (c) {
for (const auto& x : args) {
x->print(os);
if (--c) {
os << ", ";
}
}
}
if (lex_level > -127) {
os << ")";
}
return os;
}
case te_Tuple: {
os << "[";
auto c = args.size();
if (c == 1 && args[0]->constr == te_Tensor) {
args[0]->print(os, -127);
} else if (c) {
for (const auto& x : args) {
x->print(os);
if (--c) {
os << ", ";
}
}
}
return os << "]";
}
case te_Map: {
tolk_assert(args.size() == 2);
if (lex_level > 0) {
os << "(";
}
args[0]->print(os, 1);
os << " -> ";
args[1]->print(os);
if (lex_level > 0) {
os << ")";
}
return os;
}
case te_ForAll: {
tolk_assert(args.size() >= 1);
if (lex_level > 0) {
os << '(';
}
os << "Forall ";
for (std::size_t i = 1; i < args.size(); i++) {
os << (i > 1 ? ' ' : '(');
args[i]->print(os);
}
os << ") ";
args[0]->print(os);
if (lex_level > 0) {
os << ')';
}
return os;
}
default:
return os << "unknown-type-expr-" << constr;
}
}
// Writes "cannot unify type <te1> with <te2>", followed by ": <msg>" when a
// detail message is present.
void UnifyError::print_message(std::ostream& os) const {
  os << "cannot unify type " << te1;
  os << " with " << te2;
  if (msg.empty()) {
    return;
  }
  os << ": " << msg;
}
// Streams the message of a unification error (same text as print_message).
std::ostream& operator<<(std::ostream& os, const UnifyError& ue) {
  return ue.print_message(os), os;
}
std::string UnifyError::message() const {
std::ostringstream os;
print_message(os);
return os.str();
}
// Throws UnifyError unless the width intervals of the two types overlap.
void check_width_compat(TypeExpr* te1, TypeExpr* te2) {
  const bool intervals_overlap = te1->minw <= te2->maxw && te2->minw <= te1->maxw;
  if (intervals_overlap) {
    return;
  }
  std::ostringstream os;
  os << "cannot unify types of widths ";
  te1->show_width(os);
  os << " and ";
  te2->show_width(os);
  throw UnifyError{te1, te2, os.str()};
}
// Checks that the widths are compatible (throws UnifyError otherwise) and
// then narrows both types to the intersection of their width intervals.
void check_update_widths(TypeExpr* te1, TypeExpr* te2) {
  check_width_compat(te1, te2);
  const auto common_min = std::max(te1->minw, te2->minw);
  te2->minw = common_min;
  te1->minw = common_min;
  const auto common_max = std::min(te1->maxw, te2->maxw);
  te2->maxw = common_max;
  te1->maxw = common_max;
  tolk_assert(te1->minw <= te1->maxw);
}
// Unifies two type expressions in place.
//
// Both arguments are references to pointers: on return they may point at
// different nodes than on entry (indirections are skipped, and after a
// successful unification te1 is made to refer to the same node as te2 in the
// unknown/structural cases).
// Throws UnifyError when the types cannot be unified (constructor/arity
// mismatch, incompatible widths, cyclic type, generic type vs. tensor).
void unify(TypeExpr*& te1, TypeExpr*& te2) {
tolk_assert(te1 && te2);
// std::cerr << "unify( " << te1 << " , " << te2 << " )\n";
// Follow indirection chains so that both sides refer to real nodes.
while (te1->constr == TypeExpr::te_Indirect) {
te1 = te1->args[0];
}
while (te2->constr == TypeExpr::te_Indirect) {
te2 = te2->args[0];
}
// Same node: nothing to do.
if (te1 == te2) {
return;
}
// A quantified type on the left: instantiate it with fresh holes (on a local
// copy of the pointer, te1 itself is left untouched) and unify the instance.
// The holes are flagged as former forall variables only for the duration of
// the nested unification.
// NOTE(review): the flags are not reset if the nested unify() throws.
if (te1->constr == TypeExpr::te_ForAll) {
TypeExpr* te = te1;
std::vector<TypeExpr*> new_vars = TypeExpr::remove_forall(te);
for (TypeExpr* t : new_vars) {
t->was_forall_var = true;
}
unify(te, te2);
for (TypeExpr* t : new_vars) {
t->was_forall_var = false;
}
return;
}
// Symmetric case: a quantified type on the right.
if (te2->constr == TypeExpr::te_ForAll) {
TypeExpr* te = te2;
std::vector<TypeExpr*> new_vars = TypeExpr::remove_forall(te);
for (TypeExpr* t : new_vars) {
t->was_forall_var = true;
}
unify(te1, te);
for (TypeExpr* t : new_vars) {
t->was_forall_var = false;
}
return;
}
// A hole that stands for a forall variable must not be bound to a tensor.
if (te1->was_forall_var && te2->constr == TypeExpr::te_Tensor) {
throw UnifyError{te1, te2, "cannot unify generic type and tensor"};
}
if (te2->was_forall_var && te1->constr == TypeExpr::te_Tensor) {
throw UnifyError{te2, te1, "cannot unify generic type and tensor"};
}
// Left side is an unknown: bind it to the right side.
if (te1->constr == TypeExpr::te_Unknown) {
if (te2->constr == TypeExpr::te_Unknown) {
tolk_assert(te1->value != te2->value);
}
// Occurs check: remove_indirect returns false if te1 appears inside te2.
if (!TypeExpr::remove_indirect(te2, te1)) {
throw UnifyError{te1, te2, "type unification results in an infinite cyclic type"};
}
check_update_widths(te1, te2);
// te1's node becomes an indirection to te2; the caller's pointer follows.
te1->replace_with(te2);
te1 = te2;
return;
}
// Right side is an unknown: bind it to the left side (mirror of the above).
if (te2->constr == TypeExpr::te_Unknown) {
if (!TypeExpr::remove_indirect(te1, te2)) {
throw UnifyError{te2, te1, "type unification results in an infinite cyclic type"};
}
check_update_widths(te2, te1);
te2->replace_with(te1);
te2 = te1;
return;
}
// Both sides are concrete: constructor, value and arity must agree.
if (te1->constr != te2->constr || te1->value != te2->value || te1->args.size() != te2->args.size()) {
throw UnifyError{te1, te2};
}
// Unify component-wise; the component pointers are updated in place.
for (std::size_t i = 0; i < te1->args.size(); i++) {
unify(te1->args[i], te2->args[i]);
}
// Tensors: the components may have been narrowed, so re-derive both widths
// and make them agree.
if (te1->constr == TypeExpr::te_Tensor) {
if (!te1->recompute_width()) {
throw UnifyError{te1, te2, "type unification incompatible with known width of first type"};
}
if (!te2->recompute_width()) {
throw UnifyError{te2, te1, "type unification incompatible with known width of first type"};
}
check_update_widths(te1, te2);
}
// Merge the two nodes: te1's node becomes an indirection to te2.
te1->replace_with(te2);
te1 = te2;
}
} // namespace tolk