mirror of
https://github.com/ton-blockchain/ton
synced 2025-02-13 03:32:22 +00:00
f0e6470d0b
A new lexer is noticeably faster and memory efficient (although splitting a file to tokens is negligible in a whole pipeline). But the purpose of rewriting lexer was not just to speed up, but to allow writing code without spaces: `2+2` is now 4, not a valid identifier as earlier. The variety of symbols allowed in identifier has greatly reduced and is now similar to other languages. SrcLocation became 8 bytes on stack everywhere. Command-line flags were also reworked: - the input for Tolk compiler is only a single file now, it's parsed, and parsing continues while new #include are resolved - flags like -A -P and so on are no more needed, actually
349 lines
11 KiB
C++
349 lines
11 KiB
C++
/*
|
|
This file is part of TON Blockchain source code.
|
|
|
|
TON Blockchain is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License
|
|
as published by the Free Software Foundation; either version 2
|
|
of the License, or (at your option) any later version.
|
|
|
|
TON Blockchain is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with TON Blockchain. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
In addition, as a special exception, the copyright holders give permission
|
|
to link the code of portions of this program with the OpenSSL library.
|
|
You must obey the GNU General Public License in all respects for all
|
|
of the code used other than OpenSSL. If you modify file(s) with this
|
|
exception, you may extend this exception to your version of the file(s),
|
|
but you are not obligated to do so. If you do not wish to do so, delete this
|
|
exception statement from your version. If you delete this exception statement
|
|
from all source files in the program, then also delete it here.
|
|
*/
|
|
#include "tolk.h"
|
|
#include "lexer.h"
|
|
#include <getopt.h>
|
|
#include "git.h"
|
|
#include <fstream>
|
|
#include "td/utils/port/path.h"
|
|
#include <sys/stat.h>
|
|
|
|
namespace tolk {
|
|
|
|
int verbosity = 0, opt_level = 2;
|
|
bool stack_layout_comments = true;
|
|
GlobalPragma pragma_allow_post_modification{"allow-post-modification"};
|
|
GlobalPragma pragma_compute_asm_ltr{"compute-asm-ltr"};
|
|
GlobalPragma pragma_remove_unused_functions{"remove-unused-functions"};
|
|
std::string generated_from, boc_output_filename;
|
|
ReadCallback::Callback read_callback;
|
|
|
|
// returns argument type of a function
|
|
// note, that when a function has multiple arguments, its arg type is a tensor (no arguments — an empty tensor)
|
|
// in other words, `f(int a, int b)` and `f((int,int) ab)` is the same when we speak about types
|
|
const TypeExpr *SymValFunc::get_arg_type() const {
|
|
if (!sym_type)
|
|
return nullptr;
|
|
|
|
tolk_assert(sym_type->constr == TypeExpr::te_Map || sym_type->constr == TypeExpr::te_ForAll);
|
|
const TypeExpr *te_map = sym_type->constr == TypeExpr::te_ForAll ? sym_type->args[0] : sym_type;
|
|
const TypeExpr *arg_type = te_map->args[0];
|
|
|
|
while (arg_type->constr == TypeExpr::te_Indirect) {
|
|
arg_type = arg_type->args[0];
|
|
}
|
|
return arg_type;
|
|
}
|
|
|
|
|
|
bool SymValCodeFunc::does_need_codegen() const {
|
|
// when a function is declared, but not referenced from code in any way, don't generate its body
|
|
if (!is_really_used && pragma_remove_unused_functions.enabled()) {
|
|
return false;
|
|
}
|
|
// when a function is referenced like `var a = some_fn;` (or in some other non-call way), its continuation should exist
|
|
if (flags & flagUsedAsNonCall) {
|
|
return true;
|
|
}
|
|
// when a function f() is just `return anotherF(...args)`, it doesn't need to be codegenerated at all,
|
|
// since all its usages are inlined
|
|
return !is_just_wrapper_for_another_f();
|
|
// in the future, we may want to implement a true AST inlining for `inline` functions also
|
|
}
|
|
|
|
void GlobalPragma::enable(SrcLocation loc) {
|
|
if (deprecated_from_v_) {
|
|
loc.show_warning(PSTRING() << "#pragma " << name_ <<
|
|
" is deprecated since Tolk v" << deprecated_from_v_ <<
|
|
". Please, remove this line from your code.");
|
|
return;
|
|
}
|
|
if (!loc.get_src_file()->is_entrypoint_file()) {
|
|
// todo generally it's not true; rework pragmas completely
|
|
loc.show_warning(PSTRING() << "#pragma " << name_ <<
|
|
" should be used in the main file only.");
|
|
}
|
|
|
|
enabled_ = true;
|
|
}
|
|
|
|
void GlobalPragma::always_on_and_deprecated(const char *deprecated_from_v) {
|
|
deprecated_from_v_ = deprecated_from_v;
|
|
enabled_ = true;
|
|
}
|
|
|
|
td::Result<std::string> fs_read_callback(ReadCallback::Kind kind, const char* query) {
|
|
switch (kind) {
|
|
case ReadCallback::Kind::ReadFile: {
|
|
struct stat f_stat;
|
|
int res = stat(query, &f_stat);
|
|
if (res != 0) {
|
|
return td::Status::Error(std::string{"cannot open source file: "} + query);
|
|
}
|
|
|
|
size_t file_size = static_cast<size_t>(f_stat.st_size);
|
|
std::string str;
|
|
str.resize(file_size);
|
|
FILE* f = fopen(query, "r");
|
|
fread(str.data(), file_size, 1, f);
|
|
fclose(f);
|
|
return std::move(str);
|
|
}
|
|
case ReadCallback::Kind::Realpath: {
|
|
return td::realpath(td::CSlice(query));
|
|
}
|
|
default: {
|
|
return td::Status::Error("Unknown query kind");
|
|
}
|
|
}
|
|
}
|
|
|
|
void mark_function_used_dfs(const std::unique_ptr<Op>& op);
|
|
|
|
void mark_function_used(SymValCodeFunc* func_val) {
|
|
if (!func_val->code || func_val->is_really_used) { // already handled
|
|
return;
|
|
}
|
|
|
|
func_val->is_really_used = true;
|
|
mark_function_used_dfs(func_val->code->ops);
|
|
}
|
|
|
|
void mark_global_var_used(SymValGlobVar* glob_val) {
|
|
glob_val->is_really_used = true;
|
|
}
|
|
|
|
void mark_function_used_dfs(const std::unique_ptr<Op>& op) {
|
|
if (!op) {
|
|
return;
|
|
}
|
|
// op->fun_ref, despite its name, may actually ref global var
|
|
// note, that for non-calls, e.g. `var a = some_fn` (Op::_Let), some_fn is Op::_GlobVar
|
|
// (in other words, fun_ref exists not only for direct Op::_Call, but for non-call references also)
|
|
if (op->fun_ref) {
|
|
if (auto* func_val = dynamic_cast<SymValCodeFunc*>(op->fun_ref->value)) {
|
|
mark_function_used(func_val);
|
|
} else if (auto* glob_val = dynamic_cast<SymValGlobVar*>(op->fun_ref->value)) {
|
|
mark_global_var_used(glob_val);
|
|
} else if (auto* asm_val = dynamic_cast<SymValAsmFunc*>(op->fun_ref->value)) {
|
|
} else {
|
|
tolk_assert(false);
|
|
}
|
|
}
|
|
mark_function_used_dfs(op->next);
|
|
mark_function_used_dfs(op->block0);
|
|
mark_function_used_dfs(op->block1);
|
|
}
|
|
|
|
void mark_used_symbols() {
|
|
for (SymDef* func_sym : glob_func) {
|
|
auto* func_val = dynamic_cast<SymValCodeFunc*>(func_sym->value);
|
|
std::string name = symbols.get_name(func_sym->sym_idx);
|
|
if (func_val->method_id.not_null() ||
|
|
name == "main" || name == "recv_internal" || name == "recv_external" ||
|
|
name == "run_ticktock" || name == "split_prepare" || name == "split_install") {
|
|
mark_function_used(func_val);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
*
|
|
* OUTPUT CODE GENERATOR
|
|
*
|
|
*/
|
|
|
|
void generate_output_func(SymDef* func_sym, std::ostream &outs, std::ostream &errs) {
|
|
SymValCodeFunc* func_val = dynamic_cast<SymValCodeFunc*>(func_sym->value);
|
|
tolk_assert(func_val);
|
|
std::string name = symbols.get_name(func_sym->sym_idx);
|
|
if (verbosity >= 2) {
|
|
errs << "\n\n=========================\nfunction " << name << " : " << func_val->get_type() << std::endl;
|
|
}
|
|
if (!func_val->code) {
|
|
throw ParseError(func_sym->loc, "function `" + name + "` is just declared, not implemented");
|
|
} else {
|
|
CodeBlob& code = *(func_val->code);
|
|
if (verbosity >= 3) {
|
|
code.print(errs, 9);
|
|
}
|
|
code.simplify_var_types();
|
|
if (verbosity >= 5) {
|
|
errs << "after simplify_var_types: \n";
|
|
code.print(errs, 0);
|
|
}
|
|
code.prune_unreachable_code();
|
|
if (verbosity >= 5) {
|
|
errs << "after prune_unreachable: \n";
|
|
code.print(errs, 0);
|
|
}
|
|
code.split_vars(true);
|
|
if (verbosity >= 5) {
|
|
errs << "after split_vars: \n";
|
|
code.print(errs, 0);
|
|
}
|
|
for (int i = 0; i < 8; i++) {
|
|
code.compute_used_code_vars();
|
|
if (verbosity >= 4) {
|
|
errs << "after compute_used_vars: \n";
|
|
code.print(errs, 6);
|
|
}
|
|
code.fwd_analyze();
|
|
if (verbosity >= 5) {
|
|
errs << "after fwd_analyze: \n";
|
|
code.print(errs, 6);
|
|
}
|
|
code.prune_unreachable_code();
|
|
if (verbosity >= 5) {
|
|
errs << "after prune_unreachable: \n";
|
|
code.print(errs, 6);
|
|
}
|
|
}
|
|
code.mark_noreturn();
|
|
if (verbosity >= 3) {
|
|
code.print(errs, 15);
|
|
}
|
|
if (verbosity >= 2) {
|
|
errs << "\n---------- resulting code for " << name << " -------------\n";
|
|
}
|
|
const char* modifier = "";
|
|
if (func_val->is_inline()) {
|
|
modifier = "INLINE";
|
|
} else if (func_val->is_inline_ref()) {
|
|
modifier = "REF";
|
|
}
|
|
outs << std::string(2, ' ') << name << " PROC" << modifier << ":<{\n";
|
|
int mode = 0;
|
|
if (stack_layout_comments) {
|
|
mode |= Stack::_StkCmt | Stack::_CptStkCmt;
|
|
}
|
|
if (opt_level < 2) {
|
|
mode |= Stack::_DisableOpt;
|
|
}
|
|
if (func_val->is_inline() && code.ops->noreturn()) {
|
|
mode |= Stack::_InlineFunc;
|
|
}
|
|
if (func_val->is_inline() || func_val->is_inline_ref()) {
|
|
mode |= Stack::_InlineAny;
|
|
}
|
|
code.generate_code(outs, mode, 2);
|
|
outs << std::string(2, ' ') << "}>\n";
|
|
if (verbosity >= 2) {
|
|
errs << "--------------\n";
|
|
}
|
|
}
|
|
}
|
|
|
|
int generate_output(std::ostream &outs, std::ostream &errs) {
|
|
outs << "\"Asm.fif\" include\n";
|
|
outs << "// automatically generated from " << generated_from << std::endl;
|
|
outs << "PROGRAM{\n";
|
|
mark_used_symbols();
|
|
for (SymDef* func_sym : glob_func) {
|
|
SymValCodeFunc* func_val = dynamic_cast<SymValCodeFunc*>(func_sym->value);
|
|
tolk_assert(func_val);
|
|
if (!func_val->does_need_codegen()) {
|
|
if (verbosity >= 2) {
|
|
errs << func_sym->name() << ": code not generated, function does not need codegen\n";
|
|
}
|
|
continue;
|
|
}
|
|
|
|
std::string name = symbols.get_name(func_sym->sym_idx);
|
|
outs << std::string(2, ' ');
|
|
if (func_val->method_id.is_null()) {
|
|
outs << "DECLPROC " << name << "\n";
|
|
} else {
|
|
outs << func_val->method_id << " DECLMETHOD " << name << "\n";
|
|
}
|
|
}
|
|
for (SymDef* gvar_sym : glob_vars) {
|
|
auto* glob_val = dynamic_cast<SymValGlobVar*>(gvar_sym->value);
|
|
tolk_assert(glob_val);
|
|
if (!glob_val->is_really_used && pragma_remove_unused_functions.enabled()) {
|
|
if (verbosity >= 2) {
|
|
errs << gvar_sym->name() << ": variable not generated, it's unused\n";
|
|
}
|
|
continue;
|
|
}
|
|
std::string name = symbols.get_name(gvar_sym->sym_idx);
|
|
outs << std::string(2, ' ') << "DECLGLOBVAR " << name << "\n";
|
|
}
|
|
int errors = 0;
|
|
for (SymDef* func_sym : glob_func) {
|
|
SymValCodeFunc* func_val = dynamic_cast<SymValCodeFunc*>(func_sym->value);
|
|
if (!func_val->does_need_codegen()) {
|
|
continue;
|
|
}
|
|
try {
|
|
generate_output_func(func_sym, outs, errs);
|
|
} catch (ParseError& err) {
|
|
errs << "cannot generate code for function `" << symbols.get_name(func_sym->sym_idx) << "`:\n"
|
|
<< err << std::endl;
|
|
++errors;
|
|
}
|
|
}
|
|
outs << "}END>c\n";
|
|
if (!boc_output_filename.empty()) {
|
|
outs << "boc>B \"" << boc_output_filename << "\" B>file\n";
|
|
}
|
|
return errors;
|
|
}
|
|
|
|
|
|
int tolk_proceed(const std::string &entrypoint_file_name, std::ostream &outs, std::ostream &errs) {
|
|
define_builtins();
|
|
lexer_init();
|
|
pragma_allow_post_modification.always_on_and_deprecated("0.5.0");
|
|
pragma_compute_asm_ltr.always_on_and_deprecated("0.5.0");
|
|
|
|
try {
|
|
bool ok = parse_source_file(entrypoint_file_name.c_str(), {});
|
|
if (!ok) {
|
|
throw Fatal{"output code generation omitted because of errors"};
|
|
}
|
|
|
|
// todo #ifdef TOLK_PROFILING + comment
|
|
// lexer_measure_performance(all_src_files.get_all_files());
|
|
|
|
return generate_output(outs, errs);
|
|
} catch (Fatal& fatal) {
|
|
errs << "fatal: " << fatal << std::endl;
|
|
return 2;
|
|
} catch (ParseError& error) {
|
|
errs << error << std::endl;
|
|
return 2;
|
|
} catch (UnifyError& unif_err) {
|
|
errs << "fatal: ";
|
|
unif_err.print_message(errs);
|
|
errs << std::endl;
|
|
return 2;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
} // namespace tolk
|