1
0
Fork 0
mirror of https://github.com/ton-blockchain/ton synced 2025-03-09 15:40:10 +00:00

[Tolk] Rewrite lexer, spaces are not mandatory anymore

A new lexer is noticeably faster and memory efficient
(although splitting a file to tokens is negligible in a whole pipeline).

But the purpose of rewriting lexer was not just to speed up,
but to allow writing code without spaces:
`2+2` is now 4, not a valid identifier as earlier.

The variety of symbols allowed in identifier has greatly reduced
and is now similar to other languages.

SrcLocation became 8 bytes on stack everywhere.

Command-line flags were also reworked:
- the input for Tolk compiler is only a single file now, it's parsed, and parsing continues while new #include are resolved
- flags like -A -P and so on are no more needed, actually
This commit is contained in:
tolk-vm 2024-10-31 10:59:23 +04:00
parent 0bcc0b3c12
commit f0e6470d0b
No known key found for this signature in database
GPG key ID: 7905DD7FE0324B12
26 changed files with 2042 additions and 2129 deletions

View file

@ -41,25 +41,19 @@ Expr::Expr(ExprCls c, sym_idx_t name_idx, std::initializer_list<Expr*> _arglist)
}
}
void Expr::chk_rvalue(const Lexem& lem) const {
void Expr::chk_rvalue(const Lexer& lex) const {
if (!is_rvalue()) {
lem.error_at("rvalue expected before `", "`");
lex.error_at("rvalue expected before `", "`");
}
}
void Expr::chk_lvalue(const Lexem& lem) const {
void Expr::chk_lvalue(const Lexer& lex) const {
if (!is_lvalue()) {
lem.error_at("lvalue expected before `", "`");
lex.error_at("lvalue expected before `", "`");
}
}
void Expr::chk_type(const Lexem& lem) const {
if (!is_type()) {
lem.error_at("type expression expected before `", "`");
}
}
bool Expr::deduce_type(const Lexem& lem) {
bool Expr::deduce_type(const Lexer& lex) {
if (e_type) {
return true;
}
@ -83,7 +77,7 @@ bool Expr::deduce_type(const Lexem& lem) {
std::ostringstream os;
os << "cannot apply function " << sym->name() << " : " << sym_val->get_type() << " to arguments of type "
<< fun_type->args[0] << ": " << ue;
lem.error(os.str());
lex.error(os.str());
}
e_type = fun_type->args[1];
TypeExpr::remove_indirect(e_type);
@ -98,7 +92,7 @@ bool Expr::deduce_type(const Lexem& lem) {
std::ostringstream os;
os << "cannot apply expression of type " << args[0]->e_type << " to an expression of type " << args[1]->e_type
<< ": " << ue;
lem.error(os.str());
lex.error(os.str());
}
e_type = fun_type->args[1];
TypeExpr::remove_indirect(e_type);
@ -113,7 +107,7 @@ bool Expr::deduce_type(const Lexem& lem) {
std::ostringstream os;
os << "cannot assign an expression of type " << args[1]->e_type << " to a variable or pattern of type "
<< args[0]->e_type << ": " << ue;
lem.error(os.str());
lex.error(os.str());
}
e_type = args[0]->e_type;
TypeExpr::remove_indirect(e_type);
@ -130,7 +124,7 @@ bool Expr::deduce_type(const Lexem& lem) {
os << "cannot implicitly assign an expression of type " << args[1]->e_type
<< " to a variable or pattern of type " << rhs_type << " in modifying method `" << symbols.get_name(val)
<< "` : " << ue;
lem.error(os.str());
lex.error(os.str());
}
e_type = rhs_type->args[1];
TypeExpr::remove_indirect(e_type);
@ -139,13 +133,13 @@ bool Expr::deduce_type(const Lexem& lem) {
}
case _CondExpr: {
tolk_assert(args.size() == 3);
auto flag_type = TypeExpr::new_atomic(_Int);
auto flag_type = TypeExpr::new_atomic(TypeExpr::_Int);
try {
unify(args[0]->e_type, flag_type);
} catch (UnifyError& ue) {
std::ostringstream os;
os << "condition in a conditional expression has non-integer type " << args[0]->e_type << ": " << ue;
lem.error(os.str());
lex.error(os.str());
}
try {
unify(args[1]->e_type, args[2]->e_type);
@ -153,7 +147,7 @@ bool Expr::deduce_type(const Lexem& lem) {
std::ostringstream os;
os << "the two variants in a conditional expression have different types " << args[1]->e_type << " and "
<< args[2]->e_type << " : " << ue;
lem.error(os.str());
lex.error(os.str());
}
e_type = args[1]->e_type;
TypeExpr::remove_indirect(e_type);
@ -176,13 +170,13 @@ int Expr::define_new_vars(CodeBlob& code) {
}
case _Var:
if (val < 0) {
val = code.create_var(TmpVar::_Named, e_type, sym, &here);
val = code.create_var(TmpVar::_Named, e_type, sym, here);
return 1;
}
break;
case _Hole:
if (val < 0) {
val = code.create_var(TmpVar::_Tmp, e_type, nullptr, &here);
val = code.create_var(TmpVar::_Tmp, e_type, nullptr, here);
}
break;
}
@ -202,7 +196,7 @@ int Expr::predefine_vars() {
}
case _Var:
if (!sym) {
tolk_assert(val < 0 && here.defined());
tolk_assert(val < 0 && here.is_defined());
if (prohibited_var_names.count(symbols.get_name(~val))) {
throw ParseError{
here, PSTRING() << "symbol `" << symbols.get_name(~val) << "` cannot be redefined as a variable"};
@ -212,7 +206,7 @@ int Expr::predefine_vars() {
if (!sym) {
throw ParseError{here, std::string{"redefined variable `"} + symbols.get_name(~val) + "`"};
}
sym->value = new SymVal{SymVal::_Var, -1, e_type};
sym->value = new SymVal{SymValKind::_Var, -1, e_type};
return 1;
}
break;
@ -221,17 +215,17 @@ int Expr::predefine_vars() {
}
var_idx_t Expr::new_tmp(CodeBlob& code) const {
return code.create_tmp_var(e_type, &here);
return code.create_tmp_var(e_type, here);
}
void add_set_globs(CodeBlob& code, std::vector<std::pair<SymDef*, var_idx_t>>& globs, const SrcLocation& here) {
void add_set_globs(CodeBlob& code, std::vector<std::pair<SymDef*, var_idx_t>>& globs, SrcLocation here) {
for (const auto& p : globs) {
auto& op = code.emplace_back(here, Op::_SetGlob, std::vector<var_idx_t>{}, std::vector<var_idx_t>{ p.second }, p.first);
op.set_impure(code);
}
}
std::vector<var_idx_t> pre_compile_let(CodeBlob& code, Expr* lhs, Expr* rhs, const SrcLocation& here) {
std::vector<var_idx_t> pre_compile_let(CodeBlob& code, Expr* lhs, Expr* rhs, SrcLocation here) {
while (lhs->is_type_apply()) {
lhs = lhs->args.at(0);
}
@ -245,7 +239,7 @@ std::vector<var_idx_t> pre_compile_let(CodeBlob& code, Expr* lhs, Expr* rhs, con
auto right = rhs->pre_compile(code);
TypeExpr::remove_indirect(rhs->e_type);
auto unpacked_type = rhs->e_type->args.at(0);
std::vector<var_idx_t> tmp{code.create_tmp_var(unpacked_type, &rhs->here)};
std::vector<var_idx_t> tmp{code.create_tmp_var(unpacked_type, rhs->here)};
code.emplace_back(lhs->here, Op::_UnTuple, tmp, std::move(right));
auto tvar = new Expr{Expr::_Var};
tvar->set_val(tmp[0]);
@ -286,14 +280,14 @@ std::vector<var_idx_t> pre_compile_tensor(const std::vector<Expr *>& args, CodeB
for (size_t j = 0; j < res_lists[i].size(); ++j) {
TmpVar& var = code.vars.at(res_lists[i][j]);
if (!lval_globs && (var.cls & TmpVar::_Named)) {
var.on_modification.push_back([&modified_vars, i, j, cur_ops = code.cur_ops, done = false](const SrcLocation &here) mutable {
var.on_modification.push_back([&modified_vars, i, j, cur_ops = code.cur_ops, done = false](SrcLocation here) mutable {
if (!done) {
done = true;
modified_vars.push_back({i, j, cur_ops});
}
});
} else {
var.on_modification.push_back([](const SrcLocation &) {
var.on_modification.push_back([](SrcLocation) {
});
}
}
@ -307,8 +301,8 @@ std::vector<var_idx_t> pre_compile_tensor(const std::vector<Expr *>& args, CodeB
for (size_t idx = modified_vars.size(); idx--; ) {
const ModifiedVar &m = modified_vars[idx];
var_idx_t orig_v = res_lists[m.i][m.j];
var_idx_t tmp_v = code.create_tmp_var(code.vars[orig_v].v_type, code.vars[orig_v].where.get());
std::unique_ptr<Op> op = std::make_unique<Op>(*code.vars[orig_v].where, Op::_Let);
var_idx_t tmp_v = code.create_tmp_var(code.vars[orig_v].v_type, code.vars[orig_v].where);
std::unique_ptr<Op> op = std::make_unique<Op>(code.vars[orig_v].where, Op::_Let);
op->left = {tmp_v};
op->right = {orig_v};
op->next = std::move((*m.cur_ops));