2024-10-31 06:59:23 +00:00
|
|
|
/*
    This file is part of TON Blockchain Library.

    TON Blockchain Library is free software: you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    (at your option) any later version.

    TON Blockchain Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public License
    along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>.
*/
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Notable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
|
|
|
#include "fwd-declarations.h"
|
2024-10-31 06:59:23 +00:00
|
|
|
|
|
|
|
namespace tolk {
|
|
|
|
|
|
|
|
struct SrcFile {
|
|
|
|
struct SrcPosition {
|
|
|
|
int offset;
|
|
|
|
int line_no;
|
|
|
|
int char_no;
|
|
|
|
std::string_view line_str;
|
|
|
|
};
|
|
|
|
|
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
|
|
|
struct ImportDirective {
|
2024-10-31 07:02:01 +00:00
|
|
|
const SrcFile* imported_file;
|
|
|
|
};
|
|
|
|
|
|
|
|
int file_id; // an incremental counter through all parsed files
|
|
|
|
std::string rel_filename; // relative to cwd
|
|
|
|
std::string abs_filename; // absolute from root
|
2024-10-31 07:04:58 +00:00
|
|
|
std::string text; // file contents loaded into memory, every Token::str_val points inside it
|
[Tolk] AST-based semantic analysis, get rid of Expr
This is a huge refactoring focusing on untangling compiler internals
(previously forked from FunC).
The goal is to convert AST directly to Op (a kind of IR representation),
doing all code analysis at AST level.
Noteable changes:
- AST-based semantic kernel includes: registering global symbols,
scope handling and resolving local/global identifiers,
lvalue/rvalue calc and check, implicit return detection,
mutability analysis, pure/impure validity checks,
simple constant folding
- values of `const` variables are calculated NOT based on CodeBlob,
but via a newly-introduced AST-based constant evaluator
- AST vertices are now inherited from expression/statement/other;
expression vertices have common properties (TypeExpr, lvalue/rvalue)
- symbol table is rewritten completely, SymDef/SymVal no longer exist,
lexer now doesn't need to register identifiers
- AST vertices have references to symbols, filled at different
stages of pipeline
- the remaining "FunC legacy part" is almost unchanged besides Expr
which was fully dropped; AST is converted to Ops (IR) directly
2024-12-16 18:19:45 +00:00
|
|
|
AnyV ast = nullptr; // when a file has been parsed, its ast_tolk_file is kept here
|
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
|
|
|
std::vector<ImportDirective> imports; // to check strictness (can't use a symbol without importing its file)
|
2024-10-31 06:59:23 +00:00
|
|
|
|
2024-10-31 07:02:01 +00:00
|
|
|
SrcFile(int file_id, std::string rel_filename, std::string abs_filename, std::string&& text)
|
2024-10-31 06:59:23 +00:00
|
|
|
: file_id(file_id)
|
|
|
|
, rel_filename(std::move(rel_filename))
|
|
|
|
, abs_filename(std::move(abs_filename))
|
2024-10-31 07:02:01 +00:00
|
|
|
, text(std::move(text)) { }
|
2024-10-31 06:59:23 +00:00
|
|
|
|
|
|
|
SrcFile(const SrcFile& other) = delete;
|
|
|
|
SrcFile &operator=(const SrcFile&) = delete;
|
|
|
|
|
2024-10-31 07:16:19 +00:00
|
|
|
bool is_stdlib_file() const;
|
2024-10-31 07:02:01 +00:00
|
|
|
|
2024-10-31 06:59:23 +00:00
|
|
|
bool is_offset_valid(int offset) const;
|
|
|
|
SrcPosition convert_offset(int offset) const;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
// SrcLocation points to a location (line, column) in some loaded .tolk source SrcFile.
|
|
|
|
// Note, that instead of storing src_file, line_no, etc., only 2 ints are stored.
|
|
|
|
// The purpose is: sizeof(SrcLocation) == 8, so it's just passed/stored without pointers/refs, just like int64_t.
|
|
|
|
// When decoding SrcLocation into human-readable format, it's converted to SrcFile::SrcPosition via offset.
|
|
|
|
class SrcLocation {
|
|
|
|
friend class Lexer;
|
|
|
|
|
2024-10-31 07:02:01 +00:00
|
|
|
int file_id = -1; // = SrcFile::file_id (note, that get_src_file() does linear search)
|
2024-10-31 06:59:23 +00:00
|
|
|
int char_offset = -1; // offset from SrcFile::text
|
|
|
|
|
|
|
|
public:
|
|
|
|
|
|
|
|
SrcLocation() = default;
|
|
|
|
explicit SrcLocation(const SrcFile* src_file) : file_id(src_file->file_id) {
|
|
|
|
}
|
|
|
|
|
|
|
|
bool is_defined() const { return file_id != -1; }
|
2024-10-31 07:04:58 +00:00
|
|
|
bool is_stdlib() const { return file_id == 0; }
|
2024-10-31 06:59:23 +00:00
|
|
|
const SrcFile* get_src_file() const;
|
|
|
|
|
2024-10-31 07:02:01 +00:00
|
|
|
// similar to `this->get_src_file() == symbol->get_src_file() || symbol->get_src_file()->is_stdlib()`
|
|
|
|
// (but effectively, avoiding linear search)
|
|
|
|
bool is_symbol_from_same_or_builtin_file(SrcLocation symbol_loc) const {
|
|
|
|
return file_id == symbol_loc.file_id || symbol_loc.file_id < 1;
|
|
|
|
}
|
|
|
|
|
2024-10-31 06:59:23 +00:00
|
|
|
void show(std::ostream& os) const;
|
|
|
|
void show_context(std::ostream& os) const;
|
2024-10-31 07:04:58 +00:00
|
|
|
std::string to_string() const;
|
2024-10-31 06:59:23 +00:00
|
|
|
|
|
|
|
void show_general_error(std::ostream& os, const std::string& message, const std::string& err_type) const;
|
|
|
|
void show_note(const std::string& err_msg) const;
|
|
|
|
void show_warning(const std::string& err_msg) const;
|
|
|
|
void show_error(const std::string& err_msg) const;
|
|
|
|
};
|
|
|
|
|
|
|
|
// Stream-insertion overload for SrcLocation (taken by value); only declared in this header.
std::ostream& operator<<(std::ostream& os, SrcLocation loc);
|
|
|
|
|
2024-10-31 07:04:58 +00:00
|
|
|
class AllRegisteredSrcFiles {
|
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
|
|
|
std::vector<const SrcFile*> all_src_files;
|
2024-10-31 07:04:58 +00:00
|
|
|
int last_registered_file_id = -1;
|
|
|
|
int last_parsed_file_id = -1;
|
|
|
|
|
|
|
|
public:
|
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
|
|
|
const SrcFile* find_file(int file_id) const;
|
|
|
|
const SrcFile* find_file(const std::string& abs_filename) const;
|
2024-10-31 07:04:58 +00:00
|
|
|
|
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
|
|
|
const SrcFile* locate_and_register_source_file(const std::string& rel_filename, SrcLocation included_from);
|
2024-10-31 07:04:58 +00:00
|
|
|
SrcFile* get_next_unparsed_file();
|
|
|
|
|
[Tolk] Rewrite the type system from Hindley-Milner to static typing
FunC's (and Tolk's before this PR) type system is based on Hindley-Milner.
This is a common approach for functional languages, where
types are inferred from usage through unification.
As a result, type declarations are not necessary:
() f(a,b) { return a+b; } // a and b now int, since `+` (int, int)
While this approach works for now, problems arise with the introduction
of new types like bool, where `!x` must handle both int and bool.
It will also become incompatible with int32 and other strict integers.
This will clash with structure methods, struggle with proper generics,
and become entirely impractical for union types.
This PR completely rewrites the type system targeting the future.
1) type of any expression is inferred and never changed
2) this is available because dependent expressions already inferred
3) forall completely removed, generic functions introduced
(they work like template functions actually, instantiated while inferring)
4) instantiation `<...>` syntax, example: `t.tupleAt<int>(0)`
5) `as` keyword, for example `t.tupleAt(0) as int`
6) methods binding is done along with type inferring, not before
("before", as worked previously, was always a wrong approach)
2024-12-30 15:31:27 +00:00
|
|
|
auto begin() const { return all_src_files.begin(); }
|
|
|
|
auto end() const { return all_src_files.end(); }
|
2024-10-31 07:04:58 +00:00
|
|
|
};
|
|
|
|
|
2024-10-31 07:02:01 +00:00
|
|
|
// Fatal is an exception that carries only a text message (no source location).
struct Fatal final : std::exception {
  std::string message;

  // Takes the message by value and moves it into place.
  explicit Fatal(std::string _msg) : message(std::move(_msg)) {
  }

  // Returns a pointer into `message`, so it is valid only while this object is alive
  // and `message` is not modified.
  const char* what() const noexcept override {
    return message.c_str();
  }
};
|
|
|
|
|
|
|
|
// Stream-insertion overload for Fatal; only declared in this header.
std::ostream& operator<<(std::ostream& os, const Fatal& fatal);
|
|
|
|
|
2024-10-31 06:59:23 +00:00
|
|
|
struct ParseError : std::exception {
|
|
|
|
SrcLocation where;
|
|
|
|
std::string message;
|
|
|
|
ParseError(SrcLocation _where, std::string _msg) : where(_where), message(std::move(_msg)) {
|
|
|
|
}
|
|
|
|
|
|
|
|
const char* what() const noexcept override {
|
|
|
|
return message.c_str();
|
|
|
|
}
|
|
|
|
void show(std::ostream& os) const;
|
|
|
|
};
|
|
|
|
|
|
|
|
// Stream-insertion overload for ParseError; only declared in this header.
std::ostream& operator<<(std::ostream& os, const ParseError& error);
|
|
|
|
|
|
|
|
} // namespace tolk
|