mirror of
https://github.com/ton-blockchain/ton
synced 2025-03-09 15:40:10 +00:00
[FunC] Support traditional // and /**/ comments
They work alongside Lisp-style ;; and {- -} comments, without any #pragma. Conceptually, a new syntax should be disabled by default and activated via a special compiler option. But for now, we don't have an easy way to provide compiler options in func-js, blueprint, etc. Note that introducing a per-file #pragma is the wrong approach here, since if we want to fire a human-readable error on using '//' without the pragma, the lexer would nevertheless have to work differently. (This could be controlled by a launch option, but see above.)
This commit is contained in:
parent
a174f858be
commit
30572c77d6
6 changed files with 105 additions and 27 deletions
39
crypto/func/auto-tests/tests/comments.fc
Normal file
39
crypto/func/auto-tests/tests/comments.fc
Normal file
|
@ -0,0 +1,39 @@
|
|||
|
||||
_ get10();
|
||||
|
||||
int {-
|
||||
block comment
|
||||
/*
|
||||
nested
|
||||
*/
|
||||
;;;; -} main()
|
||||
|
||||
// inside a comment, {- doesn't start a new one
|
||||
{- but if ;; is inside, a comment may end at this line-} {
|
||||
var cc = "a string may contain {- or // or /*, not parsed";
|
||||
// return 1;
|
||||
return get10() + /*
|
||||
traditional comment /* may be also nested */
|
||||
// line comment
|
||||
// ends */1 +
|
||||
1;
|
||||
{- moreover, different comment styles
|
||||
may be used for opening and closing
|
||||
*/
|
||||
}
|
||||
|
||||
/*
|
||||
first line
|
||||
//* nested
|
||||
//two-lined*/
|
||||
*/
|
||||
|
||||
int get10() method_id(10) {
|
||||
return 10;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
TESTCASE | 0 | | 12
|
||||
TESTCASE | 10 | | 10
|
||||
*/
|
11
crypto/func/auto-tests/tests/invalid-cmt-eof.fc
Normal file
11
crypto/func/auto-tests/tests/invalid-cmt-eof.fc
Normal file
|
@ -0,0 +1,11 @@
|
|||
int main() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
{-
|
||||
int ...
|
||||
|
||||
/*
|
||||
@compilation_should_fail
|
||||
@stderr comment extends past end of file
|
||||
*/
|
|
@ -1822,7 +1822,14 @@ void parse_include(Lexer& lex, const src::FileDescr* fdescr) {
|
|||
|
||||
bool parse_source(std::istream* is, src::FileDescr* fdescr) {
|
||||
src::SourceReader reader{is, fdescr};
|
||||
Lexer lex{reader, true, ";,()[] ~."};
|
||||
Lexer lex{reader, ";,()[] ~."};
|
||||
// previously, FunC had lisp-style comments,
|
||||
// but starting from v0.5.0, it supports traditional (slash) comments alongside
|
||||
// (in IDE, the user has a setting, what comment style he prefers)
|
||||
// maybe, in some far future, we'll stop supporting lisp-style comments
|
||||
lex.set_comment_tokens(";;", "{-", "-}");
|
||||
lex.set_comment2_tokens("//", "/*", "*/");
|
||||
lex.start_parsing();
|
||||
while (lex.tp() != _Eof) {
|
||||
if (lex.tp() == _PragmaHashtag) {
|
||||
parse_pragma(lex);
|
||||
|
|
|
@ -124,8 +124,7 @@ int Lexem::set(std::string _str, const SrcLocation& _loc, int _tp, int _val) {
|
|||
return classify();
|
||||
}
|
||||
|
||||
Lexer::Lexer(SourceReader& _src, bool init, std::string active_chars, std::string eol_cmts, std::string open_cmts,
|
||||
std::string close_cmts, std::string quote_chars, std::string multiline_quote)
|
||||
Lexer::Lexer(SourceReader& _src, std::string active_chars, std::string quote_chars, std::string multiline_quote)
|
||||
: src(_src), eof(false), lexem("", src.here(), Lexem::Undefined), peek_lexem("", {}, Lexem::Undefined),
|
||||
multiline_quote(std::move(multiline_quote)) {
|
||||
std::memset(char_class, 0, sizeof(char_class));
|
||||
|
@ -139,17 +138,27 @@ Lexer::Lexer(SourceReader& _src, bool init, std::string active_chars, std::strin
|
|||
char_class[(unsigned)c] |= activity;
|
||||
}
|
||||
}
|
||||
set_spec(eol_cmt, eol_cmts);
|
||||
set_spec(cmt_op, open_cmts);
|
||||
set_spec(cmt_cl, close_cmts);
|
||||
for (int c : quote_chars) {
|
||||
if (c > ' ' && c <= 0x7f) {
|
||||
char_class[(unsigned)c] |= cc::quote_char;
|
||||
}
|
||||
}
|
||||
if (init) {
|
||||
next();
|
||||
}
|
||||
}
|
||||
|
||||
void Lexer::set_comment_tokens(const std::string &eol_cmts, const std::string &open_cmts, const std::string &close_cmts) {
|
||||
set_spec(eol_cmt, eol_cmts);
|
||||
set_spec(cmt_op, open_cmts);
|
||||
set_spec(cmt_cl, close_cmts);
|
||||
}
|
||||
|
||||
void Lexer::set_comment2_tokens(const std::string &eol_cmts2, const std::string &open_cmts2, const std::string &close_cmts2) {
|
||||
set_spec(eol_cmt2, eol_cmts2);
|
||||
set_spec(cmt_op2, open_cmts2);
|
||||
set_spec(cmt_cl2, close_cmts2);
|
||||
}
|
||||
|
||||
void Lexer::start_parsing() {
|
||||
next();
|
||||
}
|
||||
|
||||
void Lexer::set_spec(std::array<int, 3>& arr, std::string setup) {
|
||||
|
@ -206,24 +215,30 @@ const Lexem& Lexer::next() {
|
|||
long long comm = 1;
|
||||
while (!src.seek_eof()) {
|
||||
int cc = src.cur_char(), nc = src.next_char();
|
||||
if (cc == eol_cmt[0] || (cc == eol_cmt[1] && nc == eol_cmt[2])) {
|
||||
src.load_line();
|
||||
} else if (cc == cmt_op[1] && nc == cmt_op[2]) {
|
||||
// note, that in practice (both in FunC and tlbc), [0]-th element is -256, condition for [0]-th is always false
|
||||
if (cc == eol_cmt[0] || (cc == eol_cmt[1] && nc == eol_cmt[2]) || cc == eol_cmt2[0] || (cc == eol_cmt2[1] && nc == eol_cmt2[2])) {
|
||||
if (comm == 1) { // just "//" — skip a whole line
|
||||
src.load_line();
|
||||
} else { // if "//" is nested into "/*", continue reading, since "*/" may be met
|
||||
src.advance(1);
|
||||
}
|
||||
} else if (cc == cmt_op[1] && nc == cmt_op[2] || cc == cmt_op2[1] && nc == cmt_op2[2]) {
|
||||
src.advance(2);
|
||||
comm = comm * 2 + 1;
|
||||
} else if (cc == cmt_op[0]) {
|
||||
} else if (cc == cmt_op[0] || cc == cmt_op2[0]) { // always false
|
||||
src.advance(1);
|
||||
comm *= 2;
|
||||
} else if (comm == 1) {
|
||||
break;
|
||||
} else if (cc == cmt_cl[1] && nc == cmt_cl[2]) {
|
||||
if (!(comm & 1)) {
|
||||
break; // means that we are not inside a comment
|
||||
} else if (cc == cmt_cl[1] && nc == cmt_cl[2] || cc == cmt_cl2[1] && nc == cmt_cl2[2]) {
|
||||
if (!(comm & 1)) { // always false
|
||||
src.error(std::string{"a `"} + (char)cmt_op[0] + "` comment closed by `" + (char)cmt_cl[1] + (char)cmt_cl[2] +
|
||||
"`");
|
||||
}
|
||||
// note that in FunC, {- may be closed with */, but assume it's ok (we'll get rid of {- in the future)
|
||||
comm >>= 1;
|
||||
src.advance(2);
|
||||
} else if (cc == cmt_cl[0]) {
|
||||
} else if (cc == cmt_cl[0] || cc == cmt_cl2[0]) { // always false
|
||||
if (!(comm & 1)) {
|
||||
src.error(std::string{"a `"} + (char)cmt_op[1] + (char)cmt_op[2] + "` comment closed by `" + (char)cmt_cl[0] +
|
||||
"`");
|
||||
|
@ -240,11 +255,7 @@ const Lexem& Lexer::next() {
|
|||
if (src.seek_eof()) {
|
||||
eof = true;
|
||||
if (comm > 1) {
|
||||
if (comm & 1) {
|
||||
src.error(std::string{"`"} + (char)cmt_op[1] + (char)cmt_op[2] + "` comment extends past end of file");
|
||||
} else {
|
||||
src.error(std::string{"`"} + (char)cmt_op[0] + "` comment extends past end of file");
|
||||
}
|
||||
src.error("comment extends past end of file");
|
||||
}
|
||||
return lexem.clear(src.here(), Lexem::Eof);
|
||||
}
|
||||
|
|
|
@ -65,12 +65,16 @@ struct Lexem {
|
|||
static std::string lexem_name_str(int idx);
|
||||
};
|
||||
|
||||
// todo this class (like all sources in /ton/crypto/parser) is shared between FunC and tlbc
|
||||
// this "shareness" and "generalization" is weird and annoying rather than solves any problems
|
||||
// later on, I'll get rid of this (parser/) folder, copying and adapting its sources to FunC and tlbc
|
||||
class Lexer {
|
||||
SourceReader& src;
|
||||
bool eof;
|
||||
Lexem lexem, peek_lexem;
|
||||
unsigned char char_class[128];
|
||||
std::array<int, 3> eol_cmt, cmt_op, cmt_cl;
|
||||
std::array<int, 3> eol_cmt, cmt_op, cmt_cl; // for FunC < 0.5.0: ;; {- -}
|
||||
std::array<int, 3> eol_cmt2, cmt_op2, cmt_cl2; // for FunC >= 0.5.0: // /* */
|
||||
std::string multiline_quote;
|
||||
enum cc { left_active = 2, right_active = 1, active = 3, allow_repeat = 4, quote_char = 8 };
|
||||
|
||||
|
@ -78,9 +82,13 @@ class Lexer {
|
|||
bool eof_found() const {
|
||||
return eof;
|
||||
}
|
||||
Lexer(SourceReader& _src, bool init = false, std::string active_chars = ";,() ~.", std::string eol_cmts = ";;",
|
||||
std::string open_cmts = "{-", std::string close_cmts = "-}", std::string quote_chars = "\"",
|
||||
std::string multiline_quote = "\"\"\"");
|
||||
explicit Lexer(SourceReader& _src, std::string active_chars = ";,() ~.",
|
||||
std::string quote_chars = "\"", std::string multiline_quote = "\"\"\"");
|
||||
|
||||
void set_comment_tokens(const std::string &eol_cmts, const std::string &open_cmts, const std::string &close_cmts);
|
||||
void set_comment2_tokens(const std::string &eol_cmts2, const std::string &open_cmts2, const std::string &close_cmts2);
|
||||
void start_parsing();
|
||||
|
||||
const Lexem& next();
|
||||
const Lexem& cur() const {
|
||||
return lexem;
|
||||
|
|
|
@ -2421,7 +2421,9 @@ std::vector<const src::FileDescr*> source_fdescr;
|
|||
|
||||
bool parse_source(std::istream* is, src::FileDescr* fdescr) {
|
||||
src::SourceReader reader{is, fdescr};
|
||||
src::Lexer lex{reader, true, "(){}:;? #$. ^~ #", "//", "/*", "*/", ""};
|
||||
src::Lexer lex{reader, "(){}:;? #$. ^~ #", ""};
|
||||
lex.set_comment_tokens("//", "/*", "*/");
|
||||
lex.start_parsing();
|
||||
while (lex.tp() != src::_Eof) {
|
||||
parse_constructor_def(lex);
|
||||
// std::cerr << lex.cur().str << '\t' << lex.cur().name_str() << std::endl;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue