From ce5880510450ee3cf133cb1501be8055f623ff0b Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Wed, 11 Dec 2024 14:48:48 +0300 Subject: [PATCH 01/61] Improve readability of validator-engine-console commands (#1426) 1. Add dashes to command names (old names still work for compatibility) 2. Better shard format 3. Allow base64 in some parameters --- ton/ton-types.h | 20 ++ .../validator-engine-console-query.cpp | 43 ++- .../validator-engine-console-query.h | 261 +++++++++--------- .../validator-engine-console.cpp | 10 +- .../validator-engine-console.h | 14 + 5 files changed, 193 insertions(+), 155 deletions(-) diff --git a/ton/ton-types.h b/ton/ton-types.h index cd970081..2447a8c5 100644 --- a/ton/ton-types.h +++ b/ton/ton-types.h @@ -120,6 +120,26 @@ struct ShardIdFull { char buffer[64]; return std::string{buffer, (unsigned)snprintf(buffer, 63, "(%d,%016llx)", workchain, (unsigned long long)shard)}; } + static td::Result parse(td::Slice s) { + // Formats: (0,2000000000000000) (0:2000000000000000) 0,2000000000000000 0:2000000000000000 + if (s.empty()) { + return td::Status::Error("empty string"); + } + if (s[0] == '(' && s.back() == ')') { + s = s.substr(1, s.size() - 2); + } + auto sep = s.find(':'); + if (sep == td::Slice::npos) { + sep = s.find(','); + } + if (sep == td::Slice::npos || s.size() - sep - 1 != 16) { + return td::Status::Error(PSTRING() << "invalid shard " << s); + } + ShardIdFull shard; + TRY_RESULT_ASSIGN(shard.workchain, td::to_integer_safe(s.substr(0, sep))); + TRY_RESULT_ASSIGN(shard.shard, td::hex_to_integer_safe(s.substr(sep + 1))); + return shard; + } }; struct AccountIdPrefixFull { diff --git a/validator-engine-console/validator-engine-console-query.cpp b/validator-engine-console/validator-engine-console-query.cpp index bfcd50da..d1110019 100644 --- a/validator-engine-console/validator-engine-console-query.cpp +++ b/validator-engine-console/validator-engine-console-query.cpp @@ -1041,8 +1041,7 @@ td::Status 
ImportCertificateQuery::receive(td::BufferSlice data) { td::Status SignShardOverlayCertificateQuery::run() { - TRY_RESULT_ASSIGN(wc_, tokenizer_.get_token()); - TRY_RESULT_ASSIGN(shard_, tokenizer_.get_token() ); + TRY_RESULT_ASSIGN(shard_, tokenizer_.get_token() ); TRY_RESULT_ASSIGN(key_, tokenizer_.get_token()); TRY_RESULT_ASSIGN(expire_at_, tokenizer_.get_token()); TRY_RESULT_ASSIGN(max_size_, tokenizer_.get_token()); @@ -1052,8 +1051,9 @@ td::Status SignShardOverlayCertificateQuery::run() { } td::Status SignShardOverlayCertificateQuery::send() { - auto b = ton::create_serialize_tl_object - (wc_, shard_, ton::create_tl_object(key_.tl()), expire_at_, max_size_); + auto b = ton::create_serialize_tl_object( + shard_.workchain, shard_.shard, ton::create_tl_object(key_.tl()), + expire_at_, max_size_); td::actor::send_closure(console_, &ValidatorEngineConsole::envelope_send_query, std::move(b), create_promise()); return td::Status::OK(); } @@ -1071,8 +1071,7 @@ td::Status SignShardOverlayCertificateQuery::receive(td::BufferSlice data) { } td::Status ImportShardOverlayCertificateQuery::run() { - TRY_RESULT_ASSIGN(wc_, tokenizer_.get_token()); - TRY_RESULT_ASSIGN(shard_, tokenizer_.get_token() ); + TRY_RESULT_ASSIGN(shard_, tokenizer_.get_token()); TRY_RESULT_ASSIGN(key_, tokenizer_.get_token()); TRY_RESULT_ASSIGN(in_file_, tokenizer_.get_token()); @@ -1083,8 +1082,9 @@ td::Status ImportShardOverlayCertificateQuery::send() { TRY_RESULT(data, td::read_file(in_file_)); TRY_RESULT_PREFIX(cert, ton::fetch_tl_object(data.as_slice(), true), "incorrect certificate"); - auto b = ton::create_serialize_tl_object - (wc_, shard_, ton::create_tl_object(key_.tl()), std::move(cert)); + auto b = ton::create_serialize_tl_object( + shard_.workchain, shard_.shard, ton::create_tl_object(key_.tl()), + std::move(cert)); td::actor::send_closure(console_, &ValidatorEngineConsole::envelope_send_query, std::move(b), create_promise()); return td::Status::OK(); } @@ -1173,14 +1173,12 @@ td::Status 
GetPerfTimerStatsJsonQuery::receive(td::BufferSlice data) { } td::Status GetShardOutQueueSizeQuery::run() { - TRY_RESULT_ASSIGN(block_id_.workchain, tokenizer_.get_token()); - TRY_RESULT_ASSIGN(block_id_.shard, tokenizer_.get_token()); + TRY_RESULT(shard, tokenizer_.get_token()); + block_id_.workchain = shard.workchain; + block_id_.shard = shard.shard; TRY_RESULT_ASSIGN(block_id_.seqno, tokenizer_.get_token()); if (!tokenizer_.endl()) { - ton::ShardIdFull dest; - TRY_RESULT_ASSIGN(dest.workchain, tokenizer_.get_token()); - TRY_RESULT_ASSIGN(dest.shard, tokenizer_.get_token()); - dest_ = dest; + TRY_RESULT_ASSIGN(dest_, tokenizer_.get_token()); } TRY_STATUS(tokenizer_.check_endl()); return td::Status::OK(); @@ -1188,8 +1186,7 @@ td::Status GetShardOutQueueSizeQuery::run() { td::Status GetShardOutQueueSizeQuery::send() { auto b = ton::create_serialize_tl_object( - dest_ ? 1 : 0, ton::create_tl_block_id_simple(block_id_), dest_ ? dest_.value().workchain : 0, - dest_ ? dest_.value().shard : 0); + dest_.is_valid() ? 
1 : 0, ton::create_tl_block_id_simple(block_id_), dest_.workchain, dest_.shard); td::actor::send_closure(console_, &ValidatorEngineConsole::envelope_send_query, std::move(b), create_promise()); return td::Status::OK(); } @@ -1563,14 +1560,13 @@ td::Status GetAdnlStatsQuery::receive(td::BufferSlice data) { } td::Status AddShardQuery::run() { - TRY_RESULT_ASSIGN(wc_, tokenizer_.get_token()); - TRY_RESULT_ASSIGN(shard_, tokenizer_.get_token()); + TRY_RESULT_ASSIGN(shard_, tokenizer_.get_token()); + TRY_STATUS(tokenizer_.check_endl()); return td::Status::OK(); } td::Status AddShardQuery::send() { - auto b = ton::create_serialize_tl_object( - ton::create_tl_shard_id(ton::ShardIdFull(wc_, shard_))); + auto b = ton::create_serialize_tl_object(ton::create_tl_shard_id(shard_)); td::actor::send_closure(console_, &ValidatorEngineConsole::envelope_send_query, std::move(b), create_promise()); return td::Status::OK(); } @@ -1583,14 +1579,13 @@ td::Status AddShardQuery::receive(td::BufferSlice data) { } td::Status DelShardQuery::run() { - TRY_RESULT_ASSIGN(wc_, tokenizer_.get_token()); - TRY_RESULT_ASSIGN(shard_, tokenizer_.get_token()); + TRY_RESULT_ASSIGN(shard_, tokenizer_.get_token()); + TRY_STATUS(tokenizer_.check_endl()); return td::Status::OK(); } td::Status DelShardQuery::send() { - auto b = ton::create_serialize_tl_object( - ton::create_tl_shard_id(ton::ShardIdFull(wc_, shard_))); + auto b = ton::create_serialize_tl_object(ton::create_tl_shard_id(shard_)); td::actor::send_closure(console_, &ValidatorEngineConsole::envelope_send_query, std::move(b), create_promise()); return td::Status::OK(); } diff --git a/validator-engine-console/validator-engine-console-query.h b/validator-engine-console/validator-engine-console-query.h index f85179bd..817d70c9 100644 --- a/validator-engine-console/validator-engine-console-query.h +++ b/validator-engine-console/validator-engine-console-query.h @@ -36,6 +36,7 @@ #include "ton/ton-types.h" #include "keys/keys.hpp" +#include 
"td/utils/base64.h" class ValidatorEngineConsole; @@ -95,27 +96,25 @@ inline td::Result Tokenizer::get_token() { } template <> -inline td::Result Tokenizer::get_token() { - TRY_RESULT(S, get_raw_token()); - TRY_RESULT(F, td::hex_decode(S)); - if (F.size() == 32) { - return ton::PublicKeyHash{td::Slice{F}}; +inline td::Result Tokenizer::get_token() { + TRY_RESULT(word, get_raw_token()); + std::string data; + if (word.size() == 64) { + TRY_RESULT_ASSIGN(data, td::hex_decode(word)); + } else if (word.size() == 44) { + TRY_RESULT_ASSIGN(data, td::base64_decode(word)); } else { return td::Status::Error("cannot parse keyhash: bad length"); } + td::Bits256 v; + v.as_slice().copy_from(data); + return v; } template <> -inline td::Result Tokenizer::get_token() { - TRY_RESULT(S, get_raw_token()); - TRY_RESULT(F, td::hex_decode(S)); - if (F.size() == 32) { - td::Bits256 v; - v.as_slice().copy_from(F); - return v; - } else { - return td::Status::Error("cannot parse keyhash: bad length"); - } +inline td::Result Tokenizer::get_token() { + TRY_RESULT(x, get_token()); + return ton::PublicKeyHash{x}; } template <> @@ -146,6 +145,18 @@ inline td::Result> Tokenizer::get_token_vector() { } } +template <> +inline td::Result Tokenizer::get_token() { + TRY_RESULT(word, get_raw_token()); + auto r_wc = td::to_integer_safe(word); + if (r_wc.is_ok()) { + TRY_RESULT_ASSIGN(word, get_raw_token()); + TRY_RESULT(shard, td::to_integer_safe(word)); + return ton::ShardIdFull{r_wc.move_as_ok(), shard}; + } + return ton::ShardIdFull::parse(word); +} + class QueryRunner { public: virtual ~QueryRunner() = default; @@ -222,10 +233,10 @@ class GetTimeQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice R) override; static std::string get_name() { - return "gettime"; + return "get-time"; } static std::string get_help() { - return "gettime\tshows current server unixtime"; + return "get-time\tshows current server unixtime"; } std::string name() const override { return 
get_name(); @@ -287,10 +298,10 @@ class NewKeyQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice R) override; static std::string get_name() { - return "newkey"; + return "new-key"; } static std::string get_help() { - return "newkey\tgenerates new key pair on server"; + return "new-key\tgenerates new key pair on server"; } std::string name() const override { return get_name(); @@ -308,10 +319,10 @@ class ImportPrivateKeyFileQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice R) override; static std::string get_name() { - return "importf"; + return "import-f"; } static std::string get_help() { - return "importf \timport private key"; + return "import-f \timport private key"; } std::string name() const override { return get_name(); @@ -330,10 +341,10 @@ class ExportPublicKeyQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice R) override; static std::string get_name() { - return "exportpub"; + return "export-pub"; } static std::string get_help() { - return "exportpub \texports public key by key hash"; + return "export-pub \texports public key by key hash"; } std::string name() const override { return get_name(); @@ -352,10 +363,10 @@ class ExportPublicKeyFileQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice R) override; static std::string get_name() { - return "exportpubf"; + return "export-pubf"; } static std::string get_help() { - return "exportpubf \texports public key by key hash"; + return "export-pub-f \texports public key by key hash"; } std::string name() const override { return get_name(); @@ -398,10 +409,10 @@ class SignFileQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "signf"; + return "sign-f"; } static std::string get_help() { - return "signf \tsigns bytestring with privkey"; + return "sign-f \tsigns bytestring with 
privkey"; } std::string name() const override { return get_name(); @@ -422,10 +433,10 @@ class ExportAllPrivateKeysQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice R) override; static std::string get_name() { - return "exportallprivatekeys"; + return "export-all-private-keys"; } static std::string get_help() { - return "exportallprivatekeys \texports all private keys from validator engine and stores them to " + return "export-all-private-keys \texports all private keys from validator engine and stores them to " ""; } std::string name() const override { @@ -446,10 +457,10 @@ class AddAdnlAddrQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "addadnl"; + return "add-adnl"; } static std::string get_help() { - return "addadnl \tuse key as ADNL addr"; + return "add-adnl \tuse key as ADNL addr"; } std::string name() const override { return get_name(); @@ -469,10 +480,10 @@ class AddDhtIdQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "adddht"; + return "add-dht"; } static std::string get_help() { - return "adddht \tcreate DHT node with specified ADNL addr"; + return "add-dht \tcreate DHT node with specified ADNL addr"; } std::string name() const override { return get_name(); @@ -491,10 +502,10 @@ class AddValidatorPermanentKeyQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "addpermkey"; + return "add-perm-key"; } static std::string get_help() { - return "addpermkey \tadd validator permanent key"; + return "add-perm-key \tadd validator permanent key"; } std::string name() const override { return get_name(); @@ -515,10 +526,10 @@ class AddValidatorTempKeyQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; 
static std::string get_name() { - return "addtempkey"; + return "add-temp-key"; } static std::string get_help() { - return "addtempkey \tadd validator temp key"; + return "add-temp-key \tadd validator temp key"; } std::string name() const override { return get_name(); @@ -539,10 +550,10 @@ class AddValidatorAdnlAddrQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "addvalidatoraddr"; + return "add-validator-addr"; } static std::string get_help() { - return "addvalidatoraddr \tadd validator ADNL addr"; + return "add-validator-addr \tadd validator ADNL addr"; } std::string name() const override { return get_name(); @@ -563,10 +574,10 @@ class ChangeFullNodeAdnlAddrQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "changefullnodeaddr"; + return "change-full-node-addr"; } static std::string get_help() { - return "changefullnodeaddr \tchanges fullnode ADNL address"; + return "change-full-node-addr \tchanges fullnode ADNL address"; } std::string name() const override { return get_name(); @@ -585,10 +596,10 @@ class AddLiteServerQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "addliteserver"; + return "add-liteserver"; } static std::string get_help() { - return "addliteserver \tadd liteserver"; + return "add-liteserver \tadd liteserver"; } std::string name() const override { return get_name(); @@ -608,10 +619,10 @@ class DelAdnlAddrQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "deladnl"; + return "del-adnl"; } static std::string get_help() { - return "deladnl \tdel unused ADNL addr"; + return "del-adnl \tdel unused ADNL addr"; } std::string name() const override { return get_name(); @@ -630,10 +641,10 
@@ class DelDhtIdQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "deldht"; + return "del-dht"; } static std::string get_help() { - return "deldht \tdel unused DHT node"; + return "del-dht \tdel unused DHT node"; } std::string name() const override { return get_name(); @@ -652,10 +663,10 @@ class DelValidatorPermanentKeyQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "delpermkey"; + return "del-perm-key"; } static std::string get_help() { - return "delpermkey \tforce del unused validator permanent key"; + return "del-perm-key \tforce del unused validator permanent key"; } std::string name() const override { return get_name(); @@ -674,10 +685,10 @@ class DelValidatorTempKeyQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "deltempkey"; + return "del-temp-key"; } static std::string get_help() { - return "deltempkey \tforce del unused validator temp key"; + return "del-temp-key \tforce del unused validator temp key"; } std::string name() const override { return get_name(); @@ -697,10 +708,10 @@ class DelValidatorAdnlAddrQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "delvalidatoraddr"; + return "del-validator-addr"; } static std::string get_help() { - return "delvalidatoraddr \tforce del unused validator ADNL addr"; + return "del-validator-addr \tforce del unused validator ADNL addr"; } std::string name() const override { return get_name(); @@ -720,10 +731,10 @@ class GetConfigQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "getconfig"; + return "get-config"; } static std::string get_help() { - return 
"getconfig\tdownloads current config"; + return "get-config\tdownloads current config"; } std::string name() const override { return get_name(); @@ -741,10 +752,10 @@ class SetVerbosityQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "setverbosity"; + return "set-verbosity"; } static std::string get_help() { - return "setverbosity \tchanges verbosity level"; + return "set-verbosity \tchanges verbosity level"; } std::string name() const override { return get_name(); @@ -763,10 +774,10 @@ class GetStatsQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "getstats"; + return "get-stats"; } static std::string get_help() { - return "getstats\tprints stats"; + return "get-stats\tprints stats"; } std::string name() const override { return get_name(); @@ -807,10 +818,10 @@ class AddNetworkAddressQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "addaddr"; + return "add-addr"; } static std::string get_help() { - return "addaddr {cats...} {priocats...}\tadds ip address to address list"; + return "add-addr {cats...} {priocats...}\tadds ip address to address list"; } std::string name() const override { return get_name(); @@ -831,10 +842,10 @@ class AddNetworkProxyAddressQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "addproxyaddr"; + return "add-proxy-addr"; } static std::string get_help() { - return "addproxyaddr {cats...} {priocats...}\tadds ip address to address list"; + return "add-proxy-addr {cats...} {priocats...}\tadds ip address to address list"; } std::string name() const override { return get_name(); @@ -858,10 +869,10 @@ class CreateElectionBidQuery : public Query { td::Status send() override; 
td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "createelectionbid"; + return "create-election-bid"; } static std::string get_help() { - return "createelectionbid \tcreate election bid"; + return "create-election-bid \tcreate election bid"; } std::string name() const override { return get_name(); @@ -883,10 +894,10 @@ class CreateProposalVoteQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "createproposalvote"; + return "create-proposal-vote"; } static std::string get_help() { - return "createproposalvote \tcreate proposal vote"; + return "create-proposal-vote \tcreate proposal vote"; } std::string name() const override { return get_name(); @@ -906,10 +917,10 @@ class CreateComplaintVoteQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "createcomplaintvote"; + return "create-complaint-vote"; } static std::string get_help() { - return "createcomplaintvote \tcreate proposal vote"; + return "create-complaint-vote \tcreate proposal vote"; } std::string name() const override { return get_name(); @@ -930,10 +941,10 @@ class CheckDhtServersQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "checkdht"; + return "check-dht"; } static std::string get_help() { - return "checkdht \tchecks, which root DHT servers are accessible from this ADNL addr"; + return "check-dht \tchecks, which root DHT servers are accessible from this ADNL addr"; } std::string name() const override { return get_name(); @@ -952,10 +963,10 @@ class GetOverlaysStatsQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "getoverlaysstats"; + return "get-overlays-stats"; } static std::string get_help() { 
- return "getoverlaysstats\tgets stats for all overlays"; + return "get-overlays-stats\tgets stats for all overlays"; } std::string name() const override { return get_name(); @@ -971,10 +982,10 @@ class GetOverlaysStatsJsonQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "getoverlaysstatsjson"; + return "get-overlays-stats-json"; } static std::string get_help() { - return "getoverlaysstatsjson \tgets stats for all overlays and writes to json file"; + return "get-overlays-stats-json \tgets stats for all overlays and writes to json file"; } std::string name() const override { return get_name(); @@ -993,10 +1004,11 @@ class SignCertificateQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "signcert"; + return "sign-cert"; } static std::string get_help() { - return "signcert \tsign overlay certificate by key"; + return "sign-cert \tsign overlay certificate by " + " key"; } std::string name() const override { return get_name(); @@ -1029,10 +1041,10 @@ class ImportCertificateQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "importcert"; + return "import-cert"; } static std::string get_help() { - return "importcert \timport overlay certificate for specific key"; + return "import-cert \timport overlay certificate for specific key"; } std::string name() const override { return get_name(); @@ -1054,10 +1066,11 @@ class SignShardOverlayCertificateQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "signshardoverlaycert"; + return "sign-shard-overlay-cert"; } static std::string get_help() { - return "signshardoverlaycert \tsign certificate for in currently active shard overlay"; + return "sign-shard-overlay-cert 
: \tsign certificate " + "for in currently active shard overlay"; } std::string name() const override { return get_name(); @@ -1065,8 +1078,7 @@ class SignShardOverlayCertificateQuery : public Query { private: - td::int32 wc_; - td::int64 shard_; + ton::ShardIdFull shard_; td::int32 expire_at_; ton::PublicKeyHash key_; td::uint32 max_size_; @@ -1083,10 +1095,11 @@ class ImportShardOverlayCertificateQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "importshardoverlaycert"; + return "import-shard-overlay-cert"; } static std::string get_help() { - return "importshardoverlaycert \timport certificate for in currently active shard overlay"; + return "import-shard-overlay-cert : \timport certificate for in " + "currently active shard overlay"; } std::string name() const override { return get_name(); @@ -1094,8 +1107,7 @@ class ImportShardOverlayCertificateQuery : public Query { private: - td::int32 wc_; - td::int64 shard_; + ton::ShardIdFull shard_; ton::PublicKeyHash key_; std::string in_file_; }; @@ -1109,10 +1121,10 @@ class GetActorStatsQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "getactorstats"; + return "get-actor-stats"; } static std::string get_help() { - return "getactorstats []\tget actor stats and print it either in stdout or in "; + return "get-actor-stats []\tget actor stats and print it either in stdout or in "; } std::string name() const override { return get_name(); @@ -1131,10 +1143,11 @@ class GetPerfTimerStatsJsonQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "getperftimerstatsjson"; + return "get-perf-timer-stats-json"; } static std::string get_help() { - return "getperftimerstatsjson \tgets min, average and max event processing time for last 60, 300 and 3600 seconds 
and writes to json file"; + return "get-perf-timer-stats-json \tgets min, average and max event processing time for last 60, 300 and " + "3600 seconds and writes to json file"; } std::string name() const override { return get_name(); @@ -1153,10 +1166,10 @@ class GetShardOutQueueSizeQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "getshardoutqueuesize"; + return "get-shard-out-queue-size"; } static std::string get_help() { - return "getshardoutqueuesize [ ]\treturns number of messages in the " + return "get-shard-out-queue-size : [:]\treturns number of messages in the " "queue of the given shard. Destination shard is optional."; } std::string name() const override { @@ -1165,7 +1178,7 @@ class GetShardOutQueueSizeQuery : public Query { private: ton::BlockId block_id_; - td::optional dest_; + ton::ShardIdFull dest_ = ton::ShardIdFull{ton::workchainInvalid}; }; class SetExtMessagesBroadcastDisabledQuery : public Query { @@ -1177,11 +1190,11 @@ class SetExtMessagesBroadcastDisabledQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "setextmessagesbroadcastdisabled"; + return "set-ext-messages-broadcast-disabled"; } static std::string get_help() { - return "setextmessagesbroadcastdisabled \tdisable broadcasting and rebroadcasting ext messages; value is 0 " - "or 1."; + return "set-ext-messages-broadcast-disabled \tdisable broadcasting and rebroadcasting ext messages; value " + "is 0 or 1."; } std::string name() const override { return get_name(); @@ -1200,10 +1213,10 @@ class AddCustomOverlayQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "addcustomoverlay"; + return "add-custom-overlay"; } static std::string get_help() { - return "addcustomoverlay \tadd custom overlay with config from file "; 
+ return "add-custom-overlay \tadd custom overlay with config from file "; } std::string name() const override { return get_name(); @@ -1222,10 +1235,10 @@ class DelCustomOverlayQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "delcustomoverlay"; + return "del-custom-overlay"; } static std::string get_help() { - return "delcustomoverlay \tdelete custom overlay with name "; + return "del-custom-overlay \tdelete custom overlay with name "; } std::string name() const override { return get_name(); @@ -1244,10 +1257,10 @@ class ShowCustomOverlaysQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "showcustomoverlays"; + return "show-custom-overlays"; } static std::string get_help() { - return "showcustomoverlays\tshow all custom overlays"; + return "show-custom-overlays\tshow all custom overlays"; } std::string name() const override { return get_name(); @@ -1263,10 +1276,10 @@ class SetStateSerializerEnabledQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "setstateserializerenabled"; + return "set-state-serializer-enabled"; } static std::string get_help() { - return "setstateserializerenabled \tdisable or enable persistent state serializer; value is 0 or 1"; + return "set-state-serializer-enabled \tdisable or enable persistent state serializer; value is 0 or 1"; } std::string name() const override { return get_name(); @@ -1285,10 +1298,10 @@ class SetCollatorOptionsJsonQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "setcollatoroptionsjson"; + return "set-collator-options-json"; } static std::string get_help() { - return "setcollatoroptionsjson \tset collator options from file "; + return 
"set-collator-options-json \tset collator options from file "; } std::string name() const override { return get_name(); @@ -1307,10 +1320,10 @@ class ResetCollatorOptionsQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "resetcollatoroptions"; + return "reset-collator-options"; } static std::string get_help() { - return "resetcollatoroptions\tset collator options to default values"; + return "reset-collator-options\tset collator options to default values"; } std::string name() const override { return get_name(); @@ -1326,10 +1339,10 @@ class GetCollatorOptionsJsonQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "getcollatoroptionsjson"; + return "get-collator-options-json"; } static std::string get_help() { - return "getcollatoroptionsjson \tsave current collator options to file "; + return "get-collator-options-json \tsave current collator options to file "; } std::string name() const override { return get_name(); @@ -1348,11 +1361,11 @@ class GetAdnlStatsJsonQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "getadnlstatsjson"; + return "get-adnl-stats-json"; } static std::string get_help() { - return "getadnlstatsjson [all]\tsave adnl stats to . all - returns all peers (default - only " - "peers with traffic in the last 10 minutes)"; + return "get-adnl-stats-json [all]\tsave adnl stats to . 
all - returns all peers (default - " + "only peers with traffic in the last 10 minutes)"; } std::string name() const override { return get_name(); @@ -1372,11 +1385,11 @@ class GetAdnlStatsQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "getadnlstats"; + return "get-adnl-stats"; } static std::string get_help() { - return "getadnlstats [all]\tdisplay adnl stats. all - returns all peers (default - only peers with traffic in the " - "last 10 minutes)"; + return "get-adnl-stats [all]\tdisplay adnl stats. all - returns all peers (default - only peers with traffic in " + "the last 10 minutes)"; } std::string name() const override { return get_name(); @@ -1396,18 +1409,17 @@ class AddShardQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "addshard"; + return "add-shard"; } static std::string get_help() { - return "addshard \tstart monitoring shard"; + return "add-shard :\tstart monitoring shard"; } std::string name() const override { return get_name(); } private: - td::int32 wc_; - td::int64 shard_; + ton::ShardIdFull shard_; }; class DelShardQuery : public Query { @@ -1419,16 +1431,15 @@ class DelShardQuery : public Query { td::Status send() override; td::Status receive(td::BufferSlice data) override; static std::string get_name() { - return "delshard"; + return "del-shard"; } static std::string get_help() { - return "delshard \tstop monitoring shard"; + return "del-shard :\tstop monitoring shard"; } std::string name() const override { return get_name(); } private: - td::int32 wc_; - td::int64 shard_; + ton::ShardIdFull shard_; }; \ No newline at end of file diff --git a/validator-engine-console/validator-engine-console.cpp b/validator-engine-console/validator-engine-console.cpp index 85c92564..234cd6a5 100644 --- a/validator-engine-console/validator-engine-console.cpp +++ 
b/validator-engine-console/validator-engine-console.cpp @@ -206,9 +206,8 @@ void ValidatorEngineConsole::show_help(std::string command, td::Promisehelp() << "\n"; } } else { - auto it = query_runners_.find(command); - if (it != query_runners_.end()) { - td::TerminalIO::out() << it->second->help() << "\n"; + if (auto query = get_query(command)) { + td::TerminalIO::out() << query->help() << "\n"; } else { td::TerminalIO::out() << "unknown command '" << command << "'\n"; } @@ -232,10 +231,9 @@ void ValidatorEngineConsole::parse_line(td::BufferSlice data) { } auto name = tokenizer.get_token().move_as_ok(); - auto it = query_runners_.find(name); - if (it != query_runners_.end()) { + if (auto query = get_query(name)) { running_queries_++; - it->second->run(actor_id(this), std::move(tokenizer)); + query->run(actor_id(this), std::move(tokenizer)); } else { td::TerminalIO::out() << "unknown command '" << name << "'\n"; } diff --git a/validator-engine-console/validator-engine-console.h b/validator-engine-console/validator-engine-console.h index 7a384276..c802794e 100644 --- a/validator-engine-console/validator-engine-console.h +++ b/validator-engine-console/validator-engine-console.h @@ -57,9 +57,23 @@ class ValidatorEngineConsole : public td::actor::Actor { std::unique_ptr make_callback(); std::map> query_runners_; + std::map alternate_names_; + static std::string simplify_name(std::string name) { + std::erase_if(name, [](char c) { return c == '-'; }); + return name; + } void add_query_runner(std::unique_ptr runner) { auto name = runner->name(); query_runners_[name] = std::move(runner); + alternate_names_[simplify_name(name)] = name; + } + QueryRunner* get_query(std::string name) { + auto it = alternate_names_.find(name); + if (it != alternate_names_.end()) { + name = it->second; + } + auto it2 = query_runners_.find(name); + return it2 == query_runners_.end() ? 
nullptr : it2->second.get(); } public: From f03f6ce7cad7c367f9ad93157ee63a7eb6483820 Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Tue, 17 Dec 2024 11:16:43 +0300 Subject: [PATCH 02/61] Fix check_underflow in some instructions --- crypto/vm/dictops.cpp | 4 ++-- crypto/vm/tonops.cpp | 10 +++++++++- doc/GlobalVersions.md | 5 ++++- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/crypto/vm/dictops.cpp b/crypto/vm/dictops.cpp index d0ea8daa..02f26fdd 100644 --- a/crypto/vm/dictops.cpp +++ b/crypto/vm/dictops.cpp @@ -566,7 +566,7 @@ int exec_dict_getnear(VmState* st, unsigned args) { int exec_pfx_dict_set(VmState* st, Dictionary::SetMode mode, const char* name) { Stack& stack = st->get_stack(); VM_LOG(st) << "execute PFXDICT" << name; - stack.check_underflow(3); + stack.check_underflow(st->get_global_version() >= 9 ? 4 : 3); int n = stack.pop_smallint_range(PrefixDictionary::max_key_bits); PrefixDictionary dict{stack.pop_maybe_cell(), n}; auto key_slice = stack.pop_cellslice(); @@ -580,7 +580,7 @@ int exec_pfx_dict_set(VmState* st, Dictionary::SetMode mode, const char* name) { int exec_pfx_dict_delete(VmState* st) { Stack& stack = st->get_stack(); VM_LOG(st) << "execute PFXDICTDEL\n"; - stack.check_underflow(2); + stack.check_underflow(st->get_global_version() >= 9 ? 3 : 2); int n = stack.pop_smallint_range(PrefixDictionary::max_key_bits); PrefixDictionary dict{stack.pop_maybe_cell(), n}; auto key_slice = stack.pop_cellslice(); diff --git a/crypto/vm/tonops.cpp b/crypto/vm/tonops.cpp index 6c698df4..d134f80b 100644 --- a/crypto/vm/tonops.cpp +++ b/crypto/vm/tonops.cpp @@ -279,6 +279,7 @@ int exec_get_global_id(VmState* st) { int exec_get_gas_fee(VmState* st) { VM_LOG(st) << "execute GETGASFEE"; Stack& stack = st->get_stack(); + stack.check_underflow(st->get_global_version() >= 9 ? 
2 : 0); bool is_masterchain = stack.pop_bool(); td::uint64 gas = stack.pop_long_range(std::numeric_limits::max(), 0); block::GasLimitsPrices prices = util::get_gas_prices(get_unpacked_config_tuple(st), is_masterchain); @@ -289,6 +290,7 @@ int exec_get_gas_fee(VmState* st) { int exec_get_storage_fee(VmState* st) { VM_LOG(st) << "execute GETSTORAGEFEE"; Stack& stack = st->get_stack(); + stack.check_underflow(st->get_global_version() >= 9 ? 4 : 0); bool is_masterchain = stack.pop_bool(); td::int64 delta = stack.pop_long_range(std::numeric_limits::max(), 0); td::uint64 bits = stack.pop_long_range(std::numeric_limits::max(), 0); @@ -302,6 +304,7 @@ int exec_get_storage_fee(VmState* st) { int exec_get_forward_fee(VmState* st) { VM_LOG(st) << "execute GETFORWARDFEE"; Stack& stack = st->get_stack(); + stack.check_underflow(st->get_global_version() >= 9 ? 3 : 0); bool is_masterchain = stack.pop_bool(); td::uint64 bits = stack.pop_long_range(std::numeric_limits::max(), 0); td::uint64 cells = stack.pop_long_range(std::numeric_limits::max(), 0); @@ -320,6 +323,7 @@ int exec_get_precompiled_gas(VmState* st) { int exec_get_original_fwd_fee(VmState* st) { VM_LOG(st) << "execute GETORIGINALFWDFEE"; Stack& stack = st->get_stack(); + stack.check_underflow(st->get_global_version() >= 9 ? 2 : 0); bool is_masterchain = stack.pop_bool(); td::RefInt256 fwd_fee = stack.pop_int_finite(); if (fwd_fee->sgn() < 0) { @@ -333,6 +337,7 @@ int exec_get_original_fwd_fee(VmState* st) { int exec_get_gas_fee_simple(VmState* st) { VM_LOG(st) << "execute GETGASFEESIMPLE"; Stack& stack = st->get_stack(); + stack.check_underflow(st->get_global_version() >= 9 ? 
2 : 0); bool is_masterchain = stack.pop_bool(); td::uint64 gas = stack.pop_long_range(std::numeric_limits::max(), 0); block::GasLimitsPrices prices = util::get_gas_prices(get_unpacked_config_tuple(st), is_masterchain); @@ -343,6 +348,7 @@ int exec_get_gas_fee_simple(VmState* st) { int exec_get_forward_fee_simple(VmState* st) { VM_LOG(st) << "execute GETFORWARDFEESIMPLE"; Stack& stack = st->get_stack(); + stack.check_underflow(st->get_global_version() >= 9 ? 3 : 0); bool is_masterchain = stack.pop_bool(); td::uint64 bits = stack.pop_long_range(std::numeric_limits::max(), 0); td::uint64 cells = stack.pop_long_range(std::numeric_limits::max(), 0); @@ -373,6 +379,7 @@ void register_ton_config_ops(OpcodeTable& cp0) { .insert(OpcodeInstr::mksimple(0xf833, 16, "CONFIGOPTPARAM", std::bind(exec_get_config_param, _1, true))) .insert(OpcodeInstr::mksimple(0xf83400, 24, "PREVMCBLOCKS", std::bind(exec_get_prev_blocks_info, _1, 0, "PREVMCBLOCKS"))->require_version(4)) .insert(OpcodeInstr::mksimple(0xf83401, 24, "PREVKEYBLOCK", std::bind(exec_get_prev_blocks_info, _1, 1, "PREVKEYBLOCK"))->require_version(4)) + .insert(OpcodeInstr::mksimple(0xf83402, 24, "PREVMCBLOCKS_100", std::bind(exec_get_prev_blocks_info, _1, 2, "PREVMCBLOCKS_100"))->require_version(9)) .insert(OpcodeInstr::mksimple(0xf835, 16, "GLOBALID", exec_get_global_id)->require_version(4)) .insert(OpcodeInstr::mksimple(0xf836, 16, "GETGASFEE", exec_get_gas_fee)->require_version(6)) .insert(OpcodeInstr::mksimple(0xf837, 16, "GETSTORAGEFEE", exec_get_storage_fee)->require_version(6)) @@ -538,9 +545,10 @@ int exec_hash_ext(VmState* st, unsigned args) { VM_LOG(st) << "execute HASHEXT" << (append ? "A" : "") << (rev ? "R" : "") << " " << (hash_id == 255 ? -1 : hash_id); Stack& stack = st->get_stack(); if (hash_id == 255) { + stack.check_underflow(st->get_global_version() >= 9 ? 
2 : 0); hash_id = stack.pop_smallint_range(254); } - int cnt = stack.pop_smallint_range(stack.depth() - 1); + int cnt = stack.pop_smallint_range(stack.depth() - 1 - (st->get_global_version() >= 9 ? (int)append : 0)); Hasher hasher{hash_id}; size_t total_bits = 0; long long gas_consumed = 0; diff --git a/doc/GlobalVersions.md b/doc/GlobalVersions.md index 5db1ab76..11b9b68c 100644 --- a/doc/GlobalVersions.md +++ b/doc/GlobalVersions.md @@ -3,6 +3,7 @@ Global version is a parameter specified in `ConfigParam 8` ([block.tlb](https:// Various features are enabled depending on the global version. ## Version 4 +New features of version 4 are desctibed in detail in [the documentation](https://docs.ton.org/v3/documentation/tvm/changelog/tvm-upgrade-2023-07). ### New TVM instructions * `PREVMCBLOCKS`, `PREVKEYBLOCK` @@ -122,4 +123,6 @@ Operations for working with Merkle proofs, where cells can have non-zero level a ### Other changes - Fix `RAWRESERVE` action with flag `4` (use original balance of the account) by explicitly setting `original_balance` to `balance - msg_balance_remaining`. - Previously it did not work if storage fee was greater than the original balance. -- Jumps to nested continuations of depth more than 8 consume 1 gas for eact subsequent continuation (this does not affect most of TVM code). \ No newline at end of file +- Jumps to nested continuations of depth more than 8 consume 1 gas for eact subsequent continuation (this does not affect most of TVM code). +- Fix exception code in some TVM instructions: now `stk_und` has priority over other error codes. 
+ - `PFXDICTADD`, `PFXDICTSET`, `PFXDICTREPLACE`, `PFXDICTDEL`, `GETGASFEE`, `GETSTORAGEFEE`, `GETFORWARDFEE`, `GETORIGINALFWDFEE`, `GETGASFEESIMPLE`, `GETFORWARDFEESIMPLE`, `HASHEXT` \ No newline at end of file From a01c7e2e75b04f00f41bc1eeec8f7855fed7194e Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Tue, 17 Dec 2024 11:17:54 +0300 Subject: [PATCH 03/61] Add more recent blocks to "previous blocks info" --- crypto/block/mc-config.cpp | 34 +++++++++++++++++++++++++++------- crypto/block/transaction.cpp | 3 ++- doc/GlobalVersions.md | 5 +++++ 3 files changed, 34 insertions(+), 8 deletions(-) diff --git a/crypto/block/mc-config.cpp b/crypto/block/mc-config.cpp index 56ee85ae..14881913 100644 --- a/crypto/block/mc-config.cpp +++ b/crypto/block/mc-config.cpp @@ -2292,7 +2292,8 @@ Ref ConfigInfo::lookup_library(td::ConstBitPtr root_hash) const { td::Result> ConfigInfo::get_prev_blocks_info() const { // [ wc:Integer shard:Integer seqno:Integer root_hash:Integer file_hash:Integer] = BlockId; // [ last_mc_blocks:[BlockId...] - // prev_key_block:BlockId ] : PrevBlocksInfo + // prev_key_block:BlockId + // last_mc_blocks_100[BlockId...] 
] : PrevBlocksInfo auto block_id_to_tuple = [](const ton::BlockIdExt& block_id) -> vm::Ref { td::RefInt256 shard = td::make_refint(block_id.id.shard); if (shard->sgn() < 0) { @@ -2302,25 +2303,44 @@ td::Result> ConfigInfo::get_prev_blocks_info() const { td::make_refint(block_id.id.seqno), td::bits_to_refint(block_id.root_hash.bits(), 256), td::bits_to_refint(block_id.file_hash.bits(), 256)); }; - std::vector last_mc_blocks; + std::vector tuple; + std::vector last_mc_blocks; last_mc_blocks.push_back(block_id_to_tuple(block_id)); for (ton::BlockSeqno seqno = block_id.id.seqno; seqno > 0 && last_mc_blocks.size() < 16;) { --seqno; - ton::BlockIdExt block_id; - if (!get_old_mc_block_id(seqno, block_id)) { + ton::BlockIdExt id; + if (!get_old_mc_block_id(seqno, id)) { return td::Status::Error("cannot fetch old mc block"); } - last_mc_blocks.push_back(block_id_to_tuple(block_id)); + last_mc_blocks.push_back(block_id_to_tuple(id)); } + tuple.push_back(td::make_cnt_ref>(std::move(last_mc_blocks))); ton::BlockIdExt last_key_block; ton::LogicalTime last_key_block_lt; if (!get_last_key_block(last_key_block, last_key_block_lt)) { return td::Status::Error("cannot fetch last key block"); } - return vm::make_tuple_ref(td::make_cnt_ref>(std::move(last_mc_blocks)), - block_id_to_tuple(last_key_block)); + tuple.push_back(block_id_to_tuple(last_key_block)); + + if (get_global_version() >= 9) { + std::vector last_mc_blocks_100; + for (ton::BlockSeqno seqno = block_id.id.seqno / 100 * 100; last_mc_blocks_100.size() < 16;) { + ton::BlockIdExt id; + if (!get_old_mc_block_id(seqno, id)) { + return td::Status::Error("cannot fetch old mc block"); + } + last_mc_blocks_100.push_back(block_id_to_tuple(id)); + if (seqno < 100) { + break; + } + seqno -= 100; + } + tuple.push_back(td::make_cnt_ref>(std::move(last_mc_blocks_100))); + } + + return td::make_cnt_ref>(std::move(tuple)); } td::optional PrecompiledContractsConfig::get_contract( diff --git a/crypto/block/transaction.cpp 
b/crypto/block/transaction.cpp index a32bad52..7acd1834 100644 --- a/crypto/block/transaction.cpp +++ b/crypto/block/transaction.cpp @@ -1336,7 +1336,8 @@ Ref Transaction::prepare_vm_c7(const ComputePhaseConfig& cfg) const { // See crypto/block/mc-config.cpp#2223 (get_prev_blocks_info) // [ wc:Integer shard:Integer seqno:Integer root_hash:Integer file_hash:Integer] = BlockId; // [ last_mc_blocks:[BlockId...] - // prev_key_block:BlockId ] : PrevBlocksInfo + // prev_key_block:BlockId + // last_mc_blocks_100:[BlockId...] ] : PrevBlocksInfo // The only context where PrevBlocksInfo (13 parameter of c7) is null is inside emulator // where it need to be set via transaction_emulator_set_prev_blocks_info (see emulator/emulator-extern.cpp) // Inside validator, collator and liteserver checking external message contexts diff --git a/doc/GlobalVersions.md b/doc/GlobalVersions.md index 11b9b68c..6b31e3ee 100644 --- a/doc/GlobalVersions.md +++ b/doc/GlobalVersions.md @@ -114,11 +114,16 @@ Operations for working with Merkle proofs, where cells can have non-zero level a ## Version 9 +### c7 tuple +c7 tuple parameter number **13** (previous blocks info tuple) now has the third element. It contains ids of the 16 last masterchain blocks with seqno divisible by 100. +Example: if the last masterchain block seqno is `19071` then the list contains block ids with seqnos `19000`, `18900`, ..., `17500`. + ### New TVM instructions - `SECP256K1_XONLY_PUBKEY_TWEAK_ADD` (`key tweak - 0 or f x y -1`) - performs [`secp256k1_xonly_pubkey_tweak_add`](https://github.com/bitcoin-core/secp256k1/blob/master/include/secp256k1_extrakeys.h#L120). `key` and `tweak` are 256-bit unsigned integers. 65-byte public key is returned as `uint8 f`, `uint256 x, y` (as in `ECRECOVER`). Gas cost: `1276`. - `mask SETCONTCTRMANY` (`cont - cont'`) - takes continuation, performs the equivalent of `c[i] PUSHCTR SWAP c[i] SETCONTCNR` for each `i` that is set in `mask` (mask is in `0..255`). 
- `SETCONTCTRMANYX` (`cont mask - cont'`) - same as `SETCONTCTRMANY`, but takes `mask` from stack. +- `PREVMCBLOCKS_100` returns the third element of the previous block info tuple (see above). ### Other changes - Fix `RAWRESERVE` action with flag `4` (use original balance of the account) by explicitly setting `original_balance` to `balance - msg_balance_remaining`. From 0fff1bd8c78e89aee179192e40f98545e825b28b Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Tue, 17 Dec 2024 11:18:34 +0300 Subject: [PATCH 04/61] Fix loading library cell in contract code --- crypto/block/transaction.cpp | 3 +- crypto/fift/lib/Asm.fif | 1 + crypto/smc-envelope/SmartContract.cpp | 4 +- crypto/vm/contops.cpp | 6 +-- crypto/vm/vm.cpp | 54 +++++++++++---------------- crypto/vm/vm.h | 18 ++++----- doc/GlobalVersions.md | 3 +- lite-client/lite-client.cpp | 2 +- utils/opcode-timing.cpp | 4 +- validator/impl/liteserver.cpp | 10 ++++- 10 files changed, 49 insertions(+), 56 deletions(-) diff --git a/crypto/block/transaction.cpp b/crypto/block/transaction.cpp index 7acd1834..92043376 100644 --- a/crypto/block/transaction.cpp +++ b/crypto/block/transaction.cpp @@ -1692,9 +1692,8 @@ bool Transaction::prepare_compute_phase(const ComputePhaseConfig& cfg) { } } } - vm::VmState vm{new_code, std::move(stack), gas, 1, new_data, vm_log, compute_vm_libraries(cfg)}; + vm::VmState vm{new_code, cfg.global_version, std::move(stack), gas, 1, new_data, vm_log, compute_vm_libraries(cfg)}; vm.set_max_data_depth(cfg.max_vm_data_depth); - vm.set_global_version(cfg.global_version); vm.set_c7(prepare_vm_c7(cfg)); // tuple with SmartContractInfo vm.set_chksig_always_succeed(cfg.ignore_chksig); vm.set_stop_on_accept_message(cfg.stop_on_accept_message); diff --git a/crypto/fift/lib/Asm.fif b/crypto/fift/lib/Asm.fif index 39cb759d..976093f8 100644 --- a/crypto/fift/lib/Asm.fif +++ b/crypto/fift/lib/Asm.fif @@ -1312,6 +1312,7 @@ x{F832} @Defop CONFIGPARAM x{F833} @Defop CONFIGOPTPARAM x{F83400} @Defop PREVMCBLOCKS 
x{F83401} @Defop PREVKEYBLOCK +x{F83402} @Defop PREVMCBLOCKS_100 x{F835} @Defop GLOBALID x{F836} @Defop GETGASFEE x{F837} @Defop GETSTORAGEFEE diff --git a/crypto/smc-envelope/SmartContract.cpp b/crypto/smc-envelope/SmartContract.cpp index 2578a951..c8e438ec 100644 --- a/crypto/smc-envelope/SmartContract.cpp +++ b/crypto/smc-envelope/SmartContract.cpp @@ -222,14 +222,14 @@ SmartContract::Answer run_smartcont(SmartContract::State state, td::Refdump(os, 2); LOG(DEBUG) << "VM stack:\n" << os.str(); } - vm::VmState vm{state.code, std::move(stack), gas, 1, state.data, log}; + int global_version = config ? config->get_global_version() : 0; + vm::VmState vm{state.code, global_version, std::move(stack), gas, 1, state.data, log}; vm.set_c7(std::move(c7)); vm.set_chksig_always_succeed(ignore_chksig); if (!libraries.is_null()) { vm.register_library_collection(libraries); } if (config) { - vm.set_global_version(config->get_global_version()); auto r_limits = config->get_size_limits_config(); if (r_limits.is_ok()) { vm.set_max_data_depth(r_limits.ok().max_vm_data_depth); diff --git a/crypto/vm/contops.cpp b/crypto/vm/contops.cpp index 3b892658..1ccf53da 100644 --- a/crypto/vm/contops.cpp +++ b/crypto/vm/contops.cpp @@ -261,10 +261,10 @@ int exec_runvm_common(VmState* st, unsigned mode) { vm::GasLimits gas{gas_limit, gas_max}; VmStateInterface::Guard guard{nullptr}; // Don't consume gas for creating/loading cells during VM init - VmState new_state{std::move(code), std::move(new_stack), gas, (int)mode & 3, std::move(data), - VmLog{}, std::vector>{}, std::move(c7)}; + VmState new_state{ + std::move(code), st->get_global_version(), std::move(new_stack), gas, (int)mode & 3, std::move(data), + VmLog{}, std::vector>{}, std::move(c7)}; new_state.set_chksig_always_succeed(st->get_chksig_always_succeed()); - new_state.set_global_version(st->get_global_version()); st->run_child_vm(std::move(new_state), with_data, mode & 32, mode & 8, mode & 128, ret_vals); return 0; } diff --git 
a/crypto/vm/vm.cpp b/crypto/vm/vm.cpp index fb774f80..77d5d8f8 100644 --- a/crypto/vm/vm.cpp +++ b/crypto/vm/vm.cpp @@ -22,6 +22,8 @@ #include "vm/log.h" #include "vm/vm.h" #include "cp0.h" +#include "memo.h" + #include namespace vm { @@ -31,33 +33,8 @@ VmState::VmState() : cp(-1), dispatch(&dummy_dispatch_table), quit0(true, 0), qu init_cregs(); } -VmState::VmState(Ref _code) - : code(std::move(_code)), cp(-1), dispatch(&dummy_dispatch_table), quit0(true, 0), quit1(true, 1) { - ensure_throw(init_cp(0)); - init_cregs(); -} - -VmState::VmState(Ref _code, Ref _stack, int flags, Ref _data, VmLog log, - std::vector> _libraries, Ref init_c7) - : code(std::move(_code)) - , stack(std::move(_stack)) - , cp(-1) - , dispatch(&dummy_dispatch_table) - , quit0(true, 0) - , quit1(true, 1) - , log(log) - , libraries(std::move(_libraries)) - , stack_trace((flags >> 2) & 1) { - ensure_throw(init_cp(0)); - set_c4(std::move(_data)); - if (init_c7.not_null()) { - set_c7(std::move(init_c7)); - } - init_cregs(flags & 1, flags & 2); -} - -VmState::VmState(Ref _code, Ref _stack, const GasLimits& gas, int flags, Ref _data, VmLog log, - std::vector> _libraries, Ref init_c7) +VmState::VmState(Ref _code, int global_version, Ref _stack, const GasLimits& gas, int flags, + Ref _data, VmLog log, std::vector> _libraries, Ref init_c7) : code(std::move(_code)) , stack(std::move(_stack)) , cp(-1) @@ -67,7 +44,8 @@ VmState::VmState(Ref _code, Ref _stack, const GasLimits& gas, , log(log) , gas(gas) , libraries(std::move(_libraries)) - , stack_trace((flags >> 2) & 1) { + , stack_trace((flags >> 2) & 1) + , global_version(global_version) { ensure_throw(init_cp(0)); set_c4(std::move(_data)); if (init_c7.not_null()) { @@ -102,12 +80,24 @@ void VmState::init_cregs(bool same_c3, bool push_0) { } } -Ref VmState::convert_code_cell(Ref code_cell) { +Ref VmState::convert_code_cell(Ref code_cell, int global_version, + const std::vector>& libraries) { if (code_cell.is_null()) { return {}; } - Ref csr{true, 
NoVmOrd(), code_cell}; - if (csr->is_valid()) { + Ref csr; + if (global_version >= 9) { + // Use DummyVmState instead of this to avoid consuming gas for cell loading + DummyVmState dummy{libraries, global_version}; + Guard guard(&dummy); + try { + csr = load_cell_slice_ref(code_cell); + } catch (VmError&) { // NOLINT(*-empty-catch) + } + } else { + csr = td::Ref{true, NoVmOrd(), code_cell}; + } + if (csr.not_null() && csr->is_valid()) { return csr; } return load_cell_slice_ref(CellBuilder{}.store_ref(std::move(code_cell)).finalize()); @@ -577,6 +567,7 @@ int run_vm_code(Ref code, Ref& stack, int flags, Ref* da GasLimits* gas_limits, std::vector> libraries, Ref init_c7, Ref* actions_ptr, int global_version) { VmState vm{code, + global_version, std::move(stack), gas_limits ? *gas_limits : GasLimits{}, flags, @@ -584,7 +575,6 @@ int run_vm_code(Ref code, Ref& stack, int flags, Ref* da log, std::move(libraries), std::move(init_c7)}; - vm.set_global_version(global_version); int res = vm.run(); stack = vm.get_stack_ref(); if (vm.committed() && data_ptr) { diff --git a/crypto/vm/vm.h b/crypto/vm/vm.h index cf532293..04c5e576 100644 --- a/crypto/vm/vm.h +++ b/crypto/vm/vm.h @@ -164,14 +164,12 @@ class VmState final : public VmStateInterface { bls_pairing_element_gas_price = 11800 }; VmState(); - VmState(Ref _code); - VmState(Ref _code, Ref _stack, int flags = 0, Ref _data = {}, VmLog log = {}, - std::vector> _libraries = {}, Ref init_c7 = {}); - VmState(Ref _code, Ref _stack, const GasLimits& _gas, int flags = 0, Ref _data = {}, + VmState(Ref _code, int global_version, Ref _stack, const GasLimits& _gas, int flags = 0, Ref _data = {}, VmLog log = {}, std::vector> _libraries = {}, Ref init_c7 = {}); - template - VmState(Ref code_cell, Args&&... args) - : VmState(convert_code_cell(std::move(code_cell)), std::forward(args)...) 
{ + VmState(Ref _code, int global_version, Ref _stack, const GasLimits& _gas, int flags = 0, + Ref _data = {}, VmLog log = {}, std::vector> _libraries = {}, Ref init_c7 = {}) + : VmState(convert_code_cell(std::move(_code), global_version, _libraries), global_version, std::move(_stack), + _gas, flags, std::move(_data), std::move(log), _libraries, std::move(init_c7)) { } VmState(const VmState&) = delete; VmState(VmState&&) = default; @@ -345,9 +343,6 @@ class VmState final : public VmStateInterface { int get_global_version() const override { return global_version; } - void set_global_version(int version) { - global_version = version; - } int call(Ref cont); int call(Ref cont, int pass_args, int ret_args = -1); int jump(Ref cont); @@ -382,7 +377,8 @@ class VmState final : public VmStateInterface { } return res; } - static Ref convert_code_cell(Ref code_cell); + static Ref convert_code_cell(Ref code_cell, int global_version, + const std::vector>& libraries); bool try_commit(); void force_commit(); diff --git a/doc/GlobalVersions.md b/doc/GlobalVersions.md index 6b31e3ee..64b2342a 100644 --- a/doc/GlobalVersions.md +++ b/doc/GlobalVersions.md @@ -130,4 +130,5 @@ Example: if the last masterchain block seqno is `19071` then the list contains b - Previously it did not work if storage fee was greater than the original balance. - Jumps to nested continuations of depth more than 8 consume 1 gas for eact subsequent continuation (this does not affect most of TVM code). - Fix exception code in some TVM instructions: now `stk_und` has priority over other error codes. 
- - `PFXDICTADD`, `PFXDICTSET`, `PFXDICTREPLACE`, `PFXDICTDEL`, `GETGASFEE`, `GETSTORAGEFEE`, `GETFORWARDFEE`, `GETORIGINALFWDFEE`, `GETGASFEESIMPLE`, `GETFORWARDFEESIMPLE`, `HASHEXT` \ No newline at end of file + - `PFXDICTADD`, `PFXDICTSET`, `PFXDICTREPLACE`, `PFXDICTDEL`, `GETGASFEE`, `GETSTORAGEFEE`, `GETFORWARDFEE`, `GETORIGINALFWDFEE`, `GETGASFEESIMPLE`, `GETFORWARDFEESIMPLE`, `HASHEXT` +- Now setting the contract code to a library cell does not consume additional gas on execution of the code. \ No newline at end of file diff --git a/lite-client/lite-client.cpp b/lite-client/lite-client.cpp index dc09ae52..1050e6d2 100644 --- a/lite-client/lite-client.cpp +++ b/lite-client/lite-client.cpp @@ -2227,7 +2227,7 @@ void TestNode::run_smc_method(int mode, ton::BlockIdExt ref_blk, ton::BlockIdExt // auto log = create_vm_log(ctx.error_stream ? &ostream_logger : nullptr); vm::GasLimits gas{gas_limit}; LOG(DEBUG) << "creating VM"; - vm::VmState vm{code, std::move(stack), gas, 1, data, vm::VmLog()}; + vm::VmState vm{code, ton::SUPPORTED_VERSION, std::move(stack), gas, 1, data, vm::VmLog()}; vm.set_c7(liteclient::prepare_vm_c7(info.gen_utime, info.gen_lt, td::make_ref(acc.addr->clone()), balance)); // tuple with SmartContractInfo // vm.incr_stack_trace(1); // enable stack dump after each step diff --git a/utils/opcode-timing.cpp b/utils/opcode-timing.cpp index 876ba109..47171eec 100644 --- a/utils/opcode-timing.cpp +++ b/utils/opcode-timing.cpp @@ -135,8 +135,8 @@ runInfo time_run_vm(td::Slice command, td::Ref stack) { CHECK(stack.is_unique()); try { vm::GasLimits gas_limit; - vm::VmState vm{vm::load_cell_slice_ref(cell), std::move(stack), gas_limit, 0, {}, vm::VmLog{}, {}, c7}; - vm.set_global_version(ton::SUPPORTED_VERSION); + vm::VmState vm{ + vm::load_cell_slice_ref(cell), ton::SUPPORTED_VERSION, std::move(stack), gas_limit, 0, {}, vm::VmLog{}, {}, c7}; std::clock_t cStart = std::clock(); int ret = ~vm.run(); std::clock_t cEnd = std::clock(); diff --git 
a/validator/impl/liteserver.cpp b/validator/impl/liteserver.cpp index 6bd4e421..723dbfe9 100644 --- a/validator/impl/liteserver.cpp +++ b/validator/impl/liteserver.cpp @@ -1520,11 +1520,17 @@ void LiteQuery::finish_runSmcMethod(td::BufferSlice shard_proof, td::BufferSlice libraries.push_back(acc_libs); } vm::GasLimits gas{gas_limit, gas_limit}; - vm::VmState vm{code, std::move(stack_), gas, 1, std::move(data), vm::VmLog::Null(), std::move(libraries)}; + vm::VmState vm{code, + config->get_global_version(), + std::move(stack_), + gas, + 1, + std::move(data), + vm::VmLog::Null(), + std::move(libraries)}; auto c7 = prepare_vm_c7(gen_utime, gen_lt, td::make_ref(acc.addr->clone()), balance, config.get(), std::move(code), due_payment); vm.set_c7(c7); // tuple with SmartContractInfo - vm.set_global_version(config->get_global_version()); // vm.incr_stack_trace(1); // enable stack dump after each step LOG(INFO) << "starting VM to run GET-method of smart contract " << acc_workchain_ << ":" << acc_addr_.to_hex(); // **** RUN VM **** From 46d4e12b4c78d5d4f37850e2f7910ad4b06d51fe Mon Sep 17 00:00:00 2001 From: neodix42 Date: Tue, 7 Jan 2025 08:15:51 -0800 Subject: [PATCH 05/61] extend generate-random-id utility... 
(#1462) * improve windows builds * install nasm for openssl compilation on win * install nasm for openssl compilation on win for github * add create-state, proxy-liteserver, rldp-http-proxy, http-proxy, adnl-proxy, dht-server, libtonlibjson.so and libemulator.so to docker image * build new artifacts inside Docker * add files smartcont/auto/* to docker image * build arm64 in docker branch build * improve secp256k1 build * extend generate-random-id with -f parameter (to read addr list from a file) --- utils/generate-random-id.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/utils/generate-random-id.cpp b/utils/generate-random-id.cpp index f606f358..a487ac17 100644 --- a/utils/generate-random-id.cpp +++ b/utils/generate-random-id.cpp @@ -84,6 +84,19 @@ int main(int argc, char *argv[]) { TRY_RESULT_PREFIX_ASSIGN(addr_list, ton::adnl::AdnlAddressList::create(addr_list_tl), "bad addr list: "); return td::Status::OK(); }); + p.add_checked_option('f', "path to file with addr-list", "addr list to sign", [&](td::Slice key) { + if (addr_list) { + return td::Status::Error("duplicate '-f' option"); + } + + td::BufferSlice bs(key); + TRY_RESULT_PREFIX(data, td::read_file(key.str()), "failed to read addr-list: "); + TRY_RESULT_PREFIX(as_json_value, td::json_decode(data.as_slice()), "bad addr list JSON: "); + ton::tl_object_ptr addr_list_tl; + TRY_STATUS_PREFIX(td::from_json(addr_list_tl, std::move(as_json_value)), "bad addr list TL: "); + TRY_RESULT_PREFIX_ASSIGN(addr_list, ton::adnl::AdnlAddressList::create(addr_list_tl), "bad addr list: "); + return td::Status::OK(); + }); p.add_checked_option('i', "network-id", "dht network id (default: -1)", [&](td::Slice key) { if (network_id_opt) { return td::Status::Error("duplicate '-i' option"); From 3540424aa1571a95148eed1ce5c66fee89e6f05c Mon Sep 17 00:00:00 2001 From: tolk-vm Date: Mon, 16 Dec 2024 21:19:45 +0300 Subject: [PATCH 06/61] [Tolk] AST-based semantic analysis, get rid of Expr This is a huge refactoring 
focusing on untangling compiler internals (previously forked from FunC). The goal is to convert AST directly to Op (a kind of IR representation), doing all code analysis at AST level. Notable changes: - AST-based semantic kernel includes: registering global symbols, scope handling and resolving local/global identifiers, lvalue/rvalue calc and check, implicit return detection, mutability analysis, pure/impure validity checks, simple constant folding - values of `const` variables are calculated NOT based on CodeBlob, but via a newly-introduced AST-based constant evaluator - AST vertices are now inherited from expression/statement/other; expression vertices have common properties (TypeExpr, lvalue/rvalue) - symbol table is rewritten completely, SymDef/SymVal no longer exist, lexer now doesn't need to register identifiers - AST vertices have references to symbols, filled at different stages of pipeline - the remaining "FunC legacy part" is almost unchanged besides Expr which was fully dropped; AST is converted to Ops (IR) directly --- .../tests/allow_post_modification.tolk | 93 +- tolk-tester/tests/cells-slices.tolk | 14 +- tolk-tester/tests/codegen_check_demo.tolk | 2 +- tolk-tester/tests/invalid-call-1.tolk | 2 +- tolk-tester/tests/invalid-call-5.tolk | 2 +- tolk-tester/tests/invalid-const-1.tolk | 8 + tolk-tester/tests/invalid-mutate-1.tolk | 2 +- tolk-tester/tests/invalid-mutate-11.tolk | 2 +- tolk-tester/tests/invalid-mutate-2.tolk | 2 +- tolk-tester/tests/invalid-mutate-3.tolk | 2 +- tolk-tester/tests/invalid-mutate-4.tolk | 2 +- tolk-tester/tests/invalid-mutate-5.tolk | 2 +- tolk-tester/tests/invalid-mutate-6.tolk | 2 +- tolk-tester/tests/invalid-mutate-7.tolk | 2 +- tolk-tester/tests/invalid-mutate-8.tolk | 2 +- tolk-tester/tests/invalid-mutate-9.tolk | 2 +- tolk-tester/tests/invalid-pure-1.tolk | 2 +- tolk-tester/tests/invalid-pure-3.tolk | 1 + tolk-tester/tests/invalid-pure-4.tolk | 16 + tolk-tester/tests/invalid-redefinition-6.tolk | 10 + 
tolk-tester/tests/invalid-typing-6.tolk | 8 + tolk-tester/tests/mutate-methods.tolk | 21 +- tolk-tester/tests/null-keyword.tolk | 14 +- tolk-tester/tests/self-keyword.tolk | 40 + tolk-tester/tests/try-func.tolk | 106 +- tolk-tester/tests/unreachable-1.tolk | 14 + tolk-tester/tests/unreachable-2.tolk | 22 + tolk-tester/tests/var-apply.tolk | 93 + tolk-tester/tolk-tester.js | 8 +- tolk-tester/tolk-tester.py | 5 +- tolk/CMakeLists.txt | 10 +- tolk/abscode.cpp | 35 +- tolk/analyzer.cpp | 30 +- tolk/asmops.cpp | 22 +- tolk/ast-from-tokens.cpp | 159 +- tolk/ast-from-tokens.h | 6 +- tolk/ast-replacer.h | 135 +- tolk/ast-stringifier.h | 20 +- tolk/ast-visitor.h | 68 +- tolk/ast.cpp | 62 +- tolk/ast.h | 535 ++++-- tolk/builtins.cpp | 187 +- tolk/codegen.cpp | 62 +- tolk/compiler-state.cpp | 13 + tolk/compiler-state.h | 34 +- tolk/constant-evaluator.cpp | 313 ++++ tolk/constant-evaluator.h | 45 + tolk/fwd-declarations.h | 39 + tolk/gen-abscode.cpp | 429 ----- tolk/lexer.cpp | 8 +- tolk/lexer.h | 37 +- tolk/pipe-ast-to-legacy.cpp | 1647 ++++++----------- tolk/pipe-calc-rvalue-lvalue.cpp | 192 ++ tolk/pipe-check-pure-impure.cpp | 107 ++ tolk/pipe-check-rvalue-lvalue.cpp | 172 ++ tolk/pipe-constant-folding.cpp | 68 + tolk/pipe-detect-unreachable.cpp | 127 ++ tolk/pipe-discover-parse-sources.cpp | 10 +- tolk/pipe-find-unused-symbols.cpp | 48 +- tolk/pipe-generate-fif-output.cpp | 188 +- tolk/pipe-infer-check-types.cpp | 524 ++++++ tolk/pipe-refine-lvalue-for-mutate.cpp | 118 ++ tolk/pipe-register-symbols.cpp | 230 +-- tolk/pipe-resolve-symbols.cpp | 272 +++ tolk/pipeline.h | 8 + tolk/src-file.h | 5 +- tolk/symtable.cpp | 199 +- tolk/symtable.h | 261 ++- tolk/tolk.cpp | 8 + tolk/tolk.h | 394 +--- tolk/unify-types.cpp | 2 +- 71 files changed, 4270 insertions(+), 3060 deletions(-) create mode 100644 tolk-tester/tests/invalid-const-1.tolk create mode 100644 tolk-tester/tests/invalid-pure-4.tolk create mode 100644 tolk-tester/tests/invalid-redefinition-6.tolk create mode 100644 
tolk-tester/tests/invalid-typing-6.tolk create mode 100644 tolk-tester/tests/unreachable-1.tolk create mode 100644 tolk-tester/tests/unreachable-2.tolk create mode 100644 tolk/constant-evaluator.cpp create mode 100644 tolk/constant-evaluator.h create mode 100644 tolk/fwd-declarations.h delete mode 100644 tolk/gen-abscode.cpp create mode 100644 tolk/pipe-calc-rvalue-lvalue.cpp create mode 100644 tolk/pipe-check-pure-impure.cpp create mode 100644 tolk/pipe-check-rvalue-lvalue.cpp create mode 100644 tolk/pipe-constant-folding.cpp create mode 100644 tolk/pipe-detect-unreachable.cpp create mode 100644 tolk/pipe-infer-check-types.cpp create mode 100644 tolk/pipe-refine-lvalue-for-mutate.cpp create mode 100644 tolk/pipe-resolve-symbols.cpp diff --git a/tolk-tester/tests/allow_post_modification.tolk b/tolk-tester/tests/allow_post_modification.tolk index 5cfa2f3d..5e0ce6b9 100644 --- a/tolk-tester/tests/allow_post_modification.tolk +++ b/tolk-tester/tests/allow_post_modification.tolk @@ -2,85 +2,112 @@ fun unsafe_tuple(x: X): tuple asm "NOP"; fun inc(x: int, y: int): (int, int) { - return (x + y, y * 10); + return (x + y, y * 10); } fun `~inc`(mutate self: int, y: int): int { - val (newX, newY) = inc(self, y); - self = newX; - return newY; + val (newX, newY) = inc(self, y); + self = newX; + return newY; } +fun eq(v: X): X { return v; } +fun eq2(v: (int, int)) { return v; } +fun mul2(mutate dest: int, v: int): int { dest = v*2; return dest; } +fun multens(mutate self: (int, int), v: (int, int)): (int, int) { var (f, s) = self; var (m1, m2) = v; self = (f*m1, s*m2); return self; } + @method_id(11) fun test_return(x: int): (int, int, int, int, int, int, int) { - return (x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x); + return (x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x); } @method_id(12) fun test_assign(x: int): (int, int, int, int, int, int, int) { - var (x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int) = (x, x.`~inc`(x / 20), x, x=x*2, x, x+=1, x); - 
return (x1, x2, x3, x4, x5, x6, x7); + var (x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int) = (x, x.`~inc`(x / 20), x, x=x*2, x, x+=1, x); + return (x1, x2, x3, x4, x5, x6, x7); } @method_id(13) fun test_tuple(x: int): tuple { - var t: tuple = unsafe_tuple([x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x]); - return t; + var t: tuple = unsafe_tuple([x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x]); + return t; } @method_id(14) fun test_tuple_assign(x: int): (int, int, int, int, int, int, int) { - var [x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int] = [x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x]; - return (x1, x2, x3, x4, x5, x6, x7); + var [x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int] = [x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x]; + return (x1, x2, x3, x4, x5, x6, x7); } fun foo1(x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int): (int, int, int, int, int, int, int) { - return (x1, x2, x3, x4, x5, x6, x7); + return (x1, x2, x3, x4, x5, x6, x7); } @method_id(15) fun test_call_1(x: int): (int, int, int, int, int, int, int) { - return foo1(x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x); + return foo1(x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x); } fun foo2(x1: int, x2: int, x3456: (int, int, int, int), x7: int): (int, int, int, int, int, int, int) { - var (x3: int, x4: int, x5: int, x6: int) = x3456; - return (x1, x2, x3, x4, x5, x6, x7); + var (x3: int, x4: int, x5: int, x6: int) = x3456; + return (x1, x2, x3, x4, x5, x6, x7); } @method_id(16) fun test_call_2(x: int): (int, int, int, int, int, int, int) { - return foo2(x, x.`~inc`(x / 20), (x, x = x * 2, x, x += 1), x); + return foo2(x, x.`~inc`(x / 20), (x, x = x * 2, x, x += 1), x); } fun asm_func(x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int): (int, int, int, int, int, int, int) -asm - (x4 x5 x6 x7 x1 x2 x3->0 1 2 3 4 5 6) "NOP"; + asm (x4 x5 x6 x7 x1 x2 x3->0 1 2 3 4 5 6) "NOP"; @method_id(17) fun test_call_asm_old(x: 
int): (int, int, int, int, int, int, int) { - return asm_func(x, x += 1, x, x, x.`~inc`(x / 20), x, x = x * 2); + return asm_func(x, x += 1, x, x, x.`~inc`(x / 20), x, x = x * 2); } @method_id(18) fun test_call_asm_new(x: int): (int, int, int, int, int, int, int) { - return asm_func(x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x); + return asm_func(x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x); } global xx: int; @method_id(19) -fun test_global(x: int): (int, int, int, int, int, int, int) { - xx = x; - return (xx, xx.`~inc`(xx / 20), xx, xx = xx * 2, xx, xx += 1, xx); +fun test_global(x: int) { + xx = x; + return (x, xx, xx.`~inc`(xx / 20), eq(xx += (x *= 0)), xx = xx * 2, xx, xx += 1, xx, x); } @method_id(20) fun test_if_else(x: int): (int, int, int, int, int) { - if (x > 10) { - return (x.`~inc`(8), x + 1, x = 1, x <<= 3, x); - } else { - xx = 9; - return (x, x.`~inc`(-4), x.`~inc`(-1), x >= 1, x = x + xx); - } + if (x > 10) { + return (x.`~inc`(8), x + 1, x = 1, x <<= 3, x); + } else { + xx = 9; + return (x, x.`~inc`(-4), x.`~inc`(-1), x >= 1, x = x + xx); + } +} + +@method_id(21) +fun test_assign_with_inner(x: int) { + return (x, x += 10, [(x, x += 20, eq(x -= 50), x)], eq2((x, x *= eq(x /= 2)))); +} + +@method_id(22) +fun test_assign_with_mutate(x: int) { + return (x, mul2(mutate x, x += 5), x.`~inc`(mul2(mutate x, x)), x); +} + +@method_id(23) +fun test_assign_tensor(x: (int, int)) { + var fs = (0, 0); + return (x, x = (20, 30), fs = x.multens((1, 2)), fs.multens(multens(mutate x, (-1, -1))), x, fs); +} + +global fs: (int, int); +@method_id(24) +fun test_assign_tensor_global(x: (int, int)) { + fs = (0, 0); + return (x, x = (20, 30), fs = x.multens((1, 2)), fs.multens(multens(mutate x, (-1, -1))), x, fs); } fun main() { @@ -96,9 +123,13 @@ fun main() { @testcase | 16 | 100 | 100 50 105 210 210 211 211 @testcase | 17 | 100 | 101 50 106 212 100 101 101 @testcase | 18 | 100 | 210 210 211 211 100 50 105 -@testcase | 19 | 100 | 100 50 105 210 210 211 211 
+@testcase | 19 | 100 | 100 100 50 105 210 210 211 211 0 @testcase | 20 | 80 | 80 89 1 8 8 @testcase | 20 | 9 | 9 -40 -10 -1 13 +@testcase | 21 | 100 | 100 110 [ 110 130 80 80 ] 80 3200 +@testcase | 22 | 100 | 100 210 4200 630 +@testcase | 23 | 1 1 | 1 1 20 30 20 60 -400 -3600 -20 -60 -400 -3600 +@testcase | 24 | 1 1 | 1 1 20 30 20 60 -400 -3600 -20 -60 -400 -3600 @fif_codegen """ @@ -107,5 +138,5 @@ fun main() { inc CALLDICT // self newY }> """ -@code_hash 97139400653362069936987769894397430077752335662822462908581556703209313861576 +@code_hash 33262590582878205026101577472505372101182291690814957175155528952950621243206 */ diff --git a/tolk-tester/tests/cells-slices.tolk b/tolk-tester/tests/cells-slices.tolk index e1d28b8b..adb5ad22 100644 --- a/tolk-tester/tests/cells-slices.tolk +++ b/tolk-tester/tests/cells-slices.tolk @@ -216,15 +216,15 @@ Note, that since 'compute-asm-ltr' became on be default, chaining methods codege """ test6 PROC:<{ // - NEWC // _1 - 1 PUSHINT // _1 _2=1 - SWAP // _2=1 _1 + NEWC // _0 + 1 PUSHINT // _0 _1=1 + SWAP // _1=1 _0 32 STU // _0 - 2 PUSHINT // _0 _6=2 - SWAP // _6=2 _0 + 2 PUSHINT // _0 _5=2 + SWAP // _5=2 _0 32 STU // _0 - 3 PUSHINT // _0 _10=3 - SWAP // _10=3 _0 + 3 PUSHINT // _0 _9=3 + SWAP // _9=3 _0 32 STU // _0 }> """ diff --git a/tolk-tester/tests/codegen_check_demo.tolk b/tolk-tester/tests/codegen_check_demo.tolk index 02379540..dc78abf8 100644 --- a/tolk-tester/tests/codegen_check_demo.tolk +++ b/tolk-tester/tests/codegen_check_demo.tolk @@ -35,7 +35,7 @@ Below, I just give examples of @fif_codegen tag: """ main PROC:<{ // s - 17 PUSHINT // s _3=17 + 17 PUSHINT // s _1=17 OVER // s z=17 t WHILE:<{ ... 
diff --git a/tolk-tester/tests/invalid-call-1.tolk b/tolk-tester/tests/invalid-call-1.tolk index 1c32422e..57a33c4b 100644 --- a/tolk-tester/tests/invalid-call-1.tolk +++ b/tolk-tester/tests/invalid-call-1.tolk @@ -5,5 +5,5 @@ fun main() { /** @compilation_should_fail The message is weird now, but later I'll rework error messages anyway. -@stderr cannot apply expression of type int to an expression of type (): cannot unify type () -> ??3 with int +@stderr cannot apply expression of type int to an expression of type (): cannot unify type () -> ??2 with int */ diff --git a/tolk-tester/tests/invalid-call-5.tolk b/tolk-tester/tests/invalid-call-5.tolk index 89ab026a..32905cd7 100644 --- a/tolk-tester/tests/invalid-call-5.tolk +++ b/tolk-tester/tests/invalid-call-5.tolk @@ -8,6 +8,6 @@ fun main() { /** @compilation_should_fail -@stderr rvalue expected +@stderr `_` can't be used as a value; it's a placeholder for a left side of assignment @stderr inc(_) */ diff --git a/tolk-tester/tests/invalid-const-1.tolk b/tolk-tester/tests/invalid-const-1.tolk new file mode 100644 index 00000000..10e8303a --- /dev/null +++ b/tolk-tester/tests/invalid-const-1.tolk @@ -0,0 +1,8 @@ +fun main() { + return 9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999; +} + +/** +@compilation_should_fail +@stderr invalid integer constant + */ diff --git a/tolk-tester/tests/invalid-mutate-1.tolk b/tolk-tester/tests/invalid-mutate-1.tolk index 237940fc..280d1e99 100644 --- a/tolk-tester/tests/invalid-mutate-1.tolk +++ b/tolk-tester/tests/invalid-mutate-1.tolk @@ -7,5 +7,5 @@ fun cantAssignToVal() { /** @compilation_should_fail -@stderr modifying an immutable variable `x` +@stderr modifying immutable variable `x` */ diff --git a/tolk-tester/tests/invalid-mutate-11.tolk b/tolk-tester/tests/invalid-mutate-11.tolk index 9f2c2601..dfc69851 100644 --- a/tolk-tester/tests/invalid-mutate-11.tolk +++ b/tolk-tester/tests/invalid-mutate-11.tolk @@ -4,5 
+4,5 @@ fun load32(self: slice): int { /** @compilation_should_fail -@stderr modifying `self` (call a mutating method), which is immutable by default +@stderr modifying `self`, which is immutable by default */ diff --git a/tolk-tester/tests/invalid-mutate-2.tolk b/tolk-tester/tests/invalid-mutate-2.tolk index 7501fdaf..71afe730 100644 --- a/tolk-tester/tests/invalid-mutate-2.tolk +++ b/tolk-tester/tests/invalid-mutate-2.tolk @@ -6,5 +6,5 @@ fun cantAssignToVal() { /** @compilation_should_fail -@stderr modifying an immutable variable `x` +@stderr modifying immutable variable `x` */ diff --git a/tolk-tester/tests/invalid-mutate-3.tolk b/tolk-tester/tests/invalid-mutate-3.tolk index c49973f7..d556c9ed 100644 --- a/tolk-tester/tests/invalid-mutate-3.tolk +++ b/tolk-tester/tests/invalid-mutate-3.tolk @@ -7,5 +7,5 @@ fun cantAssignToConst() { /** @compilation_should_fail -@stderr modifying an immutable variable `op_increase` +@stderr modifying immutable constant */ diff --git a/tolk-tester/tests/invalid-mutate-4.tolk b/tolk-tester/tests/invalid-mutate-4.tolk index f25a707c..5f2c111d 100644 --- a/tolk-tester/tests/invalid-mutate-4.tolk +++ b/tolk-tester/tests/invalid-mutate-4.tolk @@ -10,5 +10,5 @@ fun cantPassToMutatingFunction() { /** @compilation_should_fail -@stderr modifying an immutable variable `myVal` +@stderr modifying immutable variable `myVal` */ diff --git a/tolk-tester/tests/invalid-mutate-5.tolk b/tolk-tester/tests/invalid-mutate-5.tolk index fd8d1192..2b282cf0 100644 --- a/tolk-tester/tests/invalid-mutate-5.tolk +++ b/tolk-tester/tests/invalid-mutate-5.tolk @@ -9,6 +9,6 @@ fun cantCallMutatingMethod(c: cell) { /** @compilation_should_fail -@stderr modifying an immutable variable `s` (call a mutating method) +@stderr modifying immutable variable `s` @stderr s.loadUint */ diff --git a/tolk-tester/tests/invalid-mutate-6.tolk b/tolk-tester/tests/invalid-mutate-6.tolk index bb577ae4..749d9cab 100644 --- a/tolk-tester/tests/invalid-mutate-6.tolk +++ 
b/tolk-tester/tests/invalid-mutate-6.tolk @@ -11,6 +11,6 @@ fun cantCallMutatingFunctionWithImmutable() { /** @compilation_should_fail -@stderr modifying an immutable variable `op_increase` (call a mutating function) +@stderr modifying immutable constant @stderr inc(mutate op_increase) */ diff --git a/tolk-tester/tests/invalid-mutate-7.tolk b/tolk-tester/tests/invalid-mutate-7.tolk index 5b6b6afe..de3bce45 100644 --- a/tolk-tester/tests/invalid-mutate-7.tolk +++ b/tolk-tester/tests/invalid-mutate-7.tolk @@ -10,6 +10,6 @@ fun cantCallMutatingFunctionWithRvalue() { /** @compilation_should_fail -@stderr lvalue expected (call a mutating function) +@stderr literal can not be used as lvalue @stderr incBoth(mutate x, mutate 30) */ diff --git a/tolk-tester/tests/invalid-mutate-8.tolk b/tolk-tester/tests/invalid-mutate-8.tolk index 0dd7c568..9b14e28f 100644 --- a/tolk-tester/tests/invalid-mutate-8.tolk +++ b/tolk-tester/tests/invalid-mutate-8.tolk @@ -6,5 +6,5 @@ fun cantRedefImmutable() { /** @compilation_should_fail -@stderr modifying an immutable variable `x` (left side of assignment) +@stderr `redef` for immutable variable */ diff --git a/tolk-tester/tests/invalid-mutate-9.tolk b/tolk-tester/tests/invalid-mutate-9.tolk index 7e79052e..3489a288 100644 --- a/tolk-tester/tests/invalid-mutate-9.tolk +++ b/tolk-tester/tests/invalid-mutate-9.tolk @@ -4,6 +4,6 @@ fun increment(self: int) { /** @compilation_should_fail -@stderr modifying `self` (left side of assignment), which is immutable by default +@stderr modifying `self`, which is immutable by default @stderr probably, you want to declare `mutate self` */ diff --git a/tolk-tester/tests/invalid-pure-1.tolk b/tolk-tester/tests/invalid-pure-1.tolk index 5baa3292..4f0e9142 100644 --- a/tolk-tester/tests/invalid-pure-1.tolk +++ b/tolk-tester/tests/invalid-pure-1.tolk @@ -4,7 +4,7 @@ fun f_pure(): int { return f_impure(); } -fun f_impure(): int {} +fun f_impure(): int { return 0; } fun main(): int { return f_pure(); diff --git 
a/tolk-tester/tests/invalid-pure-3.tolk b/tolk-tester/tests/invalid-pure-3.tolk index f64b81ce..31d4f021 100644 --- a/tolk-tester/tests/invalid-pure-3.tolk +++ b/tolk-tester/tests/invalid-pure-3.tolk @@ -2,6 +2,7 @@ fun validate_input(input: cell): (int, int) { var (x, y, z, correct) = calculateCellSize(input, 10); assert(correct) throw 102; + return (x, y); } @pure diff --git a/tolk-tester/tests/invalid-pure-4.tolk b/tolk-tester/tests/invalid-pure-4.tolk new file mode 100644 index 00000000..868498f6 --- /dev/null +++ b/tolk-tester/tests/invalid-pure-4.tolk @@ -0,0 +1,16 @@ +global set: int; + +@pure +fun someF(): int { + var set redef = 0; + return set; +} + +/** +@compilation_should_fail +@stderr +""" +an impure operation in a pure function +var set +""" +*/ diff --git a/tolk-tester/tests/invalid-redefinition-6.tolk b/tolk-tester/tests/invalid-redefinition-6.tolk new file mode 100644 index 00000000..e6b087c6 --- /dev/null +++ b/tolk-tester/tests/invalid-redefinition-6.tolk @@ -0,0 +1,10 @@ +const s1 = "asdf"; + +fun main() { + var s1 redef = "d"; +} + +/** +@compilation_should_fail +@stderr `redef` for unknown variable + */ diff --git a/tolk-tester/tests/invalid-typing-6.tolk b/tolk-tester/tests/invalid-typing-6.tolk new file mode 100644 index 00000000..dcdab5f1 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-6.tolk @@ -0,0 +1,8 @@ +fun failWhenTernaryConditionNotInt(cs: slice) { + return cs ? 
1 : 0; +} + +/** +@compilation_should_fail +@stderr condition of ternary ?: operator must be an integer + */ diff --git a/tolk-tester/tests/mutate-methods.tolk b/tolk-tester/tests/mutate-methods.tolk index b9184ca9..73a6591b 100644 --- a/tolk-tester/tests/mutate-methods.tolk +++ b/tolk-tester/tests/mutate-methods.tolk @@ -118,12 +118,19 @@ fun updateTwoItems(mutate self: (int, int), byValue: int) { self = (first + byValue, second + byValue); } +global t107_1: int; +global t107_2: int; + @method_id(107) fun testMutableTensor() { var t = (40, 50); t.updateTwoItems(10); updateTwoItems(mutate t, 10); - return t; + t107_1 = 1; + t107_2 = 2; + (t107_1, t107_2).updateTwoItems(10); + updateTwoItems(mutate (t107_1, t107_2), 10); + return (t, t107_1, t107_2); } @pure @@ -278,7 +285,7 @@ fun main(){} @testcase | 104 | | 1 2 3 @testcase | 105 | | 5 5 110 @testcase | 106 | | 160 110 -@testcase | 107 | | 60 70 +@testcase | 107 | | 60 70 21 22 @testcase | 110 | | 320 @testcase | 111 | | 55 55 @testcase | 112 | | [ 1 13 3 23 33 ] @@ -300,7 +307,7 @@ fun main(){} ... incrementTwoInPlace CALLDICT // x y sum1 -ROT - 10 PUSHINT // sum1 x y _9=10 + 10 PUSHINT // sum1 x y _8=10 incrementTwoInPlace CALLDICT // sum1 x y sum2 s1 s3 s0 XCHG3 // x y sum1 sum2 }> @@ -310,8 +317,8 @@ fun main(){} """ load_next PROC:<{ // cs - 32 LDI // _1 cs - SWAP // cs _1 + 32 LDI // _3 cs + SWAP // cs _3 }> """ @@ -319,7 +326,7 @@ fun main(){} """ testStoreUintPureUnusedResult PROC:<{ // - 0 PUSHINT // _12=0 + 0 PUSHINT // _11=0 }> """ @@ -330,7 +337,7 @@ fun main(){} NEWC // b STIX // _2 DROP // - 0 PUSHINT // _12=0 + 0 PUSHINT // _11=0 }> """ diff --git a/tolk-tester/tests/null-keyword.tolk b/tolk-tester/tests/null-keyword.tolk index cdfe5acf..8fcf2584 100644 --- a/tolk-tester/tests/null-keyword.tolk +++ b/tolk-tester/tests/null-keyword.tolk @@ -145,14 +145,14 @@ fun main() { """ test7 PROC:<{ ... 
- LDOPTREF // b _20 _19 + LDOPTREF // b _18 _17 DROP // b c - ISNULL // b _13 - 10 MULCONST // b _15 - SWAP // _15 b - ISNULL // _15 _16 - 0 EQINT // _15 _17 - ADD // _18 + ISNULL // b _11 + 10 MULCONST // b _13 + SWAP // _13 b + ISNULL // _13 _14 + 0 EQINT // _13 _15 + ADD // _16 }> """ */ diff --git a/tolk-tester/tests/self-keyword.tolk b/tolk-tester/tests/self-keyword.tolk index a339e7d0..ba779454 100644 --- a/tolk-tester/tests/self-keyword.tolk +++ b/tolk-tester/tests/self-keyword.tolk @@ -158,6 +158,44 @@ fun testNotMutatingChainableSelfMutateAnother(initial: int) { return (arg, c108, c109, x); } +fun pickG110(mutate self: int, mutate pushTo: tuple): self { + self += 10; + pushTo.tuplePush(c110); + return self; +} + +global tup110: tuple; +global c110: int; + +@method_id(110) +fun testMutateGlobalsLValue(init: int) { + c110 = init; + tup110 = createEmptyTuple(); + c110.incChained().incChained().pickG110(mutate tup110).incChained().pickG110(mutate tup110).incChained(); + return (c110, tup110); +} + +fun myTuplePush(mutate self: tuple, value: T): self { + self.tuplePush(value); + return self; +} + +fun myTupleAt(self: tuple, idx: int): T { + return self.tupleAt(idx); +} + +global tup111: tuple; + +@method_id(111) +fun testForallFunctionsWithSelf() { + var t = createEmptyTuple(); + tup111 = createEmptyTuple(); + t.myTuplePush(10); + tup111.myTuplePush(1).myTuplePush(2).myTuplePush(3); + return (t.myTupleAt(0), tup111.myTupleAt(tup111.tupleSize() - 1), tup111); +} + + fun main() { } @@ -179,6 +217,8 @@ fun main() { } @testcase | 109 | 200 | 200 3 1 2 @testcase | 109 | 100 | 100 0 0 1 @testcase | 109 | 102 | 102 2 1 2 +@testcase | 110 | 0 | 24 [ 2 13 ] +@testcase | 111 | | 10 3 [ 1 2 3 ] @fif_codegen """ diff --git a/tolk-tester/tests/try-func.tolk b/tolk-tester/tests/try-func.tolk index 7963a850..5ce03ff1 100644 --- a/tolk-tester/tests/try-func.tolk +++ b/tolk-tester/tests/try-func.tolk @@ -1,7 +1,6 @@ fun unsafeGetInt(any: X): int asm "NOP"; -@method_id(11) fun 
foo(x: int): int { try { if (x == 7) { @@ -14,7 +13,6 @@ fun foo(x: int): int { } @inline -@method_id(12) fun foo_inline(x: int): int { try { assert(!(x == 7)) throw 44; @@ -25,7 +23,6 @@ fun foo_inline(x: int): int { } @inline_ref -@method_id(13) fun foo_inlineref(x: int): int { try { if (x == 7) { throw (44, 2); } @@ -35,26 +32,25 @@ fun foo_inlineref(x: int): int { } } -@method_id(1) +@method_id(101) fun test(x: int, y: int, z: int): int { y = foo(y); return x * 100 + y * 10 + z; } -@method_id(2) +@method_id(102) fun test_inline(x: int, y: int, z: int): int { y = foo_inline(y); return x * 100 + y * 10 + z; } -@method_id(3) +@method_id(103) fun test_inlineref(x: int, y: int, z: int): int { y = foo_inlineref(y); return x * 100 + y * 10 + z; } @inline -@method_id(14) fun foo_inline_big( x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int, x8: int, x9: int, x10: int, x11: int, x12: int, x13: int, x14: int, x15: int, x16: int, x17: int, x18: int, x19: int, x20: int @@ -69,7 +65,7 @@ fun foo_inline_big( } } -@method_id(4) +@method_id(104) fun test_inline_big(x: int, y: int, z: int): int { y = foo_inline_big( y, y + 1, y + 2, y + 3, y + 4, y + 5, y + 6, y + 7, y + 8, y + 9, @@ -77,7 +73,6 @@ fun test_inline_big(x: int, y: int, z: int): int { return x * 1000000 + y * 1000 + z; } -@method_id(15) fun foo_big( x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int, x8: int, x9: int, x10: int, x11: int, x12: int, x13: int, x14: int, x15: int, x16: int, x17: int, x18: int, x19: int, x20: int @@ -92,7 +87,7 @@ fun foo_big( } } -@method_id(5) +@method_id(105) fun test_big(x: int, y: int, z: int): int { y = foo_big( y, y + 1, y + 2, y + 3, y + 4, y + 5, y + 6, y + 7, y + 8, y + 9, @@ -100,7 +95,7 @@ fun test_big(x: int, y: int, z: int): int { return x * 1000000 + y * 1000 + z; } -@method_id(16) +@method_id(106) fun test_catch_into_same(x: int): int { var code = x; try { @@ -112,7 +107,7 @@ fun test_catch_into_same(x: int): int { } -@method_id(17) 
+@method_id(107) fun test_catch_into_same_2(x: int): int { var code = x; try { @@ -124,28 +119,77 @@ fun test_catch_into_same_2(x: int): int { return code; } +global after046: int; + +// this bug existed in FunC and is fixed in v0.4.6 +fun bug_046_internal(op: int) { + if (op == 1) { + return; + } else if (op == 2) { + return; + } else { + throw 1; + } +} + +fun bug_046_called() { + after046 = 0; + try { + bug_046_internal(1337); + after046 = 1; // shouldn't be called + } catch(n) { + return; + } + return; +} + +@method_id(108) +fun bug_046_entrypoint() { + bug_046_called(); + return after046; +} + +global g_reg: int; + +@method_id(109) +fun test109(): (int, int) { + var l_reg = 10; + g_reg = 10; + try { + // note, that regardless of assignment, an exception RESTORES them to previous (to 10) + // it's very unexpected, but is considered to be a TVM feature, not a bug + g_reg = 999; + l_reg = 999; + bug_046_internal(999); // throws + } catch { + } + // returns (10,10) because of an exception, see a comment above + return (g_reg, l_reg); +} + fun main() { } /** - method_id | in | out -@testcase | 1 | 1 2 3 | 123 -@testcase | 1 | 3 8 9 | 389 -@testcase | 1 | 3 7 9 | 329 -@testcase | 2 | 1 2 3 | 123 -@testcase | 2 | 3 8 9 | 389 -@testcase | 2 | 3 7 9 | 329 -@testcase | 3 | 1 2 3 | 123 -@testcase | 3 | 3 8 9 | 389 -@testcase | 3 | 3 7 9 | 329 -@testcase | 4 | 4 8 9 | 4350009 -@testcase | 4 | 4 7 9 | 4001009 -@testcase | 5 | 4 8 9 | 4350009 -@testcase | 5 | 4 7 9 | 4001009 -@testcase | 16 | 5 | 5 -@testcase | 16 | 20 | 44 -@testcase | 17 | 5 | 5 -@testcase | 17 | 20 | 20 + method_id | in | out +@testcase | 101 | 1 2 3 | 123 +@testcase | 101 | 3 8 9 | 389 +@testcase | 101 | 3 7 9 | 329 +@testcase | 102 | 1 2 3 | 123 +@testcase | 102 | 3 8 9 | 389 +@testcase | 102 | 3 7 9 | 329 +@testcase | 103 | 1 2 3 | 123 +@testcase | 103 | 3 8 9 | 389 +@testcase | 103 | 3 7 9 | 329 +@testcase | 104 | 4 8 9 | 4350009 +@testcase | 104 | 4 7 9 | 4001009 +@testcase | 105 | 4 8 9 | 4350009 
+@testcase | 105 | 4 7 9 | 4001009 +@testcase | 106 | 5 | 5 +@testcase | 106 | 20 | 44 +@testcase | 107 | 5 | 5 +@testcase | 107 | 20 | 20 +@testcase | 108 | | 0 -@code_hash 73240939343624734070640372352271282883450660826541545137654364443860257436623 +@code_hash 39307974281105539319288356721945232226028429128341177951717392648324358675585 */ diff --git a/tolk-tester/tests/unreachable-1.tolk b/tolk-tester/tests/unreachable-1.tolk new file mode 100644 index 00000000..5b3cb1b0 --- /dev/null +++ b/tolk-tester/tests/unreachable-1.tolk @@ -0,0 +1,14 @@ +fun main(x: int) { + if (x) { + x = 10;;;;; + return x;;; + x = 20; + } + return -1; +} + +/** +@testcase | 0 | 1 | 10 +@stderr warning: unreachable code +@stderr x = 20; + */ diff --git a/tolk-tester/tests/unreachable-2.tolk b/tolk-tester/tests/unreachable-2.tolk new file mode 100644 index 00000000..aeadd8c6 --- /dev/null +++ b/tolk-tester/tests/unreachable-2.tolk @@ -0,0 +1,22 @@ +fun main(x: int) { + if (x) { + if (x > 10) { + return 1; // throw 1; + } else if (true) { + return -1; + } else { + return 2; // throw 2; + } + } else { + {{return 1;} + x = 30;} + } + assert(false, 10); +} + +/** +@testcase | 0 | 1 | -1 +@stderr warning: unreachable code +@stderr assert(false, 10) +@stderr x = 30 + */ diff --git a/tolk-tester/tests/var-apply.tolk b/tolk-tester/tests/var-apply.tolk index 9bee862a..6a84a4fa 100644 --- a/tolk-tester/tests/var-apply.tolk +++ b/tolk-tester/tests/var-apply.tolk @@ -15,8 +15,101 @@ fun testVarApply1() { return (s.loadInt(32), s.loadInt(32)); } +@inline +fun my_throw_always() { + throw 1000; +} + +@inline +fun get_raiser() { + return my_throw_always; +} + +@method_id(102) +fun testVarApplyWithoutSavingResult() { + try { + var raiser = get_raiser(); + raiser(); // `some_var()` is always impure, the compiler has no considerations about its runtime value + return 0; + } catch (code) { + return code; + } +} + +@inline +fun sum(a: int, b: int) { + assert(a + b < 24, 1000); + return a + b; +} + +@inline 
+fun mul(a: int, b: int) { + assert(a * b < 24, 1001); + return a * b; +} + +fun demo_handler(op: int, query_id: int, a: int, b: int): int { + if (op == 0xF2) { + val func = query_id % 2 == 0 ? sum : mul; + val result = func(a, b); + return 0; // result not used, we test that func is nevertheless called + } + if (op == 0xF4) { + val func = query_id % 2 == 0 ? sum : mul; + val result = func(a, b); + return result; + } + return -1; +} + +@method_id(103) +fun testVarApplyInTernary() { + var t: tuple = createEmptyTuple(); + try { + t.tuplePush(demo_handler(0xF2, 122, 100, 200)); + } catch(code) { + t.tuplePush(code); + } + try { + t.tuplePush(demo_handler(0xF4, 122, 100, 200)); + } catch(code) { + t.tuplePush(code); + } + try { + t.tuplePush(demo_handler(0xF2, 122, 10, 10)); + } catch(code) { + t.tuplePush(code); + } + try { + t.tuplePush(demo_handler(0xF2, 123, 10, 10)); + } catch(code) { + t.tuplePush(code); + } + return t; +} + +fun always_throw2(x: int) { + throw 239 + x; +} + +global global_f: int -> (); + +@method_id(104) +fun testGlobalVarApply() { + try { + global_f = always_throw2; + global_f(1); + return 0; + } catch (code) { + return code; + } +} + fun main() {} /** @testcase | 101 | | 1 2 +@testcase | 102 | | 1000 +@testcase | 103 | | [ 1000 1000 0 1001 ] +@testcase | 104 | | 240 */ diff --git a/tolk-tester/tolk-tester.js b/tolk-tester/tolk-tester.js index 2a3eb776..c7e71021 100644 --- a/tolk-tester/tolk-tester.js +++ b/tolk-tester/tolk-tester.js @@ -347,11 +347,11 @@ class TolkTestFile { if (exit_code === 0 && this.compilation_should_fail) throw new TolkCompilationSucceededError("compilation succeeded, but it should have failed") - if (exit_code !== 0 && this.compilation_should_fail) { - for (let should_include of this.stderr_includes) - should_include.check(stderr) + for (let should_include of this.stderr_includes) // @stderr is used to check errors and warnings + should_include.check(stderr) + + if (exit_code !== 0 && this.compilation_should_fail) return 
- } if (exit_code !== 0 && !this.compilation_should_fail) throw new TolkCompilationFailedError(`tolk exit_code = ${exit_code}`, stderr) diff --git a/tolk-tester/tolk-tester.py b/tolk-tester/tolk-tester.py index 261ab496..0b3c774c 100644 --- a/tolk-tester/tolk-tester.py +++ b/tolk-tester/tolk-tester.py @@ -327,9 +327,10 @@ class TolkTestFile: if exit_code == 0 and self.compilation_should_fail: raise TolkCompilationSucceededError("compilation succeeded, but it should have failed") + for should_include in self.stderr_includes: # @stderr is used to check errors and warnings + should_include.check(stderr) + if exit_code != 0 and self.compilation_should_fail: - for should_include in self.stderr_includes: - should_include.check(stderr) return if exit_code != 0 and not self.compilation_should_fail: diff --git a/tolk/CMakeLists.txt b/tolk/CMakeLists.txt index d2decea7..0c3e7c63 100644 --- a/tolk/CMakeLists.txt +++ b/tolk/CMakeLists.txt @@ -7,14 +7,22 @@ set(TOLK_SOURCE compiler-state.cpp ast.cpp ast-from-tokens.cpp + constant-evaluator.cpp pipe-discover-parse-sources.cpp pipe-register-symbols.cpp + pipe-resolve-symbols.cpp + pipe-calc-rvalue-lvalue.cpp + pipe-detect-unreachable.cpp + pipe-infer-check-types.cpp + pipe-refine-lvalue-for-mutate.cpp + pipe-check-rvalue-lvalue.cpp + pipe-check-pure-impure.cpp + pipe-constant-folding.cpp pipe-ast-to-legacy.cpp pipe-find-unused-symbols.cpp pipe-generate-fif-output.cpp unify-types.cpp abscode.cpp - gen-abscode.cpp analyzer.cpp asmops.cpp builtins.cpp diff --git a/tolk/abscode.cpp b/tolk/abscode.cpp index c1add683..253e8012 100644 --- a/tolk/abscode.cpp +++ b/tolk/abscode.cpp @@ -25,17 +25,6 @@ namespace tolk { * */ -TmpVar::TmpVar(var_idx_t _idx, TypeExpr* _type, sym_idx_t sym_idx, SrcLocation loc) - : v_type(_type), idx(_idx), sym_idx(sym_idx), coord(0), where(loc) { - if (!_type) { - v_type = TypeExpr::new_hole(); - } -} - -void TmpVar::set_location(SrcLocation loc) { - where = loc; -} - void TmpVar::dump(std::ostream& os) const 
{ show(os); os << " : " << v_type << " (width "; @@ -55,8 +44,8 @@ void TmpVar::dump(std::ostream& os) const { } void TmpVar::show(std::ostream& os, int omit_idx) const { - if (!is_unnamed()) { - os << G.symbols.get_name(sym_idx); + if (v_sym) { + os << v_sym->name; if (omit_idx >= 2) { return; } @@ -149,10 +138,6 @@ void VarDescr::set_const(std::string value) { val = _Const; } -void VarDescr::set_const_nan() { - set_const(td::make_refint()); -} - void VarDescr::operator|=(const VarDescr& y) { val &= y.val; if (is_int_const() && y.is_int_const() && cmp(int_const, y.int_const) != 0) { @@ -273,7 +258,7 @@ void Op::show(std::ostream& os, const std::vector& vars, std::string pfx case _Call: os << pfx << dis << "CALL: "; show_var_list(os, left, vars); - os << " := " << (fun_ref ? fun_ref->name() : "(null)") << " "; + os << " := " << (f_sym ? f_sym->name : "(null)") << " "; if ((mode & 4) && args.size() == right.size()) { show_var_list(os, args, vars); } else { @@ -332,11 +317,11 @@ void Op::show(std::ostream& os, const std::vector& vars, std::string pfx case _GlobVar: os << pfx << dis << "GLOBVAR "; show_var_list(os, left, vars); - os << " := " << (fun_ref ? fun_ref->name() : "(null)") << std::endl; + os << " := " << (g_sym ? g_sym->name : "(null)") << std::endl; break; case _SetGlob: os << pfx << dis << "SETGLOB "; - os << (fun_ref ? fun_ref->name() : "(null)") << " := "; + os << (g_sym ? 
g_sym->name : "(null)") << " := "; show_var_list(os, right, vars); os << std::endl; break; @@ -458,22 +443,22 @@ void CodeBlob::print(std::ostream& os, int flags) const { os << "-------- END ---------\n\n"; } -var_idx_t CodeBlob::create_var(TypeExpr* var_type, var_idx_t sym_idx, SrcLocation location) { - vars.emplace_back(var_cnt, var_type, sym_idx, location); +var_idx_t CodeBlob::create_var(TypeExpr* var_type, const LocalVarData* v_sym, SrcLocation location) { + vars.emplace_back(var_cnt, var_type, v_sym, location); return var_cnt++; } -bool CodeBlob::import_params(FormalArgList arg_list) { +bool CodeBlob::import_params(FormalArgList&& arg_list) { if (var_cnt || in_var_cnt) { return false; } std::vector list; for (const auto& par : arg_list) { TypeExpr* arg_type; - SymDef* arg_sym; + const LocalVarData* arg_sym; SrcLocation arg_loc; std::tie(arg_type, arg_sym, arg_loc) = par; - list.push_back(create_var(arg_type, arg_sym ? arg_sym->sym_idx : 0, arg_loc)); + list.push_back(create_var(arg_type, arg_sym, arg_loc)); } emplace_back(loc, Op::_Import, list); in_var_cnt = var_cnt; diff --git a/tolk/analyzer.cpp b/tolk/analyzer.cpp index 719df9b7..495ae03b 100644 --- a/tolk/analyzer.cpp +++ b/tolk/analyzer.cpp @@ -46,7 +46,7 @@ int CodeBlob::split_vars(bool strict) { if (k != 1) { var.coord = ~((n << 8) + k); for (int i = 0; i < k; i++) { - auto v = create_var(comp_types[i], vars[j].sym_idx, vars[j].where); + auto v = create_var(comp_types[i], vars[j].v_sym, vars[j].where); tolk_assert(v == n + i); tolk_assert(vars[v].idx == v); vars[v].coord = ((int)j << 8) + i + 1; @@ -732,15 +732,18 @@ VarDescrList Op::fwd_analyze(VarDescrList values) { } case _Call: { prepare_args(values); - auto func = dynamic_cast(fun_ref->value); - if (func) { + if (!f_sym->is_regular_function()) { std::vector res; res.reserve(left.size()); for (var_idx_t i : left) { res.emplace_back(i); } AsmOpList tmp; - func->compile(tmp, res, args, where); // abstract interpretation of res := f (args) + if 
(f_sym->is_asm_function()) { + std::get(f_sym->body)->compile(tmp); // abstract interpretation of res := f (args) + } else { + std::get(f_sym->body)->compile(tmp, res, args, where); + } int j = 0; for (var_idx_t i : left) { values.add_newval(i).set_value(res[j++]); @@ -878,27 +881,10 @@ bool Op::set_noreturn(bool flag) { return flag; } -void Op::set_impure(const CodeBlob &code) { - // todo calling this function with `code` is a bad design (flags are assigned after Op is constructed) - // later it's better to check this somewhere in code.emplace_back() - if (code.flags & CodeBlob::_ForbidImpure) { - throw ParseError(where, "an impure operation in a pure function"); - } +void Op::set_impure_flag() { flags |= _Impure; } -void Op::set_impure(const CodeBlob &code, bool flag) { - if (flag) { - if (code.flags & CodeBlob::_ForbidImpure) { - throw ParseError(where, "an impure operation in a pure function"); - } - flags |= _Impure; - } else { - flags &= ~_Impure; - } -} - - bool Op::mark_noreturn() { switch (cl) { case _Nop: diff --git a/tolk/asmops.cpp b/tolk/asmops.cpp index 8db75091..547922da 100644 --- a/tolk/asmops.cpp +++ b/tolk/asmops.cpp @@ -52,10 +52,10 @@ std::ostream& operator<<(std::ostream& os, AsmOp::SReg stack_reg) { } } -AsmOp AsmOp::Const(int arg, std::string push_op, td::RefInt256 origin) { +AsmOp AsmOp::Const(int arg, const std::string& push_op) { std::ostringstream os; os << arg << ' ' << push_op; - return AsmOp::Const(os.str(), origin); + return AsmOp::Const(os.str()); } AsmOp AsmOp::make_stk2(int a, int b, const char* str, int delta) { @@ -161,36 +161,36 @@ AsmOp AsmOp::UnTuple(int a) { return AsmOp::Custom(os.str(), 1, a); } -AsmOp AsmOp::IntConst(td::RefInt256 x) { +AsmOp AsmOp::IntConst(const td::RefInt256& x) { if (x->signed_fits_bits(8)) { - return AsmOp::Const(dec_string(x) + " PUSHINT", x); + return AsmOp::Const(dec_string(x) + " PUSHINT"); } if (!x->is_valid()) { - return AsmOp::Const("PUSHNAN", x); + return AsmOp::Const("PUSHNAN"); } int k = 
is_pos_pow2(x); if (k >= 0) { - return AsmOp::Const(k, "PUSHPOW2", x); + return AsmOp::Const(k, "PUSHPOW2"); } k = is_pos_pow2(x + 1); if (k >= 0) { - return AsmOp::Const(k, "PUSHPOW2DEC", x); + return AsmOp::Const(k, "PUSHPOW2DEC"); } k = is_pos_pow2(-x); if (k >= 0) { - return AsmOp::Const(k, "PUSHNEGPOW2", x); + return AsmOp::Const(k, "PUSHNEGPOW2"); } if (!x->mod_pow2_short(23)) { - return AsmOp::Const(dec_string(x) + " PUSHINTX", x); + return AsmOp::Const(dec_string(x) + " PUSHINTX"); } - return AsmOp::Const(dec_string(x) + " PUSHINT", x); + return AsmOp::Const(dec_string(x) + " PUSHINT"); } AsmOp AsmOp::BoolConst(bool f) { return AsmOp::Const(f ? "TRUE" : "FALSE"); } -AsmOp AsmOp::Parse(std::string custom_op) { +AsmOp AsmOp::Parse(const std::string& custom_op) { if (custom_op == "NOP") { return AsmOp::Nop(); } else if (custom_op == "SWAP") { diff --git a/tolk/ast-from-tokens.cpp b/tolk/ast-from-tokens.cpp index 1a1d199e..22d64442 100644 --- a/tolk/ast-from-tokens.cpp +++ b/tolk/ast-from-tokens.cpp @@ -75,7 +75,7 @@ static void fire_error_mix_and_or_no_parenthesis(SrcLocation loc, std::string_vi // the only way to suppress this error for the programmer is to use parenthesis // (how do we detect presence of parenthesis? 
simple: (0!=1) is ast_parenthesized_expr{ast_binary_operator}, // that's why if rhs->type == ast_binary_operator, it's not surrounded by parenthesis) -static void diagnose_bitwise_precedence(SrcLocation loc, std::string_view operator_name, AnyV lhs, AnyV rhs) { +static void diagnose_bitwise_precedence(SrcLocation loc, std::string_view operator_name, AnyExprV lhs, AnyExprV rhs) { // handle "flags & 0xFF != 0" (rhs = "0xFF != 0") if (rhs->type == ast_binary_operator && is_comparison_binary_op(rhs->as()->tok)) { fire_error_lower_precedence(loc, operator_name, rhs->as()->operator_name); @@ -90,7 +90,7 @@ static void diagnose_bitwise_precedence(SrcLocation loc, std::string_view operat // similar to above, but detect potentially invalid usage of && and || // since anyway, using parenthesis when both && and || occur in the same expression, // && and || have equal operator precedence in Tolk -static void diagnose_and_or_precedence(SrcLocation loc, AnyV lhs, TokenType rhs_tok, std::string_view rhs_operator_name) { +static void diagnose_and_or_precedence(SrcLocation loc, AnyExprV lhs, TokenType rhs_tok, std::string_view rhs_operator_name) { if (auto lhs_op = lhs->try_as()) { // handle "arg1 & arg2 | arg3" (lhs = "arg1 & arg2") if (is_bitwise_binary_op(lhs_op->tok) && is_bitwise_binary_op(rhs_tok) && lhs_op->tok != rhs_tok) { @@ -105,7 +105,7 @@ static void diagnose_and_or_precedence(SrcLocation loc, AnyV lhs, TokenType rhs_ } // diagnose "a << 8 + 1" (equivalent to "a << 9", probably unexpected) -static void diagnose_addition_in_bitshift(SrcLocation loc, std::string_view bitshift_operator_name, AnyV rhs) { +static void diagnose_addition_in_bitshift(SrcLocation loc, std::string_view bitshift_operator_name, AnyExprV rhs) { if (rhs->type == ast_binary_operator && is_add_or_sub_binary_op(rhs->as()->tok)) { fire_error_lower_precedence(loc, bitshift_operator_name, rhs->as()->operator_name); } @@ -122,7 +122,7 @@ static void fire_error_FunC_style_var_declaration(Lexer& lex) { } // 
replace (a == null) and similar to isNull(a) (call of a built-in function) -static AnyV maybe_replace_eq_null_with_isNull_call(V v) { +static AnyExprV maybe_replace_eq_null_with_isNull_call(V v) { bool has_null = v->get_lhs()->type == ast_null_keyword || v->get_rhs()->type == ast_null_keyword; bool replace = has_null && (v->tok == tok_eq || v->tok == tok_neq); if (!replace) { @@ -130,9 +130,9 @@ static AnyV maybe_replace_eq_null_with_isNull_call(V v) { } auto v_ident = createV(v->loc, "__isNull"); // built-in function - AnyV v_null = v->get_lhs()->type == ast_null_keyword ? v->get_rhs() : v->get_lhs(); - AnyV v_arg = createV(v->loc, v_null, false); - AnyV v_isNull = createV(v->loc, v_ident, createV(v->loc, {v_arg})); + AnyExprV v_null = v->get_lhs()->type == ast_null_keyword ? v->get_rhs() : v->get_lhs(); + AnyExprV v_arg = createV(v->loc, v_null, false); + AnyExprV v_isNull = createV(v->loc, v_ident, createV(v->loc, {v_arg})); if (v->tok == tok_neq) { v_isNull = createV(v->loc, "!", tok_logical_not, v_isNull); } @@ -230,7 +230,7 @@ static TypeExpr* parse_type(Lexer& lex, V genericsT_list) { return res; } -AnyV parse_expr(Lexer& lex); +AnyExprV parse_expr(Lexer& lex); static AnyV parse_parameter(Lexer& lex, V genericsT_list, bool is_first) { SrcLocation loc = lex.cur_location(); @@ -256,7 +256,6 @@ static AnyV parse_parameter(Lexer& lex, V genericsT_list, bo } else if (lex.tok() != tok_underscore) { lex.unexpected("parameter name"); } - auto v_ident = createV(lex.cur_location(), param_name); lex.next(); // parameter type after colon, also mandatory (even explicit ":auto") @@ -269,7 +268,7 @@ static AnyV parse_parameter(Lexer& lex, V genericsT_list, bo throw ParseError(loc, "`self` parameter must be strictly typed"); } - return createV(loc, v_ident, param_type, declared_as_mutate); + return createV(loc, param_name, param_type, declared_as_mutate); } static AnyV parse_global_var_declaration(Lexer& lex, const std::vector>& annotations) { @@ -316,7 +315,7 @@ static 
AnyV parse_constant_declaration(Lexer& lex, const std::vector parse_parameter_list(Lexer& lex, V(loc, expr, passed_as_mutate); } static V parse_argument_list(Lexer& lex) { SrcLocation loc = lex.cur_location(); - std::vector args; + std::vector args; lex.expect(tok_oppar, "`(`"); if (lex.tok() != tok_clpar) { args.push_back(parse_argument(lex)); @@ -371,7 +370,7 @@ static V parse_argument_list(Lexer& lex) { } // parse (expr) / [expr] / identifier / number -static AnyV parse_expr100(Lexer& lex) { +static AnyExprV parse_expr100(Lexer& lex) { SrcLocation loc = lex.cur_location(); switch (lex.tok()) { case tok_oppar: { @@ -380,12 +379,12 @@ static AnyV parse_expr100(Lexer& lex) { lex.next(); return createV(loc, {}); } - AnyV first = parse_expr(lex); + AnyExprV first = parse_expr(lex); if (lex.tok() == tok_clpar) { lex.next(); - return createV(loc, first); + return createV(loc, first); } - std::vector items(1, first); + std::vector items(1, first); while (lex.tok() == tok_comma) { lex.next(); items.emplace_back(parse_expr(lex)); @@ -399,7 +398,7 @@ static AnyV parse_expr100(Lexer& lex) { lex.next(); return createV(loc, {}); } - std::vector items(1, parse_expr(lex)); + std::vector items(1, parse_expr(lex)); while (lex.tok() == tok_comma) { lex.next(); items.emplace_back(parse_expr(lex)); @@ -408,9 +407,13 @@ static AnyV parse_expr100(Lexer& lex) { return createV(loc, std::move(items)); } case tok_int_const: { - std::string_view int_val = lex.cur_str(); + std::string_view orig_str = lex.cur_str(); + td::RefInt256 intval = td::string_to_int256(static_cast(orig_str)); + if (intval.is_null() || !intval->signed_fits_bits(257)) { + lex.error("invalid integer constant"); + } lex.next(); - return createV(loc, int_val); + return createV(loc, std::move(intval), orig_str); } case tok_string_const: { std::string_view str_val = lex.cur_str(); @@ -459,8 +462,8 @@ static AnyV parse_expr100(Lexer& lex) { } // parse E(args) -static AnyV parse_expr90(Lexer& lex) { - AnyV res = 
parse_expr100(lex); +static AnyExprV parse_expr90(Lexer& lex) { + AnyExprV res = parse_expr100(lex); if (lex.tok() == tok_oppar) { return createV(res->loc, res, parse_argument_list(lex)); } @@ -468,8 +471,8 @@ static AnyV parse_expr90(Lexer& lex) { } // parse E.method(...) (left-to-right) -static AnyV parse_expr80(Lexer& lex) { - AnyV lhs = parse_expr90(lex); +static AnyExprV parse_expr80(Lexer& lex) { + AnyExprV lhs = parse_expr90(lex); while (lex.tok() == tok_dot) { SrcLocation loc = lex.cur_location(); lex.next(); @@ -482,27 +485,27 @@ static AnyV parse_expr80(Lexer& lex) { } // parse ! ~ - + E (unary) -static AnyV parse_expr75(Lexer& lex) { +static AnyExprV parse_expr75(Lexer& lex) { TokenType t = lex.tok(); if (t == tok_logical_not || t == tok_bitwise_not || t == tok_minus || t == tok_plus) { SrcLocation loc = lex.cur_location(); std::string_view operator_name = lex.cur_str(); lex.next(); - AnyV rhs = parse_expr75(lex); + AnyExprV rhs = parse_expr75(lex); return createV(loc, operator_name, t, rhs); } return parse_expr80(lex); } // parse E * / % ^/ ~/ E (left-to-right) -static AnyV parse_expr30(Lexer& lex) { - AnyV lhs = parse_expr75(lex); +static AnyExprV parse_expr30(Lexer& lex) { + AnyExprV lhs = parse_expr75(lex); TokenType t = lex.tok(); while (t == tok_mul || t == tok_div || t == tok_mod || t == tok_divC || t == tok_divR) { SrcLocation loc = lex.cur_location(); std::string_view operator_name = lex.cur_str(); lex.next(); - AnyV rhs = parse_expr75(lex); + AnyExprV rhs = parse_expr75(lex); lhs = createV(loc, operator_name, t, lhs, rhs); t = lex.tok(); } @@ -510,14 +513,14 @@ static AnyV parse_expr30(Lexer& lex) { } // parse E + - E (left-to-right) -static AnyV parse_expr20(Lexer& lex) { - AnyV lhs = parse_expr30(lex); +static AnyExprV parse_expr20(Lexer& lex) { + AnyExprV lhs = parse_expr30(lex); TokenType t = lex.tok(); while (t == tok_minus || t == tok_plus) { SrcLocation loc = lex.cur_location(); std::string_view operator_name = lex.cur_str(); lex.next(); 
- AnyV rhs = parse_expr30(lex); + AnyExprV rhs = parse_expr30(lex); lhs = createV(loc, operator_name, t, lhs, rhs); t = lex.tok(); } @@ -525,14 +528,14 @@ static AnyV parse_expr20(Lexer& lex) { } // parse E << >> ~>> ^>> E (left-to-right) -static AnyV parse_expr17(Lexer& lex) { - AnyV lhs = parse_expr20(lex); +static AnyExprV parse_expr17(Lexer& lex) { + AnyExprV lhs = parse_expr20(lex); TokenType t = lex.tok(); while (t == tok_lshift || t == tok_rshift || t == tok_rshiftC || t == tok_rshiftR) { SrcLocation loc = lex.cur_location(); std::string_view operator_name = lex.cur_str(); lex.next(); - AnyV rhs = parse_expr20(lex); + AnyExprV rhs = parse_expr20(lex); diagnose_addition_in_bitshift(loc, operator_name, rhs); lhs = createV(loc, operator_name, t, lhs, rhs); t = lex.tok(); @@ -541,14 +544,14 @@ static AnyV parse_expr17(Lexer& lex) { } // parse E == < > <= >= != <=> E (left-to-right) -static AnyV parse_expr15(Lexer& lex) { - AnyV lhs = parse_expr17(lex); +static AnyExprV parse_expr15(Lexer& lex) { + AnyExprV lhs = parse_expr17(lex); TokenType t = lex.tok(); if (t == tok_eq || t == tok_lt || t == tok_gt || t == tok_leq || t == tok_geq || t == tok_neq || t == tok_spaceship) { SrcLocation loc = lex.cur_location(); std::string_view operator_name = lex.cur_str(); lex.next(); - AnyV rhs = parse_expr17(lex); + AnyExprV rhs = parse_expr17(lex); lhs = createV(loc, operator_name, t, lhs, rhs); if (t == tok_eq || t == tok_neq) { lhs = maybe_replace_eq_null_with_isNull_call(lhs->as()); @@ -558,14 +561,14 @@ static AnyV parse_expr15(Lexer& lex) { } // parse E & | ^ E (left-to-right) -static AnyV parse_expr14(Lexer& lex) { - AnyV lhs = parse_expr15(lex); +static AnyExprV parse_expr14(Lexer& lex) { + AnyExprV lhs = parse_expr15(lex); TokenType t = lex.tok(); while (t == tok_bitwise_and || t == tok_bitwise_or || t == tok_bitwise_xor) { SrcLocation loc = lex.cur_location(); std::string_view operator_name = lex.cur_str(); lex.next(); - AnyV rhs = parse_expr15(lex); + AnyExprV rhs = 
parse_expr15(lex); diagnose_bitwise_precedence(loc, operator_name, lhs, rhs); diagnose_and_or_precedence(loc, lhs, t, operator_name); lhs = createV(loc, operator_name, t, lhs, rhs); @@ -575,14 +578,14 @@ static AnyV parse_expr14(Lexer& lex) { } // parse E && || E (left-to-right) -static AnyV parse_expr13(Lexer& lex) { - AnyV lhs = parse_expr14(lex); +static AnyExprV parse_expr13(Lexer& lex) { + AnyExprV lhs = parse_expr14(lex); TokenType t = lex.tok(); while (t == tok_logical_and || t == tok_logical_or) { SrcLocation loc = lex.cur_location(); std::string_view operator_name = lex.cur_str(); lex.next(); - AnyV rhs = parse_expr14(lex); + AnyExprV rhs = parse_expr14(lex); diagnose_and_or_precedence(loc, lhs, t, operator_name); lhs = createV(loc, operator_name, t, lhs, rhs); t = lex.tok(); @@ -591,8 +594,8 @@ static AnyV parse_expr13(Lexer& lex) { } // parse E = += -= E and E ? E : E (right-to-left) -static AnyV parse_expr10(Lexer& lex) { - AnyV lhs = parse_expr13(lex); +static AnyExprV parse_expr10(Lexer& lex) { + AnyExprV lhs = parse_expr13(lex); TokenType t = lex.tok(); if (t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || t == tok_set_mod || t == tok_set_lshift || t == tok_set_rshift || @@ -601,36 +604,36 @@ static AnyV parse_expr10(Lexer& lex) { SrcLocation loc = lex.cur_location(); std::string_view operator_name = lex.cur_str(); lex.next(); - AnyV rhs = parse_expr10(lex); + AnyExprV rhs = parse_expr10(lex); return createV(loc, operator_name, t, lhs, rhs); } if (t == tok_question) { SrcLocation loc = lex.cur_location(); lex.next(); - AnyV when_true = parse_expr10(lex); + AnyExprV when_true = parse_expr10(lex); lex.expect(tok_colon, "`:`"); - AnyV when_false = parse_expr10(lex); + AnyExprV when_false = parse_expr10(lex); return createV(loc, lhs, when_true, when_false); } return lhs; } -AnyV parse_expr(Lexer& lex) { +AnyExprV parse_expr(Lexer& lex) { return parse_expr10(lex); } AnyV parse_statement(Lexer& lex); -static AnyV 
parse_var_declaration_lhs(Lexer& lex, bool is_immutable) { +static AnyExprV parse_var_declaration_lhs(Lexer& lex, bool is_immutable) { SrcLocation loc = lex.cur_location(); if (lex.tok() == tok_oppar) { lex.next(); - AnyV first = parse_var_declaration_lhs(lex, is_immutable); + AnyExprV first = parse_var_declaration_lhs(lex, is_immutable); if (lex.tok() == tok_clpar) { lex.next(); - return createV(loc, first); + return createV(loc, first); } - std::vector args(1, first); + std::vector args(1, first); while (lex.tok() == tok_comma) { lex.next(); args.push_back(parse_var_declaration_lhs(lex, is_immutable)); @@ -640,7 +643,7 @@ static AnyV parse_var_declaration_lhs(Lexer& lex, bool is_immutable) { } if (lex.tok() == tok_opbracket) { lex.next(); - std::vector args(1, parse_var_declaration_lhs(lex, is_immutable)); + std::vector args(1, parse_var_declaration_lhs(lex, is_immutable)); while (lex.tok() == tok_comma) { lex.next(); args.push_back(parse_var_declaration_lhs(lex, is_immutable)); @@ -679,12 +682,12 @@ static AnyV parse_local_vars_declaration(Lexer& lex) { bool is_immutable = lex.tok() == tok_val; lex.next(); - AnyV lhs = parse_var_declaration_lhs(lex, is_immutable); + AnyExprV lhs = parse_var_declaration_lhs(lex, is_immutable); if (lex.tok() != tok_assign) { lex.error("variables declaration must be followed by assignment: `var xxx = ...`"); } lex.next(); - AnyV assigned_val = parse_expr(lex); + AnyExprV assigned_val = parse_expr(lex); if (lex.tok() == tok_comma) { lex.error("multiple declarations are not allowed, split variables on separate lines"); @@ -708,7 +711,7 @@ static V parse_sequence(Lexer& lex) { static AnyV parse_return_statement(Lexer& lex) { SrcLocation loc = lex.cur_location(); lex.expect(tok_return, "`return`"); - AnyV child = lex.tok() == tok_semicolon // `return;` actually means `return ();` (which is void) + AnyExprV child = lex.tok() == tok_semicolon // `return;` actually means `return ();` (which is void) ? 
createV(lex.cur_location(), {}) : parse_expr(lex); lex.expect(tok_semicolon, "`;`"); @@ -720,7 +723,7 @@ static AnyV parse_if_statement(Lexer& lex, bool is_ifnot) { lex.expect(tok_if, "`if`"); lex.expect(tok_oppar, "`(`"); - AnyV cond = parse_expr(lex); + AnyExprV cond = parse_expr(lex); lex.expect(tok_clpar, "`)`"); // replace if(!expr) with ifnot(expr) (this should be done later, but for now, let this be right at parsing time) if (auto v_not = cond->try_as(); v_not && v_not->tok == tok_logical_not) { @@ -748,7 +751,7 @@ static AnyV parse_repeat_statement(Lexer& lex) { SrcLocation loc = lex.cur_location(); lex.expect(tok_repeat, "`repeat`"); lex.expect(tok_oppar, "`(`"); - AnyV cond = parse_expr(lex); + AnyExprV cond = parse_expr(lex); lex.expect(tok_clpar, "`)`"); V body = parse_sequence(lex); return createV(loc, cond, body); @@ -758,7 +761,7 @@ static AnyV parse_while_statement(Lexer& lex) { SrcLocation loc = lex.cur_location(); lex.expect(tok_while, "`while`"); lex.expect(tok_oppar, "`(`"); - AnyV cond = parse_expr(lex); + AnyExprV cond = parse_expr(lex); lex.expect(tok_clpar, "`)`"); V body = parse_sequence(lex); return createV(loc, cond, body); @@ -770,13 +773,13 @@ static AnyV parse_do_while_statement(Lexer& lex) { V body = parse_sequence(lex); lex.expect(tok_while, "`while`"); lex.expect(tok_oppar, "`(`"); - AnyV cond = parse_expr(lex); + AnyExprV cond = parse_expr(lex); lex.expect(tok_clpar, "`)`"); lex.expect(tok_semicolon, "`;`"); return createV(loc, body, cond); } -static AnyV parse_catch_variable(Lexer& lex) { +static AnyExprV parse_catch_variable(Lexer& lex) { SrcLocation loc = lex.cur_location(); if (lex.tok() == tok_identifier) { std::string_view var_name = lex.cur_str(); @@ -794,7 +797,7 @@ static AnyV parse_throw_statement(Lexer& lex) { SrcLocation loc = lex.cur_location(); lex.expect(tok_throw, "`throw`"); - AnyV thrown_code, thrown_arg; + AnyExprV thrown_code, thrown_arg; if (lex.tok() == tok_oppar) { // throw (code) or throw (code, arg) 
lex.next(); thrown_code = parse_expr(lex); @@ -802,12 +805,12 @@ static AnyV parse_throw_statement(Lexer& lex) { lex.next(); thrown_arg = parse_expr(lex); } else { - thrown_arg = createV(loc); + thrown_arg = createV(loc); } lex.expect(tok_clpar, "`)`"); } else { // throw code thrown_code = parse_expr(lex); - thrown_arg = createV(loc); + thrown_arg = createV(loc); } lex.expect(tok_semicolon, "`;`"); @@ -819,8 +822,8 @@ static AnyV parse_assert_statement(Lexer& lex) { lex.expect(tok_assert, "`assert`"); lex.expect(tok_oppar, "`(`"); - AnyV cond = parse_expr(lex); - AnyV thrown_code; + AnyExprV cond = parse_expr(lex); + AnyExprV thrown_code; if (lex.tok() == tok_comma) { // assert(cond, code) lex.next(); thrown_code = parse_expr(lex); @@ -840,7 +843,7 @@ static AnyV parse_try_catch_statement(Lexer& lex) { lex.expect(tok_try, "`try`"); V try_body = parse_sequence(lex); - std::vector catch_args; + std::vector catch_args; lex.expect(tok_catch, "`catch`"); SrcLocation catch_loc = lex.cur_location(); if (lex.tok() == tok_oppar) { @@ -889,13 +892,13 @@ AnyV parse_statement(Lexer& lex) { case tok_semicolon: { SrcLocation loc = lex.cur_location(); lex.next(); - return createV(loc); + return createV(loc); } case tok_break: case tok_continue: lex.error("break/continue from loops are not supported yet"); default: { - AnyV expr = parse_expr(lex); + AnyExprV expr = parse_expr(lex); lex.expect(tok_semicolon, "`;`"); return expr; } @@ -976,7 +979,7 @@ static V parse_annotation(Lexer& lex) { if (lex.tok() == tok_oppar) { SrcLocation loc_args = lex.cur_location(); lex.next(); - std::vector args; + std::vector args; args.push_back(parse_expr(lex)); while (lex.tok() == tok_comma) { lex.next(); @@ -1038,7 +1041,7 @@ static AnyV parse_function_declaration(Lexer& lex, const std::vector v_param_list = parse_parameter_list(lex, genericsT_list)->as(); - bool accepts_self = !v_param_list->empty() && v_param_list->get_param(0)->get_identifier()->name == "self"; + bool accepts_self = 
!v_param_list->empty() && v_param_list->get_param(0)->param_name == "self"; int n_mutate_params = v_param_list->get_mutate_params_count(); TypeExpr* ret_type = nullptr; @@ -1069,7 +1072,7 @@ static AnyV parse_function_declaration(Lexer& lex, const std::vectorget_params()) { if (v_param->as()->declared_as_mutate) { - ret_tensor_items.emplace_back(v_param->as()->param_type); + ret_tensor_items.emplace_back(v_param->as()->declared_type); } } ret_tensor_items.emplace_back(ret_type ? ret_type : TypeExpr::new_hole()); @@ -1079,7 +1082,7 @@ static AnyV parse_function_declaration(Lexer& lex, const std::vector(lex.cur_location()); + v_body = createV(lex.cur_location()); lex.next(); lex.expect(tok_semicolon, "`;`"); } else if (lex.tok() == tok_opbrace) { @@ -1098,7 +1101,7 @@ static AnyV parse_function_declaration(Lexer& lex, const std::vectoris_entrypoint = is_entrypoint; f_declaration->genericsT_list = genericsT_list; f_declaration->marked_as_get_method = is_get_method; - f_declaration->marked_as_builtin = v_body->type == ast_empty; + f_declaration->marked_as_builtin = v_body->type == ast_empty_statement; f_declaration->accepts_self = accepts_self; f_declaration->returns_self = returns_self; @@ -1142,7 +1145,7 @@ static AnyV parse_tolk_required_version(Lexer& lex) { loc.show_warning("the contract is written in Tolk v" + semver + ", but you use Tolk compiler v" + TOLK_VERSION + "; probably, it will lead to compilation errors or hash changes"); } - return createV(loc, tok_eq, semver); // semicolon is not necessary + return createV(loc, semver); // semicolon is not necessary } static AnyV parse_import_statement(Lexer& lex) { diff --git a/tolk/ast-from-tokens.h b/tolk/ast-from-tokens.h index 5f380c56..39574f9c 100644 --- a/tolk/ast-from-tokens.h +++ b/tolk/ast-from-tokens.h @@ -16,12 +16,10 @@ */ #pragma once -#include "src-file.h" +#include "fwd-declarations.h" namespace tolk { -struct ASTNodeBase; - -const ASTNodeBase* parse_src_file_to_ast(const SrcFile* file); +AnyV 
parse_src_file_to_ast(const SrcFile* file); } // namespace tolk diff --git a/tolk/ast-replacer.h b/tolk/ast-replacer.h index 478994e8..45f4c638 100644 --- a/tolk/ast-replacer.h +++ b/tolk/ast-replacer.h @@ -35,25 +35,39 @@ namespace tolk { class ASTReplacer { protected: - GNU_ATTRIBUTE_ALWAYS_INLINE static AnyV replace_children(const ASTNodeLeaf* v) { + GNU_ATTRIBUTE_ALWAYS_INLINE static AnyExprV replace_children(const ASTExprLeaf* v) { return v; } - GNU_ATTRIBUTE_ALWAYS_INLINE AnyV replace_children(const ASTNodeUnary* v) { - auto* v_mutable = const_cast(v); + GNU_ATTRIBUTE_ALWAYS_INLINE AnyExprV replace_children(const ASTExprUnary* v) { + auto* v_mutable = const_cast(v); v_mutable->child = replace(v_mutable->child); return v_mutable; } - GNU_ATTRIBUTE_ALWAYS_INLINE AnyV replace_children(const ASTNodeBinary* v) { - auto* v_mutable = const_cast(v); + GNU_ATTRIBUTE_ALWAYS_INLINE AnyExprV replace_children(const ASTExprBinary* v) { + auto* v_mutable = const_cast(v); v_mutable->lhs = replace(v->lhs); v_mutable->rhs = replace(v->rhs); return v_mutable; } - GNU_ATTRIBUTE_ALWAYS_INLINE AnyV replace_children(const ASTNodeVararg* v) { - auto* v_mutable = const_cast(v); + GNU_ATTRIBUTE_ALWAYS_INLINE AnyExprV replace_children(const ASTExprVararg* v) { + auto* v_mutable = const_cast(v); + for (AnyExprV& child : v_mutable->children) { + child = replace(child); + } + return v_mutable; + } + + GNU_ATTRIBUTE_ALWAYS_INLINE AnyV replace_children(const ASTStatementUnary* v) { + auto* v_mutable = const_cast(v); + v_mutable->child = replace(v_mutable->child); + return v_mutable; + } + + GNU_ATTRIBUTE_ALWAYS_INLINE AnyV replace_children(const ASTStatementVararg* v) { + auto* v_mutable = const_cast(v); for (AnyV& child : v_mutable->children) { child = replace(child); } @@ -64,44 +78,50 @@ public: virtual ~ASTReplacer() = default; virtual AnyV replace(AnyV v) = 0; + virtual AnyExprV replace(AnyExprV v) = 0; }; class ASTReplacerInFunctionBody : public ASTReplacer { protected: using parent = 
ASTReplacerInFunctionBody; - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return 
replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } - AnyV replace(AnyV v) final { + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + + AnyExprV replace(AnyExprV v) final { switch (v->type) { - case ast_empty: return replace(v->as()); - case ast_parenthesized_expr: return replace(v->as()); + case ast_empty_expression: return replace(v->as()); + case ast_parenthesized_expression: return replace(v->as()); case ast_tensor: return replace(v->as()); case ast_tensor_square: return replace(v->as()); 
case ast_identifier: return replace(v->as()); @@ -110,12 +130,23 @@ protected: case ast_bool_const: return replace(v->as()); case ast_null_keyword: return replace(v->as()); case ast_self_keyword: return replace(v->as()); + case ast_argument: return replace(v->as()); + case ast_argument_list: return replace(v->as()); case ast_function_call: return replace(v->as()); case ast_dot_method_call: return replace(v->as()); case ast_underscore: return replace(v->as()); case ast_unary_operator: return replace(v->as()); case ast_binary_operator: return replace(v->as()); case ast_ternary_operator: return replace(v->as()); + case ast_local_var: return replace(v->as()); + default: + throw UnexpectedASTNodeType(v, "ASTReplacerInFunctionBody::replace"); + } + } + + AnyV replace(AnyV v) final { + switch (v->type) { + case ast_empty_statement: return replace(v->as()); case ast_return_statement: return replace(v->as()); case ast_sequence: return replace(v->as()); case ast_repeat_statement: return replace(v->as()); @@ -125,11 +156,13 @@ protected: case ast_assert_statement: return replace(v->as()); case ast_try_catch_statement: return replace(v->as()); case ast_if_statement: return replace(v->as()); - case ast_local_var: return replace(v->as()); case ast_local_vars_declaration: return replace(v->as()); case ast_asm_body: return replace(v->as()); - default: - throw UnexpectedASTNodeType(v, "ASTReplacerInFunctionBody::visit"); + default: { + // be very careful, don't forget to handle all statements (not expressions) above! 
+ AnyExprV as_expr = reinterpret_cast(v); + return replace(as_expr); + } } } @@ -139,22 +172,18 @@ public: } }; -class ASTReplacerAllFunctionsInFile : public ASTReplacerInFunctionBody { -protected: - using parent = ASTReplacerAllFunctionsInFile; - - virtual bool should_enter_function(V v) = 0; - -public: - void start_replacing_in_file(V v_file) { - for (AnyV v : v_file->get_toplevel_declarations()) { - if (auto v_function = v->try_as()) { - if (should_enter_function(v_function)) { - replace(v_function->get_body()); +template +void replace_ast_of_all_functions(const AllSrcFiles& all_files) { + for (const SrcFile* file : all_files) { + for (AnyV v : file->ast->as()->get_toplevel_declarations()) { + if (auto v_func = v->try_as()) { + if (v_func->is_regular_function()) { + BodyReplacerT visitor; + visitor.start_replacing_in_function(v_func); } } } } -}; +} } // namespace tolk diff --git a/tolk/ast-stringifier.h b/tolk/ast-stringifier.h index 759873b0..cc91371c 100644 --- a/tolk/ast-stringifier.h +++ b/tolk/ast-stringifier.h @@ -31,8 +31,9 @@ namespace tolk { class ASTStringifier final : public ASTVisitor { constexpr static std::pair name_pairs[] = { - {ast_empty, "ast_empty"}, - {ast_parenthesized_expr, "ast_parenthesized_expr"}, + {ast_empty_statement, "ast_empty_statement"}, + {ast_empty_expression, "ast_empty_expression"}, + {ast_parenthesized_expression, "ast_parenthesized_expression"}, {ast_tensor, "ast_tensor"}, {ast_tensor_square, "ast_tensor_square"}, {ast_identifier, "ast_identifier"}, @@ -115,7 +116,7 @@ class ASTStringifier final : public ASTVisitor { case ast_identifier: return static_cast(v->as()->name); case ast_int_const: - return static_cast(v->as()->int_val); + return static_cast(v->as()->orig_str); case ast_string_const: if (char modifier = v->as()->modifier) { return "\"" + static_cast(v->as()->str_val) + "\"" + std::string(1, modifier); @@ -146,21 +147,21 @@ class ASTStringifier final : public ASTVisitor { return 
annotation_kinds[static_cast(v->as()->kind)].second; case ast_parameter: { std::ostringstream os; - os << v->as()->param_type; - return static_cast(v->as()->get_identifier()->name) + ": " + os.str(); + os << v->as()->declared_type; + return static_cast(v->as()->param_name) + ": " + os.str(); } case ast_function_declaration: { std::string param_names; for (int i = 0; i < v->as()->get_num_params(); i++) { if (!param_names.empty()) param_names += ","; - param_names += v->as()->get_param(i)->get_identifier()->name; + param_names += v->as()->get_param(i)->param_name; } return "fun " + static_cast(v->as()->get_identifier()->name) + "(" + param_names + ")"; } case ast_local_var: { std::ostringstream os; - os << v->as()->declared_type; + os << (v->as()->inferred_type ? v->as()->inferred_type : v->as()->declared_type); if (auto v_ident = v->as()->get_identifier()->try_as()) { return static_cast(v_ident->name) + ":" + os.str(); } @@ -202,8 +203,9 @@ public: void visit(AnyV v) override { switch (v->type) { - case ast_empty: return handle_vertex(v->as()); - case ast_parenthesized_expr: return handle_vertex(v->as()); + case ast_empty_statement: return handle_vertex(v->as()); + case ast_empty_expression: return handle_vertex(v->as()); + case ast_parenthesized_expression: return handle_vertex(v->as()); case ast_tensor: return handle_vertex(v->as()); case ast_tensor_square: return handle_vertex(v->as()); case ast_identifier: return handle_vertex(v->as()); diff --git a/tolk/ast-visitor.h b/tolk/ast-visitor.h index d0a7bfaf..a67f6800 100644 --- a/tolk/ast-visitor.h +++ b/tolk/ast-visitor.h @@ -37,20 +37,40 @@ namespace tolk { class ASTVisitor { protected: - GNU_ATTRIBUTE_ALWAYS_INLINE static void visit_children(const ASTNodeLeaf* v) { + GNU_ATTRIBUTE_ALWAYS_INLINE static void visit_children(const ASTExprLeaf* v) { static_cast(v); } - GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTNodeUnary* v) { + GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTExprUnary* v) { 
visit(v->child); } - GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTNodeBinary* v) { + GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTExprBinary* v) { visit(v->lhs); visit(v->rhs); } - GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTNodeVararg* v) { + GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTExprVararg* v) { + for (AnyExprV child : v->children) { + visit(child); + } + } + + GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTStatementUnary* v) { + visit(v->child); + } + + GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTStatementVararg* v) { + for (AnyV child : v->children) { + visit(child); + } + } + + GNU_ATTRIBUTE_ALWAYS_INLINE static void visit_children(const ASTOtherLeaf* v) { + static_cast(v); + } + + GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTOtherVararg* v) { for (AnyV child : v->children) { visit(child); } @@ -66,8 +86,9 @@ class ASTVisitorFunctionBody : public ASTVisitor { protected: using parent = ASTVisitorFunctionBody; - virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } @@ -76,8 +97,10 @@ protected: virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void 
visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } @@ -87,6 +110,8 @@ protected: virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } @@ -95,8 +120,9 @@ protected: void visit(AnyV v) final { switch (v->type) { - case ast_empty: return visit(v->as()); - case ast_parenthesized_expr: return visit(v->as()); + case ast_empty_statement: return visit(v->as()); + case ast_empty_expression: return visit(v->as()); + case ast_parenthesized_expression: return visit(v->as()); case ast_tensor: return visit(v->as()); case ast_tensor_square: return visit(v->as()); case ast_identifier: return visit(v->as()); @@ -105,6 +131,8 @@ protected: case ast_bool_const: return visit(v->as()); case ast_null_keyword: return visit(v->as()); case ast_self_keyword: return visit(v->as()); + case ast_argument: return visit(v->as()); + case ast_argument_list: return visit(v->as()); case ast_function_call: return visit(v->as()); case ast_dot_method_call: return visit(v->as()); case ast_underscore: return visit(v->as()); @@ -129,27 +157,23 @@ protected: } public: - void start_visiting_function(V v_function) { + virtual void start_visiting_function(V v_function) { visit(v_function->get_body()); } }; -class ASTVisitorAllFunctionsInFile : public ASTVisitorFunctionBody { -protected: - using parent = ASTVisitorAllFunctionsInFile; - - virtual bool should_enter_function(V v) = 0; - -public: - void start_visiting_file(V v_file) { - for (AnyV v : v_file->get_toplevel_declarations()) { +template +void visit_ast_of_all_functions(const AllSrcFiles& all_files) { + for (const SrcFile* file : 
all_files) { + for (AnyV v : file->ast->as()->get_toplevel_declarations()) { if (auto v_func = v->try_as()) { - if (should_enter_function(v_func)) { - visit(v_func->get_body()); + if (v_func->is_regular_function()) { + BodyVisitorT visitor; + visitor.start_visiting_function(v_func); } } } } -}; +} } // namespace tolk diff --git a/tolk/ast.cpp b/tolk/ast.cpp index b1af5100..4e78b013 100644 --- a/tolk/ast.cpp +++ b/tolk/ast.cpp @@ -79,7 +79,7 @@ int Vertex::lookup_idx(std::string_view nameT) const { int Vertex::lookup_idx(std::string_view param_name) const { for (size_t idx = 0; idx < children.size(); ++idx) { - if (children[idx] && children[idx]->as()->get_identifier()->name == param_name) { + if (children[idx] && children[idx]->as()->param_name == param_name) { return static_cast(idx); } } @@ -96,8 +96,64 @@ int Vertex::get_mutate_params_count() const { return n; } -void Vertex::mutate_set_src_file(const SrcFile* file) const { - const_cast(this)->file = file; +// --------------------------------------------------------- +// "assign" methods +// +// From the user's point of view, all AST vertices are constant, fields are public, but can't be modified. +// The only way to modify a field is to call "mutate()" and then use these "assign_*" methods. +// Therefore, there is a guarantee, that all AST mutations are done via these methods, +// easily searched by usages, and there is no another way to modify any other field. 
+ +void ASTNodeExpressionBase::assign_inferred_type(TypeExpr* type) { + this->inferred_type = type; +} + +void ASTNodeExpressionBase::assign_rvalue_true() { + this->is_rvalue = true; +} + +void ASTNodeExpressionBase::assign_lvalue_true() { + this->is_lvalue = true; +} + +void Vertex::assign_sym(const Symbol* sym) { + this->sym = sym; +} + +void Vertex::assign_param_ref(const LocalVarData* self_param) { + this->param_ref = self_param; +} + +void Vertex::assign_fun_ref(const FunctionData* fun_ref) { + this->fun_maybe = fun_ref; +} + +void Vertex::assign_fun_ref(const FunctionData* fun_ref) { + this->fun_ref = fun_ref; +} + +void Vertex::assign_var_ref(const GlobalVarData* var_ref) { + this->var_ref = var_ref; +} + +void Vertex::assign_const_ref(const GlobalConstData* const_ref) { + this->const_ref = const_ref; +} + +void Vertex::assign_param_ref(const LocalVarData* param_ref) { + this->param_ref = param_ref; +} + +void Vertex::assign_fun_ref(const FunctionData* fun_ref) { + this->fun_ref = fun_ref; +} + +void Vertex::assign_var_ref(const Symbol* var_ref) { + this->var_maybe = var_ref; +} + +void Vertex::assign_src_file(const SrcFile* file) { + this->file = file; } } // namespace tolk diff --git a/tolk/ast.h b/tolk/ast.h index fd2b27cb..ccc4ac58 100644 --- a/tolk/ast.h +++ b/tolk/ast.h @@ -17,10 +17,12 @@ #pragma once #include +#include "fwd-declarations.h" #include "platform-utils.h" #include "src-file.h" #include "type-expr.h" #include "lexer.h" +#include "symtable.h" /* * Here we introduce AST representation of Tolk source code. @@ -32,14 +34,18 @@ * * From the user's point of view, all AST vertices are constant. All API is based on constancy. * Even though fields of vertex structs are public, they can't be modified, since vertices are accepted by const ref. 
- * Generally, there are two ways of accepting a vertex: + * Generally, there are three ways of accepting a vertex: * * AnyV (= const ASTNodeBase*) * the only you can do with this vertex is to see v->type (ASTNodeType) and to cast via v->as() + * * AnyExprV (= const ASTNodeExpressionBase*) + * in contains expression-specific properties (lvalue/rvalue, inferred type) * * V (= const Vertex*) * a specific type of vertex, you can use its fields and methods * There is one way of creating a vertex: * * createV(...constructor_args) (= new Vertex(...)) * vertices are currently created on a heap, without any custom memory arena, just allocated and never deleted + * The only way to modify a field is to use "mutate()" method (drops constancy, the only point of mutation) + * and then to call "assign_*" method, like "assign_sym", "assign_src_file", etc. * * Having AnyV and knowing its node_type, a call * v->as() @@ -59,8 +65,9 @@ namespace tolk { enum ASTNodeType { - ast_empty, - ast_parenthesized_expr, + ast_empty_statement, + ast_empty_expression, + ast_parenthesized_expression, ast_tensor, ast_tensor_square, ast_identifier, @@ -111,10 +118,6 @@ enum class AnnotationKind { unknown, }; -struct ASTNodeBase; - -using AnyV = const ASTNodeBase*; - template struct Vertex; @@ -157,7 +160,7 @@ struct ASTNodeBase { return type == node_type ? 
static_cast>(this) : nullptr; } - #ifdef TOLK_DEBUG +#ifdef TOLK_DEBUG std::string to_debug_string() const { return to_debug_string(false); } std::string to_debug_string(bool colored) const; void debug_print() const; @@ -167,46 +170,120 @@ struct ASTNodeBase { void error(const std::string& err_msg) const; }; -struct ASTNodeLeaf : ASTNodeBase { +struct ASTNodeExpressionBase : ASTNodeBase { + TypeExpr* inferred_type = nullptr; // todo make it const + bool is_rvalue: 1 = false; + bool is_lvalue: 1 = false; + + ASTNodeExpressionBase* mutate() const { return const_cast(this); } + void assign_inferred_type(TypeExpr* type); + void assign_rvalue_true(); + void assign_lvalue_true(); + + ASTNodeExpressionBase(ASTNodeType type, SrcLocation loc) : ASTNodeBase(type, loc) {} +}; + +struct ASTNodeStatementBase : ASTNodeBase { + ASTNodeStatementBase(ASTNodeType type, SrcLocation loc) : ASTNodeBase(type, loc) {} +}; + +struct ASTExprLeaf : ASTNodeExpressionBase { friend class ASTVisitor; friend class ASTReplacer; protected: - ASTNodeLeaf(ASTNodeType type, SrcLocation loc) - : ASTNodeBase(type, loc) {} + ASTExprLeaf(ASTNodeType type, SrcLocation loc) + : ASTNodeExpressionBase(type, loc) {} }; -struct ASTNodeUnary : ASTNodeBase { +struct ASTExprUnary : ASTNodeExpressionBase { + friend class ASTVisitor; + friend class ASTReplacer; + +protected: + AnyExprV child; + + ASTExprUnary(ASTNodeType type, SrcLocation loc, AnyExprV child) + : ASTNodeExpressionBase(type, loc), child(child) {} +}; + +struct ASTExprBinary : ASTNodeExpressionBase { + friend class ASTVisitor; + friend class ASTReplacer; + +protected: + AnyExprV lhs; + AnyExprV rhs; + + ASTExprBinary(ASTNodeType type, SrcLocation loc, AnyExprV lhs, AnyExprV rhs) + : ASTNodeExpressionBase(type, loc), lhs(lhs), rhs(rhs) {} +}; + +struct ASTExprVararg : ASTNodeExpressionBase { + friend class ASTVisitor; + friend class ASTReplacer; + +protected: + std::vector children; + + ASTExprVararg(ASTNodeType type, SrcLocation loc, std::vector 
children) + : ASTNodeExpressionBase(type, loc), children(std::move(children)) {} + +public: + int size() const { return static_cast(children.size()); } + bool empty() const { return children.empty(); } +}; + +struct ASTStatementUnary : ASTNodeStatementBase { friend class ASTVisitor; friend class ASTReplacer; protected: AnyV child; - ASTNodeUnary(ASTNodeType type, SrcLocation loc, AnyV child) - : ASTNodeBase(type, loc), child(child) {} + AnyExprV child_as_expr() const { return reinterpret_cast(child); } + + ASTStatementUnary(ASTNodeType type, SrcLocation loc, AnyV child) + : ASTNodeStatementBase(type, loc), child(child) {} }; -struct ASTNodeBinary : ASTNodeBase { - friend class ASTVisitor; - friend class ASTReplacer; - -protected: - AnyV lhs; - AnyV rhs; - - ASTNodeBinary(ASTNodeType type, SrcLocation loc, AnyV lhs, AnyV rhs) - : ASTNodeBase(type, loc), lhs(lhs), rhs(rhs) {} -}; - -struct ASTNodeVararg : ASTNodeBase { +struct ASTStatementVararg : ASTNodeStatementBase { friend class ASTVisitor; friend class ASTReplacer; protected: std::vector children; - ASTNodeVararg(ASTNodeType type, SrcLocation loc, std::vector children) + AnyV child(int i) const { return children.at(i); } + AnyExprV child_as_expr(int i) const { return reinterpret_cast(children.at(i)); } + + ASTStatementVararg(ASTNodeType type, SrcLocation loc, std::vector children) + : ASTNodeStatementBase(type, loc), children(std::move(children)) {} + +public: + int size() const { return static_cast(children.size()); } + bool empty() const { return children.empty(); } +}; + +struct ASTOtherLeaf : ASTNodeBase { + friend class ASTVisitor; + friend class ASTReplacer; + +protected: + ASTOtherLeaf(ASTNodeType type, SrcLocation loc) + : ASTNodeBase(type, loc) {} +}; + +struct ASTOtherVararg : ASTNodeBase { + friend class ASTVisitor; + friend class ASTReplacer; + +protected: + std::vector children; + + AnyV child(int i) const { return children.at(i); } + + ASTOtherVararg(ASTNodeType type, SrcLocation loc, std::vector 
children) : ASTNodeBase(type, loc), children(std::move(children)) {} public: @@ -217,309 +294,383 @@ public: // --------------------------------------------------------- template<> -struct Vertex final : ASTNodeLeaf { +struct Vertex final : ASTStatementVararg { explicit Vertex(SrcLocation loc) - : ASTNodeLeaf(ast_empty, loc) {} + : ASTStatementVararg(ast_empty_statement, loc, {}) {} }; template<> -struct Vertex final : ASTNodeUnary { - AnyV get_expr() const { return child; } - - Vertex(SrcLocation loc, AnyV expr) - : ASTNodeUnary(ast_parenthesized_expr, loc, expr) {} +struct Vertex final : ASTExprLeaf { + explicit Vertex(SrcLocation loc) + : ASTExprLeaf(ast_empty_expression, loc) {} }; template<> -struct Vertex final : ASTNodeVararg { - const std::vector& get_items() const { return children; } - AnyV get_item(int i) const { return children.at(i); } +struct Vertex final : ASTExprUnary { + AnyExprV get_expr() const { return child; } - Vertex(SrcLocation loc, std::vector items) - : ASTNodeVararg(ast_tensor, loc, std::move(items)) {} + Vertex(SrcLocation loc, AnyExprV expr) + : ASTExprUnary(ast_parenthesized_expression, loc, expr) {} }; template<> -struct Vertex final : ASTNodeVararg { - const std::vector& get_items() const { return children; } - AnyV get_item(int i) const { return children.at(i); } +struct Vertex final : ASTExprVararg { + const std::vector& get_items() const { return children; } + AnyExprV get_item(int i) const { return children.at(i); } - Vertex(SrcLocation loc, std::vector items) - : ASTNodeVararg(ast_tensor_square, loc, std::move(items)) {} + Vertex(SrcLocation loc, std::vector items) + : ASTExprVararg(ast_tensor, loc, std::move(items)) {} }; template<> -struct Vertex final : ASTNodeLeaf { +struct Vertex final : ASTExprVararg { + const std::vector& get_items() const { return children; } + AnyExprV get_item(int i) const { return children.at(i); } + + Vertex(SrcLocation loc, std::vector items) + : ASTExprVararg(ast_tensor_square, loc, 
std::move(items)) {} +}; + +template<> +struct Vertex final : ASTExprLeaf { + const Symbol* sym = nullptr; // always filled (after resolved); points to local / global / function / constant std::string_view name; + Vertex* mutate() const { return const_cast(this); } + void assign_sym(const Symbol* sym); + Vertex(SrcLocation loc, std::string_view name) - : ASTNodeLeaf(ast_identifier, loc), name(name) {} + : ASTExprLeaf(ast_identifier, loc) + , name(name) {} }; template<> -struct Vertex final : ASTNodeLeaf { - std::string_view int_val; +struct Vertex final : ASTExprLeaf { + td::RefInt256 intval; // parsed value, 255 for "0xFF" + std::string_view orig_str; // original "0xFF"; empty for nodes generated by compiler (e.g. in constant folding) - Vertex(SrcLocation loc, std::string_view int_val) - : ASTNodeLeaf(ast_int_const, loc), int_val(int_val) {} + Vertex(SrcLocation loc, td::RefInt256 intval, std::string_view orig_str) + : ASTExprLeaf(ast_int_const, loc) + , intval(std::move(intval)) + , orig_str(orig_str) {} }; template<> -struct Vertex final : ASTNodeLeaf { +struct Vertex final : ASTExprLeaf { std::string_view str_val; char modifier; + bool is_bitslice() const { + char m = modifier; + return m == 0 || m == 's' || m == 'a'; + } + bool is_intval() const { + char m = modifier; + return m == 'u' || m == 'h' || m == 'H' || m == 'c'; + } + Vertex(SrcLocation loc, std::string_view str_val, char modifier) - : ASTNodeLeaf(ast_string_const, loc), str_val(str_val), modifier(modifier) {} + : ASTExprLeaf(ast_string_const, loc) + , str_val(str_val), modifier(modifier) {} }; template<> -struct Vertex final : ASTNodeLeaf { +struct Vertex final : ASTExprLeaf { bool bool_val; Vertex(SrcLocation loc, bool bool_val) - : ASTNodeLeaf(ast_bool_const, loc), bool_val(bool_val) {} + : ASTExprLeaf(ast_bool_const, loc) + , bool_val(bool_val) {} }; template<> -struct Vertex final : ASTNodeLeaf { +struct Vertex final : ASTExprLeaf { explicit Vertex(SrcLocation loc) - : 
ASTNodeLeaf(ast_null_keyword, loc) {} + : ASTExprLeaf(ast_null_keyword, loc) {} }; template<> -struct Vertex final : ASTNodeLeaf { +struct Vertex final : ASTExprLeaf { + const LocalVarData* param_ref = nullptr; // filled after resolve identifiers, points to `self` parameter + + Vertex* mutate() const { return const_cast(this); } + void assign_param_ref(const LocalVarData* self_param); + explicit Vertex(SrcLocation loc) - : ASTNodeLeaf(ast_self_keyword, loc) {} + : ASTExprLeaf(ast_self_keyword, loc) {} }; template<> -struct Vertex final : ASTNodeUnary { +struct Vertex final : ASTExprUnary { bool passed_as_mutate; // when called `f(mutate arg)`, not `f(arg)` - AnyV get_expr() const { return child; } + AnyExprV get_expr() const { return child; } - explicit Vertex(SrcLocation loc, AnyV expr, bool passed_as_mutate) - : ASTNodeUnary(ast_argument, loc, expr), passed_as_mutate(passed_as_mutate) {} + Vertex(SrcLocation loc, AnyExprV expr, bool passed_as_mutate) + : ASTExprUnary(ast_argument, loc, expr) + , passed_as_mutate(passed_as_mutate) {} }; template<> -struct Vertex final : ASTNodeVararg { - const std::vector& get_arguments() const { return children; } +struct Vertex final : ASTExprVararg { + const std::vector& get_arguments() const { return children; } auto get_arg(int i) const { return children.at(i)->as(); } - explicit Vertex(SrcLocation loc, std::vector arguments) - : ASTNodeVararg(ast_argument_list, loc, std::move(arguments)) {} + Vertex(SrcLocation loc, std::vector arguments) + : ASTExprVararg(ast_argument_list, loc, std::move(arguments)) {} }; template<> -struct Vertex final : ASTNodeBinary { - AnyV get_called_f() const { return lhs; } +struct Vertex final : ASTExprBinary { + const FunctionData* fun_maybe = nullptr; // filled after resolve; remains nullptr for `localVar()` / `getF()()` + + AnyExprV get_called_f() const { return lhs; } auto get_arg_list() const { return rhs->as(); } int get_num_args() const { return rhs->as()->size(); } auto get_arg(int i) const 
{ return rhs->as()->get_arg(i); } - Vertex(SrcLocation loc, AnyV lhs_f, V arguments) - : ASTNodeBinary(ast_function_call, loc, lhs_f, arguments) {} + Vertex* mutate() const { return const_cast(this); } + void assign_fun_ref(const FunctionData* fun_ref); + + Vertex(SrcLocation loc, AnyExprV lhs_f, V arguments) + : ASTExprBinary(ast_function_call, loc, lhs_f, arguments) {} }; template<> -struct Vertex final : ASTNodeBinary { +struct Vertex final : ASTExprBinary { + const FunctionData* fun_ref = nullptr; // points to global function (after resolve) std::string_view method_name; - AnyV get_obj() const { return lhs; } + AnyExprV get_obj() const { return lhs; } auto get_arg_list() const { return rhs->as(); } + int get_num_args() const { return rhs->as()->size(); } + auto get_arg(int i) const { return rhs->as()->get_arg(i); } - Vertex(SrcLocation loc, std::string_view method_name, AnyV lhs, V arguments) - : ASTNodeBinary(ast_dot_method_call, loc, lhs, arguments), method_name(method_name) {} + Vertex* mutate() const { return const_cast(this); } + void assign_fun_ref(const FunctionData* fun_ref); + + Vertex(SrcLocation loc, std::string_view method_name, AnyExprV lhs, V arguments) + : ASTExprBinary(ast_dot_method_call, loc, lhs, arguments) + , method_name(method_name) {} }; template<> -struct Vertex final : ASTNodeUnary { - TypeExpr* declared_type; // may be nullptr +struct Vertex final : ASTStatementUnary { + const GlobalVarData* var_ref = nullptr; // filled after register + TypeExpr* declared_type; auto get_identifier() const { return child->as(); } + Vertex* mutate() const { return const_cast(this); } + void assign_var_ref(const GlobalVarData* var_ref); + Vertex(SrcLocation loc, V name_identifier, TypeExpr* declared_type) - : ASTNodeUnary(ast_global_var_declaration, loc, name_identifier), declared_type(declared_type) {} + : ASTStatementUnary(ast_global_var_declaration, loc, name_identifier) + , declared_type(declared_type) {} }; template<> -struct Vertex final : 
ASTNodeBinary { +struct Vertex final : ASTStatementVararg { + const GlobalConstData* const_ref = nullptr; // filled after register TypeExpr* declared_type; // may be nullptr - auto get_identifier() const { return lhs->as(); } - AnyV get_init_value() const { return rhs; } + auto get_identifier() const { return child(0)->as(); } + AnyExprV get_init_value() const { return child_as_expr(1); } - Vertex(SrcLocation loc, V name_identifier, TypeExpr* declared_type, AnyV init_value) - : ASTNodeBinary(ast_constant_declaration, loc, name_identifier, init_value), declared_type(declared_type) {} + Vertex* mutate() const { return const_cast(this); } + void assign_const_ref(const GlobalConstData* const_ref); + + Vertex(SrcLocation loc, V name_identifier, TypeExpr* declared_type, AnyExprV init_value) + : ASTStatementVararg(ast_constant_declaration, loc, {name_identifier, init_value}) + , declared_type(declared_type) {} }; template<> -struct Vertex final : ASTNodeLeaf { +struct Vertex final : ASTExprLeaf { explicit Vertex(SrcLocation loc) - : ASTNodeLeaf(ast_underscore, loc) {} + : ASTExprLeaf(ast_underscore, loc) {} }; template<> -struct Vertex final : ASTNodeUnary { +struct Vertex final : ASTExprUnary { std::string_view operator_name; TokenType tok; - AnyV get_rhs() const { return child; } + AnyExprV get_rhs() const { return child; } - Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyV rhs) - : ASTNodeUnary(ast_unary_operator, loc, rhs), operator_name(operator_name), tok(tok) {} + Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyExprV rhs) + : ASTExprUnary(ast_unary_operator, loc, rhs) + , operator_name(operator_name), tok(tok) {} }; template<> -struct Vertex final : ASTNodeBinary { +struct Vertex final : ASTExprBinary { std::string_view operator_name; TokenType tok; - AnyV get_lhs() const { return lhs; } - AnyV get_rhs() const { return rhs; } + AnyExprV get_lhs() const { return lhs; } + AnyExprV get_rhs() const { return rhs; } - 
Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyV lhs, AnyV rhs) - : ASTNodeBinary(ast_binary_operator, loc, lhs, rhs), operator_name(operator_name), tok(tok) {} + bool is_set_assign() const { + TokenType t = tok; + return t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || + t == tok_set_mod || t == tok_set_lshift || t == tok_set_rshift || + t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor; + } + + bool is_assign() const { + return tok == tok_assign; + } + + Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyExprV lhs, AnyExprV rhs) + : ASTExprBinary(ast_binary_operator, loc, lhs, rhs) + , operator_name(operator_name), tok(tok) {} }; template<> -struct Vertex final : ASTNodeVararg { - AnyV get_cond() const { return children.at(0); } - AnyV get_when_true() const { return children.at(1); } - AnyV get_when_false() const { return children.at(2); } +struct Vertex final : ASTExprVararg { + AnyExprV get_cond() const { return children.at(0); } + AnyExprV get_when_true() const { return children.at(1); } + AnyExprV get_when_false() const { return children.at(2); } - Vertex(SrcLocation loc, AnyV cond, AnyV when_true, AnyV when_false) - : ASTNodeVararg(ast_ternary_operator, loc, {cond, when_true, when_false}) {} + Vertex(SrcLocation loc, AnyExprV cond, AnyExprV when_true, AnyExprV when_false) + : ASTExprVararg(ast_ternary_operator, loc, {cond, when_true, when_false}) {} }; template<> -struct Vertex : ASTNodeUnary { - AnyV get_return_value() const { return child; } +struct Vertex : ASTStatementUnary { + AnyExprV get_return_value() const { return child_as_expr(); } - Vertex(SrcLocation loc, AnyV child) - : ASTNodeUnary(ast_return_statement, loc, child) {} + Vertex(SrcLocation loc, AnyExprV child) + : ASTStatementUnary(ast_return_statement, loc, child) {} }; template<> -struct Vertex final : ASTNodeVararg { +struct Vertex final : ASTStatementVararg { SrcLocation 
loc_end; const std::vector& get_items() const { return children; } AnyV get_item(int i) const { return children.at(i); } Vertex(SrcLocation loc, SrcLocation loc_end, std::vector items) - : ASTNodeVararg(ast_sequence, loc, std::move(items)), loc_end(loc_end) {} + : ASTStatementVararg(ast_sequence, loc, std::move(items)) + , loc_end(loc_end) {} }; template<> -struct Vertex final : ASTNodeBinary { - AnyV get_cond() const { return lhs; } - auto get_body() const { return rhs->as(); } +struct Vertex final : ASTStatementVararg { + AnyExprV get_cond() const { return child_as_expr(0); } + auto get_body() const { return child(1)->as(); } - Vertex(SrcLocation loc, AnyV cond, V body) - : ASTNodeBinary(ast_repeat_statement, loc, cond, body) {} + Vertex(SrcLocation loc, AnyExprV cond, V body) + : ASTStatementVararg(ast_repeat_statement, loc, {cond, body}) {} }; template<> -struct Vertex final : ASTNodeBinary { - AnyV get_cond() const { return lhs; } - auto get_body() const { return rhs->as(); } +struct Vertex final : ASTStatementVararg { + AnyExprV get_cond() const { return child_as_expr(0); } + auto get_body() const { return child(1)->as(); } - Vertex(SrcLocation loc, AnyV cond, V body) - : ASTNodeBinary(ast_while_statement, loc, cond, body) {} + Vertex(SrcLocation loc, AnyExprV cond, V body) + : ASTStatementVararg(ast_while_statement, loc, {cond, body}) {} }; template<> -struct Vertex final : ASTNodeBinary { - auto get_body() const { return lhs->as(); } - AnyV get_cond() const { return rhs; } +struct Vertex final : ASTStatementVararg { + auto get_body() const { return child(0)->as(); } + AnyExprV get_cond() const { return child_as_expr(1); } - Vertex(SrcLocation loc, V body, AnyV cond) - : ASTNodeBinary(ast_do_while_statement, loc, body, cond) {} + Vertex(SrcLocation loc, V body, AnyExprV cond) + : ASTStatementVararg(ast_do_while_statement, loc, {body, cond}) {} }; template<> -struct Vertex final : ASTNodeBinary { - AnyV get_thrown_code() const { return lhs; } - AnyV 
get_thrown_arg() const { return rhs; } // may be ast_empty - bool has_thrown_arg() const { return rhs->type != ast_empty; } +struct Vertex final : ASTStatementVararg { + AnyExprV get_thrown_code() const { return child_as_expr(0); } + AnyExprV get_thrown_arg() const { return child_as_expr(1); } // may be ast_empty + bool has_thrown_arg() const { return child_as_expr(1)->type != ast_empty_expression; } - Vertex(SrcLocation loc, AnyV thrown_code, AnyV thrown_arg) - : ASTNodeBinary(ast_throw_statement, loc, thrown_code, thrown_arg) {} + Vertex(SrcLocation loc, AnyExprV thrown_code, AnyExprV thrown_arg) + : ASTStatementVararg(ast_throw_statement, loc, {thrown_code, thrown_arg}) {} }; template<> -struct Vertex final : ASTNodeBinary { - AnyV get_cond() const { return lhs; } - AnyV get_thrown_code() const { return rhs; } +struct Vertex final : ASTStatementVararg { + AnyExprV get_cond() const { return child_as_expr(0); } + AnyExprV get_thrown_code() const { return child_as_expr(1); } - Vertex(SrcLocation loc, AnyV cond, AnyV thrown_code) - : ASTNodeBinary(ast_assert_statement, loc, cond, thrown_code) {} + Vertex(SrcLocation loc, AnyExprV cond, AnyExprV thrown_code) + : ASTStatementVararg(ast_assert_statement, loc, {cond, thrown_code}) {} }; template<> -struct Vertex final : ASTNodeVararg { +struct Vertex final : ASTStatementVararg { auto get_try_body() const { return children.at(0)->as(); } auto get_catch_expr() const { return children.at(1)->as(); } // (excNo, arg), always len 2 auto get_catch_body() const { return children.at(2)->as(); } Vertex(SrcLocation loc, V try_body, V catch_expr, V catch_body) - : ASTNodeVararg(ast_try_catch_statement, loc, {try_body, catch_expr, catch_body}) {} + : ASTStatementVararg(ast_try_catch_statement, loc, {try_body, catch_expr, catch_body}) {} }; template<> -struct Vertex final : ASTNodeVararg { +struct Vertex final : ASTStatementVararg { bool is_ifnot; // if(!cond), to generate more optimal fift code - AnyV get_cond() const { return 
children.at(0); } - auto get_if_body() const { return children.at(1)->as(); } - auto get_else_body() const { return children.at(2)->as(); } // always exists (when else omitted, it's empty) + AnyExprV get_cond() const { return child_as_expr(0); } + auto get_if_body() const { return child(1)->as(); } + auto get_else_body() const { return child(2)->as(); } // always exists (when else omitted, it's empty) - Vertex(SrcLocation loc, bool is_ifnot, AnyV cond, V if_body, V else_body) - : ASTNodeVararg(ast_if_statement, loc, {cond, if_body, else_body}), is_ifnot(is_ifnot) {} + Vertex(SrcLocation loc, bool is_ifnot, AnyExprV cond, V if_body, V else_body) + : ASTStatementVararg(ast_if_statement, loc, {cond, if_body, else_body}) + , is_ifnot(is_ifnot) {} }; template<> -struct Vertex final : ASTNodeLeaf { +struct Vertex final : ASTOtherLeaf { TypeExpr* created_type; // used to keep same pointer, since TypeExpr::new_var(i) always allocates std::string_view nameT; Vertex(SrcLocation loc, TypeExpr* created_type, std::string_view nameT) - : ASTNodeLeaf(ast_genericsT_item, loc), created_type(created_type), nameT(nameT) {} + : ASTOtherLeaf(ast_genericsT_item, loc) + , created_type(created_type), nameT(nameT) {} }; template<> -struct Vertex final : ASTNodeVararg { +struct Vertex final : ASTOtherVararg { std::vector get_items() const { return children; } auto get_item(int i) const { return children.at(i)->as(); } Vertex(SrcLocation loc, std::vector genericsT_items) - : ASTNodeVararg(ast_genericsT_list, loc, std::move(genericsT_items)) {} + : ASTOtherVararg(ast_genericsT_list, loc, std::move(genericsT_items)) {} int lookup_idx(std::string_view nameT) const; }; template<> -struct Vertex final : ASTNodeUnary { - TypeExpr* param_type; +struct Vertex final : ASTOtherLeaf { + const LocalVarData* param_ref = nullptr; // filled after resolved + std::string_view param_name; + TypeExpr* declared_type; bool declared_as_mutate; // declared as `mutate param_name` - auto get_identifier() const { 
return child->as(); } // for underscore, name is empty - bool is_underscore() const { return child->as()->name.empty(); } + bool is_underscore() const { return param_name.empty(); } - Vertex(SrcLocation loc, V name_identifier, TypeExpr* param_type, bool declared_as_mutate) - : ASTNodeUnary(ast_parameter, loc, name_identifier), param_type(param_type), declared_as_mutate(declared_as_mutate) {} + Vertex* mutate() const { return const_cast(this); } + void assign_param_ref(const LocalVarData* param_ref); + + Vertex(SrcLocation loc, std::string_view param_name, TypeExpr* declared_type, bool declared_as_mutate) + : ASTOtherLeaf(ast_parameter, loc) + , param_name(param_name), declared_type(declared_type), declared_as_mutate(declared_as_mutate) {} }; template<> -struct Vertex final : ASTNodeVararg { +struct Vertex final : ASTOtherVararg { const std::vector& get_params() const { return children; } auto get_param(int i) const { return children.at(i)->as(); } Vertex(SrcLocation loc, std::vector params) - : ASTNodeVararg(ast_parameter_list, loc, std::move(params)) {} + : ASTOtherVararg(ast_parameter_list, loc, std::move(params)) {} int lookup_idx(std::string_view param_name) const; int get_mutate_params_count() const; @@ -527,57 +678,64 @@ struct Vertex final : ASTNodeVararg { }; template<> -struct Vertex final : ASTNodeVararg { +struct Vertex final : ASTStatementVararg { std::vector arg_order; std::vector ret_order; const std::vector& get_asm_commands() const { return children; } // ast_string_const[] Vertex(SrcLocation loc, std::vector arg_order, std::vector ret_order, std::vector asm_commands) - : ASTNodeVararg(ast_asm_body, loc, std::move(asm_commands)), arg_order(std::move(arg_order)), ret_order(std::move(ret_order)) {} + : ASTStatementVararg(ast_asm_body, loc, std::move(asm_commands)) + , arg_order(std::move(arg_order)), ret_order(std::move(ret_order)) {} }; template<> -struct Vertex final : ASTNodeUnary { +struct Vertex final : ASTOtherVararg { AnnotationKind kind; - 
auto get_arg() const { return child->as(); } + auto get_arg() const { return child(0)->as(); } static AnnotationKind parse_kind(std::string_view name); Vertex(SrcLocation loc, AnnotationKind kind, V arg_probably_empty) - : ASTNodeUnary(ast_annotation, loc, arg_probably_empty), kind(kind) {} + : ASTOtherVararg(ast_annotation, loc, {arg_probably_empty}) + , kind(kind) {} }; template<> -struct Vertex final : ASTNodeUnary { +struct Vertex final : ASTExprUnary { + const Symbol* var_maybe = nullptr; // typically local var; can be global var if `var g_v redef`; remains nullptr for underscore TypeExpr* declared_type; bool is_immutable; // declared via 'val', not 'var' bool marked_as_redef; // var (existing_var redef, new_var: int) = ... - AnyV get_identifier() const { return child; } // ast_identifier / ast_underscore + AnyExprV get_identifier() const { return child; } // ast_identifier / ast_underscore - Vertex(SrcLocation loc, AnyV name_identifier, TypeExpr* declared_type, bool is_immutable, bool marked_as_redef) - : ASTNodeUnary(ast_local_var, loc, name_identifier), declared_type(declared_type), is_immutable(is_immutable), marked_as_redef(marked_as_redef) {} + Vertex* mutate() const { return const_cast(this); } + void assign_var_ref(const Symbol* var_ref); + + Vertex(SrcLocation loc, AnyExprV name_identifier, TypeExpr* declared_type, bool is_immutable, bool marked_as_redef) + : ASTExprUnary(ast_local_var, loc, name_identifier), declared_type(declared_type), is_immutable(is_immutable), marked_as_redef(marked_as_redef) {} }; template<> -struct Vertex final : ASTNodeBinary { - AnyV get_lhs() const { return lhs; } // ast_local_var / ast_tensor / ast_tensor_square - AnyV get_assigned_val() const { return rhs; } +struct Vertex final : ASTStatementVararg { + AnyExprV get_lhs() const { return child_as_expr(0); } // ast_local_var / ast_tensor / ast_tensor_square + AnyExprV get_assigned_val() const { return child_as_expr(1); } - Vertex(SrcLocation loc, AnyV lhs, AnyV 
assigned_val) - : ASTNodeBinary(ast_local_vars_declaration, loc, lhs, assigned_val) {} + Vertex(SrcLocation loc, AnyExprV lhs, AnyExprV assigned_val) + : ASTStatementVararg(ast_local_vars_declaration, loc, {lhs, assigned_val}) {} }; template<> -struct Vertex final : ASTNodeVararg { - auto get_identifier() const { return children.at(0)->as(); } - int get_num_params() const { return children.at(1)->as()->size(); } - auto get_param_list() const { return children.at(1)->as(); } - auto get_param(int i) const { return children.at(1)->as()->get_param(i); } - AnyV get_body() const { return children.at(2); } // ast_sequence / ast_asm_body +struct Vertex final : ASTOtherVararg { + auto get_identifier() const { return child(0)->as(); } + int get_num_params() const { return child(1)->as()->size(); } + auto get_param_list() const { return child(1)->as(); } + auto get_param(int i) const { return child(1)->as()->get_param(i); } + AnyV get_body() const { return child(2); } // ast_sequence / ast_asm_body + const FunctionData* fun_ref = nullptr; // filled after register TypeExpr* ret_type = nullptr; V genericsT_list = nullptr; bool is_entrypoint = false; @@ -591,42 +749,49 @@ struct Vertex final : ASTNodeVararg { V method_id = nullptr; bool is_asm_function() const { return children.at(2)->type == ast_asm_body; } + bool is_regular_function() const { return children.at(2)->type == ast_sequence; } + bool is_builtin_function() const { return marked_as_builtin; } + + Vertex* mutate() const { return const_cast(this); } + void assign_fun_ref(const FunctionData* fun_ref); Vertex(SrcLocation loc, V name_identifier, V parameters, AnyV body) - : ASTNodeVararg(ast_function_declaration, loc, {name_identifier, parameters, body}) {} + : ASTOtherVararg(ast_function_declaration, loc, {name_identifier, parameters, body}) {} }; template<> -struct Vertex final : ASTNodeLeaf { - TokenType cmp_tok; +struct Vertex final : ASTOtherLeaf { std::string_view semver; - Vertex(SrcLocation loc, TokenType cmp_tok, 
std::string_view semver) - : ASTNodeLeaf(ast_tolk_required_version, loc), cmp_tok(cmp_tok), semver(semver) {} + Vertex(SrcLocation loc, std::string_view semver) + : ASTOtherLeaf(ast_tolk_required_version, loc) + , semver(semver) {} }; template<> -struct Vertex final : ASTNodeUnary { - const SrcFile* file = nullptr; // assigned after includes have been resolved +struct Vertex final : ASTOtherVararg { + const SrcFile* file = nullptr; // assigned after imports have been resolved - auto get_file_leaf() const { return child->as(); } + auto get_file_leaf() const { return child(0)->as(); } - std::string get_file_name() const { return static_cast(child->as()->str_val); } + std::string get_file_name() const { return static_cast(child(0)->as()->str_val); } - void mutate_set_src_file(const SrcFile* file) const; + Vertex* mutate() const { return const_cast(this); } + void assign_src_file(const SrcFile* file); Vertex(SrcLocation loc, V file_name) - : ASTNodeUnary(ast_import_statement, loc, file_name) {} + : ASTOtherVararg(ast_import_statement, loc, {file_name}) {} }; template<> -struct Vertex final : ASTNodeVararg { +struct Vertex final : ASTOtherVararg { const SrcFile* const file; const std::vector& get_toplevel_declarations() const { return children; } Vertex(const SrcFile* file, std::vector toplevel_declarations) - : ASTNodeVararg(ast_tolk_file, SrcLocation(file), std::move(toplevel_declarations)), file(file) {} + : ASTOtherVararg(ast_tolk_file, SrcLocation(file), std::move(toplevel_declarations)) + , file(file) {} }; } // namespace tolk diff --git a/tolk/builtins.cpp b/tolk/builtins.cpp index d18cfa64..01b096da 100644 --- a/tolk/builtins.cpp +++ b/tolk/builtins.cpp @@ -20,82 +20,64 @@ namespace tolk { using namespace std::literals::string_literals; -/* - * - * SYMBOL VALUES - * - */ - -SymDef* define_builtin_func_impl(const std::string& name, SymValAsmFunc* func_val) { - sym_idx_t name_idx = G.symbols.lookup_add(name); - SymDef* def = define_global_symbol(name_idx); - 
tolk_assert(!def->value); - - def->value = func_val; -#ifdef TOLK_DEBUG - def->value->sym_name = name; -#endif - return def; -} - -// given func_type = `(slice, int) -> slice` and func flags, create SymDef for parameters +// given func_type = `(slice, int) -> slice` and func flags, create SymLocalVarOrParameter // currently (see at the bottom) parameters of built-in functions are unnamed: // built-in functions are created using a resulting type -static std::vector define_builtin_parameters(const TypeExpr* func_type, int func_flags) { +static std::vector define_builtin_parameters(const TypeExpr* func_type, int func_flags) { // `loadInt()`, `storeInt()`: they accept `self` and mutate it; no other options available in built-ins for now - bool is_mutate_self = func_flags & SymValFunc::flagHasMutateParams; + bool is_mutate_self = func_flags & FunctionData::flagHasMutateParams; // func_type a map (params_type -> ret_type), probably surrounded by forall (internal representation of ) TypeExpr* params_type = func_type->constr == TypeExpr::te_ForAll ? 
func_type->args[0]->args[0] : func_type->args[0]; - std::vector parameters; + std::vector parameters; if (params_type->constr == TypeExpr::te_Tensor) { // multiple parameters: it's a tensor parameters.reserve(params_type->args.size()); for (int i = 0; i < static_cast(params_type->args.size()); ++i) { - SymDef* sym_def = define_parameter(i, {}); - SymValVariable* sym_val = new SymValVariable(i, params_type->args[i]); + LocalVarData p_sym("", {}, i, params_type->args[i]); if (i == 0 && is_mutate_self) { - sym_val->flags |= SymValVariable::flagMutateParameter; + p_sym.flags |= LocalVarData::flagMutateParameter; } - sym_def->value = sym_val; - parameters.emplace_back(sym_def); + parameters.push_back(std::move(p_sym)); } } else { // single parameter - SymDef* sym_def = define_parameter(0, {}); - SymValVariable* sym_val = new SymValVariable(0, params_type); + LocalVarData p_sym("", {}, 0, params_type); if (is_mutate_self) { - sym_val->flags |= SymValVariable::flagMutateParameter; + p_sym.flags |= LocalVarData::flagMutateParameter; } - sym_def->value = sym_val; - parameters.emplace_back(sym_def); + parameters.push_back(std::move(p_sym)); } return parameters; } -static SymDef* define_builtin_func(const std::string& name, TypeExpr* func_type, const simple_compile_func_t& func, int flags) { - return define_builtin_func_impl(name, new SymValAsmFunc(define_builtin_parameters(func_type, flags), func_type, func, flags | SymValFunc::flagBuiltinFunction)); +static void define_builtin_func(const std::string& name, TypeExpr* func_type, const simple_compile_func_t& func, int flags) { + auto* f_sym = new FunctionData(name, {}, func_type, define_builtin_parameters(func_type, flags), flags, new FunctionBodyBuiltin(func)); + G.symtable.add_function(f_sym); } -static SymDef* define_builtin_func(const std::string& name, TypeExpr* func_type, const AsmOp& macro, int flags) { - return define_builtin_func_impl(name, new SymValAsmFunc(define_builtin_parameters(func_type, flags), func_type, 
make_simple_compile(macro), flags | SymValFunc::flagBuiltinFunction)); +static void define_builtin_func(const std::string& name, TypeExpr* func_type, const AsmOp& macro, int flags) { + auto* f_sym = new FunctionData(name, {}, func_type, define_builtin_parameters(func_type, flags), flags, new FunctionBodyBuiltin(make_simple_compile(macro))); + G.symtable.add_function(f_sym); } -static SymDef* define_builtin_func(const std::string& name, TypeExpr* func_type, const simple_compile_func_t& func, int flags, - std::initializer_list arg_order, std::initializer_list ret_order) { - return define_builtin_func_impl(name, new SymValAsmFunc(define_builtin_parameters(func_type, flags), func_type, func, flags | SymValFunc::flagBuiltinFunction, arg_order, ret_order)); +static void define_builtin_func(const std::string& name, TypeExpr* func_type, const simple_compile_func_t& func, int flags, + std::initializer_list arg_order, std::initializer_list ret_order) { + auto* f_sym = new FunctionData(name, {}, func_type, define_builtin_parameters(func_type, flags), flags, new FunctionBodyBuiltin(func)); + f_sym->arg_order = arg_order; + f_sym->ret_order = ret_order; + G.symtable.add_function(f_sym); } -bool SymValAsmFunc::compile(AsmOpList& dest, std::vector& out, std::vector& in, - SrcLocation where) const { - if (simple_compile) { - return dest.append(simple_compile(out, in, where)); - } else if (ext_compile) { - return ext_compile(dest, out, in); - } else { - return false; - } +void FunctionBodyBuiltin::compile(AsmOpList& dest, std::vector& out, std::vector& in, + SrcLocation where) const { + dest.append(simple_compile(out, in, where)); } +void FunctionBodyAsm::compile(AsmOpList& dest) const { + dest.append(ops); +} + + /* * * DEFINE BUILT-IN FUNCTIONS @@ -1119,91 +1101,71 @@ void define_builtins() { TypeExpr* throw_arg_op = TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_tensor({X, Int}), Unit)); define_builtin_func("_+_", arith_bin_op, compile_add, - 
SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_-_", arith_bin_op, compile_sub, - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("-_", arith_un_op, compile_unary_minus, - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("+_", arith_un_op, compile_unary_plus, - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_*_", arith_bin_op, compile_mul, - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, -1), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_~/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 0), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_^/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 1), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_%_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, -1), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_<<_", arith_bin_op, compile_lshift, - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_~>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 0), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_^>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 1), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("!_", arith_un_op, compile_logical_not, - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("~_", arith_un_op, compile_bitwise_not, - SymValFunc::flagMarkedAsPure); + 
FunctionData::flagMarkedAsPure); define_builtin_func("_&_", arith_bin_op, compile_bitwise_and, - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_|_", arith_bin_op, compile_bitwise_or, - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_^_", arith_bin_op, compile_bitwise_xor, - SymValFunc::flagMarkedAsPure); - define_builtin_func("^_+=_", arith_bin_op, compile_add, - SymValFunc::flagMarkedAsPure); - define_builtin_func("^_-=_", arith_bin_op, compile_sub, - SymValFunc::flagMarkedAsPure); - define_builtin_func("^_*=_", arith_bin_op, compile_mul, - SymValFunc::flagMarkedAsPure); - define_builtin_func("^_/=_", arith_bin_op, std::bind(compile_div, _1, _2, _3, -1), - SymValFunc::flagMarkedAsPure); - define_builtin_func("^_%=_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, -1), - SymValFunc::flagMarkedAsPure); - define_builtin_func("^_<<=_", arith_bin_op, compile_lshift, - SymValFunc::flagMarkedAsPure); - define_builtin_func("^_>>=_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1), - SymValFunc::flagMarkedAsPure); - define_builtin_func("^_&=_", arith_bin_op, compile_bitwise_and, - SymValFunc::flagMarkedAsPure); - define_builtin_func("^_|=_", arith_bin_op, compile_bitwise_or, - SymValFunc::flagMarkedAsPure); - define_builtin_func("^_^=_", arith_bin_op, compile_bitwise_xor, - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_==_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 2), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_!=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 5), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_<_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 4), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_>_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 1), - 
SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_<=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 6), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_>=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 3), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("_<=>_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 7), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("mulDivFloor", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, -1), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("mulDivRound", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 0), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("mulDivCeil", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 1), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("mulDivMod", TypeExpr::new_map(Int3, Int2), AsmOp::Custom("MULDIVMOD", 3, 2), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("__true", TypeExpr::new_map(TypeExpr::new_unit(), Int), /* AsmOp::Const("TRUE") */ std::bind(compile_bool_const, _1, _2, true), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("__false", TypeExpr::new_map(TypeExpr::new_unit(), Int), /* AsmOp::Const("FALSE") */ std::bind(compile_bool_const, _1, _2, false), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("__null", TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_unit(), X)), AsmOp::Const("PUSHNULL"), - SymValFunc::flagMarkedAsPure); + FunctionData::flagMarkedAsPure); define_builtin_func("__isNull", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Int)), compile_is_null, - SymValFunc::flagMarkedAsPure); + 
FunctionData::flagMarkedAsPure); define_builtin_func("__throw", impure_un_op, compile_throw, 0); define_builtin_func("__throw_arg", throw_arg_op, compile_throw_arg, @@ -1211,23 +1173,28 @@ void define_builtins() { define_builtin_func("__throw_if_unless", TypeExpr::new_map(Int3, Unit), compile_throw_if_unless, 0); define_builtin_func("loadInt", fetch_int_op_mutate, std::bind(compile_fetch_int, _1, _2, true, true), - SymValFunc::flagMarkedAsPure | SymValFunc::flagHasMutateParams | SymValFunc::flagAcceptsSelf, {}, {1, 0}); + FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf, + {}, {1, 0}); define_builtin_func("loadUint", fetch_int_op_mutate, std::bind(compile_fetch_int, _1, _2, true, false), - SymValFunc::flagMarkedAsPure | SymValFunc::flagHasMutateParams | SymValFunc::flagAcceptsSelf, {}, {1, 0}); + FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf, + {}, {1, 0}); define_builtin_func("loadBits", fetch_slice_op_mutate, std::bind(compile_fetch_slice, _1, _2, true), - SymValFunc::flagMarkedAsPure | SymValFunc::flagHasMutateParams | SymValFunc::flagAcceptsSelf, {}, {1, 0}); + FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf, + {}, {1, 0}); define_builtin_func("preloadInt", prefetch_int_op, std::bind(compile_fetch_int, _1, _2, false, true), - SymValFunc::flagMarkedAsPure | SymValFunc::flagAcceptsSelf); + FunctionData::flagMarkedAsPure | FunctionData::flagAcceptsSelf); define_builtin_func("preloadUint", prefetch_int_op, std::bind(compile_fetch_int, _1, _2, false, false), - SymValFunc::flagMarkedAsPure | SymValFunc::flagAcceptsSelf); + FunctionData::flagMarkedAsPure | FunctionData::flagAcceptsSelf); define_builtin_func("preloadBits", prefetch_slice_op, std::bind(compile_fetch_slice, _1, _2, false), - SymValFunc::flagMarkedAsPure | SymValFunc::flagAcceptsSelf); + FunctionData::flagMarkedAsPure | FunctionData::flagAcceptsSelf); 
define_builtin_func("storeInt", store_int_mutate, std::bind(compile_store_int, _1, _2, true), - SymValFunc::flagMarkedAsPure | SymValFunc::flagHasMutateParams | SymValFunc::flagAcceptsSelf | SymValFunc::flagReturnsSelf, {1, 0, 2}, {}); + FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf | FunctionData::flagReturnsSelf, + {1, 0, 2}, {}); define_builtin_func("storeUint", store_int_mutate, std::bind(compile_store_int, _1, _2, false), - SymValFunc::flagMarkedAsPure | SymValFunc::flagHasMutateParams | SymValFunc::flagAcceptsSelf | SymValFunc::flagReturnsSelf, {1, 0, 2}, {}); + FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf | FunctionData::flagReturnsSelf, + {1, 0, 2}, {}); define_builtin_func("tupleAt", TypeExpr::new_forall({X}, TypeExpr::new_map(TupleInt, X)), compile_tuple_at, - SymValFunc::flagMarkedAsPure | SymValFunc::flagAcceptsSelf); + FunctionData::flagMarkedAsPure | FunctionData::flagAcceptsSelf); define_builtin_func("debugPrint", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Unit)), AsmOp::Custom("s0 DUMP DROP", 1, 1), 0); diff --git a/tolk/codegen.cpp b/tolk/codegen.cpp index 9a90a3ed..0529696f 100644 --- a/tolk/codegen.cpp +++ b/tolk/codegen.cpp @@ -314,7 +314,7 @@ bool Op::generate_code_step(Stack& stack) { return true; } case _GlobVar: - if (dynamic_cast(fun_ref->value)) { + if (g_sym) { bool used = false; for (auto i : left) { auto p = next->var_info[i]; @@ -325,8 +325,7 @@ bool Op::generate_code_step(Stack& stack) { if (!used || disabled()) { return true; } - std::string name = G.symbols.get_name(fun_ref->sym_idx); - stack.o << AsmOp::Custom(name + " GETGLOB", 0, 1); + stack.o << AsmOp::Custom(g_sym->name + " GETGLOB", 0, 1); if (left.size() != 1) { tolk_assert(left.size() <= 15); stack.o << AsmOp::UnTuple((int)left.size()); @@ -343,14 +342,14 @@ bool Op::generate_code_step(Stack& stack) { } stack.o << "CONT:<{"; stack.o.indent(); - auto func = 
dynamic_cast(fun_ref->value); - if (func) { + if (f_sym->is_asm_function() || f_sym->is_builtin_function()) { // TODO: create and compile a true lambda instead of this (so that arg_order and ret_order would work correctly) std::vector args0, res; - TypeExpr::remove_indirect(func->sym_type); - tolk_assert(func->get_type()->is_map()); - auto wr = func->get_type()->args.at(0)->get_width(); - auto wl = func->get_type()->args.at(1)->get_width(); + TypeExpr* func_type = f_sym->full_type; + TypeExpr::remove_indirect(func_type); + tolk_assert(func_type->is_map()); + auto wr = func_type->args.at(0)->get_width(); + auto wl = func_type->args.at(1)->get_width(); tolk_assert(wl >= 0 && wr >= 0); for (int i = 0; i < wl; i++) { res.emplace_back(0); @@ -358,10 +357,13 @@ bool Op::generate_code_step(Stack& stack) { for (int i = 0; i < wr; i++) { args0.emplace_back(0); } - func->compile(stack.o, res, args0, where); // compile res := f (args0) + if (f_sym->is_asm_function()) { + std::get(f_sym->body)->compile(stack.o); // compile res := f (args0) + } else { + std::get(f_sym->body)->compile(stack.o, res, args0, where); // compile res := f (args0) + } } else { - std::string name = G.symbols.get_name(fun_ref->sym_idx); - stack.o << AsmOp::Custom(name + " CALLDICT", (int)right.size(), (int)left.size()); + stack.o << AsmOp::Custom(f_sym->name + " CALLDICT", (int)right.size(), (int)left.size()); } stack.o.undent(); stack.o << "}>"; @@ -438,10 +440,9 @@ bool Op::generate_code_step(Stack& stack) { if (disabled()) { return true; } - // fun_ref can be nullptr for Op::_CallInd (invoke a variable, not a function) - SymValFunc* func = (fun_ref ? dynamic_cast(fun_ref->value) : nullptr); - auto arg_order = (func ? func->get_arg_order() : nullptr); - auto ret_order = (func ? func->get_ret_order() : nullptr); + // f_sym can be nullptr for Op::_CallInd (invoke a variable, not a function) + const std::vector* arg_order = f_sym ? f_sym->get_arg_order() : nullptr; + const std::vector* ret_order = f_sym ? 
f_sym->get_ret_order() : nullptr; tolk_assert(!arg_order || arg_order->size() == right.size()); tolk_assert(!ret_order || ret_order->size() == left.size()); std::vector right1; @@ -488,23 +489,25 @@ bool Op::generate_code_step(Stack& stack) { }; if (cl == _CallInd) { exec_callxargs((int)right.size() - 1, (int)left.size()); - } else if (auto asm_fv = dynamic_cast(fun_ref->value)) { + } else if (!f_sym->is_regular_function()) { std::vector res; res.reserve(left.size()); for (var_idx_t i : left) { res.emplace_back(i); } - asm_fv->compile(stack.o, res, args, where); // compile res := f (args) + if (f_sym->is_asm_function()) { + std::get(f_sym->body)->compile(stack.o); // compile res := f (args) + } else { + std::get(f_sym->body)->compile(stack.o, res, args, where); // compile res := f (args) + } } else { - auto fv = dynamic_cast(fun_ref->value); - std::string name = G.symbols.get_name(fun_ref->sym_idx); - if (fv->is_inline() || fv->is_inline_ref()) { - stack.o << AsmOp::Custom(name + " INLINECALLDICT", (int)right.size(), (int)left.size()); - } else if (fv->code && fv->code->require_callxargs) { - stack.o << AsmOp::Custom(name + (" PREPAREDICT"), 0, 2); + if (f_sym->is_inline() || f_sym->is_inline_ref()) { + stack.o << AsmOp::Custom(f_sym->name + " INLINECALLDICT", (int)right.size(), (int)left.size()); + } else if (f_sym->is_regular_function() && std::get(f_sym->body)->code->require_callxargs) { + stack.o << AsmOp::Custom(f_sym->name + (" PREPAREDICT"), 0, 2); exec_callxargs((int)right.size() + 1, (int)left.size()); } else { - stack.o << AsmOp::Custom(name + " CALLDICT", (int)right.size(), (int)left.size()); + stack.o << AsmOp::Custom(f_sym->name + " CALLDICT", (int)right.size(), (int)left.size()); } } stack.s.resize(k); @@ -515,7 +518,7 @@ bool Op::generate_code_step(Stack& stack) { return true; } case _SetGlob: { - tolk_assert(fun_ref && dynamic_cast(fun_ref->value)); + tolk_assert(g_sym); std::vector last; for (var_idx_t x : right) { last.push_back(var_info[x] && 
var_info[x]->is_last()); @@ -534,8 +537,7 @@ bool Op::generate_code_step(Stack& stack) { stack.o << AsmOp::Tuple((int)right.size()); } if (!right.empty()) { - std::string name = G.symbols.get_name(fun_ref->sym_idx); - stack.o << AsmOp::Custom(name + " SETGLOB", 1, 0); + stack.o << AsmOp::Custom(g_sym->name + " SETGLOB", 1, 0); } stack.s.resize(k); return true; @@ -826,6 +828,8 @@ bool Op::generate_code_step(Stack& stack) { catch_stack.push_new_var(left[1]); stack.rearrange_top(catch_vars, catch_last); stack.opt_show(); + stack.o << "c1 PUSH"; + stack.o << "c3 PUSH"; stack.o << "c4 PUSH"; stack.o << "c5 PUSH"; stack.o << "c7 PUSH"; @@ -842,6 +846,8 @@ bool Op::generate_code_step(Stack& stack) { stack.o << "c7 SETCONT"; stack.o << "c5 SETCONT"; stack.o << "c4 SETCONT"; + stack.o << "c3 SETCONT"; + stack.o << "c1 SETCONT"; for (size_t begin = catch_vars.size(), end = begin; end > 0; end = begin) { begin = end >= block_size ? end - block_size : 0; stack.o << std::to_string(end - begin) + " PUSHINT"; diff --git a/tolk/compiler-state.cpp b/tolk/compiler-state.cpp index fb70022f..14d064d9 100644 --- a/tolk/compiler-state.cpp +++ b/tolk/compiler-state.cpp @@ -27,6 +27,19 @@ void ExperimentalOption::mark_deprecated(const char* deprecated_from_v, const ch this->deprecated_reason = deprecated_reason; } +std::string_view PersistentHeapAllocator::copy_string_to_persistent_memory(std::string_view str_in_tmp_memory) { + size_t len = str_in_tmp_memory.size(); + char* allocated = new char[len]; + memcpy(allocated, str_in_tmp_memory.data(), str_in_tmp_memory.size()); + auto new_chunk = std::make_unique(allocated, std::move(head)); + head = std::move(new_chunk); + return {head->allocated, len}; +} + +void PersistentHeapAllocator::clear() { + head = nullptr; +} + void CompilerSettings::enable_experimental_option(std::string_view name) { ExperimentalOption* to_enable = nullptr; diff --git a/tolk/compiler-state.h b/tolk/compiler-state.h index aec1945e..56158876 100644 --- 
a/tolk/compiler-state.h +++ b/tolk/compiler-state.h @@ -19,6 +19,7 @@ #include "src-file.h" #include "symtable.h" #include "td/utils/Status.h" +#include #include #include @@ -64,6 +65,26 @@ struct CompilerSettings { void parse_experimental_options_cmd_arg(const std::string& cmd_arg); }; +// AST nodes contain std::string_view referencing to contents of .tolk files (kept in memory after reading). +// It's more than enough, except a situation when we create new AST nodes inside the compiler +// and want some "persistent place" for std::string_view to point to. +// This class copies strings to heap, so that they remain valid after closing scope. +class PersistentHeapAllocator { + struct ChunkInHeap { + const char* allocated; + std::unique_ptr next; + + ChunkInHeap(const char* allocated, std::unique_ptr&& next) + : allocated(allocated), next(std::move(next)) {} + }; + + std::unique_ptr head = nullptr; + +public: + std::string_view copy_string_to_persistent_memory(std::string_view str_in_tmp_memory); + void clear(); +}; + // CompilerState contains a mutable state that is changed while the compilation is going on. // It's a "global state" of all compilation. // Historically, in FunC, this global state was spread along many global C++ variables. 
@@ -71,14 +92,13 @@ struct CompilerSettings { struct CompilerState { CompilerSettings settings; - SymTable symbols; - int scope_level = 0; - SymDef* sym_def[SymTable::SIZE_PRIME + 1]{}; - SymDef* global_sym_def[SymTable::SIZE_PRIME + 1]{}; - std::vector> symbol_stack; - std::vector scope_opened_at; + GlobalSymbolTable symtable; + PersistentHeapAllocator persistent_mem; - std::vector all_code_functions, all_global_vars, all_get_methods, all_constants; + std::vector all_code_functions; + std::vector all_get_methods; + std::vector all_global_vars; + std::vector all_constants; AllRegisteredSrcFiles all_src_files; bool is_verbosity(int gt_eq) const { return settings.verbosity >= gt_eq; } diff --git a/tolk/constant-evaluator.cpp b/tolk/constant-evaluator.cpp new file mode 100644 index 00000000..73c80b9a --- /dev/null +++ b/tolk/constant-evaluator.cpp @@ -0,0 +1,313 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#include "constant-evaluator.h" +#include "ast.h" +#include "tolk.h" +#include "openssl/digest.hpp" +#include "crypto/common/util.h" +#include "td/utils/crypto.h" +#include "ton/ton-types.h" + +namespace tolk { + +// parse address like "EQCRDM9h4k3UJdOePPuyX40mCgA4vxge5Dc5vjBR8djbEKC5" +// based on unpack_std_smc_addr() from block.cpp +// (which is not included to avoid linking with ton_crypto) +static bool parse_friendly_address(const char packed[48], ton::WorkchainId& workchain, ton::StdSmcAddress& addr) { + unsigned char buffer[36]; + if (!td::buff_base64_decode(td::MutableSlice{buffer, 36}, td::Slice{packed, 48}, true)) { + return false; + } + td::uint16 crc = td::crc16(td::Slice{buffer, 34}); + if (buffer[34] != (crc >> 8) || buffer[35] != (crc & 0xff) || (buffer[0] & 0x3f) != 0x11) { + return false; + } + workchain = static_cast(buffer[1]); + std::memcpy(addr.data(), buffer + 2, 32); + return true; +} + +// parse address like "0:527964d55cfa6eb731f4bfc07e9d025098097ef8505519e853986279bd8400d8" +// based on StdAddress::parse_addr() from block.cpp +// (which is not included to avoid linking with ton_crypto) +static bool parse_raw_address(const std::string& acc_string, int& workchain, ton::StdSmcAddress& addr) { + size_t pos = acc_string.find(':'); + if (pos != std::string::npos) { + td::Result r_wc = td::to_integer_safe(acc_string.substr(0, pos)); + if (r_wc.is_error()) { + return false; + } + workchain = r_wc.move_as_ok(); + pos++; + } else { + pos = 0; + } + if (acc_string.size() != pos + 64) { + return false; + } + + for (int i = 0; i < 64; ++i) { // loop through each hex digit + char c = acc_string[pos + i]; + int x; + if (c >= '0' && c <= '9') { + x = c - '0'; + } else if (c >= 'a' && c <= 'z') { + x = c - 'a' + 10; + } else if (c >= 'A' && c <= 'Z') { + x = c - 'A' + 10; + } else { + return false; + } + + if ((i & 1) == 0) { + addr.data()[i >> 1] = static_cast((addr.data()[i >> 1] & 0x0F) | (x << 4)); + } else { + addr.data()[i >> 1] = 
static_cast((addr.data()[i >> 1] & 0xF0) | x); + } + } + return true; +} + + +static std::string parse_vertex_string_const_as_slice(V v) { + std::string str = static_cast(v->str_val); + switch (v->modifier) { + case 0: { + return td::hex_encode(str); + } + case 's': { + unsigned char buff[128]; + long bits = td::bitstring::parse_bitstring_hex_literal(buff, sizeof(buff), str.data(), str.data() + str.size()); + if (bits < 0) { + v->error("invalid hex bitstring constant '" + str + "'"); + } + return str; + } + case 'a': { // MsgAddress + ton::WorkchainId workchain; + ton::StdSmcAddress addr; + bool correct = (str.size() == 48 && parse_friendly_address(str.data(), workchain, addr)) || + (str.size() != 48 && parse_raw_address(str, workchain, addr)); + if (!correct) { + v->error("invalid standard address '" + str + "'"); + } + if (workchain < -128 || workchain >= 128) { + v->error("anycast addresses not supported"); + } + + unsigned char data[3 + 8 + 256]; // addr_std$10 anycast:(Maybe Anycast) workchain_id:int8 address:bits256 = MsgAddressInt; + td::bitstring::bits_store_long_top(data, 0, static_cast(4) << (64 - 3), 3); + td::bitstring::bits_store_long_top(data, 3, static_cast(workchain) << (64 - 8), 8); + td::bitstring::bits_memcpy(data, 3 + 8, addr.bits().ptr, 0, ton::StdSmcAddress::size()); + return td::BitSlice{data, sizeof(data)}.to_hex(); + } + default: + tolk_assert(false); + } +} + +static td::RefInt256 parse_vertex_string_const_as_int(V v) { + std::string str = static_cast(v->str_val); + switch (v->modifier) { + case 'u': { + td::RefInt256 intval = td::hex_string_to_int256(td::hex_encode(str)); + if (str.empty()) { + v->error("empty integer ascii-constant"); + } + if (intval.is_null()) { + v->error("too long integer ascii-constant"); + } + return intval; + } + case 'h': + case 'H': { + unsigned char hash[32]; + digest::hash_str(hash, str.data(), str.size()); + return td::bits_to_refint(hash, (v->modifier == 'h') ? 
32 : 256, false); + } + case 'c': { + return td::make_refint(td::crc32(td::Slice{str})); + } + default: + tolk_assert(false); + } +} + + +struct ConstantEvaluator { + static bool is_overflow(const td::RefInt256& intval) { + return intval.is_null() || !intval->signed_fits_bits(257); + } + + static ConstantValue handle_unary_operator(V v, const ConstantValue& rhs) { + if (!rhs.is_int()) { + v->error("invalid operator, expecting integer"); + } + td::RefInt256 intval = std::get(rhs.value); + + switch (v->tok) { + case tok_minus: + intval = -intval; + break; + case tok_plus: + break; + case tok_bitwise_not: + intval = ~intval; + break; + case tok_logical_not: + intval = td::make_refint(intval == 0 ? -1 : 0); + break; + default: + v->error("not a constant expression"); + } + + if (is_overflow(intval)) { + v->error("integer overflow"); + } + return ConstantValue::from_int(std::move(intval)); + } + + static ConstantValue handle_binary_operator(V v, const ConstantValue& lhs, const ConstantValue& rhs) { + if (!lhs.is_int() || !rhs.is_int()) { + v->error("invalid operator, expecting integer"); + } + td::RefInt256 lhs_intval = std::get(lhs.value); + td::RefInt256 rhs_intval = std::get(rhs.value); + td::RefInt256 intval; + + switch (v->tok) { + case tok_minus: + intval = lhs_intval - rhs_intval; + break; + case tok_plus: + intval = lhs_intval + rhs_intval; + break; + case tok_mul: + intval = lhs_intval * rhs_intval; + break; + case tok_div: + intval = lhs_intval / rhs_intval; + break; + case tok_mod: + intval = lhs_intval % rhs_intval; + break; + case tok_lshift: + intval = lhs_intval << static_cast(rhs_intval->to_long()); + break; + case tok_rshift: + intval = lhs_intval >> static_cast(rhs_intval->to_long()); + break; + case tok_bitwise_and: + intval = lhs_intval & rhs_intval; + break; + case tok_bitwise_or: + intval = lhs_intval | rhs_intval; + break; + case tok_bitwise_xor: + intval = lhs_intval ^ rhs_intval; + break; + case tok_eq: + intval = td::make_refint(lhs_intval == 
rhs_intval ? -1 : 0); + break; + case tok_lt: + intval = td::make_refint(lhs_intval < rhs_intval ? -1 : 0); + break; + case tok_gt: + intval = td::make_refint(lhs_intval > rhs_intval ? -1 : 0); + break; + case tok_leq: + intval = td::make_refint(lhs_intval <= rhs_intval ? -1 : 0); + break; + case tok_geq: + intval = td::make_refint(lhs_intval >= rhs_intval ? -1 : 0); + break; + case tok_neq: + intval = td::make_refint(lhs_intval != rhs_intval ? -1 : 0); + break; + default: + v->error("unsupported binary operator in constant expression"); + } + + if (is_overflow(intval)) { + v->error("integer overflow"); + } + return ConstantValue::from_int(std::move(intval)); + } + + static ConstantValue handle_identifier(V v) { + // todo better handle "appears, directly or indirectly, in its own initializer" + const Symbol* sym = lookup_global_symbol(v->name); + if (!sym) { + v->error("undefined symbol `" + static_cast(v->name) + "`"); + } + const GlobalConstData* const_ref = sym->try_as(); + if (!const_ref) { + v->error("symbol `" + static_cast(v->name) + "` is not a constant"); + } + return {const_ref->value}; + } + + static ConstantValue visit(AnyExprV v) { + if (auto v_int = v->try_as()) { + return ConstantValue::from_int(v_int->intval); + } + if (auto v_bool = v->try_as()) { + return ConstantValue::from_int(v_bool->bool_val ? 
-1 : 0); + } + if (auto v_unop = v->try_as()) { + return handle_unary_operator(v_unop, visit(v_unop->get_rhs())); + } + if (auto v_binop = v->try_as()) { + return handle_binary_operator(v_binop, visit(v_binop->get_lhs()), visit(v_binop->get_rhs())); + } + if (auto v_ident = v->try_as()) { + return handle_identifier(v_ident); + } + if (auto v_par = v->try_as()) { + return visit(v_par->get_expr()); + } + if (v->try_as()) { + return eval_const_init_value(v); + } + v->error("not a constant expression"); + } + + static ConstantValue eval_const_init_value(AnyExprV init_value) { + // it init_value is incorrect, an exception is thrown + return visit(init_value); + } +}; + +ConstantValue eval_const_init_value(AnyExprV init_value) { + // at first, handle most simple cases, not to launch heavy computation algorithm: just a number, just a string + // just `c = 1` or `c = 0xFF` + if (auto v_int = init_value->try_as()) { + return {v_int->intval}; + } + // just `c = "strval"`, probably with modifier (address, etc.) + if (auto v_string = init_value->try_as()) { + if (v_string->is_bitslice()) { + return {parse_vertex_string_const_as_slice(v_string)}; + } else { + return {parse_vertex_string_const_as_int(v_string)}; + } + } + // something more complex, like `c = anotherC` or `c = 1 << 8` + return ConstantEvaluator::eval_const_init_value(init_value); +} + +} // namespace tolk diff --git a/tolk/constant-evaluator.h b/tolk/constant-evaluator.h new file mode 100644 index 00000000..0f99867d --- /dev/null +++ b/tolk/constant-evaluator.h @@ -0,0 +1,45 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. 
+ + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +#include "fwd-declarations.h" +#include "crypto/common/refint.h" +#include + +namespace tolk { + +struct ConstantValue { + std::variant value; + + bool is_int() const { return std::holds_alternative(value); } + bool is_slice() const { return std::holds_alternative(value); } + + td::RefInt256 as_int() const { return std::get(value); } + const std::string& as_slice() const { return std::get(value); } + + static ConstantValue from_int(int value) { + return {td::make_refint(value)}; + } + + static ConstantValue from_int(td::RefInt256 value) { + return {std::move(value)}; + } +}; + +ConstantValue eval_const_init_value(AnyExprV init_value); + +} // namespace tolk diff --git a/tolk/fwd-declarations.h b/tolk/fwd-declarations.h new file mode 100644 index 00000000..d2197e66 --- /dev/null +++ b/tolk/fwd-declarations.h @@ -0,0 +1,39 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#pragma once + +namespace tolk { + +struct ASTNodeBase; +struct ASTNodeExpressionBase; +struct ASTNodeStatementBase; + +using AnyV = const ASTNodeBase*; +using AnyExprV = const ASTNodeExpressionBase*; +using AnyStatementV = const ASTNodeStatementBase*; + +struct Symbol; +struct LocalVarData; +struct FunctionData; +struct GlobalVarData; +struct GlobalConstData; + +struct TypeExpr; + +struct SrcFile; + +} // namespace tolk diff --git a/tolk/gen-abscode.cpp b/tolk/gen-abscode.cpp deleted file mode 100644 index fb085ae9..00000000 --- a/tolk/gen-abscode.cpp +++ /dev/null @@ -1,429 +0,0 @@ -/* - This file is part of TON Blockchain Library. - - TON Blockchain Library is free software: you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - TON Blockchain Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with TON Blockchain Library. If not, see . 
-*/ -#include "tolk.h" -#include "compiler-state.h" - -using namespace std::literals::string_literals; - -namespace tolk { - -/* - * - * EXPRESSIONS - * - */ - -Expr* Expr::copy() const { - auto res = new Expr{*this}; - for (auto& arg : res->args) { - arg = arg->copy(); - } - return res; -} - -Expr::Expr(ExprCls c, sym_idx_t name_idx, std::initializer_list _arglist) : cls(c), args(std::move(_arglist)) { - sym = lookup_symbol(name_idx); - if (!sym) { - } -} - -void Expr::deduce_type() { - if (e_type) { - return; - } - switch (cls) { - case _Apply: { - if (!sym) { - return; - } - SymValFunc* sym_val = dynamic_cast(sym->value); - if (!sym_val || !sym_val->get_type()) { - return; - } - std::vector arg_types; - arg_types.reserve(args.size()); - for (const Expr* arg : args) { - arg_types.push_back(arg->e_type); - } - TypeExpr* fun_type = TypeExpr::new_map(TypeExpr::new_tensor(arg_types), TypeExpr::new_hole()); - try { - unify(fun_type, sym_val->sym_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "cannot apply function " << sym->name() << " : " << sym_val->get_type() << " to arguments of type " - << fun_type->args[0] << ": " << ue; - throw ParseError(here, os.str()); - } - e_type = fun_type->args[1]; - TypeExpr::remove_indirect(e_type); - return; - } - case _VarApply: { - tolk_assert(args.size() == 2); - TypeExpr* fun_type = TypeExpr::new_map(args[1]->e_type, TypeExpr::new_hole()); - try { - unify(fun_type, args[0]->e_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "cannot apply expression of type " << args[0]->e_type << " to an expression of type " << args[1]->e_type - << ": " << ue; - throw ParseError(here, os.str()); - } - e_type = fun_type->args[1]; - TypeExpr::remove_indirect(e_type); - return; - } - case _GrabMutatedVars: { - tolk_assert(args.size() == 2 && args[0]->cls == _Apply && sym); - SymValFunc* called_f = dynamic_cast(sym->value); - tolk_assert(called_f->has_mutate_params()); - TypeExpr* sym_type = 
called_f->get_type(); - if (sym_type->constr == TypeExpr::te_ForAll) { - TypeExpr::remove_forall(sym_type); - } - tolk_assert(sym_type->args[1]->constr == TypeExpr::te_Tensor); - e_type = sym_type->args[1]->args[sym_type->args[1]->args.size() - 1]; - TypeExpr::remove_indirect(e_type); - return; - } - case _ReturnSelf: { - tolk_assert(args.size() == 2 && sym); - Expr* this_arg = args[1]; - e_type = this_arg->e_type; - TypeExpr::remove_indirect(e_type); - return; - } - case _Letop: { - tolk_assert(args.size() == 2); - try { - // std::cerr << "in assignment: " << args[0]->e_type << " from " << args[1]->e_type << std::endl; - unify(args[0]->e_type, args[1]->e_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "cannot assign an expression of type " << args[1]->e_type << " to a variable or pattern of type " - << args[0]->e_type << ": " << ue; - throw ParseError(here, os.str()); - } - e_type = args[0]->e_type; - TypeExpr::remove_indirect(e_type); - return; - } - case _CondExpr: { - tolk_assert(args.size() == 3); - auto flag_type = TypeExpr::new_atomic(TypeExpr::_Int); - try { - unify(args[0]->e_type, flag_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "condition in a conditional expression has non-integer type " << args[0]->e_type << ": " << ue; - throw ParseError(here, os.str()); - } - try { - unify(args[1]->e_type, args[2]->e_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "the two variants in a conditional expression have different types " << args[1]->e_type << " and " - << args[2]->e_type << " : " << ue; - throw ParseError(here, os.str()); - } - e_type = args[1]->e_type; - TypeExpr::remove_indirect(e_type); - return; - } - default: - throw Fatal("unexpected cls=" + std::to_string(cls) + " in Expr::deduce_type()"); - } -} - -void Expr::define_new_vars(CodeBlob& code) { - switch (cls) { - case _Tensor: - case _MkTuple: { - for (Expr* item : args) { - item->define_new_vars(code); - } - break; - } - case _Var: - 
if (val < 0) { - val = code.create_var(e_type, sym->sym_idx, here); - sym->value->idx = val; - } - break; - case _Hole: - if (val < 0) { - val = code.create_tmp_var(e_type, here); - } - break; - default: - break; - } -} - -void Expr::predefine_vars() { - switch (cls) { - case _Tensor: - case _MkTuple: { - for (Expr* item : args) { - item->predefine_vars(); - } - break; - } - case _Var: - if (!sym) { - tolk_assert(val < 0 && here.is_defined()); - sym = define_symbol(~val, false, here); - // std::cerr << "predefining variable " << symbols.get_name(~val) << std::endl; - if (!sym) { - throw ParseError{here, std::string{"redefined variable `"} + G.symbols.get_name(~val) + "`"}; - } - sym->value = new SymValVariable(-1, e_type); - if (is_immutable()) { - dynamic_cast(sym->value)->flags |= SymValVariable::flagImmutable; - } - } - break; - default: - break; - } -} - -var_idx_t Expr::new_tmp(CodeBlob& code) const { - return code.create_tmp_var(e_type, here); -} - -void add_set_globs(CodeBlob& code, std::vector>& globs, SrcLocation here) { - for (const auto& p : globs) { - auto& op = code.emplace_back(here, Op::_SetGlob, std::vector{}, std::vector{ p.second }, p.first); - op.set_impure(code); - } -} - -std::vector pre_compile_let(CodeBlob& code, Expr* lhs, Expr* rhs, SrcLocation here) { - if (lhs->is_mktuple()) { - if (rhs->is_mktuple()) { - return pre_compile_let(code, lhs->args.at(0), rhs->args.at(0), here); - } - auto right = rhs->pre_compile(code); - TypeExpr::remove_indirect(rhs->e_type); - auto unpacked_type = rhs->e_type->args.at(0); - std::vector tmp{code.create_tmp_var(unpacked_type, rhs->here)}; - code.emplace_back(lhs->here, Op::_UnTuple, tmp, std::move(right)); - auto tvar = new Expr{Expr::_Var, lhs->here}; - tvar->set_val(tmp[0]); - tvar->set_location(rhs->here); - tvar->e_type = unpacked_type; - pre_compile_let(code, lhs->args.at(0), tvar, here); - return tmp; - } - auto right = rhs->pre_compile(code); - std::vector> globs; - auto left = lhs->pre_compile(code, 
&globs); - for (var_idx_t v : left) { - code.on_var_modification(v, here); - } - code.emplace_back(here, Op::_Let, std::move(left), right); - add_set_globs(code, globs, here); - return right; -} - -std::vector pre_compile_tensor(const std::vector& args, CodeBlob &code, - std::vector> *lval_globs) { - const size_t n = args.size(); - if (n == 0) { // just `()` - return {}; - } - if (n == 1) { // just `(x)`: even if x is modified (e.g. `f(x=x+2)`), there are no next arguments - return args[0]->pre_compile(code, lval_globs); - } - std::vector> res_lists(n); - - struct ModifiedVar { - size_t i, j; - std::unique_ptr* cur_ops; // `LET tmp = v_ij` will be inserted before this - }; - std::vector modified_vars; - for (size_t i = 0; i < n; ++i) { - res_lists[i] = args[i]->pre_compile(code, lval_globs); - for (size_t j = 0; j < res_lists[i].size(); ++j) { - TmpVar& var = code.vars.at(res_lists[i][j]); - if (!lval_globs && !var.is_unnamed()) { - var.on_modification.push_back([&modified_vars, i, j, cur_ops = code.cur_ops, done = false](SrcLocation here) mutable { - if (!done) { - done = true; - modified_vars.push_back({i, j, cur_ops}); - } - }); - } else { - var.on_modification.push_back([](SrcLocation) { - }); - } - } - } - for (const auto& list : res_lists) { - for (var_idx_t v : list) { - tolk_assert(!code.vars.at(v).on_modification.empty()); - code.vars.at(v).on_modification.pop_back(); - } - } - for (size_t idx = modified_vars.size(); idx--; ) { - const ModifiedVar &m = modified_vars[idx]; - var_idx_t orig_v = res_lists[m.i][m.j]; - var_idx_t tmp_v = code.create_tmp_var(code.vars[orig_v].v_type, code.vars[orig_v].where); - std::unique_ptr op = std::make_unique(code.vars[orig_v].where, Op::_Let); - op->left = {tmp_v}; - op->right = {orig_v}; - op->next = std::move((*m.cur_ops)); - *m.cur_ops = std::move(op); - res_lists[m.i][m.j] = tmp_v; - } - std::vector res; - for (const auto& list : res_lists) { - res.insert(res.end(), list.cbegin(), list.cend()); - } - return res; -} - 
-std::vector Expr::pre_compile(CodeBlob& code, std::vector>* lval_globs) const { - if (lval_globs && !(cls == _Tensor || cls == _Var || cls == _Hole || cls == _GlobVar)) { - std::cerr << "lvalue expression constructor is " << cls << std::endl; - throw Fatal{"cannot compile lvalue expression with unknown constructor"}; - } - switch (cls) { - case _Tensor: { - return pre_compile_tensor(args, code, lval_globs); - } - case _Apply: { - tolk_assert(sym); - std::vector res = pre_compile_tensor(args, code, lval_globs);; - auto rvect = new_tmp_vect(code); - auto& op = code.emplace_back(here, Op::_Call, rvect, res, sym); - if (flags & _IsImpure) { - op.set_impure(code); - } - return rvect; - } - case _GrabMutatedVars: { - SymValFunc* func_val = dynamic_cast(sym->value); - tolk_assert(func_val && func_val->has_mutate_params()); - tolk_assert(args.size() == 2 && args[0]->cls == _Apply && args[1]->cls == _Tensor); - auto right = args[0]->pre_compile(code); // apply (returning function result and mutated) - std::vector> local_globs; - if (!lval_globs) { - lval_globs = &local_globs; - } - auto left = args[1]->pre_compile(code, lval_globs); // mutated (lvalue) - auto rvect = new_tmp_vect(code); - left.push_back(rvect[0]); - for (var_idx_t v : left) { - code.on_var_modification(v, here); - } - code.emplace_back(here, Op::_Let, std::move(left), std::move(right)); - add_set_globs(code, local_globs, here); - return rvect; - } - case _ReturnSelf: { - tolk_assert(args.size() == 2 && sym); - Expr* this_arg = args[1]; - auto right = args[0]->pre_compile(code); - return this_arg->pre_compile(code); - } - case _Var: - case _Hole: - if (val < 0) { - throw ParseError{here, "unexpected variable definition"}; - } - return {val}; - case _VarApply: - if (args[0]->cls == _GlobFunc) { - auto res = args[1]->pre_compile(code); - auto rvect = new_tmp_vect(code); - auto& op = code.emplace_back(here, Op::_Call, rvect, std::move(res), args[0]->sym); - if (args[0]->flags & _IsImpure) { - 
op.set_impure(code); - } - return rvect; - } else { - auto res = args[1]->pre_compile(code); - auto tfunc = args[0]->pre_compile(code); - if (tfunc.size() != 1) { - throw Fatal{"stack tuple used as a function"}; - } - res.push_back(tfunc[0]); - auto rvect = new_tmp_vect(code); - code.emplace_back(here, Op::_CallInd, rvect, std::move(res)); - return rvect; - } - case _Const: { - auto rvect = new_tmp_vect(code); - code.emplace_back(here, Op::_IntConst, rvect, intval); - return rvect; - } - case _GlobFunc: - case _GlobVar: { - if (auto fun_ref = dynamic_cast(sym->value)) { - fun_ref->flags |= SymValFunc::flagUsedAsNonCall; - if (!fun_ref->arg_order.empty() || !fun_ref->ret_order.empty()) { - throw ParseError(here, "saving `" + sym->name() + "` into a variable will most likely lead to invalid usage, since it changes the order of variables on the stack"); - } - if (fun_ref->has_mutate_params()) { - throw ParseError(here, "saving `" + sym->name() + "` into a variable is impossible, since it has `mutate` parameters and thus can only be called directly"); - } - } - auto rvect = new_tmp_vect(code); - if (lval_globs) { - lval_globs->push_back({ sym, rvect[0] }); - return rvect; - } else { - code.emplace_back(here, Op::_GlobVar, rvect, std::vector{}, sym); - return rvect; - } - } - case _Letop: { - return pre_compile_let(code, args.at(0), args.at(1), here); - } - case _MkTuple: { - auto left = new_tmp_vect(code); - auto right = args[0]->pre_compile(code); - code.emplace_back(here, Op::_Tuple, left, std::move(right)); - return left; - } - case _CondExpr: { - auto cond = args[0]->pre_compile(code); - tolk_assert(cond.size() == 1); - auto rvect = new_tmp_vect(code); - Op& if_op = code.emplace_back(here, Op::_If, cond); - code.push_set_cur(if_op.block0); - code.emplace_back(here, Op::_Let, rvect, args[1]->pre_compile(code)); - code.close_pop_cur(args[1]->here); - code.push_set_cur(if_op.block1); - code.emplace_back(here, Op::_Let, rvect, args[2]->pre_compile(code)); - 
code.close_pop_cur(args[2]->here); - return rvect; - } - case _SliceConst: { - auto rvect = new_tmp_vect(code); - code.emplace_back(here, Op::_SliceConst, rvect, strval); - return rvect; - } - default: - std::cerr << "expression constructor is " << cls << std::endl; - throw Fatal{"cannot compile expression with unknown constructor"}; - } -} - -} // namespace tolk diff --git a/tolk/lexer.cpp b/tolk/lexer.cpp index 17eb4544..4278f040 100644 --- a/tolk/lexer.cpp +++ b/tolk/lexer.cpp @@ -15,9 +15,9 @@ along with TON Blockchain Library. If not, see . */ #include "lexer.h" -#include "compiler-state.h" -#include "symtable.h" #include +#include +#include namespace tolk { @@ -406,7 +406,6 @@ struct ChunkIdentifierOrKeyword final : ChunkLexerBase { if (TokenType kw_tok = maybe_keyword(str_val)) { lex->add_token(kw_tok, str_val); } else { - G.symbols.lookup_add(str_val); lex->add_token(tok_identifier, str_val); } return true; @@ -421,7 +420,7 @@ struct ChunkIdentifierInBackticks final : ChunkLexerBase { const char* str_begin = lex->c_str(); lex->skip_chars(1); while (!lex->is_eof() && lex->char_at() != '`' && lex->char_at() != '\n') { - if (std::isspace(lex->char_at())) { // probably, I'll remove this restriction after rewriting symtable and cur_sym_idx + if (std::isspace(lex->char_at())) { lex->error("an identifier can't have a space in its name (even inside backticks)"); } lex->skip_chars(1); @@ -432,7 +431,6 @@ struct ChunkIdentifierInBackticks final : ChunkLexerBase { std::string_view str_val(str_begin + 1, lex->c_str() - str_begin - 1); lex->skip_chars(1); - G.symbols.lookup_add(str_val); lex->add_token(tok_identifier, str_val); return true; } diff --git a/tolk/lexer.h b/tolk/lexer.h index 8a25f952..bf116188 100644 --- a/tolk/lexer.h +++ b/tolk/lexer.h @@ -57,10 +57,29 @@ enum TokenType { tok_dot, tok_plus, + tok_set_plus, tok_minus, + tok_set_minus, tok_mul, + tok_set_mul, tok_div, + tok_set_div, tok_mod, + tok_set_mod, + tok_lshift, + tok_set_lshift, + tok_rshift, + 
tok_set_rshift, + tok_rshiftR, + tok_rshiftC, + tok_bitwise_and, + tok_set_bitwise_and, + tok_bitwise_or, + tok_set_bitwise_or, + tok_bitwise_xor, + tok_set_bitwise_xor, + tok_bitwise_not, + tok_question, tok_comma, tok_semicolon, @@ -77,32 +96,14 @@ enum TokenType { tok_logical_not, tok_logical_and, tok_logical_or, - tok_bitwise_and, - tok_bitwise_or, - tok_bitwise_xor, - tok_bitwise_not, tok_eq, tok_neq, tok_leq, tok_geq, tok_spaceship, - tok_lshift, - tok_rshift, - tok_rshiftR, - tok_rshiftC, tok_divR, tok_divC, - tok_set_plus, - tok_set_minus, - tok_set_mul, - tok_set_div, - tok_set_mod, - tok_set_lshift, - tok_set_rshift, - tok_set_bitwise_and, - tok_set_bitwise_or, - tok_set_bitwise_xor, tok_return, tok_repeat, diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index 7257bfb0..3c02c7d1 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -19,1066 +19,591 @@ #include "ast.h" #include "compiler-state.h" #include "common/refint.h" -#include "openssl/digest.hpp" -#include "crypto/common/util.h" -#include "td/utils/crypto.h" -#include "ton/ton-types.h" +#include "constant-evaluator.h" /* - * In this module, we convert modern AST representation to legacy representation - * (global state, Expr, CodeBlob, etc.) to make the rest of compiling process remain unchanged for now. - * Since time goes, I'll gradually get rid of legacy, since most of the code analysis - * should be done at AST level. + * This pipe is the last one operating AST: it transforms AST to IR. + * IR is described as "Op" struct. So, here AST is transformed to Ops, and then all the rest "legacy" + * kernel (initially forked from FunC) comes into play. + * Up to this point, all types have been inferred, all validity checks have been passed, etc. + * All properties in AST nodes are assigned and can be safely used (fun_ref, etc.). + * So, if execution reaches this pass, the input is correct, and code generation should succeed. 
*/ namespace tolk { -static int calc_sym_idx(std::string_view sym_name) { - return G.symbols.lookup(sym_name); -} +struct LValGlobs { + std::vector> globs; -void Expr::fire_error_rvalue_expected() const { - // generally, almost all vertices are rvalue, that's why code leading to "not rvalue" - // should be very strange, like `var x = _` - throw ParseError(here, "rvalue expected"); -} + void add_modified_glob(const GlobalVarData* g_sym, var_idx_t local_ir_idx) { + globs.emplace_back(g_sym, local_ir_idx); + } -void Expr::fire_error_lvalue_expected(const std::string& details) const { - // "lvalue expected" is when a user modifies something unmodifiable - // example: `f() = 32` - // example: `loadUint(c.beginParse(), 32)` (since `loadUint()` mutates the first argument) - throw ParseError(here, "lvalue expected (" + details + ")"); -} + void gen_ops_set_globs(CodeBlob& code, SrcLocation loc) const { + for (const auto& [g_sym, ir_idx] : globs) { + Op& op = code.emplace_back(loc, Op::_SetGlob, std::vector{}, std::vector{ ir_idx }, g_sym); + op.set_impure_flag(); + } + } +}; -void Expr::fire_error_modifying_immutable(const std::string& details) const { - // "modifying immutable variable" is when a user assigns to a variable declared `val` - // example: `immutable_val = 32` - // example: `(regular_var, immutable_val) = f()` - // for better error message, try to print out variable name if possible - std::string variable_name; - if (cls == _Var || cls == _Const) { - variable_name = sym->name(); - } else if (cls == _Tensor || cls == _MkTuple) { - for (const Expr* arg : (cls == _Tensor ? 
args : args[0]->args)) { - if (arg->is_immutable() && (arg->cls == _Var || arg->cls == _Const)) { - variable_name = arg->sym->name(); - break; +std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, LValGlobs* lval_globs = nullptr); +void process_statement(AnyV v, CodeBlob& code); + + +static std::vector> pre_compile_tensor_inner(CodeBlob& code, const std::vector& args, + LValGlobs* lval_globs) { + const int n = static_cast(args.size()); + if (n == 0) { // just `()` + return {}; + } + if (n == 1) { // just `(x)`: even if x is modified (e.g. `f(x=x+2)`), there are no next arguments + return {pre_compile_expr(args[0], code, lval_globs)}; + } + + // the purpose is to handle such cases: `return (x, x += y, x)` + // without this, ops will be { _Call $2 = +($0_x, $1_y); _Return $0_x, $2, $0_x } - invalid + // with this, ops will be { _Let $3 = $0_x; _Call $2 = ...; _Return $3, $2, $0_x } - valid, tmp var for x + // how it works: for every arg, after transforming to ops, start tracking ir_idx inside it + // on modification attempt, create Op::_Let to a tmp var and replace old ir_idx with tmp_idx in result + struct WatchingVarList { + std::vector watched_vars; + std::vector> res_lists; + + explicit WatchingVarList(int n_args) { + res_lists.reserve(n_args); + } + + bool is_watched(var_idx_t ir_idx) const { + return std::find(watched_vars.begin(), watched_vars.end(), ir_idx) != watched_vars.end(); + } + + void add_and_watch_modifications(std::vector&& vars_of_ith_arg, CodeBlob& code) { + for (var_idx_t ir_idx : vars_of_ith_arg) { + if (code.vars[ir_idx].v_sym && !is_watched(ir_idx)) { + watched_vars.emplace_back(ir_idx); + code.vars[ir_idx].on_modification.emplace_back([this, &code, ir_idx](SrcLocation loc) { + on_var_modified(ir_idx, loc, code); + }); + } + } + res_lists.emplace_back(std::move(vars_of_ith_arg)); + } + + void on_var_modified(var_idx_t ir_idx, SrcLocation loc, CodeBlob& code) { + tolk_assert(is_watched(ir_idx)); + var_idx_t tmp_idx = 
code.create_tmp_var(code.vars[ir_idx].v_type, loc); + code.emplace_back(loc, Op::_Let, std::vector{tmp_idx}, std::vector{ir_idx}); + for (std::vector& prev_vars : res_lists) { + std::replace(prev_vars.begin(), prev_vars.end(), ir_idx, tmp_idx); } } - } - if (variable_name == "self") { - throw ParseError(here, "modifying `self` (" + details + "), which is immutable by default; probably, you want to declare `mutate self`"); - } else if (!variable_name.empty()) { - throw ParseError(here, "modifying an immutable variable `" + variable_name + "` (" + details + ")"); - } else { - throw ParseError(here, "modifying an immutable variable (" + details + ")"); - } -} - -GNU_ATTRIBUTE_COLD GNU_ATTRIBUTE_NORETURN -static void fire_error_invalid_mutate_arg_passed(SrcLocation loc, const SymDef* func_sym, const SymDef* param_sym, bool called_as_method, bool arg_passed_as_mutate, AnyV arg_expr) { - std::string func_name = func_sym->name(); - std::string arg_str(arg_expr->type == ast_identifier ? arg_expr->as()->name : "obj"); - const SymValFunc* func_val = dynamic_cast(func_sym->value); - const SymValVariable* param_val = dynamic_cast(param_sym->value); - - // case: `loadInt(cs, 32)`; suggest: `cs.loadInt(32)` - if (param_val->is_mutate_parameter() && !arg_passed_as_mutate && !called_as_method && param_val->idx == 0 && func_val->does_accept_self()) { - throw ParseError(loc, "`" + func_name + "` is a mutating method; consider calling `" + arg_str + "." + func_name + "()`, not `" + func_name + "(" + arg_str + ")`"); - } - // case: `cs.mutating_function()`; suggest: `mutating_function(mutate cs)` or make it a method - if (param_val->is_mutate_parameter() && called_as_method && param_val->idx == 0 && !func_val->does_accept_self()) { - throw ParseError(loc, "function `" + func_name + "` mutates parameter `" + param_sym->name() + "`; consider calling `" + func_name + "(mutate " + arg_str + ")`, not `" + arg_str + "." 
+ func_name + "`(); alternatively, rename parameter to `self` to make it a method"); - } - // case: `mutating_function(arg)`; suggest: `mutate arg` - if (param_val->is_mutate_parameter() && !arg_passed_as_mutate) { - throw ParseError(loc, "function `" + func_name + "` mutates parameter `" + param_sym->name() + "`; you need to specify `mutate` when passing an argument, like `mutate " + arg_str + "`"); - } - // case: `usual_function(mutate arg)` - if (!param_val->is_mutate_parameter() && arg_passed_as_mutate) { - throw ParseError(loc, "incorrect `mutate`, since `" + func_name + "` does not mutate this parameter"); - } - throw Fatal("unreachable"); -} - -// parse address like "EQCRDM9h4k3UJdOePPuyX40mCgA4vxge5Dc5vjBR8djbEKC5" -// based on unpack_std_smc_addr() from block.cpp -// (which is not included to avoid linking with ton_crypto) -static bool parse_friendly_address(const char packed[48], ton::WorkchainId& workchain, ton::StdSmcAddress& addr) { - unsigned char buffer[36]; - if (!td::buff_base64_decode(td::MutableSlice{buffer, 36}, td::Slice{packed, 48}, true)) { - return false; - } - td::uint16 crc = td::crc16(td::Slice{buffer, 34}); - if (buffer[34] != (crc >> 8) || buffer[35] != (crc & 0xff) || (buffer[0] & 0x3f) != 0x11) { - return false; - } - workchain = (td::int8)buffer[1]; - std::memcpy(addr.data(), buffer + 2, 32); - return true; -} - -// parse address like "0:527964d55cfa6eb731f4bfc07e9d025098097ef8505519e853986279bd8400d8" -// based on StdAddress::parse_addr() from block.cpp -// (which is not included to avoid linking with ton_crypto) -static bool parse_raw_address(const std::string& acc_string, int& workchain, ton::StdSmcAddress& addr) { - size_t pos = acc_string.find(':'); - if (pos != std::string::npos) { - td::Result r_wc = td::to_integer_safe(acc_string.substr(0, pos)); - if (r_wc.is_error()) { - return false; - } - workchain = r_wc.move_as_ok(); - pos++; - } else { - pos = 0; - } - if (acc_string.size() != pos + 64) { - return false; - } - - for 
(int i = 0; i < 64; ++i) { // loop through each hex digit - char c = acc_string[pos + i]; - int x; - if (c >= '0' && c <= '9') { - x = c - '0'; - } else if (c >= 'a' && c <= 'z') { - x = c - 'a' + 10; - } else if (c >= 'A' && c <= 'Z') { - x = c - 'A' + 10; - } else { - return false; - } - - if ((i & 1) == 0) { - addr.data()[i >> 1] = static_cast((addr.data()[i >> 1] & 0x0F) | (x << 4)); - } else { - addr.data()[i >> 1] = static_cast((addr.data()[i >> 1] & 0xF0) | x); - } - } - return true; -} - -static Expr* create_expr_apply(SrcLocation loc, SymDef* sym, std::vector&& args) { - Expr* apply = new Expr(Expr::_Apply, sym, std::move(args)); - apply->here = loc; - apply->flags = Expr::_IsRvalue; - apply->deduce_type(); - return apply; -} - -static Expr* create_expr_int_const(SrcLocation loc, int int_val) { - Expr* int_const = new Expr(Expr::_Const, loc); - int_const->intval = td::make_refint(int_val); - int_const->flags = Expr::_IsRvalue; - int_const->e_type = TypeExpr::new_atomic(TypeExpr::_Int); - return int_const; -} - -namespace blk_fl { -enum { end = 1, ret = 2, empty = 4 }; -typedef int val; -constexpr val init = end | empty; -void combine(val& x, const val y) { - x |= y & ret; - x &= y | ~(end | empty); -} -void combine_parallel(val& x, const val y) { - x &= y | ~(ret | empty); - x |= y & end; -} -} // namespace blk_fl - -Expr* process_expr(AnyV v, CodeBlob& code); -blk_fl::val process_statement(AnyV v, CodeBlob& code); - -static void check_global_func(SrcLocation loc, sym_idx_t func_name) { - SymDef* sym_def = lookup_symbol(func_name); - if (!sym_def) { - throw ParseError(loc, "undefined symbol `" + G.symbols.get_name(func_name) + "`"); - } -} - -static void check_import_exists_when_using_sym(AnyV v_usage, const SymDef* used_sym) { - if (!v_usage->loc.is_symbol_from_same_or_builtin_file(used_sym->loc)) { - const SrcFile* declared_in = used_sym->loc.get_src_file(); - bool has_import = false; - for (const SrcFile::ImportStatement& import_stmt : 
v_usage->loc.get_src_file()->imports) { - if (import_stmt.imported_file == declared_in) { - has_import = true; + std::vector> clear_and_stop_watching(CodeBlob& code) { + for (var_idx_t ir_idx : watched_vars) { + code.vars[ir_idx].on_modification.pop_back(); } + watched_vars.clear(); + return std::move(res_lists); } - if (!has_import) { - v_usage->error("Using a non-imported symbol `" + used_sym->name() + "`. Forgot to import \"" + declared_in->rel_filename + "\"?"); - } + }; + + WatchingVarList watched_vars(n); + for (int arg_idx = 0; arg_idx < n; ++arg_idx) { + std::vector vars_of_ith_arg = pre_compile_expr(args[arg_idx], code, lval_globs); + watched_vars.add_and_watch_modifications(std::move(vars_of_ith_arg), code); } + return watched_vars.clear_and_stop_watching(code); } -static Expr* create_new_local_variable(SrcLocation loc, std::string_view var_name, TypeExpr* var_type, bool is_immutable) { - SymDef* sym = lookup_symbol(calc_sym_idx(var_name)); - if (sym) { // creating a new variable, but something found in symtable - if (sym->level != G.scope_level) { - sym = nullptr; // declaring a new variable with the same name, but in another scope - } else { - throw ParseError(loc, "redeclaration of local variable `" + static_cast(var_name) + "`"); - } +static std::vector pre_compile_tensor(CodeBlob& code, const std::vector& args, + LValGlobs* lval_globs = nullptr) { + std::vector> res_lists = pre_compile_tensor_inner(code, args, lval_globs); + std::vector res; + for (const std::vector& list : res_lists) { + res.insert(res.end(), list.cbegin(), list.cend()); } - Expr* x = new Expr{Expr::_Var, loc}; - x->val = ~calc_sym_idx(var_name); - x->e_type = var_type; - x->flags = Expr::_IsLvalue | (is_immutable ? 
Expr::_IsImmutable : 0); - return x; + return res; } -static Expr* create_new_underscore_variable(SrcLocation loc, TypeExpr* var_type) { - Expr* x = new Expr{Expr::_Hole, loc}; - x->val = -1; - x->flags = Expr::_IsLvalue; - x->e_type = var_type; - return x; +static std::vector pre_compile_let(CodeBlob& code, AnyExprV lhs, AnyExprV rhs, SrcLocation loc) { + // [lhs] = [rhs]; since type checking is ok, it's the same as "lhs = rhs" + if (lhs->type == ast_tensor_square && rhs->type == ast_tensor_square) { + std::vector right = pre_compile_tensor(code, rhs->as()->get_items()); + LValGlobs globs; + std::vector left = pre_compile_tensor(code, lhs->as()->get_items(), &globs); + code.on_var_modification(left, loc); + code.emplace_back(loc, Op::_Let, std::move(left), right); + globs.gen_ops_set_globs(code, loc); + return right; + } + // [lhs] = rhs; it's un-tuple to N left vars + if (lhs->type == ast_tensor_square) { + std::vector right = pre_compile_expr(rhs, code); + TypeExpr* rhs_type = rhs->inferred_type; + TypeExpr::remove_indirect(rhs_type); + TypeExpr* unpacked_type = rhs_type->args.at(0); // rhs->inferred_type is tuple> + std::vector rvect = {code.create_tmp_var(unpacked_type, rhs->loc)}; + code.emplace_back(lhs->loc, Op::_UnTuple, rvect, std::move(right)); + LValGlobs globs; + std::vector left = pre_compile_tensor(code, lhs->as()->get_items(), &globs); + code.on_var_modification(left, loc); + code.emplace_back(loc, Op::_Let, std::move(left), rvect); + globs.gen_ops_set_globs(code, loc); + return rvect; + } + // lhs = rhs + std::vector right = pre_compile_expr(rhs, code); + LValGlobs globs; + std::vector left = pre_compile_expr(lhs, code, &globs); + code.on_var_modification(left, loc); + code.emplace_back(loc, Op::_Let, std::move(left), right); + globs.gen_ops_set_globs(code, loc); + return right; } -static Expr* process_expr(V v, CodeBlob& code) { +static std::vector gen_op_call(CodeBlob& code, TypeExpr* ret_type, SrcLocation here, + std::vector&& args_vars, const 
FunctionData* fun_ref) { + std::vector rvect = {code.create_tmp_var(ret_type, here)}; + Op& op = code.emplace_back(here, Op::_Call, rvect, std::move(args_vars), fun_ref); + if (!fun_ref->is_marked_as_pure()) { + op.set_impure_flag(); + } + return rvect; +} + + +static std::vector process_binary_operator(V v, CodeBlob& code) { TokenType t = v->tok; std::string operator_name = static_cast(v->operator_name); - if (t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || - t == tok_set_mod || t == tok_set_lshift || t == tok_set_rshift || - t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor) { - Expr* x = process_expr(v->get_lhs(), code); - x->chk_rvalue(); - if (!x->is_lvalue()) { - x->fire_error_lvalue_expected("left side of assignment"); - } - if (x->is_immutable()) { - x->fire_error_modifying_immutable("left side of assignment"); - } - SymDef* sym = lookup_symbol(calc_sym_idx("^_" + operator_name + "_")); - Expr* y = process_expr(v->get_rhs(), code); - y->chk_rvalue(); - Expr* z = create_expr_apply(v->loc, sym, {x, y}); - Expr* res = new Expr{Expr::_Letop, {x->copy(), z}}; - res->here = v->loc; - res->flags = x->flags | Expr::_IsRvalue; - res->deduce_type(); - return res; + if (v->is_set_assign()) { + std::string_view calc_operator = std::string_view{operator_name}.substr(0, operator_name.size() - 1); + auto v_apply = createV(v->loc, calc_operator, static_cast(t - 1), v->get_lhs(), v->get_rhs()); + v_apply->assign_inferred_type(v->inferred_type); + return pre_compile_let(code, v->get_lhs(), v_apply, v->loc); } - if (t == tok_assign) { - Expr* x = process_expr(v->get_lhs(), code); - if (!x->is_lvalue()) { - x->fire_error_lvalue_expected("left side of assignment"); - } - if (x->is_immutable()) { - x->fire_error_modifying_immutable("left side of assignment"); - } - Expr* y = process_expr(v->get_rhs(), code); - y->chk_rvalue(); - x->predefine_vars(); - x->define_new_vars(code); - Expr* res = new Expr{Expr::_Letop, 
{x, y}}; - res->here = v->loc; - res->flags = x->flags | Expr::_IsRvalue; - res->deduce_type(); - return res; + if (v->is_assign()) { + return pre_compile_let(code, v->get_lhs(), v->get_rhs(), v->loc); } if (t == tok_minus || t == tok_plus || t == tok_bitwise_and || t == tok_bitwise_or || t == tok_bitwise_xor || t == tok_eq || t == tok_lt || t == tok_gt || t == tok_leq || t == tok_geq || t == tok_neq || t == tok_spaceship || t == tok_lshift || t == tok_rshift || t == tok_rshiftC || t == tok_rshiftR || t == tok_mul || t == tok_div || t == tok_mod || t == tok_divC || t == tok_divR) { - Expr* res = process_expr(v->get_lhs(), code); - res->chk_rvalue(); - SymDef* sym = lookup_symbol(calc_sym_idx("_" + operator_name + "_")); - Expr* x = process_expr(v->get_rhs(), code); - x->chk_rvalue(); - res = create_expr_apply(v->loc, sym, {res, x}); - return res; + const FunctionData* fun_ref = lookup_global_symbol("_" + operator_name + "_")->as(); + std::vector args_vars = pre_compile_tensor(code, {v->get_lhs(), v->get_rhs()}); + return gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), fun_ref); } if (t == tok_logical_and || t == tok_logical_or) { // do the following transformations: // a && b -> a ? (b != 0) : 0 // a || b -> a ? 1 : (b != 0) - SymDef* sym_neq = lookup_symbol(calc_sym_idx("_!=_")); - Expr* lhs = process_expr(v->get_lhs(), code); - Expr* rhs = process_expr(v->get_rhs(), code); - Expr* e_neq0 = create_expr_apply(v->loc, sym_neq, {rhs, create_expr_int_const(v->loc, 0)}); - Expr* e_when_true = t == tok_logical_and ? e_neq0 : create_expr_int_const(v->loc, -1); - Expr* e_when_false = t == tok_logical_and ? 
create_expr_int_const(v->loc, 0) : e_neq0; - Expr* e_ternary = new Expr(Expr::_CondExpr, {lhs, e_when_true, e_when_false}); - e_ternary->here = v->loc; - e_ternary->flags = Expr::_IsRvalue; - e_ternary->deduce_type(); - return e_ternary; + AnyExprV v_0 = createV(v->loc, td::make_refint(0), "0"); + v_0->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + AnyExprV v_1 = createV(v->loc, td::make_refint(-1), "-1"); + v_1->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + AnyExprV v_b_ne_0 = createV(v->loc, "!=", tok_neq, v->get_rhs(), v_0); + v_b_ne_0->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + std::vector cond = pre_compile_expr(v->get_lhs(), code); + tolk_assert(cond.size() == 1); + std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; + Op& if_op = code.emplace_back(v->loc, Op::_If, cond); + code.push_set_cur(if_op.block0); + code.emplace_back(v->loc, Op::_Let, rvect, pre_compile_expr(t == tok_logical_and ? v_b_ne_0 : v_1, code)); + code.close_pop_cur(v->loc); + code.push_set_cur(if_op.block1); + code.emplace_back(v->loc, Op::_Let, rvect, pre_compile_expr(t == tok_logical_and ? 
v_0 : v_b_ne_0, code)); + code.close_pop_cur(v->loc); + return rvect; } - v->error("unsupported binary operator"); + throw UnexpectedASTNodeType(v, "process_binary_operator"); } -static Expr* process_expr(V v, CodeBlob& code) { - TokenType t = v->tok; - SymDef* sym = lookup_symbol(calc_sym_idx(static_cast(v->operator_name) + "_")); - Expr* x = process_expr(v->get_rhs(), code); - x->chk_rvalue(); - - // here's an optimization to convert "-1" (tok_minus tok_int_const) to a const -1, not to Expr::Apply(-,1) - // without this, everything still works, but Tolk looses some vars/stack knowledge for now (to be fixed later) - // in FunC, it was: - // `var fst = -1;` // is constantly 1 - // `var snd = - 1;` // is Expr::Apply(-), a comment "snd=1" is lost in stack layout comments, and so on - // hence, when after grammar modification tok_minus became a true unary operator (not a part of a number), - // and thus to preserve existing behavior until compiler parts are completely rewritten, handle this case here - if (t == tok_minus && x->cls == Expr::_Const) { - x->intval = -x->intval; - if (!x->intval->signed_fits_bits(257)) { - v->error("integer overflow"); - } - return x; - } - if (t == tok_plus && x->cls == Expr::_Const) { - return x; - } - - return create_expr_apply(v->loc, sym, {x}); +static std::vector process_unary_operator(V v, CodeBlob& code) { + const FunctionData* fun_ref = lookup_global_symbol(static_cast(v->operator_name) + "_")->as(); + std::vector args_vars = pre_compile_tensor(code, {v->get_rhs()}); + return gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), fun_ref); } -static Expr* process_expr(V v, CodeBlob& code) { - Expr* cond = process_expr(v->get_cond(), code); - cond->chk_rvalue(); - Expr* x = process_expr(v->get_when_true(), code); - x->chk_rvalue(); - Expr* y = process_expr(v->get_when_false(), code); - y->chk_rvalue(); - Expr* res = new Expr{Expr::_CondExpr, {cond, x, y}}; - res->here = v->loc; - res->flags = Expr::_IsRvalue; - 
res->deduce_type(); - return res; +static std::vector process_ternary_operator(V v, CodeBlob& code) { + std::vector cond = pre_compile_expr(v->get_cond(), code); + tolk_assert(cond.size() == 1); + std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; + Op& if_op = code.emplace_back(v->loc, Op::_If, cond); + code.push_set_cur(if_op.block0); + code.emplace_back(v->get_when_true()->loc, Op::_Let, rvect, pre_compile_expr(v->get_when_true(), code)); + code.close_pop_cur(v->get_when_true()->loc); + code.push_set_cur(if_op.block1); + code.emplace_back(v->get_when_false()->loc, Op::_Let, rvect, pre_compile_expr(v->get_when_false(), code)); + code.close_pop_cur(v->get_when_false()->loc); + return rvect; } -static Expr* process_function_arguments(SymDef* func_sym, V v, Expr* lhs_of_dot_call, CodeBlob& code) { - SymValFunc* func_val = dynamic_cast(func_sym->value); - int delta_self = lhs_of_dot_call ? 1 : 0; - int n_arguments = static_cast(v->get_arguments().size()) + delta_self; - int n_parameters = static_cast(func_val->parameters.size()); - - // Tolk doesn't have optional parameters currently, so just compare counts - if (n_parameters < n_arguments) { - v->error("too many arguments in call to `" + func_sym->name() + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); - } - if (n_arguments < n_parameters) { - v->error("too few arguments in call to `" + func_sym->name() + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); - } - - std::vector apply_args; - apply_args.reserve(n_arguments); - if (lhs_of_dot_call) { - apply_args.push_back(lhs_of_dot_call); - } - for (int i = delta_self; i < n_arguments; ++i) { - auto v_arg = v->get_arg(i - delta_self); - if (SymDef* param_sym = func_val->parameters[i]) { // can be null (for underscore parameter) - SymValVariable* param_val = dynamic_cast(param_sym->value); - if 
(param_val->is_mutate_parameter() != v_arg->passed_as_mutate) { - fire_error_invalid_mutate_arg_passed(v_arg->loc, func_sym, param_sym, false, v_arg->passed_as_mutate, v_arg->get_expr()); - } - } - - Expr* arg = process_expr(v_arg->get_expr(), code); - arg->chk_rvalue(); - apply_args.push_back(arg); - } - - Expr* apply = new Expr{Expr::_Apply, func_sym, std::move(apply_args)}; - apply->flags = Expr::_IsRvalue | (!func_val->is_marked_as_pure() * Expr::_IsImpure); - apply->here = v->loc; - apply->deduce_type(); - - return apply; -} - -static Expr* process_function_call(V v, CodeBlob& code) { - // special error for "null()" which is a FunC syntax - if (v->get_called_f()->type == ast_null_keyword) { - v->error("null is not a function: use `null`, not `null()`"); - } - +static std::vector process_function_call(V v, CodeBlob& code) { // most likely it's a global function, but also may be `some_var(args)` or even `getF()(args)` - Expr* lhs = process_expr(v->get_called_f(), code); - if (lhs->cls != Expr::_GlobFunc) { - Expr* tensor_arg = new Expr(Expr::_Tensor, v->loc); - std::vector type_list; - type_list.reserve(v->get_num_args()); + const FunctionData* fun_ref = v->fun_maybe; + if (!fun_ref) { + std::vector args; + args.reserve(v->get_num_args()); for (int i = 0; i < v->get_num_args(); ++i) { - auto v_arg = v->get_arg(i); - if (v_arg->passed_as_mutate) { - v_arg->error("`mutate` used for non-mutate argument"); - } - Expr* arg = process_expr(v_arg->get_expr(), code); - arg->chk_rvalue(); - tensor_arg->pb_arg(arg); - type_list.push_back(arg->e_type); + args.push_back(v->get_arg(i)->get_expr()); } - tensor_arg->flags = Expr::_IsRvalue; - tensor_arg->e_type = TypeExpr::new_tensor(std::move(type_list)); - - Expr* var_apply = new Expr{Expr::_VarApply, {lhs, tensor_arg}}; - var_apply->here = v->loc; - var_apply->flags = Expr::_IsRvalue; - var_apply->deduce_type(); - return var_apply; + std::vector args_vars = pre_compile_tensor(code, args); + std::vector tfunc = 
pre_compile_expr(v->get_called_f(), code); + tolk_assert(tfunc.size() == 1); + args_vars.push_back(tfunc[0]); + std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; + Op& op = code.emplace_back(v->loc, Op::_CallInd, rvect, std::move(args_vars)); + op.set_impure_flag(); + return rvect; } - Expr* apply = process_function_arguments(lhs->sym, v->get_arg_list(), nullptr, code); + std::vector args; + args.reserve(v->get_num_args()); + for (int i = 0; i < v->get_num_args(); ++i) { + args.push_back(v->get_arg(i)->get_expr()); + } + std::vector args_vars = pre_compile_tensor(code, args); - if (dynamic_cast(apply->sym->value)->has_mutate_params()) { - const std::vector& args = apply->args; - SymValFunc* func_val = dynamic_cast(apply->sym->value); - tolk_assert(func_val->parameters.size() == args.size()); - Expr* grabbed_vars = new Expr(Expr::_Tensor, v->loc); - std::vector type_list; - for (int i = 0; i < static_cast(args.size()); ++i) { - SymDef* param_def = func_val->parameters[i]; - if (param_def && dynamic_cast(param_def->value)->is_mutate_parameter()) { - if (!args[i]->is_lvalue()) { - args[i]->fire_error_lvalue_expected("call a mutating function"); - } - if (args[i]->is_immutable()) { - args[i]->fire_error_modifying_immutable("call a mutating function"); - } - grabbed_vars->pb_arg(args[i]->copy()); - type_list.emplace_back(args[i]->e_type); + TypeExpr* op_call_type = v->inferred_type; + if (fun_ref->has_mutate_params()) { + std::vector types_list; + for (int i = 0; i < v->get_num_args(); ++i) { + if (fun_ref->parameters[i].is_mutate_parameter()) { + types_list.push_back(args[i]->inferred_type); } } - grabbed_vars->flags = Expr::_IsRvalue; - Expr* grab_mutate = new Expr(Expr::_GrabMutatedVars, apply->sym, {apply, grabbed_vars}); - grab_mutate->here = v->loc; - grab_mutate->flags = apply->flags; - grab_mutate->deduce_type(); - return grab_mutate; + types_list.push_back(v->inferred_type); + op_call_type = TypeExpr::new_tensor(std::move(types_list)); } - 
return apply; + std::vector rvect_apply = gen_op_call(code, op_call_type, v->loc, std::move(args_vars), fun_ref); + + if (fun_ref->has_mutate_params()) { + LValGlobs local_globs; + std::vector left; + for (int i = 0; i < v->get_num_args(); ++i) { + if (fun_ref->parameters[i].is_mutate_parameter()) { + AnyExprV arg_i = v->get_arg(i)->get_expr(); + tolk_assert(arg_i->is_lvalue); + std::vector ith_var_idx = pre_compile_expr(arg_i, code, &local_globs); + left.insert(left.end(), ith_var_idx.begin(), ith_var_idx.end()); + } + } + std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; + left.push_back(rvect[0]); + code.on_var_modification(left, v->loc); + code.emplace_back(v->loc, Op::_Let, std::move(left), rvect_apply); + local_globs.gen_ops_set_globs(code, v->loc); + return rvect; + } + + return rvect_apply; } -static Expr* process_dot_method_call(V v, CodeBlob& code) { - sym_idx_t name_idx = calc_sym_idx(v->method_name); - check_global_func(v->loc, name_idx); - SymDef* func_sym = lookup_symbol(name_idx); - SymValFunc* func_val = dynamic_cast(func_sym->value); - tolk_assert(func_val != nullptr); - - Expr* obj = process_expr(v->get_obj(), code); - obj->chk_rvalue(); - - if (func_val->parameters.empty()) { - v->error("`" + func_sym->name() + "` has no parameters and can not be called as method"); - } - if (!func_val->does_accept_self() && func_val->parameters[0] && dynamic_cast(func_val->parameters[0]->value)->is_mutate_parameter()) { - fire_error_invalid_mutate_arg_passed(v->loc, func_sym, func_val->parameters[0], true, false, v->get_obj()); +static std::vector process_dot_method_call(V v, CodeBlob& code) { + std::vector args; + args.reserve(1 + v->get_num_args()); + args.push_back(v->get_obj()); + for (int i = 0; i < v->get_num_args(); ++i) { + args.push_back(v->get_arg(i)->get_expr()); } + std::vector> vars_per_arg = pre_compile_tensor_inner(code, args, nullptr); - Expr* apply = process_function_arguments(func_sym, v->get_arg_list(), obj, code); - - Expr* 
obj_lval = apply->args[0]; - if (!obj_lval->is_lvalue()) { - if (obj_lval->cls == Expr::_ReturnSelf) { - obj_lval = obj_lval->args[1]; - } else { - Expr* tmp_var = create_new_underscore_variable(v->loc, obj_lval->e_type); - tmp_var->define_new_vars(code); - Expr* assign_to_tmp_var = new Expr(Expr::_Letop, {tmp_var, obj_lval}); - assign_to_tmp_var->here = v->loc; - assign_to_tmp_var->flags = Expr::_IsRvalue; - assign_to_tmp_var->deduce_type(); - apply->args[0] = assign_to_tmp_var; - obj_lval = tmp_var; + TypeExpr* op_call_type = v->inferred_type; + TypeExpr* real_ret_type = v->inferred_type; + if (v->fun_ref->does_return_self()) { + real_ret_type = TypeExpr::new_unit(); + if (!v->fun_ref->parameters[0].is_mutate_parameter()) { + op_call_type = TypeExpr::new_unit(); } } + if (v->fun_ref->has_mutate_params()) { + std::vector types_list; + for (int i = 0; i < 1 + v->get_num_args(); ++i) { + if (v->fun_ref->parameters[i].is_mutate_parameter()) { + types_list.push_back(args[i]->inferred_type); + } + } + types_list.push_back(real_ret_type); + op_call_type = TypeExpr::new_tensor(std::move(types_list)); + } - if (func_val->has_mutate_params()) { - tolk_assert(func_val->parameters.size() == apply->args.size()); - Expr* grabbed_vars = new Expr(Expr::_Tensor, v->loc); - std::vector type_list; - for (int i = 0; i < static_cast(apply->args.size()); ++i) { - SymDef* param_sym = func_val->parameters[i]; - if (param_sym && dynamic_cast(param_sym->value)->is_mutate_parameter()) { - Expr* ith_arg = apply->args[i]; - if (ith_arg->is_immutable()) { - ith_arg->fire_error_modifying_immutable("call a mutating method"); - } + std::vector args_vars; + for (const std::vector& list : vars_per_arg) { + args_vars.insert(args_vars.end(), list.cbegin(), list.cend()); + } + std::vector rvect_apply = gen_op_call(code, op_call_type, v->loc, std::move(args_vars), v->fun_ref); - Expr* var_to_mutate = nullptr; - if (ith_arg->is_lvalue()) { - var_to_mutate = ith_arg->copy(); - } else if (i == 0) { - 
var_to_mutate = obj_lval; + AnyExprV obj_leftmost = args[0]; + while (obj_leftmost->type == ast_dot_method_call && obj_leftmost->as()->fun_ref->does_return_self()) { + obj_leftmost = obj_leftmost->as()->get_obj(); + } + + if (v->fun_ref->has_mutate_params()) { + LValGlobs local_globs; + std::vector left; + for (int i = 0; i < 1 + v->get_num_args(); ++i) { + if (v->fun_ref->parameters[i].is_mutate_parameter()) { + AnyExprV arg_i = i == 0 ? obj_leftmost : args[i]; + tolk_assert (arg_i->is_lvalue || i == 0); + if (arg_i->is_lvalue) { + std::vector ith_var_idx = pre_compile_expr(arg_i, code, &local_globs); + left.insert(left.end(), ith_var_idx.begin(), ith_var_idx.end()); } else { - ith_arg->fire_error_lvalue_expected("call a mutating method"); + left.insert(left.end(), vars_per_arg[0].begin(), vars_per_arg[0].end()); } - tolk_assert(var_to_mutate->is_lvalue() && !var_to_mutate->is_immutable()); - grabbed_vars->pb_arg(var_to_mutate); - type_list.emplace_back(var_to_mutate->e_type); } } - grabbed_vars->flags = Expr::_IsRvalue; - - Expr* grab_mutate = new Expr(Expr::_GrabMutatedVars, func_sym, {apply, grabbed_vars}); - grab_mutate->here = v->loc; - grab_mutate->flags = apply->flags; - grab_mutate->deduce_type(); - - apply = grab_mutate; + std::vector rvect = {code.create_tmp_var(real_ret_type, v->loc)}; + left.push_back(rvect[0]); + code.on_var_modification(left, v->loc); + code.emplace_back(v->loc, Op::_Let, std::move(left), rvect_apply); + local_globs.gen_ops_set_globs(code, v->loc); + rvect_apply = rvect; } - if (func_val->does_return_self()) { - Expr* self_arg = obj_lval; - tolk_assert(self_arg->is_lvalue()); - - Expr* return_self = new Expr(Expr::_ReturnSelf, func_sym, {apply, self_arg}); - return_self->here = v->loc; - return_self->flags = Expr::_IsRvalue; - return_self->deduce_type(); - - apply = return_self; - } - - return apply; -} - -static Expr* process_expr(V v, CodeBlob& code) { - if (v->empty()) { - Expr* res = new Expr{Expr::_Tensor, {}}; - res->flags = 
Expr::_IsRvalue; - res->here = v->loc; - res->e_type = TypeExpr::new_unit(); - return res; - } - - Expr* res = process_expr(v->get_item(0), code); - std::vector type_list; - type_list.push_back(res->e_type); - int f = res->flags; - res = new Expr{Expr::_Tensor, {res}}; - for (int i = 1; i < v->size(); ++i) { - Expr* x = process_expr(v->get_item(i), code); - res->pb_arg(x); - f &= (x->flags | Expr::_IsImmutable); - f |= (x->flags & Expr::_IsImmutable); - type_list.push_back(x->e_type); - } - res->here = v->loc; - res->flags = f; - res->e_type = TypeExpr::new_tensor(std::move(type_list)); - return res; -} - -static Expr* process_expr(V v, CodeBlob& code) { - if (v->empty()) { - Expr* res = new Expr{Expr::_Tensor, {}}; - res->flags = Expr::_IsRvalue; - res->here = v->loc; - res->e_type = TypeExpr::new_unit(); - res = new Expr{Expr::_MkTuple, {res}}; - res->flags = Expr::_IsRvalue; - res->here = v->loc; - res->e_type = TypeExpr::new_tuple(res->args.at(0)->e_type); - return res; - } - - Expr* res = process_expr(v->get_item(0), code); - std::vector type_list; - type_list.push_back(res->e_type); - int f = res->flags; - res = new Expr{Expr::_Tensor, {res}}; - for (int i = 1; i < v->size(); ++i) { - Expr* x = process_expr(v->get_item(i), code); - res->pb_arg(x); - f &= (x->flags | Expr::_IsImmutable); - f |= (x->flags & Expr::_IsImmutable); - type_list.push_back(x->e_type); - } - res->here = v->loc; - res->flags = f; - res->e_type = TypeExpr::new_tensor(std::move(type_list), false); - res = new Expr{Expr::_MkTuple, {res}}; - res->flags = f; - res->here = v->loc; - res->e_type = TypeExpr::new_tuple(res->args.at(0)->e_type); - return res; -} - -static Expr* process_expr(V v) { - Expr* res = new Expr{Expr::_Const, v->loc}; - res->flags = Expr::_IsRvalue; - res->intval = td::string_to_int256(static_cast(v->int_val)); - if (res->intval.is_null() || !res->intval->signed_fits_bits(257)) { - v->error("invalid integer constant"); - } - res->e_type = 
TypeExpr::new_atomic(TypeExpr::_Int); - return res; -} - -static Expr* process_expr(V v) { - std::string str = static_cast(v->str_val); - Expr* res; - switch (v->modifier) { - case 0: - case 's': - case 'a': - res = new Expr{Expr::_SliceConst, v->loc}; - res->e_type = TypeExpr::new_atomic(TypeExpr::_Slice); - break; - case 'u': - case 'h': - case 'H': - case 'c': - res = new Expr{Expr::_Const, v->loc}; - res->e_type = TypeExpr::new_atomic(TypeExpr::_Int); - break; - default: - v->error("invalid string modifier '" + std::string(1, v->modifier) + "'"); - } - res->flags = Expr::_IsRvalue; - switch (v->modifier) { - case 0: { - res->strval = td::hex_encode(str); - break; + if (v->fun_ref->does_return_self()) { + if (obj_leftmost->is_lvalue) { // to handle if obj is global var, potentially re-assigned inside a chain + rvect_apply = pre_compile_expr(obj_leftmost, code); + } else { // temporary object, not lvalue, pre_compile_expr + rvect_apply = vars_per_arg[0]; } - case 's': { - res->strval = str; - unsigned char buff[128]; - int bits = (int)td::bitstring::parse_bitstring_hex_literal(buff, sizeof(buff), str.data(), str.data() + str.size()); - if (bits < 0) { - v->error("invalid hex bitstring constant '" + str + "'"); - } - break; - } - case 'a': { // MsgAddress - int workchain; - ton::StdSmcAddress addr; - bool correct = (str.size() == 48 && parse_friendly_address(str.data(), workchain, addr)) || - (str.size() != 48 && parse_raw_address(str, workchain, addr)); - if (!correct) { - v->error("invalid standard address '" + str + "'"); - } - if (workchain < -128 || workchain >= 128) { - v->error("anycast addresses not supported"); - } - - unsigned char data[3 + 8 + 256]; // addr_std$10 anycast:(Maybe Anycast) workchain_id:int8 address:bits256 = MsgAddressInt; - td::bitstring::bits_store_long_top(data, 0, static_cast(4) << (64 - 3), 3); - td::bitstring::bits_store_long_top(data, 3, static_cast(workchain) << (64 - 8), 8); - td::bitstring::bits_memcpy(data, 3 + 8, 
addr.bits().ptr, 0, addr.size()); - res->strval = td::BitSlice{data, sizeof(data)}.to_hex(); - break; - } - case 'u': { - res->intval = td::hex_string_to_int256(td::hex_encode(str)); - if (str.empty()) { - v->error("empty integer ascii-constant"); - } - if (res->intval.is_null()) { - v->error("too long integer ascii-constant"); - } - break; - } - case 'h': - case 'H': { - unsigned char hash[32]; - digest::hash_str(hash, str.data(), str.size()); - res->intval = td::bits_to_refint(hash, (v->modifier == 'h') ? 32 : 256, false); - break; - } - case 'c': { - res->intval = td::make_refint(td::crc32(td::Slice{str})); - break; - } - default: - tolk_assert(false); } - return res; + + return rvect_apply; } -static Expr* process_expr(V v) { - SymDef* builtin_sym = lookup_symbol(calc_sym_idx(v->bool_val ? "__true" : "__false")); - return create_expr_apply(v->loc, builtin_sym, {}); +static std::vector process_tensor(V v, CodeBlob& code, LValGlobs* lval_globs) { + return pre_compile_tensor(code, v->get_items(), lval_globs); } -static Expr* process_expr(V v) { - SymDef* builtin_sym = lookup_symbol(calc_sym_idx("__null")); - return create_expr_apply(v->loc, builtin_sym, {}); +static std::vector process_tensor_square(V v, CodeBlob& code, LValGlobs* lval_globs) { + if (lval_globs) { // todo some time, make "var (a, [b,c]) = (1, [2,3])" work + v->error("[...] 
can not be used as lvalue here"); + } + std::vector left = std::vector{code.create_tmp_var(v->inferred_type, v->loc)}; + std::vector right = pre_compile_tensor(code, v->get_items()); + code.emplace_back(v->loc, Op::_Tuple, left, std::move(right)); + return left; } -static Expr* process_expr(V v, CodeBlob& code) { - if (!code.func_val->does_accept_self()) { - v->error("using `self` in a non-member function (it does not accept the first `self` parameter)"); - } - SymDef* sym = lookup_symbol(calc_sym_idx("self")); - tolk_assert(sym); - SymValVariable* sym_val = dynamic_cast(sym->value); - Expr* res = new Expr(Expr::_Var, v->loc); - res->sym = sym; - res->val = sym_val->idx; - res->flags = Expr::_IsLvalue | Expr::_IsRvalue | (sym_val->is_immutable() ? Expr::_IsImmutable : 0); - res->e_type = sym_val->get_type(); - return res; +static std::vector process_int_const(V v, CodeBlob& code) { + std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; + code.emplace_back(v->loc, Op::_IntConst, rvect, v->intval); + return rvect; } -static Expr* process_identifier(V v) { - SymDef* sym = lookup_symbol(calc_sym_idx(v->name)); - if (sym && dynamic_cast(sym->value)) { - check_import_exists_when_using_sym(v, sym); - Expr* res = new Expr{Expr::_GlobVar, v->loc}; - res->e_type = sym->value->get_type(); - res->sym = sym; - res->flags = Expr::_IsLvalue | Expr::_IsRvalue | Expr::_IsImpure; - return res; - } - if (sym && dynamic_cast(sym->value)) { - check_import_exists_when_using_sym(v, sym); - auto val = dynamic_cast(sym->value); - Expr* res = nullptr; - if (val->get_kind() == SymValConst::IntConst) { - res = new Expr{Expr::_Const, v->loc}; - res->intval = val->get_int_value(); - res->e_type = TypeExpr::new_atomic(TypeExpr::_Int); - } else if (val->get_kind() == SymValConst::SliceConst) { - res = new Expr{Expr::_SliceConst, v->loc}; - res->strval = val->get_str_value(); - res->e_type = TypeExpr::new_atomic(TypeExpr::_Slice); - } else { - v->error("invalid symbolic constant 
type"); - } - res->flags = Expr::_IsLvalue | Expr::_IsRvalue | Expr::_IsImmutable; - res->sym = sym; - return res; - } - if (sym && dynamic_cast(sym->value)) { - check_import_exists_when_using_sym(v, sym); - } - Expr* res = new Expr{Expr::_Var, v->loc}; - if (!sym) { - check_global_func(v->loc, calc_sym_idx(v->name)); - sym = lookup_symbol(calc_sym_idx(v->name)); - tolk_assert(sym); - } - res->sym = sym; - bool impure = false; - bool immutable = false; - if (const SymValFunc* func_val = dynamic_cast(sym->value)) { - res->e_type = func_val->get_type(); - res->cls = Expr::_GlobFunc; - impure = !func_val->is_marked_as_pure(); - } else if (const SymValVariable* var_val = dynamic_cast(sym->value)) { - tolk_assert(var_val->idx >= 0) - res->val = var_val->idx; - res->e_type = var_val->get_type(); - immutable = var_val->is_immutable(); - // std::cerr << "accessing variable " << lex.cur().str << " : " << res->e_type << std::endl; +static std::vector process_string_const(V v, CodeBlob& code) { + ConstantValue value = eval_const_init_value(v); + std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; + if (value.is_int()) { + code.emplace_back(v->loc, Op::_IntConst, rvect, value.as_int()); } else { - v->error("undefined identifier '" + static_cast(v->name) + "'"); + code.emplace_back(v->loc, Op::_SliceConst, rvect, value.as_slice()); } - // std::cerr << "accessing symbol " << lex.cur().str << " : " << res->e_type << (val->impure ? " (impure)" : " (pure)") << std::endl; - res->flags = Expr::_IsLvalue | Expr::_IsRvalue | (impure ? Expr::_IsImpure : 0) | (immutable ? Expr::_IsImmutable : 0); - res->deduce_type(); - return res; + return rvect; } -Expr* process_expr(AnyV v, CodeBlob& code) { +static std::vector process_bool_const(V v, CodeBlob& code) { + const FunctionData* builtin_sym = lookup_global_symbol(v->bool_val ? 
"__true" : "__false")->as(); + return gen_op_call(code, v->inferred_type, v->loc, {}, builtin_sym); +} + +static std::vector process_null_keyword(V v, CodeBlob& code) { + const FunctionData* builtin_sym = lookup_global_symbol("__null")->as(); + return gen_op_call(code, v->inferred_type, v->loc, {}, builtin_sym); +} + +static std::vector process_self_keyword(V v, CodeBlob& code) { + tolk_assert(code.fun_ref->does_accept_self() && v->param_ref); + tolk_assert(v->param_ref->idx == 0); + return {0}; +} + +static std::vector process_identifier(V v, CodeBlob& code, LValGlobs* lval_globs) { + const Symbol* sym = v->sym; + if (const auto* glob_ref = sym->try_as()) { + std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; + if (lval_globs) { + lval_globs->add_modified_glob(glob_ref, rvect[0]); + return rvect; + } else { + code.emplace_back(v->loc, Op::_GlobVar, rvect, std::vector{}, glob_ref); + return rvect; + } + } + if (const auto* const_ref = sym->try_as()) { + std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; + if (const_ref->is_int_const()) { + code.emplace_back(v->loc, Op::_IntConst, rvect, const_ref->as_int_const()); + } else { + code.emplace_back(v->loc, Op::_SliceConst, rvect, const_ref->as_slice_const()); + } + return rvect; + } + if (const auto* fun_ref = sym->try_as()) { + std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; + code.emplace_back(v->loc, Op::_GlobVar, rvect, std::vector{}, fun_ref); + return rvect; + } + if (const auto* var_ref = sym->try_as()) { +#ifdef TOLK_DEBUG + tolk_assert(var_ref->idx != -1); +#endif + return {var_ref->idx}; + } + throw UnexpectedASTNodeType(v, "process_identifier"); +} + +static std::vector process_local_var(V v, CodeBlob& code, LValGlobs* lval_globs) { + if (v->marked_as_redef) { + return process_identifier(v->get_identifier()->as(), code, lval_globs); + } + if (v->get_identifier()->try_as()) { + const LocalVarData* var_ref = v->var_maybe->as(); + tolk_assert(var_ref->idx 
== -1); + var_ref->mutate()->assign_idx(code.create_var(v->inferred_type, var_ref, v->loc)); + return {var_ref->idx}; + } + return {code.create_tmp_var(v->inferred_type, v->loc)}; // underscore +} + +static std::vector process_underscore(V v, CodeBlob& code) { + return {code.create_tmp_var(v->inferred_type, v->loc)}; +} + +std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, LValGlobs* lval_globs) { switch (v->type) { case ast_binary_operator: - return process_expr(v->as(), code); + return process_binary_operator(v->as(), code); case ast_unary_operator: - return process_expr(v->as(), code); + return process_unary_operator(v->as(), code); case ast_ternary_operator: - return process_expr(v->as(), code); + return process_ternary_operator(v->as(), code); case ast_function_call: return process_function_call(v->as(), code); case ast_dot_method_call: return process_dot_method_call(v->as(), code); - case ast_parenthesized_expr: - return process_expr(v->as()->get_expr(), code); + case ast_parenthesized_expression: + return pre_compile_expr(v->as()->get_expr(), code, lval_globs); case ast_tensor: - return process_expr(v->as(), code); + return process_tensor(v->as(), code, lval_globs); case ast_tensor_square: - return process_expr(v->as(), code); + return process_tensor_square(v->as(), code, lval_globs); case ast_int_const: - return process_expr(v->as()); + return process_int_const(v->as(), code); case ast_string_const: - return process_expr(v->as()); + return process_string_const(v->as(), code); case ast_bool_const: - return process_expr(v->as()); + return process_bool_const(v->as(), code); case ast_null_keyword: - return process_expr(v->as()); + return process_null_keyword(v->as(), code); case ast_self_keyword: - return process_expr(v->as(), code); + return process_self_keyword(v->as(), code); case ast_identifier: - return process_identifier(v->as()); + return process_identifier(v->as(), code, lval_globs); + case ast_local_var: + return process_local_var(v->as(), code, 
lval_globs); case ast_underscore: - return create_new_underscore_variable(v->loc, TypeExpr::new_hole()); + return process_underscore(v->as(), code); default: - throw UnexpectedASTNodeType(v, "process_expr"); + throw UnexpectedASTNodeType(v, "pre_compile_expr"); } } -static Expr* process_local_vars_lhs(AnyV v, CodeBlob& code) { - switch (v->type) { - case ast_local_var: { - auto v_var = v->as(); - if (v_var->marked_as_redef) { - Expr* redef_var = process_identifier(v_var->get_identifier()->as()); - if (redef_var->is_immutable()) { - redef_var->fire_error_modifying_immutable("left side of assignment"); - } - return redef_var; - } - TypeExpr* var_type = v_var->declared_type ? v_var->declared_type : TypeExpr::new_hole(); - if (auto v_ident = v->as()->get_identifier()->try_as()) { - return create_new_local_variable(v->loc, v_ident->name, var_type, v_var->is_immutable); - } else { - return create_new_underscore_variable(v->loc, var_type); - } - } - case ast_parenthesized_expr: - return process_local_vars_lhs(v->as()->get_expr(), code); - case ast_tensor: { - std::vector type_list; - Expr* res = new Expr{Expr::_Tensor, v->loc}; - for (AnyV item : v->as()->get_items()) { - Expr* x = process_local_vars_lhs(item, code); - res->pb_arg(x); - res->flags |= x->flags; - type_list.push_back(x->e_type); - } - res->e_type = TypeExpr::new_tensor(std::move(type_list)); - return res; - } - case ast_tensor_square: { - std::vector type_list; - Expr* res = new Expr{Expr::_Tensor, v->loc}; - for (AnyV item : v->as()->get_items()) { - Expr* x = process_local_vars_lhs(item, code); - res->pb_arg(x); - res->flags |= x->flags; - type_list.push_back(x->e_type); - } - res->e_type = TypeExpr::new_tensor(std::move(type_list)); - res = new Expr{Expr::_MkTuple, {res}}; - res->flags = res->args.at(0)->flags; - res->here = v->loc; - res->e_type = TypeExpr::new_tuple(res->args.at(0)->e_type); - return res; - } - default: - throw UnexpectedASTNodeType(v, "process_local_vars_lhs"); - } + +static void 
process_local_vars_declaration(V v, CodeBlob& code) { + pre_compile_let(code, v->get_lhs(), v->get_assigned_val(), v->loc); } -static blk_fl::val process_vertex(V v, CodeBlob& code) { - Expr* x = process_local_vars_lhs(v->get_lhs(), code); - Expr* y = process_expr(v->get_assigned_val(), code); - y->chk_rvalue(); - x->predefine_vars(); - x->define_new_vars(code); - Expr* res = new Expr{Expr::_Letop, {x, y}}; - res->here = v->loc; - res->flags = x->flags | Expr::_IsRvalue; - res->deduce_type(); - res->chk_rvalue(); - res->pre_compile(code); - return blk_fl::end; -} - -static bool is_expr_valid_as_return_self(Expr* return_expr) { - // `return self` - if (return_expr->cls == Expr::_Var && return_expr->val == 0) { - return true; - } - if (return_expr->cls == Expr::_ReturnSelf) { - return is_expr_valid_as_return_self(return_expr->args[1]); - } - if (return_expr->cls == Expr::_CondExpr) { - return is_expr_valid_as_return_self(return_expr->args[1]) && is_expr_valid_as_return_self(return_expr->args[2]); - } - return false; -} - -// for mutating functions, having `return expr`, transform it to `return (modify_var1, ..., expr)` -static Expr* wrap_return_value_with_mutate_params(SrcLocation loc, CodeBlob& code, Expr* return_expr) { - Expr* tmp_var; - if (return_expr->cls != Expr::_Var) { - // `return complex_expr` - extract this into temporary variable (eval it before return) - // this is mandatory if it assigns to one of modified vars - tmp_var = create_new_underscore_variable(loc, return_expr->e_type); - tmp_var->predefine_vars(); - tmp_var->define_new_vars(code); - Expr* assign_to_tmp_var = new Expr(Expr::_Letop, {tmp_var, return_expr}); - assign_to_tmp_var->here = loc; - assign_to_tmp_var->flags = tmp_var->flags | Expr::_IsRvalue; - assign_to_tmp_var->deduce_type(); - assign_to_tmp_var->pre_compile(code); - } else { - tmp_var = return_expr; - } - - Expr* ret_tensor = new Expr(Expr::_Tensor, loc); - std::vector type_list; - for (SymDef* p_sym: code.func_val->parameters) { - 
if (p_sym && dynamic_cast(p_sym->value)->is_mutate_parameter()) { - Expr* p_expr = new Expr{Expr::_Var, p_sym->loc}; - p_expr->sym = p_sym; - p_expr->val = p_sym->value->idx; - p_expr->flags = Expr::_IsRvalue; - p_expr->e_type = p_sym->value->get_type(); - ret_tensor->pb_arg(p_expr); - type_list.emplace_back(p_expr->e_type); - } - } - ret_tensor->pb_arg(tmp_var); - type_list.emplace_back(tmp_var->e_type); - ret_tensor->flags = Expr::_IsRvalue; - ret_tensor->e_type = TypeExpr::new_tensor(std::move(type_list)); - return ret_tensor; -} - -static blk_fl::val process_vertex(V v, CodeBlob& code) { - Expr* expr = process_expr(v->get_return_value(), code); - if (code.func_val->does_return_self()) { - if (!is_expr_valid_as_return_self(expr)) { - v->error("invalid return from `self` function"); - } - Expr* var_self = new Expr(Expr::_Var, v->loc); - var_self->flags = Expr::_IsRvalue | Expr::_IsLvalue; - var_self->e_type = code.func_val->parameters[0]->value->get_type(); - Expr* assign_to_self = new Expr(Expr::_Letop, {var_self, expr}); - assign_to_self->here = v->loc; - assign_to_self->flags = Expr::_IsRvalue; - assign_to_self->deduce_type(); - assign_to_self->pre_compile(code); - Expr* empty_tensor = new Expr(Expr::_Tensor, {}); - empty_tensor->here = v->loc; - empty_tensor->flags = Expr::_IsRvalue; - empty_tensor->e_type = TypeExpr::new_tensor({}); - expr = empty_tensor; - } - if (code.func_val->has_mutate_params()) { - expr = wrap_return_value_with_mutate_params(v->loc, code, expr); - } - expr->chk_rvalue(); - try { - unify(expr->e_type, code.ret_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "previous function return type " << code.ret_type - << " cannot be unified with return statement expression type " << expr->e_type << ": " << ue; - v->error(os.str()); - } - std::vector tmp_vars = expr->pre_compile(code); - code.emplace_back(v->loc, Op::_Return, std::move(tmp_vars)); - return blk_fl::ret; -} - -static void append_implicit_ret_stmt(SrcLocation 
loc_end, CodeBlob& code) { - Expr* expr = new Expr{Expr::_Tensor, {}}; - expr->flags = Expr::_IsRvalue; - expr->here = loc_end; - expr->e_type = TypeExpr::new_unit(); - if (code.func_val->does_return_self()) { - throw ParseError(loc_end, "missing return; forgot `return self`?"); - } - if (code.func_val->has_mutate_params()) { - expr = wrap_return_value_with_mutate_params(loc_end, code, expr); - } - try { - unify(expr->e_type, code.ret_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "previous function return type " << code.ret_type - << " cannot be unified with implicit end-of-block return type " << expr->e_type << ": " << ue; - throw ParseError(loc_end, os.str()); - } - std::vector tmp_vars = expr->pre_compile(code); - code.emplace_back(loc_end, Op::_Return, std::move(tmp_vars)); -} - -static blk_fl::val process_vertex(V v, CodeBlob& code, bool no_new_scope = false) { - if (!no_new_scope) { - open_scope(v->loc); - } - blk_fl::val res = blk_fl::init; - bool warned = false; +static void process_sequence(V v, CodeBlob& code) { for (AnyV item : v->get_items()) { - if (!(res & blk_fl::end) && !warned) { - item->loc.show_warning("unreachable code"); - warned = true; - } - blk_fl::combine(res, process_statement(item, code)); + process_statement(item, code); } - if (!no_new_scope) { - close_scope(); - } - return res; } -static blk_fl::val process_vertex(V v, CodeBlob& code) { - Expr* expr = process_expr(v->get_cond(), code); - expr->chk_rvalue(); - auto cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); - try { - unify(expr->e_type, cnt_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "repeat count value of type " << expr->e_type << " is not an integer: " << ue; - v->get_cond()->error(os.str()); + +static void process_assert_statement(V v, CodeBlob& code) { + std::vector args(3); + if (auto v_not = v->get_cond()->try_as(); v_not && v_not->tok == tok_logical_not) { + args[0] = v->get_thrown_code(); + args[1] = 
v->get_cond()->as()->get_rhs(); + args[2] = createV(v->loc, true); + args[2]->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + } else { + args[0] = v->get_thrown_code(); + args[1] = v->get_cond(); + args[2] = createV(v->loc, false); + args[2]->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); } - std::vector tmp_vars = expr->pre_compile(code); - if (tmp_vars.size() != 1) { - v->get_cond()->error("repeat count value is not a singleton"); + + const FunctionData* builtin_sym = lookup_global_symbol("__throw_if_unless")->as(); + std::vector args_vars = pre_compile_tensor(code, args); + gen_op_call(code, TypeExpr::new_unit(), v->loc, std::move(args_vars), builtin_sym); +} + +static void process_catch_variable(AnyExprV v_catch_var, CodeBlob& code) { + if (auto v_ident = v_catch_var->try_as()) { + const LocalVarData* var_ref = v_ident->sym->as(); + tolk_assert(var_ref->idx == -1); + var_ref->mutate()->assign_idx(code.create_var(v_catch_var->inferred_type, var_ref, v_catch_var->loc)); } +} + +static void process_try_catch_statement(V v, CodeBlob& code) { + code.require_callxargs = true; + Op& try_catch_op = code.emplace_back(v->loc, Op::_TryCatch); + code.push_set_cur(try_catch_op.block0); + process_statement(v->get_try_body(), code); + code.close_pop_cur(v->get_try_body()->loc_end); + code.push_set_cur(try_catch_op.block1); + + // transform catch (excNo, arg) into TVM-catch (arg, excNo), where arg is untyped and thus almost useless now + const std::vector& catch_vars = v->get_catch_expr()->get_items(); + tolk_assert(catch_vars.size() == 2); + process_catch_variable(catch_vars[0], code); + process_catch_variable(catch_vars[1], code); + try_catch_op.left = pre_compile_tensor(code, {catch_vars[1], catch_vars[0]}); + process_statement(v->get_catch_body(), code); + code.close_pop_cur(v->get_catch_body()->loc_end); +} + +static void process_repeat_statement(V v, CodeBlob& code) { + std::vector tmp_vars = pre_compile_expr(v->get_cond(), 
code); Op& repeat_op = code.emplace_back(v->loc, Op::_Repeat, tmp_vars); code.push_set_cur(repeat_op.block0); - blk_fl::val res = process_vertex(v->get_body(), code); + process_statement(v->get_body(), code); code.close_pop_cur(v->get_body()->loc_end); - return res | blk_fl::end; } -static blk_fl::val process_vertex(V v, CodeBlob& code) { - Expr* expr = process_expr(v->get_cond(), code); - expr->chk_rvalue(); - auto cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); - try { - unify(expr->e_type, cnt_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "while condition value of type " << expr->e_type << " is not an integer: " << ue; - v->get_cond()->error(os.str()); +static void process_if_statement(V v, CodeBlob& code) { + std::vector tmp_vars = pre_compile_expr(v->get_cond(), code); + Op& if_op = code.emplace_back(v->loc, Op::_If, std::move(tmp_vars)); + code.push_set_cur(if_op.block0); + process_statement(v->get_if_body(), code); + code.close_pop_cur(v->get_if_body()->loc_end); + code.push_set_cur(if_op.block1); + process_statement(v->get_else_body(), code); + code.close_pop_cur(v->get_else_body()->loc_end); + if (v->is_ifnot) { + std::swap(if_op.block0, if_op.block1); } - Op& while_op = code.emplace_back(v->loc, Op::_While); - code.push_set_cur(while_op.block0); - while_op.left = expr->pre_compile(code); - code.close_pop_cur(v->get_body()->loc); - if (while_op.left.size() != 1) { - v->get_cond()->error("while condition value is not a singleton"); - } - code.push_set_cur(while_op.block1); - blk_fl::val res1 = process_vertex(v->get_body(), code); - code.close_pop_cur(v->get_body()->loc_end); - return res1 | blk_fl::end; } -static blk_fl::val process_vertex(V v, CodeBlob& code) { +static void process_do_while_statement(V v, CodeBlob& code) { Op& until_op = code.emplace_back(v->loc, Op::_Until); code.push_set_cur(until_op.block0); - open_scope(v->loc); - blk_fl::val res = process_vertex(v->get_body(), code, true); + process_statement(v->get_body(), 
code); // in TVM, there is only "do until", but in Tolk, we want "do while" // here we negate condition to pass it forward to legacy to Op::_Until // also, handle common situations as a hardcoded "optimization": replace (a<0) with (a>=0) and so on // todo these hardcoded conditions should be removed from this place in the future - AnyV cond = v->get_cond(); - AnyV until_cond; + AnyExprV cond = v->get_cond(); + AnyExprV until_cond; if (auto v_not = cond->try_as(); v_not && v_not->tok == tok_logical_not) { until_cond = v_not->get_rhs(); } else if (auto v_eq = cond->try_as(); v_eq && v_eq->tok == tok_eq) { @@ -1096,215 +621,114 @@ static blk_fl::val process_vertex(V v, CodeBlob& code) { } else { until_cond = createV(cond->loc, "!", tok_logical_not, cond); } + until_cond->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); - Expr* expr = process_expr(until_cond, code); - expr->chk_rvalue(); - close_scope(); - auto cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); - try { - unify(expr->e_type, cnt_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "`while` condition value of type " << expr->e_type << " is not an integer: " << ue; - v->get_cond()->error(os.str()); - } - until_op.left = expr->pre_compile(code); + until_op.left = pre_compile_expr(until_cond, code); code.close_pop_cur(v->get_body()->loc_end); - if (until_op.left.size() != 1) { - v->get_cond()->error("`while` condition value is not a singleton"); - } - return res & ~blk_fl::empty; } -static blk_fl::val process_vertex(V v, CodeBlob& code) { - std::vector args; - SymDef* builtin_sym; +static void process_while_statement(V v, CodeBlob& code) { + Op& while_op = code.emplace_back(v->loc, Op::_While); + code.push_set_cur(while_op.block0); + while_op.left = pre_compile_expr(v->get_cond(), code); + code.close_pop_cur(v->get_body()->loc); + code.push_set_cur(while_op.block1); + process_statement(v->get_body(), code); + code.close_pop_cur(v->get_body()->loc_end); +} + +static void 
process_throw_statement(V v, CodeBlob& code) { if (v->has_thrown_arg()) { - builtin_sym = lookup_symbol(calc_sym_idx("__throw_arg")); - args.push_back(process_expr(v->get_thrown_arg(), code)); - args.push_back(process_expr(v->get_thrown_code(), code)); + const FunctionData* builtin_sym = lookup_global_symbol("__throw_arg")->as(); + std::vector args_vars = pre_compile_tensor(code, {v->get_thrown_arg(), v->get_thrown_code()}); + gen_op_call(code, TypeExpr::new_unit(), v->loc, std::move(args_vars), builtin_sym); } else { - builtin_sym = lookup_symbol(calc_sym_idx("__throw")); - args.push_back(process_expr(v->get_thrown_code(), code)); + const FunctionData* builtin_sym = lookup_global_symbol("__throw")->as(); + std::vector args_vars = pre_compile_tensor(code, {v->get_thrown_code()}); + gen_op_call(code, TypeExpr::new_unit(), v->loc, std::move(args_vars), builtin_sym); } - - Expr* apply = create_expr_apply(v->loc, builtin_sym, std::move(args)); - apply->flags |= Expr::_IsImpure; - apply->pre_compile(code); - return blk_fl::end; } -static blk_fl::val process_vertex(V v, CodeBlob& code) { - std::vector args(3); - if (auto v_not = v->get_cond()->try_as(); v_not && v_not->tok == tok_logical_not) { - args[0] = process_expr(v->get_thrown_code(), code); - args[1] = process_expr(v->get_cond()->as()->get_rhs(), code); - args[2] = process_expr(createV(v->loc, true), code); - } else { - args[0] = process_expr(v->get_thrown_code(), code); - args[1] = process_expr(v->get_cond(), code); - args[2] = process_expr(createV(v->loc, false), code); +static void process_return_statement(V v, CodeBlob& code) { + std::vector return_vars = pre_compile_expr(v->get_return_value(), code); + if (code.fun_ref->does_return_self()) { + tolk_assert(return_vars.size() == 1); + return_vars = {}; } - - SymDef* builtin_sym = lookup_symbol(calc_sym_idx("__throw_if_unless")); - Expr* apply = create_expr_apply(v->loc, builtin_sym, std::move(args)); - apply->flags |= Expr::_IsImpure; - 
apply->pre_compile(code); - return blk_fl::end; + if (code.fun_ref->has_mutate_params()) { + std::vector mutated_vars; + for (const LocalVarData& p_sym: code.fun_ref->parameters) { + if (p_sym.is_mutate_parameter()) { + mutated_vars.push_back(p_sym.idx); + } + } + return_vars.insert(return_vars.begin(), mutated_vars.begin(), mutated_vars.end()); + } + code.emplace_back(v->loc, Op::_Return, std::move(return_vars)); } -static Expr* process_catch_variable(AnyV catch_var, TypeExpr* var_type) { - if (auto v_ident = catch_var->try_as()) { - return create_new_local_variable(catch_var->loc, v_ident->name, var_type, true); - } - return create_new_underscore_variable(catch_var->loc, var_type); -} - -static blk_fl::val process_vertex(V v, CodeBlob& code) { - code.require_callxargs = true; - Op& try_catch_op = code.emplace_back(v->loc, Op::_TryCatch); - code.push_set_cur(try_catch_op.block0); - blk_fl::val res0 = process_vertex(v->get_try_body(), code); - code.close_pop_cur(v->get_try_body()->loc_end); - code.push_set_cur(try_catch_op.block1); - open_scope(v->get_catch_expr()->loc); - - // transform catch (excNo, arg) into TVM-catch (arg, excNo), where arg is untyped and thus almost useless now - TypeExpr* tvm_error_type = TypeExpr::new_tensor(TypeExpr::new_var(), TypeExpr::new_atomic(TypeExpr::_Int)); - const std::vector& catch_items = v->get_catch_expr()->get_items(); - tolk_assert(catch_items.size() == 2); - Expr* e_catch = new Expr{Expr::_Tensor, v->get_catch_expr()->loc}; - e_catch->pb_arg(process_catch_variable(catch_items[1], tvm_error_type->args[0])); - e_catch->pb_arg(process_catch_variable(catch_items[0], tvm_error_type->args[1])); - e_catch->flags = Expr::_IsLvalue; - e_catch->e_type = tvm_error_type; - e_catch->predefine_vars(); - e_catch->define_new_vars(code); - try_catch_op.left = e_catch->pre_compile(code); - tolk_assert(try_catch_op.left.size() == 2); - - blk_fl::val res1 = process_vertex(v->get_catch_body(), code); - close_scope(); - 
code.close_pop_cur(v->get_catch_body()->loc_end); - blk_fl::combine_parallel(res0, res1); - return res0; -} - -static blk_fl::val process_vertex(V v, CodeBlob& code) { - Expr* expr = process_expr(v->get_cond(), code); - expr->chk_rvalue(); - TypeExpr* flag_type = TypeExpr::new_atomic(TypeExpr::_Int); - try { - unify(expr->e_type, flag_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "`if` condition value of type " << expr->e_type << " is not an integer: " << ue; - v->get_cond()->error(os.str()); - } - std::vector tmp_vars = expr->pre_compile(code); - if (tmp_vars.size() != 1) { - v->get_cond()->error("condition value is not a singleton"); - } - Op& if_op = code.emplace_back(v->loc, Op::_If, tmp_vars); - code.push_set_cur(if_op.block0); - blk_fl::val res1 = process_vertex(v->get_if_body(), code); - blk_fl::val res2 = blk_fl::init; - code.close_pop_cur(v->get_if_body()->loc_end); - code.push_set_cur(if_op.block1); - res2 = process_vertex(v->get_else_body(), code); - code.close_pop_cur(v->get_else_body()->loc_end); - if (v->is_ifnot) { - std::swap(if_op.block0, if_op.block1); - } - blk_fl::combine_parallel(res1, res2); - return res1; -} - -blk_fl::val process_statement(AnyV v, CodeBlob& code) { - switch (v->type) { - case ast_local_vars_declaration: - return process_vertex(v->as(), code); - case ast_return_statement: - return process_vertex(v->as(), code); - case ast_sequence: - return process_vertex(v->as(), code); - case ast_empty: - return blk_fl::init; - case ast_repeat_statement: - return process_vertex(v->as(), code); - case ast_if_statement: - return process_vertex(v->as(), code); - case ast_do_while_statement: - return process_vertex(v->as(), code); - case ast_while_statement: - return process_vertex(v->as(), code); - case ast_throw_statement: - return process_vertex(v->as(), code); - case ast_assert_statement: - return process_vertex(v->as(), code); - case ast_try_catch_statement: - return process_vertex(v->as(), code); - default: { - 
Expr* expr = process_expr(v, code); - expr->chk_rvalue(); - expr->pre_compile(code); - return blk_fl::end; +static void append_implicit_return_statement(SrcLocation loc_end, CodeBlob& code) { + std::vector mutated_vars; + if (code.fun_ref->has_mutate_params()) { + for (const LocalVarData& p_sym: code.fun_ref->parameters) { + if (p_sym.is_mutate_parameter()) { + mutated_vars.push_back(p_sym.idx); + } } } + code.emplace_back(loc_end, Op::_Return, std::move(mutated_vars)); } -static FormalArg process_vertex(V v, SymDef* param_sym) { - if (!param_sym) { - return std::make_tuple(v->param_type, nullptr, v->loc); + +void process_statement(AnyV v, CodeBlob& code) { + switch (v->type) { + case ast_local_vars_declaration: + return process_local_vars_declaration(v->as(), code); + case ast_sequence: + return process_sequence(v->as(), code); + case ast_return_statement: + return process_return_statement(v->as(), code); + case ast_repeat_statement: + return process_repeat_statement(v->as(), code); + case ast_if_statement: + return process_if_statement(v->as(), code); + case ast_do_while_statement: + return process_do_while_statement(v->as(), code); + case ast_while_statement: + return process_while_statement(v->as(), code); + case ast_throw_statement: + return process_throw_statement(v->as(), code); + case ast_assert_statement: + return process_assert_statement(v->as(), code); + case ast_try_catch_statement: + return process_try_catch_statement(v->as(), code); + case ast_empty_statement: + return; + default: + pre_compile_expr(reinterpret_cast(v), code); } - SymDef* new_sym_def = define_symbol(calc_sym_idx(v->get_identifier()->name), true, v->loc); - if (!new_sym_def || new_sym_def->value) { - v->error("redefined parameter"); - } - const SymValVariable* param_val = dynamic_cast(param_sym->value); - new_sym_def->value = new SymValVariable(*param_val); - return std::make_tuple(v->param_type, new_sym_def, v->loc); } static void convert_function_body_to_CodeBlob(V v, V v_body) { - 
SymDef* sym_def = lookup_symbol(calc_sym_idx(v->get_identifier()->name)); - SymValCodeFunc* sym_val = dynamic_cast(sym_def->value); - tolk_assert(sym_val != nullptr); - - open_scope(v->loc); - CodeBlob* blob = new CodeBlob{static_cast(v->get_identifier()->name), v->loc, sym_val, v->ret_type}; - if (v->marked_as_pure) { - blob->flags |= CodeBlob::_ForbidImpure; - } + CodeBlob* blob = new CodeBlob{static_cast(v->get_identifier()->name), v->loc, v->fun_ref, v->ret_type}; FormalArgList legacy_arg_list; for (int i = 0; i < v->get_num_params(); ++i) { - legacy_arg_list.emplace_back(process_vertex(v->get_param(i), sym_val->parameters[i])); + legacy_arg_list.emplace_back(v->get_param(i)->declared_type, &v->fun_ref->parameters[i], v->loc); } blob->import_params(std::move(legacy_arg_list)); - blk_fl::val res = blk_fl::init; - bool warned = false; for (AnyV item : v_body->get_items()) { - if (!(res & blk_fl::end) && !warned) { - item->loc.show_warning("unreachable code"); - warned = true; - } - blk_fl::combine(res, process_statement(item, *blob)); + process_statement(item, *blob); } - if (res & blk_fl::end) { - append_implicit_ret_stmt(v_body->loc_end, *blob); + if (v->fun_ref->is_implicit_return()) { + append_implicit_return_statement(v_body->loc_end, *blob); } blob->close_blk(v_body->loc_end); - close_scope(); - sym_val->set_code(blob); + std::get(v->fun_ref->body)->set_code(blob); } static void convert_asm_body_to_AsmOp(V v, V v_body) { - SymDef* sym_def = lookup_symbol(calc_sym_idx(v->get_identifier()->name)); - SymValAsmFunc* sym_val = dynamic_cast(sym_def->value); - tolk_assert(sym_val != nullptr); - int cnt = v->get_num_params(); int width = v->ret_type->get_width(); std::vector asm_ops; @@ -1332,14 +756,11 @@ static void convert_asm_body_to_AsmOp(V v, Vset_code(std::move(asm_ops)); + std::get(v->fun_ref->body)->set_code(std::move(asm_ops)); } - void pipeline_convert_ast_to_legacy_Expr_Op(const AllSrcFiles& all_src_files) { for (const SrcFile* file : all_src_files) { - 
tolk_assert(file->ast); - for (AnyV v : file->ast->as()->get_toplevel_declarations()) { if (auto v_func = v->try_as()) { if (v_func->is_asm_function()) { diff --git a/tolk/pipe-calc-rvalue-lvalue.cpp b/tolk/pipe-calc-rvalue-lvalue.cpp new file mode 100644 index 00000000..1738226b --- /dev/null +++ b/tolk/pipe-calc-rvalue-lvalue.cpp @@ -0,0 +1,192 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . +*/ +#include "tolk.h" +#include "ast.h" +#include "ast-visitor.h" + +/* + * This pipe assigns lvalue/rvalue flags for AST expressions. + * It happens after identifiers have been resolved, but before type inferring (before methods binding). + * + * Example: `a = b`, `a` is lvalue, `b` is rvalue. + * Example: `a + b`, both are rvalue. + * + * Note, that this pass only assigns, not checks. So, for `f() = 4`, expr `f()` is lvalue. + * Checking (firing this as incorrect later) is performed after type inferring, see pipe-check-rvalue-lvalue. 
+ */ + +namespace tolk { + +enum class MarkingState { + None, + LValue, + RValue, + LValueAndRValue +}; + +class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody { + MarkingState cur_state = MarkingState::None; + + MarkingState enter_state(MarkingState activated) { + MarkingState saved = cur_state; + cur_state = activated; + return saved; + } + + void restore_state(MarkingState saved) { + cur_state = saved; + } + + void mark_vertex_cur_or_rvalue(AnyExprV v) const { + if (cur_state == MarkingState::LValue || cur_state == MarkingState::LValueAndRValue) { + v->mutate()->assign_lvalue_true(); + } + if (cur_state == MarkingState::RValue || cur_state == MarkingState::LValueAndRValue || cur_state == MarkingState::None) { + v->mutate()->assign_rvalue_true(); + } + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + parent::visit(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + parent::visit(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + parent::visit(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + MarkingState saved = enter_state(v->passed_as_mutate ? 
MarkingState::LValueAndRValue : MarkingState::RValue); + parent::visit(v); + restore_state(saved); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + parent::visit(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + MarkingState saved = enter_state(MarkingState::RValue); + parent::visit(v); + restore_state(saved); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + MarkingState saved = enter_state(MarkingState::RValue); + parent::visit(v->get_obj()); + enter_state(MarkingState::RValue); + parent::visit(v->get_arg_list()); + restore_state(saved); + } + + void visit(V v) override { + // underscore is a placeholder to ignore left side of assignment: `(a, _) = get2params()` + // so, if current state is "lvalue", `_` will be marked as lvalue, and ok + // but if used incorrectly, like `f(_)` or just `_;`, it will be marked rvalue + // and will fire an error later, in pipe lvalue/rvalue check + mark_vertex_cur_or_rvalue(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + MarkingState saved = enter_state(MarkingState::RValue); + parent::visit(v); + restore_state(saved); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + MarkingState saved = enter_state(v->is_set_assign() ? MarkingState::LValueAndRValue : v->is_assign() ? MarkingState::LValue : MarkingState::RValue); + parent::visit(v->get_lhs()); + enter_state(MarkingState::RValue); + parent::visit(v->get_rhs()); + restore_state(saved); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + MarkingState saved = enter_state(MarkingState::RValue); + parent::visit(v); // both cond, when_true and when_false are rvalue, `(cond ? 
a : b) = 5` prohibited + restore_state(saved); + } + + void visit(V v) override { + MarkingState saved = enter_state(MarkingState::LValue); + parent::visit(v->get_lhs()); + enter_state(MarkingState::RValue); + parent::visit(v->get_assigned_val()); + restore_state(saved); + } + + void visit(V v) override { + tolk_assert(cur_state == MarkingState::LValue); + mark_vertex_cur_or_rvalue(v); + parent::visit(v); + } + + void visit(V v) override { + parent::visit(v->get_try_body()); + MarkingState saved = enter_state(MarkingState::LValue); + parent::visit(v->get_catch_expr()); + restore_state(saved); + parent::visit(v->get_catch_body()); + } +}; + +void pipeline_calculate_rvalue_lvalue(const AllSrcFiles& all_src_files) { + visit_ast_of_all_functions(all_src_files); +} + +} // namespace tolk diff --git a/tolk/pipe-check-pure-impure.cpp b/tolk/pipe-check-pure-impure.cpp new file mode 100644 index 00000000..6cef9f15 --- /dev/null +++ b/tolk/pipe-check-pure-impure.cpp @@ -0,0 +1,107 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . +*/ +#include "tolk.h" +#include "ast.h" +#include "ast-visitor.h" +#include "platform-utils.h" + +/* + * This pipe checks for impure operations inside pure functions. + * It happens after type inferring (after methods binding) since it operates fun_ref of calls. 
+ */ + +namespace tolk { + +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_impure_operation_inside_pure_function(AnyV v) { + v->error("an impure operation in a pure function"); +} + +class CheckImpureOperationsInPureFunctionVisitor final : public ASTVisitorFunctionBody { + static void fire_if_global_var(AnyExprV v) { + if (auto v_ident = v->try_as()) { + if (v_ident->sym->try_as()) { + fire_error_impure_operation_inside_pure_function(v); + } + } + } + + void visit(V v) override { + if (v->marked_as_redef) { + fire_if_global_var(v->get_identifier()); + } + } + + void visit(V v) override { + if (v->is_set_assign() || v->is_assign()) { + fire_if_global_var(v->get_lhs()); + } + + parent::visit(v); + } + + void visit(V v) override { + // most likely it's a global function, but also may be `some_var(args)` or even `getF()(args)` + if (!v->fun_maybe) { + // calling variables is always impure, no considerations about what's there at runtime + fire_error_impure_operation_inside_pure_function(v); + } + + if (!v->fun_maybe->is_marked_as_pure()) { + fire_error_impure_operation_inside_pure_function(v); + } + + parent::visit(v); + } + + void visit(V v) override { + if (!v->fun_ref->is_marked_as_pure()) { + fire_error_impure_operation_inside_pure_function(v); + } + + parent::visit(v); + } + + void visit(V v) override { + if (v->passed_as_mutate) { + fire_if_global_var(v->get_expr()); + } + + parent::visit(v); + } + + void visit(V v) override { + fire_error_impure_operation_inside_pure_function(v); + } + + void visit(V v) override { + fire_error_impure_operation_inside_pure_function(v); + } + +public: + void start_visiting_function(V v_function) override { + if (v_function->marked_as_pure) { + parent::visit(v_function->get_body()); + } + } +}; + +void pipeline_check_pure_impure_operations(const AllSrcFiles& all_src_files) { + visit_ast_of_all_functions(all_src_files); +} + +} // namespace tolk diff --git a/tolk/pipe-check-rvalue-lvalue.cpp 
b/tolk/pipe-check-rvalue-lvalue.cpp new file mode 100644 index 00000000..f5bf8526 --- /dev/null +++ b/tolk/pipe-check-rvalue-lvalue.cpp @@ -0,0 +1,172 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . +*/ +#include "tolk.h" +#include "ast.h" +#include "ast-visitor.h" +#include "platform-utils.h" + +/* + * This pipe checks lvalue/rvalue for validity. + * It happens after type inferring (after methods binding) and after lvalue/rvalue are refined based on fun_ref. + * + * Example: `f() = 4`, `f()` was earlier marked as lvalue, it's incorrect. + * Example: `f(mutate 5)`, `5` was marked also, it's incorrect. 
+ */ + +namespace tolk { + +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_cannot_be_used_as_lvalue(AnyV v, const std::string& details) { + // example: `f() = 32` + // example: `loadUint(c.beginParse(), 32)` (since `loadUint()` mutates the first argument) + v->error(details + " can not be used as lvalue"); +} + +// handle when a function used as rvalue, like `var cb = f` +static void handle_function_used_as_noncall(AnyExprV v, const FunctionData* fun_ref) { + fun_ref->mutate()->assign_is_used_as_noncall(); + if (!fun_ref->arg_order.empty() || !fun_ref->ret_order.empty()) { + v->error("saving `" + fun_ref->name + "` into a variable will most likely lead to invalid usage, since it changes the order of variables on the stack"); + } + if (fun_ref->has_mutate_params()) { + v->error("saving `" + fun_ref->name + "` into a variable is impossible, since it has `mutate` parameters and thus can only be called directly"); + } +} + +class CheckRValueLvalueVisitor final : public ASTVisitorFunctionBody { + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "operator `" + static_cast(v->operator_name)); + } + parent::visit(v); + } + + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "operator `" + static_cast(v->operator_name)); + } + parent::visit(v); + } + + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "operator ?:"); + } + parent::visit(v); + } + + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "literal"); + } + } + + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "literal"); + } + } + + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "literal"); + } + } + + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "literal"); + } + } + + void visit(V v) override { + if 
(v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "function call"); + } + if (!v->fun_maybe) { + parent::visit(v->get_called_f()); + } + // for `f(...)` don't visit identifier `f`, to detect `f` usage as non-call, like `var cb = f` + + for (int i = 0; i < v->get_num_args(); ++i) { + parent::visit(v->get_arg(i)); + } + } + + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "method call"); + } + + parent::visit(v->get_obj()); + + for (int i = 0; i < v->get_num_args(); ++i) { + parent::visit(v->get_arg(i)); + } + } + + void visit(V v) override { + if (v->marked_as_redef) { + tolk_assert(v->var_maybe); // always filled, but for `var g_var redef` might point not to a local + if (const LocalVarData* var_ref = v->var_maybe->try_as(); var_ref && var_ref->is_immutable()) { + v->error("`redef` for immutable variable"); + } + } + } + + void visit(V v) override { + if (v->is_lvalue) { + tolk_assert(v->sym); + if (const auto* var_ref = v->sym->try_as(); var_ref && var_ref->is_immutable()) { + v->error("modifying immutable variable `" + var_ref->name + "`"); + } else if (v->sym->try_as()) { + v->error("modifying immutable constant"); + } else if (v->sym->try_as()) { + v->error("function can't be used as lvalue"); + } + } + + // a reference to a function used as rvalue, like `var v = someFunction` + if (const FunctionData* fun_ref = v->sym->try_as(); fun_ref && v->is_rvalue) { + handle_function_used_as_noncall(v, fun_ref); + } + } + + void visit(V v) override { + if (v->is_lvalue && v->param_ref->is_immutable()) { + v->error("modifying `self`, which is immutable by default; probably, you want to declare `mutate self`"); + } + } + + void visit(V v) override { + if (v->is_rvalue) { + v->error("`_` can't be used as a value; it's a placeholder for a left side of assignment"); + } + } + + void visit(V v) override { + parent::visit(v->get_try_body()); + // skip catch(_,excNo), there are always vars due to grammar, lvalue/rvalue aren't 
set to them + parent::visit(v->get_catch_body()); + } +}; + +void pipeline_check_rvalue_lvalue(const AllSrcFiles& all_src_files) { + visit_ast_of_all_functions(all_src_files); +} + +} // namespace tolk diff --git a/tolk/pipe-constant-folding.cpp b/tolk/pipe-constant-folding.cpp new file mode 100644 index 00000000..9e266e6d --- /dev/null +++ b/tolk/pipe-constant-folding.cpp @@ -0,0 +1,68 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . +*/ +#include "tolk.h" +#include "ast.h" +#include "ast-replacer.h" + +/* + * This pipe is supposed to do constant folding, like replacing `2 + 3` with `5`. + * It happens after type inferring and validity checks, one of the last ones. + * + * Currently, it just replaces `-1` (ast_unary_operator ast_int_const) with a number -1. + * More rich constant folding should be done some day, but even without this, IR optimizations + * (operating low-level stack variables) pretty manage to do all related optimizations. + * Constant folding in the future, done at AST level, just would slightly reduce amount of work for optimizer. 
+ */ + +namespace tolk { + +class ConstantFoldingReplacer final : public ASTReplacerInFunctionBody { + static V create_int_const(SrcLocation loc, td::RefInt256&& intval) { + auto v_int = createV(loc, std::move(intval), {}); + v_int->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + v_int->assign_rvalue_true(); + return v_int; + } + + AnyExprV replace(V v) override { + parent::replace(v); + + TokenType t = v->tok; + // convert "-1" (tok_minus tok_int_const) to a const -1 + if (t == tok_minus && v->get_rhs()->type == ast_int_const) { + td::RefInt256 intval = v->get_rhs()->as()->intval; + tolk_assert(!intval.is_null()); + intval = -intval; + if (intval.is_null() || !intval->signed_fits_bits(257)) { + v->error("integer overflow"); + } + return create_int_const(v->loc, std::move(intval)); + } + // same for "+1" + if (t == tok_plus && v->get_rhs()->type == ast_int_const) { + return v->get_rhs(); + } + + return v; + } +}; + +void pipeline_constant_folding(const AllSrcFiles& all_src_files) { + replace_ast_of_all_functions(all_src_files); +} + +} // namespace tolk diff --git a/tolk/pipe-detect-unreachable.cpp b/tolk/pipe-detect-unreachable.cpp new file mode 100644 index 00000000..96de2eb0 --- /dev/null +++ b/tolk/pipe-detect-unreachable.cpp @@ -0,0 +1,127 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . 
+*/ +#include "tolk.h" +#include "ast.h" +#include "ast-visitor.h" + +/* + * This pipe does two things: + * 1) detects unreachable code and prints warnings about it + * example: `fun main() { if(1){return;}else{return;} var x = 0; }` — var is unreachable + * 2) if control flow reaches end of function, store a flag to insert an implicit return + * example: `fun main() { assert(...); }` — has an implicit `return ()` statement before a brace + * + * Note, that it does not delete unreachable code, only prints warnings. + * Actual deleting is done much later (in "legacy" part), after AST is converted to Op. + * + * Note, that it's not CFG, it's just a shallow reachability detection. + * In the future, a true CFG should be introduced. For instance, in order to have nullable types, + * I'll need to implement smart casts. Then I'll think of a complicated granular control flow graph, + * considering data flow and exceptions (built before type inferring, of course), + * and detecting unreachable code will be a part of it. 
+ */ + +namespace tolk { + +class UnreachableStatementsDetectVisitor final { + bool always_returns(AnyV v) { + switch (v->type) { + case ast_sequence: return always_returns(v->as()); + case ast_return_statement: return always_returns(v->as()); + case ast_throw_statement: return always_returns(v->as()); + case ast_function_call: return always_returns(v->as()); + case ast_repeat_statement: return always_returns(v->as()); + case ast_while_statement: return always_returns(v->as()); + case ast_do_while_statement: return always_returns(v->as()); + case ast_try_catch_statement: return always_returns(v->as()); + case ast_if_statement: return always_returns(v->as()); + default: + // unhandled statements (like assert) and statement expressions + return false; + } + } + + bool always_returns(V v) { + bool always = false; + for (AnyV item : v->get_items()) { + if (always && item->type != ast_empty_statement) { + item->loc.show_warning("unreachable code"); + break; + } + always |= always_returns(item); + } + return always; + } + + static bool always_returns([[maybe_unused]] V v) { + // quite obvious: `return expr` interrupts control flow + return true; + } + + static bool always_returns([[maybe_unused]] V v) { + // todo `throw excNo` currently does not interrupt control flow + // (in other words, `throw 1; something` - something is reachable) + // the reason is that internally it's transformed to a call of built-in function __throw(), + // which is a regular function, like __throw_if() or loadInt() + // to fix this later on, it should be deeper, introducing Op::_Throw for example, + // to make intermediate representations and stack optimizer also be aware that after it there is unreachable + return false; + } + + static bool always_returns([[maybe_unused]] V v) { + // neither annotations like @noreturn nor auto-detection of always-throwing functions also doesn't exist + // in order to do this in the future, it should be handled not only at AST/CFG level, + // but inside Op and 
low-level optimizer (at least if reachability detection is not moved out of there) + // see comments for `throw` above, similar to this case + return false; + } + + bool always_returns(V v) { + return always_returns(v->get_body()); + } + + bool always_returns(V v) { + return always_returns(v->get_body()); + } + + bool always_returns(V v) { + return always_returns(v->get_body()); + } + + bool always_returns(V v) { + return always_returns(v->get_try_body()) && always_returns(v->get_catch_body()); + } + + bool always_returns(V v) { + return always_returns(v->get_if_body()) && always_returns(v->get_else_body()); + } + +public: + void start_visiting_function(V v_function) { + bool control_flow_reaches_end = !always_returns(v_function->get_body()->as()); + if (control_flow_reaches_end) { + v_function->fun_ref->mutate()->assign_is_implicit_return(); + } + } +}; + + +void pipeline_detect_unreachable_statements(const AllSrcFiles& all_src_files) { + visit_ast_of_all_functions(all_src_files); +} + +} // namespace tolk diff --git a/tolk/pipe-discover-parse-sources.cpp b/tolk/pipe-discover-parse-sources.cpp index a8445ae9..92cc2807 100644 --- a/tolk/pipe-discover-parse-sources.cpp +++ b/tolk/pipe-discover-parse-sources.cpp @@ -28,6 +28,14 @@ #include "ast-from-tokens.h" #include "compiler-state.h" +/* + * This is the starting point of compilation pipeline. + * It parses Tolk files to AST, analyzes `import` statements and loads/parses imported files. + * + * When it finishes, all files have been parsed to AST, and no more files will later be added. + * If a parsing error happens (invalid syntax), an exception is thrown immediately from ast-from-tokens.cpp. 
+ */ + namespace tolk { AllSrcFiles pipeline_discover_and_parse_sources(const std::string& stdlib_filename, const std::string& entrypoint_filename) { @@ -50,7 +58,7 @@ AllSrcFiles pipeline_discover_and_parse_sources(const std::string& stdlib_filena SrcFile* imported = G.all_src_files.locate_and_register_source_file(rel_filename, v_import->loc); file->imports.push_back(SrcFile::ImportStatement{imported}); - v_import->mutate_set_src_file(imported); + v_import->mutate()->assign_src_file(imported); } } } diff --git a/tolk/pipe-find-unused-symbols.cpp b/tolk/pipe-find-unused-symbols.cpp index f83579f4..815905e6 100644 --- a/tolk/pipe-find-unused-symbols.cpp +++ b/tolk/pipe-find-unused-symbols.cpp @@ -24,51 +24,41 @@ from all source files in the program, then also delete it here. */ #include "tolk.h" -#include "src-file.h" #include "compiler-state.h" /* - * Here we find unused symbols (global functions and variables) to strip them off codegen. - * Note, that currently it's implemented as a standalone step after AST has been transformed to legacy Expr/Op. - * The reason why it's not done on AST level is that symbol resolving is done too late. For instance, - * having `beginCell()` there is not enough information in AST whether if points to a global function - * or it's a local variable application. - * In the future, this should be done on AST level. + * This pipe finds unused symbols (global functions and variables) to strip them off codegen. + * It happens after converting AST to Op, so it does not traverse AST. + * In the future, when control flow graph is introduced, this should be done at AST level. 
*/ namespace tolk { static void mark_function_used_dfs(const std::unique_ptr& op); -static void mark_function_used(SymValCodeFunc* func_val) { - if (!func_val->code || func_val->is_really_used) { // already handled +static void mark_function_used(const FunctionData* fun_ref) { + if (!fun_ref->is_regular_function() || fun_ref->is_really_used()) { // already handled return; } - func_val->is_really_used = true; - mark_function_used_dfs(func_val->code->ops); + fun_ref->mutate()->assign_is_really_used(); + mark_function_used_dfs(std::get(fun_ref->body)->code->ops); } -static void mark_global_var_used(SymValGlobVar* glob_val) { - glob_val->is_really_used = true; +static void mark_global_var_used(const GlobalVarData* glob_ref) { + glob_ref->mutate()->assign_is_really_used(); } static void mark_function_used_dfs(const std::unique_ptr& op) { if (!op) { return; } - // op->fun_ref, despite its name, may actually ref global var - // note, that for non-calls, e.g. `var a = some_fn` (Op::_Let), some_fn is Op::_GlobVar - // (in other words, fun_ref exists not only for direct Op::_Call, but for non-call references also) - if (op->fun_ref) { - if (auto* func_val = dynamic_cast(op->fun_ref->value)) { - mark_function_used(func_val); - } else if (auto* glob_val = dynamic_cast(op->fun_ref->value)) { - mark_global_var_used(glob_val); - } else if (auto* asm_val = dynamic_cast(op->fun_ref->value)) { - } else { - tolk_assert(false); - } + + if (op->f_sym) { // for Op::_Call + mark_function_used(op->f_sym); + } + if (op->g_sym) { // for Op::_GlobVar + mark_global_var_used(op->g_sym); } mark_function_used_dfs(op->next); mark_function_used_dfs(op->block0); @@ -76,11 +66,9 @@ static void mark_function_used_dfs(const std::unique_ptr& op) { } void pipeline_find_unused_symbols() { - for (SymDef* func_sym : G.all_code_functions) { - auto* func_val = dynamic_cast(func_sym->value); - std::string name = G.symbols.get_name(func_sym->sym_idx); - if (func_val->method_id.not_null() || 
func_val->is_entrypoint()) { - mark_function_used(func_val); + for (const FunctionData* fun_ref : G.all_code_functions) { + if (fun_ref->is_method_id_not_empty()) { // get methods, main and other entrypoints, regular functions with @method_id + mark_function_used(fun_ref); } } } diff --git a/tolk/pipe-generate-fif-output.cpp b/tolk/pipe-generate-fif-output.cpp index 91a99f96..5c0f1647 100644 --- a/tolk/pipe-generate-fif-output.cpp +++ b/tolk/pipe-generate-fif-output.cpp @@ -1,5 +1,5 @@ /* - This file is part of TON Blockchain source code. + This file is part of TON Blockchain source code-> TON Blockchain is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License @@ -30,106 +30,86 @@ namespace tolk { -bool SymValCodeFunc::does_need_codegen() const { - // when a function is declared, but not referenced from code in any way, don't generate its body - if (!is_really_used && G.settings.remove_unused_functions) { - return false; - } - // when a function is referenced like `var a = some_fn;` (or in some other non-call way), its continuation should exist - if (flags & flagUsedAsNonCall) { - return true; - } - // currently, there is no inlining, all functions are codegenerated - // (but actually, unused ones are later removed by Fift) - // in the future, we may want to implement a true AST inlining for "simple" functions - return true; -} - -void SymValCodeFunc::set_code(CodeBlob* code) { +void FunctionBodyCode::set_code(CodeBlob* code) { this->code = code; } -void SymValAsmFunc::set_code(std::vector code) { - this->ext_compile = make_ext_compile(std::move(code)); +void FunctionBodyAsm::set_code(std::vector&& code) { + this->ops = std::move(code); } -static void generate_output_func(SymDef* func_sym) { - SymValCodeFunc* func_val = dynamic_cast(func_sym->value); - tolk_assert(func_val); - std::string name = G.symbols.get_name(func_sym->sym_idx); +static void generate_output_func(const FunctionData* fun_ref) { + 
tolk_assert(fun_ref->is_regular_function()); if (G.is_verbosity(2)) { - std::cerr << "\n\n=========================\nfunction " << name << " : " << func_val->get_type() << std::endl; + std::cerr << "\n\n=========================\nfunction " << fun_ref->name << " : " << fun_ref->full_type << std::endl; } - if (!func_val->code) { - throw ParseError(func_sym->loc, "function `" + name + "` is just declared, not implemented"); - } else { - CodeBlob& code = *(func_val->code); - if (G.is_verbosity(3)) { - code.print(std::cerr, 9); + + CodeBlob* code = std::get(fun_ref->body)->code; + if (G.is_verbosity(3)) { + code->print(std::cerr, 9); + } + code->simplify_var_types(); + if (G.is_verbosity(5)) { + std::cerr << "after simplify_var_types: \n"; + code->print(std::cerr, 0); + } + code->prune_unreachable_code(); + if (G.is_verbosity(5)) { + std::cerr << "after prune_unreachable: \n"; + code->print(std::cerr, 0); + } + code->split_vars(true); + if (G.is_verbosity(5)) { + std::cerr << "after split_vars: \n"; + code->print(std::cerr, 0); + } + for (int i = 0; i < 8; i++) { + code->compute_used_code_vars(); + if (G.is_verbosity(4)) { + std::cerr << "after compute_used_vars: \n"; + code->print(std::cerr, 6); } - code.simplify_var_types(); + code->fwd_analyze(); if (G.is_verbosity(5)) { - std::cerr << "after simplify_var_types: \n"; - code.print(std::cerr, 0); + std::cerr << "after fwd_analyze: \n"; + code->print(std::cerr, 6); } - code.prune_unreachable_code(); + code->prune_unreachable_code(); if (G.is_verbosity(5)) { std::cerr << "after prune_unreachable: \n"; - code.print(std::cerr, 0); - } - code.split_vars(true); - if (G.is_verbosity(5)) { - std::cerr << "after split_vars: \n"; - code.print(std::cerr, 0); - } - for (int i = 0; i < 8; i++) { - code.compute_used_code_vars(); - if (G.is_verbosity(4)) { - std::cerr << "after compute_used_vars: \n"; - code.print(std::cerr, 6); - } - code.fwd_analyze(); - if (G.is_verbosity(5)) { - std::cerr << "after fwd_analyze: \n"; - 
code.print(std::cerr, 6); - } - code.prune_unreachable_code(); - if (G.is_verbosity(5)) { - std::cerr << "after prune_unreachable: \n"; - code.print(std::cerr, 6); - } - } - code.mark_noreturn(); - if (G.is_verbosity(3)) { - code.print(std::cerr, 15); - } - if (G.is_verbosity(2)) { - std::cerr << "\n---------- resulting code for " << name << " -------------\n"; - } - const char* modifier = ""; - if (func_val->is_inline()) { - modifier = "INLINE"; - } else if (func_val->is_inline_ref()) { - modifier = "REF"; - } - std::cout << std::string(2, ' ') << name << " PROC" << modifier << ":<{\n"; - int mode = 0; - if (G.settings.stack_layout_comments) { - mode |= Stack::_StkCmt | Stack::_CptStkCmt; - } - if (func_val->is_inline() && code.ops->noreturn()) { - mode |= Stack::_InlineFunc; - } - if (func_val->is_inline() || func_val->is_inline_ref()) { - mode |= Stack::_InlineAny; - } - code.generate_code(std::cout, mode, 2); - std::cout << std::string(2, ' ') << "}>\n"; - if (G.is_verbosity(2)) { - std::cerr << "--------------\n"; + code->print(std::cerr, 6); } } + code->mark_noreturn(); + if (G.is_verbosity(3)) { + code->print(std::cerr, 15); + } + if (G.is_verbosity(2)) { + std::cerr << "\n---------- resulting code for " << fun_ref->name << " -------------\n"; + } + const char* modifier = ""; + if (fun_ref->is_inline()) { + modifier = "INLINE"; + } else if (fun_ref->is_inline_ref()) { + modifier = "REF"; + } + std::cout << std::string(2, ' ') << fun_ref->name << " PROC" << modifier << ":<{\n"; + int mode = 0; + if (G.settings.stack_layout_comments) { + mode |= Stack::_StkCmt | Stack::_CptStkCmt; + } + if (fun_ref->is_inline() && code->ops->noreturn()) { + mode |= Stack::_InlineFunc; + } + if (fun_ref->is_inline() || fun_ref->is_inline_ref()) { + mode |= Stack::_InlineAny; + } + code->generate_code(std::cout, mode, 2); + std::cout << std::string(2, ' ') << "}>\n"; + if (G.is_verbosity(2)) { + std::cerr << "--------------\n"; + } } void 
pipeline_generate_fif_output_to_std_cout(const AllSrcFiles& all_src_files) { @@ -149,26 +129,23 @@ void pipeline_generate_fif_output_to_std_cout(const AllSrcFiles& all_src_files) std::cout << "PROGRAM{\n"; bool has_main_procedure = false; - for (SymDef* func_sym : G.all_code_functions) { - SymValCodeFunc* func_val = dynamic_cast(func_sym->value); - tolk_assert(func_val); - if (!func_val->does_need_codegen()) { + for (const FunctionData* fun_ref : G.all_code_functions) { + if (!fun_ref->does_need_codegen()) { if (G.is_verbosity(2)) { - std::cerr << func_sym->name() << ": code not generated, function does not need codegen\n"; + std::cerr << fun_ref->name << ": code not generated, function does not need codegen\n"; } continue; } - std::string name = G.symbols.get_name(func_sym->sym_idx); - if (func_val->is_entrypoint() && (name == "main" || name == "onInternalMessage")) { + if (fun_ref->is_entrypoint() && (fun_ref->name == "main" || fun_ref->name == "onInternalMessage")) { has_main_procedure = true; } std::cout << std::string(2, ' '); - if (func_val->method_id.is_null()) { - std::cout << "DECLPROC " << name << "\n"; + if (fun_ref->is_method_id_not_empty()) { + std::cout << fun_ref->method_id << " DECLMETHOD " << fun_ref->name << "\n"; } else { - std::cout << func_val->method_id << " DECLMETHOD " << name << "\n"; + std::cout << "DECLPROC " << fun_ref->name << "\n"; } } @@ -176,25 +153,22 @@ void pipeline_generate_fif_output_to_std_cout(const AllSrcFiles& all_src_files) throw Fatal("the contract has no entrypoint; forgot `fun onInternalMessage(...)`?"); } - for (SymDef* gvar_sym : G.all_global_vars) { - auto* glob_val = dynamic_cast(gvar_sym->value); - tolk_assert(glob_val); - if (!glob_val->is_really_used && G.settings.remove_unused_functions) { + for (const GlobalVarData* var_ref : G.all_global_vars) { + if (!var_ref->is_really_used() && G.settings.remove_unused_functions) { if (G.is_verbosity(2)) { - std::cerr << gvar_sym->name() << ": variable not generated, it's 
unused\n"; + std::cerr << var_ref->name << ": variable not generated, it's unused\n"; } continue; } - std::string name = G.symbols.get_name(gvar_sym->sym_idx); - std::cout << std::string(2, ' ') << "DECLGLOBVAR " << name << "\n"; + + std::cout << std::string(2, ' ') << "DECLGLOBVAR " << var_ref->name << "\n"; } - for (SymDef* func_sym : G.all_code_functions) { - SymValCodeFunc* func_val = dynamic_cast(func_sym->value); - if (!func_val->does_need_codegen()) { + for (const FunctionData* fun_ref : G.all_code_functions) { + if (!fun_ref->does_need_codegen()) { continue; } - generate_output_func(func_sym); + generate_output_func(fun_ref); } std::cout << "}END>c\n"; diff --git a/tolk/pipe-infer-check-types.cpp b/tolk/pipe-infer-check-types.cpp new file mode 100644 index 00000000..8c18bae9 --- /dev/null +++ b/tolk/pipe-infer-check-types.cpp @@ -0,0 +1,524 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "tolk.h" +#include "src-file.h" +#include "ast.h" +#include "ast-visitor.h" + +/* + * This pipe does type inferring. + * It will be fully rewritten, because current type system is based on Hindley-Milner (unifying usages), + * and I am going to introduce a static type system, drop TypeExpr completely, etc. 
+ * Currently, after this inferring, lots of `te_Indirect` and partially complete types still exist, + * they are partially refined during converting AST to legacy. + */ + +namespace tolk { + +class InferAndCheckTypesInsideFunctionVisitor final : public ASTVisitorFunctionBody { + const FunctionData* current_function = nullptr; + + static bool expect_integer(TypeExpr* inferred) { + try { + TypeExpr* t_int = TypeExpr::new_atomic(TypeExpr::_Int); + unify(inferred, t_int); + return true; + } catch (UnifyError&) { + return false; + } + } + + static bool expect_integer(AnyExprV v_inferred) { + return expect_integer(v_inferred->inferred_type); + } + + static bool is_expr_valid_as_return_self(AnyExprV return_expr) { + // `return self` + if (return_expr->type == ast_self_keyword) { + return true; + } + // `return self.someMethod()` + if (auto v_call = return_expr->try_as()) { + return v_call->fun_ref->does_return_self() && is_expr_valid_as_return_self(v_call->get_obj()); + } + // `return cond ? ... 
: ...` + if (auto v_ternary = return_expr->try_as()) { + return is_expr_valid_as_return_self(v_ternary->get_when_true()) && is_expr_valid_as_return_self(v_ternary->get_when_false()); + } + return false; + } + + void visit(V v) override { + parent::visit(v->get_expr()); + v->mutate()->assign_inferred_type(v->get_expr()->inferred_type); + } + + void visit(V v) override { + if (v->empty()) { + v->mutate()->assign_inferred_type(TypeExpr::new_unit()); + return; + } + std::vector types_list; + types_list.reserve(v->get_items().size()); + for (AnyExprV item : v->get_items()) { + parent::visit(item); + types_list.emplace_back(item->inferred_type); + } + v->mutate()->assign_inferred_type(TypeExpr::new_tensor(std::move(types_list))); + } + + void visit(V v) override { + if (v->empty()) { + v->mutate()->assign_inferred_type(TypeExpr::new_tuple(TypeExpr::new_unit())); + return; + } + std::vector types_list; + types_list.reserve(v->get_items().size()); + for (AnyExprV item : v->get_items()) { + parent::visit(item); + types_list.emplace_back(item->inferred_type); + } + v->mutate()->assign_inferred_type(TypeExpr::new_tuple(TypeExpr::new_tensor(std::move(types_list), false))); + } + + void visit(V v) override { + if (const auto* glob_ref = v->sym->try_as()) { + v->mutate()->assign_inferred_type(glob_ref->declared_type); + } else if (const auto* const_ref = v->sym->try_as()) { + v->mutate()->assign_inferred_type(const_ref->inferred_type); + } else if (const auto* fun_ref = v->sym->try_as()) { + v->mutate()->assign_inferred_type(fun_ref->full_type); + } else if (const auto* var_ref = v->sym->try_as()) { + v->mutate()->assign_inferred_type(var_ref->declared_type); + } + } + + void visit(V v) override { + v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + } + + void visit(V v) override { + switch (v->modifier) { + case 0: + case 's': + case 'a': + v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Slice)); + break; + case 'u': + case 'h': + case 
'H': + case 'c': + v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + break; + default: + break; + } + } + + void visit(V v) override { + v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + } + + void visit(V v) override { + const FunctionData* fun_ref = lookup_global_symbol("__null")->as(); + TypeExpr* fun_type = TypeExpr::new_map(TypeExpr::new_unit(), TypeExpr::new_hole()); + TypeExpr* sym_type = fun_ref->full_type; + try { + unify(fun_type, sym_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot apply function " << fun_ref->name << " : " << fun_ref->full_type << " to arguments of type " + << fun_type->args[0] << ": " << ue; + v->error(os.str()); + } + TypeExpr* e_type = fun_type->args[1]; + TypeExpr::remove_indirect(e_type); + v->mutate()->assign_inferred_type(e_type); + } + + void visit(V v) override { + v->mutate()->assign_inferred_type(v->param_ref->declared_type); + } + + void visit(V v) override { + parent::visit(v->get_expr()); + v->mutate()->assign_inferred_type(v->get_expr()->inferred_type); + } + + void visit(V v) override { + if (v->empty()) { + v->mutate()->assign_inferred_type(TypeExpr::new_unit()); + return; + } + std::vector types_list; + types_list.reserve(v->size()); + for (AnyExprV item : v->get_arguments()) { + parent::visit(item); + types_list.emplace_back(item->inferred_type); + } + v->mutate()->assign_inferred_type(TypeExpr::new_tensor(std::move(types_list))); + } + + void visit(V v) override { + // special error for "null()" which is a FunC syntax + if (v->get_called_f()->type == ast_null_keyword) { + v->error("null is not a function: use `null`, not `null()`"); + } + + parent::visit(v->get_called_f()); + visit(v->get_arg_list()); + + // most likely it's a global function, but also may be `some_var(args)` or even `getF()(args)` + const FunctionData* fun_ref = v->fun_maybe; + if (!fun_ref) { + TypeExpr* arg_tensor = v->get_arg_list()->inferred_type; + TypeExpr* lhs_type 
= v->get_called_f()->inferred_type; + TypeExpr* fun_type = TypeExpr::new_map(arg_tensor, TypeExpr::new_hole()); + try { + unify(fun_type, lhs_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot apply expression of type " << lhs_type << " to an expression of type " << arg_tensor + << ": " << ue; + v->error(os.str()); + } + TypeExpr* e_type = fun_type->args[1]; + TypeExpr::remove_indirect(e_type); + v->mutate()->assign_inferred_type(e_type); + return; + } + + TypeExpr* arg_tensor = v->get_arg_list()->inferred_type; + TypeExpr* fun_type = TypeExpr::new_map(arg_tensor, TypeExpr::new_hole()); + TypeExpr* sym_type = fun_ref->full_type; + try { + unify(fun_type, sym_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot apply function " << fun_ref->name << " : " << fun_ref->full_type << " to arguments of type " + << fun_type->args[0] << ": " << ue; + v->error(os.str()); + } + TypeExpr* e_type = fun_type->args[1]; + TypeExpr::remove_indirect(e_type); + + if (fun_ref->has_mutate_params()) { + tolk_assert(e_type->constr == TypeExpr::te_Tensor); + e_type = e_type->args[e_type->args.size() - 1]; + } + + v->mutate()->assign_inferred_type(e_type); + } + + void visit(V v) override { + parent::visit(v->get_obj()); + visit(v->get_arg_list()); + std::vector arg_types; + arg_types.reserve(1 + v->get_num_args()); + arg_types.push_back(v->get_obj()->inferred_type); + for (int i = 0; i < v->get_num_args(); ++i) { + arg_types.push_back(v->get_arg(i)->inferred_type); + } + + TypeExpr* arg_tensor = TypeExpr::new_tensor(std::move(arg_types)); + TypeExpr* fun_type = TypeExpr::new_map(arg_tensor, TypeExpr::new_hole()); + TypeExpr* sym_type = v->fun_ref->full_type; + try { + unify(fun_type, sym_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot apply function " << v->fun_ref->name << " : " << v->fun_ref->full_type << " to arguments of type " + << fun_type->args[0] << ": " << ue; + v->error(os.str()); + } + TypeExpr* 
e_type = fun_type->args[1]; + TypeExpr::remove_indirect(e_type); + + if (v->fun_ref->has_mutate_params()) { + tolk_assert(e_type->constr == TypeExpr::te_Tensor); + e_type = e_type->args[e_type->args.size() - 1]; + } + if (v->fun_ref->does_return_self()) { + e_type = v->get_obj()->inferred_type; + TypeExpr::remove_indirect(e_type); + } + + v->mutate()->assign_inferred_type(e_type); + } + + void visit(V v) override { + v->mutate()->assign_inferred_type(TypeExpr::new_hole()); + } + + void visit(V v) override { + parent::visit(v->get_rhs()); + if (!expect_integer(v->get_rhs())) { + v->error("operator `" + static_cast(v->operator_name) + "` expects integer operand"); + } + v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + } + + void visit(V v) override { + parent::visit(v->get_lhs()); + parent::visit(v->get_rhs()); + switch (v->tok) { + case tok_assign: { + TypeExpr* lhs_type = v->get_lhs()->inferred_type; + TypeExpr* rhs_type = v->get_rhs()->inferred_type; + try { + unify(lhs_type, rhs_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot assign an expression of type " << rhs_type << " to a variable or pattern of type " + << lhs_type << ": " << ue; + v->error(os.str()); + } + TypeExpr* e_type = lhs_type; + TypeExpr::remove_indirect(e_type); + v->mutate()->assign_inferred_type(e_type); + break; + } + case tok_eq: + case tok_neq: + case tok_spaceship: { + if (!expect_integer(v->get_lhs()) || !expect_integer(v->get_rhs())) { + v->error("comparison operators `== !=` can compare only integers"); + } + v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + break; + } + case tok_logical_and: + case tok_logical_or: { + if (!expect_integer(v->get_lhs()) || !expect_integer(v->get_rhs())) { + v->error("logical operators `&& ||` expect integer operands"); + } + v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + break; + } + default: + if (!expect_integer(v->get_lhs()) || 
!expect_integer(v->get_rhs())) { + v->error("operator `" + static_cast(v->operator_name) + "` expects integer operands"); + } + v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + } + } + + void visit(V v) override { + parent::visit(v->get_cond()); + if (!expect_integer(v->get_cond())) { + v->get_cond()->error("condition of ternary ?: operator must be an integer"); + } + parent::visit(v->get_when_true()); + parent::visit(v->get_when_false()); + + TypeExpr* res = TypeExpr::new_hole(); + TypeExpr *ttrue = v->get_when_true()->inferred_type; + TypeExpr *tfals = v->get_when_false()->inferred_type; + unify(res, ttrue); + unify(res, tfals); + v->mutate()->assign_inferred_type(res); + } + + void visit(V v) override { + parent::visit(v->get_cond()); + parent::visit(v->get_if_body()); + parent::visit(v->get_else_body()); + TypeExpr* flag_type = TypeExpr::new_atomic(TypeExpr::_Int); + TypeExpr* cond_type = v->get_cond()->inferred_type; + try { + + unify(cond_type, flag_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "`if` condition value of type " << cond_type << " is not an integer: " << ue; + v->get_cond()->error(os.str()); + } + v->get_cond()->mutate()->assign_inferred_type(cond_type); + } + + void visit(V v) override { + parent::visit(v->get_cond()); + parent::visit(v->get_body()); + TypeExpr* cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); + TypeExpr* cond_type = v->get_cond()->inferred_type; + try { + unify(cond_type, cnt_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "repeat count value of type " << cond_type << " is not an integer: " << ue; + v->get_cond()->error(os.str()); + } + v->get_cond()->mutate()->assign_inferred_type(cond_type); + } + + void visit(V v) override { + parent::visit(v->get_cond()); + parent::visit(v->get_body()); + TypeExpr* cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); + TypeExpr* cond_type = v->get_cond()->inferred_type; + try { + unify(cond_type, cnt_type); + } catch 
(UnifyError& ue) { + std::ostringstream os; + os << "`while` condition value of type " << cond_type << " is not an integer: " << ue; + v->get_cond()->error(os.str()); + } + v->get_cond()->mutate()->assign_inferred_type(cond_type); + } + + void visit(V v) override { + parent::visit(v->get_body()); + parent::visit(v->get_cond()); + TypeExpr* cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); + TypeExpr* cond_type = v->get_cond()->inferred_type; + try { + unify(cond_type, cnt_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "`while` condition value of type " << cond_type << " is not an integer: " << ue; + v->get_cond()->error(os.str()); + } + v->get_cond()->mutate()->assign_inferred_type(cond_type); + } + + void visit(V v) override { + parent::visit(v->get_return_value()); + if (current_function->does_return_self()) { + if (!is_expr_valid_as_return_self(v->get_return_value())) { + v->error("invalid return from `self` function"); + } + return; + } + TypeExpr* expr_type = v->get_return_value()->inferred_type; + TypeExpr* ret_type = current_function->full_type; + if (ret_type->constr == TypeExpr::te_ForAll) { + ret_type = ret_type->args[0]; + } + tolk_assert(ret_type->constr == TypeExpr::te_Map); + ret_type = ret_type->args[1]; + if (current_function->has_mutate_params()) { + tolk_assert(ret_type->constr == TypeExpr::te_Tensor); + ret_type = ret_type->args[ret_type->args.size() - 1]; + } + try { + unify(expr_type, ret_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "previous function return type " << ret_type + << " cannot be unified with return statement expression type " << expr_type << ": " << ue; + v->error(os.str()); + } + } + + void visit(V v) override { + if (v->var_maybe) { // not underscore + if (const auto* var_ref = v->var_maybe->try_as()) { + v->mutate()->assign_inferred_type(var_ref->declared_type); + } else if (const auto* glob_ref = v->var_maybe->try_as()) { + 
v->mutate()->assign_inferred_type(glob_ref->declared_type); + } else { + tolk_assert(0); + } + } else if (v->declared_type) { // underscore with type + v->mutate()->assign_inferred_type(v->declared_type); + } else { // just underscore + v->mutate()->assign_inferred_type(TypeExpr::new_hole()); + } + v->get_identifier()->mutate()->assign_inferred_type(v->inferred_type); + } + + void visit(V v) override { + parent::visit(v->get_lhs()); + parent::visit(v->get_assigned_val()); + TypeExpr* lhs = v->get_lhs()->inferred_type; + TypeExpr* rhs = v->get_assigned_val()->inferred_type; + try { + unify(lhs, rhs); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot assign an expression of type " << rhs << " to a variable or pattern of type " << lhs << ": " << ue; + v->error(os.str()); + } + } + + void visit(V v) override { + parent::visit(v->get_try_body()); + parent::visit(v->get_catch_expr()); + + TypeExpr* tvm_error_type = TypeExpr::new_tensor(TypeExpr::new_var(), TypeExpr::new_atomic(TypeExpr::_Int)); + tolk_assert(v->get_catch_expr()->size() == 2); + TypeExpr* type1 = v->get_catch_expr()->get_item(0)->inferred_type; + unify(type1, tvm_error_type->args[1]); + TypeExpr* type2 = v->get_catch_expr()->get_item(1)->inferred_type; + unify(type2, tvm_error_type->args[0]); + + parent::visit(v->get_catch_body()); + } + + void visit(V v) override { + parent::visit(v->get_thrown_code()); + if (!expect_integer(v->get_thrown_code())) { + v->get_thrown_code()->error("excNo of `throw` must be an integer"); + } + if (v->has_thrown_arg()) { + parent::visit(v->get_thrown_arg()); + } + } + + void visit(V v) override { + parent::visit(v->get_cond()); + if (!expect_integer(v->get_cond())) { + v->get_cond()->error("condition of `assert` must be an integer"); + } + parent::visit(v->get_thrown_code()); + } + +public: + void start_visiting_function(V v_function) override { + current_function = v_function->fun_ref; + parent::visit(v_function->get_body()); + if 
(current_function->is_implicit_return()) { + if (current_function->does_return_self()) { + throw ParseError(v_function->get_body()->as()->loc_end, "missing return; forgot `return self`?"); + } + TypeExpr* expr_type = TypeExpr::new_unit(); + TypeExpr* ret_type = current_function->full_type; + if (ret_type->constr == TypeExpr::te_ForAll) { + ret_type = ret_type->args[0]; + } + tolk_assert(ret_type->constr == TypeExpr::te_Map); + ret_type = ret_type->args[1]; + if (current_function->has_mutate_params()) { + ret_type = ret_type->args[ret_type->args.size() - 1]; + } + try { + unify(expr_type, ret_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "implicit function return type " << expr_type + << " cannot be unified with inferred return type " << ret_type << ": " << ue; + v_function->error(os.str()); + } + } + } +}; + +void pipeline_infer_and_check_types(const AllSrcFiles& all_src_files) { + visit_ast_of_all_functions(all_src_files); +} + +} // namespace tolk diff --git a/tolk/pipe-refine-lvalue-for-mutate.cpp b/tolk/pipe-refine-lvalue-for-mutate.cpp new file mode 100644 index 00000000..c4c31b51 --- /dev/null +++ b/tolk/pipe-refine-lvalue-for-mutate.cpp @@ -0,0 +1,118 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . 
+*/ +#include "tolk.h" +#include "ast.h" +#include "ast-visitor.h" + +/* + * This pipe refines rvalue/lvalue and checks `mutate` arguments validity. + * It happens after type inferring (after methods binding), because it uses fun_ref of calls. + * + * Example: `a.increment().increment()`, the first `a.increment()` becomes lvalue (assume that increment mutates self). + * Example: `increment(a)` is invalid, should be `increment(mutate a)`. + * + * Note, that explicitly specifying `mutate` for arguments, like `increment(mutate a)` is on purpose. + * If we wished `increment(a)` to be valid (to work and mutate `a`, like passing by ref), it would also be done here, + * refining `a` to be lvalue. But to avoid unexpected mutations, `mutate` keyword for an argument is required. + * So, for mutated arguments, instead of setting lvalue, we check its presence. + */ + +namespace tolk { + +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_invalid_mutate_arg_passed(AnyV v, const FunctionData* fun_ref, const LocalVarData& p_sym, bool called_as_method, bool arg_passed_as_mutate, AnyV arg_expr) { + std::string arg_str(arg_expr->type == ast_identifier ? arg_expr->as()->name : "obj"); + + // case: `loadInt(cs, 32)`; suggest: `cs.loadInt(32)` + if (p_sym.is_mutate_parameter() && !arg_passed_as_mutate && !called_as_method && p_sym.idx == 0 && fun_ref->does_accept_self()) { + v->error("`" + fun_ref->name + "` is a mutating method; consider calling `" + arg_str + "." + fun_ref->name + "()`, not `" + fun_ref->name + "(" + arg_str + ")`"); + } + // case: `cs.mutating_function()`; suggest: `mutating_function(mutate cs)` or make it a method + if (p_sym.is_mutate_parameter() && called_as_method && p_sym.idx == 0 && !fun_ref->does_accept_self()) { + v->error("function `" + fun_ref->name + "` mutates parameter `" + p_sym.name + "`; consider calling `" + fun_ref->name + "(mutate " + arg_str + ")`, not `" + arg_str + "." 
+ fun_ref->name + "`(); alternatively, rename parameter to `self` to make it a method"); + } + // case: `mutating_function(arg)`; suggest: `mutate arg` + if (p_sym.is_mutate_parameter() && !arg_passed_as_mutate) { + v->error("function `" + fun_ref->name + "` mutates parameter `" + p_sym.name + "`; you need to specify `mutate` when passing an argument, like `mutate " + arg_str + "`"); + } + // case: `usual_function(mutate arg)` + if (!p_sym.is_mutate_parameter() && arg_passed_as_mutate) { + v->error("incorrect `mutate`, since `" + fun_ref->name + "` does not mutate this parameter"); + } + throw Fatal("unreachable"); +} + + +class RefineLvalueForMutateArgumentsVisitor final : public ASTVisitorFunctionBody { + void visit(V v) override { + // most likely it's a global function, but also may be `some_var(args)` or even `getF()(args)` + const FunctionData* fun_ref = v->fun_maybe; + if (!fun_ref) { + parent::visit(v); + for (int i = 0; i < v->get_num_args(); ++i) { + auto v_arg = v->get_arg(i); + if (v_arg->passed_as_mutate) { + v_arg->error("`mutate` used for non-mutate argument"); + } + } + return; + } + + tolk_assert(static_cast(fun_ref->parameters.size()) == v->get_num_args()); + + for (int i = 0; i < v->get_num_args(); ++i) { + const LocalVarData& p_sym = fun_ref->parameters[i]; + auto arg_i = v->get_arg(i); + if (p_sym.is_mutate_parameter() != arg_i->passed_as_mutate) { + fire_error_invalid_mutate_arg_passed(arg_i, fun_ref, p_sym, false, arg_i->passed_as_mutate, arg_i->get_expr()); + } + parent::visit(arg_i); + } + } + + void visit(V v) override { + parent::visit(v); + + const FunctionData* fun_ref = v->fun_ref; + tolk_assert(static_cast(fun_ref->parameters.size()) == 1 + v->get_num_args()); + + if (fun_ref->does_mutate_self()) { + bool will_be_extracted_as_tmp_var = v->get_obj()->type == ast_function_call || v->get_obj()->type == ast_dot_method_call; + if (!will_be_extracted_as_tmp_var) { + v->get_obj()->mutate()->assign_lvalue_true(); + } + } + + if 
(!fun_ref->does_accept_self() && fun_ref->parameters[0].is_mutate_parameter()) { + fire_error_invalid_mutate_arg_passed(v, fun_ref, fun_ref->parameters[0], true, false, v->get_obj()); + } + + for (int i = 0; i < v->get_num_args(); ++i) { + const LocalVarData& p_sym = fun_ref->parameters[1 + i]; + auto arg_i = v->get_arg(i); + if (p_sym.is_mutate_parameter() != arg_i->passed_as_mutate) { + fire_error_invalid_mutate_arg_passed(arg_i, fun_ref, p_sym, false, arg_i->passed_as_mutate, arg_i->get_expr()); + } + } + } +}; + +void pipeline_refine_lvalue_for_mutate_arguments(const AllSrcFiles& all_src_files) { + visit_ast_of_all_functions(all_src_files); +} + +} // namespace tolk diff --git a/tolk/pipe-register-symbols.cpp b/tolk/pipe-register-symbols.cpp index 569d434a..478bc727 100644 --- a/tolk/pipe-register-symbols.cpp +++ b/tolk/pipe-register-symbols.cpp @@ -13,65 +13,50 @@ You should have received a copy of the GNU General Public License along with TON Blockchain. If not, see . - - In addition, as a special exception, the copyright holders give permission - to link the code of portions of this program with the OpenSSL library. - You must obey the GNU General Public License in all respects for all - of the code used other than OpenSSL. If you modify file(s) with this - exception, you may extend this exception to your version of the file(s), - but you are not obligated to do so. If you do not wish to do so, delete this - exception statement from your version. If you delete this exception statement - from all source files in the program, then also delete it here. */ #include "tolk.h" #include "platform-utils.h" #include "src-file.h" #include "ast.h" #include "compiler-state.h" +#include "constant-evaluator.h" #include "td/utils/crypto.h" #include +/* + * This pipe registers global symbols: functions, constants, global vars, etc. + * It happens just after all files have been parsed to AST. + * + * "Registering" means adding symbols to a global symbol table. 
+ * After this pass, any global symbol can be looked up. + * Note, that local variables are not analyzed here, it's a later step. + * Before digging into locals, we need a global symtable to be filled, exactly done here. + */ + namespace tolk { -Expr* process_expr(AnyV v, CodeBlob& code); - -GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_redefinition_of_symbol(V v_ident, SymDef* existing) { - if (existing->loc.is_stdlib()) { - v_ident->error("redefinition of a symbol from stdlib"); - } else if (existing->loc.is_defined()) { - v_ident->error("redefinition of symbol, previous was at: " + existing->loc.to_string()); - } else { - v_ident->error("redefinition of built-in symbol"); - } -} - -static int calc_sym_idx(std::string_view sym_name) { - return G.symbols.lookup_add(sym_name); -} - -static td::RefInt256 calculate_method_id_for_entrypoint(std::string_view func_name) { +static int calculate_method_id_for_entrypoint(std::string_view func_name) { if (func_name == "main" || func_name == "onInternalMessage") { - return td::make_refint(0); + return 0; } if (func_name == "onExternalMessage") { - return td::make_refint(-1); + return -1; } if (func_name == "onRunTickTock") { - return td::make_refint(-2); + return -2; } if (func_name == "onSplitPrepare") { - return td::make_refint(-3); + return -3; } if (func_name == "onSplitInstall") { - return td::make_refint(-4); + return -4; } tolk_assert(false); } -static td::RefInt256 calculate_method_id_by_func_name(std::string_view func_name) { +static int calculate_method_id_by_func_name(std::string_view func_name) { unsigned int crc = td::crc16(static_cast(func_name)); - return td::make_refint((crc & 0xffff) | 0x10000); + return static_cast(crc & 0xffff) | 0x10000; } static void calc_arg_ret_order_of_asm_function(V v_body, V param_list, TypeExpr* ret_type, @@ -89,7 +74,7 @@ static void calc_arg_ret_order_of_asm_function(V v_body, V v_param = param_list->get_param(i); - int arg_width = 
v_param->param_type->get_width(); + int arg_width = v_param->declared_type->get_width(); if (arg_width < 0 || arg_width > 16) { v_param->error("parameters of an assembler built-in function must have a well-defined fixed width"); } @@ -130,102 +115,39 @@ static void calc_arg_ret_order_of_asm_function(V v_body, V v) { - AnyV init_value = v->get_init_value(); - SymDef* sym_def = define_global_symbol(calc_sym_idx(v->get_identifier()->name), v->loc); - if (sym_def->value) { - fire_error_redefinition_of_symbol(v->get_identifier(), sym_def); - } + ConstantValue init_value = eval_const_init_value(v->get_init_value()); + GlobalConstData* c_sym = new GlobalConstData(static_cast(v->get_identifier()->name), v->loc, std::move(init_value)); - // todo currently, constant value calculation is dirty and roughly: init_value is evaluated to fif code - // and waited to be a single expression - // although it works, of course it should be later rewritten using AST calculations, as well as lots of other parts - CodeBlob code("tmp", v->loc, nullptr, nullptr); - Expr* x = process_expr(init_value, code); - if (!x->is_rvalue()) { - v->get_init_value()->error("expression is not strictly Rvalue"); - } - if (v->declared_type && !v->declared_type->equals_to(x->e_type)) { + if (v->declared_type && !v->declared_type->equals_to(c_sym->inferred_type)) { v->error("expression type does not match declared type"); } - SymValConst* sym_val = nullptr; - if (x->cls == Expr::_Const) { // Integer constant - sym_val = new SymValConst(static_cast(G.all_constants.size()), x->intval); - } else if (x->cls == Expr::_SliceConst) { // Slice constant (string) - sym_val = new SymValConst(static_cast(G.all_constants.size()), x->strval); - } else if (x->cls == Expr::_Apply) { // even "1 + 2" is Expr::_Apply (it applies `_+_`) - code.emplace_back(v->loc, Op::_Import, std::vector()); - auto tmp_vars = x->pre_compile(code); - code.emplace_back(v->loc, Op::_Return, std::move(tmp_vars)); - code.emplace_back(v->loc, 
Op::_Nop); - // It is REQUIRED to execute "optimizations" as in tolk.cpp - code.simplify_var_types(); - code.prune_unreachable_code(); - code.split_vars(true); - for (int i = 0; i < 16; i++) { - code.compute_used_code_vars(); - code.fwd_analyze(); - code.prune_unreachable_code(); - } - code.mark_noreturn(); - AsmOpList out_list(0, &code.vars); - code.generate_code(out_list); - if (out_list.list_.size() != 1) { - init_value->error("precompiled expression must result in single operation"); - } - auto op = out_list.list_[0]; - if (!op.is_const()) { - init_value->error("precompiled expression must result in compilation time constant"); - } - if (op.origin.is_null() || !op.origin->is_valid()) { - init_value->error("precompiled expression did not result in a valid integer constant"); - } - sym_val = new SymValConst(static_cast(G.all_constants.size()), op.origin); - } else { - init_value->error("integer or slice literal or constant expected"); - } - sym_def->value = sym_val; -#ifdef TOLK_DEBUG - sym_def->value->sym_name = v->get_identifier()->name; -#endif - G.all_constants.push_back(sym_def); + G.symtable.add_global_const(c_sym); + G.all_constants.push_back(c_sym); + v->mutate()->assign_const_ref(c_sym); } static void register_global_var(V v) { - SymDef* sym_def = define_global_symbol(calc_sym_idx(v->get_identifier()->name), v->loc); - if (sym_def->value) { - fire_error_redefinition_of_symbol(v->get_identifier(), sym_def); - } + GlobalVarData* g_sym = new GlobalVarData(static_cast(v->get_identifier()->name), v->loc, v->declared_type); - sym_def->value = new SymValGlobVar(static_cast(G.all_global_vars.size()), v->declared_type); -#ifdef TOLK_DEBUG - sym_def->value->sym_name = v->get_identifier()->name; -#endif - G.all_global_vars.push_back(sym_def); + G.symtable.add_global_var(g_sym); + G.all_global_vars.push_back(g_sym); + v->mutate()->assign_var_ref(g_sym); } -static SymDef* register_parameter(V v, int idx) { +static LocalVarData register_parameter(V v, int idx) { if 
(v->is_underscore()) { - return nullptr; - } - SymDef* sym_def = define_parameter(calc_sym_idx(v->get_identifier()->name), v->loc); - if (sym_def->value) { - // todo always false now, how to detect similar parameter names? (remember about underscore) - v->error("redefined parameter"); + return {"", v->loc, idx, v->declared_type}; } - SymValVariable* sym_val = new SymValVariable(idx, v->param_type); + LocalVarData p_sym(static_cast(v->param_name), v->loc, idx, v->declared_type); if (v->declared_as_mutate) { - sym_val->flags |= SymValVariable::flagMutateParameter; + p_sym.flags |= LocalVarData::flagMutateParameter; } - if (!v->declared_as_mutate && idx == 0 && v->get_identifier()->name == "self") { - sym_val->flags |= SymValVariable::flagImmutable; + if (!v->declared_as_mutate && idx == 0 && v->param_name == "self") { + p_sym.flags |= LocalVarData::flagImmutable; } - sym_def->value = sym_val; -#ifdef TOLK_DEBUG - sym_def->value->sym_name = v->get_identifier()->name; -#endif - return sym_def; + return p_sym; } static void register_function(V v) { @@ -235,16 +157,16 @@ static void register_function(V v) { TypeExpr* params_tensor_type = nullptr; int n_params = v->get_num_params(); int n_mutate_params = 0; - std::vector parameters_syms; + std::vector parameters; if (n_params) { std::vector param_tensor_items; param_tensor_items.reserve(n_params); - parameters_syms.reserve(n_params); + parameters.reserve(n_params); for (int i = 0; i < n_params; ++i) { auto v_param = v->get_param(i); n_mutate_params += static_cast(v_param->declared_as_mutate); - param_tensor_items.emplace_back(v_param->param_type); - parameters_syms.emplace_back(register_parameter(v_param, i)); + param_tensor_items.emplace_back(v_param->declared_type); + parameters.emplace_back(register_parameter(v_param, i)); } params_tensor_type = TypeExpr::new_tensor(std::move(param_tensor_items)); } else { @@ -261,24 +183,20 @@ static void register_function(V v) { function_type = 
TypeExpr::new_forall(std::move(type_vars), function_type); } if (v->marked_as_builtin) { - const SymDef* builtin_func = lookup_symbol(G.symbols.lookup(func_name)); - const SymValFunc* func_val = builtin_func ? dynamic_cast(builtin_func->value) : nullptr; - if (!func_val || !func_val->is_builtin()) { + const Symbol* builtin_func = lookup_global_symbol(func_name); + const FunctionData* func_val = builtin_func ? builtin_func->as() : nullptr; + if (!func_val || !func_val->is_builtin_function()) { v->error("`builtin` used for non-builtin function"); } #ifdef TOLK_DEBUG // in release, we don't need this check, since `builtin` is used only in stdlib, which is our responsibility - if (!func_val->sym_type->equals_to(function_type) || func_val->is_marked_as_pure() != v->marked_as_pure) { + if (!func_val->full_type->equals_to(function_type) || func_val->is_marked_as_pure() != v->marked_as_pure) { v->error("declaration for `builtin` function doesn't match an actual one"); } #endif return; } - SymDef* sym_def = define_global_symbol(calc_sym_idx(func_name), v->loc); - if (sym_def->value) { - fire_error_redefinition_of_symbol(v->get_identifier(), sym_def); - } if (G.is_verbosity(1)) { std::cerr << "fun " << func_name << " : " << function_type << std::endl; } @@ -286,67 +204,61 @@ static void register_function(V v) { v->error("a pure function should return something, otherwise it will be optimized out anyway"); } - SymValFunc* sym_val = nullptr; - if (const auto* v_seq = v->get_body()->try_as()) { - sym_val = new SymValCodeFunc(std::move(parameters_syms), static_cast(G.all_code_functions.size()), function_type); - } else if (const auto* v_asm = v->get_body()->try_as()) { - std::vector arg_order, ret_order; - calc_arg_ret_order_of_asm_function(v_asm, v->get_param_list(), v->ret_type, arg_order, ret_order); - sym_val = new SymValAsmFunc(std::move(parameters_syms), function_type, std::move(arg_order), std::move(ret_order), 0); - } else { - v->error("Unexpected function body 
statement"); + FunctionBody f_body = v->get_body()->type == ast_sequence ? static_cast(new FunctionBodyCode) : static_cast(new FunctionBodyAsm); + FunctionData* f_sym = new FunctionData(static_cast(func_name), v->loc, function_type, std::move(parameters), 0, f_body); + + if (const auto* v_asm = v->get_body()->try_as()) { + calc_arg_ret_order_of_asm_function(v_asm, v->get_param_list(), v->ret_type, f_sym->arg_order, f_sym->ret_order); } if (v->method_id) { - sym_val->method_id = td::string_to_int256(static_cast(v->method_id->int_val)); - if (sym_val->method_id.is_null()) { + if (v->method_id->intval.is_null() || !v->method_id->intval->signed_fits_bits(32)) { v->method_id->error("invalid integer constant"); } + f_sym->method_id = static_cast(v->method_id->intval->to_long()); } else if (v->marked_as_get_method) { - sym_val->method_id = calculate_method_id_by_func_name(func_name); - for (const SymDef* other : G.all_get_methods) { - if (!td::cmp(dynamic_cast(other->value)->method_id, sym_val->method_id)) { - v->error(PSTRING() << "GET methods hash collision: `" << other->name() << "` and `" << static_cast(func_name) << "` produce the same hash. Consider renaming one of these functions."); + f_sym->method_id = calculate_method_id_by_func_name(func_name); + for (const FunctionData* other : G.all_get_methods) { + if (other->method_id == f_sym->method_id) { + v->error(PSTRING() << "GET methods hash collision: `" << other->name << "` and `" << f_sym->name << "` produce the same hash. 
Consider renaming one of these functions."); } } } else if (v->is_entrypoint) { - sym_val->method_id = calculate_method_id_for_entrypoint(func_name); + f_sym->method_id = calculate_method_id_for_entrypoint(func_name); } if (v->marked_as_pure) { - sym_val->flags |= SymValFunc::flagMarkedAsPure; + f_sym->flags |= FunctionData::flagMarkedAsPure; } if (v->marked_as_inline) { - sym_val->flags |= SymValFunc::flagInline; + f_sym->flags |= FunctionData::flagInline; } if (v->marked_as_inline_ref) { - sym_val->flags |= SymValFunc::flagInlineRef; + f_sym->flags |= FunctionData::flagInlineRef; } if (v->marked_as_get_method) { - sym_val->flags |= SymValFunc::flagGetMethod; + f_sym->flags |= FunctionData::flagGetMethod; } if (v->is_entrypoint) { - sym_val->flags |= SymValFunc::flagIsEntrypoint; + f_sym->flags |= FunctionData::flagIsEntrypoint; } if (n_mutate_params) { - sym_val->flags |= SymValFunc::flagHasMutateParams; + f_sym->flags |= FunctionData::flagHasMutateParams; } if (v->accepts_self) { - sym_val->flags |= SymValFunc::flagAcceptsSelf; + f_sym->flags |= FunctionData::flagAcceptsSelf; } if (v->returns_self) { - sym_val->flags |= SymValFunc::flagReturnsSelf; + f_sym->flags |= FunctionData::flagReturnsSelf; } - sym_def->value = sym_val; -#ifdef TOLK_DEBUG - sym_def->value->sym_name = func_name; -#endif - if (dynamic_cast(sym_val)) { - G.all_code_functions.push_back(sym_def); + G.symtable.add_function(f_sym); + if (f_sym->is_regular_function()) { + G.all_code_functions.push_back(f_sym); } - if (sym_val->is_get_method()) { - G.all_get_methods.push_back(sym_def); + if (f_sym->is_get_method()) { + G.all_get_methods.push_back(f_sym); } + v->mutate()->assign_fun_ref(f_sym); } static void iterate_through_file_symbols(const SrcFile* file) { diff --git a/tolk/pipe-resolve-symbols.cpp b/tolk/pipe-resolve-symbols.cpp new file mode 100644 index 00000000..31d25229 --- /dev/null +++ b/tolk/pipe-resolve-symbols.cpp @@ -0,0 +1,272 @@ +/* + This file is part of TON Blockchain Library. 
+ + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "tolk.h" +#include "platform-utils.h" +#include "src-file.h" +#include "ast.h" +#include "ast-visitor.h" +#include "compiler-state.h" +#include + +/* + * This pipe resolves identifiers (local variables) in all functions bodies. + * It happens before type inferring, but after all global symbols are registered. + * It means, that for any symbol `x` we can look up whether it's a global name or not. + * + * Example: `var x = 10; x = 20;` both `x` point to one LocalVarData. + * Example: `x = 20` undefined symbol `x` is also here (unless it's a global) + * Variables scoping and redeclaration are also here. + * + * As a result of this step, every V::sym is filled, pointing either to a local var/parameter, + * or to a global var / constant / function. + */ + +namespace tolk { + +static void check_import_exists_when_using_sym(AnyV v_usage, const Symbol* used_sym) { + SrcLocation sym_loc = used_sym->loc; + if (!v_usage->loc.is_symbol_from_same_or_builtin_file(sym_loc)) { + const SrcFile* declared_in = sym_loc.get_src_file(); + bool has_import = false; + for (const SrcFile::ImportStatement& import_stmt : v_usage->loc.get_src_file()->imports) { + if (import_stmt.imported_file == declared_in) { + has_import = true; + } + } + if (!has_import) { + v_usage->error("Using a non-imported symbol `" + used_sym->name + "`. 
Forgot to import \"" + declared_in->rel_filename + "\"?"); + } + } +} + +struct NameAndScopeResolver { + std::vector> scopes; + + static uint64_t key_hash(std::string_view name_key) { + return std::hash{}(name_key); + } + + void open_scope([[maybe_unused]] SrcLocation loc) { + // std::cerr << "open_scope " << scopes.size() + 1 << " at " << loc << std::endl; + scopes.emplace_back(); + } + + void close_scope([[maybe_unused]] SrcLocation loc) { + // std::cerr << "close_scope " << scopes.size() << " at " << loc << std::endl; + if (UNLIKELY(scopes.empty())) { + throw Fatal{"cannot close the outer scope"}; + } + scopes.pop_back(); + } + + const Symbol* lookup_symbol(std::string_view name) const { + uint64_t key = key_hash(name); + for (auto it = scopes.rbegin(); it != scopes.rend(); ++it) { // NOLINT(*-loop-convert) + const auto& scope = *it; + if (auto it_sym = scope.find(key); it_sym != scope.end()) { + return it_sym->second; + } + } + return G.symtable.lookup(name); + } + + const Symbol* add_local_var(const LocalVarData* v_sym) { + if (UNLIKELY(scopes.empty())) { + throw Fatal("unexpected scope_level = 0"); + } + if (v_sym->name.empty()) { // underscore + return v_sym; + } + + uint64_t key = key_hash(v_sym->name); + const auto& [_, inserted] = scopes.rbegin()->emplace(key, v_sym); + if (UNLIKELY(!inserted)) { + throw ParseError(v_sym->loc, "redeclaration of local variable `" + v_sym->name + "`"); + } + return v_sym; + } +}; + + +class AssignSymInsideFunctionVisitor final : public ASTVisitorFunctionBody { + // more correctly this field shouldn't be static, but currently there is no need to make it a part of state + static NameAndScopeResolver current_scope; + + static const Symbol* create_local_var_sym(std::string_view name, SrcLocation loc, TypeExpr* var_type, bool immutable) { + LocalVarData* v_sym = new LocalVarData(static_cast(name), loc, -1, var_type); + if (immutable) { + v_sym->flags |= LocalVarData::flagImmutable; + } + return 
current_scope.add_local_var(v_sym); + } + + static void process_catch_variable(AnyV catch_var) { + if (auto v_ident = catch_var->try_as()) { + const Symbol* sym = create_local_var_sym(v_ident->name, catch_var->loc, TypeExpr::new_hole(), true); + v_ident->mutate()->assign_sym(sym); + } + } + + static void process_function_arguments(const FunctionData* fun_ref, V v, AnyExprV lhs_of_dot_call) { + int delta_self = lhs_of_dot_call ? 1 : 0; + int n_arguments = static_cast(v->get_arguments().size()) + delta_self; + int n_parameters = static_cast(fun_ref->parameters.size()); + + // Tolk doesn't have optional parameters currently, so just compare counts + if (n_parameters < n_arguments) { + v->error("too many arguments in call to `" + fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); + } + if (n_arguments < n_parameters) { + v->error("too few arguments in call to `" + fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); + } + } + + void visit(V v) override { + if (v->marked_as_redef) { + auto v_ident = v->get_identifier()->as(); + const Symbol* sym = current_scope.lookup_symbol(v_ident->name); + if (sym == nullptr) { + v->error("`redef` for unknown variable"); + } + if (!sym->try_as() && !sym->try_as()) { + v->error("`redef` for unknown variable"); + } + v->mutate()->assign_var_ref(sym); + v_ident->mutate()->assign_sym(sym); + } else if (auto v_ident = v->get_identifier()->try_as()) { + TypeExpr* var_type = v->declared_type ? 
v->declared_type : TypeExpr::new_hole(); + const Symbol* sym = create_local_var_sym(v_ident->name, v->loc, var_type, v->is_immutable); + v->mutate()->assign_var_ref(sym); + v_ident->mutate()->assign_sym(sym); + } else { + // underscore, do nothing, v->sym remains nullptr + } + } + + void visit(V v) override { + parent::visit(v->get_assigned_val()); + parent::visit(v->get_lhs()); + } + + void visit(V v) override { + const Symbol* sym = current_scope.lookup_symbol(v->name); + if (!sym) { + v->error("undefined symbol `" + static_cast(v->name) + "`"); + } + v->mutate()->assign_sym(sym); + + // for global functions, global vars and constants, `import` must exist + if (!sym->try_as()) { + check_import_exists_when_using_sym(v, sym); + } + } + + void visit(V v) override { + parent::visit(v->get_called_f()); + parent::visit(v->get_arg_list()); + + // most likely it's a global function, but also may be `some_var(args)` or even `getF()(args)` + // for such corner cases, sym remains nullptr + if (auto v_ident = v->get_called_f()->try_as()) { + if (const auto* fun_ref = v_ident->sym->try_as()) { + v->mutate()->assign_fun_ref(fun_ref); + process_function_arguments(fun_ref, v->get_arg_list(), nullptr); + } + } + // for `some_var(args)`, if it's called with wrong arguments count, the error is not here + // it will be fired later, it's a type checking error + } + + void visit(V v) override { + const Symbol* sym = lookup_global_symbol(v->method_name); + if (!sym) { + v->error("undefined symbol `" + static_cast(v->method_name) + "`"); + } + const auto* fun_ref = sym->try_as(); + if (!fun_ref) { + v->error("`" + static_cast(v->method_name) + "` is not a method"); + } + + if (fun_ref->parameters.empty()) { + v->error("`" + static_cast(v->method_name) + "` has no parameters and can not be called as method"); + } + + v->mutate()->assign_fun_ref(fun_ref); + parent::visit(v); + process_function_arguments(fun_ref, v->get_arg_list(), v->get_obj()); + } + + void visit(V v) override { + const 
Symbol* sym = current_scope.lookup_symbol("self"); + if (!sym) { + v->error("using `self` in a non-member function (it does not accept the first `self` parameter)"); + } + v->mutate()->assign_param_ref(sym->as()); + } + + void visit(V v) override { + if (v->empty()) { + return; + } + current_scope.open_scope(v->loc); + parent::visit(v); + current_scope.close_scope(v->loc_end); + } + + void visit(V v) override { + current_scope.open_scope(v->loc); + parent::visit(v->get_body()); + parent::visit(v->get_cond()); // in 'while' condition it's ok to use variables declared inside do + current_scope.close_scope(v->get_body()->loc_end); + } + + void visit(V v) override { + visit(v->get_try_body()); + current_scope.open_scope(v->get_catch_body()->loc); + const std::vector& catch_items = v->get_catch_expr()->get_items(); + tolk_assert(catch_items.size() == 2); + process_catch_variable(catch_items[1]); + process_catch_variable(catch_items[0]); + parent::visit(v->get_catch_body()); + current_scope.close_scope(v->get_catch_body()->loc_end); + } + +public: + void start_visiting_function(V v_function) override { + auto v_seq = v_function->get_body()->try_as(); + tolk_assert(v_seq != nullptr); + + current_scope.open_scope(v_function->loc); + + for (int i = 0; i < v_function->get_num_params(); ++i) { + current_scope.add_local_var(&v_function->fun_ref->parameters[i]); + v_function->get_param(i)->mutate()->assign_param_ref(&v_function->fun_ref->parameters[i]); + } + parent::visit(v_seq); + + current_scope.close_scope(v_seq->loc_end); + tolk_assert(current_scope.scopes.empty()); + } +}; + +NameAndScopeResolver AssignSymInsideFunctionVisitor::current_scope; + +void pipeline_resolve_identifiers_and_assign_symbols(const AllSrcFiles& all_src_files) { + visit_ast_of_all_functions(all_src_files); +} + +} // namespace tolk diff --git a/tolk/pipeline.h b/tolk/pipeline.h index fdfd2b99..5c0fe656 100644 --- a/tolk/pipeline.h +++ b/tolk/pipeline.h @@ -33,6 +33,14 @@ namespace tolk { AllSrcFiles 
pipeline_discover_and_parse_sources(const std::string& stdlib_filename, const std::string& entrypoint_filename); void pipeline_register_global_symbols(const AllSrcFiles&); +void pipeline_resolve_identifiers_and_assign_symbols(const AllSrcFiles&); +void pipeline_calculate_rvalue_lvalue(const AllSrcFiles&); +void pipeline_detect_unreachable_statements(const AllSrcFiles&); +void pipeline_infer_and_check_types(const AllSrcFiles&); +void pipeline_refine_lvalue_for_mutate_arguments(const AllSrcFiles&); +void pipeline_check_rvalue_lvalue(const AllSrcFiles&); +void pipeline_check_pure_impure_operations(const AllSrcFiles&); +void pipeline_constant_folding(const AllSrcFiles&); void pipeline_convert_ast_to_legacy_Expr_Op(const AllSrcFiles&); void pipeline_find_unused_symbols(); diff --git a/tolk/src-file.h b/tolk/src-file.h index 815dccbe..9fbbfbb4 100644 --- a/tolk/src-file.h +++ b/tolk/src-file.h @@ -18,11 +18,10 @@ #include #include +#include "fwd-declarations.h" namespace tolk { -struct ASTNodeBase; - struct SrcFile { struct SrcPosition { int offset; @@ -39,7 +38,7 @@ struct SrcFile { std::string rel_filename; // relative to cwd std::string abs_filename; // absolute from root std::string text; // file contents loaded into memory, every Token::str_val points inside it - const ASTNodeBase* ast = nullptr; // when a file has been parsed, its ast_tolk_file is kept here + AnyV ast = nullptr; // when a file has been parsed, its ast_tolk_file is kept here std::vector imports; // to check strictness (can't use a symbol without importing its file) SrcFile(int file_id, std::string rel_filename, std::string abs_filename, std::string&& text) diff --git a/tolk/symtable.cpp b/tolk/symtable.cpp index abaeb084..dc715031 100644 --- a/tolk/symtable.cpp +++ b/tolk/symtable.cpp @@ -16,154 +16,85 @@ */ #include "symtable.h" #include "compiler-state.h" +#include "platform-utils.h" #include #include namespace tolk { +bool FunctionData::does_need_codegen() const { + // when a function is 
declared, but not referenced from code in any way, don't generate its body + if (!is_really_used() && G.settings.remove_unused_functions) { + return false; + } + // when a function is referenced like `var a = some_fn;` (or in some other non-call way), its continuation should exist + if (is_used_as_noncall()) { + return true; + } + // currently, there is no inlining, all functions are codegenerated + // (but actually, unused ones are later removed by Fift) + // in the future, we may want to implement a true AST inlining for "simple" functions + return true; +} -std::string Symbol::unknown_symbol_name(sym_idx_t i) { - if (!i) { - return "_"; - } else { - std::ostringstream os; - os << "SYM#" << i; - return os.str(); +void FunctionData::assign_is_really_used() { + this->flags |= flagReallyUsed; +} + +void FunctionData::assign_is_used_as_noncall() { + this->flags |= flagUsedAsNonCall; +} + +void FunctionData::assign_is_implicit_return() { + this->flags |= flagImplicitReturn; +} + +void GlobalVarData::assign_is_really_used() { + this->flags |= flagReallyUsed; +} + +void LocalVarData::assign_idx(int idx) { + this->idx = idx; +} + +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_redefinition_of_symbol(SrcLocation loc, const Symbol* previous) { + SrcLocation prev_loc = previous->loc; + if (prev_loc.is_stdlib()) { + throw ParseError(loc, "redefinition of a symbol from stdlib"); + } + if (prev_loc.is_defined()) { + throw ParseError(loc, "redefinition of symbol, previous was at: " + prev_loc.to_string()); + } + throw ParseError(loc, "redefinition of built-in symbol"); +} + +void GlobalSymbolTable::add_function(const FunctionData* f_sym) { + auto key = key_hash(f_sym->name); + auto [it, inserted] = entries.emplace(key, f_sym); + if (!inserted) { + fire_error_redefinition_of_symbol(f_sym->loc, it->second); } } -sym_idx_t SymTable::gen_lookup(std::string_view str, int mode, sym_idx_t idx) { - unsigned long long h1 = 1, h2 = 1; - for (char c : str) { - h1 = ((h1 
* 239) + (unsigned char)(c)) % SIZE_PRIME; - h2 = ((h2 * 17) + (unsigned char)(c)) % (SIZE_PRIME - 1); - } - ++h2; - ++h1; - while (true) { - if (sym[h1]) { - if (sym[h1]->str == str) { - return (mode & 2) ? not_found : sym_idx_t(h1); - } - h1 += h2; - if (h1 > SIZE_PRIME) { - h1 -= SIZE_PRIME; - } - } else { - if (!(mode & 1)) { - return not_found; - } - if (def_sym >= ((long long)SIZE_PRIME * 3) / 4) { - throw SymTableOverflow{def_sym}; - } - sym[h1] = std::make_unique(static_cast(str), idx <= 0 ? sym_idx_t(h1) : -idx); - ++def_sym; - return sym_idx_t(h1); - } +void GlobalSymbolTable::add_global_var(const GlobalVarData* g_sym) { + auto key = key_hash(g_sym->name); + auto [it, inserted] = entries.emplace(key, g_sym); + if (!inserted) { + fire_error_redefinition_of_symbol(g_sym->loc, it->second); } } -std::string SymDef::name() const { - return G.symbols.get_name(sym_idx); +void GlobalSymbolTable::add_global_const(const GlobalConstData* c_sym) { + auto key = key_hash(c_sym->name); + auto [it, inserted] = entries.emplace(key, c_sym); + if (!inserted) { + fire_error_redefinition_of_symbol(c_sym->loc, it->second); + } } -void open_scope(SrcLocation loc) { - ++G.scope_level; - G.scope_opened_at.push_back(loc); -} - -void close_scope() { - if (!G.scope_level) { - throw Fatal{"cannot close the outer scope"}; - } - while (!G.symbol_stack.empty() && G.symbol_stack.back().first == G.scope_level) { - SymDef old_def = G.symbol_stack.back().second; - auto idx = old_def.sym_idx; - G.symbol_stack.pop_back(); - SymDef* cur_def = G.sym_def[idx]; - assert(cur_def); - assert(cur_def->level == G.scope_level && cur_def->sym_idx == idx); - //std::cerr << "restoring local symbol `" << old_def.name << "` of level " << scope_level << " to its previous level " << old_def.level << std::endl; - if (cur_def->value) { - //std::cerr << "deleting value of symbol " << old_def.name << ":" << old_def.level << " at " << (const void*) it->second.value << std::endl; - delete cur_def->value; - } - if 
(!old_def.level && !old_def.value) { - delete cur_def; // ??? keep the definition always? - G.sym_def[idx] = nullptr; - } else { - cur_def->value = old_def.value; - cur_def->level = old_def.level; - } - old_def.value = nullptr; - } - --G.scope_level; - G.scope_opened_at.pop_back(); -} - -SymDef* lookup_symbol(sym_idx_t idx) { - if (!idx) { - return nullptr; - } - if (G.sym_def[idx]) { - return G.sym_def[idx]; - } - if (G.global_sym_def[idx]) { - return G.global_sym_def[idx]; - } - return nullptr; -} - -SymDef* define_global_symbol(sym_idx_t name_idx, SrcLocation loc) { - if (SymDef* found = G.global_sym_def[name_idx]) { - return found; // found->value is filled; it means, that a symbol is redefined - } - - SymDef* registered = G.global_sym_def[name_idx] = new SymDef(0, name_idx, loc); -#ifdef TOLK_DEBUG - registered->sym_name = registered->name(); -#endif - return registered; // registered->value is nullptr; it means, it's just created -} - -SymDef* define_parameter(sym_idx_t name_idx, SrcLocation loc) { - // note, that parameters (defined at function declaration) are not inserted into symtable - // their SymDef is registered to be inserted into SymValFunc::parameters - // (and later ->value is filled with SymValVariable) - - SymDef* registered = new SymDef(0, name_idx, loc); -#ifdef TOLK_DEBUG - registered->sym_name = registered->name(); -#endif - return registered; -} - -SymDef* define_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc) { - if (!name_idx) { - return nullptr; - } - if (!G.scope_level) { - throw Fatal("unexpected scope_level = 0"); - } - auto found = G.sym_def[name_idx]; - if (found) { - if (found->level < G.scope_level) { - G.symbol_stack.emplace_back(G.scope_level, *found); - found->level = G.scope_level; - } else if (found->value && force_new) { - return nullptr; - } - found->value = nullptr; - found->loc = loc; - return found; - } - found = G.sym_def[name_idx] = new SymDef(G.scope_level, name_idx, loc); - 
G.symbol_stack.emplace_back(G.scope_level, SymDef{0, name_idx, loc}); -#ifdef TOLK_DEBUG - found->sym_name = found->name(); - G.symbol_stack.back().second.sym_name = found->name(); -#endif - return found; +const Symbol* lookup_global_symbol(std::string_view name) { + return G.symtable.lookup(name); } } // namespace tolk diff --git a/tolk/symtable.h b/tolk/symtable.h index 69e2eaa8..53a5e52e 100644 --- a/tolk/symtable.h +++ b/tolk/symtable.h @@ -18,97 +18,194 @@ #include "src-file.h" #include "type-expr.h" -#include -#include +#include "constant-evaluator.h" +#include "crypto/common/refint.h" +#include +#include +#include namespace tolk { -typedef int var_idx_t; -typedef int sym_idx_t; - -enum class SymValKind { _Var, _Func, _GlobVar, _Const }; - -struct SymValBase { - SymValKind kind; - int idx; - TypeExpr* sym_type; -#ifdef TOLK_DEBUG - std::string sym_name; // seeing symbol name in debugger makes it much easier to delve into Tolk sources -#endif - - SymValBase(SymValKind kind, int idx, TypeExpr* sym_type) : kind(kind), idx(idx), sym_type(sym_type) { - } - virtual ~SymValBase() = default; - - TypeExpr* get_type() const { - return sym_type; - } -}; - - struct Symbol { - std::string str; - sym_idx_t idx; - - Symbol(std::string str, sym_idx_t idx) : str(std::move(str)), idx(idx) {} - - static std::string unknown_symbol_name(sym_idx_t i); -}; - -class SymTable { -public: - static constexpr int SIZE_PRIME = 100003; - -private: - sym_idx_t def_sym{0}; - std::unique_ptr sym[SIZE_PRIME + 1]; - sym_idx_t gen_lookup(std::string_view str, int mode = 0, sym_idx_t idx = 0); - -public: - - static constexpr sym_idx_t not_found = 0; - sym_idx_t lookup(std::string_view str) { - return gen_lookup(str, 0); - } - sym_idx_t lookup_add(std::string_view str) { - return gen_lookup(str, 1); - } - Symbol* operator[](sym_idx_t i) const { - return sym[i].get(); - } - std::string get_name(sym_idx_t i) const { - return sym[i] ? 
sym[i]->str : Symbol::unknown_symbol_name(i); - } -}; - -struct SymTableOverflow { - int sym_def; - explicit SymTableOverflow(int x) : sym_def(x) { - } -}; - - -struct SymDef { - int level; - sym_idx_t sym_idx; - SymValBase* value; + std::string name; SrcLocation loc; -#ifdef TOLK_DEBUG - std::string sym_name; -#endif - SymDef(int lvl, sym_idx_t idx, SrcLocation _loc, SymValBase* val = nullptr) - : level(lvl), sym_idx(idx), value(val), loc(_loc) { + + Symbol(std::string name, SrcLocation loc) + : name(std::move(name)) + , loc(loc) { + } + + virtual ~Symbol() = default; + + template + const T* as() const { +#ifdef TOLK_DEBUG + assert(dynamic_cast(this) != nullptr); +#endif + return dynamic_cast(this); + } + + template + const T* try_as() const { + return dynamic_cast(this); } - std::string name() const; }; +struct LocalVarData final : Symbol { + enum { + flagMutateParameter = 1, // parameter was declared with `mutate` keyword + flagImmutable = 2, // variable was declared via `val` (not `var`) + }; -void open_scope(SrcLocation loc); -void close_scope(); -SymDef* lookup_symbol(sym_idx_t idx); + TypeExpr* declared_type; + int flags = 0; + int idx; -SymDef* define_global_symbol(sym_idx_t name_idx, SrcLocation loc = {}); -SymDef* define_parameter(sym_idx_t name_idx, SrcLocation loc); -SymDef* define_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc); + LocalVarData(std::string name, SrcLocation loc, int idx, TypeExpr* declared_type) + : Symbol(std::move(name), loc) + , declared_type(declared_type) + , idx(idx) { + } + + bool is_underscore() const { return name.empty(); } + bool is_immutable() const { return flags & flagImmutable; } + bool is_mutate_parameter() const { return flags & flagMutateParameter; } + + LocalVarData* mutate() const { return const_cast(this); } + void assign_idx(int idx); +}; + +struct FunctionBodyCode; +struct FunctionBodyAsm; +struct FunctionBodyBuiltin; + +typedef std::variant< + FunctionBodyCode*, + FunctionBodyAsm*, + 
FunctionBodyBuiltin* +> FunctionBody; + +struct FunctionData final : Symbol { + static constexpr int EMPTY_METHOD_ID = -10; + + enum { + flagInline = 1, // marked `@inline` + flagInlineRef = 2, // marked `@inline_ref` + flagReallyUsed = 4, // calculated via dfs from used functions; declared but unused functions are not codegenerated + flagUsedAsNonCall = 8, // used not only as `f()`, but as a 1-st class function (assigned to var, pushed to tuple, etc.) + flagMarkedAsPure = 16, // declared as `pure`, can't call impure and access globals, unused invocations are optimized out + flagImplicitReturn = 32, // control flow reaches end of function, so it needs implicit return at the end + flagGetMethod = 64, // was declared via `get func(): T`, method_id is auto-assigned + flagIsEntrypoint = 128, // it's `main` / `onExternalMessage` / etc. + flagHasMutateParams = 256, // has parameters declared as `mutate` + flagAcceptsSelf = 512, // is a member function (has `self` first parameter) + flagReturnsSelf = 1024, // return type is `self` (returns the mutated 1st argument), calls can be chainable + }; + + int method_id = EMPTY_METHOD_ID; + int flags; + TypeExpr* full_type; // currently, TypeExpr::_Map, probably wrapped with forall + + std::vector parameters; + std::vector arg_order, ret_order; + + FunctionBody body; + + FunctionData(std::string name, SrcLocation loc, TypeExpr* full_type, std::vector parameters, int initial_flags, FunctionBody body) + : Symbol(std::move(name), loc) + , flags(initial_flags) + , full_type(full_type) + , parameters(std::move(parameters)) + , body(body) { + } + + const std::vector* get_arg_order() const { + return arg_order.empty() ? nullptr : &arg_order; + } + const std::vector* get_ret_order() const { + return ret_order.empty() ? 
nullptr : &ret_order; + } + + bool is_regular_function() const { return std::holds_alternative(body); } + bool is_asm_function() const { return std::holds_alternative(body); } + bool is_builtin_function() const { return std::holds_alternative(body); } + + bool is_inline() const { return flags & flagInline; } + bool is_inline_ref() const { return flags & flagInlineRef; } + bool is_really_used() const { return flags & flagReallyUsed; } + bool is_used_as_noncall() const { return flags & flagUsedAsNonCall; } + bool is_marked_as_pure() const { return flags & flagMarkedAsPure; } + bool is_implicit_return() const { return flags & flagImplicitReturn; } + bool is_get_method() const { return flags & flagGetMethod; } + bool is_method_id_not_empty() const { return method_id != EMPTY_METHOD_ID; } + bool is_entrypoint() const { return flags & flagIsEntrypoint; } + bool has_mutate_params() const { return flags & flagHasMutateParams; } + bool does_accept_self() const { return flags & flagAcceptsSelf; } + bool does_return_self() const { return flags & flagReturnsSelf; } + bool does_mutate_self() const { return (flags & flagAcceptsSelf) && parameters[0].is_mutate_parameter(); } + + bool does_need_codegen() const; + + FunctionData* mutate() const { return const_cast(this); } + void assign_is_really_used(); + void assign_is_used_as_noncall(); + void assign_is_implicit_return(); +}; + +struct GlobalVarData final : Symbol { + enum { + flagReallyUsed = 1, // calculated via dfs from used functions; unused globals are not codegenerated + }; + + TypeExpr* declared_type; + int flags = 0; + + GlobalVarData(std::string name, SrcLocation loc, TypeExpr* declared_type) + : Symbol(std::move(name), loc) + , declared_type(declared_type) { + } + + bool is_really_used() const { return flags & flagReallyUsed; } + + GlobalVarData* mutate() const { return const_cast(this); } + void assign_is_really_used(); +}; + +struct GlobalConstData final : Symbol { + ConstantValue value; + TypeExpr* inferred_type; + 
+ GlobalConstData(std::string name, SrcLocation loc, ConstantValue&& value) + : Symbol(std::move(name), loc) + , value(std::move(value)) + , inferred_type(TypeExpr::new_atomic(this->value.is_int() ? TypeExpr::_Int : TypeExpr::_Slice)) { + } + + bool is_int_const() const { return value.is_int(); } + bool is_slice_const() const { return value.is_slice(); } + + td::RefInt256 as_int_const() const { return value.as_int(); } + const std::string& as_slice_const() const { return value.as_slice(); } +}; + +class GlobalSymbolTable { + std::unordered_map entries; + + static uint64_t key_hash(std::string_view name_key) { + return std::hash{}(name_key); + } + +public: + void add_function(const FunctionData* f_sym); + void add_global_var(const GlobalVarData* g_sym); + void add_global_const(const GlobalConstData* c_sym); + + const Symbol* lookup(std::string_view name) const { + const auto it = entries.find(key_hash(name)); + return it == entries.end() ? nullptr : it->second; + } +}; + +const Symbol* lookup_global_symbol(std::string_view name); } // namespace tolk diff --git a/tolk/tolk.cpp b/tolk/tolk.cpp index 9268cc62..f2255ce3 100644 --- a/tolk/tolk.cpp +++ b/tolk/tolk.cpp @@ -54,6 +54,14 @@ int tolk_proceed(const std::string &entrypoint_filename) { AllSrcFiles all_files = pipeline_discover_and_parse_sources("@stdlib/common.tolk", entrypoint_filename); pipeline_register_global_symbols(all_files); + pipeline_resolve_identifiers_and_assign_symbols(all_files); + pipeline_calculate_rvalue_lvalue(all_files); + pipeline_detect_unreachable_statements(all_files); + pipeline_infer_and_check_types(all_files); + pipeline_refine_lvalue_for_mutate_arguments(all_files); + pipeline_check_rvalue_lvalue(all_files); + pipeline_check_pure_impure_operations(all_files); + pipeline_constant_folding(all_files); pipeline_convert_ast_to_legacy_Expr_Op(all_files); pipeline_find_unused_symbols(); diff --git a/tolk/tolk.h b/tolk/tolk.h index 971ca35d..0408648f 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h 
@@ -22,6 +22,7 @@ #include "symtable.h" #include "crypto/common/refint.h" #include "td/utils/Status.h" +#include #include #include #include @@ -64,22 +65,27 @@ void unify(TypeExpr*& te1, TypeExpr*& te2); * */ -using const_idx_t = int; +typedef int var_idx_t; +typedef int const_idx_t; struct TmpVar { TypeExpr* v_type; var_idx_t idx; - sym_idx_t sym_idx; + const LocalVarData* v_sym; // points to var defined in code; nullptr for implicitly created tmp vars int coord; SrcLocation where; std::vector> on_modification; - TmpVar(var_idx_t _idx, TypeExpr* _type, sym_idx_t sym_idx, SrcLocation loc); - bool is_unnamed() const { return sym_idx == 0; } + TmpVar(var_idx_t _idx, TypeExpr* _type, const LocalVarData* v_sym, SrcLocation loc) + : v_type(_type) + , idx(_idx) + , v_sym(v_sym) + , coord(0) + , where(loc) { + } void show(std::ostream& os, int omit_idx = 0) const; void dump(std::ostream& os) const; - void set_location(SrcLocation loc); }; struct VarDescr { @@ -171,7 +177,6 @@ struct VarDescr { void set_const(long long value); void set_const(td::RefInt256 value); void set_const(std::string value); - void set_const_nan(); void operator+=(const VarDescr& y) { flags &= y.flags; } @@ -303,7 +308,8 @@ struct Op { enum { _Disabled = 1, _NoReturn = 4, _Impure = 24 }; int flags; std::unique_ptr next; - SymDef* fun_ref; // despite its name, it may actually ref global var; applicable not only to Op::_Call, but for other kinds also + const FunctionData* f_sym = nullptr; + const GlobalVarData* g_sym = nullptr; SrcLocation where; VarDescrList var_info; std::vector args; @@ -311,27 +317,41 @@ struct Op { std::unique_ptr block0, block1; td::RefInt256 int_const; std::string str_const; - Op(SrcLocation _where = {}, OpKind _cl = _Undef) : cl(_cl), flags(0), fun_ref(nullptr), where(_where) { + Op(SrcLocation _where = {}, OpKind _cl = _Undef) : cl(_cl), flags(0), f_sym(nullptr), where(_where) { } Op(SrcLocation _where, OpKind _cl, const std::vector& _left) - : cl(_cl), flags(0), 
fun_ref(nullptr), where(_where), left(_left) { + : cl(_cl), flags(0), f_sym(nullptr), where(_where), left(_left) { } Op(SrcLocation _where, OpKind _cl, std::vector&& _left) - : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(std::move(_left)) { + : cl(_cl), flags(0), f_sym(nullptr), where(_where), left(std::move(_left)) { } Op(SrcLocation _where, OpKind _cl, const std::vector& _left, td::RefInt256 _const) - : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left), int_const(_const) { + : cl(_cl), flags(0), f_sym(nullptr), where(_where), left(_left), int_const(_const) { } Op(SrcLocation _where, OpKind _cl, const std::vector& _left, std::string _const) - : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left), str_const(_const) { + : cl(_cl), flags(0), f_sym(nullptr), where(_where), left(_left), str_const(_const) { + } + Op(SrcLocation _where, OpKind _cl, const std::vector& _left, const std::vector& _right) + : cl(_cl), flags(0), f_sym(nullptr), where(_where), left(_left), right(_right) { + } + Op(SrcLocation _where, OpKind _cl, std::vector&& _left, std::vector&& _right) + : cl(_cl), flags(0), f_sym(nullptr), where(_where), left(std::move(_left)), right(std::move(_right)) { } Op(SrcLocation _where, OpKind _cl, const std::vector& _left, const std::vector& _right, - SymDef* _fun = nullptr) - : cl(_cl), flags(0), fun_ref(_fun), where(_where), left(_left), right(_right) { + const FunctionData* _fun) + : cl(_cl), flags(0), f_sym(_fun), where(_where), left(_left), right(_right) { } Op(SrcLocation _where, OpKind _cl, std::vector&& _left, std::vector&& _right, - SymDef* _fun = nullptr) - : cl(_cl), flags(0), fun_ref(_fun), where(_where), left(std::move(_left)), right(std::move(_right)) { + const FunctionData* _fun) + : cl(_cl), flags(0), f_sym(_fun), where(_where), left(std::move(_left)), right(std::move(_right)) { + } + Op(SrcLocation _where, OpKind _cl, const std::vector& _left, const std::vector& _right, + const GlobalVarData* _gvar) + : 
cl(_cl), flags(0), g_sym(_gvar), where(_where), left(_left), right(_right) { + } + Op(SrcLocation _where, OpKind _cl, std::vector&& _left, std::vector&& _right, + const GlobalVarData* _gvar) + : cl(_cl), flags(0), g_sym(_gvar), where(_where), left(std::move(_left)), right(std::move(_right)) { } bool disabled() const { return flags & _Disabled; } @@ -343,8 +363,7 @@ struct Op { bool set_noreturn(bool flag); bool impure() const { return flags & _Impure; } - void set_impure(const CodeBlob &code); - void set_impure(const CodeBlob &code, bool flag); + void set_impure_flag(); void show(std::ostream& os, const std::vector& vars, std::string pfx = "", int mode = 0) const; void show_var_list(std::ostream& os, const std::vector& idx_list, const std::vector& vars) const; @@ -391,247 +410,16 @@ inline ListIterator end(const Op* op_list) { return ListIterator{}; } -typedef std::tuple FormalArg; +typedef std::tuple FormalArg; typedef std::vector FormalArgList; struct AsmOpList; -/* - * - * SYMBOL VALUES - * - */ - -struct SymValVariable : SymValBase { - enum SymValFlag { - flagMutateParameter = 1, // parameter was declared with `mutate` keyword - flagImmutable = 2, // variable was declared via `val` (not `var`) - }; - int flags{0}; - - ~SymValVariable() override = default; - SymValVariable(int val, TypeExpr* sym_type) - : SymValBase(SymValKind::_Var, val, sym_type) {} - - bool is_function_parameter() const { - return idx >= 0; - } - bool is_mutate_parameter() const { - return flags & flagMutateParameter; - } - bool is_local_var() const { - return idx == -1; - } - bool is_immutable() const { - return flags & flagImmutable; - } -}; - -struct SymValFunc : SymValBase { - enum SymValFlag { - flagInline = 1, // marked `@inline` - flagInlineRef = 2, // marked `@inline_ref` - flagUsedAsNonCall = 8, // used not only as `f()`, but as a 1-st class function (assigned to var, pushed to tuple, etc.) 
- flagMarkedAsPure = 16, // declared as `pure`, can't call impure and access globals, unused invocations are optimized out - flagBuiltinFunction = 32, // was created via `define_builtin_func()`, not from source code - flagGetMethod = 64, // was declared via `get func(): T`, method_id is auto-assigned - flagIsEntrypoint = 128, // it's `main` / `onExternalMessage` / etc. - flagHasMutateParams = 256, // has parameters declared as `mutate` - flagAcceptsSelf = 512, // is a member function (has `self` first parameter) - flagReturnsSelf = 1024, // return type is `self` (returns the mutated 1st argument), calls can be chainable - }; - - td::RefInt256 method_id; // todo why int256? it's small - int flags{0}; - std::vector parameters; // [i]-th may be nullptr for underscore; if not, its val is SymValVariable - std::vector arg_order, ret_order; - - ~SymValFunc() override = default; - SymValFunc(std::vector parameters, int val, TypeExpr* sym_type, int flags) - : SymValBase(SymValKind::_Func, val, sym_type), flags(flags), parameters(std::move(parameters)) { - } - SymValFunc(std::vector parameters, int val, TypeExpr* sym_type, int flags, std::initializer_list arg_order, std::initializer_list ret_order) - : SymValBase(SymValKind::_Func, val, sym_type), flags(flags), parameters(std::move(parameters)), arg_order(arg_order), ret_order(ret_order) { - } - - const std::vector* get_arg_order() const { - return arg_order.empty() ? nullptr : &arg_order; - } - const std::vector* get_ret_order() const { - return ret_order.empty() ? 
nullptr : &ret_order; - } - - bool is_inline() const { - return flags & flagInline; - } - bool is_inline_ref() const { - return flags & flagInlineRef; - } - bool is_marked_as_pure() const { - return flags & flagMarkedAsPure; - } - bool is_builtin() const { - return flags & flagBuiltinFunction; - } - bool is_get_method() const { - return flags & flagGetMethod; - } - bool is_entrypoint() const { - return flags & flagIsEntrypoint; - } - bool has_mutate_params() const { - return flags & flagHasMutateParams; - } - bool does_accept_self() const { - return flags & flagAcceptsSelf; - } - bool does_return_self() const { - return flags & flagReturnsSelf; - } -}; - -struct SymValCodeFunc : SymValFunc { +struct FunctionBodyCode { CodeBlob* code; - bool is_really_used{false}; // calculated via dfs; unused functions are not codegenerated - ~SymValCodeFunc() override = default; - SymValCodeFunc(std::vector parameters, int val, TypeExpr* _ft) - : SymValFunc(std::move(parameters), val, _ft, 0), code(nullptr) { - } - bool does_need_codegen() const; void set_code(CodeBlob* code); }; -struct SymValGlobVar : SymValBase { - bool is_really_used{false}; // calculated via dfs from used functions; unused globals are not codegenerated - - SymValGlobVar(int val, TypeExpr* gvtype) - : SymValBase(SymValKind::_GlobVar, val, gvtype) { - } - ~SymValGlobVar() override = default; -}; - -struct SymValConst : SymValBase { - enum ConstKind { IntConst, SliceConst }; - - td::RefInt256 intval; - std::string strval; - ConstKind kind; - - SymValConst(int idx, td::RefInt256 value) - : SymValBase(SymValKind::_Const, idx, TypeExpr::new_atomic(TypeExpr::_Int)), intval(std::move(value)), kind(IntConst) { - } - SymValConst(int idx, std::string value) - : SymValBase(SymValKind::_Const, idx, TypeExpr::new_atomic(TypeExpr::_Slice)), strval(std::move(value)), kind(SliceConst) { - } - ~SymValConst() override = default; - td::RefInt256 get_int_value() const { - return intval; - } - std::string get_str_value() const { - 
return strval; - } - ConstKind get_kind() const { - return kind; - } -}; - - -/* - * - * EXPRESSIONS - * - */ - -struct Expr { - enum ExprCls { - _Apply, - _VarApply, - _GrabMutatedVars, - _ReturnSelf, - _MkTuple, - _Tensor, - _Const, - _Var, - _GlobFunc, - _GlobVar, - _Letop, - _Hole, - _CondExpr, - _SliceConst, - }; - ExprCls cls; - int val{0}; - enum { _IsRvalue = 2, _IsLvalue = 4, _IsImmutable = 8, _IsImpure = 32 }; - int flags{0}; - SrcLocation here; - td::RefInt256 intval; - std::string strval; - SymDef* sym{nullptr}; - TypeExpr* e_type{nullptr}; - std::vector args; - Expr(ExprCls c, SrcLocation loc) : cls(c), here(loc) { - } - Expr(ExprCls c, std::vector _args) : cls(c), args(std::move(_args)) { - } - Expr(ExprCls c, std::initializer_list _arglist) : cls(c), args(std::move(_arglist)) { - } - Expr(ExprCls c, SymDef* _sym, std::initializer_list _arglist) : cls(c), sym(_sym), args(std::move(_arglist)) { - } - Expr(ExprCls c, SymDef* _sym, std::vector _arglist) : cls(c), sym(_sym), args(std::move(_arglist)) { - } - Expr(ExprCls c, sym_idx_t name_idx, std::initializer_list _arglist); - ~Expr() { - for (auto& arg_ptr : args) { - delete arg_ptr; - } - } - Expr* copy() const; - void pb_arg(Expr* expr) { - args.push_back(expr); - } - void set_val(int _val) { - val = _val; - } - bool is_rvalue() const { - return flags & _IsRvalue; - } - bool is_lvalue() const { - return flags & _IsLvalue; - } - bool is_immutable() const { - return flags & _IsImmutable; - } - bool is_mktuple() const { - return cls == _MkTuple; - } - void chk_rvalue() const { - if (!is_rvalue()) { - fire_error_rvalue_expected(); - } - } - void deduce_type(); - void set_location(SrcLocation loc) { - here = loc; - } - SrcLocation get_location() const { - return here; - } - void define_new_vars(CodeBlob& code); - void predefine_vars(); - std::vector pre_compile(CodeBlob& code, std::vector>* lval_globs = nullptr) const; - var_idx_t new_tmp(CodeBlob& code) const; - std::vector new_tmp_vect(CodeBlob& code) 
const { - return {new_tmp(code)}; - } - - GNU_ATTRIBUTE_COLD GNU_ATTRIBUTE_NORETURN - void fire_error_rvalue_expected() const; - GNU_ATTRIBUTE_COLD GNU_ATTRIBUTE_NORETURN - void fire_error_lvalue_expected(const std::string& details) const; - GNU_ATTRIBUTE_COLD GNU_ATTRIBUTE_NORETURN - void fire_error_modifying_immutable(const std::string& details) const; -}; - /* * * GENERATE CODE @@ -651,7 +439,6 @@ struct AsmOp { int a, b; bool gconst{false}; std::string op; - td::RefInt256 origin; struct SReg { int idx; SReg(int _idx) : idx(_idx) { @@ -671,9 +458,6 @@ struct AsmOp { AsmOp(Type _t, int _a, int _b, std::string _op) : t(_t), a(_a), b(_b), op(std::move(_op)) { compute_gconst(); } - AsmOp(Type _t, int _a, int _b, std::string _op, td::RefInt256 x) : t(_t), a(_a), b(_b), op(std::move(_op)), origin(x) { - compute_gconst(); - } void out(std::ostream& os) const; void out_indent_nl(std::ostream& os, bool no_nl = false) const; std::string to_string() const; @@ -786,20 +570,20 @@ struct AsmOp { static AsmOp BlkReverse(int a, int b); static AsmOp make_stk2(int a, int b, const char* str, int delta); static AsmOp make_stk3(int a, int b, int c, const char* str, int delta); - static AsmOp IntConst(td::RefInt256 value); + static AsmOp IntConst(const td::RefInt256& x); static AsmOp BoolConst(bool f); - static AsmOp Const(std::string push_op, td::RefInt256 origin = {}) { - return AsmOp(a_const, 0, 1, std::move(push_op), origin); + static AsmOp Const(std::string push_op) { + return AsmOp(a_const, 0, 1, std::move(push_op)); } - static AsmOp Const(int arg, std::string push_op, td::RefInt256 origin = {}); - static AsmOp Comment(std::string comment) { + static AsmOp Const(int arg, const std::string& push_op); + static AsmOp Comment(const std::string& comment) { return AsmOp(a_none, std::string{"// "} + comment); } - static AsmOp Custom(std::string custom_op) { + static AsmOp Custom(const std::string& custom_op) { return AsmOp(a_custom, 255, 255, custom_op); } - static AsmOp 
Parse(std::string custom_op); - static AsmOp Custom(std::string custom_op, int args, int retv = 1) { + static AsmOp Parse(const std::string& custom_op); + static AsmOp Custom(const std::string& custom_op, int args, int retv = 1) { return AsmOp(a_custom, args, retv, custom_op); } static AsmOp Parse(std::string custom_op, int args, int retv = 1); @@ -887,18 +671,6 @@ inline std::ostream& operator<<(std::ostream& os, const AsmOpList& op_list) { return os; } -class IndentGuard { - AsmOpList& aol_; - - public: - IndentGuard(AsmOpList& aol) : aol_(aol) { - aol.indent(); - } - ~IndentGuard() { - aol_.undent(); - } -}; - struct AsmOpCons { std::unique_ptr car; std::unique_ptr cdr; @@ -1321,71 +1093,57 @@ struct Stack { */ typedef std::function&, std::vector&, SrcLocation)> simple_compile_func_t; -typedef std::function&, std::vector&)> compile_func_t; inline simple_compile_func_t make_simple_compile(AsmOp op) { return [op](std::vector& out, std::vector& in, SrcLocation) -> AsmOp { return op; }; } -inline compile_func_t make_ext_compile(std::vector&& ops) { - return [ops = std::move(ops)](AsmOpList& dest, std::vector& out, std::vector& in)->bool { - return dest.append(ops); - }; -} - -inline compile_func_t make_ext_compile(AsmOp op) { - return - [op](AsmOpList& dest, std::vector& out, std::vector& in) -> bool { return dest.append(op); }; -} - -struct SymValAsmFunc : SymValFunc { +struct FunctionBodyBuiltin { simple_compile_func_t simple_compile; - compile_func_t ext_compile; - ~SymValAsmFunc() override = default; - SymValAsmFunc(std::vector parameters, TypeExpr* ft, std::vector&& arg_order, std::vector&& ret_order, int flags) - : SymValFunc(std::move(parameters), -1, ft, flags) { - this->arg_order = std::move(arg_order); - this->ret_order = std::move(ret_order); - } - SymValAsmFunc(std::vector parameters, TypeExpr* ft, simple_compile_func_t _compile, int flags) - : SymValFunc(std::move(parameters), -1, ft, flags), simple_compile(std::move(_compile)) { - } - 
SymValAsmFunc(std::vector parameters, TypeExpr* ft, simple_compile_func_t _compile, int flags, - std::initializer_list arg_order, std::initializer_list ret_order) - : SymValFunc(std::move(parameters), -1, ft, flags, arg_order, ret_order), simple_compile(std::move(_compile)) { - } - void set_code(std::vector code); - bool compile(AsmOpList& dest, std::vector& out, std::vector& in, SrcLocation where) const; + + explicit FunctionBodyBuiltin(simple_compile_func_t compile) + : simple_compile(std::move(compile)) {} + + void compile(AsmOpList& dest, std::vector& out, std::vector& in, SrcLocation where) const; +}; + +struct FunctionBodyAsm { + std::vector ops; + + void set_code(std::vector&& code); + void compile(AsmOpList& dest) const; }; struct CodeBlob { - enum { _ForbidImpure = 4 }; int var_cnt, in_var_cnt; TypeExpr* ret_type; - const SymValCodeFunc* func_val; + const FunctionData* fun_ref; std::string name; SrcLocation loc; std::vector vars; std::unique_ptr ops; std::unique_ptr* cur_ops; - std::vector debug_ttt; +#ifdef TOLK_DEBUG + std::vector _vector_of_ops; // to see it in debugger instead of nested pointers +#endif std::stack*> cur_ops_stack; - int flags = 0; bool require_callxargs = false; - CodeBlob(std::string name, SrcLocation loc, const SymValCodeFunc* func_val, TypeExpr* ret_type) - : var_cnt(0), in_var_cnt(0), ret_type(ret_type), func_val(func_val), name(std::move(name)), loc(loc), cur_ops(&ops) { + CodeBlob(std::string name, SrcLocation loc, const FunctionData* fun_ref, TypeExpr* ret_type) + : var_cnt(0), in_var_cnt(0), ret_type(ret_type), fun_ref(fun_ref), name(std::move(name)), loc(loc), cur_ops(&ops) { } template Op& emplace_back(Args&&... 
args) { Op& res = *(*cur_ops = std::make_unique(args...)); cur_ops = &(res.next); - debug_ttt.push_back(&res); +#ifdef TOLK_DEBUG + _vector_of_ops.push_back(&res); +#endif return res; } - bool import_params(FormalArgList arg_list); - var_idx_t create_var(TypeExpr* var_type, var_idx_t sym_idx, SrcLocation loc); + bool import_params(FormalArgList&& arg_list); + var_idx_t create_var(TypeExpr* var_type, const LocalVarData* v_sym, SrcLocation loc); var_idx_t create_tmp_var(TypeExpr* var_type, SrcLocation loc) { - return create_var(var_type, 0, loc); + return create_var(var_type, nullptr, loc); } int split_vars(bool strict = false); bool compute_used_code_vars(); @@ -1413,9 +1171,11 @@ struct CodeBlob { void generate_code(AsmOpList& out_list, int mode = 0); void generate_code(std::ostream& os, int mode = 0, int indent = 0); - void on_var_modification(var_idx_t idx, SrcLocation here) const { - for (auto& f : vars.at(idx).on_modification) { - f(here); + void on_var_modification(const std::vector& left_lval_indices, SrcLocation here) const { + for (var_idx_t ir_idx : left_lval_indices) { + for (auto& f : vars.at(ir_idx).on_modification) { + f(here); + } } } }; diff --git a/tolk/unify-types.cpp b/tolk/unify-types.cpp index cee71942..3712c6f5 100644 --- a/tolk/unify-types.cpp +++ b/tolk/unify-types.cpp @@ -121,7 +121,7 @@ bool TypeExpr::equals_to(const TypeExpr *rhs) const { while (r->constr == te_Indirect) r = r->args[0]; - bool eq = l->constr == r->constr && l->value == r->value && + bool eq = l->constr == r->constr && (l->constr == te_Unknown || l->value == r->value) && l->minw == r->minw && l->maxw == r->maxw && l->was_forall_var == r->was_forall_var && l->args.size() == r->args.size(); From dc2f0dad818bea29d9a5c2549a6c3861e8c60129 Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Mon, 13 Jan 2025 17:39:56 +0300 Subject: [PATCH 07/61] Add extra currencies to c7 in tonlib runGetMethod --- crypto/smc-envelope/SmartContract.cpp | 21 +++++++++++---------- 
crypto/smc-envelope/SmartContract.h | 5 +++++ tonlib/tonlib/TonlibClient.cpp | 21 ++++++++++++--------- 3 files changed, 28 insertions(+), 19 deletions(-) diff --git a/crypto/smc-envelope/SmartContract.cpp b/crypto/smc-envelope/SmartContract.cpp index 2578a951..8ec2c146 100644 --- a/crypto/smc-envelope/SmartContract.cpp +++ b/crypto/smc-envelope/SmartContract.cpp @@ -149,16 +149,17 @@ td::Ref prepare_vm_c7(SmartContract::Args args, td::Ref cod } std::vector tuple = { - td::make_refint(0x076ef1ea), // [ magic:0x076ef1ea - td::make_refint(0), // actions:Integer - td::make_refint(0), // msgs_sent:Integer - td::make_refint(now), // unixtime:Integer - td::make_refint(0), //TODO: // block_lt:Integer - td::make_refint(0), //TODO: // trans_lt:Integer - std::move(rand_seed_int), // rand_seed:Integer - block::CurrencyCollection(args.balance).as_vm_tuple(), // balance_remaining:[Integer (Maybe Cell)] - vm::load_cell_slice_ref(address), // myself:MsgAddressInt - vm::StackEntry::maybe(config) // vm::StackEntry::maybe(td::Ref()) + td::make_refint(0x076ef1ea), // [ magic:0x076ef1ea + td::make_refint(0), // actions:Integer + td::make_refint(0), // msgs_sent:Integer + td::make_refint(now), // unixtime:Integer + td::make_refint(0), // block_lt:Integer (TODO) + td::make_refint(0), // trans_lt:Integer (TODO) + std::move(rand_seed_int), // rand_seed:Integer + block::CurrencyCollection(args.balance, args.extra_currencies) + .as_vm_tuple(), // balance_remaining:[Integer (Maybe Cell)] + vm::load_cell_slice_ref(address), // myself:MsgAddressInt + vm::StackEntry::maybe(config) // vm::StackEntry::maybe(td::Ref()) }; if (args.config && args.config.value()->get_global_version() >= 4) { tuple.push_back(vm::StackEntry::maybe(code)); // code:Cell diff --git a/crypto/smc-envelope/SmartContract.h b/crypto/smc-envelope/SmartContract.h index 7fc93579..49edb969 100644 --- a/crypto/smc-envelope/SmartContract.h +++ b/crypto/smc-envelope/SmartContract.h @@ -64,6 +64,7 @@ class SmartContract : public 
td::CntObject { bool ignore_chksig{false}; td::uint64 amount{0}; td::uint64 balance{0}; + td::Ref extra_currencies; int vm_log_verbosity_level{0}; bool debug_enabled{false}; @@ -121,6 +122,10 @@ class SmartContract : public td::CntObject { this->balance = balance; return std::move(*this); } + Args&& set_extra_currencies(td::Ref extra_currencies) { + this->extra_currencies = std::move(extra_currencies); + return std::move(*this); + } Args&& set_address(block::StdAddress address) { this->address = address; return std::move(*this); diff --git a/tonlib/tonlib/TonlibClient.cpp b/tonlib/tonlib/TonlibClient.cpp index 507512d0..d917a57a 100644 --- a/tonlib/tonlib/TonlibClient.cpp +++ b/tonlib/tonlib/TonlibClient.cpp @@ -1050,15 +1050,17 @@ class Query { } vm::GasLimits gas_limits = compute_gas_limits(td::make_refint(raw_.source->get_balance()), gas_limits_prices); - auto res = smc.write().send_external_message(raw_.message_body, ton::SmartContract::Args() - .set_limits(gas_limits) - .set_balance(raw_.source->get_balance()) - .set_now(raw_.source->get_sync_time()) - .set_ignore_chksig(ignore_chksig) - .set_address(raw_.source->get_address()) - .set_config(cfg) - .set_prev_blocks_info(state.prev_blocks_info) - .set_libraries(libraries)); + auto res = smc.write().send_external_message(raw_.message_body, + ton::SmartContract::Args() + .set_limits(gas_limits) + .set_balance(raw_.source->get_balance()) + .set_extra_currencies(raw_.source->get_extra_currencies()) + .set_now(raw_.source->get_sync_time()) + .set_ignore_chksig(ignore_chksig) + .set_address(raw_.source->get_address()) + .set_config(cfg) + .set_prev_blocks_info(state.prev_blocks_info) + .set_libraries(libraries)); td::int64 fwd_fee = 0; if (res.success) { LOG(DEBUG) << "output actions:\n" @@ -4790,6 +4792,7 @@ td::Status TonlibClient::do_request(const tonlib_api::smc_runGetMethod& request, } args.set_stack(std::move(stack)); args.set_balance(it->second->get_balance()); + 
args.set_extra_currencies(it->second->get_extra_currencies()); args.set_now(it->second->get_sync_time()); args.set_address(it->second->get_address()); From 4ddb14c136d18a8d462c70f775115e925f701b5b Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Mon, 13 Jan 2025 17:40:16 +0300 Subject: [PATCH 08/61] Fix double tilde for crc computation in tlbc --- crypto/tl/tlbc.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/crypto/tl/tlbc.cpp b/crypto/tl/tlbc.cpp index 0050e161..d3a6edb5 100644 --- a/crypto/tl/tlbc.cpp +++ b/crypto/tl/tlbc.cpp @@ -1800,9 +1800,6 @@ void Constructor::show(std::ostream& os, int mode) const { } for (int i = 0; i < type_arity; i++) { os << ' '; - if (param_negated.at(i)) { - os << '~'; - } params.at(i)->show(os, this, 100, mode | 1); } if (!(mode & 2)) { From cae9ccfacf594c44852f320330ef512b3f0c859b Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Mon, 13 Jan 2025 17:41:10 +0300 Subject: [PATCH 09/61] Retry dht query in adnl-peer if peer does not respond for too long --- adnl/adnl-peer.cpp | 14 ++++++++++++++ adnl/adnl-peer.hpp | 1 + 2 files changed, 15 insertions(+) diff --git a/adnl/adnl-peer.cpp b/adnl/adnl-peer.cpp index 7f5c6039..ab460058 100644 --- a/adnl/adnl-peer.cpp +++ b/adnl/adnl-peer.cpp @@ -119,6 +119,7 @@ void AdnlPeerPairImpl::discover() { void AdnlPeerPairImpl::receive_packet_checked(AdnlPacket packet) { last_received_packet_ = td::Timestamp::now(); try_reinit_at_ = td::Timestamp::never(); + drop_addr_list_at_ = td::Timestamp::never(); request_reverse_ping_after_ = td::Timestamp::in(15.0); auto d = Adnl::adnl_start_time(); if (packet.dst_reinit_date() > d) { @@ -415,6 +416,9 @@ void AdnlPeerPairImpl::send_packet_continue(AdnlPacket packet, td::actor::ActorI if (!try_reinit_at_ && last_received_packet_ < td::Timestamp::in(-5.0)) { try_reinit_at_ = td::Timestamp::in(10.0); } + if (!drop_addr_list_at_ && last_received_packet_ < td::Timestamp::in(-60.0 * 9.0)) { + drop_addr_list_at_ = td::Timestamp::in(60.0); + } 
packet.run_basic_checks().ensure(); auto B = serialize_tl_object(packet.tl(), true); if (via_channel) { @@ -692,6 +696,16 @@ void AdnlPeerPairImpl::reinit(td::int32 date) { } td::Result, bool>> AdnlPeerPairImpl::get_conn() { + if (drop_addr_list_at_ && drop_addr_list_at_.is_in_past()) { + drop_addr_list_at_ = td::Timestamp::never(); + priority_addr_list_ = AdnlAddressList{}; + priority_conns_.clear(); + addr_list_ = AdnlAddressList{}; + conns_.clear(); + has_reverse_addr_ = false; + return td::Status::Error(ErrorCode::notready, "no active connections"); + } + if (!priority_addr_list_.empty() && priority_addr_list_.expire_at() < td::Clocks::system()) { priority_addr_list_ = AdnlAddressList{}; priority_conns_.clear(); diff --git a/adnl/adnl-peer.hpp b/adnl/adnl-peer.hpp index 7db2e2a1..243974ba 100644 --- a/adnl/adnl-peer.hpp +++ b/adnl/adnl-peer.hpp @@ -266,6 +266,7 @@ class AdnlPeerPairImpl : public AdnlPeerPair { td::Timestamp last_received_packet_ = td::Timestamp::never(); td::Timestamp try_reinit_at_ = td::Timestamp::never(); + td::Timestamp drop_addr_list_at_ = td::Timestamp::never(); bool has_reverse_addr_ = false; td::Timestamp request_reverse_ping_after_ = td::Timestamp::now(); From 87c4b4a5d4d5c6a12f12d446aff8b40d36223245 Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Mon, 13 Jan 2025 17:41:50 +0300 Subject: [PATCH 10/61] Fix handling small out-of-sync in validate-query --- validator/downloaders/wait-block-state.cpp | 5 +- validator/impl/validate-query.cpp | 59 ++++++++++++---------- validator/impl/validate-query.hpp | 1 + validator/manager.cpp | 2 +- 4 files changed, 37 insertions(+), 30 deletions(-) diff --git a/validator/downloaders/wait-block-state.cpp b/validator/downloaders/wait-block-state.cpp index b61b9492..c80e7d89 100644 --- a/validator/downloaders/wait-block-state.cpp +++ b/validator/downloaders/wait-block-state.cpp @@ -67,7 +67,8 @@ void WaitBlockState::start() { if (reading_from_db_) { return; } - if (handle_->received_state()) { + bool 
inited_proof = handle_->id().is_masterchain() ? handle_->inited_proof() : handle_->inited_proof_link(); + if (handle_->received_state() && inited_proof) { reading_from_db_ = true; auto P = td::PromiseCreator::lambda([SelfId = actor_id(this)](td::Result> R) { @@ -107,7 +108,7 @@ void WaitBlockState::start() { }); td::actor::send_closure(manager_, &ValidatorManager::send_get_zero_state_request, handle_->id(), priority_, std::move(P)); - } else if (check_persistent_state_desc()) { + } else if (check_persistent_state_desc() && !handle_->received_state()) { auto P = td::PromiseCreator::lambda([SelfId = actor_id(this)](td::Result> R) { if (R.is_error()) { LOG(WARNING) << "failed to get persistent state: " << R.move_as_error(); diff --git a/validator/impl/validate-query.cpp b/validator/impl/validate-query.cpp index 8490567e..9a66ea81 100644 --- a/validator/impl/validate-query.cpp +++ b/validator/impl/validate-query.cpp @@ -346,16 +346,7 @@ void ValidateQuery::start_up() { // return; } } - // 2. learn latest masterchain state and block id - LOG(DEBUG) << "sending get_top_masterchain_state_block() to Manager"; - ++pending; - td::actor::send_closure_later(manager, &ValidatorManager::get_top_masterchain_state_block, - [self = get_self()](td::Result, BlockIdExt>> res) { - LOG(DEBUG) << "got answer to get_top_masterchain_state_block"; - td::actor::send_closure_later( - std::move(self), &ValidateQuery::after_get_latest_mc_state, std::move(res)); - }); - // 3. load state(s) corresponding to previous block(s) + // 2. load state(s) corresponding to previous block(s) prev_states.resize(prev_blocks.size()); for (int i = 0; (unsigned)i < prev_blocks.size(); i++) { // 3.1. load state @@ -368,21 +359,13 @@ void ValidateQuery::start_up() { std::move(self), &ValidateQuery::after_get_shard_state, i, std::move(res)); }); } - // 4. unpack block candidate (while necessary data is being loaded) + // 3. 
unpack block candidate (while necessary data is being loaded) if (!unpack_block_candidate()) { reject_query("error unpacking block candidate"); return; } - // 5. request masterchain state referred to in the block + // 4. request masterchain handle and state referred to in the block if (!is_masterchain()) { - ++pending; - td::actor::send_closure_later(manager, &ValidatorManager::wait_block_state_short, mc_blkid_, priority(), timeout, - [self = get_self()](td::Result> res) { - LOG(DEBUG) << "got answer to wait_block_state() query for masterchain block"; - td::actor::send_closure_later(std::move(self), &ValidateQuery::after_get_mc_state, - std::move(res)); - }); - // 5.1. request corresponding block handle ++pending; td::actor::send_closure_later(manager, &ValidatorManager::get_block_handle, mc_blkid_, true, [self = get_self()](td::Result res) { @@ -663,6 +646,19 @@ bool ValidateQuery::extract_collated_data() { return true; } +/** + * Send get_top_masterchain_state_block to manager, call after_get_latest_mc_state afterwards + */ +void ValidateQuery::request_latest_mc_state() { + ++pending; + td::actor::send_closure_later(manager, &ValidatorManager::get_top_masterchain_state_block, + [self = get_self()](td::Result, BlockIdExt>> res) { + LOG(DEBUG) << "got answer to get_top_masterchain_state_block"; + td::actor::send_closure_later( + std::move(self), &ValidateQuery::after_get_latest_mc_state, std::move(res)); + }); +} + /** * Callback function called after retrieving the latest masterchain state. 
* @@ -710,6 +706,7 @@ void ValidateQuery::after_get_latest_mc_state(td::Result> res) { + CHECK(!is_masterchain()); LOG(WARNING) << "in ValidateQuery::after_get_mc_state() for " << mc_blkid_.to_str(); --pending; if (res.is_error()) { @@ -720,6 +717,7 @@ void ValidateQuery::after_get_mc_state(td::Result> res) { fatal_error("cannot process masterchain state for "s + mc_blkid_.to_str()); return; } + request_latest_mc_state(); if (!pending) { if (!try_validate()) { fatal_error("cannot validate new block"); @@ -734,17 +732,21 @@ void ValidateQuery::after_get_mc_state(td::Result> res) { */ void ValidateQuery::got_mc_handle(td::Result res) { LOG(DEBUG) << "in ValidateQuery::got_mc_handle() for " << mc_blkid_.to_str(); - --pending; if (res.is_error()) { fatal_error(res.move_as_error()); return; } - auto handle = res.move_as_ok(); - if (!handle->inited_proof() && mc_blkid_.seqno()) { - fatal_error(-666, "reference masterchain block "s + mc_blkid_.to_str() + " for block " + id_.to_str() + - " does not have a valid proof"); - return; - } + auto mc_handle = res.move_as_ok(); + td::actor::send_closure_later( + manager, &ValidatorManager::wait_block_state, mc_handle, priority(), timeout, + [self = get_self(), id = id_, mc_handle](td::Result> res) { + LOG(DEBUG) << "got answer to wait_block_state() query for masterchain block"; + if (res.is_ok() && mc_handle->id().seqno() > 0 && !mc_handle->inited_proof()) { + res = td::Status::Error(-666, "reference masterchain block "s + mc_handle->id().to_str() + " for block " + + id.to_str() + " does not have a valid proof"); + } + td::actor::send_closure_later(std::move(self), &ValidateQuery::after_get_mc_state, std::move(res)); + }); } /** @@ -778,6 +780,9 @@ void ValidateQuery::after_get_shard_state(int idx, td::Result> r return; } } + if (is_masterchain()) { + request_latest_mc_state(); + } if (!pending) { if (!try_validate()) { fatal_error("cannot validate new block"); diff --git a/validator/impl/validate-query.hpp 
b/validator/impl/validate-query.hpp index 52d4968a..98cd2493 100644 --- a/validator/impl/validate-query.hpp +++ b/validator/impl/validate-query.hpp @@ -284,6 +284,7 @@ class ValidateQuery : public td::actor::Actor { return actor_id(this); } + void request_latest_mc_state(); void after_get_latest_mc_state(td::Result, BlockIdExt>> res); void after_get_mc_state(td::Result> res); void got_mc_handle(td::Result res); diff --git a/validator/manager.cpp b/validator/manager.cpp index a631bd09..068ea5eb 100644 --- a/validator/manager.cpp +++ b/validator/manager.cpp @@ -1343,7 +1343,7 @@ void ValidatorManagerImpl::written_handle(BlockHandle handle, td::Promisesecond.actor_, &WaitBlockData::force_read_from_db); } } - if (inited_state) { + if (inited_state && inited_proof) { auto it = wait_state_.find(handle->id()); if (it != wait_state_.end()) { td::actor::send_closure(it->second.actor_, &WaitBlockState::force_read_from_db); From 652f4f01411913227cd554cfe88374ae81e8fe95 Mon Sep 17 00:00:00 2001 From: crStiv Date: Wed, 15 Jan 2025 08:36:46 +0100 Subject: [PATCH 11/61] Update Changelog.md (#1476) Co-authored-by: EmelyanenkoK --- Changelog.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Changelog.md b/Changelog.md index 15e2450d..fd513bc8 100644 --- a/Changelog.md +++ b/Changelog.md @@ -31,7 +31,7 @@ Besides the work of the core team, this update is based on the efforts of @krigg ## 2024.08 Update 1. Introduction of dispatch queues, message envelopes with transaction chain metadata, and explicitly stored msg_queue size, which will be activated by `Config8.version >= 8` and new `Config8.capabilities` bits: `capStoreOutMsgQueueSize`, `capMsgMetadata`, `capDeferMessages`. -2. A number of changes to transcation executor which will activated for `Config8.version >= 8`: +2. A number of changes to transaction executor which will activated for `Config8.version >= 8`: - Check mode on invalid `action_send_msg`. 
Ignore action if `IGNORE_ERROR` (+2) bit is set, bounce if `BOUNCE_ON_FAIL` (+16) bit is set. - Slightly change random seed generation to fix mix of `addr_rewrite` and `addr`. - Fill in `skipped_actions` for both invalid and valid messages with `IGNORE_ERROR` mode that can't be sent. @@ -103,7 +103,7 @@ Besides the work of the core team, this update is based on the efforts of @akifo * Fix error in proof generation for blocks after merge * Fix most of `block is not applied` issues related to sending too recent block in Proofs * LS now check external messages till `accept_message` (`set_gas`). -3. Improvements in DHT work and storage, CellDb, config.json ammendment, peer misbehavior detection, validator session stats collection, emulator. +3. Improvements in DHT work and storage, CellDb, config.json amendment, peer misbehavior detection, validator session stats collection, emulator. 4. Change in CTOS and XLOAD behavior activated through setting `version >= 5` in `ConfigParam 8;`: * Loading "nested libraries" (i.e. a library cell that points to another library cell) throws an exception. * Loading a library consumes gas for cell load only once (for the library cell), not twice (both for the library cell and the cell in the library). @@ -114,7 +114,7 @@ Besides the work of the Core team, this update is based on the efforts of @XaBbl ## 2023.12 Update 1. Optimized message queue handling, now queue cleaning speed doesn't depend on total queue size - * Cleaning delivered messages using lt augmentation instead of random search / consequtive walk + * Cleaning delivered messages using lt augmentation instead of random search / consecutive walk * Keeping root cell of queue message in memory until outdated (caching) 2. Changes to block collation/validation limits 3. 
Stop accepting new external message if message queue is overloaded @@ -206,7 +206,7 @@ Besides the work of the core team, this update is based on the efforts of @vtama Besides the work of the core team, this update is based on the efforts of @tvorogme (debug improvements), @AlexeyFSL (WASM builds) and third-party security auditors. ## 2022.08 Update -* Blockchain state serialization now works via separate db-handler which simplfies memory clearing after serialization +* Blockchain state serialization now works via separate db-handler which simplifies memory clearing after serialization * CellDB now works asynchronously which substantially increase database access throughput * Abseil-cpp and crc32 updated: solve issues with compilation on recent OS distributives * Fixed a series of UBs and issues for exotic endianness hosts From f6fa986b3326888a27a1161725c3034a7e558862 Mon Sep 17 00:00:00 2001 From: "Victor S." <53380262+1IxI1@users.noreply.github.com> Date: Wed, 15 Jan 2025 10:39:05 +0300 Subject: [PATCH 12/61] Fix *DATASIZE* opcode log msg (#1465) Co-authored-by: EmelyanenkoK --- crypto/vm/tonops.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/vm/tonops.cpp b/crypto/vm/tonops.cpp index 6c698df4..6eebbc6d 100644 --- a/crypto/vm/tonops.cpp +++ b/crypto/vm/tonops.cpp @@ -1293,7 +1293,7 @@ void register_ton_crypto_ops(OpcodeTable& cp0) { } int exec_compute_data_size(VmState* st, int mode) { - VM_LOG(st) << (mode & 2 ? 'S' : 'C') << "DATASIZE" << (mode & 1 ? "Q" : ""); + VM_LOG(st) << "execute " << (mode & 2 ? 'S' : 'C') << "DATASIZE" << (mode & 1 ? 
"Q" : ""); Stack& stack = st->get_stack(); stack.check_underflow(2); auto bound = stack.pop_int(); From 62838571ebbb874985ec78146eee9c37868a2c03 Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Wed, 15 Jan 2025 07:43:33 +0000 Subject: [PATCH 13/61] Support extra currencies in reserve action with +2 flag (#1429) * Support extra currencies in reserve action with +2 flag * Enable new reserve behavior in version 9 --- crypto/block/block.cpp | 30 ++++++++++++++++++++++++++++++ crypto/block/block.h | 1 + crypto/block/transaction.cpp | 22 +++++++++++++--------- crypto/block/transaction.h | 1 + doc/GlobalVersions.md | 3 ++- validator/impl/validate-query.cpp | 1 + 6 files changed, 48 insertions(+), 10 deletions(-) diff --git a/crypto/block/block.cpp b/crypto/block/block.cpp index 98546a2d..302a2aa4 100644 --- a/crypto/block/block.cpp +++ b/crypto/block/block.cpp @@ -1319,6 +1319,36 @@ CurrencyCollection CurrencyCollection::operator-(td::RefInt256 other_grams) cons } } +bool CurrencyCollection::clamp(const CurrencyCollection& other) { + if (!is_valid() || !other.is_valid()) { + return invalidate(); + } + grams = std::min(grams, other.grams); + vm::Dictionary dict1{extra, 32}, dict2(other.extra, 32); + bool ok = dict1.check_for_each([&](td::Ref cs1, td::ConstBitPtr key, int n) { + CHECK(n == 32); + td::Ref cs2 = dict2.lookup(key, 32); + td::RefInt256 val1 = tlb::t_VarUIntegerPos_32.as_integer(cs1); + if (val1.is_null()) { + return false; + } + td::RefInt256 val2 = cs2.is_null() ? 
td::zero_refint() : tlb::t_VarUIntegerPos_32.as_integer(cs2); + if (val2.is_null()) { + return false; + } + if (val1 > val2) { + if (val2->sgn() == 0) { + dict1.lookup_delete(key, 32); + } else { + dict1.set(key, 32, cs2); + } + } + return true; + }); + extra = dict1.get_root_cell(); + return ok || invalidate(); +} + bool CurrencyCollection::operator==(const CurrencyCollection& other) const { return is_valid() && other.is_valid() && !td::cmp(grams, other.grams) && (extra.not_null() == other.extra.not_null()) && diff --git a/crypto/block/block.h b/crypto/block/block.h index 56e6dd38..f64f00a8 100644 --- a/crypto/block/block.h +++ b/crypto/block/block.h @@ -390,6 +390,7 @@ struct CurrencyCollection { CurrencyCollection operator-(const CurrencyCollection& other) const; CurrencyCollection operator-(CurrencyCollection&& other) const; CurrencyCollection operator-(td::RefInt256 other_grams) const; + bool clamp(const CurrencyCollection& other); bool store(vm::CellBuilder& cb) const; bool store_or_zero(vm::CellBuilder& cb) const; bool fetch(vm::CellSlice& cs); diff --git a/crypto/block/transaction.cpp b/crypto/block/transaction.cpp index a32bad52..49325957 100644 --- a/crypto/block/transaction.cpp +++ b/crypto/block/transaction.cpp @@ -2760,22 +2760,25 @@ int Transaction::try_action_reserve_currency(vm::CellSlice& cs, ActionPhase& ap, LOG(DEBUG) << "cannot reserve a negative amount: " << reserve.to_str(); return -1; } - if (reserve.grams > ap.remaining_balance.grams) { - if (mode & 2) { - reserve.grams = ap.remaining_balance.grams; + if (mode & 2) { + if (cfg.reserve_extra_enabled) { + if (!reserve.clamp(ap.remaining_balance)) { + LOG(DEBUG) << "failed to clamp reserve amount" << mode; + return -1; + } } else { - LOG(DEBUG) << "cannot reserve " << reserve.grams << " nanograms : only " << ap.remaining_balance.grams - << " available"; - return 37; // not enough grams + reserve.grams = std::min(reserve.grams, ap.remaining_balance.grams); } } + if (reserve.grams > 
ap.remaining_balance.grams) { + LOG(DEBUG) << "cannot reserve " << reserve.grams << " nanograms : only " << ap.remaining_balance.grams + << " available"; + return 37; // not enough grams + } if (!block::sub_extra_currency(ap.remaining_balance.extra, reserve.extra, newc.extra)) { LOG(DEBUG) << "not enough extra currency to reserve: " << block::CurrencyCollection{0, reserve.extra}.to_str() << " required, only " << block::CurrencyCollection{0, ap.remaining_balance.extra}.to_str() << " available"; - if (mode & 2) { - // TODO: process (mode & 2) correctly by setting res_extra := inf (reserve.extra, ap.remaining_balance.extra) - } return 38; // not enough (extra) funds } newc.grams = ap.remaining_balance.grams - reserve.grams; @@ -3778,6 +3781,7 @@ td::Status FetchConfigParams::fetch_config_params( action_phase_cfg->bounce_on_fail_enabled = config.get_global_version() >= 4; action_phase_cfg->message_skip_enabled = config.get_global_version() >= 8; action_phase_cfg->disable_custom_fess = config.get_global_version() >= 8; + action_phase_cfg->reserve_extra_enabled = config.get_global_version() >= 9; action_phase_cfg->mc_blackhole_addr = config.get_burning_config().blackhole_addr; } { diff --git a/crypto/block/transaction.h b/crypto/block/transaction.h index 20d7cb29..0f6952dc 100644 --- a/crypto/block/transaction.h +++ b/crypto/block/transaction.h @@ -169,6 +169,7 @@ struct ActionPhaseConfig { bool bounce_on_fail_enabled{false}; bool message_skip_enabled{false}; bool disable_custom_fess{false}; + bool reserve_extra_enabled{false}; td::optional mc_blackhole_addr; const MsgPrices& fetch_msg_prices(bool is_masterchain) const { return is_masterchain ? 
fwd_mc : fwd_std; diff --git a/doc/GlobalVersions.md b/doc/GlobalVersions.md index 5db1ab76..1739b73a 100644 --- a/doc/GlobalVersions.md +++ b/doc/GlobalVersions.md @@ -122,4 +122,5 @@ Operations for working with Merkle proofs, where cells can have non-zero level a ### Other changes - Fix `RAWRESERVE` action with flag `4` (use original balance of the account) by explicitly setting `original_balance` to `balance - msg_balance_remaining`. - Previously it did not work if storage fee was greater than the original balance. -- Jumps to nested continuations of depth more than 8 consume 1 gas for eact subsequent continuation (this does not affect most of TVM code). \ No newline at end of file +- Jumps to nested continuations of depth more than 8 consume 1 gas for eact subsequent continuation (this does not affect most of TVM code). +- Support extra currencies in reserve action with `+2` mode. \ No newline at end of file diff --git a/validator/impl/validate-query.cpp b/validator/impl/validate-query.cpp index 9a66ea81..9e4d406e 100644 --- a/validator/impl/validate-query.cpp +++ b/validator/impl/validate-query.cpp @@ -1002,6 +1002,7 @@ bool ValidateQuery::fetch_config_params() { action_phase_cfg_.bounce_on_fail_enabled = config_->get_global_version() >= 4; action_phase_cfg_.message_skip_enabled = config_->get_global_version() >= 8; action_phase_cfg_.disable_custom_fess = config_->get_global_version() >= 8; + action_phase_cfg_.reserve_extra_enabled = config_->get_global_version() >= 9; action_phase_cfg_.mc_blackhole_addr = config_->get_burning_config().blackhole_addr; } { From 2ebc6d6a3c73f558630c8c87893830062e3a18ff Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Wed, 15 Jan 2025 07:45:04 +0000 Subject: [PATCH 14/61] Fix error processing in load_cell (#1467) --- crypto/vm/db/DynamicBagOfCellsDb.cpp | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/crypto/vm/db/DynamicBagOfCellsDb.cpp b/crypto/vm/db/DynamicBagOfCellsDb.cpp index 
d4deae4a..09303758 100644 --- a/crypto/vm/db/DynamicBagOfCellsDb.cpp +++ b/crypto/vm/db/DynamicBagOfCellsDb.cpp @@ -100,8 +100,18 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat return get_cell_info_lazy(level_mask, hash, depth).cell; } td::Result> load_cell(td::Slice hash) override { - TRY_RESULT(loaded_cell, get_cell_info_force(hash).cell->load_cell()); - return std::move(loaded_cell.data_cell); + auto info = hash_table_.get_if_exists(hash); + if (info && info->sync_with_db) { + TRY_RESULT(loaded_cell, info->cell->load_cell()); + return std::move(loaded_cell.data_cell); + } + TRY_RESULT(res, loader_->load(hash, true, *this)); + if (res.status != CellLoader::LoadResult::Ok) { + return td::Status::Error("cell not found"); + } + Ref cell = res.cell(); + hash_table_.apply(hash, [&](CellInfo &info) { update_cell_info_loaded(info, hash, std::move(res)); }); + return cell; } td::Result> load_root(td::Slice hash) override { return load_cell(hash); @@ -145,9 +155,6 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat promise->set_result(std::move(cell)); }); } - CellInfo &get_cell_info_force(td::Slice hash) { - return hash_table_.apply(hash, [&](CellInfo &info) { update_cell_info_force(info, hash); }); - } CellInfo &get_cell_info_lazy(Cell::LevelMask level_mask, td::Slice hash, td::Slice depth) { return hash_table_.apply(hash.substr(hash.size() - Cell::hash_bytes), [&](CellInfo &info) { update_cell_info_lazy(info, level_mask, hash, depth); }); From 987c7ca04b204dcb6b962ec6bdb60c4579f07f19 Mon Sep 17 00:00:00 2001 From: dbaranovstonfi <136370214+dbaranovstonfi@users.noreply.github.com> Date: Wed, 15 Jan 2025 14:50:18 +0400 Subject: [PATCH 15/61] emulator: set libraries when libs is NOT empty (#1449) Co-authored-by: dbaranov34 --- emulator/emulator-extern.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/emulator/emulator-extern.cpp b/emulator/emulator-extern.cpp index 52c374ed..4e5f17bf 
100644 --- a/emulator/emulator-extern.cpp +++ b/emulator/emulator-extern.cpp @@ -615,7 +615,7 @@ const char *tvm_emulator_emulate_run_method(uint32_t len, const char *params_boc emulator->set_vm_verbosity_level(0); emulator->set_gas_limit(gas_limit); emulator->set_c7_raw(c7->fetch(0).as_tuple()); - if (libs.is_empty()) { + if (!libs.is_empty()) { emulator->set_libraries(std::move(libs)); } auto result = emulator->run_get_method(int(method_id), stack); From 799e2d12655536295b21c04a77ff5465a81aaca3 Mon Sep 17 00:00:00 2001 From: tolk-vm Date: Mon, 30 Dec 2024 22:31:27 +0700 Subject: [PATCH 16/61] [Tolk] Rewrite the type system from Hindley-Milner to static typing FunC's (and Tolk's before this PR) type system is based on Hindley-Milner. This is a common approach for functional languages, where types are inferred from usage through unification. As a result, type declarations are not necessary: () f(a,b) { return a+b; } // a and b now int, since `+` (int, int) While this approach works for now, problems arise with the introduction of new types like bool, where `!x` must handle both int and bool. It will also become incompatible with int32 and other strict integers. This will clash with structure methods, struggle with proper generics, and become entirely impractical for union types. This PR completely rewrites the type system targeting the future. 
1) type of any expression is inferred and never changed 2) this is available because dependent expressions already inferred 3) forall completely removed, generic functions introduced (they work like template functions actually, instantiated while inferring) 4) instantiation `<...>` syntax, example: `t.tupleAt(0)` 5) `as` keyword, for example `t.tupleAt(0) as int` 6) methods binding is done along with type inferring, not before ("before", as worked previously, was always a wrong approach) --- crypto/smartcont/tolk-stdlib/common.tolk | 12 +- .../smartcont/tolk-stdlib/gas-payments.tolk | 6 + tolk-tester/tests/a10.tolk | 2 +- tolk-tester/tests/a6.tolk | 3 +- tolk-tester/tests/a6_5.tolk | 2 +- .../tests/allow_post_modification.tolk | 2 +- tolk-tester/tests/assignment-tests.tolk | 28 + tolk-tester/tests/c2.tolk | 4 +- tolk-tester/tests/c2_1.tolk | 2 +- tolk-tester/tests/generics-1.tolk | 150 +++ tolk-tester/tests/invalid-call-1.tolk | 9 +- tolk-tester/tests/invalid-call-9.tolk | 10 + tolk-tester/tests/invalid-declaration-11.tolk | 13 + tolk-tester/tests/invalid-declaration-12.tolk | 16 + tolk-tester/tests/invalid-declaration-13.tolk | 7 + tolk-tester/tests/invalid-generics-1.tolk | 10 + tolk-tester/tests/invalid-generics-10.tolk | 9 + tolk-tester/tests/invalid-generics-11.tolk | 11 + tolk-tester/tests/invalid-generics-2.tolk | 10 + tolk-tester/tests/invalid-generics-3.tolk | 11 + tolk-tester/tests/invalid-generics-4.tolk | 10 + tolk-tester/tests/invalid-generics-5.tolk | 10 + tolk-tester/tests/invalid-generics-6.tolk | 10 + tolk-tester/tests/invalid-generics-7.tolk | 18 + tolk-tester/tests/invalid-generics-8.tolk | 11 + tolk-tester/tests/invalid-generics-9.tolk | 8 + tolk-tester/tests/invalid-mutate-16.tolk | 9 + tolk-tester/tests/invalid-mutate-17.tolk | 13 + tolk-tester/tests/invalid-nopar-4.tolk | 2 +- tolk-tester/tests/invalid-pure-4.tolk | 16 - tolk-tester/tests/invalid-self-4.tolk | 2 +- tolk-tester/tests/invalid-syntax-3.tolk | 2 +- .../{invalid.tolk => 
invalid-syntax-5.tolk} | 0 tolk-tester/tests/invalid-syntax-6.tolk | 9 + tolk-tester/tests/invalid-syntax-7.tolk | 9 + tolk-tester/tests/invalid-typing-1.tolk | 2 +- tolk-tester/tests/invalid-typing-12.tolk | 10 + tolk-tester/tests/invalid-typing-3.tolk | 2 +- tolk-tester/tests/invalid-typing-4.tolk | 4 +- tolk-tester/tests/invalid-typing-5.tolk | 5 +- tolk-tester/tests/invalid-typing-6.tolk | 2 +- tolk-tester/tests/invalid-typing-7.tolk | 9 + tolk-tester/tests/invalid-typing-8.tolk | 8 + tolk-tester/tests/invalid-typing-9.tolk | 13 + tolk-tester/tests/logical-operators.tolk | 7 +- tolk-tester/tests/mutate-methods.tolk | 4 +- tolk-tester/tests/null-keyword.tolk | 28 +- tolk-tester/tests/op_priority.tolk | 2 +- tolk-tester/tests/pure-functions.tolk | 2 +- .../tests/remove-unused-functions.tolk | 4 +- tolk-tester/tests/self-keyword.tolk | 2 +- tolk-tester/tests/test-math.tolk | 5 +- tolk-tester/tests/try-func.tolk | 7 +- tolk-tester/tests/var-apply.tolk | 28 +- tolk/CMakeLists.txt | 7 +- tolk/abscode.cpp | 7 +- tolk/analyzer.cpp | 47 +- tolk/ast-from-tokens.cpp | 337 +++-- tolk/ast-replacer.h | 96 +- tolk/ast-replicator.h | 255 ++++ tolk/ast-stringifier.h | 127 +- tolk/ast-visitor.h | 71 +- tolk/ast.cpp | 61 +- tolk/ast.h | 644 ++++++--- tolk/builtins.cpp | 210 +-- tolk/codegen.cpp | 27 +- tolk/compiler-state.cpp | 4 + tolk/compiler-state.h | 2 +- tolk/constant-evaluator.cpp | 16 +- tolk/fwd-declarations.h | 3 +- tolk/generics-helpers.cpp | 272 ++++ tolk/generics-helpers.h | 64 + tolk/lexer.cpp | 14 +- tolk/lexer.h | 13 +- tolk/pipe-ast-to-legacy.cpp | 448 ++++--- tolk/pipe-calc-rvalue-lvalue.cpp | 81 +- tolk/pipe-check-pure-impure.cpp | 38 +- tolk/pipe-check-rvalue-lvalue.cpp | 92 +- tolk/pipe-constant-folding.cpp | 12 +- tolk/pipe-detect-unreachable.cpp | 19 +- tolk/pipe-discover-parse-sources.cpp | 14 +- tolk/pipe-find-unused-symbols.cpp | 4 +- tolk/pipe-generate-fif-output.cpp | 19 +- tolk/pipe-infer-check-types.cpp | 524 -------- 
tolk/pipe-infer-types-and-calls.cpp | 1149 +++++++++++++++++ tolk/pipe-refine-lvalue-for-mutate.cpp | 70 +- tolk/pipe-register-symbols.cpp | 200 ++- tolk/pipe-resolve-identifiers.cpp | 350 +++++ tolk/pipe-resolve-symbols.cpp | 272 ---- tolk/pipeline.h | 34 +- tolk/platform-utils.h | 4 + tolk/src-file.cpp | 23 +- tolk/src-file.h | 17 +- tolk/symtable.cpp | 59 +- tolk/symtable.h | 68 +- tolk/tolk.cpp | 31 +- tolk/tolk.h | 46 +- tolk/type-expr.h | 131 -- tolk/type-system.cpp | 684 ++++++++++ tolk/type-system.h | 405 ++++++ tolk/unify-types.cpp | 454 ------- 101 files changed, 5402 insertions(+), 2713 deletions(-) create mode 100644 tolk-tester/tests/assignment-tests.tolk create mode 100644 tolk-tester/tests/generics-1.tolk create mode 100644 tolk-tester/tests/invalid-call-9.tolk create mode 100644 tolk-tester/tests/invalid-declaration-11.tolk create mode 100644 tolk-tester/tests/invalid-declaration-12.tolk create mode 100644 tolk-tester/tests/invalid-declaration-13.tolk create mode 100644 tolk-tester/tests/invalid-generics-1.tolk create mode 100644 tolk-tester/tests/invalid-generics-10.tolk create mode 100644 tolk-tester/tests/invalid-generics-11.tolk create mode 100644 tolk-tester/tests/invalid-generics-2.tolk create mode 100644 tolk-tester/tests/invalid-generics-3.tolk create mode 100644 tolk-tester/tests/invalid-generics-4.tolk create mode 100644 tolk-tester/tests/invalid-generics-5.tolk create mode 100644 tolk-tester/tests/invalid-generics-6.tolk create mode 100644 tolk-tester/tests/invalid-generics-7.tolk create mode 100644 tolk-tester/tests/invalid-generics-8.tolk create mode 100644 tolk-tester/tests/invalid-generics-9.tolk create mode 100644 tolk-tester/tests/invalid-mutate-16.tolk create mode 100644 tolk-tester/tests/invalid-mutate-17.tolk delete mode 100644 tolk-tester/tests/invalid-pure-4.tolk rename tolk-tester/tests/{invalid.tolk => invalid-syntax-5.tolk} (100%) create mode 100644 tolk-tester/tests/invalid-syntax-6.tolk create mode 100644 
tolk-tester/tests/invalid-syntax-7.tolk create mode 100644 tolk-tester/tests/invalid-typing-12.tolk create mode 100644 tolk-tester/tests/invalid-typing-7.tolk create mode 100644 tolk-tester/tests/invalid-typing-8.tolk create mode 100644 tolk-tester/tests/invalid-typing-9.tolk create mode 100644 tolk/ast-replicator.h create mode 100644 tolk/generics-helpers.cpp create mode 100644 tolk/generics-helpers.h delete mode 100644 tolk/pipe-infer-check-types.cpp create mode 100644 tolk/pipe-infer-types-and-calls.cpp create mode 100644 tolk/pipe-resolve-identifiers.cpp delete mode 100644 tolk/pipe-resolve-symbols.cpp delete mode 100644 tolk/type-expr.h create mode 100644 tolk/type-system.cpp create mode 100644 tolk/type-system.h delete mode 100644 tolk/unify-types.cpp diff --git a/crypto/smartcont/tolk-stdlib/common.tolk b/crypto/smartcont/tolk-stdlib/common.tolk index dec12e23..03638f0a 100644 --- a/crypto/smartcont/tolk-stdlib/common.tolk +++ b/crypto/smartcont/tolk-stdlib/common.tolk @@ -17,17 +17,17 @@ fun createEmptyTuple(): tuple /// Appends a value to tuple, resulting in `Tuple t' = (x1, ..., xn, value)`. /// If its size exceeds 255, throws a type check exception. @pure -fun tuplePush(mutate self: tuple, value: X): void +fun tuplePush(mutate self: tuple, value: T): void asm "TPUSH"; /// Returns the first element of a non-empty tuple. @pure -fun tupleFirst(t: tuple): X +fun tupleFirst(t: tuple): T asm "FIRST"; /// Returns the [`index`]-th element of a tuple. @pure -fun tupleAt(t: tuple, index: int): X +fun tupleAt(t: tuple, index: int): T builtin; /// Returns the size of a tuple (elements count in it). @@ -37,7 +37,7 @@ fun tupleSize(t: tuple): int /// Returns the last element of a non-empty tuple. @pure -fun tupleLast(t: tuple): int +fun tupleLast(t: tuple): T asm "LAST"; @@ -306,11 +306,11 @@ fun getBuilderDepth(b: builder): int */ /// Dump a variable [x] to the debug log. 
-fun debugPrint(x: X): void +fun debugPrint(x: T): void builtin; /// Dump a string [x] to the debug log. -fun debugPrintString(x: X): void +fun debugPrintString(x: T): void builtin; /// Dumps the stack (at most the top 255 values) and shows the total stack depth. diff --git a/crypto/smartcont/tolk-stdlib/gas-payments.tolk b/crypto/smartcont/tolk-stdlib/gas-payments.tolk index 1dc6f3f8..1965cc6a 100644 --- a/crypto/smartcont/tolk-stdlib/gas-payments.tolk +++ b/crypto/smartcont/tolk-stdlib/gas-payments.tolk @@ -61,3 +61,9 @@ fun calculateOriginalMessageFee(workchain: int, incomingFwdFee: int): int /// If it has no debt, `0` is returned. fun getMyStorageDuePayment(): int asm "DUEPAYMENT"; + +/// Returns the amount of nanotoncoins charged for storage. +/// (during storage phase preceeding to current computation phase) +@pure +fun getMyStoragePaidPayment(): int + asm "STORAGEFEES"; diff --git a/tolk-tester/tests/a10.tolk b/tolk-tester/tests/a10.tolk index d46397c6..7301f1d5 100644 --- a/tolk-tester/tests/a10.tolk +++ b/tolk-tester/tests/a10.tolk @@ -35,7 +35,7 @@ fun test88(x: int) { } @method_id(89) -fun test89(last: int) { +fun test89(last: int): (int, int, int, int) { var t: tuple = createEmptyTuple(); t.tuplePush(1); t.tuplePush(2); diff --git a/tolk-tester/tests/a6.tolk b/tolk-tester/tests/a6.tolk index 7f2c3946..32fd3364 100644 --- a/tolk-tester/tests/a6.tolk +++ b/tolk-tester/tests/a6.tolk @@ -9,6 +9,7 @@ fun calc_phi(): int { repeat (70) { n*=10; }; var p= 1; var `q`=1; + _=`q`; do { (p,q)=(q,p+q); } while (q <= n); //;; @@ -27,7 +28,7 @@ fun calc_sqrt2(): int { return mulDivRound(p, n, q); } -fun calc_root(m: auto): auto { +fun calc_root(m: int) { var base: int=1; repeat(70) { base *= 10; } var (a, b, c) = (1,0,-m); diff --git a/tolk-tester/tests/a6_5.tolk b/tolk-tester/tests/a6_5.tolk index 8b300c0c..43fd59c5 100644 --- a/tolk-tester/tests/a6_5.tolk +++ b/tolk-tester/tests/a6_5.tolk @@ -1,5 +1,5 @@ @deprecated -fun twice(f: auto, x: auto): auto { +fun twice(f: 
int -> int, x: int) { return f (f (x)); } diff --git a/tolk-tester/tests/allow_post_modification.tolk b/tolk-tester/tests/allow_post_modification.tolk index 5e0ce6b9..191bcf08 100644 --- a/tolk-tester/tests/allow_post_modification.tolk +++ b/tolk-tester/tests/allow_post_modification.tolk @@ -138,5 +138,5 @@ fun main() { inc CALLDICT // self newY }> """ -@code_hash 33262590582878205026101577472505372101182291690814957175155528952950621243206 +@code_hash 7627024945492125068389905298530400936797031708759561372406088054030801992712 */ diff --git a/tolk-tester/tests/assignment-tests.tolk b/tolk-tester/tests/assignment-tests.tolk new file mode 100644 index 00000000..89de8cf4 --- /dev/null +++ b/tolk-tester/tests/assignment-tests.tolk @@ -0,0 +1,28 @@ +fun extractFromTypedTuple(params: [int]) { + var [payload: int] = params; + return payload + 10; +} + +@method_id(101) +fun test101(x: int) { + var params = [x]; + return extractFromTypedTuple(params); +} + +fun autoInferIntNull(x: int) { + if (x > 10) { return null; } + return x; +} + +fun main(value: int) { + var (x: int, y) = (autoInferIntNull(value), autoInferIntNull(value * 2)); + if (x == null && y == null) { return null; } + return x == null || y == null ? 
-1 : x + y; +} + +/** +@testcase | 0 | 3 | 9 +@testcase | 0 | 6 | -1 +@testcase | 0 | 11 | (null) +@testcase | 101 | 78 | 88 +*/ diff --git a/tolk-tester/tests/c2.tolk b/tolk-tester/tests/c2.tolk index ec8d32da..9b56a9c5 100644 --- a/tolk-tester/tests/c2.tolk +++ b/tolk-tester/tests/c2.tolk @@ -4,7 +4,7 @@ fun check_assoc(a: int, b: int, c: int): int { return op(op(a, b), c) == op(a, op(b, c)); } -fun unnamed_args(_: int, _: slice, _: auto): auto { +fun unnamed_args(_: int, _: slice, _: int) { return true; } @@ -14,7 +14,7 @@ fun main(x: int, y: int, z: int): int { } @method_id(101) -fun test101(x: int, z: int): auto { +fun test101(x: int, z: int) { return unnamed_args(x, "asdf", z); } diff --git a/tolk-tester/tests/c2_1.tolk b/tolk-tester/tests/c2_1.tolk index 4e52b9ee..fc16b436 100644 --- a/tolk-tester/tests/c2_1.tolk +++ b/tolk-tester/tests/c2_1.tolk @@ -1,4 +1,4 @@ -fun check_assoc(op: auto, a: int, b: int, c: int) { +fun check_assoc(op: (int, int) -> int, a: int, b: int, c: int) { return op(op(a, b), c) == op(a, op(b, c)); } diff --git a/tolk-tester/tests/generics-1.tolk b/tolk-tester/tests/generics-1.tolk new file mode 100644 index 00000000..0d872cc1 --- /dev/null +++ b/tolk-tester/tests/generics-1.tolk @@ -0,0 +1,150 @@ +fun eq1(value: X): X { return value; } +fun eq2(value: X) { return value; } +fun eq3(value: X): X { var cp: [X] = [eq1(value)]; var ((([v: X]))) = cp; return v; } +fun eq4(value: X) { return eq1(value); } + +@method_id(101) +fun test101(x: int) { + var (a, b, c) = (x, (x,x), [x,x]); + return (eq1(a), eq1(b), eq1(c), eq2(a), eq2(b), eq2(c), eq3(a), eq4(b), eq3(createEmptyTuple())); +} + +fun getTwo(): X { return 2 as X; } + +fun takeInt(a: int) { return a; } + +@method_id(102) +fun test102(): (int, int, int, [(int, int)]) { + var a: int = getTwo(); + var _: int = getTwo(); + var b = getTwo() as int; + var c: int = 1 ? 
getTwo() : getTwo(); + var c redef = getTwo(); + return (eq1(a), eq2(b), takeInt(getTwo()), [(getTwo(), getTwo())]); +} + +@method_id(103) +fun test103(first: int): (int, int, int) { + var t = createEmptyTuple(); + var cs = beginCell().storeInt(100, 32).endCell().beginParse(); + t.tuplePush(first); + t.tuplePush(2); + t.tuplePush(cs); + cs = t.tupleAt(2); + cs = t.tupleAt(2) as slice; + return (t.tupleAt(0), cs.loadInt(32), t.tupleAt(2).loadInt(32)); +} + +fun manyEq(a: T1, b: T2, c: T3): [T1, T2, T3] { + return [a, b, c]; +} + +@method_id(104) +fun test104(f: int) { + return ( + manyEq(1 ? 1 : 1, f ? 0 : null, !f ? getTwo() as int : null), + manyEq((f ? null as int : eq2(2), beginCell().storeBool(true).endCell().beginParse().loadBool()), 0, eq4(f)) + ); +} + +fun calcSum(x: X, y: X) { return x + y; } + +@method_id(105) +fun test105() { + if (0) { calcSum(((0)), null); } + return (calcSum(1, 2)); +} + +fun calcYPlus1(value: Y) { return value + 1; } +fun calcLoad32(cs: slice) { return cs.loadInt(32); } +fun calcTensorPlus1(tens: (int, int)) { var (f, s) = tens; return (f + 1, s + 1); } +fun calcTensorMul2(tens: (int, int)) { var (f, s) = tens; return (f * 2, s * 2); } +fun cellToSlice(c: cell) { return c.beginParse(); } +fun abstractTransform(xToY: (X) -> Y, yToR: (((Y))) -> R, initialX: X): R { + var y = xToY(initialX); + return yToR(y); +} + +@method_id(106) +fun test106() { + var c = beginCell().storeInt(106, 32).endCell(); + return [ + abstractTransform(cellToSlice, calcLoad32, c), + abstractTransform(calcYPlus1, calcYPlus1, 0), + abstractTransform(calcTensorPlus1, calcTensorMul2, (2, 2)) + ]; +} + +fun callTupleFirst(t: X): Y { return t.tupleFirst(); } +fun callTuplePush(mutate self: T, v1: V, v2: V): self { self.tuplePush(v1); tuplePush(mutate self, v2); return self; } +fun getTupleLastInt(t: tuple) { return t.tupleLast(); } +fun getTupleSize(t: tuple) { return t.tupleSize(); } +fun callAnyFn(f: (TObj) -> TResult, arg: TObj) { return f(arg); } +fun 
callAnyFn2(f: TCallback, arg: tuple) { return f(arg); } + +global t107: tuple; + +@method_id(107) +fun test107() { + t107 = createEmptyTuple(); + callTuplePush(mutate t107, 1, 2); + t107.callTuplePush(3, 4).callTuplePush(5, 6); + var first: int = t107.callTupleFirst(); + return ( + callAnyFn(getTupleSize, t107), + callAnyFn2(getTupleSize, t107), + first, + callTupleFirst(t107) as int, + callAnyFn(getTupleLastInt, t107), + callAnyFn2(getTupleLastInt, t107) + ); +} + +global g108: int; + +fun inc108(by: int) { g108 += by; } +fun getInc108() { return inc108; } +fun returnResult(f: () -> RetT): RetT { return f(); } +fun applyAndReturn(f: () -> (ArgT) -> RetT, arg: ArgT): () -> ArgT -> RetT { + f()(arg); + return f; +} + +@method_id(108) +fun test108() { + g108 = 0; + getInc108()(1); + returnResult<(int) -> void>(getInc108)(2); + applyAndReturn(getInc108, 10)()(10); + returnResult(getInc108)(2); + applyAndReturn(getInc108, 10)()(10); + return g108; +} + +fun main(x: int): (int, [[int, int]]) { + try { if(x) { throw (1, x); } } + catch (excNo, arg) { return (arg as int, [[eq2(arg as int), getTwo()]]); } + return (0, [[x, 1]]); +} + +/** +@testcase | 0 | 1 | 1 [ [ 1 2 ] ] +@testcase | 101 | 0 | 0 0 0 [ 0 0 ] 0 0 0 [ 0 0 ] 0 0 0 [] +@testcase | 102 | | 2 2 2 [ 2 2 ] +@testcase | 103 | 0 | 0 100 100 +@testcase | 104 | 0 | [ 1 (null) 2 ] [ 2 -1 0 0 ] +@testcase | 105 | | 3 +@testcase | 106 | | [ 106 2 6 6 ] +@testcase | 107 | | 6 6 1 1 6 6 +@testcase | 108 | | 45 + +@fif_codegen DECLPROC eq1 +@fif_codegen DECLPROC eq1 +@fif_codegen DECLPROC eq1<(int,int)> +@fif_codegen DECLPROC eq1<[int,int]> +@fif_codegen DECLPROC getTwo + +@fif_codegen_avoid DECLPROC eq1 +@fif_codegen_avoid DECLPROC eq2 +@fif_codegen_avoid DECLPROC eq3 + */ diff --git a/tolk-tester/tests/invalid-call-1.tolk b/tolk-tester/tests/invalid-call-1.tolk index 57a33c4b..3542f580 100644 --- a/tolk-tester/tests/invalid-call-1.tolk +++ b/tolk-tester/tests/invalid-call-1.tolk @@ -1,9 +1,10 @@ -fun main() { - return 
true(); +const asdf = 1; + +fun main(x: int) { + return x.asdf(); } /** @compilation_should_fail -The message is weird now, but later I'll rework error messages anyway. -@stderr cannot apply expression of type int to an expression of type (): cannot unify type () -> ??2 with int +@stderr calling a non-function */ diff --git a/tolk-tester/tests/invalid-call-9.tolk b/tolk-tester/tests/invalid-call-9.tolk new file mode 100644 index 00000000..87eb61e8 --- /dev/null +++ b/tolk-tester/tests/invalid-call-9.tolk @@ -0,0 +1,10 @@ +fun getOne() { return 1; } + +fun main() { + return getOne(); +} + +/** +@compilation_should_fail +@stderr calling a not generic function with generic T + */ diff --git a/tolk-tester/tests/invalid-declaration-11.tolk b/tolk-tester/tests/invalid-declaration-11.tolk new file mode 100644 index 00000000..75ebb450 --- /dev/null +++ b/tolk-tester/tests/invalid-declaration-11.tolk @@ -0,0 +1,13 @@ +// this function is declared incorrectly, +// since it should return 2 values onto a stack (1 for returned slice, 1 for mutated int) +// but contains not 2 numbers in asm ret_order +fun loadAddress2(mutate self: int): slice + asm( -> 1 0 2) "LDMSGADDR"; + +fun main(){} + +/** +@compilation_should_fail +@stderr ret_order (after ->) expected to contain 2 numbers +@stderr asm( -> 1 0 2) + */ diff --git a/tolk-tester/tests/invalid-declaration-12.tolk b/tolk-tester/tests/invalid-declaration-12.tolk new file mode 100644 index 00000000..25ae9de6 --- /dev/null +++ b/tolk-tester/tests/invalid-declaration-12.tolk @@ -0,0 +1,16 @@ +fun proxy(x: int) { + return factorial(x); +} + +fun factorial(x: int) { + if (x <= 0) { + return 1; + } + return x * proxy(x-1); +} + +/** +@compilation_should_fail +@stderr could not infer return type of `factorial`, because it appears in a recursive call chain +@stderr fun factorial + */ diff --git a/tolk-tester/tests/invalid-declaration-13.tolk b/tolk-tester/tests/invalid-declaration-13.tolk new file mode 100644 index 00000000..758a4f21 
--- /dev/null +++ b/tolk-tester/tests/invalid-declaration-13.tolk @@ -0,0 +1,7 @@ +const c: slice = 123 + 456; + +/** +@compilation_should_fail +@stderr expression type does not match declared type +@stderr const c + */ diff --git a/tolk-tester/tests/invalid-generics-1.tolk b/tolk-tester/tests/invalid-generics-1.tolk new file mode 100644 index 00000000..c8ff7fec --- /dev/null +++ b/tolk-tester/tests/invalid-generics-1.tolk @@ -0,0 +1,10 @@ +fun f(v: int, x: X) {} + +fun failCantDeduceWithoutArgument() { + return f(1); +} + +/** +@compilation_should_fail +@stderr can not deduce X for generic function `f` + */ diff --git a/tolk-tester/tests/invalid-generics-10.tolk b/tolk-tester/tests/invalid-generics-10.tolk new file mode 100644 index 00000000..c7f72bf4 --- /dev/null +++ b/tolk-tester/tests/invalid-generics-10.tolk @@ -0,0 +1,9 @@ +fun invalidReferencingGenericMethodWithoutGeneric() { + var t = createEmptyTuple(); + var cb = t.tupleLast; +} + +/** +@compilation_should_fail +@stderr can not use a generic function `tupleLast` as non-call + */ diff --git a/tolk-tester/tests/invalid-generics-11.tolk b/tolk-tester/tests/invalid-generics-11.tolk new file mode 100644 index 00000000..a399bc91 --- /dev/null +++ b/tolk-tester/tests/invalid-generics-11.tolk @@ -0,0 +1,11 @@ +global gVar: int; + +fun main() { + var x = gVar; + return x; +} + +/** +@compilation_should_fail +@stderr generic T not expected here + */ diff --git a/tolk-tester/tests/invalid-generics-2.tolk b/tolk-tester/tests/invalid-generics-2.tolk new file mode 100644 index 00000000..15594433 --- /dev/null +++ b/tolk-tester/tests/invalid-generics-2.tolk @@ -0,0 +1,10 @@ +fun f(v: int, x: T) {} + +fun failCantDeduceWithPlainNull() { + return f(0, null); +} + +/** +@compilation_should_fail +@stderr can not deduce T for generic function `f` + */ diff --git a/tolk-tester/tests/invalid-generics-3.tolk b/tolk-tester/tests/invalid-generics-3.tolk new file mode 100644 index 00000000..72b7df0e --- /dev/null +++ 
b/tolk-tester/tests/invalid-generics-3.tolk @@ -0,0 +1,11 @@ +fun f(x: T, y: T) {} + +fun failIncompatibleTypesForT() { + return f(32, ""); +} + +/** +@compilation_should_fail +@stderr T is both int and slice for generic function `f` +@stderr f(32 + */ diff --git a/tolk-tester/tests/invalid-generics-4.tolk b/tolk-tester/tests/invalid-generics-4.tolk new file mode 100644 index 00000000..07472ba3 --- /dev/null +++ b/tolk-tester/tests/invalid-generics-4.tolk @@ -0,0 +1,10 @@ +fun f(x: T): void asm "NOP"; + +fun failInstantiatingAsmFunctionWithNon1Slot() { + f((1, 2)); +} + +/** +@compilation_should_fail +@stderr can not call `f` with T=(int, int), because it occupies 2 stack slots in TVM, not 1 + */ diff --git a/tolk-tester/tests/invalid-generics-5.tolk b/tolk-tester/tests/invalid-generics-5.tolk new file mode 100644 index 00000000..4d4f2967 --- /dev/null +++ b/tolk-tester/tests/invalid-generics-5.tolk @@ -0,0 +1,10 @@ +fun f(x: T): void asm "NOP"; + +fun failUsingGenericFunctionPartially() { + var cb = f; +} + +/** +@compilation_should_fail +@stderr can not use a generic function `f` as non-call + */ diff --git a/tolk-tester/tests/invalid-generics-6.tolk b/tolk-tester/tests/invalid-generics-6.tolk new file mode 100644 index 00000000..73e6403f --- /dev/null +++ b/tolk-tester/tests/invalid-generics-6.tolk @@ -0,0 +1,10 @@ +fun eq(t: X) { return t; } + +fun failUsingGenericFunctionPartially() { + var cb = createEmptyTuple().eq().eq().tuplePush; +} + +/** +@compilation_should_fail +@stderr can not use a generic function `tuplePush` as non-call + */ diff --git a/tolk-tester/tests/invalid-generics-7.tolk b/tolk-tester/tests/invalid-generics-7.tolk new file mode 100644 index 00000000..b51bb82c --- /dev/null +++ b/tolk-tester/tests/invalid-generics-7.tolk @@ -0,0 +1,18 @@ +fun failOnInstantiation(a: slice) { + var b: slice = foo(a); +} + +fun bar(value: X) : X { + return 1; +} +fun foo(value: X) : X { + return bar(value); +} + +/** +@compilation_should_fail +@stderr while 
instantiating generic function `foo` +@stderr while instantiating generic function `bar` +@stderr can not convert type `int` to return type `slice` +@stderr return 1 + */ diff --git a/tolk-tester/tests/invalid-generics-8.tolk b/tolk-tester/tests/invalid-generics-8.tolk new file mode 100644 index 00000000..d2c24e53 --- /dev/null +++ b/tolk-tester/tests/invalid-generics-8.tolk @@ -0,0 +1,11 @@ +fun withT1T2(a: (T1, T2)) {} + +fun wrongTCountPassed() { + withT1T2((5, "")); +} + +/** +@compilation_should_fail +@stderr wrong count of generic T: expected 2, got 1 +@stderr + */ diff --git a/tolk-tester/tests/invalid-generics-9.tolk b/tolk-tester/tests/invalid-generics-9.tolk new file mode 100644 index 00000000..73fd6f87 --- /dev/null +++ b/tolk-tester/tests/invalid-generics-9.tolk @@ -0,0 +1,8 @@ +fun invalidProvidingGenericTsToNotGeneric() { + beginCell(); +} + +/** +@compilation_should_fail +@stderr calling a not generic function with generic T + */ diff --git a/tolk-tester/tests/invalid-mutate-16.tolk b/tolk-tester/tests/invalid-mutate-16.tolk new file mode 100644 index 00000000..9da6e253 --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-16.tolk @@ -0,0 +1,9 @@ +fun cantCallMutatingFunctionWithAssignmentLValue() { + var t: tuple = createEmptyTuple(); + (t = createEmptyTuple()).tuplePush(1); +} + +/** +@compilation_should_fail +@stderr assignment can not be used as lvalue + */ diff --git a/tolk-tester/tests/invalid-mutate-17.tolk b/tolk-tester/tests/invalid-mutate-17.tolk new file mode 100644 index 00000000..9327f07d --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-17.tolk @@ -0,0 +1,13 @@ +@pure +fun tupleMut(mutate self: tuple): int + asm "TLEN"; + +fun main() { + var t = createEmptyTuple(); + return [[t.tupleMut]]; +} + +/** +@compilation_should_fail +@stderr saving `tupleMut` into a variable is impossible, since it has `mutate` parameters + */ diff --git a/tolk-tester/tests/invalid-nopar-4.tolk b/tolk-tester/tests/invalid-nopar-4.tolk index 6e833f99..033c483e 
100644 --- a/tolk-tester/tests/invalid-nopar-4.tolk +++ b/tolk-tester/tests/invalid-nopar-4.tolk @@ -4,5 +4,5 @@ fun load_u32(cs: slice): (slice, int) { /** @compilation_should_fail -@stderr expected `(`, got `32` +@stderr expected `;`, got `32` */ diff --git a/tolk-tester/tests/invalid-pure-4.tolk b/tolk-tester/tests/invalid-pure-4.tolk deleted file mode 100644 index 868498f6..00000000 --- a/tolk-tester/tests/invalid-pure-4.tolk +++ /dev/null @@ -1,16 +0,0 @@ -global set: int; - -@pure -fun someF(): int { - var set redef = 0; - return set; -} - -/** -@compilation_should_fail -@stderr -""" -an impure operation in a pure function -var set -""" -*/ diff --git a/tolk-tester/tests/invalid-self-4.tolk b/tolk-tester/tests/invalid-self-4.tolk index f4856a46..0be6b9e4 100644 --- a/tolk-tester/tests/invalid-self-4.tolk +++ b/tolk-tester/tests/invalid-self-4.tolk @@ -4,6 +4,6 @@ fun cantReturnNothingFromSelf(mutate self: int): self { /** @compilation_should_fail -@stderr missing return; forgot `return self`? 
+@stderr missing return @stderr } */ diff --git a/tolk-tester/tests/invalid-syntax-3.tolk b/tolk-tester/tests/invalid-syntax-3.tolk index 26ce82ac..259ea795 100644 --- a/tolk-tester/tests/invalid-syntax-3.tolk +++ b/tolk-tester/tests/invalid-syntax-3.tolk @@ -4,5 +4,5 @@ fun main(x: int) { /** @compilation_should_fail -@stderr null is not a function: use `null`, not `null()` +@stderr calling a non-function */ diff --git a/tolk-tester/tests/invalid.tolk b/tolk-tester/tests/invalid-syntax-5.tolk similarity index 100% rename from tolk-tester/tests/invalid.tolk rename to tolk-tester/tests/invalid-syntax-5.tolk diff --git a/tolk-tester/tests/invalid-syntax-6.tolk b/tolk-tester/tests/invalid-syntax-6.tolk new file mode 100644 index 00000000..12e02645 --- /dev/null +++ b/tolk-tester/tests/invalid-syntax-6.tolk @@ -0,0 +1,9 @@ +fun main() { + var a = 1; + (a += 1) += 2; +} + +/** +@compilation_should_fail +@stderr assignment can not be used as lvalue +*/ diff --git a/tolk-tester/tests/invalid-syntax-7.tolk b/tolk-tester/tests/invalid-syntax-7.tolk new file mode 100644 index 00000000..9f63ac10 --- /dev/null +++ b/tolk-tester/tests/invalid-syntax-7.tolk @@ -0,0 +1,9 @@ +fun main() { + var x = 1; + x += (var y = 2); +} + +/** +@compilation_should_fail +@stderr expected , got `var` +*/ diff --git a/tolk-tester/tests/invalid-typing-1.tolk b/tolk-tester/tests/invalid-typing-1.tolk index a0fe296d..0089bd62 100644 --- a/tolk-tester/tests/invalid-typing-1.tolk +++ b/tolk-tester/tests/invalid-typing-1.tolk @@ -6,5 +6,5 @@ fun main() { /** @compilation_should_fail @stderr .tolk:2 -@stderr expected , got `scli` +@stderr unknown type name `scli` */ diff --git a/tolk-tester/tests/invalid-typing-12.tolk b/tolk-tester/tests/invalid-typing-12.tolk new file mode 100644 index 00000000..3a5b1fe2 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-12.tolk @@ -0,0 +1,10 @@ +fun failAssignNullToTensor() { + var ab = (1, 2); + ab = null; + return ab; +} + +/** +@compilation_should_fail +@stderr 
can not assign `null` to variable of type `(int, int)` + */ diff --git a/tolk-tester/tests/invalid-typing-3.tolk b/tolk-tester/tests/invalid-typing-3.tolk index fb4b0bc5..ac019a42 100644 --- a/tolk-tester/tests/invalid-typing-3.tolk +++ b/tolk-tester/tests/invalid-typing-3.tolk @@ -15,5 +15,5 @@ fun cantMixDifferentThis() { /** @compilation_should_fail -@stderr cannot apply function appendBuilder : builder -> (builder, ()) to arguments of type int: cannot unify type int with builder +@stderr can not call method for `builder` with object of type `int` */ diff --git a/tolk-tester/tests/invalid-typing-4.tolk b/tolk-tester/tests/invalid-typing-4.tolk index 0e655369..1ee71290 100644 --- a/tolk-tester/tests/invalid-typing-4.tolk +++ b/tolk-tester/tests/invalid-typing-4.tolk @@ -7,8 +7,6 @@ fun cantCallNotChainedMethodsInAChain(x: int) { } /** -The error is very weird, but nevertheless, the type system prevents of doing such errors. - @compilation_should_fail -@stderr cannot apply function incNotChained : int -> (int, ()) to arguments of type (): cannot unify type () with int +@stderr can not call method for `int` with object of type `void` */ diff --git a/tolk-tester/tests/invalid-typing-5.tolk b/tolk-tester/tests/invalid-typing-5.tolk index ba3450de..9d8cd480 100644 --- a/tolk-tester/tests/invalid-typing-5.tolk +++ b/tolk-tester/tests/invalid-typing-5.tolk @@ -7,8 +7,7 @@ fun failWhenReturnANotChainedValue(x: int): int { } /** -The error is very weird, but nevertheless, the type system prevents of doing such errors. 
- @compilation_should_fail -@stderr previous function return type int cannot be unified with return statement expression type (): cannot unify type () with int +@stderr x.incNotChained() +@stderr can not convert type `void` to return type `int` */ diff --git a/tolk-tester/tests/invalid-typing-6.tolk b/tolk-tester/tests/invalid-typing-6.tolk index dcdab5f1..da0ac9bb 100644 --- a/tolk-tester/tests/invalid-typing-6.tolk +++ b/tolk-tester/tests/invalid-typing-6.tolk @@ -4,5 +4,5 @@ fun failWhenTernaryConditionNotInt(cs: slice) { /** @compilation_should_fail -@stderr condition of ternary ?: operator must be an integer +@stderr condition of ternary operator must be an integer */ diff --git a/tolk-tester/tests/invalid-typing-7.tolk b/tolk-tester/tests/invalid-typing-7.tolk new file mode 100644 index 00000000..c192a05b --- /dev/null +++ b/tolk-tester/tests/invalid-typing-7.tolk @@ -0,0 +1,9 @@ +fun failAssignPlainNullToVariable() { + var x = null; +} + +/** +@compilation_should_fail +@stderr can not infer type of `x`, it's always null +@stderr specify its type with `x: ` or use `null as ` + */ diff --git a/tolk-tester/tests/invalid-typing-8.tolk b/tolk-tester/tests/invalid-typing-8.tolk new file mode 100644 index 00000000..d696e132 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-8.tolk @@ -0,0 +1,8 @@ +fun failExplicitCastIncompatible(c: cell) { + return c as slice; +} + +/** +@compilation_should_fail +@stderr type `cell` can not be cast to `slice` + */ diff --git a/tolk-tester/tests/invalid-typing-9.tolk b/tolk-tester/tests/invalid-typing-9.tolk new file mode 100644 index 00000000..a0d5ee04 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-9.tolk @@ -0,0 +1,13 @@ +fun getTupleLastGetter(): tuple -> X { + return tupleLast; +} + +fun failTypeMismatch() { + var t = createEmptyTuple(); + var c: cell = getTupleLastGetter()(t); +} + +/** +@compilation_should_fail +@stderr can not assign `int` to variable of type `cell` + */ diff --git 
a/tolk-tester/tests/logical-operators.tolk b/tolk-tester/tests/logical-operators.tolk index e9774f3f..9e21a968 100644 --- a/tolk-tester/tests/logical-operators.tolk +++ b/tolk-tester/tests/logical-operators.tolk @@ -54,7 +54,8 @@ fun testDict(last: int) { @method_id(105) fun testNotNull(x: int) { - return [x == null, null == x, !(x == null), null == null, +(null != null)]; + // return [x == null, null == x, !(x == null), null == null, +(null != null)]; + return [x == null, null == x, !(x == null)]; } @method_id(106) @@ -144,8 +145,8 @@ fun main() { @testcase | 104 | 50 | 3 5 -1 @testcase | 104 | 100 | 3 5 5 @testcase | 104 | 0 | 3 -1 5 -@testcase | 105 | 0 | [ 0 0 -1 -1 0 ] -@testcase | 105 | null | [ -1 -1 0 -1 0 ] +@testcase | 105 | 0 | [ 0 0 -1 ] +@testcase | 105 | null | [ -1 -1 0 ] @testcase | 106 | | [ 0 0 0 -1 ] [ 0 0 0 ] [ -1 -1 -1 ] [ 0 -1 ] @testcase | 107 | | [ -1 -1 0 -1 ] [ 0 0 0 ] [ -1 -1 -1 ] [ -1 0 ] @testcase | 108 | 1 2 | -1 diff --git a/tolk-tester/tests/mutate-methods.tolk b/tolk-tester/tests/mutate-methods.tolk index 73a6591b..816e4c8d 100644 --- a/tolk-tester/tests/mutate-methods.tolk +++ b/tolk-tester/tests/mutate-methods.tolk @@ -154,7 +154,7 @@ fun getSumOfNumbersInCell(c: cell): int { @method_id(110) fun testStoreChaining() { - var b = beginCell().storeUint(1, 32).storeUint(2, 32).storeUint(3, 32); + var b = ((beginCell()).storeUint(1, 32)).storeUint(2, 32).storeUint(3, 32); b.storeUint(4, 32); b.myStoreUint(5, 32).storeUint(6, 32); storeUint(mutate b, 7, 32); @@ -198,7 +198,7 @@ fun testStoreAndMutateBoth() { b.myStoreU32_and_mutate_x(mutate x); var cs: slice = b.endCell().beginParse(); - var (n1,n2,n3,n4,n5) = (cs.loadUint(32),cs.loadUint(32),cs.loadUint(32),cs.loadUint(32),cs.loadUint(32)); + var (n1,n2,n3,n4,n5) = (cs.loadUint(32),((cs)).loadUint(32),cs.loadUint(32),cs.loadUint(32),cs.loadUint(32)); assert(n5 == x) throw 100; return [n1,n2,n3,n4,n5]; diff --git a/tolk-tester/tests/null-keyword.tolk b/tolk-tester/tests/null-keyword.tolk 
index 8fcf2584..c4bd0acc 100644 --- a/tolk-tester/tests/null-keyword.tolk +++ b/tolk-tester/tests/null-keyword.tolk @@ -7,12 +7,14 @@ fun test1() { numbers = listPrepend(2, numbers); numbers = listPrepend(3, numbers); numbers = listPrepend(4, numbers); - var (h, numbers redef) = listSplit(numbers); + var (h: int, numbers redef) = listSplit(numbers); h += listGetHead(numbers); + _ = null; + (_, _) = (null, null); var t = createEmptyTuple(); do { - var num = numbers.listNext(); + var num: int = numbers.listNext(); t.tuplePush(num); } while (numbers != null); @@ -44,7 +46,7 @@ fun test3(x: int) { } fun getUntypedNull() { - var untyped = null; + var untyped: null = null; if (true) { return untyped; } @@ -52,8 +54,8 @@ fun getUntypedNull() { } @method_id(104) -fun test4() { - var (_, (_, untyped)) = (3, (createEmptyTuple, null)); +fun test4(): null { + var (_, (_, untyped: null)) = (3, (createEmptyTuple, null)); if (true) { return untyped; } @@ -62,15 +64,10 @@ fun test4() { @method_id(105) fun test5() { - var n = getUntypedNull(); + var n: slice = getUntypedNull(); return !(null == n) ? 
n.loadInt(32) : 100; } -@method_id(106) -fun test6(x: int) { - return x > null; // this compiles (for now), but fails at runtime -} - @method_id(107) fun test7() { var b = beginCell().storeMaybeRef(null); @@ -132,15 +129,6 @@ fun main() { }> """ -@fif_codegen -""" - test6 PROC:<{ - // x - PUSHNULL // x _1 - GREATER // _2 - }> -""" - @fif_codegen """ test7 PROC:<{ diff --git a/tolk-tester/tests/op_priority.tolk b/tolk-tester/tests/op_priority.tolk index e4f97b75..95209c99 100644 --- a/tolk-tester/tests/op_priority.tolk +++ b/tolk-tester/tests/op_priority.tolk @@ -56,7 +56,7 @@ fun test8(b: int): int { return a; } -fun `_ int) { return used_as_noncall2; } +fun receiveGetter(): () -> int { return used_as_noncall2; } @pure fun usedButOptimizedOut(x: int): int { return x + 2; } diff --git a/tolk-tester/tests/self-keyword.tolk b/tolk-tester/tests/self-keyword.tolk index ba779454..b0567696 100644 --- a/tolk-tester/tests/self-keyword.tolk +++ b/tolk-tester/tests/self-keyword.tolk @@ -187,7 +187,7 @@ fun myTupleAt(self: tuple, idx: int): T { global tup111: tuple; @method_id(111) -fun testForallFunctionsWithSelf() { +fun testForallFunctionsWithSelf(): (int, int, tuple) { var t = createEmptyTuple(); tup111 = createEmptyTuple(); t.myTuplePush(10); diff --git a/tolk-tester/tests/test-math.tolk b/tolk-tester/tests/test-math.tolk index 893035fd..95444e6b 100644 --- a/tolk-tester/tests/test-math.tolk +++ b/tolk-tester/tests/test-math.tolk @@ -218,7 +218,7 @@ fun fixed248_log2_const(): int { @pure @inline fun Pi_const_f254(): int { - var (c: auto, _) = Pi_xconst_f254(); + var (c, _) = Pi_xconst_f254(); return c; } @@ -1019,7 +1019,8 @@ fun test_nrand(n: int): tuple { repeat (n) { var x: int = fixed248_nrand(); var bucket: int = (abs(x) >> 243); // 255 buckets starting from x=0, each 1/32 wide - t.tset(bucket, t.tupleAt(bucket) + 1); + var at_bucket: int = t.tupleAt(bucket); + t.tset(bucket, at_bucket + 1); } return t; } diff --git a/tolk-tester/tests/try-func.tolk 
b/tolk-tester/tests/try-func.tolk index 5ce03ff1..dfd72e9e 100644 --- a/tolk-tester/tests/try-func.tolk +++ b/tolk-tester/tests/try-func.tolk @@ -1,6 +1,3 @@ -fun unsafeGetInt(any: X): int - asm "NOP"; - fun foo(x: int): int { try { if (x == 7) { @@ -28,7 +25,7 @@ fun foo_inlineref(x: int): int { if (x == 7) { throw (44, 2); } return x; } catch (_, arg) { - return unsafeGetInt(arg); + return arg as int; } } @@ -83,7 +80,7 @@ fun foo_big( } return x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 + x11 + x12 + x13 + x14 + x15 + x16 + x17 + x18 + x19 + x20; } catch (code, arg) { - return unsafeGetInt(arg); + return arg as int; } } diff --git a/tolk-tester/tests/var-apply.tolk b/tolk-tester/tests/var-apply.tolk index 6a84a4fa..a0918c18 100644 --- a/tolk-tester/tests/var-apply.tolk +++ b/tolk-tester/tests/var-apply.tolk @@ -92,7 +92,7 @@ fun always_throw2(x: int) { throw 239 + x; } -global global_f: int -> (); +global global_f: int -> void; @method_id(104) fun testGlobalVarApply() { @@ -105,6 +105,30 @@ fun testGlobalVarApply() { } } +@method_id(105) +fun testVarApply2() { + var creator = createEmptyTuple; + var t = creator(); + t.tuplePush(1); + var sizer = t.tupleSize; + return sizer(t); +} + +fun getTupleLastGetter(): (tuple) -> X { + return tupleLast; +} + +@method_id(106) +fun testVarApply3() { + var t = createEmptyTuple(); + t.tuplePush(1); + t.tuplePush([2]); + var getIntAt = t.tupleAt; + var getTupleFirstInt = createEmptyTuple().tupleFirst; + var getTupleLastTuple = getTupleLastGetter(); + return (getIntAt(t, 0), getTupleFirstInt(t), getTupleLastTuple(t), getTupleLastGetter()(t)); +} + fun main() {} /** @@ -112,4 +136,6 @@ fun main() {} @testcase | 102 | | 1000 @testcase | 103 | | [ 1000 1000 0 1001 ] @testcase | 104 | | 240 +@testcase | 105 | | 1 +@testcase | 106 | | 1 1 [ 2 ] [ 2 ] */ diff --git a/tolk/CMakeLists.txt b/tolk/CMakeLists.txt index 0c3e7c63..2ee69686 100644 --- a/tolk/CMakeLists.txt +++ b/tolk/CMakeLists.txt @@ -10,10 +10,10 @@ set(TOLK_SOURCE 
constant-evaluator.cpp pipe-discover-parse-sources.cpp pipe-register-symbols.cpp - pipe-resolve-symbols.cpp + pipe-resolve-identifiers.cpp pipe-calc-rvalue-lvalue.cpp pipe-detect-unreachable.cpp - pipe-infer-check-types.cpp + pipe-infer-types-and-calls.cpp pipe-refine-lvalue-for-mutate.cpp pipe-check-rvalue-lvalue.cpp pipe-check-pure-impure.cpp @@ -21,7 +21,8 @@ set(TOLK_SOURCE pipe-ast-to-legacy.cpp pipe-find-unused-symbols.cpp pipe-generate-fif-output.cpp - unify-types.cpp + type-system.cpp + generics-helpers.cpp abscode.cpp analyzer.cpp asmops.cpp diff --git a/tolk/abscode.cpp b/tolk/abscode.cpp index 253e8012..7bcb0f84 100644 --- a/tolk/abscode.cpp +++ b/tolk/abscode.cpp @@ -16,6 +16,7 @@ */ #include "tolk.h" #include "compiler-state.h" +#include "type-system.h" namespace tolk { @@ -28,7 +29,7 @@ namespace tolk { void TmpVar::dump(std::ostream& os) const { show(os); os << " : " << v_type << " (width "; - v_type->show_width(os); + os << v_type->calc_width_on_stack(); os << ")"; if (coord > 0) { os << " = _" << (coord >> 8) << '.' 
<< (coord & 255); @@ -443,7 +444,7 @@ void CodeBlob::print(std::ostream& os, int flags) const { os << "-------- END ---------\n\n"; } -var_idx_t CodeBlob::create_var(TypeExpr* var_type, const LocalVarData* v_sym, SrcLocation location) { +var_idx_t CodeBlob::create_var(TypePtr var_type, const LocalVarData* v_sym, SrcLocation location) { vars.emplace_back(var_cnt, var_type, v_sym, location); return var_cnt++; } @@ -454,7 +455,7 @@ bool CodeBlob::import_params(FormalArgList&& arg_list) { } std::vector list; for (const auto& par : arg_list) { - TypeExpr* arg_type; + TypePtr arg_type; const LocalVarData* arg_sym; SrcLocation arg_loc; std::tie(arg_type, arg_sym, arg_loc) = par; diff --git a/tolk/analyzer.cpp b/tolk/analyzer.cpp index 495ae03b..8539afdd 100644 --- a/tolk/analyzer.cpp +++ b/tolk/analyzer.cpp @@ -16,6 +16,7 @@ */ #include "tolk.h" #include "compiler-state.h" +#include "type-system.h" namespace tolk { @@ -25,38 +26,30 @@ namespace tolk { * */ -void CodeBlob::simplify_var_types() { - for (TmpVar& var : vars) { - TypeExpr::remove_indirect(var.v_type); - var.v_type->recompute_width(); - } -} - int CodeBlob::split_vars(bool strict) { int n = var_cnt, changes = 0; for (int j = 0; j < var_cnt; j++) { TmpVar& var = vars[j]; - if (strict && var.v_type->minw != var.v_type->maxw) { + int width_j = var.v_type->calc_width_on_stack(); + if (strict && width_j < 0) { throw ParseError{var.where, "variable does not have fixed width, cannot manipulate it"}; } - std::vector comp_types; - int k = var.v_type->extract_components(comp_types); - tolk_assert(k <= 254 && n <= 0x7fff00); - tolk_assert((unsigned)k == comp_types.size()); - if (k != 1) { - var.coord = ~((n << 8) + k); - for (int i = 0; i < k; i++) { - auto v = create_var(comp_types[i], vars[j].v_sym, vars[j].where); - tolk_assert(v == n + i); - tolk_assert(vars[v].idx == v); - vars[v].coord = ((int)j << 8) + i + 1; - } - n += k; - ++changes; - } else if (strict && var.v_type->minw != 1) { - throw ParseError{var.where, - 
"cannot work with variable or variable component of width greater than one"}; + if (width_j == 1) { + continue; } + std::vector comp_types; + var.v_type->extract_components(comp_types); + tolk_assert(width_j <= 254 && n <= 0x7fff00); + tolk_assert((unsigned)width_j == comp_types.size()); + var.coord = ~((n << 8) + width_j); + for (int i = 0; i < width_j; i++) { + auto v = create_var(comp_types[i], vars[j].v_sym, vars[j].where); + tolk_assert(v == n + i); + tolk_assert(vars[v].idx == v); + vars[v].coord = ((int)j << 8) + i + 1; + } + n += width_j; + ++changes; } if (!changes) { return 0; @@ -687,7 +680,7 @@ void CodeBlob::fwd_analyze() { tolk_assert(ops && ops->cl == Op::_Import); for (var_idx_t i : ops->left) { values += i; - if (vars[i].v_type->is_int()) { + if (vars[i].v_type == TypeDataInt::create()) { values[i]->val |= VarDescr::_Int; } } @@ -732,7 +725,7 @@ VarDescrList Op::fwd_analyze(VarDescrList values) { } case _Call: { prepare_args(values); - if (!f_sym->is_regular_function()) { + if (!f_sym->is_code_function()) { std::vector res; res.reserve(left.size()); for (var_idx_t i : left) { diff --git a/tolk/ast-from-tokens.cpp b/tolk/ast-from-tokens.cpp index 22d64442..767e6066 100644 --- a/tolk/ast-from-tokens.cpp +++ b/tolk/ast-from-tokens.cpp @@ -16,8 +16,8 @@ */ #include "ast-from-tokens.h" #include "ast.h" +#include "type-system.h" #include "platform-utils.h" -#include "type-expr.h" #include "tolk-version.h" /* @@ -130,9 +130,10 @@ static AnyExprV maybe_replace_eq_null_with_isNull_call(V v) } auto v_ident = createV(v->loc, "__isNull"); // built-in function + auto v_ref = createV(v->loc, v_ident, nullptr); AnyExprV v_null = v->get_lhs()->type == ast_null_keyword ? 
v->get_rhs() : v->get_lhs(); AnyExprV v_arg = createV(v->loc, v_null, false); - AnyExprV v_isNull = createV(v->loc, v_ident, createV(v->loc, {v_arg})); + AnyExprV v_isNull = createV(v->loc, v_ref, createV(v->loc, {v_arg})); if (v->tok == tok_neq) { v_isNull = createV(v->loc, "!", tok_logical_not, v_isNull); } @@ -146,98 +147,14 @@ static AnyExprV maybe_replace_eq_null_with_isNull_call(V v) * */ -// TE ::= TA | TA -> TE -// TA ::= int | ... | cont | var | _ | () | ( TE { , TE } ) | [ TE { , TE } ] -static TypeExpr* parse_type(Lexer& lex, V genericsT_list); - -static TypeExpr* parse_type1(Lexer& lex, V genericsT_list) { - switch (lex.tok()) { - case tok_int: - lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Int); - case tok_cell: - lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Cell); - case tok_slice: - lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Slice); - case tok_builder: - lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Builder); - case tok_continuation: - lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Continutaion); - case tok_tuple: - lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Tuple); - case tok_auto: - lex.next(); - return TypeExpr::new_hole(); - case tok_void: - lex.next(); - return TypeExpr::new_tensor({}); - case tok_bool: - lex.error("bool type is not supported yet"); - case tok_self: - lex.error("`self` type can be used only as a return type of a function (enforcing it to be chainable)"); - case tok_identifier: - if (int idx = genericsT_list ? 
genericsT_list->lookup_idx(lex.cur_str()) : -1; idx != -1) { - lex.next(); - return genericsT_list->get_item(idx)->created_type; - } - break; - case tok_oppar: { - lex.next(); - if (lex.tok() == tok_clpar) { - lex.next(); - return TypeExpr::new_unit(); - } - std::vector sub{1, parse_type(lex, genericsT_list)}; - while (lex.tok() == tok_comma) { - lex.next(); - sub.push_back(parse_type(lex, genericsT_list)); - } - lex.expect(tok_clpar, "`)`"); - return TypeExpr::new_tensor(std::move(sub)); - } - case tok_opbracket: { - lex.next(); - if (lex.tok() == tok_clbracket) { - lex.next(); - return TypeExpr::new_tuple({}); - } - std::vector sub{1, parse_type(lex, genericsT_list)}; - while (lex.tok() == tok_comma) { - lex.next(); - sub.push_back(parse_type(lex, genericsT_list)); - } - lex.expect(tok_clbracket, "`]`"); - return TypeExpr::new_tuple(std::move(sub)); - } - default: - break; - } - lex.unexpected(""); -} - -static TypeExpr* parse_type(Lexer& lex, V genericsT_list) { - TypeExpr* res = parse_type1(lex, genericsT_list); - if (lex.tok() == tok_arrow) { - lex.next(); - TypeExpr* to = parse_type(lex, genericsT_list); - return TypeExpr::new_map(res, to); - } - return res; -} AnyExprV parse_expr(Lexer& lex); -static AnyV parse_parameter(Lexer& lex, V genericsT_list, bool is_first) { +static AnyV parse_parameter(Lexer& lex, bool is_first) { SrcLocation loc = lex.cur_location(); // optional keyword `mutate` meaning that a function will mutate a passed argument (like passed by reference) bool declared_as_mutate = false; - bool is_param_self = false; if (lex.tok() == tok_mutate) { lex.next(); declared_as_mutate = true; @@ -252,21 +169,14 @@ static AnyV parse_parameter(Lexer& lex, V genericsT_list, bo lex.error("`self` can only be the first parameter"); } param_name = "self"; - is_param_self = true; } else if (lex.tok() != tok_underscore) { lex.unexpected("parameter name"); } lex.next(); - // parameter type after colon, also mandatory (even explicit ":auto") + // parameter type 
after colon are mandatory lex.expect(tok_colon, "`: `"); - TypeExpr* param_type = parse_type(lex, genericsT_list); - if (declared_as_mutate && !param_type->has_fixed_width()) { - throw ParseError(loc, "`mutate` parameter must be strictly typed"); - } - if (is_param_self && !param_type->has_fixed_width()) { - throw ParseError(loc, "`self` parameter must be strictly typed"); - } + TypePtr param_type = parse_type_from_tokens(lex); return createV(loc, param_name, param_type, declared_as_mutate); } @@ -281,7 +191,7 @@ static AnyV parse_global_var_declaration(Lexer& lex, const std::vector(lex.cur_location(), lex.cur_str()); lex.next(); lex.expect(tok_colon, "`:`"); - TypeExpr* declared_type = parse_type(lex, nullptr); + TypePtr declared_type = parse_type_from_tokens(lex); if (lex.tok() == tok_comma) { lex.error("multiple declarations are not allowed, split globals on separate lines"); } @@ -301,18 +211,10 @@ static AnyV parse_constant_declaration(Lexer& lex, const std::vector(lex.cur_location(), lex.cur_str()); lex.next(); - TypeExpr *declared_type = nullptr; + TypePtr declared_type = nullptr; if (lex.tok() == tok_colon) { lex.next(); - if (lex.tok() == tok_int) { - declared_type = TypeExpr::new_atomic(TypeExpr::_Int); - lex.next(); - } else if (lex.tok() == tok_slice) { - declared_type = TypeExpr::new_atomic(TypeExpr::_Slice); - lex.next(); - } else { - lex.error("a constant can be int or slice only"); - } + declared_type = parse_type_from_tokens(lex); } lex.expect(tok_assign, "`=`"); AnyExprV init_value = parse_expr(lex); @@ -324,15 +226,15 @@ static AnyV parse_constant_declaration(Lexer& lex, const std::vector parse_parameter_list(Lexer& lex, V genericsT_list) { +static V parse_parameter_list(Lexer& lex) { SrcLocation loc = lex.cur_location(); std::vector params; lex.expect(tok_oppar, "parameter list"); if (lex.tok() != tok_clpar) { - params.push_back(parse_parameter(lex, genericsT_list, true)); + params.push_back(parse_parameter(lex, true)); while (lex.tok() == 
tok_comma) { lex.next(); - params.push_back(parse_parameter(lex, genericsT_list, false)); + params.push_back(parse_parameter(lex, false)); } } lex.expect(tok_clpar, "`)`"); @@ -369,6 +271,26 @@ static V parse_argument_list(Lexer& lex) { return createV(loc, std::move(args)); } +static V parse_maybe_instantiationTs_after_identifier(Lexer& lex) { + lex.check(tok_lt, "`<`"); + Lexer::SavedPositionForLookahead backup = lex.save_parsing_position(); + try { + SrcLocation loc = lex.cur_location(); + lex.next(); + std::vector instantiationTs; + instantiationTs.push_back(createV(lex.cur_location(), parse_type_from_tokens(lex))); + while (lex.tok() == tok_comma) { + lex.next(); + instantiationTs.push_back(createV(lex.cur_location(), parse_type_from_tokens(lex))); + } + lex.expect(tok_gt, "`>`"); + return createV(loc, std::move(instantiationTs)); + } catch (const ParseError&) { + lex.restore_position(backup); + return nullptr; + } +} + // parse (expr) / [expr] / identifier / number static AnyExprV parse_expr100(Lexer& lex) { SrcLocation loc = lex.cur_location(); @@ -396,7 +318,7 @@ static AnyExprV parse_expr100(Lexer& lex) { lex.next(); if (lex.tok() == tok_clbracket) { lex.next(); - return createV(loc, {}); + return createV(loc, {}); } std::vector items(1, parse_expr(lex)); while (lex.tok() == tok_comma) { @@ -404,7 +326,7 @@ static AnyExprV parse_expr100(Lexer& lex) { items.emplace_back(parse_expr(lex)); } lex.expect(tok_clbracket, "`]`"); - return createV(loc, std::move(items)); + return createV(loc, std::move(items)); } case tok_int_const: { std::string_view orig_str = lex.cur_str(); @@ -443,12 +365,17 @@ static AnyExprV parse_expr100(Lexer& lex) { } case tok_self: { lex.next(); - return createV(loc); + auto v_ident = createV(loc, "self"); + return createV(loc, v_ident, nullptr); } case tok_identifier: { - std::string_view str_val = lex.cur_str(); + auto v_ident = createV(loc, lex.cur_str()); + V v_instantiationTs = nullptr; lex.next(); - return createV(loc, str_val); + if 
(lex.tok() == tok_lt) { + v_instantiationTs = parse_maybe_instantiationTs_after_identifier(lex); + } + return createV(loc, v_ident, v_instantiationTs); } default: { // show a proper error for `int i` (FunC-style declarations) @@ -461,25 +388,36 @@ static AnyExprV parse_expr100(Lexer& lex) { } } -// parse E(args) +// parse E(...) (left-to-right) static AnyExprV parse_expr90(Lexer& lex) { AnyExprV res = parse_expr100(lex); - if (lex.tok() == tok_oppar) { - return createV(res->loc, res, parse_argument_list(lex)); + while (lex.tok() == tok_oppar) { + res = createV(res->loc, res, parse_argument_list(lex)); } return res; } -// parse E.method(...) (left-to-right) +// parse E.field and E.method(...) (left-to-right) static AnyExprV parse_expr80(Lexer& lex) { AnyExprV lhs = parse_expr90(lex); while (lex.tok() == tok_dot) { SrcLocation loc = lex.cur_location(); lex.next(); - lex.check(tok_identifier, "method name"); - std::string_view method_name = lex.cur_str(); - lex.next(); - lhs = createV(loc, method_name, lhs, parse_argument_list(lex)); + V v_ident = nullptr; + V v_instantiationTs = nullptr; + if (lex.tok() == tok_identifier) { + v_ident = createV(lex.cur_location(), lex.cur_str()); + lex.next(); + if (lex.tok() == tok_lt) { + v_instantiationTs = parse_maybe_instantiationTs_after_identifier(lex); + } + } else { + lex.unexpected("method name"); + } + lhs = createV(loc, lhs, v_ident, v_instantiationTs); + while (lex.tok() == tok_oppar) { + lhs = createV(lex.cur_location(), lhs, parse_argument_list(lex)); + } } return lhs; } @@ -497,15 +435,27 @@ static AnyExprV parse_expr75(Lexer& lex) { return parse_expr80(lex); } +// parse E as +static AnyExprV parse_expr40(Lexer& lex) { + AnyExprV lhs = parse_expr75(lex); + if (lex.tok() == tok_as) { + SrcLocation loc = lex.cur_location(); + lex.next(); + TypePtr cast_to_type = parse_type_from_tokens(lex); + lhs = createV(loc, lhs, cast_to_type); + } + return lhs; +} + // parse E * / % ^/ ~/ E (left-to-right) static AnyExprV 
parse_expr30(Lexer& lex) { - AnyExprV lhs = parse_expr75(lex); + AnyExprV lhs = parse_expr40(lex); TokenType t = lex.tok(); while (t == tok_mul || t == tok_div || t == tok_mod || t == tok_divC || t == tok_divR) { SrcLocation loc = lex.cur_location(); std::string_view operator_name = lex.cur_str(); lex.next(); - AnyExprV rhs = parse_expr75(lex); + AnyExprV rhs = parse_expr40(lex); lhs = createV(loc, operator_name, t, lhs, rhs); t = lex.tok(); } @@ -597,15 +547,20 @@ static AnyExprV parse_expr13(Lexer& lex) { static AnyExprV parse_expr10(Lexer& lex) { AnyExprV lhs = parse_expr13(lex); TokenType t = lex.tok(); - if (t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || - t == tok_set_mod || t == tok_set_lshift || t == tok_set_rshift || - t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor || - t == tok_assign) { + if (t == tok_assign) { SrcLocation loc = lex.cur_location(); - std::string_view operator_name = lex.cur_str(); lex.next(); AnyExprV rhs = parse_expr10(lex); - return createV(loc, operator_name, t, lhs, rhs); + return createV(loc, lhs, rhs); + } + if (t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || + t == tok_set_mod || t == tok_set_lshift || t == tok_set_rshift || + t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor) { + SrcLocation loc = lex.cur_location(); + std::string_view operator_name = lex.cur_str().substr(0, lex.cur_str().size() - 1); // "+" for += + lex.next(); + AnyExprV rhs = parse_expr10(lex); + return createV(loc, operator_name, t, lhs, rhs); } if (t == tok_question) { SrcLocation loc = lex.cur_location(); @@ -631,7 +586,7 @@ static AnyExprV parse_var_declaration_lhs(Lexer& lex, bool is_immutable) { AnyExprV first = parse_var_declaration_lhs(lex, is_immutable); if (lex.tok() == tok_clpar) { lex.next(); - return createV(loc, first); + return first; } std::vector args(1, first); while (lex.tok() == tok_comma) { @@ -649,51 
+604,51 @@ static AnyExprV parse_var_declaration_lhs(Lexer& lex, bool is_immutable) { args.push_back(parse_var_declaration_lhs(lex, is_immutable)); } lex.expect(tok_clbracket, "`]`"); - return createV(loc, std::move(args)); + return createV(loc, std::move(args)); } if (lex.tok() == tok_identifier) { auto v_ident = createV(loc, lex.cur_str()); - TypeExpr* declared_type = nullptr; + TypePtr declared_type = nullptr; bool marked_as_redef = false; lex.next(); if (lex.tok() == tok_colon) { lex.next(); - declared_type = parse_type(lex, nullptr); + declared_type = parse_type_from_tokens(lex); } else if (lex.tok() == tok_redef) { lex.next(); marked_as_redef = true; } - return createV(loc, v_ident, declared_type, is_immutable, marked_as_redef); + return createV(loc, v_ident, declared_type, is_immutable, marked_as_redef); } if (lex.tok() == tok_underscore) { - TypeExpr* declared_type = nullptr; + TypePtr declared_type = nullptr; lex.next(); if (lex.tok() == tok_colon) { lex.next(); - declared_type = parse_type(lex, nullptr); + declared_type = parse_type_from_tokens(lex); } - return createV(loc, createV(loc), declared_type, true, false); + return createV(loc, createV(loc, ""), declared_type, true, false); } lex.unexpected("variable name"); } -static AnyV parse_local_vars_declaration(Lexer& lex) { +static AnyV parse_local_vars_declaration_assignment(Lexer& lex) { SrcLocation loc = lex.cur_location(); bool is_immutable = lex.tok() == tok_val; lex.next(); - AnyExprV lhs = parse_var_declaration_lhs(lex, is_immutable); + AnyExprV lhs = createV(loc, parse_var_declaration_lhs(lex, is_immutable)); if (lex.tok() != tok_assign) { lex.error("variables declaration must be followed by assignment: `var xxx = ...`"); } lex.next(); - AnyExprV assigned_val = parse_expr(lex); + AnyExprV rhs = parse_expr(lex); if (lex.tok() == tok_comma) { lex.error("multiple declarations are not allowed, split variables on separate lines"); } lex.expect(tok_semicolon, "`;`"); - return createV(loc, lhs, 
assigned_val); + return createV(loc, lhs, rhs); } static V parse_sequence(Lexer& lex) { @@ -711,8 +666,8 @@ static V parse_sequence(Lexer& lex) { static AnyV parse_return_statement(Lexer& lex) { SrcLocation loc = lex.cur_location(); lex.expect(tok_return, "`return`"); - AnyExprV child = lex.tok() == tok_semicolon // `return;` actually means `return ();` (which is void) - ? createV(lex.cur_location(), {}) + AnyExprV child = lex.tok() == tok_semicolon // `return;` actually means "nothing" (inferred as void) + ? createV(lex.cur_location()) : parse_expr(lex); lex.expect(tok_semicolon, "`;`"); return createV(loc, child); @@ -784,15 +739,22 @@ static AnyExprV parse_catch_variable(Lexer& lex) { if (lex.tok() == tok_identifier) { std::string_view var_name = lex.cur_str(); lex.next(); - return createV(loc, var_name); + auto v_ident = createV(loc, var_name); + return createV(loc, v_ident, nullptr); } if (lex.tok() == tok_underscore) { lex.next(); - return createV(loc); + auto v_ident = createV(loc, ""); + return createV(loc, v_ident, nullptr); } lex.unexpected("identifier"); } +static AnyExprV create_catch_underscore_variable(const Lexer& lex) { + auto v_ident = createV(lex.cur_location(), ""); + return createV(lex.cur_location(), v_ident, nullptr); +} + static AnyV parse_throw_statement(Lexer& lex) { SrcLocation loc = lex.cur_location(); lex.expect(tok_throw, "`throw`"); @@ -853,12 +815,12 @@ static AnyV parse_try_catch_statement(Lexer& lex) { lex.next(); catch_args.push_back(parse_catch_variable(lex)); } else { // catch (excNo) -> catch (excNo, _) - catch_args.push_back(createV(catch_loc)); + catch_args.push_back(create_catch_underscore_variable(lex)); } lex.expect(tok_clpar, "`)`"); } else { // catch -> catch (_, _) - catch_args.push_back(createV(catch_loc)); - catch_args.push_back(createV(catch_loc)); + catch_args.push_back(create_catch_underscore_variable(lex)); + catch_args.push_back(create_catch_underscore_variable(lex)); } V catch_expr = createV(catch_loc, 
std::move(catch_args)); @@ -868,9 +830,9 @@ static AnyV parse_try_catch_statement(Lexer& lex) { AnyV parse_statement(Lexer& lex) { switch (lex.tok()) { - case tok_var: - case tok_val: - return parse_local_vars_declaration(lex); + case tok_var: // `var x = 0` is technically an expression, but can not appear in "any place", + case tok_val: // only as a separate declaration + return parse_local_vars_declaration_assignment(lex); case tok_opbrace: return parse_sequence(lex); case tok_return: @@ -952,12 +914,10 @@ static AnyV parse_genericsT_list(Lexer& lex) { SrcLocation loc = lex.cur_location(); std::vector genericsT_items; lex.expect(tok_lt, "`<`"); - int idx = 0; while (true) { lex.check(tok_identifier, "T"); std::string_view nameT = lex.cur_str(); - TypeExpr* type = TypeExpr::new_var(idx++); - genericsT_items.emplace_back(createV(lex.cur_location(), type, nameT)); + genericsT_items.emplace_back(createV(lex.cur_location(), nameT)); lex.next(); if (lex.tok() != tok_comma) { break; @@ -1040,11 +1000,11 @@ static AnyV parse_function_declaration(Lexer& lex, const std::vectoras(); } - V v_param_list = parse_parameter_list(lex, genericsT_list)->as(); + V v_param_list = parse_parameter_list(lex)->as(); bool accepts_self = !v_param_list->empty() && v_param_list->get_param(0)->param_name == "self"; int n_mutate_params = v_param_list->get_mutate_params_count(); - TypeExpr* ret_type = nullptr; + TypePtr ret_type = nullptr; bool returns_self = false; if (lex.tok() == tok_colon) { // : (if absent, it means "auto infer", not void) lex.next(); @@ -1054,9 +1014,9 @@ static AnyV parse_function_declaration(Lexer& lex, const std::vector ret_tensor_items; - ret_tensor_items.reserve(1 + n_mutate_params); - for (AnyV v_param : v_param_list->get_params()) { - if (v_param->as()->declared_as_mutate) { - ret_tensor_items.emplace_back(v_param->as()->declared_type); - } - } - ret_tensor_items.emplace_back(ret_type ? 
ret_type : TypeExpr::new_hole()); - ret_type = TypeExpr::new_tensor(std::move(ret_tensor_items)); - } - AnyV v_body = nullptr; if (lex.tok() == tok_builtin) { @@ -1096,32 +1044,43 @@ static AnyV parse_function_declaration(Lexer& lex, const std::vector(loc, v_ident, v_param_list, v_body); - f_declaration->ret_type = ret_type ? ret_type : TypeExpr::new_hole(); - f_declaration->is_entrypoint = is_entrypoint; - f_declaration->genericsT_list = genericsT_list; - f_declaration->marked_as_get_method = is_get_method; - f_declaration->marked_as_builtin = v_body->type == ast_empty_statement; - f_declaration->accepts_self = accepts_self; - f_declaration->returns_self = returns_self; + int flags = 0; + if (is_entrypoint) { + flags |= FunctionData::flagIsEntrypoint; + } + if (is_get_method) { + flags |= FunctionData::flagGetMethod; + } + if (accepts_self) { + flags |= FunctionData::flagAcceptsSelf; + } + if (returns_self) { + flags |= FunctionData::flagReturnsSelf; + } + td::RefInt256 method_id; for (auto v_annotation : annotations) { switch (v_annotation->kind) { case AnnotationKind::inline_simple: - f_declaration->marked_as_inline = true; + flags |= FunctionData::flagInline; break; case AnnotationKind::inline_ref: - f_declaration->marked_as_inline_ref = true; + flags |= FunctionData::flagInlineRef; break; case AnnotationKind::pure: - f_declaration->marked_as_pure = true; + flags |= FunctionData::flagMarkedAsPure; break; - case AnnotationKind::method_id: + case AnnotationKind::method_id: { if (is_get_method || genericsT_list || is_entrypoint || n_mutate_params || accepts_self) { v_annotation->error("@method_id can be specified only for regular functions"); } - f_declaration->method_id = v_annotation->get_arg()->get_item(0)->as(); + auto v_int = v_annotation->get_arg()->get_item(0)->as(); + if (v_int->intval.is_null() || !v_int->intval->signed_fits_bits(32)) { + v_int->error("invalid integer constant"); + } + method_id = v_int->intval; break; + } case AnnotationKind::deprecated: 
// no special handling break; @@ -1131,7 +1090,7 @@ static AnyV parse_function_declaration(Lexer& lex, const std::vector(loc, v_ident, v_param_list, v_body, ret_type, genericsT_list, std::move(method_id), flags); } static AnyV parse_tolk_required_version(Lexer& lex) { @@ -1148,7 +1107,7 @@ static AnyV parse_tolk_required_version(Lexer& lex) { return createV(loc, semver); // semicolon is not necessary } -static AnyV parse_import_statement(Lexer& lex) { +static AnyV parse_import_directive(Lexer& lex) { SrcLocation loc = lex.cur_location(); lex.expect(tok_import, "`import`"); lex.check(tok_string_const, "source file name"); @@ -1158,7 +1117,7 @@ static AnyV parse_import_statement(Lexer& lex) { } auto v_str = createV(lex.cur_location(), rel_filename, 0); lex.next(); - return createV(loc, v_str); // semicolon is not necessary + return createV(loc, v_str); // semicolon is not necessary } // the main (exported) function @@ -1179,7 +1138,7 @@ AnyV parse_src_file_to_ast(const SrcFile* file) { if (!annotations.empty()) { lex.unexpected("declaration after @annotations"); } - toplevel_declarations.push_back(parse_import_statement(lex)); + toplevel_declarations.push_back(parse_import_directive(lex)); break; case tok_semicolon: if (!annotations.empty()) { diff --git a/tolk/ast-replacer.h b/tolk/ast-replacer.h index 45f4c638..c8350747 100644 --- a/tolk/ast-replacer.h +++ b/tolk/ast-replacer.h @@ -85,60 +85,65 @@ class ASTReplacerInFunctionBody : public ASTReplacer { protected: using parent = ASTReplacerInFunctionBody; + // expressions + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + 
virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + // statements virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - 
virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } - virtual AnyExprV replace(V v) { return replace_children(v); } AnyExprV replace(AnyExprV v) final { switch (v->type) { case ast_empty_expression: return replace(v->as()); case ast_parenthesized_expression: return replace(v->as()); case ast_tensor: return replace(v->as()); - case ast_tensor_square: return replace(v->as()); - case ast_identifier: return replace(v->as()); + case ast_typed_tuple: return replace(v->as()); + case ast_reference: return replace(v->as()); + case ast_local_var_lhs: return replace(v->as()); + case ast_local_vars_declaration: return replace(v->as()); case ast_int_const: return replace(v->as()); case ast_string_const: return replace(v->as()); case ast_bool_const: return replace(v->as()); case ast_null_keyword: return replace(v->as()); - case ast_self_keyword: return replace(v->as()); case ast_argument: return replace(v->as()); case ast_argument_list: return replace(v->as()); + case ast_dot_access: return replace(v->as()); case ast_function_call: return replace(v->as()); - case ast_dot_method_call: return 
replace(v->as()); case ast_underscore: return replace(v->as()); + case ast_assign: return replace(v->as()); + case ast_set_assign: return replace(v->as()); case ast_unary_operator: return replace(v->as()); case ast_binary_operator: return replace(v->as()); case ast_ternary_operator: return replace(v->as()); - case ast_local_var: return replace(v->as()); + case ast_cast_as_operator: return replace(v->as()); default: throw UnexpectedASTNodeType(v, "ASTReplacerInFunctionBody::replace"); } @@ -147,17 +152,19 @@ protected: AnyV replace(AnyV v) final { switch (v->type) { case ast_empty_statement: return replace(v->as()); - case ast_return_statement: return replace(v->as()); case ast_sequence: return replace(v->as()); + case ast_return_statement: return replace(v->as()); + case ast_if_statement: return replace(v->as()); case ast_repeat_statement: return replace(v->as()); case ast_while_statement: return replace(v->as()); case ast_do_while_statement: return replace(v->as()); case ast_throw_statement: return replace(v->as()); case ast_assert_statement: return replace(v->as()); case ast_try_catch_statement: return replace(v->as()); - case ast_if_statement: return replace(v->as()); - case ast_local_vars_declaration: return replace(v->as()); - case ast_asm_body: return replace(v->as()); +#ifdef TOLK_DEBUG + case ast_asm_body: + throw UnexpectedASTNodeType(v, "ASTReplacer::replace"); +#endif default: { // be very careful, don't forget to handle all statements (not expressions) above! 
AnyExprV as_expr = reinterpret_cast(v); @@ -167,21 +174,22 @@ protected: } public: - void start_replacing_in_function(V v) { - replace(v->get_body()); + virtual bool should_visit_function(const FunctionData* fun_ref) = 0; + + void start_replacing_in_function(const FunctionData* fun_ref, V v_function) { + replace(v_function->get_body()); } }; + +const std::vector& get_all_not_builtin_functions(); + template -void replace_ast_of_all_functions(const AllSrcFiles& all_files) { - for (const SrcFile* file : all_files) { - for (AnyV v : file->ast->as()->get_toplevel_declarations()) { - if (auto v_func = v->try_as()) { - if (v_func->is_regular_function()) { - BodyReplacerT visitor; - visitor.start_replacing_in_function(v_func); - } - } +void replace_ast_of_all_functions() { + BodyReplacerT visitor; + for (const FunctionData* fun_ref : get_all_not_builtin_functions()) { + if (visitor.should_visit_function(fun_ref)) { + visitor.start_replacing_in_function(fun_ref, fun_ref->ast_root->as()); } } } diff --git a/tolk/ast-replicator.h b/tolk/ast-replicator.h new file mode 100644 index 00000000..02198adb --- /dev/null +++ b/tolk/ast-replicator.h @@ -0,0 +1,255 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#pragma once + +#include "ast.h" +#include "platform-utils.h" + +namespace tolk { + +class ASTReplicator { +protected: + virtual AnyV clone(AnyV v) = 0; + virtual AnyExprV clone(AnyExprV v) = 0; + virtual TypePtr clone(TypePtr) = 0; + +public: + virtual ~ASTReplicator() = default; +}; + +class ASTReplicatorFunction : public ASTReplicator { +protected: + using parent = ASTReplicatorFunction; + + std::vector clone(const std::vector& items) { + std::vector result; + result.reserve(items.size()); + for (AnyV item : items) { + result.push_back(clone(item)); + } + return result; + } + + std::vector clone(const std::vector& items) { + std::vector result; + result.reserve(items.size()); + for (AnyExprV item : items) { + result.push_back(clone(item)); + } + return result; + } + + // expressions + + virtual V clone(V v) { + return createV(v->loc); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_expr())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_items())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_items())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_identifier()), v->has_instantiationTs() ? 
clone(v->get_instantiationTs()) : nullptr); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_identifier()), clone(v->declared_type), v->is_immutable, v->marked_as_redef); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_expr())); + } + virtual V clone(V v) { + return createV(v->loc, v->intval, v->orig_str); + } + virtual V clone(V v) { + return createV(v->loc, v->str_val, v->modifier); + } + virtual V clone(V v) { + return createV(v->loc, v->bool_val); + } + virtual V clone(V v) { + return createV(v->loc); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_expr()), v->passed_as_mutate); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_arguments())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_obj()), clone(v->get_identifier()), v->has_instantiationTs() ? clone(v->get_instantiationTs()) : nullptr); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_callee()), clone(v->get_arg_list())); + } + virtual V clone(V v) { + return createV(v->loc); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_lhs()), clone(v->get_rhs())); + } + virtual V clone(V v) { + return createV(v->loc, v->operator_name, v->tok, clone(v->get_lhs()), clone(v->get_rhs())); + } + virtual V clone(V v) { + return createV(v->loc, v->operator_name, v->tok, clone(v->get_rhs())); + } + virtual V clone(V v) { + return createV(v->loc, v->operator_name, v->tok, clone(v->get_lhs()), clone(v->get_rhs())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_cond()), clone(v->get_when_true()), clone(v->get_when_false())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_expr()), clone(v->cast_to_type)); + } + + // statements + + virtual V clone(V v) { + return createV(v->loc); + } + virtual V clone(V v) { + return createV(v->loc, v->loc_end, clone(v->get_items())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_return_value())); + } + 
virtual V clone(V v) { + return createV(v->loc, v->is_ifnot, clone(v->get_cond()), clone(v->get_if_body()), clone(v->get_else_body())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_cond()), clone(v->get_body())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_cond()), clone(v->get_body())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_body()), clone(v->get_cond())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_thrown_code()), clone(v->get_thrown_arg())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_cond()), clone(v->get_thrown_code())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_try_body()), clone(v->get_catch_expr()), clone(v->get_catch_body())); + } + virtual V clone(V v) { + return createV(v->loc, v->arg_order, v->ret_order, clone(v->get_asm_commands())); + } + + // other + + virtual V clone(V v) { + return createV(v->loc, v->name); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->substituted_type)); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_items())); + } + virtual V clone(V v) { + return createV(v->loc, v->param_name, clone(v->declared_type), v->declared_as_mutate); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_params())); + } + + AnyExprV clone(AnyExprV v) final { + switch (v->type) { + case ast_empty_expression: return clone(v->as()); + case ast_parenthesized_expression: return clone(v->as()); + case ast_tensor: return clone(v->as()); + case ast_typed_tuple: return clone(v->as()); + case ast_reference: return clone(v->as()); + case ast_local_var_lhs: return clone(v->as()); + case ast_local_vars_declaration: return clone(v->as()); + case ast_int_const: return clone(v->as()); + case ast_string_const: return clone(v->as()); + case ast_bool_const: return clone(v->as()); + case ast_null_keyword: return clone(v->as()); + case ast_argument: return clone(v->as()); + case 
ast_argument_list: return clone(v->as()); + case ast_dot_access: return clone(v->as()); + case ast_function_call: return clone(v->as()); + case ast_underscore: return clone(v->as()); + case ast_assign: return clone(v->as()); + case ast_set_assign: return clone(v->as()); + case ast_unary_operator: return clone(v->as()); + case ast_binary_operator: return clone(v->as()); + case ast_ternary_operator: return clone(v->as()); + case ast_cast_as_operator: return clone(v->as()); + default: + throw UnexpectedASTNodeType(v, "ASTReplicatorFunction::clone"); + } + } + + AnyV clone(AnyV v) final { + switch (v->type) { + case ast_empty_statement: return clone(v->as()); + case ast_sequence: return clone(v->as()); + case ast_return_statement: return clone(v->as()); + case ast_if_statement: return clone(v->as()); + case ast_repeat_statement: return clone(v->as()); + case ast_while_statement: return clone(v->as()); + case ast_do_while_statement: return clone(v->as()); + case ast_throw_statement: return clone(v->as()); + case ast_assert_statement: return clone(v->as()); + case ast_try_catch_statement: return clone(v->as()); + case ast_asm_body: return clone(v->as()); + // other AST nodes that can be children of ast nodes of function body + case ast_identifier: return clone(v->as()); + case ast_instantiationT_item: return clone(v->as()); + case ast_instantiationT_list: return clone(v->as()); + case ast_parameter: return clone(v->as()); + case ast_parameter_list: return clone(v->as()); + + default: { + // be very careful, don't forget to handle all statements/other (not expressions) above! 
+ AnyExprV as_expr = reinterpret_cast(v); + return clone(as_expr); + } + } + } + + TypePtr clone(TypePtr t) override { + return t; + } + + public: + virtual V clone_function_body(V v_function) { + return createV( + v_function->loc, + clone(v_function->get_identifier()), + clone(v_function->get_param_list()), + clone(v_function->get_body()->as()), + clone(v_function->declared_return_type), + v_function->genericsT_list, + v_function->method_id, + v_function->flags + ); + } +}; + +} // namespace tolk diff --git a/tolk/ast-stringifier.h b/tolk/ast-stringifier.h index cc91371c..4ec72cdd 100644 --- a/tolk/ast-stringifier.h +++ b/tolk/ast-stringifier.h @@ -20,6 +20,7 @@ #include "ast.h" #include "ast-visitor.h" +#include "type-system.h" #include /* @@ -31,47 +32,55 @@ namespace tolk { class ASTStringifier final : public ASTVisitor { constexpr static std::pair name_pairs[] = { - {ast_empty_statement, "ast_empty_statement"}, + {ast_identifier, "ast_identifier"}, + // expressions {ast_empty_expression, "ast_empty_expression"}, {ast_parenthesized_expression, "ast_parenthesized_expression"}, {ast_tensor, "ast_tensor"}, - {ast_tensor_square, "ast_tensor_square"}, - {ast_identifier, "ast_identifier"}, + {ast_typed_tuple, "ast_typed_tuple"}, + {ast_reference, "ast_reference"}, + {ast_local_var_lhs, "ast_local_var_lhs"}, + {ast_local_vars_declaration, "ast_local_vars_declaration"}, {ast_int_const, "ast_int_const"}, {ast_string_const, "ast_string_const"}, {ast_bool_const, "ast_bool_const"}, {ast_null_keyword, "ast_null_keyword"}, - {ast_self_keyword, "ast_self_keyword"}, {ast_argument, "ast_argument"}, {ast_argument_list, "ast_argument_list"}, + {ast_dot_access, "ast_dot_access"}, {ast_function_call, "ast_function_call"}, - {ast_dot_method_call, "ast_dot_method_call"}, - {ast_global_var_declaration, "ast_global_var_declaration"}, - {ast_constant_declaration, "ast_constant_declaration"}, {ast_underscore, "ast_underscore"}, + {ast_assign, "ast_assign"}, + {ast_set_assign, 
"ast_set_assign"}, {ast_unary_operator, "ast_unary_operator"}, {ast_binary_operator, "ast_binary_operator"}, {ast_ternary_operator, "ast_ternary_operator"}, - {ast_return_statement, "ast_return_statement"}, + {ast_cast_as_operator, "ast_cast_as_operator"}, + // statements + {ast_empty_statement, "ast_empty_statement"}, {ast_sequence, "ast_sequence"}, + {ast_return_statement, "ast_return_statement"}, + {ast_if_statement, "ast_if_statement"}, {ast_repeat_statement, "ast_repeat_statement"}, {ast_while_statement, "ast_while_statement"}, {ast_do_while_statement, "ast_do_while_statement"}, {ast_throw_statement, "ast_throw_statement"}, {ast_assert_statement, "ast_assert_statement"}, {ast_try_catch_statement, "ast_try_catch_statement"}, - {ast_if_statement, "ast_if_statement"}, + {ast_asm_body, "ast_asm_body"}, + // other {ast_genericsT_item, "ast_genericsT_item"}, {ast_genericsT_list, "ast_genericsT_list"}, + {ast_instantiationT_item, "ast_instantiationT_item"}, + {ast_instantiationT_list, "ast_instantiationT_list"}, {ast_parameter, "ast_parameter"}, {ast_parameter_list, "ast_parameter_list"}, - {ast_asm_body, "ast_asm_body"}, {ast_annotation, "ast_annotation"}, {ast_function_declaration, "ast_function_declaration"}, - {ast_local_var, "ast_local_var"}, - {ast_local_vars_declaration, "ast_local_vars_declaration"}, + {ast_global_var_declaration, "ast_global_var_declaration"}, + {ast_constant_declaration, "ast_constant_declaration"}, {ast_tolk_required_version, "ast_tolk_required_version"}, - {ast_import_statement, "ast_import_statement"}, + {ast_import_directive, "ast_import_directive"}, {ast_tolk_file, "ast_tolk_file"}, }; @@ -115,6 +124,13 @@ class ASTStringifier final : public ASTVisitor { switch (v->type) { case ast_identifier: return static_cast(v->as()->name); + case ast_reference: { + std::string result(v->as()->get_name()); + if (v->as()->has_instantiationTs()) { + result += specific_str(v->as()->get_instantiationTs()); + } + return result; + } case ast_int_const: 
return static_cast(v->as()->orig_str); case ast_string_const: @@ -123,24 +139,40 @@ class ASTStringifier final : public ASTVisitor { } else { return "\"" + static_cast(v->as()->str_val) + "\""; } - case ast_function_call: { - if (auto v_lhs = v->as()->get_called_f()->try_as()) { - return static_cast(v_lhs->name) + "()"; + case ast_bool_const: + return v->as()->bool_val ? "true" : "false"; + case ast_dot_access: { + std::string result = "." + static_cast(v->as()->get_field_name()); + if (v->as()->has_instantiationTs()) { + result += specific_str(v->as()->get_instantiationTs()); } - return {}; + return result; + } + case ast_function_call: { + std::string inner = specific_str(v->as()->get_callee()); + if (int n_args = v->as()->get_num_args()) { + return inner + "(..." + std::to_string(n_args) + ")"; + } + return inner + "()"; } - case ast_dot_method_call: - return static_cast(v->as()->method_name); case ast_global_var_declaration: return static_cast(v->as()->get_identifier()->name); case ast_constant_declaration: return static_cast(v->as()->get_identifier()->name); + case ast_assign: + return "="; + case ast_set_assign: + return static_cast(v->as()->operator_name) + "="; case ast_unary_operator: return static_cast(v->as()->operator_name); case ast_binary_operator: return static_cast(v->as()->operator_name); + case ast_cast_as_operator: + return v->as()->cast_to_type->as_human_readable(); case ast_sequence: return "↓" + std::to_string(v->as()->get_items().size()); + case ast_instantiationT_item: + return v->as()->substituted_type->as_human_readable(); case ast_if_statement: return v->as()->is_ifnot ? "ifnot" : ""; case ast_annotation: @@ -159,18 +191,27 @@ class ASTStringifier final : public ASTVisitor { } return "fun " + static_cast(v->as()->get_identifier()->name) + "(" + param_names + ")"; } - case ast_local_var: { + case ast_local_var_lhs: { std::ostringstream os; - os << (v->as()->inferred_type ? 
v->as()->inferred_type : v->as()->declared_type); - if (auto v_ident = v->as()->get_identifier()->try_as()) { - return static_cast(v_ident->name) + ":" + os.str(); + os << (v->as()->inferred_type ? v->as()->inferred_type : v->as()->declared_type); + if (v->as()->get_name().empty()) { + return "_: " + os.str(); } - return "_: " + os.str(); + return static_cast(v->as()->get_name()) + ":" + os.str(); + } + case ast_instantiationT_list: { + std::string result = "<"; + for (AnyV item : v->as()->get_items()) { + if (result.size() > 1) + result += ","; + result += item->as()->substituted_type->as_human_readable(); + } + return result + ">"; } case ast_tolk_required_version: return static_cast(v->as()->semver); - case ast_import_statement: - return static_cast(v->as()->get_file_leaf()->str_val); + case ast_import_directive: + return static_cast(v->as()->get_file_leaf()->str_val); case ast_tolk_file: return v->as()->file->rel_filename; default: @@ -203,47 +244,55 @@ public: void visit(AnyV v) override { switch (v->type) { - case ast_empty_statement: return handle_vertex(v->as()); + case ast_identifier: return handle_vertex(v->as()); + // expressions case ast_empty_expression: return handle_vertex(v->as()); case ast_parenthesized_expression: return handle_vertex(v->as()); case ast_tensor: return handle_vertex(v->as()); - case ast_tensor_square: return handle_vertex(v->as()); - case ast_identifier: return handle_vertex(v->as()); + case ast_typed_tuple: return handle_vertex(v->as()); + case ast_reference: return handle_vertex(v->as()); + case ast_local_var_lhs: return handle_vertex(v->as()); + case ast_local_vars_declaration: return handle_vertex(v->as()); case ast_int_const: return handle_vertex(v->as()); case ast_string_const: return handle_vertex(v->as()); case ast_bool_const: return handle_vertex(v->as()); case ast_null_keyword: return handle_vertex(v->as()); - case ast_self_keyword: return handle_vertex(v->as()); case ast_argument: return handle_vertex(v->as()); case 
ast_argument_list: return handle_vertex(v->as()); + case ast_dot_access: return handle_vertex(v->as()); case ast_function_call: return handle_vertex(v->as()); - case ast_dot_method_call: return handle_vertex(v->as()); - case ast_global_var_declaration: return handle_vertex(v->as()); - case ast_constant_declaration: return handle_vertex(v->as()); case ast_underscore: return handle_vertex(v->as()); + case ast_assign: return handle_vertex(v->as()); + case ast_set_assign: return handle_vertex(v->as()); case ast_unary_operator: return handle_vertex(v->as()); case ast_binary_operator: return handle_vertex(v->as()); case ast_ternary_operator: return handle_vertex(v->as()); - case ast_return_statement: return handle_vertex(v->as()); + case ast_cast_as_operator: return handle_vertex(v->as()); + // statements + case ast_empty_statement: return handle_vertex(v->as()); case ast_sequence: return handle_vertex(v->as()); + case ast_return_statement: return handle_vertex(v->as()); + case ast_if_statement: return handle_vertex(v->as()); case ast_repeat_statement: return handle_vertex(v->as()); case ast_while_statement: return handle_vertex(v->as()); case ast_do_while_statement: return handle_vertex(v->as()); case ast_throw_statement: return handle_vertex(v->as()); case ast_assert_statement: return handle_vertex(v->as()); case ast_try_catch_statement: return handle_vertex(v->as()); - case ast_if_statement: return handle_vertex(v->as()); + case ast_asm_body: return handle_vertex(v->as()); + // other case ast_genericsT_item: return handle_vertex(v->as()); case ast_genericsT_list: return handle_vertex(v->as()); + case ast_instantiationT_item: return handle_vertex(v->as()); + case ast_instantiationT_list: return handle_vertex(v->as()); case ast_parameter: return handle_vertex(v->as()); case ast_parameter_list: return handle_vertex(v->as()); - case ast_asm_body: return handle_vertex(v->as()); case ast_annotation: return handle_vertex(v->as()); case ast_function_declaration: return 
handle_vertex(v->as()); - case ast_local_var: return handle_vertex(v->as()); - case ast_local_vars_declaration: return handle_vertex(v->as()); + case ast_global_var_declaration: return handle_vertex(v->as()); + case ast_constant_declaration: return handle_vertex(v->as()); case ast_tolk_required_version: return handle_vertex(v->as()); - case ast_import_statement: return handle_vertex(v->as()); + case ast_import_directive: return handle_vertex(v->as()); case ast_tolk_file: return handle_vertex(v->as()); default: throw UnexpectedASTNodeType(v, "ASTStringifier::visit"); diff --git a/tolk/ast-visitor.h b/tolk/ast-visitor.h index a67f6800..a54cb13b 100644 --- a/tolk/ast-visitor.h +++ b/tolk/ast-visitor.h @@ -86,92 +86,103 @@ class ASTVisitorFunctionBody : public ASTVisitor { protected: using parent = ASTVisitorFunctionBody; - virtual void visit(V v) { return visit_children(v); } + // expressions virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); 
} + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + // statements + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } void visit(AnyV v) final { switch (v->type) { - case ast_empty_statement: return visit(v->as()); + // expressions case ast_empty_expression: return visit(v->as()); case ast_parenthesized_expression: return visit(v->as()); case ast_tensor: return visit(v->as()); - case ast_tensor_square: return visit(v->as()); - case ast_identifier: return visit(v->as()); + case ast_typed_tuple: return visit(v->as()); + case ast_reference: return visit(v->as()); + case ast_local_var_lhs: return visit(v->as()); + case ast_local_vars_declaration: return visit(v->as()); case ast_int_const: return visit(v->as()); case ast_string_const: return visit(v->as()); case ast_bool_const: return visit(v->as()); case ast_null_keyword: return visit(v->as()); - case ast_self_keyword: return visit(v->as()); case ast_argument: return visit(v->as()); 
case ast_argument_list: return visit(v->as()); + case ast_dot_access: return visit(v->as()); case ast_function_call: return visit(v->as()); - case ast_dot_method_call: return visit(v->as()); case ast_underscore: return visit(v->as()); + case ast_assign: return visit(v->as()); + case ast_set_assign: return visit(v->as()); case ast_unary_operator: return visit(v->as()); case ast_binary_operator: return visit(v->as()); case ast_ternary_operator: return visit(v->as()); - case ast_return_statement: return visit(v->as()); + case ast_cast_as_operator: return visit(v->as()); + // statements + case ast_empty_statement: return visit(v->as()); case ast_sequence: return visit(v->as()); + case ast_return_statement: return visit(v->as()); + case ast_if_statement: return visit(v->as()); case ast_repeat_statement: return visit(v->as()); case ast_while_statement: return visit(v->as()); case ast_do_while_statement: return visit(v->as()); case ast_throw_statement: return visit(v->as()); case ast_assert_statement: return visit(v->as()); case ast_try_catch_statement: return visit(v->as()); - case ast_if_statement: return visit(v->as()); - case ast_local_var: return visit(v->as()); - case ast_local_vars_declaration: return visit(v->as()); - case ast_asm_body: return visit(v->as()); +#ifdef TOLK_DEBUG + case ast_asm_body: + throw UnexpectedASTNodeType(v, "ASTVisitor; forgot to filter out asm functions in should_visit_function()?"); +#endif default: throw UnexpectedASTNodeType(v, "ASTVisitorFunctionBody::visit"); } } public: - virtual void start_visiting_function(V v_function) { + virtual bool should_visit_function(const FunctionData* fun_ref) = 0; + + virtual void start_visiting_function(const FunctionData* fun_ref, V v_function) { visit(v_function->get_body()); } }; + +const std::vector& get_all_not_builtin_functions(); + template -void visit_ast_of_all_functions(const AllSrcFiles& all_files) { - for (const SrcFile* file : all_files) { - for (AnyV v : 
file->ast->as()->get_toplevel_declarations()) { - if (auto v_func = v->try_as()) { - if (v_func->is_regular_function()) { - BodyVisitorT visitor; - visitor.start_visiting_function(v_func); - } - } +void visit_ast_of_all_functions() { + BodyVisitorT visitor; + for (const FunctionData* fun_ref : get_all_not_builtin_functions()) { + if (visitor.should_visit_function(fun_ref)) { + visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); } } } diff --git a/tolk/ast.cpp b/tolk/ast.cpp index 4e78b013..092260ff 100644 --- a/tolk/ast.cpp +++ b/tolk/ast.cpp @@ -15,8 +15,9 @@ along with TON Blockchain Library. If not, see . */ #include "ast.h" +#ifdef TOLK_DEBUG #include "ast-stringifier.h" -#include +#endif namespace tolk { @@ -104,7 +105,7 @@ int Vertex::get_mutate_params_count() const { // Therefore, there is a guarantee, that all AST mutations are done via these methods, // easily searched by usages, and there is no another way to modify any other field. -void ASTNodeExpressionBase::assign_inferred_type(TypeExpr* type) { +void ASTNodeExpressionBase::assign_inferred_type(TypePtr type) { this->inferred_type = type; } @@ -116,43 +117,79 @@ void ASTNodeExpressionBase::assign_lvalue_true() { this->is_lvalue = true; } -void Vertex::assign_sym(const Symbol* sym) { +void Vertex::assign_sym(const Symbol* sym) { this->sym = sym; } -void Vertex::assign_param_ref(const LocalVarData* self_param) { - this->param_ref = self_param; -} - void Vertex::assign_fun_ref(const FunctionData* fun_ref) { this->fun_maybe = fun_ref; } -void Vertex::assign_fun_ref(const FunctionData* fun_ref) { - this->fun_ref = fun_ref; +void Vertex::assign_resolved_type(TypePtr cast_to_type) { + this->cast_to_type = cast_to_type; } void Vertex::assign_var_ref(const GlobalVarData* var_ref) { this->var_ref = var_ref; } +void Vertex::assign_resolved_type(TypePtr declared_type) { + this->declared_type = declared_type; +} + void Vertex::assign_const_ref(const GlobalConstData* const_ref) { this->const_ref = 
const_ref; } +void Vertex::assign_resolved_type(TypePtr declared_type) { + this->declared_type = declared_type; +} + +void Vertex::assign_resolved_type(TypePtr substituted_type) { + this->substituted_type = substituted_type; +} + void Vertex::assign_param_ref(const LocalVarData* param_ref) { this->param_ref = param_ref; } +void Vertex::assign_resolved_type(TypePtr declared_type) { + this->declared_type = declared_type; +} + +void Vertex::assign_fun_ref(const FunctionData* fun_ref) { + this->fun_ref = fun_ref; +} + +void Vertex::assign_fun_ref(const FunctionData* fun_ref) { + this->fun_ref = fun_ref; +} + +void Vertex::assign_fun_ref(const FunctionData* fun_ref) { + this->fun_ref = fun_ref; +} + +void Vertex::assign_target(const DotTarget& target) { + this->target = target; +} + void Vertex::assign_fun_ref(const FunctionData* fun_ref) { this->fun_ref = fun_ref; } -void Vertex::assign_var_ref(const Symbol* var_ref) { - this->var_maybe = var_ref; +void Vertex::assign_resolved_type(TypePtr declared_return_type) { + this->declared_return_type = declared_return_type; } -void Vertex::assign_src_file(const SrcFile* file) { +void Vertex::assign_var_ref(const LocalVarData* var_ref) { + this->var_ref = var_ref; +} + +void Vertex::assign_resolved_type(TypePtr declared_type) { + this->declared_type = declared_type; +} + +void Vertex::assign_src_file(const SrcFile* file) { this->file = file; } diff --git a/tolk/ast.h b/tolk/ast.h index ccc4ac58..b90507e7 100644 --- a/tolk/ast.h +++ b/tolk/ast.h @@ -20,7 +20,6 @@ #include "fwd-declarations.h" #include "platform-utils.h" #include "src-file.h" -#include "type-expr.h" #include "lexer.h" #include "symtable.h" @@ -65,47 +64,55 @@ namespace tolk { enum ASTNodeType { - ast_empty_statement, + ast_identifier, + // expressions ast_empty_expression, ast_parenthesized_expression, ast_tensor, - ast_tensor_square, - ast_identifier, + ast_typed_tuple, + ast_reference, + ast_local_var_lhs, + ast_local_vars_declaration, ast_int_const, 
ast_string_const, ast_bool_const, ast_null_keyword, - ast_self_keyword, ast_argument, ast_argument_list, + ast_dot_access, ast_function_call, - ast_dot_method_call, - ast_global_var_declaration, - ast_constant_declaration, ast_underscore, + ast_assign, + ast_set_assign, ast_unary_operator, ast_binary_operator, ast_ternary_operator, - ast_return_statement, + ast_cast_as_operator, + // statements + ast_empty_statement, ast_sequence, + ast_return_statement, + ast_if_statement, ast_repeat_statement, ast_while_statement, ast_do_while_statement, ast_throw_statement, ast_assert_statement, ast_try_catch_statement, - ast_if_statement, + ast_asm_body, + // other ast_genericsT_item, ast_genericsT_list, + ast_instantiationT_item, + ast_instantiationT_list, ast_parameter, ast_parameter_list, - ast_asm_body, ast_annotation, ast_function_declaration, - ast_local_var, - ast_local_vars_declaration, + ast_global_var_declaration, + ast_constant_declaration, ast_tolk_required_version, - ast_import_statement, + ast_import_directive, ast_tolk_file, }; @@ -144,6 +151,7 @@ struct ASTNodeBase { const SrcLocation loc; ASTNodeBase(ASTNodeType type, SrcLocation loc) : type(type), loc(loc) {} + ASTNodeBase(const ASTNodeBase&) = delete; template V as() const { @@ -171,12 +179,14 @@ struct ASTNodeBase { }; struct ASTNodeExpressionBase : ASTNodeBase { - TypeExpr* inferred_type = nullptr; // todo make it const + friend class ASTDuplicatorFunction; + + TypePtr inferred_type = nullptr; bool is_rvalue: 1 = false; bool is_lvalue: 1 = false; ASTNodeExpressionBase* mutate() const { return const_cast(this); } - void assign_inferred_type(TypeExpr* type); + void assign_inferred_type(TypePtr type); void assign_rvalue_true(); void assign_lvalue_true(); @@ -226,6 +236,8 @@ struct ASTExprVararg : ASTNodeExpressionBase { protected: std::vector children; + AnyExprV child(int i) const { return children.at(i); } + ASTExprVararg(ASTNodeType type, SrcLocation loc, std::vector children) : ASTNodeExpressionBase(type, 
loc), children(std::move(children)) {} @@ -254,7 +266,6 @@ struct ASTStatementVararg : ASTNodeStatementBase { protected: std::vector children; - AnyV child(int i) const { return children.at(i); } AnyExprV child_as_expr(int i) const { return reinterpret_cast(children.at(i)); } ASTStatementVararg(ASTNodeType type, SrcLocation loc, std::vector children) @@ -281,7 +292,7 @@ struct ASTOtherVararg : ASTNodeBase { protected: std::vector children; - AnyV child(int i) const { return children.at(i); } + AnyExprV child_as_expr(int i) const { return reinterpret_cast(children.at(i)); } ASTOtherVararg(ASTNodeType type, SrcLocation loc, std::vector children) : ASTNodeBase(type, loc), children(std::move(children)) {} @@ -291,21 +302,42 @@ public: bool empty() const { return children.empty(); } }; -// --------------------------------------------------------- template<> -struct Vertex final : ASTStatementVararg { - explicit Vertex(SrcLocation loc) - : ASTStatementVararg(ast_empty_statement, loc, {}) {} +// ast_identifier is "a name" in AST structure +// it's NOT a standalone expression, it's "implementation details" of other AST vertices +// example: `var x = 5` then "x" is identifier (inside local var declaration) +// example: `global g: int` then "g" is identifier +// example: `someF` is a reference, which contains identifier +// example: `someF` is a reference which contains identifier and generics instantiation +// example: `fun f()` then "f" is identifier, "" is a generics declaration +struct Vertex final : ASTOtherLeaf { + std::string_view name; // empty for underscore + + Vertex(SrcLocation loc, std::string_view name) + : ASTOtherLeaf(ast_identifier, loc) + , name(name) {} }; + +// +// --------------------------------------------------------- +// expressions +// + + template<> +// ast_empty_expression is "nothing" in context of expression, it has "unknown" type +// example: `throw 123;` then "throw arg" is empty expression (opposed to `throw (123, arg)`) struct Vertex final : 
ASTExprLeaf { explicit Vertex(SrcLocation loc) : ASTExprLeaf(ast_empty_expression, loc) {} }; + template<> +// ast_parenthesized_expression is something surrounded embraced by (parenthesis) +// example: `(1)`, `((f()))` (two nested) struct Vertex final : ASTExprUnary { AnyExprV get_expr() const { return child; } @@ -314,37 +346,101 @@ struct Vertex final : ASTExprUnary { }; template<> +// ast_tensor is a set of expressions embraced by (parenthesis) +// in most languages, it's called "tuple", but in TVM, "tuple" is a TVM primitive, that's why "tensor" +// example: `(1, 2)`, `(1, (2, 3))` (nested), `()` (empty tensor) +// note, that `(1)` is not a tensor, it's a parenthesized expression +// a tensor of N elements occupies N slots on a stack (opposed to TVM tuple primitive, 1 slot) struct Vertex final : ASTExprVararg { const std::vector& get_items() const { return children; } - AnyExprV get_item(int i) const { return children.at(i); } + AnyExprV get_item(int i) const { return child(i); } Vertex(SrcLocation loc, std::vector items) : ASTExprVararg(ast_tensor, loc, std::move(items)) {} }; template<> -struct Vertex final : ASTExprVararg { +// ast_typed_tuple is a set of expressions in [square brackets] +// in TVM, it's a TVM tuple, that occupies 1 slot, but the compiler knows its "typed structure" +// example: `[1, x]`, `[[0]]` (nested) +// typed tuples can be assigned to N variables, like `[one, _, three] = [1,2,3]` +struct Vertex final : ASTExprVararg { const std::vector& get_items() const { return children; } - AnyExprV get_item(int i) const { return children.at(i); } + AnyExprV get_item(int i) const { return child(i); } Vertex(SrcLocation loc, std::vector items) - : ASTExprVararg(ast_tensor_square, loc, std::move(items)) {} + : ASTExprVararg(ast_typed_tuple, loc, std::move(items)) {} }; template<> -struct Vertex final : ASTExprLeaf { - const Symbol* sym = nullptr; // always filled (after resolved); points to local / global / function / constant - std::string_view 
name; +// ast_reference is "something that references a symbol" +// examples: `x` / `someF` / `someF` +// it's a leaf expression from traversing point of view, but actually, has children (not expressions) +// note, that both `someF()` and `someF()` are function calls, where a callee is just a reference +struct Vertex final : ASTExprLeaf { +private: + V identifier; // its name, `x` / `someF` + V instantiationTs; // not null if ``, otherwise nullptr + +public: + const Symbol* sym = nullptr; // filled on resolve or type inferring; points to local / global / function / constant + + auto get_identifier() const { return identifier; } + bool has_instantiationTs() const { return instantiationTs != nullptr; } + auto get_instantiationTs() const { return instantiationTs; } + std::string_view get_name() const { return identifier->name; } Vertex* mutate() const { return const_cast(this); } void assign_sym(const Symbol* sym); - Vertex(SrcLocation loc, std::string_view name) - : ASTExprLeaf(ast_identifier, loc) - , name(name) {} + Vertex(SrcLocation loc, V name_identifier, V instantiationTs) + : ASTExprLeaf(ast_reference, loc) + , identifier(name_identifier), instantiationTs(instantiationTs) {} }; template<> +// ast_local_var_lhs is one variable inside `var` declaration +// example: `var x = 0;` then "x" is local var lhs +// example: `val (x: int, [y redef], _) = rhs` then "x" and "y" and "_" are +// it's a leaf from expression's point of view, though technically has an "identifier" child +struct Vertex final : ASTExprLeaf { +private: + V identifier; + +public: + const LocalVarData* var_ref = nullptr; // filled on resolve identifiers; for `redef` points to declared above; for underscore, name is empty + TypePtr declared_type; // not null for `var x: int = rhs`, otherwise nullptr + bool is_immutable; // declared via 'val', not 'var' + bool marked_as_redef; // var (existing_var redef, new_var: int) = ... 
+ + V get_identifier() const { return identifier; } + std::string_view get_name() const { return identifier->name; } // empty for underscore + + Vertex* mutate() const { return const_cast(this); } + void assign_var_ref(const LocalVarData* var_ref); + void assign_resolved_type(TypePtr declared_type); + + Vertex(SrcLocation loc, V identifier, TypePtr declared_type, bool is_immutable, bool marked_as_redef) + : ASTExprLeaf(ast_local_var_lhs, loc) + , identifier(identifier), declared_type(declared_type), is_immutable(is_immutable), marked_as_redef(marked_as_redef) {} +}; + +template<> +// ast_local_vars_declaration is an expression declaring local variables on the left side of assignment +// examples: see above +// for `var (x, [y])` its expr is "tensor (local var, typed tuple (local var))" +// for assignment `var x = 5`, this node is `var x`, lhs of assignment +struct Vertex final : ASTExprUnary { + AnyExprV get_expr() const { return child; } // ast_local_var_lhs / ast_tensor / ast_typed_tuple + + Vertex(SrcLocation loc, AnyExprV expr) + : ASTExprUnary(ast_local_vars_declaration, loc, expr) {} +}; + +template<> +// ast_int_const is an integer literal +// examples: `0` / `0xFF` +// note, that `-1` is unary minus of `1` int const struct Vertex final : ASTExprLeaf { td::RefInt256 intval; // parsed value, 255 for "0xFF" std::string_view orig_str; // original "0xFF"; empty for nodes generated by compiler (e.g. 
in constant folding) @@ -356,6 +452,10 @@ struct Vertex final : ASTExprLeaf { }; template<> +// ast_string_const is a string literal in double quotes or """ when multiline +// examples: "asdf" / "Ef8zMz..."a / "to_calc_crc32_from"c +// an optional modifier specifies how a string is parsed (probably, like an integer) +// note, that TVM doesn't have strings, it has only slices, so "hello" has type slice struct Vertex final : ASTExprLeaf { std::string_view str_val; char modifier; @@ -375,6 +475,7 @@ struct Vertex final : ASTExprLeaf { }; template<> +// ast_bool_const is either `true` or `false` struct Vertex final : ASTExprLeaf { bool bool_val; @@ -384,25 +485,20 @@ struct Vertex final : ASTExprLeaf { }; template<> +// ast_null_keyword is the `null` literal +// it should be handled with care; for instance, `null` takes special place in the type system struct Vertex final : ASTExprLeaf { explicit Vertex(SrcLocation loc) : ASTExprLeaf(ast_null_keyword, loc) {} }; template<> -struct Vertex final : ASTExprLeaf { - const LocalVarData* param_ref = nullptr; // filled after resolve identifiers, points to `self` parameter - - Vertex* mutate() const { return const_cast(this); } - void assign_param_ref(const LocalVarData* self_param); - - explicit Vertex(SrcLocation loc) - : ASTExprLeaf(ast_self_keyword, loc) {} -}; - -template<> +// ast_argument is an element of an argument list of a function/method call +// example: `f(1, x)` has 2 arguments, `t.tupleFirst()` has no arguments (though `t` is passed as `self`) +// example: `f(mutate arg)` has 1 argument with `passed_as_mutate` flag +// (without `mutate` keyword, the entity "argument" could be replaced just by "any expression") struct Vertex final : ASTExprUnary { - bool passed_as_mutate; // when called `f(mutate arg)`, not `f(arg)` + bool passed_as_mutate; AnyExprV get_expr() const { return child; } @@ -412,19 +508,57 @@ struct Vertex final : ASTExprUnary { }; template<> +// ast_argument_list contains N arguments of a 
function/method call struct Vertex final : ASTExprVararg { const std::vector& get_arguments() const { return children; } - auto get_arg(int i) const { return children.at(i)->as(); } + auto get_arg(int i) const { return child(i)->as(); } Vertex(SrcLocation loc, std::vector arguments) : ASTExprVararg(ast_argument_list, loc, std::move(arguments)) {} }; template<> -struct Vertex final : ASTExprBinary { - const FunctionData* fun_maybe = nullptr; // filled after resolve; remains nullptr for `localVar()` / `getF()()` +// ast_dot_access is "object before dot, identifier + optional after dot" +// examples: `tensorVar.0` / `obj.field` / `getObj().method` / `t.tupleFirst` +// from traversing point of view, it's an unary expression: only obj is expression, field name is not +// note, that `obj.method()` is a function call with "dot access `obj.method`" callee +struct Vertex final : ASTExprUnary { +private: + V identifier; // `0` / `field` / `method` + V instantiationTs; // not null if ``, otherwise nullptr - AnyExprV get_called_f() const { return lhs; } +public: + + typedef const FunctionData* DotTarget; // for `t.tupleAt` target is `tupleAt` global function + DotTarget target = nullptr; // filled at type inferring + + AnyExprV get_obj() const { return child; } + auto get_identifier() const { return identifier; } + bool has_instantiationTs() const { return instantiationTs != nullptr; } + auto get_instantiationTs() const { return instantiationTs; } + std::string_view get_field_name() const { return identifier->name; } + + Vertex* mutate() const { return const_cast(this); } + void assign_target(const DotTarget& target); + + Vertex(SrcLocation loc, AnyExprV obj, V identifier, V instantiationTs) + : ASTExprUnary(ast_dot_access, loc, obj) + , identifier(identifier), instantiationTs(instantiationTs) {} +}; + +template<> +// ast_function_call is "calling some lhs with parenthesis", lhs is arbitrary expression (callee) +// example: `globalF()` then callee is reference +// example: 
`globalF()` then callee is reference (with instantiation Ts filled) +// example: `local_var()` then callee is reference (points to local var, filled at resolve identifiers) +// example: `getF()()` then callee is another func call (which type is TypeDataFunCallable) +// example: `obj.method()` then callee is dot access (resolved while type inferring) +struct Vertex final : ASTExprBinary { + const FunctionData* fun_maybe = nullptr; // filled while type inferring for `globalF()` / `obj.f()`; remains nullptr for `local_var()` / `getF()()` + + AnyExprV get_callee() const { return lhs; } + bool is_dot_call() const { return lhs->type == ast_dot_access; } + AnyExprV get_dot_obj() const { return lhs->as()->get_obj(); } auto get_arg_list() const { return rhs->as(); } int get_num_args() const { return rhs->as()->size(); } auto get_arg(int i) const { return rhs->as()->get_arg(i); } @@ -437,90 +571,79 @@ struct Vertex final : ASTExprBinary { }; template<> -struct Vertex final : ASTExprBinary { - const FunctionData* fun_ref = nullptr; // points to global function (after resolve) - std::string_view method_name; - - AnyExprV get_obj() const { return lhs; } - auto get_arg_list() const { return rhs->as(); } - int get_num_args() const { return rhs->as()->size(); } - auto get_arg(int i) const { return rhs->as()->get_arg(i); } - - Vertex* mutate() const { return const_cast(this); } - void assign_fun_ref(const FunctionData* fun_ref); - - Vertex(SrcLocation loc, std::string_view method_name, AnyExprV lhs, V arguments) - : ASTExprBinary(ast_dot_method_call, loc, lhs, arguments) - , method_name(method_name) {} -}; - -template<> -struct Vertex final : ASTStatementUnary { - const GlobalVarData* var_ref = nullptr; // filled after register - TypeExpr* declared_type; - - auto get_identifier() const { return child->as(); } - - Vertex* mutate() const { return const_cast(this); } - void assign_var_ref(const GlobalVarData* var_ref); - - Vertex(SrcLocation loc, V name_identifier, TypeExpr* 
declared_type) - : ASTStatementUnary(ast_global_var_declaration, loc, name_identifier) - , declared_type(declared_type) {} -}; - -template<> -struct Vertex final : ASTStatementVararg { - const GlobalConstData* const_ref = nullptr; // filled after register - TypeExpr* declared_type; // may be nullptr - - auto get_identifier() const { return child(0)->as(); } - AnyExprV get_init_value() const { return child_as_expr(1); } - - Vertex* mutate() const { return const_cast(this); } - void assign_const_ref(const GlobalConstData* const_ref); - - Vertex(SrcLocation loc, V name_identifier, TypeExpr* declared_type, AnyExprV init_value) - : ASTStatementVararg(ast_constant_declaration, loc, {name_identifier, init_value}) - , declared_type(declared_type) {} -}; - -template<> +// ast_underscore represents `_` symbol used for left side of assignment +// example: `(cs, _) = cs.loadAndReturn()` +// though it's the only correct usage, using _ as rvalue like `var x = _;` is correct from AST point of view +// note, that for declaration `var _ = 1` underscore is a regular local var declared (with empty name) +// but for `_ = 1` (not declaration) it's underscore; it's because `var _:int` is also correct struct Vertex final : ASTExprLeaf { explicit Vertex(SrcLocation loc) : ASTExprLeaf(ast_underscore, loc) {} }; template<> +// ast_assign represents assignment "lhs = rhs" +// examples: `a = 4` / `var a = 4` / `(cs, b, mode) = rhs` / `f() = g()` +// note, that `a = 4` lhs is ast_reference, `var a = 4` lhs is ast_local_vars_declaration +struct Vertex final : ASTExprBinary { + AnyExprV get_lhs() const { return lhs; } + AnyExprV get_rhs() const { return rhs; } + + explicit Vertex(SrcLocation loc, AnyExprV lhs, AnyExprV rhs) + : ASTExprBinary(ast_assign, loc, lhs, rhs) {} +}; + +template<> +// ast_set_assign represents assignment-and-set operation "lhs = rhs" +// examples: `a += 4` / `b <<= c` +struct Vertex final : ASTExprBinary { + const FunctionData* fun_ref = nullptr; // filled at type 
inferring, points to `_+_` built-in for += + std::string_view operator_name; // without equal sign, "+" for operator += + TokenType tok; // tok_set_* + + AnyExprV get_lhs() const { return lhs; } + AnyExprV get_rhs() const { return rhs; } + + Vertex* mutate() const { return const_cast(this); } + void assign_fun_ref(const FunctionData* fun_ref); + + Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyExprV lhs, AnyExprV rhs) + : ASTExprBinary(ast_set_assign, loc, lhs, rhs) + , operator_name(operator_name), tok(tok) {} +}; + +template<> +// ast_unary_operator is "some operator over one expression" +// examples: `-1` / `~found` struct Vertex final : ASTExprUnary { + const FunctionData* fun_ref = nullptr; // filled at type inferring, points to some built-in function std::string_view operator_name; TokenType tok; AnyExprV get_rhs() const { return child; } + Vertex* mutate() const { return const_cast(this); } + void assign_fun_ref(const FunctionData* fun_ref); + Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyExprV rhs) : ASTExprUnary(ast_unary_operator, loc, rhs) , operator_name(operator_name), tok(tok) {} }; template<> +// ast_binary_operator is "some operator over two expressions" +// examples: `a + b` / `x & true` / `(a, b) << g()` +// note, that `a = b` is NOT a binary operator, it's ast_assign, also `a += b`, it's ast_set_assign struct Vertex final : ASTExprBinary { + const FunctionData* fun_ref = nullptr; // filled at type inferring, points to some built-in function std::string_view operator_name; TokenType tok; AnyExprV get_lhs() const { return lhs; } AnyExprV get_rhs() const { return rhs; } - bool is_set_assign() const { - TokenType t = tok; - return t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || - t == tok_set_mod || t == tok_set_lshift || t == tok_set_rshift || - t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor; - } - - bool is_assign() const { - 
return tok == tok_assign; - } + Vertex* mutate() const { return const_cast(this); } + void assign_fun_ref(const FunctionData* fun_ref); Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyExprV lhs, AnyExprV rhs) : ASTExprBinary(ast_binary_operator, loc, lhs, rhs) @@ -528,24 +651,53 @@ struct Vertex final : ASTExprBinary { }; template<> +// ast_ternary_operator is a traditional ternary construction +// example: `cond ? a : b` struct Vertex final : ASTExprVararg { - AnyExprV get_cond() const { return children.at(0); } - AnyExprV get_when_true() const { return children.at(1); } - AnyExprV get_when_false() const { return children.at(2); } + AnyExprV get_cond() const { return child(0); } + AnyExprV get_when_true() const { return child(1); } + AnyExprV get_when_false() const { return child(2); } Vertex(SrcLocation loc, AnyExprV cond, AnyExprV when_true, AnyExprV when_false) : ASTExprVararg(ast_ternary_operator, loc, {cond, when_true, when_false}) {} }; template<> -struct Vertex : ASTStatementUnary { - AnyExprV get_return_value() const { return child_as_expr(); } +// ast_cast_as_operator is explicit casting with "as" keyword +// examples: `arg as int` / `null as cell` / `t.tupleAt(2) as slice` +struct Vertex final : ASTExprUnary { + AnyExprV get_expr() const { return child; } - Vertex(SrcLocation loc, AnyExprV child) - : ASTStatementUnary(ast_return_statement, loc, child) {} + TypePtr cast_to_type; + + Vertex* mutate() const { return const_cast(this); } + void assign_resolved_type(TypePtr cast_to_type); + + Vertex(SrcLocation loc, AnyExprV expr, TypePtr cast_to_type) + : ASTExprUnary(ast_cast_as_operator, loc, expr) + , cast_to_type(cast_to_type) {} +}; + + +// +// --------------------------------------------------------- +// statements +// + + +template<> +// ast_empty_statement is very similar to "empty sequence" but has a special treatment +// example: `;` (just semicolon) +// example: body of `builtin` function is empty statement (not a zero 
sequence) +struct Vertex final : ASTStatementVararg { + explicit Vertex(SrcLocation loc) + : ASTStatementVararg(ast_empty_statement, loc, {}) {} }; template<> +// ast_sequence is "some sequence of statements" +// example: function body is a sequence +// example: do while body is a sequence struct Vertex final : ASTStatementVararg { SrcLocation loc_end; @@ -558,26 +710,61 @@ struct Vertex final : ASTStatementVararg { }; template<> +// ast_return_statement is "return something from a function" +// examples: `return a` / `return any_expr()()` / `return;` +// note, that for `return;` (without a value, meaning "void"), in AST, it's stored as empty expression +struct Vertex : ASTStatementUnary { + AnyExprV get_return_value() const { return child_as_expr(); } + bool has_return_value() const { return child->type != ast_empty_expression; } + + Vertex(SrcLocation loc, AnyExprV child) + : ASTStatementUnary(ast_return_statement, loc, child) {} +}; + +template<> +// ast_if_statement is a traditional if statement, probably followed by an else branch +// examples: `if (cond) { ... } else { ... }` / `if (cond) { ... }` +// when else branch is missing, it's stored as empty statement +// for "else if", it's just "if statement" inside a sequence of else branch +struct Vertex final : ASTStatementVararg { + bool is_ifnot; // if(!cond), to generate more optimal fift code + + AnyExprV get_cond() const { return child_as_expr(0); } + auto get_if_body() const { return children.at(1)->as(); } + auto get_else_body() const { return children.at(2)->as(); } // always exists (when else omitted, it's empty) + + Vertex(SrcLocation loc, bool is_ifnot, AnyExprV cond, V if_body, V else_body) + : ASTStatementVararg(ast_if_statement, loc, {cond, if_body, else_body}) + , is_ifnot(is_ifnot) {} +}; + +template<> +// ast_repeat_statement is "repeat something N times" +// example: `repeat (10) { ... 
}` struct Vertex final : ASTStatementVararg { AnyExprV get_cond() const { return child_as_expr(0); } - auto get_body() const { return child(1)->as(); } + auto get_body() const { return children.at(1)->as(); } Vertex(SrcLocation loc, AnyExprV cond, V body) : ASTStatementVararg(ast_repeat_statement, loc, {cond, body}) {} }; template<> +// ast_while_statement is a standard "while" loop +// example: `while (x > 0) { ... }` struct Vertex final : ASTStatementVararg { AnyExprV get_cond() const { return child_as_expr(0); } - auto get_body() const { return child(1)->as(); } + auto get_body() const { return children.at(1)->as(); } Vertex(SrcLocation loc, AnyExprV cond, V body) : ASTStatementVararg(ast_while_statement, loc, {cond, body}) {} }; template<> +// ast_do_while_statement is a standard "do while" loop +// example: `do { ... } while (x > 0);` struct Vertex final : ASTStatementVararg { - auto get_body() const { return child(0)->as(); } + auto get_body() const { return children.at(0)->as(); } AnyExprV get_cond() const { return child_as_expr(1); } Vertex(SrcLocation loc, V body, AnyExprV cond) @@ -585,16 +772,21 @@ struct Vertex final : ASTStatementVararg { }; template<> +// ast_throw_statement is throwing an exception, it accepts excNo and optional arg +// examples: `throw 10` / `throw (ERR_LOW_BALANCE)` / `throw (1001, incomingAddr)` +// when thrown arg is missing, it's stored as empty expression struct Vertex final : ASTStatementVararg { AnyExprV get_thrown_code() const { return child_as_expr(0); } - AnyExprV get_thrown_arg() const { return child_as_expr(1); } // may be ast_empty bool has_thrown_arg() const { return child_as_expr(1)->type != ast_empty_expression; } + AnyExprV get_thrown_arg() const { return child_as_expr(1); } Vertex(SrcLocation loc, AnyExprV thrown_code, AnyExprV thrown_arg) : ASTStatementVararg(ast_throw_statement, loc, {thrown_code, thrown_arg}) {} }; template<> +// ast_assert_statement is "assert that cond is true, otherwise throw an exception" 
+// examples: `assert (balance > 0, ERR_ZERO_BALANCE)` / `assert (balance > 0) throw (ERR_ZERO_BALANCE)` struct Vertex final : ASTStatementVararg { AnyExprV get_cond() const { return child_as_expr(0); } AnyExprV get_thrown_code() const { return child_as_expr(1); } @@ -604,6 +796,10 @@ struct Vertex final : ASTStatementVararg { }; template<> +// ast_try_catch_statement is a standard try catch (finally block doesn't exist) +// example: `try { ... } catch (excNo) { ... }` +// there are two formal "arguments" of catch: excNo and arg, but both can be omitted +// when omitted, they are stored as underscores, so len of a catch tensor is always 2 struct Vertex final : ASTStatementVararg { auto get_try_body() const { return children.at(0)->as(); } auto get_catch_expr() const { return children.at(1)->as(); } // (excNo, arg), always len 2 @@ -614,29 +810,42 @@ struct Vertex final : ASTStatementVararg { }; template<> -struct Vertex final : ASTStatementVararg { - bool is_ifnot; // if(!cond), to generate more optimal fift code +// ast_asm_body is a body of `asm` function — a set of strings, and optionally stack order manipulations +// example: `fun skipMessageOp... asm "32 PUSHINT" "SDSKIPFIRST";` +// user can specify "arg order"; example: `fun store(self: builder, op: int) asm (op self)` then [1, 0] +// user can specify "ret order"; example: `fun modDiv... 
asm(-> 1 0) "DIVMOD";` then [1, 0] +struct Vertex final : ASTStatementVararg { + std::vector arg_order; + std::vector ret_order; - AnyExprV get_cond() const { return child_as_expr(0); } - auto get_if_body() const { return child(1)->as(); } - auto get_else_body() const { return child(2)->as(); } // always exists (when else omitted, it's empty) + const std::vector& get_asm_commands() const { return children; } // ast_string_const[] - Vertex(SrcLocation loc, bool is_ifnot, AnyExprV cond, V if_body, V else_body) - : ASTStatementVararg(ast_if_statement, loc, {cond, if_body, else_body}) - , is_ifnot(is_ifnot) {} + Vertex(SrcLocation loc, std::vector arg_order, std::vector ret_order, std::vector asm_commands) + : ASTStatementVararg(ast_asm_body, loc, std::move(asm_commands)) + , arg_order(std::move(arg_order)), ret_order(std::move(ret_order)) {} }; + +// +// --------------------------------------------------------- +// other +// + + template<> +// ast_genericsT_item is generics T at declaration +// example: `fun f` has a list of 2 generic Ts struct Vertex final : ASTOtherLeaf { - TypeExpr* created_type; // used to keep same pointer, since TypeExpr::new_var(i) always allocates std::string_view nameT; - Vertex(SrcLocation loc, TypeExpr* created_type, std::string_view nameT) + Vertex(SrcLocation loc, std::string_view nameT) : ASTOtherLeaf(ast_genericsT_item, loc) - , created_type(created_type), nameT(nameT) {} + , nameT(nameT) {} }; template<> +// ast_genericsT_list is a container for generics T at declaration +// example: see above struct Vertex final : ASTOtherVararg { std::vector get_items() const { return children; } auto get_item(int i) const { return children.at(i)->as(); } @@ -647,24 +856,55 @@ struct Vertex final : ASTOtherVararg { int lookup_idx(std::string_view nameT) const; }; + template<> +// ast_instantiationT_item is manual substitution of generic T used in code, mostly for func calls +// examples: `g()` / `t.tupleFirst()` / `f<(int, slice), builder>()` +struct 
Vertex final : ASTOtherLeaf { + TypePtr substituted_type; + + Vertex* mutate() const { return const_cast(this); } + void assign_resolved_type(TypePtr substituted_type); + + Vertex(SrcLocation loc, TypePtr substituted_type) + : ASTOtherLeaf(ast_instantiationT_item, loc) + , substituted_type(substituted_type) {} +}; + +template<> +// ast_instantiationT_list is a container for generic T substitutions used in code +// examples: see above +struct Vertex final : ASTOtherVararg { + std::vector get_items() const { return children; } + auto get_item(int i) const { return children.at(i)->as(); } + + Vertex(SrcLocation loc, std::vector instantiationTs) + : ASTOtherVararg(ast_instantiationT_list, loc, std::move(instantiationTs)) {} +}; + +template<> +// ast_parameter is a parameter of a function in its declaration +// example: `fun f(a: int, mutate b: slice)` has 2 parameters struct Vertex final : ASTOtherLeaf { - const LocalVarData* param_ref = nullptr; // filled after resolved + const LocalVarData* param_ref = nullptr; // filled on resolve identifiers std::string_view param_name; - TypeExpr* declared_type; - bool declared_as_mutate; // declared as `mutate param_name` + TypePtr declared_type; + bool declared_as_mutate; // declared as `mutate param_name` bool is_underscore() const { return param_name.empty(); } Vertex* mutate() const { return const_cast(this); } void assign_param_ref(const LocalVarData* param_ref); + void assign_resolved_type(TypePtr declared_type); - Vertex(SrcLocation loc, std::string_view param_name, TypeExpr* declared_type, bool declared_as_mutate) + Vertex(SrcLocation loc, std::string_view param_name, TypePtr declared_type, bool declared_as_mutate) : ASTOtherLeaf(ast_parameter, loc) , param_name(param_name), declared_type(declared_type), declared_as_mutate(declared_as_mutate) {} }; template<> +// ast_parameter_list is a container of parameters +// example: see above struct Vertex final : ASTOtherVararg { const std::vector& get_params() const { return 
children; } auto get_param(int i) const { return children.at(i)->as(); } @@ -678,22 +918,12 @@ struct Vertex final : ASTOtherVararg { }; template<> -struct Vertex final : ASTStatementVararg { - std::vector arg_order; - std::vector ret_order; - - const std::vector& get_asm_commands() const { return children; } // ast_string_const[] - - Vertex(SrcLocation loc, std::vector arg_order, std::vector ret_order, std::vector asm_commands) - : ASTStatementVararg(ast_asm_body, loc, std::move(asm_commands)) - , arg_order(std::move(arg_order)), ret_order(std::move(ret_order)) {} -}; - -template<> +// ast_annotation is @annotation above a declaration +// example: `@pure fun ...` struct Vertex final : ASTOtherVararg { AnnotationKind kind; - auto get_arg() const { return child(0)->as(); } + auto get_arg() const { return children.at(0)->as(); } static AnnotationKind parse_kind(std::string_view name); @@ -703,63 +933,79 @@ struct Vertex final : ASTOtherVararg { }; template<> -struct Vertex final : ASTExprUnary { - const Symbol* var_maybe = nullptr; // typically local var; can be global var if `var g_v redef`; remains nullptr for underscore - TypeExpr* declared_type; - bool is_immutable; // declared via 'val', not 'var' - bool marked_as_redef; // var (existing_var redef, new_var: int) = ... 
- - AnyExprV get_identifier() const { return child; } // ast_identifier / ast_underscore - - Vertex* mutate() const { return const_cast(this); } - void assign_var_ref(const Symbol* var_ref); - - Vertex(SrcLocation loc, AnyExprV name_identifier, TypeExpr* declared_type, bool is_immutable, bool marked_as_redef) - : ASTExprUnary(ast_local_var, loc, name_identifier), declared_type(declared_type), is_immutable(is_immutable), marked_as_redef(marked_as_redef) {} -}; - -template<> -struct Vertex final : ASTStatementVararg { - AnyExprV get_lhs() const { return child_as_expr(0); } // ast_local_var / ast_tensor / ast_tensor_square - AnyExprV get_assigned_val() const { return child_as_expr(1); } - - Vertex(SrcLocation loc, AnyExprV lhs, AnyExprV assigned_val) - : ASTStatementVararg(ast_local_vars_declaration, loc, {lhs, assigned_val}) {} -}; - -template<> +// ast_function_declaration is declaring a function/method +// methods are still global functions, just accepting "self" first parameter +// example: `fun f() { ... }` +// functions can be generic, `fun f(params) { ... 
}` +// their body is either sequence (regular code function), or `asm`, or `builtin` struct Vertex final : ASTOtherVararg { - auto get_identifier() const { return child(0)->as(); } - int get_num_params() const { return child(1)->as()->size(); } - auto get_param_list() const { return child(1)->as(); } - auto get_param(int i) const { return child(1)->as()->get_param(i); } - AnyV get_body() const { return child(2); } // ast_sequence / ast_asm_body + auto get_identifier() const { return children.at(0)->as(); } + int get_num_params() const { return children.at(1)->as()->size(); } + auto get_param_list() const { return children.at(1)->as(); } + auto get_param(int i) const { return children.at(1)->as()->get_param(i); } + AnyV get_body() const { return children.at(2); } // ast_sequence / ast_asm_body const FunctionData* fun_ref = nullptr; // filled after register - TypeExpr* ret_type = nullptr; - V genericsT_list = nullptr; - bool is_entrypoint = false; - bool marked_as_pure = false; - bool marked_as_builtin = false; - bool marked_as_get_method = false; - bool marked_as_inline = false; - bool marked_as_inline_ref = false; - bool accepts_self = false; - bool returns_self = false; - V method_id = nullptr; + TypePtr declared_return_type; // filled at ast parsing; if unspecified (nullptr), means "auto infer" + V genericsT_list; // for non-generics it's nullptr + td::RefInt256 method_id; // specified via @method_id annotation + int flags; // from enum in FunctionData bool is_asm_function() const { return children.at(2)->type == ast_asm_body; } - bool is_regular_function() const { return children.at(2)->type == ast_sequence; } - bool is_builtin_function() const { return marked_as_builtin; } + bool is_code_function() const { return children.at(2)->type == ast_sequence; } + bool is_builtin_function() const { return children.at(2)->type == ast_empty_statement; } Vertex* mutate() const { return const_cast(this); } void assign_fun_ref(const FunctionData* fun_ref); + void 
assign_resolved_type(TypePtr declared_return_type); - Vertex(SrcLocation loc, V name_identifier, V parameters, AnyV body) - : ASTOtherVararg(ast_function_declaration, loc, {name_identifier, parameters, body}) {} + Vertex(SrcLocation loc, V name_identifier, V parameters, AnyV body, TypePtr declared_return_type, V genericsT_list, td::RefInt256 method_id, int flags) + : ASTOtherVararg(ast_function_declaration, loc, {name_identifier, parameters, body}) + , declared_return_type(declared_return_type), genericsT_list(genericsT_list), method_id(std::move(method_id)), flags(flags) {} }; template<> +// ast_global_var_declaration is declaring a global var, outside a function +// example: `global g: int;` +// note, that globals don't have default values, since there is no single "entrypoint" for a contract +struct Vertex final : ASTOtherVararg { + const GlobalVarData* var_ref = nullptr; // filled after register + TypePtr declared_type; // filled always, typing globals is mandatory + + auto get_identifier() const { return children.at(0)->as(); } + + Vertex* mutate() const { return const_cast(this); } + void assign_var_ref(const GlobalVarData* var_ref); + void assign_resolved_type(TypePtr declared_type); + + Vertex(SrcLocation loc, V name_identifier, TypePtr declared_type) + : ASTOtherVararg(ast_global_var_declaration, loc, {name_identifier}) + , declared_type(declared_type) {} +}; + +template<> +// ast_constant_declaration is declaring a global constant, outside a function +// example: `const op = 0x123;` +struct Vertex final : ASTOtherVararg { + const GlobalConstData* const_ref = nullptr; // filled after register + TypePtr declared_type; // not null for `const op: int = ...` + + auto get_identifier() const { return children.at(0)->as(); } + AnyExprV get_init_value() const { return child_as_expr(1); } + + Vertex* mutate() const { return const_cast(this); } + void assign_const_ref(const GlobalConstData* const_ref); + void assign_resolved_type(TypePtr declared_type); + + 
Vertex(SrcLocation loc, V name_identifier, TypePtr declared_type, AnyExprV init_value) + : ASTOtherVararg(ast_constant_declaration, loc, {name_identifier, init_value}) + , declared_type(declared_type) {} +}; + +template<> +// ast_tolk_required_version is a preamble fixating compiler's version at the top of the file +// example: `tolk 0.6` +// when compiler version mismatches, it means, that another compiler was earlier for that sources, a warning is emitted struct Vertex final : ASTOtherLeaf { std::string_view semver; @@ -769,21 +1015,27 @@ struct Vertex final : ASTOtherLeaf { }; template<> -struct Vertex final : ASTOtherVararg { - const SrcFile* file = nullptr; // assigned after imports have been resolved +// ast_import_directive is an import at the top of the file +// examples: `import "another.tolk"` / `import "@stdlib/tvm-dicts"` +struct Vertex final : ASTOtherVararg { + const SrcFile* file = nullptr; // assigned after imports have been resolved, just after parsing a file to ast - auto get_file_leaf() const { return child(0)->as(); } + auto get_file_leaf() const { return children.at(0)->as(); } - std::string get_file_name() const { return static_cast(child(0)->as()->str_val); } + std::string get_file_name() const { return static_cast(children.at(0)->as()->str_val); } Vertex* mutate() const { return const_cast(this); } void assign_src_file(const SrcFile* file); Vertex(SrcLocation loc, V file_name) - : ASTOtherVararg(ast_import_statement, loc, {file_name}) {} + : ASTOtherVararg(ast_import_directive, loc, {file_name}) {} }; template<> +// ast_tolk_file represents a whole parsed input .tolk file +// with functions, constants, etc. 
+// particularly, it contains imports that lead to loading other files +// a whole program consists of multiple parsed files, each of them has a parsed ast tree (stdlib is also parsed) struct Vertex final : ASTOtherVararg { const SrcFile* const file; diff --git a/tolk/builtins.cpp b/tolk/builtins.cpp index 01b096da..68c3b998 100644 --- a/tolk/builtins.cpp +++ b/tolk/builtins.cpp @@ -16,6 +16,8 @@ */ #include "tolk.h" #include "compiler-state.h" +#include "type-system.h" +#include "generics-helpers.h" namespace tolk { using namespace std::literals::string_literals; @@ -23,46 +25,33 @@ using namespace std::literals::string_literals; // given func_type = `(slice, int) -> slice` and func flags, create SymLocalVarOrParameter // currently (see at the bottom) parameters of built-in functions are unnamed: // built-in functions are created using a resulting type -static std::vector define_builtin_parameters(const TypeExpr* func_type, int func_flags) { +static std::vector define_builtin_parameters(const std::vector& params_types, int func_flags) { // `loadInt()`, `storeInt()`: they accept `self` and mutate it; no other options available in built-ins for now bool is_mutate_self = func_flags & FunctionData::flagHasMutateParams; - // func_type a map (params_type -> ret_type), probably surrounded by forall (internal representation of ) - TypeExpr* params_type = func_type->constr == TypeExpr::te_ForAll ? 
func_type->args[0]->args[0] : func_type->args[0]; std::vector parameters; + parameters.reserve(params_types.size()); - if (params_type->constr == TypeExpr::te_Tensor) { // multiple parameters: it's a tensor - parameters.reserve(params_type->args.size()); - for (int i = 0; i < static_cast(params_type->args.size()); ++i) { - LocalVarData p_sym("", {}, i, params_type->args[i]); - if (i == 0 && is_mutate_self) { - p_sym.flags |= LocalVarData::flagMutateParameter; - } - parameters.push_back(std::move(p_sym)); - } - } else { // single parameter - LocalVarData p_sym("", {}, 0, params_type); - if (is_mutate_self) { - p_sym.flags |= LocalVarData::flagMutateParameter; - } + for (int i = 0; i < static_cast(params_types.size()); ++i) { + LocalVarData p_sym("", {}, params_types[i], (i == 0 && is_mutate_self) * LocalVarData::flagMutateParameter, i); parameters.push_back(std::move(p_sym)); } return parameters; } -static void define_builtin_func(const std::string& name, TypeExpr* func_type, const simple_compile_func_t& func, int flags) { - auto* f_sym = new FunctionData(name, {}, func_type, define_builtin_parameters(func_type, flags), flags, new FunctionBodyBuiltin(func)); +static void define_builtin_func(const std::string& name, const std::vector& params_types, TypePtr return_type, const GenericsDeclaration* genericTs, const simple_compile_func_t& func, int flags) { + auto* f_sym = new FunctionData(name, {}, return_type, define_builtin_parameters(params_types, flags), flags, genericTs, nullptr, new FunctionBodyBuiltin(func), nullptr); G.symtable.add_function(f_sym); } -static void define_builtin_func(const std::string& name, TypeExpr* func_type, const AsmOp& macro, int flags) { - auto* f_sym = new FunctionData(name, {}, func_type, define_builtin_parameters(func_type, flags), flags, new FunctionBodyBuiltin(make_simple_compile(macro))); +static void define_builtin_func(const std::string& name, const std::vector& params_types, TypePtr return_type, const GenericsDeclaration* 
genericTs, const AsmOp& macro, int flags) { + auto* f_sym = new FunctionData(name, {}, return_type, define_builtin_parameters(params_types, flags), flags, genericTs, nullptr, new FunctionBodyBuiltin(make_simple_compile(macro)), nullptr); G.symtable.add_function(f_sym); } -static void define_builtin_func(const std::string& name, TypeExpr* func_type, const simple_compile_func_t& func, int flags, +static void define_builtin_func(const std::string& name, const std::vector& params_types, TypePtr return_type, const GenericsDeclaration* genericTs, const simple_compile_func_t& func, int flags, std::initializer_list arg_order, std::initializer_list ret_order) { - auto* f_sym = new FunctionData(name, {}, func_type, define_builtin_parameters(func_type, flags), flags, new FunctionBodyBuiltin(func)); + auto* f_sym = new FunctionData(name, {}, return_type, define_builtin_parameters(params_types, flags), flags, genericTs, nullptr, new FunctionBodyBuiltin(func), nullptr); f_sym->arg_order = arg_order; f_sym->ret_order = ret_order; G.symtable.add_function(f_sym); @@ -1080,128 +1069,169 @@ AsmOp compile_is_null(std::vector& res, std::vector& args, S void define_builtins() { using namespace std::placeholders; - TypeExpr* Unit = TypeExpr::new_unit(); - TypeExpr* Int = TypeExpr::new_atomic(TypeExpr::_Int); - TypeExpr* Slice = TypeExpr::new_atomic(TypeExpr::_Slice); - TypeExpr* Builder = TypeExpr::new_atomic(TypeExpr::_Builder); - TypeExpr* Tuple = TypeExpr::new_atomic(TypeExpr::_Tuple); - TypeExpr* Int2 = TypeExpr::new_tensor({Int, Int}); - TypeExpr* Int3 = TypeExpr::new_tensor({Int, Int, Int}); - TypeExpr* TupleInt = TypeExpr::new_tensor({Tuple, Int}); - TypeExpr* SliceInt = TypeExpr::new_tensor({Slice, Int}); - TypeExpr* X = TypeExpr::new_var(0); - TypeExpr* arith_bin_op = TypeExpr::new_map(Int2, Int); - TypeExpr* arith_un_op = TypeExpr::new_map(Int, Int); - TypeExpr* impure_un_op = TypeExpr::new_map(Int, Unit); - TypeExpr* fetch_int_op_mutate = TypeExpr::new_map(SliceInt, SliceInt); 
- TypeExpr* prefetch_int_op = TypeExpr::new_map(SliceInt, Int); - TypeExpr* store_int_mutate = TypeExpr::new_map(TypeExpr::new_tensor({Builder, Int, Int}), TypeExpr::new_tensor({Builder, Unit})); - TypeExpr* fetch_slice_op_mutate = TypeExpr::new_map(SliceInt, TypeExpr::new_tensor({Slice, Slice})); - TypeExpr* prefetch_slice_op = TypeExpr::new_map(SliceInt, Slice); - TypeExpr* throw_arg_op = TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_tensor({X, Int}), Unit)); + TypePtr Unit = TypeDataVoid::create(); + TypePtr Int = TypeDataInt::create(); + TypePtr Slice = TypeDataSlice::create(); + TypePtr Builder = TypeDataBuilder::create(); + TypePtr Tuple = TypeDataTuple::create(); - define_builtin_func("_+_", arith_bin_op, compile_add, + std::vector itemsT; + itemsT.emplace_back("T"); + TypePtr typeT = TypeDataGenericT::create("T"); + const GenericsDeclaration* declGenericT = new GenericsDeclaration(std::move(itemsT)); + + std::vector ParamsInt1 = {Int}; + std::vector ParamsInt2 = {Int, Int}; + std::vector ParamsInt3 = {Int, Int, Int}; + std::vector ParamsSliceInt = {Slice, Int}; + + define_builtin_func("_+_", ParamsInt2, Int, nullptr, + compile_add, FunctionData::flagMarkedAsPure); - define_builtin_func("_-_", arith_bin_op, compile_sub, + define_builtin_func("_-_", ParamsInt2, Int, nullptr, + compile_sub, FunctionData::flagMarkedAsPure); - define_builtin_func("-_", arith_un_op, compile_unary_minus, + define_builtin_func("-_", ParamsInt1, Int, nullptr, + compile_unary_minus, FunctionData::flagMarkedAsPure); - define_builtin_func("+_", arith_un_op, compile_unary_plus, + define_builtin_func("+_", ParamsInt1, Int, nullptr, + compile_unary_plus, FunctionData::flagMarkedAsPure); - define_builtin_func("_*_", arith_bin_op, compile_mul, + define_builtin_func("_*_", ParamsInt2, Int, nullptr, + compile_mul, FunctionData::flagMarkedAsPure); - define_builtin_func("_/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, -1), + define_builtin_func("_/_", ParamsInt2, Int, 
nullptr, + std::bind(compile_div, _1, _2, _3, -1), FunctionData::flagMarkedAsPure); - define_builtin_func("_~/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 0), + define_builtin_func("_~/_", ParamsInt2, Int, nullptr, + std::bind(compile_div, _1, _2, _3, 0), FunctionData::flagMarkedAsPure); - define_builtin_func("_^/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 1), + define_builtin_func("_^/_", ParamsInt2, Int, nullptr, + std::bind(compile_div, _1, _2, _3, 1), FunctionData::flagMarkedAsPure); - define_builtin_func("_%_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, -1), + define_builtin_func("_%_", ParamsInt2, Int, nullptr, + std::bind(compile_mod, _1, _2, _3, -1), FunctionData::flagMarkedAsPure); - define_builtin_func("_<<_", arith_bin_op, compile_lshift, + define_builtin_func("_<<_", ParamsInt2, Int, nullptr, + compile_lshift, FunctionData::flagMarkedAsPure); - define_builtin_func("_>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1), + define_builtin_func("_>>_", ParamsInt2, Int, nullptr, + std::bind(compile_rshift, _1, _2, _3, -1), FunctionData::flagMarkedAsPure); - define_builtin_func("_~>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 0), + define_builtin_func("_~>>_", ParamsInt2, Int, nullptr, + std::bind(compile_rshift, _1, _2, _3, 0), FunctionData::flagMarkedAsPure); - define_builtin_func("_^>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 1), + define_builtin_func("_^>>_", ParamsInt2, Int, nullptr, + std::bind(compile_rshift, _1, _2, _3, 1), FunctionData::flagMarkedAsPure); - define_builtin_func("!_", arith_un_op, compile_logical_not, + define_builtin_func("!_", ParamsInt1, Int, nullptr, + compile_logical_not, FunctionData::flagMarkedAsPure); - define_builtin_func("~_", arith_un_op, compile_bitwise_not, + define_builtin_func("~_", ParamsInt1, Int, nullptr, + compile_bitwise_not, FunctionData::flagMarkedAsPure); - define_builtin_func("_&_", arith_bin_op, compile_bitwise_and, + define_builtin_func("_&_", 
ParamsInt2, Int, nullptr, + compile_bitwise_and, FunctionData::flagMarkedAsPure); - define_builtin_func("_|_", arith_bin_op, compile_bitwise_or, + define_builtin_func("_|_", ParamsInt2, Int, nullptr, + compile_bitwise_or, FunctionData::flagMarkedAsPure); - define_builtin_func("_^_", arith_bin_op, compile_bitwise_xor, + define_builtin_func("_^_", ParamsInt2, Int, nullptr, + compile_bitwise_xor, FunctionData::flagMarkedAsPure); - define_builtin_func("_==_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 2), + define_builtin_func("_==_", ParamsInt2, Int, nullptr, + std::bind(compile_cmp_int, _1, _2, 2), FunctionData::flagMarkedAsPure); - define_builtin_func("_!=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 5), + define_builtin_func("_!=_", ParamsInt2, Int, nullptr, + std::bind(compile_cmp_int, _1, _2, 5), FunctionData::flagMarkedAsPure); - define_builtin_func("_<_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 4), + define_builtin_func("_<_", ParamsInt2, Int, nullptr, + std::bind(compile_cmp_int, _1, _2, 4), FunctionData::flagMarkedAsPure); - define_builtin_func("_>_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 1), + define_builtin_func("_>_", ParamsInt2, Int, nullptr, + std::bind(compile_cmp_int, _1, _2, 1), FunctionData::flagMarkedAsPure); - define_builtin_func("_<=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 6), + define_builtin_func("_<=_", ParamsInt2, Int, nullptr, + std::bind(compile_cmp_int, _1, _2, 6), FunctionData::flagMarkedAsPure); - define_builtin_func("_>=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 3), + define_builtin_func("_>=_", ParamsInt2, Int, nullptr, + std::bind(compile_cmp_int, _1, _2, 3), FunctionData::flagMarkedAsPure); - define_builtin_func("_<=>_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 7), + define_builtin_func("_<=>_", ParamsInt2, Int, nullptr, + std::bind(compile_cmp_int, _1, _2, 7), FunctionData::flagMarkedAsPure); - define_builtin_func("mulDivFloor", TypeExpr::new_map(Int3, Int), 
std::bind(compile_muldiv, _1, _2, _3, -1), + define_builtin_func("mulDivFloor", ParamsInt3, Int, nullptr, + std::bind(compile_muldiv, _1, _2, _3, -1), FunctionData::flagMarkedAsPure); - define_builtin_func("mulDivRound", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 0), + define_builtin_func("mulDivRound", ParamsInt3, Int, nullptr, + std::bind(compile_muldiv, _1, _2, _3, 0), FunctionData::flagMarkedAsPure); - define_builtin_func("mulDivCeil", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 1), + define_builtin_func("mulDivCeil", ParamsInt3, Int, nullptr, + std::bind(compile_muldiv, _1, _2, _3, 1), FunctionData::flagMarkedAsPure); - define_builtin_func("mulDivMod", TypeExpr::new_map(Int3, Int2), AsmOp::Custom("MULDIVMOD", 3, 2), + define_builtin_func("mulDivMod", ParamsInt3, TypeDataTensor::create({Int, Int}), nullptr, + AsmOp::Custom("MULDIVMOD", 3, 2), FunctionData::flagMarkedAsPure); - define_builtin_func("__true", TypeExpr::new_map(TypeExpr::new_unit(), Int), /* AsmOp::Const("TRUE") */ std::bind(compile_bool_const, _1, _2, true), + define_builtin_func("__true", {}, Int, nullptr, /* AsmOp::Const("TRUE") */ + std::bind(compile_bool_const, _1, _2, true), FunctionData::flagMarkedAsPure); - define_builtin_func("__false", TypeExpr::new_map(TypeExpr::new_unit(), Int), /* AsmOp::Const("FALSE") */ std::bind(compile_bool_const, _1, _2, false), + define_builtin_func("__false", {}, Int, nullptr, /* AsmOp::Const("FALSE") */ + std::bind(compile_bool_const, _1, _2, false), FunctionData::flagMarkedAsPure); - define_builtin_func("__null", TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_unit(), X)), AsmOp::Const("PUSHNULL"), + define_builtin_func("__null", {}, typeT, declGenericT, + AsmOp::Const("PUSHNULL"), FunctionData::flagMarkedAsPure); - define_builtin_func("__isNull", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Int)), compile_is_null, + define_builtin_func("__isNull", {typeT}, Int, declGenericT, + compile_is_null, 
FunctionData::flagMarkedAsPure); - define_builtin_func("__throw", impure_un_op, compile_throw, + define_builtin_func("__throw", ParamsInt1, Unit, nullptr, + compile_throw, 0); - define_builtin_func("__throw_arg", throw_arg_op, compile_throw_arg, + define_builtin_func("__throw_arg", {typeT, Int}, Unit, declGenericT, + compile_throw_arg, 0); - define_builtin_func("__throw_if_unless", TypeExpr::new_map(Int3, Unit), compile_throw_if_unless, + define_builtin_func("__throw_if_unless", ParamsInt3, Unit, nullptr, + compile_throw_if_unless, 0); - define_builtin_func("loadInt", fetch_int_op_mutate, std::bind(compile_fetch_int, _1, _2, true, true), + define_builtin_func("loadInt", ParamsSliceInt, Int, nullptr, + std::bind(compile_fetch_int, _1, _2, true, true), FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf, {}, {1, 0}); - define_builtin_func("loadUint", fetch_int_op_mutate, std::bind(compile_fetch_int, _1, _2, true, false), + define_builtin_func("loadUint", ParamsSliceInt, Int, nullptr, + std::bind(compile_fetch_int, _1, _2, true, false), FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf, {}, {1, 0}); - define_builtin_func("loadBits", fetch_slice_op_mutate, std::bind(compile_fetch_slice, _1, _2, true), + define_builtin_func("loadBits", ParamsSliceInt, Slice, nullptr, + std::bind(compile_fetch_slice, _1, _2, true), FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf, {}, {1, 0}); - define_builtin_func("preloadInt", prefetch_int_op, std::bind(compile_fetch_int, _1, _2, false, true), + define_builtin_func("preloadInt", ParamsSliceInt, Int, nullptr, + std::bind(compile_fetch_int, _1, _2, false, true), FunctionData::flagMarkedAsPure | FunctionData::flagAcceptsSelf); - define_builtin_func("preloadUint", prefetch_int_op, std::bind(compile_fetch_int, _1, _2, false, false), + define_builtin_func("preloadUint", ParamsSliceInt, Int, 
nullptr, + std::bind(compile_fetch_int, _1, _2, false, false), FunctionData::flagMarkedAsPure | FunctionData::flagAcceptsSelf); - define_builtin_func("preloadBits", prefetch_slice_op, std::bind(compile_fetch_slice, _1, _2, false), + define_builtin_func("preloadBits", ParamsSliceInt, Slice, nullptr, + std::bind(compile_fetch_slice, _1, _2, false), FunctionData::flagMarkedAsPure | FunctionData::flagAcceptsSelf); - define_builtin_func("storeInt", store_int_mutate, std::bind(compile_store_int, _1, _2, true), + define_builtin_func("storeInt", {Builder, Int, Int}, Unit, nullptr, + std::bind(compile_store_int, _1, _2, true), FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf | FunctionData::flagReturnsSelf, {1, 0, 2}, {}); - define_builtin_func("storeUint", store_int_mutate, std::bind(compile_store_int, _1, _2, false), + define_builtin_func("storeUint", {Builder, Int, Int}, Unit, nullptr, + std::bind(compile_store_int, _1, _2, false), FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf | FunctionData::flagReturnsSelf, {1, 0, 2}, {}); - define_builtin_func("tupleAt", TypeExpr::new_forall({X}, TypeExpr::new_map(TupleInt, X)), compile_tuple_at, + define_builtin_func("tupleAt", {Tuple, Int}, typeT, declGenericT, + compile_tuple_at, FunctionData::flagMarkedAsPure | FunctionData::flagAcceptsSelf); - define_builtin_func("debugPrint", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Unit)), + define_builtin_func("debugPrint", {typeT}, Unit, declGenericT, AsmOp::Custom("s0 DUMP DROP", 1, 1), 0); - define_builtin_func("debugPrintString", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Unit)), + define_builtin_func("debugPrintString", {typeT}, Unit, declGenericT, AsmOp::Custom("STRDUMP DROP", 1, 1), 0); - define_builtin_func("debugDumpStack", TypeExpr::new_map(Unit, Unit), + define_builtin_func("debugDumpStack", {}, Unit, nullptr, AsmOp::Custom("DUMPSTK", 0, 0), 0); } diff --git 
a/tolk/codegen.cpp b/tolk/codegen.cpp index 0529696f..3830f7ae 100644 --- a/tolk/codegen.cpp +++ b/tolk/codegen.cpp @@ -16,6 +16,7 @@ */ #include "tolk.h" #include "compiler-state.h" +#include "type-system.h" namespace tolk { @@ -345,16 +346,16 @@ bool Op::generate_code_step(Stack& stack) { if (f_sym->is_asm_function() || f_sym->is_builtin_function()) { // TODO: create and compile a true lambda instead of this (so that arg_order and ret_order would work correctly) std::vector args0, res; - TypeExpr* func_type = f_sym->full_type; - TypeExpr::remove_indirect(func_type); - tolk_assert(func_type->is_map()); - auto wr = func_type->args.at(0)->get_width(); - auto wl = func_type->args.at(1)->get_width(); - tolk_assert(wl >= 0 && wr >= 0); - for (int i = 0; i < wl; i++) { + int w_arg = 0; + for (const LocalVarData& param : f_sym->parameters) { + w_arg += param.declared_type->calc_width_on_stack(); + } + int w_ret = f_sym->inferred_return_type->calc_width_on_stack(); + tolk_assert(w_ret >= 0 && w_arg >= 0); + for (int i = 0; i < w_ret; i++) { res.emplace_back(0); } - for (int i = 0; i < wr; i++) { + for (int i = 0; i < w_arg; i++) { args0.emplace_back(0); } if (f_sym->is_asm_function()) { @@ -456,14 +457,12 @@ bool Op::generate_code_step(Stack& stack) { right1.push_back(arg.idx); } } - } else if (arg_order) { - for (int i = 0; i < (int)right.size(); i++) { - right1.push_back(right.at(arg_order->at(i))); - } } else { + tolk_assert(!arg_order); right1 = right; } std::vector last; + last.reserve(right1.size()); for (var_idx_t x : right1) { last.push_back(var_info[x] && var_info[x]->is_last()); } @@ -489,7 +488,7 @@ bool Op::generate_code_step(Stack& stack) { }; if (cl == _CallInd) { exec_callxargs((int)right.size() - 1, (int)left.size()); - } else if (!f_sym->is_regular_function()) { + } else if (!f_sym->is_code_function()) { std::vector res; res.reserve(left.size()); for (var_idx_t i : left) { @@ -503,7 +502,7 @@ bool Op::generate_code_step(Stack& stack) { } else { if 
(f_sym->is_inline() || f_sym->is_inline_ref()) { stack.o << AsmOp::Custom(f_sym->name + " INLINECALLDICT", (int)right.size(), (int)left.size()); - } else if (f_sym->is_regular_function() && std::get(f_sym->body)->code->require_callxargs) { + } else if (f_sym->is_code_function() && std::get(f_sym->body)->code->require_callxargs) { stack.o << AsmOp::Custom(f_sym->name + (" PREPAREDICT"), 0, 2); exec_callxargs((int)right.size() + 1, (int)left.size()); } else { diff --git a/tolk/compiler-state.cpp b/tolk/compiler-state.cpp index 14d064d9..66fad844 100644 --- a/tolk/compiler-state.cpp +++ b/tolk/compiler-state.cpp @@ -66,4 +66,8 @@ void CompilerSettings::parse_experimental_options_cmd_arg(const std::string& cmd } } +const std::vector& get_all_not_builtin_functions() { + return G.all_functions; +} + } // namespace tolk diff --git a/tolk/compiler-state.h b/tolk/compiler-state.h index 56158876..d33eec81 100644 --- a/tolk/compiler-state.h +++ b/tolk/compiler-state.h @@ -95,7 +95,7 @@ struct CompilerState { GlobalSymbolTable symtable; PersistentHeapAllocator persistent_mem; - std::vector all_code_functions; + std::vector all_functions; // all user-defined (not built-in) functions, with generic instantiations std::vector all_get_methods; std::vector all_global_vars; std::vector all_constants; diff --git a/tolk/constant-evaluator.cpp b/tolk/constant-evaluator.cpp index 73c80b9a..9ad27381 100644 --- a/tolk/constant-evaluator.cpp +++ b/tolk/constant-evaluator.cpp @@ -248,15 +248,19 @@ struct ConstantEvaluator { return ConstantValue::from_int(std::move(intval)); } - static ConstantValue handle_identifier(V v) { + static ConstantValue handle_reference(V v) { // todo better handle "appears, directly or indirectly, in its own initializer" - const Symbol* sym = lookup_global_symbol(v->name); + std::string_view name = v->get_name(); + const Symbol* sym = lookup_global_symbol(name); if (!sym) { - v->error("undefined symbol `" + static_cast(v->name) + "`"); + v->error("undefined symbol 
`" + static_cast(name) + "`"); } const GlobalConstData* const_ref = sym->try_as(); if (!const_ref) { - v->error("symbol `" + static_cast(v->name) + "` is not a constant"); + v->error("symbol `" + static_cast(name) + "` is not a constant"); + } + if (v->has_instantiationTs()) { // SOME_CONST + v->error("constant is not a generic"); } return {const_ref->value}; } @@ -274,8 +278,8 @@ struct ConstantEvaluator { if (auto v_binop = v->try_as()) { return handle_binary_operator(v_binop, visit(v_binop->get_lhs()), visit(v_binop->get_rhs())); } - if (auto v_ident = v->try_as()) { - return handle_identifier(v_ident); + if (auto v_ref = v->try_as()) { + return handle_reference(v_ref); } if (auto v_par = v->try_as()) { return visit(v_par->get_expr()); diff --git a/tolk/fwd-declarations.h b/tolk/fwd-declarations.h index d2197e66..e3599f36 100644 --- a/tolk/fwd-declarations.h +++ b/tolk/fwd-declarations.h @@ -32,7 +32,8 @@ struct FunctionData; struct GlobalVarData; struct GlobalConstData; -struct TypeExpr; +class TypeData; +using TypePtr = const TypeData*; struct SrcFile; diff --git a/tolk/generics-helpers.cpp b/tolk/generics-helpers.cpp new file mode 100644 index 00000000..3d353cc4 --- /dev/null +++ b/tolk/generics-helpers.cpp @@ -0,0 +1,272 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#include "generics-helpers.h" +#include "tolk.h" +#include "ast.h" +#include "ast-replicator.h" +#include "type-system.h" +#include "compiler-state.h" +#include "pipeline.h" + +namespace tolk { + +// given orig = "(int, T)" and substitutions = [slice], return "(int, slice)" +static TypePtr replace_genericT_with_deduced(TypePtr orig, const GenericsDeclaration* genericTs, const std::vector& substitutionTs) { + if (!orig || !orig->has_genericT_inside()) { + return orig; + } + tolk_assert(genericTs->size() == substitutionTs.size()); + + return orig->replace_children_custom([genericTs, substitutionTs](TypePtr child) { + if (const TypeDataGenericT* asT = child->try_as()) { + int idx = genericTs->find_nameT(asT->nameT); + if (idx == -1) { + throw Fatal("can not replace generic " + asT->nameT); + } + return substitutionTs[idx]; + } + return child; + }); +} + +// purpose: having `f(value: T)` and call `f(5)`, deduce T = int +// generally, there may be many generic Ts for declaration, and many arguments +// for every argument, `consider_next_condition()` is called +// example: `f(a: int, b: T1, c: (T1, T2))` and call `f(6, 7, (8, cs))` +// - `a` does not affect, it doesn't depend on generic Ts +// - next condition: param_type = `T1`, arg_type = `int`, deduce T1 = int +// - next condition: param_type = `(T1, T2)`, arg_type = `(int, slice)`, deduce T1 = int, T2 = slice +// for call `f(6, cs, (8, cs))` T1 will be both `slice` and `int`, fired an error +class GenericSubstitutionsDeduceForFunctionCall final { + const FunctionData* fun_ref; + std::vector substitutions; + + void provideDeducedT(const std::string& nameT, TypePtr deduced) { + if (deduced == TypeDataNullLiteral::create() || deduced->has_unknown_inside()) { + return; // just 'null' doesn't give sensible info + } + + int idx = fun_ref->genericTs->find_nameT(nameT); + if (substitutions[idx] == nullptr) { + substitutions[idx] = deduced; + } else if (substitutions[idx] != deduced) { + throw std::runtime_error(nameT + 
" is both " + substitutions[idx]->as_human_readable() + " and " + deduced->as_human_readable()); + } + } + +public: + explicit GenericSubstitutionsDeduceForFunctionCall(const FunctionData* fun_ref) + : fun_ref(fun_ref) { + substitutions.resize(fun_ref->genericTs->size()); // filled with nullptr (nothing deduced) + } + + void consider_next_condition(TypePtr param_type, TypePtr arg_type) { + if (const auto* asT = param_type->try_as()) { + // `(arg: T)` called as `f([1, 2])` => T is [int, int] + provideDeducedT(asT->nameT, arg_type); + } else if (const auto* p_tensor = param_type->try_as()) { + // `arg: (int, T)` called as `f((5, cs))` => T is slice + if (const auto* a_tensor = arg_type->try_as(); a_tensor && a_tensor->size() == p_tensor->size()) { + for (int i = 0; i < a_tensor->size(); ++i) { + consider_next_condition(p_tensor->items[i], a_tensor->items[i]); + } + } + } else if (const auto* p_tuple = param_type->try_as()) { + // `arg: [int, T]` called as `f([5, cs])` => T is slice + if (const auto* a_tuple = arg_type->try_as(); a_tuple && a_tuple->size() == p_tuple->size()) { + for (int i = 0; i < a_tuple->size(); ++i) { + consider_next_condition(p_tuple->items[i], a_tuple->items[i]); + } + } + } else if (const auto* p_callable = param_type->try_as()) { + // `arg: fun(TArg) -> TResult` called as `f(calcTupleLen)` => TArg is tuple, TResult is int + if (const auto* a_callable = arg_type->try_as(); a_callable && a_callable->params_size() == p_callable->params_size()) { + for (int i = 0; i < a_callable->params_size(); ++i) { + consider_next_condition(p_callable->params_types[i], a_callable->params_types[i]); + } + consider_next_condition(p_callable->return_type, a_callable->return_type); + } + } + } + + int get_first_not_deduced_idx() const { + for (int i = 0; i < static_cast(substitutions.size()); ++i) { + if (substitutions[i] == nullptr) { + return i; + } + } + return -1; + } + + std::vector flush() { + return {std::move(substitutions)}; + } +}; + +// clone the body 
of `f` replacing T everywhere with a substitution +// before: `fun f(v: T) { var cp: [T] = [v]; }` +// after: `fun f(v: int) { var cp: [int] = [v]; }` +// an instantiated function becomes a deep copy, all AST nodes are copied, no previous pointers left +class GenericFunctionReplicator final : public ASTReplicatorFunction { + const GenericsDeclaration* genericTs; + const std::vector& substitutionTs; + +protected: + using ASTReplicatorFunction::clone; + + TypePtr clone(TypePtr t) override { + return replace_genericT_with_deduced(t, genericTs, substitutionTs); + } + +public: + GenericFunctionReplicator(const GenericsDeclaration* genericTs, const std::vector& substitutionTs) + : genericTs(genericTs) + , substitutionTs(substitutionTs) { + } + + V clone_function_body(V v_function) override { + return createV( + v_function->loc, + clone(v_function->get_identifier()), + clone(v_function->get_param_list()), + clone(v_function->get_body()), + clone(v_function->declared_return_type), + nullptr, // a newly-created function is not generic + v_function->method_id, + v_function->flags + ); + } +}; + +std::string GenericsDeclaration::as_human_readable() const { + std::string result = "<"; + for (const GenericsItem& item : itemsT) { + if (result.size() > 1) { + result += ","; + } + result += item.nameT; + } + result += ">"; + return result; +} + +int GenericsDeclaration::find_nameT(std::string_view nameT) const { + for (int i = 0; i < static_cast(itemsT.size()); ++i) { + if (itemsT[i].nameT == nameT) { + return i; + } + } + return -1; +} + +// after creating a deep copy of `f` like `f`, its new and fresh body needs the previous pipeline to run +// for example, all local vars need to be registered as symbols, etc. 
+static void run_pipeline_for_instantiated_function(const FunctionData* inst_fun_ref) { + // these pipes are exactly the same as in tolk.cpp — all preceding (and including) type inferring + pipeline_resolve_identifiers_and_assign_symbols(inst_fun_ref); + pipeline_calculate_rvalue_lvalue(inst_fun_ref); + pipeline_detect_unreachable_statements(inst_fun_ref); + pipeline_infer_types_and_calls_and_fields(inst_fun_ref); +} + +std::string generate_instantiated_name(const std::string& orig_name, const std::vector& substitutions) { + // an instantiated function name will be "{orig_name}<{T1,T2,...}>" + std::string name = orig_name; + name += "<"; + for (TypePtr subs : substitutions) { + if (name.size() > orig_name.size() + 1) { + name += ","; + } + name += subs->as_human_readable(); + } + name.erase(std::remove(name.begin(), name.end(), ' '), name.end()); + name += ">"; + return name; +} + +td::Result> deduce_substitutionTs_on_generic_func_call(const FunctionData* called_fun, std::vector&& arg_types, TypePtr return_hint) { + try { + GenericSubstitutionsDeduceForFunctionCall deducing(called_fun); + for (const LocalVarData& param : called_fun->parameters) { + if (param.declared_type->has_genericT_inside() && param.idx < static_cast(arg_types.size())) { + deducing.consider_next_condition(param.declared_type, arg_types[param.idx]); + } + } + int idx = deducing.get_first_not_deduced_idx(); + if (idx != -1 && return_hint && called_fun->declared_return_type->has_genericT_inside()) { + deducing.consider_next_condition(called_fun->declared_return_type, return_hint); + idx = deducing.get_first_not_deduced_idx(); + } + if (idx != -1) { + return td::Status::Error(td::Slice{"can not deduce " + called_fun->genericTs->get_nameT(idx)}); + } + return deducing.flush(); + } catch (const std::runtime_error& ex) { + return td::Status::Error(td::Slice{ex.what()}); + } +} + +const FunctionData* instantiate_generic_function(SrcLocation loc, const FunctionData* fun_ref, const std::string& 
inst_name, std::vector&& substitutionTs) { + tolk_assert(fun_ref->genericTs); + + // if `f` was earlier instantiated, return it + if (const auto* existing = lookup_global_symbol(inst_name)) { + const FunctionData* inst_ref = existing->try_as(); + tolk_assert(inst_ref); + return inst_ref; + } + + std::vector parameters; + parameters.reserve(fun_ref->get_num_params()); + for (const LocalVarData& orig_p : fun_ref->parameters) { + parameters.emplace_back(orig_p.name, orig_p.loc, replace_genericT_with_deduced(orig_p.declared_type, fun_ref->genericTs, substitutionTs), orig_p.flags, orig_p.idx); + } + TypePtr declared_return_type = replace_genericT_with_deduced(fun_ref->declared_return_type, fun_ref->genericTs, substitutionTs); + const GenericsInstantiation* instantiationTs = new GenericsInstantiation(loc, std::move(substitutionTs)); + + if (fun_ref->is_asm_function()) { + FunctionData* inst_ref = new FunctionData(inst_name, fun_ref->loc, declared_return_type, std::move(parameters), fun_ref->flags, nullptr, instantiationTs, new FunctionBodyAsm, fun_ref->ast_root); + inst_ref->arg_order = fun_ref->arg_order; + inst_ref->ret_order = fun_ref->ret_order; + G.symtable.add_function(inst_ref); + G.all_functions.push_back(inst_ref); + run_pipeline_for_instantiated_function(inst_ref); + return inst_ref; + } + + if (fun_ref->is_builtin_function()) { + FunctionData* inst_ref = new FunctionData(inst_name, fun_ref->loc, declared_return_type, std::move(parameters), fun_ref->flags, nullptr, instantiationTs, fun_ref->body, fun_ref->ast_root); + inst_ref->arg_order = fun_ref->arg_order; + inst_ref->ret_order = fun_ref->ret_order; + G.symtable.add_function(inst_ref); + return inst_ref; + } + + GenericFunctionReplicator replicator(fun_ref->genericTs, instantiationTs->substitutions); + V inst_root = replicator.clone_function_body(fun_ref->ast_root->as()); + + FunctionData* inst_ref = new FunctionData(inst_name, fun_ref->loc, declared_return_type, std::move(parameters), fun_ref->flags, 
nullptr, instantiationTs, new FunctionBodyCode, inst_root); + inst_ref->arg_order = fun_ref->arg_order; + inst_ref->ret_order = fun_ref->ret_order; + inst_root->mutate()->assign_fun_ref(inst_ref); + G.symtable.add_function(inst_ref); + G.all_functions.push_back(inst_ref); + run_pipeline_for_instantiated_function(inst_ref); + return inst_ref; +} + +} // namespace tolk diff --git a/tolk/generics-helpers.h b/tolk/generics-helpers.h new file mode 100644 index 00000000..2a304f55 --- /dev/null +++ b/tolk/generics-helpers.h @@ -0,0 +1,64 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#pragma once + +#include "src-file.h" +#include "fwd-declarations.h" +#include "td/utils/Status.h" +#include + +namespace tolk { + +// when a function is declared `f`, this "" is represented as this class +// (not at AST, but at symbol storage level) +struct GenericsDeclaration { + struct GenericsItem { + std::string_view nameT; + + explicit GenericsItem(std::string_view nameT) + : nameT(nameT) {} + }; + + explicit GenericsDeclaration(std::vector&& itemsT) + : itemsT(std::move(itemsT)) {} + + const std::vector itemsT; + + std::string as_human_readable() const; + + size_t size() const { return itemsT.size(); } + bool has_nameT(std::string_view nameT) const { return find_nameT(nameT) != -1; } + int find_nameT(std::string_view nameT) const; + std::string get_nameT(int idx) const { return static_cast(itemsT[idx].nameT); } +}; + +// when a function call is `f()`, this "" is represented as this class +struct GenericsInstantiation { + const std::vector substitutions; // for genericTs + const SrcLocation loc; // first instantiation location + + explicit GenericsInstantiation(SrcLocation loc, std::vector&& substitutions) + : substitutions(std::move(substitutions)) + , loc(loc) { + } +}; + +std::string generate_instantiated_name(const std::string& orig_name, const std::vector& substitutions); +td::Result> deduce_substitutionTs_on_generic_func_call(const FunctionData* called_fun, std::vector&& arg_types, TypePtr return_hint); +const FunctionData* instantiate_generic_function(SrcLocation loc, const FunctionData* fun_ref, const std::string& inst_name, std::vector&& substitutionTs); + +} // namespace tolk diff --git a/tolk/lexer.cpp b/tolk/lexer.cpp index 4278f040..7e8c8fb2 100644 --- a/tolk/lexer.cpp +++ b/tolk/lexer.cpp @@ -328,6 +328,7 @@ struct ChunkIdentifierOrKeyword final : ChunkLexerBase { case 2: if (str == "do") return tok_do; if (str == "if") return tok_if; + if (str == "as") return tok_as; break; case 3: if (str == "int") return tok_int; @@ -345,7 +346,6 @@ 
struct ChunkIdentifierOrKeyword final : ChunkLexerBase { if (str == "null") return tok_null; if (str == "void") return tok_void; if (str == "bool") return tok_bool; - if (str == "auto") return tok_auto; if (str == "self") return tok_self; if (str == "tolk") return tok_tolk; if (str == "type") return tok_type; @@ -578,6 +578,16 @@ void Lexer::next_special(TokenType parse_next_as, const char* str_expected) { cur_token = tokens_circularbuf[++cur_token_idx & 7]; } +Lexer::SavedPositionForLookahead Lexer::save_parsing_position() const { + return {p_next, cur_token_idx, cur_token}; +} + +void Lexer::restore_position(SavedPositionForLookahead saved) { + p_next = saved.p_next; + cur_token_idx = last_token_idx = saved.cur_token_idx; + cur_token = saved.cur_token; +} + void Lexer::error(const std::string& err_msg) const { throw ParseError(cur_location(), err_msg); } @@ -595,7 +605,7 @@ void lexer_init() { // Hence, it's difficult to measure Lexer performance separately. // This function can be called just to tick Lexer performance, it just scans all input files. // There is no sense to use it in production, but when refactoring and optimizing Lexer, it's useful. 
-void lexer_measure_performance(const AllSrcFiles& files_to_just_parse) { +void lexer_measure_performance(const AllRegisteredSrcFiles& files_to_just_parse) { for (const SrcFile* file : files_to_just_parse) { Lexer lex(file); while (!lex.is_eof()) { diff --git a/tolk/lexer.h b/tolk/lexer.h index bf116188..81d579db 100644 --- a/tolk/lexer.h +++ b/tolk/lexer.h @@ -125,9 +125,9 @@ enum TokenType { tok_builder, tok_continuation, tok_tuple, - tok_auto, tok_void, tok_arrow, + tok_as, tok_tolk, tok_semver, @@ -166,6 +166,12 @@ class Lexer { public: + struct SavedPositionForLookahead { + const char* p_next = nullptr; + int cur_token_idx = 0; + Token cur_token; + }; + explicit Lexer(const SrcFile* file); Lexer(const Lexer&) = delete; Lexer &operator=(const Lexer&) = delete; @@ -209,6 +215,9 @@ public: void next(); void next_special(TokenType parse_next_as, const char* str_expected); + SavedPositionForLookahead save_parsing_position() const; + void restore_position(SavedPositionForLookahead saved); + void check(TokenType next_tok, const char* str_expected) const { if (cur_token.type != next_tok) { unexpected(str_expected); // unlikely path, not inlined @@ -230,6 +239,6 @@ public: void lexer_init(); // todo #ifdef TOLK_PROFILING -void lexer_measure_performance(const AllSrcFiles& files_to_just_parse); +void lexer_measure_performance(const AllRegisteredSrcFiles& files_to_just_parse); } // namespace tolk diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index 3c02c7d1..77dbee41 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -17,7 +17,8 @@ #include "tolk.h" #include "src-file.h" #include "ast.h" -#include "compiler-state.h" +#include "ast-visitor.h" +#include "type-system.h" #include "common/refint.h" #include "constant-evaluator.h" @@ -48,7 +49,7 @@ struct LValGlobs { }; std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, LValGlobs* lval_globs = nullptr); -void process_statement(AnyV v, CodeBlob& code); +void 
process_any_statement(AnyV v, CodeBlob& code); static std::vector> pre_compile_tensor_inner(CodeBlob& code, const std::vector& args, @@ -128,25 +129,24 @@ static std::vector pre_compile_tensor(CodeBlob& code, const std::vect static std::vector pre_compile_let(CodeBlob& code, AnyExprV lhs, AnyExprV rhs, SrcLocation loc) { // [lhs] = [rhs]; since type checking is ok, it's the same as "lhs = rhs" - if (lhs->type == ast_tensor_square && rhs->type == ast_tensor_square) { - std::vector right = pre_compile_tensor(code, rhs->as()->get_items()); + if (lhs->type == ast_typed_tuple && rhs->type == ast_typed_tuple) { + std::vector right = pre_compile_tensor(code, rhs->as()->get_items()); LValGlobs globs; - std::vector left = pre_compile_tensor(code, lhs->as()->get_items(), &globs); + std::vector left = pre_compile_tensor(code, lhs->as()->get_items(), &globs); code.on_var_modification(left, loc); code.emplace_back(loc, Op::_Let, std::move(left), right); globs.gen_ops_set_globs(code, loc); return right; } // [lhs] = rhs; it's un-tuple to N left vars - if (lhs->type == ast_tensor_square) { + if (lhs->type == ast_typed_tuple) { std::vector right = pre_compile_expr(rhs, code); - TypeExpr* rhs_type = rhs->inferred_type; - TypeExpr::remove_indirect(rhs_type); - TypeExpr* unpacked_type = rhs_type->args.at(0); // rhs->inferred_type is tuple> - std::vector rvect = {code.create_tmp_var(unpacked_type, rhs->loc)}; + const TypeDataTypedTuple* inferred_tuple = rhs->inferred_type->try_as(); + std::vector types_list = inferred_tuple->items; + std::vector rvect = {code.create_tmp_var(TypeDataTensor::create(std::move(types_list)), rhs->loc)}; code.emplace_back(lhs->loc, Op::_UnTuple, rvect, std::move(right)); LValGlobs globs; - std::vector left = pre_compile_tensor(code, lhs->as()->get_items(), &globs); + std::vector left = pre_compile_tensor(code, lhs->as()->get_items(), &globs); code.on_var_modification(left, loc); code.emplace_back(loc, Op::_Let, std::move(left), rvect); 
globs.gen_ops_set_globs(code, loc); @@ -162,7 +162,7 @@ static std::vector pre_compile_let(CodeBlob& code, AnyExprV lhs, AnyE return right; } -static std::vector gen_op_call(CodeBlob& code, TypeExpr* ret_type, SrcLocation here, +static std::vector gen_op_call(CodeBlob& code, TypePtr ret_type, SrcLocation here, std::vector&& args_vars, const FunctionData* fun_ref) { std::vector rvect = {code.create_tmp_var(ret_type, here)}; Op& op = code.emplace_back(here, Op::_Call, rvect, std::move(args_vars), fun_ref); @@ -173,38 +173,75 @@ static std::vector gen_op_call(CodeBlob& code, TypeExpr* ret_type, Sr } -static std::vector process_binary_operator(V v, CodeBlob& code) { - TokenType t = v->tok; - std::string operator_name = static_cast(v->operator_name); - - if (v->is_set_assign()) { - std::string_view calc_operator = std::string_view{operator_name}.substr(0, operator_name.size() - 1); - auto v_apply = createV(v->loc, calc_operator, static_cast(t - 1), v->get_lhs(), v->get_rhs()); - v_apply->assign_inferred_type(v->inferred_type); - return pre_compile_let(code, v->get_lhs(), v_apply, v->loc); +static std::vector process_symbol(SrcLocation loc, const Symbol* sym, CodeBlob& code, LValGlobs* lval_globs) { + if (const auto* glob_ref = sym->try_as()) { + std::vector rvect = {code.create_tmp_var(glob_ref->declared_type, loc)}; + if (lval_globs) { + lval_globs->add_modified_glob(glob_ref, rvect[0]); + return rvect; + } else { + code.emplace_back(loc, Op::_GlobVar, rvect, std::vector{}, glob_ref); + return rvect; + } } - if (v->is_assign()) { + if (const auto* const_ref = sym->try_as()) { + if (const_ref->is_int_const()) { + std::vector rvect = {code.create_tmp_var(TypeDataInt::create(), loc)}; + code.emplace_back(loc, Op::_IntConst, rvect, const_ref->as_int_const()); + return rvect; + } else { + std::vector rvect = {code.create_tmp_var(TypeDataSlice::create(), loc)}; + code.emplace_back(loc, Op::_SliceConst, rvect, const_ref->as_slice_const()); + return rvect; + } + } + if (const 
auto* fun_ref = sym->try_as()) { + std::vector rvect = {code.create_tmp_var(fun_ref->inferred_full_type, loc)}; + code.emplace_back(loc, Op::_GlobVar, rvect, std::vector{}, fun_ref); + return rvect; + } + if (const auto* var_ref = sym->try_as()) { + return {var_ref->idx}; + } + throw Fatal("process_symbol"); +} + +static std::vector process_assign(V v, CodeBlob& code) { + if (auto lhs_decl = v->get_lhs()->try_as()) { + return pre_compile_let(code, lhs_decl->get_expr(), v->get_rhs(), v->loc); + } else { return pre_compile_let(code, v->get_lhs(), v->get_rhs(), v->loc); } - if (t == tok_minus || t == tok_plus || - t == tok_bitwise_and || t == tok_bitwise_or || t == tok_bitwise_xor || - t == tok_eq || t == tok_lt || t == tok_gt || t == tok_leq || t == tok_geq || t == tok_neq || t == tok_spaceship || - t == tok_lshift || t == tok_rshift || t == tok_rshiftC || t == tok_rshiftR || - t == tok_mul || t == tok_div || t == tok_mod || t == tok_divC || t == tok_divR) { - const FunctionData* fun_ref = lookup_global_symbol("_" + operator_name + "_")->as(); +} + +static std::vector process_set_assign(V v, CodeBlob& code) { + // for "a += b", emulate "a = a + b" + // seems not beautiful, but it works; probably, this transformation should be done at AST level in advance + std::string_view calc_operator = v->operator_name; // "+" for operator += + auto v_apply = createV(v->loc, calc_operator, static_cast(v->tok - 1), v->get_lhs(), v->get_rhs()); + v_apply->assign_inferred_type(v->inferred_type); + v_apply->assign_fun_ref(v->fun_ref); + return pre_compile_let(code, v->get_lhs(), v_apply, v->loc); +} + +static std::vector process_binary_operator(V v, CodeBlob& code) { + TokenType t = v->tok; + + if (v->fun_ref) { // almost all operators, fun_ref was assigned at type inferring std::vector args_vars = pre_compile_tensor(code, {v->get_lhs(), v->get_rhs()}); - return gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), fun_ref); + return gen_op_call(code, v->inferred_type, 
v->loc, std::move(args_vars), v->fun_ref); } if (t == tok_logical_and || t == tok_logical_or) { // do the following transformations: // a && b -> a ? (b != 0) : 0 // a || b -> a ? 1 : (b != 0) AnyExprV v_0 = createV(v->loc, td::make_refint(0), "0"); - v_0->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + v_0->mutate()->assign_inferred_type(TypeDataInt::create()); AnyExprV v_1 = createV(v->loc, td::make_refint(-1), "-1"); - v_1->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); - AnyExprV v_b_ne_0 = createV(v->loc, "!=", tok_neq, v->get_rhs(), v_0); - v_b_ne_0->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + v_1->mutate()->assign_inferred_type(TypeDataInt::create()); + auto v_b_ne_0 = createV(v->loc, "!=", tok_neq, v->get_rhs(), v_0); + v_b_ne_0->mutate()->assign_inferred_type(TypeDataInt::create()); + v_b_ne_0->mutate()->assign_fun_ref(lookup_global_symbol("_!=_")->as()); std::vector cond = pre_compile_expr(v->get_lhs(), code); tolk_assert(cond.size() == 1); std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; @@ -222,9 +259,8 @@ static std::vector process_binary_operator(V v, } static std::vector process_unary_operator(V v, CodeBlob& code) { - const FunctionData* fun_ref = lookup_global_symbol(static_cast(v->operator_name) + "_")->as(); std::vector args_vars = pre_compile_tensor(code, {v->get_rhs()}); - return gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), fun_ref); + return gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), v->fun_ref); } static std::vector process_ternary_operator(V v, CodeBlob& code) { @@ -241,8 +277,17 @@ static std::vector process_ternary_operator(V v return rvect; } +static std::vector process_dot_access(V v, CodeBlob& code, LValGlobs* lval_globs) { + // it's NOT a method call `t.tupleSize()` (since such cases are handled by process_function_call) + // it's `t.0`, `getUser().id`, and `t.tupleSize` (as a reference, not as a 
call) + // currently, nothing except a global function can be a target of dot access + const FunctionData* fun_ref = v->target; + tolk_assert(fun_ref); + return process_symbol(v->loc, fun_ref, code, lval_globs); +} + static std::vector process_function_call(V v, CodeBlob& code) { - // most likely it's a global function, but also may be `some_var(args)` or even `getF()(args)` + // v is `globalF(args)` / `globalF(args)` / `obj.method(args)` / `local_var(args)` / `getF()(args)` const FunctionData* fun_ref = v->fun_maybe; if (!fun_ref) { std::vector args; @@ -251,7 +296,7 @@ static std::vector process_function_call(V v, Code args.push_back(v->get_arg(i)->get_expr()); } std::vector args_vars = pre_compile_tensor(code, args); - std::vector tfunc = pre_compile_expr(v->get_called_f(), code); + std::vector tfunc = pre_compile_expr(v->get_callee(), code); tolk_assert(tfunc.size() == 1); args_vars.push_back(tfunc[0]); std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; @@ -260,95 +305,54 @@ static std::vector process_function_call(V v, Code return rvect; } + int delta_self = v->is_dot_call(); + AnyExprV obj_leftmost = nullptr; std::vector args; - args.reserve(v->get_num_args()); - for (int i = 0; i < v->get_num_args(); ++i) { - args.push_back(v->get_arg(i)->get_expr()); - } - std::vector args_vars = pre_compile_tensor(code, args); - - TypeExpr* op_call_type = v->inferred_type; - if (fun_ref->has_mutate_params()) { - std::vector types_list; - for (int i = 0; i < v->get_num_args(); ++i) { - if (fun_ref->parameters[i].is_mutate_parameter()) { - types_list.push_back(args[i]->inferred_type); - } + args.reserve(delta_self + v->get_num_args()); + if (delta_self) { + args.push_back(v->get_dot_obj()); + obj_leftmost = v->get_dot_obj(); + while (obj_leftmost->type == ast_function_call && obj_leftmost->as()->is_dot_call() && obj_leftmost->as()->fun_maybe && obj_leftmost->as()->fun_maybe->does_return_self()) { + obj_leftmost = obj_leftmost->as()->get_dot_obj(); } - 
types_list.push_back(v->inferred_type); - op_call_type = TypeExpr::new_tensor(std::move(types_list)); } - - std::vector rvect_apply = gen_op_call(code, op_call_type, v->loc, std::move(args_vars), fun_ref); - - if (fun_ref->has_mutate_params()) { - LValGlobs local_globs; - std::vector left; - for (int i = 0; i < v->get_num_args(); ++i) { - if (fun_ref->parameters[i].is_mutate_parameter()) { - AnyExprV arg_i = v->get_arg(i)->get_expr(); - tolk_assert(arg_i->is_lvalue); - std::vector ith_var_idx = pre_compile_expr(arg_i, code, &local_globs); - left.insert(left.end(), ith_var_idx.begin(), ith_var_idx.end()); - } - } - std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; - left.push_back(rvect[0]); - code.on_var_modification(left, v->loc); - code.emplace_back(v->loc, Op::_Let, std::move(left), rvect_apply); - local_globs.gen_ops_set_globs(code, v->loc); - return rvect; - } - - return rvect_apply; -} - -static std::vector process_dot_method_call(V v, CodeBlob& code) { - std::vector args; - args.reserve(1 + v->get_num_args()); - args.push_back(v->get_obj()); for (int i = 0; i < v->get_num_args(); ++i) { args.push_back(v->get_arg(i)->get_expr()); } std::vector> vars_per_arg = pre_compile_tensor_inner(code, args, nullptr); - TypeExpr* op_call_type = v->inferred_type; - TypeExpr* real_ret_type = v->inferred_type; - if (v->fun_ref->does_return_self()) { - real_ret_type = TypeExpr::new_unit(); - if (!v->fun_ref->parameters[0].is_mutate_parameter()) { - op_call_type = TypeExpr::new_unit(); + TypePtr op_call_type = v->inferred_type; + TypePtr real_ret_type = v->inferred_type; + if (delta_self && fun_ref->does_return_self()) { + real_ret_type = TypeDataVoid::create(); + if (!fun_ref->parameters[0].is_mutate_parameter()) { + op_call_type = TypeDataVoid::create(); } } - if (v->fun_ref->has_mutate_params()) { - std::vector types_list; - for (int i = 0; i < 1 + v->get_num_args(); ++i) { - if (v->fun_ref->parameters[i].is_mutate_parameter()) { + if 
(fun_ref->has_mutate_params()) { + std::vector types_list; + for (int i = 0; i < delta_self + v->get_num_args(); ++i) { + if (fun_ref->parameters[i].is_mutate_parameter()) { types_list.push_back(args[i]->inferred_type); } } types_list.push_back(real_ret_type); - op_call_type = TypeExpr::new_tensor(std::move(types_list)); + op_call_type = TypeDataTensor::create(std::move(types_list)); } std::vector args_vars; for (const std::vector& list : vars_per_arg) { args_vars.insert(args_vars.end(), list.cbegin(), list.cend()); } - std::vector rvect_apply = gen_op_call(code, op_call_type, v->loc, std::move(args_vars), v->fun_ref); + std::vector rvect_apply = gen_op_call(code, op_call_type, v->loc, std::move(args_vars), fun_ref); - AnyExprV obj_leftmost = args[0]; - while (obj_leftmost->type == ast_dot_method_call && obj_leftmost->as()->fun_ref->does_return_self()) { - obj_leftmost = obj_leftmost->as()->get_obj(); - } - - if (v->fun_ref->has_mutate_params()) { + if (fun_ref->has_mutate_params()) { LValGlobs local_globs; std::vector left; - for (int i = 0; i < 1 + v->get_num_args(); ++i) { - if (v->fun_ref->parameters[i].is_mutate_parameter()) { - AnyExprV arg_i = i == 0 ? obj_leftmost : args[i]; - tolk_assert (arg_i->is_lvalue || i == 0); + for (int i = 0; i < delta_self + v->get_num_args(); ++i) { + if (fun_ref->parameters[i].is_mutate_parameter()) { + AnyExprV arg_i = obj_leftmost && i == 0 ? 
obj_leftmost : args[i]; + tolk_assert(arg_i->is_lvalue || i == 0); if (arg_i->is_lvalue) { std::vector ith_var_idx = pre_compile_expr(arg_i, code, &local_globs); left.insert(left.end(), ith_var_idx.begin(), ith_var_idx.end()); @@ -365,7 +369,7 @@ static std::vector process_dot_method_call(V v, rvect_apply = rvect; } - if (v->fun_ref->does_return_self()) { + if (obj_leftmost && fun_ref->does_return_self()) { if (obj_leftmost->is_lvalue) { // to handle if obj is global var, potentially re-assigned inside a chain rvect_apply = pre_compile_expr(obj_leftmost, code); } else { // temporary object, not lvalue, pre_compile_expr @@ -380,7 +384,7 @@ static std::vector process_tensor(V v, CodeBlob& code, LV return pre_compile_tensor(code, v->get_items(), lval_globs); } -static std::vector process_tensor_square(V v, CodeBlob& code, LValGlobs* lval_globs) { +static std::vector process_typed_tuple(V v, CodeBlob& code, LValGlobs* lval_globs) { if (lval_globs) { // todo some time, make "var (a, [b,c]) = (1, [2,3])" work v->error("[...] 
can not be used as lvalue here"); } @@ -417,82 +421,53 @@ static std::vector process_null_keyword(V v, CodeBl return gen_op_call(code, v->inferred_type, v->loc, {}, builtin_sym); } -static std::vector process_self_keyword(V v, CodeBlob& code) { - tolk_assert(code.fun_ref->does_accept_self() && v->param_ref); - tolk_assert(v->param_ref->idx == 0); - return {0}; -} - -static std::vector process_identifier(V v, CodeBlob& code, LValGlobs* lval_globs) { - const Symbol* sym = v->sym; - if (const auto* glob_ref = sym->try_as()) { - std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; - if (lval_globs) { - lval_globs->add_modified_glob(glob_ref, rvect[0]); - return rvect; - } else { - code.emplace_back(v->loc, Op::_GlobVar, rvect, std::vector{}, glob_ref); - return rvect; - } - } - if (const auto* const_ref = sym->try_as()) { - std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; - if (const_ref->is_int_const()) { - code.emplace_back(v->loc, Op::_IntConst, rvect, const_ref->as_int_const()); - } else { - code.emplace_back(v->loc, Op::_SliceConst, rvect, const_ref->as_slice_const()); - } - return rvect; - } - if (const auto* fun_ref = sym->try_as()) { - std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; - code.emplace_back(v->loc, Op::_GlobVar, rvect, std::vector{}, fun_ref); - return rvect; - } - if (const auto* var_ref = sym->try_as()) { -#ifdef TOLK_DEBUG - tolk_assert(var_ref->idx != -1); -#endif - return {var_ref->idx}; - } - throw UnexpectedASTNodeType(v, "process_identifier"); -} - -static std::vector process_local_var(V v, CodeBlob& code, LValGlobs* lval_globs) { +static std::vector process_local_var(V v, CodeBlob& code) { if (v->marked_as_redef) { - return process_identifier(v->get_identifier()->as(), code, lval_globs); + return process_symbol(v->loc, v->var_ref, code, nullptr); } - if (v->get_identifier()->try_as()) { - const LocalVarData* var_ref = v->var_maybe->as(); - tolk_assert(var_ref->idx == -1); - 
var_ref->mutate()->assign_idx(code.create_var(v->inferred_type, var_ref, v->loc)); - return {var_ref->idx}; - } - return {code.create_tmp_var(v->inferred_type, v->loc)}; // underscore + + tolk_assert(v->var_ref->idx == -1); + v->var_ref->mutate()->assign_idx(code.create_var(v->inferred_type, v->var_ref, v->loc)); + return {v->var_ref->idx}; +} + +static std::vector process_local_vars_declaration(V, CodeBlob&) { + // it can not appear as a standalone expression + // `var ... = rhs` is handled by ast_assign + tolk_assert(false); } static std::vector process_underscore(V v, CodeBlob& code) { + // when _ is used as left side of assignment, like `(cs, _) = cs.loadAndReturn()` return {code.create_tmp_var(v->inferred_type, v->loc)}; } std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, LValGlobs* lval_globs) { switch (v->type) { + case ast_reference: + return process_symbol(v->loc, v->as()->sym, code, lval_globs); + case ast_assign: + return process_assign(v->as(), code); + case ast_set_assign: + return process_set_assign(v->as(), code); case ast_binary_operator: return process_binary_operator(v->as(), code); case ast_unary_operator: return process_unary_operator(v->as(), code); case ast_ternary_operator: return process_ternary_operator(v->as(), code); + case ast_cast_as_operator: + return pre_compile_expr(v->as()->get_expr(), code, lval_globs); + case ast_dot_access: + return process_dot_access(v->as(), code, lval_globs); case ast_function_call: return process_function_call(v->as(), code); - case ast_dot_method_call: - return process_dot_method_call(v->as(), code); case ast_parenthesized_expression: return pre_compile_expr(v->as()->get_expr(), code, lval_globs); case ast_tensor: return process_tensor(v->as(), code, lval_globs); - case ast_tensor_square: - return process_tensor_square(v->as(), code, lval_globs); + case ast_typed_tuple: + return process_typed_tuple(v->as(), code, lval_globs); case ast_int_const: return process_int_const(v->as(), code); case 
ast_string_const: @@ -501,12 +476,10 @@ std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, LValGlobs* l return process_bool_const(v->as(), code); case ast_null_keyword: return process_null_keyword(v->as(), code); - case ast_self_keyword: - return process_self_keyword(v->as(), code); - case ast_identifier: - return process_identifier(v->as(), code, lval_globs); - case ast_local_var: - return process_local_var(v->as(), code, lval_globs); + case ast_local_var_lhs: + return process_local_var(v->as(), code); + case ast_local_vars_declaration: + return process_local_vars_declaration(v->as(), code); case ast_underscore: return process_underscore(v->as(), code); default: @@ -515,39 +488,34 @@ std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, LValGlobs* l } -static void process_local_vars_declaration(V v, CodeBlob& code) { - pre_compile_let(code, v->get_lhs(), v->get_assigned_val(), v->loc); -} - static void process_sequence(V v, CodeBlob& code) { for (AnyV item : v->get_items()) { - process_statement(item, code); + process_any_statement(item, code); } } - static void process_assert_statement(V v, CodeBlob& code) { std::vector args(3); if (auto v_not = v->get_cond()->try_as(); v_not && v_not->tok == tok_logical_not) { args[0] = v->get_thrown_code(); args[1] = v->get_cond()->as()->get_rhs(); args[2] = createV(v->loc, true); - args[2]->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + args[2]->mutate()->assign_inferred_type(TypeDataInt::create()); } else { args[0] = v->get_thrown_code(); args[1] = v->get_cond(); args[2] = createV(v->loc, false); - args[2]->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + args[2]->mutate()->assign_inferred_type(TypeDataInt::create()); } const FunctionData* builtin_sym = lookup_global_symbol("__throw_if_unless")->as(); std::vector args_vars = pre_compile_tensor(code, args); - gen_op_call(code, TypeExpr::new_unit(), v->loc, std::move(args_vars), builtin_sym); + gen_op_call(code, 
TypeDataVoid::create(), v->loc, std::move(args_vars), builtin_sym); } static void process_catch_variable(AnyExprV v_catch_var, CodeBlob& code) { - if (auto v_ident = v_catch_var->try_as()) { - const LocalVarData* var_ref = v_ident->sym->as(); + if (auto v_ref = v_catch_var->try_as(); v_ref && v_ref->sym) { // not underscore + const LocalVarData* var_ref = v_ref->sym->as(); tolk_assert(var_ref->idx == -1); var_ref->mutate()->assign_idx(code.create_var(v_catch_var->inferred_type, var_ref, v_catch_var->loc)); } @@ -557,7 +525,7 @@ static void process_try_catch_statement(V v, CodeBlob& code.require_callxargs = true; Op& try_catch_op = code.emplace_back(v->loc, Op::_TryCatch); code.push_set_cur(try_catch_op.block0); - process_statement(v->get_try_body(), code); + process_any_statement(v->get_try_body(), code); code.close_pop_cur(v->get_try_body()->loc_end); code.push_set_cur(try_catch_op.block1); @@ -567,7 +535,7 @@ static void process_try_catch_statement(V v, CodeBlob& process_catch_variable(catch_vars[0], code); process_catch_variable(catch_vars[1], code); try_catch_op.left = pre_compile_tensor(code, {catch_vars[1], catch_vars[0]}); - process_statement(v->get_catch_body(), code); + process_any_statement(v->get_catch_body(), code); code.close_pop_cur(v->get_catch_body()->loc_end); } @@ -575,7 +543,7 @@ static void process_repeat_statement(V v, CodeBlob& code) std::vector tmp_vars = pre_compile_expr(v->get_cond(), code); Op& repeat_op = code.emplace_back(v->loc, Op::_Repeat, tmp_vars); code.push_set_cur(repeat_op.block0); - process_statement(v->get_body(), code); + process_any_statement(v->get_body(), code); code.close_pop_cur(v->get_body()->loc_end); } @@ -583,10 +551,10 @@ static void process_if_statement(V v, CodeBlob& code) { std::vector tmp_vars = pre_compile_expr(v->get_cond(), code); Op& if_op = code.emplace_back(v->loc, Op::_If, std::move(tmp_vars)); code.push_set_cur(if_op.block0); - process_statement(v->get_if_body(), code); + 
process_any_statement(v->get_if_body(), code); code.close_pop_cur(v->get_if_body()->loc_end); code.push_set_cur(if_op.block1); - process_statement(v->get_else_body(), code); + process_any_statement(v->get_else_body(), code); code.close_pop_cur(v->get_else_body()->loc_end); if (v->is_ifnot) { std::swap(if_op.block0, if_op.block1); @@ -596,7 +564,7 @@ static void process_if_statement(V v, CodeBlob& code) { static void process_do_while_statement(V v, CodeBlob& code) { Op& until_op = code.emplace_back(v->loc, Op::_Until); code.push_set_cur(until_op.block0); - process_statement(v->get_body(), code); + process_any_statement(v->get_body(), code); // in TVM, there is only "do until", but in Tolk, we want "do while" // here we negate condition to pass it forward to legacy to Op::_Until @@ -621,7 +589,12 @@ static void process_do_while_statement(V v, CodeBlob& co } else { until_cond = createV(cond->loc, "!", tok_logical_not, cond); } - until_cond->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + until_cond->mutate()->assign_inferred_type(TypeDataInt::create()); + if (auto v_bin = until_cond->try_as(); v_bin && !v_bin->fun_ref) { + v_bin->mutate()->assign_fun_ref(lookup_global_symbol("_" + static_cast(v_bin->operator_name) + "_")->as()); + } else if (auto v_un = until_cond->try_as(); v_un && !v_un->fun_ref) { + v_un->mutate()->assign_fun_ref(lookup_global_symbol(static_cast(v_un->operator_name) + "_")->as()); + } until_op.left = pre_compile_expr(until_cond, code); code.close_pop_cur(v->get_body()->loc_end); @@ -633,7 +606,7 @@ static void process_while_statement(V v, CodeBlob& code) { while_op.left = pre_compile_expr(v->get_cond(), code); code.close_pop_cur(v->get_body()->loc); code.push_set_cur(while_op.block1); - process_statement(v->get_body(), code); + process_any_statement(v->get_body(), code); code.close_pop_cur(v->get_body()->loc_end); } @@ -641,16 +614,16 @@ static void process_throw_statement(V v, CodeBlob& code) { if (v->has_thrown_arg()) { 
const FunctionData* builtin_sym = lookup_global_symbol("__throw_arg")->as(); std::vector args_vars = pre_compile_tensor(code, {v->get_thrown_arg(), v->get_thrown_code()}); - gen_op_call(code, TypeExpr::new_unit(), v->loc, std::move(args_vars), builtin_sym); + gen_op_call(code, TypeDataVoid::create(), v->loc, std::move(args_vars), builtin_sym); } else { const FunctionData* builtin_sym = lookup_global_symbol("__throw")->as(); std::vector args_vars = pre_compile_tensor(code, {v->get_thrown_code()}); - gen_op_call(code, TypeExpr::new_unit(), v->loc, std::move(args_vars), builtin_sym); + gen_op_call(code, TypeDataVoid::create(), v->loc, std::move(args_vars), builtin_sym); } } static void process_return_statement(V v, CodeBlob& code) { - std::vector return_vars = pre_compile_expr(v->get_return_value(), code); + std::vector return_vars = v->has_return_value() ? pre_compile_expr(v->get_return_value(), code) : std::vector{}; if (code.fun_ref->does_return_self()) { tolk_assert(return_vars.size() == 1); return_vars = {}; @@ -680,10 +653,8 @@ static void append_implicit_return_statement(SrcLocation loc_end, CodeBlob& code } -void process_statement(AnyV v, CodeBlob& code) { +void process_any_statement(AnyV v, CodeBlob& code) { switch (v->type) { - case ast_local_vars_declaration: - return process_local_vars_declaration(v->as(), code); case ast_sequence: return process_sequence(v->as(), code); case ast_return_statement: @@ -709,30 +680,31 @@ void process_statement(AnyV v, CodeBlob& code) { } } -static void convert_function_body_to_CodeBlob(V v, V v_body) { - CodeBlob* blob = new CodeBlob{static_cast(v->get_identifier()->name), v->loc, v->fun_ref, v->ret_type}; +static void convert_function_body_to_CodeBlob(const FunctionData* fun_ref, FunctionBodyCode* code_body) { + auto v_body = fun_ref->ast_root->as()->get_body()->as(); + CodeBlob* blob = new CodeBlob{fun_ref->name, fun_ref->loc, fun_ref}; FormalArgList legacy_arg_list; - for (int i = 0; i < v->get_num_params(); ++i) { - 
legacy_arg_list.emplace_back(v->get_param(i)->declared_type, &v->fun_ref->parameters[i], v->loc); + for (const LocalVarData& param : fun_ref->parameters) { + legacy_arg_list.emplace_back(param.declared_type, ¶m, param.loc); } blob->import_params(std::move(legacy_arg_list)); for (AnyV item : v_body->get_items()) { - process_statement(item, *blob); + process_any_statement(item, *blob); } - if (v->fun_ref->is_implicit_return()) { + if (fun_ref->is_implicit_return()) { append_implicit_return_statement(v_body->loc_end, *blob); } blob->close_blk(v_body->loc_end); - std::get(v->fun_ref->body)->set_code(blob); + code_body->set_code(blob); } -static void convert_asm_body_to_AsmOp(V v, V v_body) { - int cnt = v->get_num_params(); - int width = v->ret_type->get_width(); +static void convert_asm_body_to_AsmOp(const FunctionData* fun_ref, FunctionBodyAsm* asm_body) { + int cnt = fun_ref->get_num_params(); + int width = fun_ref->inferred_return_type->calc_width_on_stack(); std::vector asm_ops; - for (AnyV v_child : v_body->get_asm_commands()) { + for (AnyV v_child : fun_ref->ast_root->as()->get_body()->as()->get_asm_commands()) { std::string_view ops = v_child->as()->str_val; // \n\n... 
std::string op; for (char c : ops) { @@ -756,21 +728,77 @@ static void convert_asm_body_to_AsmOp(V v, V(v->fun_ref->body)->set_code(std::move(asm_ops)); + asm_body->set_code(std::move(asm_ops)); } -void pipeline_convert_ast_to_legacy_Expr_Op(const AllSrcFiles& all_src_files) { - for (const SrcFile* file : all_src_files) { - for (AnyV v : file->ast->as()->get_toplevel_declarations()) { - if (auto v_func = v->try_as()) { - if (v_func->is_asm_function()) { - convert_asm_body_to_AsmOp(v_func, v_func->get_body()->as()); - } else if (!v_func->marked_as_builtin) { - convert_function_body_to_CodeBlob(v_func, v_func->get_body()->as()); +class UpdateArgRetOrderConsideringStackWidth final { +public: + static bool should_visit_function(const FunctionData* fun_ref) { + return !fun_ref->is_generic_function() && (!fun_ref->ret_order.empty() || !fun_ref->arg_order.empty()); + } + + static void start_visiting_function(const FunctionData* fun_ref, V v_function) { + int total_arg_mutate_width = 0; + bool has_arg_width_not_1 = false; + for (const LocalVarData& param : fun_ref->parameters) { + int arg_width = param.declared_type->calc_width_on_stack(); + has_arg_width_not_1 |= arg_width != 1; + total_arg_mutate_width += param.is_mutate_parameter() * arg_width; + } + + // example: `fun f(a: int, b: (int, (int, int)), c: int)` with `asm (b a c)` + // current arg_order is [1 0 2] + // needs to be converted to [1 2 3 0 4] because b width is 3 + if (has_arg_width_not_1) { + int total_arg_width = 0; + std::vector cum_arg_width; + cum_arg_width.reserve(1 + fun_ref->get_num_params()); + cum_arg_width.push_back(0); + for (const LocalVarData& param : fun_ref->parameters) { + cum_arg_width.push_back(total_arg_width += param.declared_type->calc_width_on_stack()); + } + std::vector arg_order; + for (int i = 0; i < fun_ref->get_num_params(); ++i) { + int j = fun_ref->arg_order[i]; + int c1 = cum_arg_width[j], c2 = cum_arg_width[j + 1]; + while (c1 < c2) { + arg_order.push_back(c1++); } } + 
fun_ref->mutate()->assign_arg_order(std::move(arg_order)); + } + + // example: `fun f(mutate self: slice): slice` with `asm(-> 1 0)` + // ret_order is a shuffled range 0...N + // validate N: a function should return value and mutated arguments onto a stack + if (!fun_ref->ret_order.empty()) { + size_t expected_width = fun_ref->inferred_return_type->calc_width_on_stack() + total_arg_mutate_width; + if (expected_width != fun_ref->ret_order.size()) { + v_function->get_body()->error("ret_order (after ->) expected to contain " + std::to_string(expected_width) + " numbers"); + } } } +}; + +class ConvertASTToLegacyOpVisitor final { +public: + static bool should_visit_function(const FunctionData* fun_ref) { + return !fun_ref->is_generic_function(); + } + + static void start_visiting_function(const FunctionData* fun_ref, V) { + tolk_assert(fun_ref->is_type_inferring_done()); + if (fun_ref->is_code_function()) { + convert_function_body_to_CodeBlob(fun_ref, std::get(fun_ref->body)); + } else if (fun_ref->is_asm_function()) { + convert_asm_body_to_AsmOp(fun_ref, std::get(fun_ref->body)); + } + } +}; + +void pipeline_convert_ast_to_legacy_Expr_Op() { + visit_ast_of_all_functions(); + visit_ast_of_all_functions(); } } // namespace tolk diff --git a/tolk/pipe-calc-rvalue-lvalue.cpp b/tolk/pipe-calc-rvalue-lvalue.cpp index 1738226b..041aec89 100644 --- a/tolk/pipe-calc-rvalue-lvalue.cpp +++ b/tolk/pipe-calc-rvalue-lvalue.cpp @@ -74,12 +74,12 @@ class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody { parent::visit(v); } - void visit(V v) override { + void visit(V v) override { mark_vertex_cur_or_rvalue(v); parent::visit(v); } - void visit(V v) override { + void visit(V v) override { mark_vertex_cur_or_rvalue(v); } @@ -99,10 +99,6 @@ class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody { mark_vertex_cur_or_rvalue(v); } - void visit(V v) override { - mark_vertex_cur_or_rvalue(v); - } - void visit(V v) override { mark_vertex_cur_or_rvalue(v); 
MarkingState saved = enter_state(v->passed_as_mutate ? MarkingState::LValueAndRValue : MarkingState::RValue); @@ -115,6 +111,13 @@ class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody { parent::visit(v); } + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + MarkingState saved = enter_state(MarkingState::RValue); + parent::visit(v->get_obj()); + restore_state(saved); + } + void visit(V v) override { mark_vertex_cur_or_rvalue(v); MarkingState saved = enter_state(MarkingState::RValue); @@ -122,15 +125,6 @@ class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody { restore_state(saved); } - void visit(V v) override { - mark_vertex_cur_or_rvalue(v); - MarkingState saved = enter_state(MarkingState::RValue); - parent::visit(v->get_obj()); - enter_state(MarkingState::RValue); - parent::visit(v->get_arg_list()); - restore_state(saved); - } - void visit(V v) override { // underscore is a placeholder to ignore left side of assignment: `(a, _) = get2params()` // so, if current state is "lvalue", `_` will be marked as lvalue, and ok @@ -139,6 +133,24 @@ class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody { mark_vertex_cur_or_rvalue(v); } + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + MarkingState saved = enter_state(MarkingState::LValue); + parent::visit(v->get_lhs()); + enter_state(MarkingState::RValue); + parent::visit(v->get_rhs()); + restore_state(saved); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + MarkingState saved = enter_state(MarkingState::LValueAndRValue); + parent::visit(v->get_lhs()); + enter_state(MarkingState::RValue); + parent::visit(v->get_rhs()); + restore_state(saved); + } + void visit(V v) override { mark_vertex_cur_or_rvalue(v); MarkingState saved = enter_state(MarkingState::RValue); @@ -148,10 +160,8 @@ class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody { void visit(V v) override { mark_vertex_cur_or_rvalue(v); - MarkingState 
saved = enter_state(v->is_set_assign() ? MarkingState::LValueAndRValue : v->is_assign() ? MarkingState::LValue : MarkingState::RValue); - parent::visit(v->get_lhs()); - enter_state(MarkingState::RValue); - parent::visit(v->get_rhs()); + MarkingState saved = enter_state(MarkingState::RValue); + parent::visit(v); restore_state(saved); } @@ -162,15 +172,18 @@ class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody { restore_state(saved); } - void visit(V v) override { - MarkingState saved = enter_state(MarkingState::LValue); - parent::visit(v->get_lhs()); - enter_state(MarkingState::RValue); - parent::visit(v->get_assigned_val()); - restore_state(saved); + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + parent::visit(v->get_expr()); // leave lvalue state unchanged, for `mutate (t.0 as int)` both `t.0 as int` and `t.0` are lvalue } - void visit(V v) override { + void visit(V v) override { + tolk_assert(cur_state == MarkingState::LValue); + mark_vertex_cur_or_rvalue(v); + parent::visit(v); + } + + void visit(V v) override { tolk_assert(cur_state == MarkingState::LValue); mark_vertex_cur_or_rvalue(v); parent::visit(v); @@ -183,10 +196,22 @@ class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody { restore_state(saved); parent::visit(v->get_catch_body()); } + +public: + bool should_visit_function(const FunctionData* fun_ref) override { + return fun_ref->is_code_function() && !fun_ref->is_generic_function(); + } }; -void pipeline_calculate_rvalue_lvalue(const AllSrcFiles& all_src_files) { - visit_ast_of_all_functions(all_src_files); +void pipeline_calculate_rvalue_lvalue() { + visit_ast_of_all_functions(); +} + +void pipeline_calculate_rvalue_lvalue(const FunctionData* fun_ref) { + CalculateRvalueLvalueVisitor visitor; + if (visitor.should_visit_function(fun_ref)) { + visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); + } } } // namespace tolk diff --git a/tolk/pipe-check-pure-impure.cpp 
b/tolk/pipe-check-pure-impure.cpp index 6cef9f15..2b2e1e67 100644 --- a/tolk/pipe-check-pure-impure.cpp +++ b/tolk/pipe-check-pure-impure.cpp @@ -33,31 +33,27 @@ static void fire_error_impure_operation_inside_pure_function(AnyV v) { class CheckImpureOperationsInPureFunctionVisitor final : public ASTVisitorFunctionBody { static void fire_if_global_var(AnyExprV v) { - if (auto v_ident = v->try_as()) { + if (auto v_ident = v->try_as()) { if (v_ident->sym->try_as()) { fire_error_impure_operation_inside_pure_function(v); } } } - void visit(V v) override { - if (v->marked_as_redef) { - fire_if_global_var(v->get_identifier()); - } + void visit(V v) override { + fire_if_global_var(v->get_lhs()); + parent::visit(v); } - void visit(V v) override { - if (v->is_set_assign() || v->is_assign()) { - fire_if_global_var(v->get_lhs()); - } - + void visit(V v) override { + fire_if_global_var(v->get_lhs()); parent::visit(v); } void visit(V v) override { - // most likely it's a global function, but also may be `some_var(args)` or even `getF()(args)` + // v is `globalF(args)` / `globalF(args)` / `obj.method(args)` / `local_var(args)` / `getF()(args)` if (!v->fun_maybe) { - // calling variables is always impure, no considerations about what's there at runtime + // `local_var(args)` is always impure, no considerations about what's there at runtime fire_error_impure_operation_inside_pure_function(v); } @@ -68,14 +64,6 @@ class CheckImpureOperationsInPureFunctionVisitor final : public ASTVisitorFuncti parent::visit(v); } - void visit(V v) override { - if (!v->fun_ref->is_marked_as_pure()) { - fire_error_impure_operation_inside_pure_function(v); - } - - parent::visit(v); - } - void visit(V v) override { if (v->passed_as_mutate) { fire_if_global_var(v->get_expr()); @@ -93,15 +81,13 @@ class CheckImpureOperationsInPureFunctionVisitor final : public ASTVisitorFuncti } public: - void start_visiting_function(V v_function) override { - if (v_function->marked_as_pure) { - 
parent::visit(v_function->get_body()); - } + bool should_visit_function(const FunctionData* fun_ref) override { + return fun_ref->is_code_function() && !fun_ref->is_generic_function() && fun_ref->is_marked_as_pure(); } }; -void pipeline_check_pure_impure_operations(const AllSrcFiles& all_src_files) { - visit_ast_of_all_functions(all_src_files); +void pipeline_check_pure_impure_operations() { + visit_ast_of_all_functions(); } } // namespace tolk diff --git a/tolk/pipe-check-rvalue-lvalue.cpp b/tolk/pipe-check-rvalue-lvalue.cpp index f5bf8526..038b0999 100644 --- a/tolk/pipe-check-rvalue-lvalue.cpp +++ b/tolk/pipe-check-rvalue-lvalue.cpp @@ -36,9 +36,18 @@ static void fire_error_cannot_be_used_as_lvalue(AnyV v, const std::string& detai v->error(details + " can not be used as lvalue"); } -// handle when a function used as rvalue, like `var cb = f` -static void handle_function_used_as_noncall(AnyExprV v, const FunctionData* fun_ref) { - fun_ref->mutate()->assign_is_used_as_noncall(); +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_modifying_immutable_variable(AnyExprV v, const LocalVarData* var_ref) { + if (var_ref->idx == 0 && var_ref->name == "self") { + v->error("modifying `self`, which is immutable by default; probably, you want to declare `mutate self`"); + } else { + v->error("modifying immutable variable `" + var_ref->name + "`"); + } +} + +// validate a function used as rvalue, like `var cb = f` +// it's not a generic function (ensured earlier at type inferring) and has some more restrictions +static void validate_function_used_as_noncall(AnyExprV v, const FunctionData* fun_ref) { if (!fun_ref->arg_order.empty() || !fun_ref->ret_order.empty()) { v->error("saving `" + fun_ref->name + "` into a variable will most likely lead to invalid usage, since it changes the order of variables on the stack"); } @@ -48,16 +57,30 @@ static void handle_function_used_as_noncall(AnyExprV v, const FunctionData* fun_ } class CheckRValueLvalueVisitor final : 
public ASTVisitorFunctionBody { + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "assignment"); + } + parent::visit(v); + } + + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "assignment"); + } + parent::visit(v); + } + void visit(V v) override { if (v->is_lvalue) { - fire_error_cannot_be_used_as_lvalue(v, "operator `" + static_cast(v->operator_name)); + fire_error_cannot_be_used_as_lvalue(v, "operator " + static_cast(v->operator_name)); } parent::visit(v); } void visit(V v) override { if (v->is_lvalue) { - fire_error_cannot_be_used_as_lvalue(v, "operator `" + static_cast(v->operator_name)); + fire_error_cannot_be_used_as_lvalue(v, "operator " + static_cast(v->operator_name)); } parent::visit(v); } @@ -69,6 +92,11 @@ class CheckRValueLvalueVisitor final : public ASTVisitorFunctionBody { parent::visit(v); } + void visit(V v) override { + // if `x as int` is lvalue, then `x` is also lvalue, so check that `x` is ok + parent::visit(v->get_expr()); + } + void visit(V v) override { if (v->is_lvalue) { fire_error_cannot_be_used_as_lvalue(v, "literal"); @@ -93,46 +121,45 @@ class CheckRValueLvalueVisitor final : public ASTVisitorFunctionBody { } } + void visit(V v) override { + // a reference to a method used as rvalue, like `var v = t.tupleAt` + if (const FunctionData* fun_ref = v->target; v->is_rvalue) { + validate_function_used_as_noncall(v, fun_ref); + } + } + void visit(V v) override { if (v->is_lvalue) { fire_error_cannot_be_used_as_lvalue(v, "function call"); } if (!v->fun_maybe) { - parent::visit(v->get_called_f()); + parent::visit(v->get_callee()); + } + // for `f()` don't visit ast_reference `f`, to detect `f` usage as non-call, like `var cb = f` + // same for `obj.method()`, don't visit ast_reference method, visit only obj + if (v->is_dot_call()) { + parent::visit(v->get_dot_obj()); } - // for `f(...)` don't visit identifier `f`, to detect `f` usage as non-call, like `var cb = 
f` for (int i = 0; i < v->get_num_args(); ++i) { parent::visit(v->get_arg(i)); } } - void visit(V v) override { - if (v->is_lvalue) { - fire_error_cannot_be_used_as_lvalue(v, "method call"); - } - - parent::visit(v->get_obj()); - - for (int i = 0; i < v->get_num_args(); ++i) { - parent::visit(v->get_arg(i)); - } - } - - void visit(V v) override { + void visit(V v) override { if (v->marked_as_redef) { - tolk_assert(v->var_maybe); // always filled, but for `var g_var redef` might point not to a local - if (const LocalVarData* var_ref = v->var_maybe->try_as(); var_ref && var_ref->is_immutable()) { + tolk_assert(v->var_ref); + if (v->var_ref->is_immutable()) { v->error("`redef` for immutable variable"); } } } - void visit(V v) override { + void visit(V v) override { if (v->is_lvalue) { tolk_assert(v->sym); if (const auto* var_ref = v->sym->try_as(); var_ref && var_ref->is_immutable()) { - v->error("modifying immutable variable `" + var_ref->name + "`"); + fire_error_modifying_immutable_variable(v, var_ref); } else if (v->sym->try_as()) { v->error("modifying immutable constant"); } else if (v->sym->try_as()) { @@ -142,13 +169,7 @@ class CheckRValueLvalueVisitor final : public ASTVisitorFunctionBody { // a reference to a function used as rvalue, like `var v = someFunction` if (const FunctionData* fun_ref = v->sym->try_as(); fun_ref && v->is_rvalue) { - handle_function_used_as_noncall(v, fun_ref); - } - } - - void visit(V v) override { - if (v->is_lvalue && v->param_ref->is_immutable()) { - v->error("modifying `self`, which is immutable by default; probably, you want to declare `mutate self`"); + validate_function_used_as_noncall(v, fun_ref); } } @@ -163,10 +184,15 @@ class CheckRValueLvalueVisitor final : public ASTVisitorFunctionBody { // skip catch(_,excNo), there are always vars due to grammar, lvalue/rvalue aren't set to them parent::visit(v->get_catch_body()); } + +public: + bool should_visit_function(const FunctionData* fun_ref) override { + return 
fun_ref->is_code_function() && !fun_ref->is_generic_function(); + } }; -void pipeline_check_rvalue_lvalue(const AllSrcFiles& all_src_files) { - visit_ast_of_all_functions(all_src_files); +void pipeline_check_rvalue_lvalue() { + visit_ast_of_all_functions(); } } // namespace tolk diff --git a/tolk/pipe-constant-folding.cpp b/tolk/pipe-constant-folding.cpp index 9e266e6d..4090d247 100644 --- a/tolk/pipe-constant-folding.cpp +++ b/tolk/pipe-constant-folding.cpp @@ -17,6 +17,7 @@ #include "tolk.h" #include "ast.h" #include "ast-replacer.h" +#include "type-system.h" /* * This pipe is supposed to do constant folding, like replacing `2 + 3` with `5`. @@ -33,7 +34,7 @@ namespace tolk { class ConstantFoldingReplacer final : public ASTReplacerInFunctionBody { static V create_int_const(SrcLocation loc, td::RefInt256&& intval) { auto v_int = createV(loc, std::move(intval), {}); - v_int->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); + v_int->assign_inferred_type(TypeDataInt::create()); v_int->assign_rvalue_true(); return v_int; } @@ -59,10 +60,15 @@ class ConstantFoldingReplacer final : public ASTReplacerInFunctionBody { return v; } + +public: + bool should_visit_function(const FunctionData* fun_ref) override { + return fun_ref->is_code_function() && !fun_ref->is_generic_function(); + } }; -void pipeline_constant_folding(const AllSrcFiles& all_src_files) { - replace_ast_of_all_functions(all_src_files); +void pipeline_constant_folding() { + replace_ast_of_all_functions(); } } // namespace tolk diff --git a/tolk/pipe-detect-unreachable.cpp b/tolk/pipe-detect-unreachable.cpp index 96de2eb0..15824cf3 100644 --- a/tolk/pipe-detect-unreachable.cpp +++ b/tolk/pipe-detect-unreachable.cpp @@ -111,17 +111,28 @@ class UnreachableStatementsDetectVisitor final { } public: - void start_visiting_function(V v_function) { + static bool should_visit_function(const FunctionData* fun_ref) { + return fun_ref->is_code_function() && !fun_ref->is_generic_function(); + } + + void 
start_visiting_function(const FunctionData* fun_ref, V v_function) { bool control_flow_reaches_end = !always_returns(v_function->get_body()->as()); if (control_flow_reaches_end) { - v_function->fun_ref->mutate()->assign_is_implicit_return(); + fun_ref->mutate()->assign_is_implicit_return(); } } }; -void pipeline_detect_unreachable_statements(const AllSrcFiles& all_src_files) { - visit_ast_of_all_functions(all_src_files); +void pipeline_detect_unreachable_statements() { + visit_ast_of_all_functions(); +} + +void pipeline_detect_unreachable_statements(const FunctionData* fun_ref) { + UnreachableStatementsDetectVisitor visitor; + if (UnreachableStatementsDetectVisitor::should_visit_function(fun_ref)) { + visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); + } } } // namespace tolk diff --git a/tolk/pipe-discover-parse-sources.cpp b/tolk/pipe-discover-parse-sources.cpp index 92cc2807..d31348ba 100644 --- a/tolk/pipe-discover-parse-sources.cpp +++ b/tolk/pipe-discover-parse-sources.cpp @@ -38,7 +38,7 @@ namespace tolk { -AllSrcFiles pipeline_discover_and_parse_sources(const std::string& stdlib_filename, const std::string& entrypoint_filename) { +void pipeline_discover_and_parse_sources(const std::string& stdlib_filename, const std::string& entrypoint_filename) { G.all_src_files.locate_and_register_source_file(stdlib_filename, {}); G.all_src_files.locate_and_register_source_file(entrypoint_filename, {}); @@ -46,27 +46,25 @@ AllSrcFiles pipeline_discover_and_parse_sources(const std::string& stdlib_filena tolk_assert(!file->ast); file->ast = parse_src_file_to_ast(file); - // file->ast->debug_print(); + // if (!file->is_stdlib_file()) file->ast->debug_print(); for (AnyV v_toplevel : file->ast->as()->get_toplevel_declarations()) { - if (auto v_import = v_toplevel->try_as()) { + if (auto v_import = v_toplevel->try_as()) { std::string imported_str = v_import->get_file_name(); size_t cur_slash_pos = file->rel_filename.rfind('/'); std::string rel_filename = 
cur_slash_pos == std::string::npos || imported_str[0] == '@' ? std::move(imported_str) : file->rel_filename.substr(0, cur_slash_pos + 1) + imported_str; - SrcFile* imported = G.all_src_files.locate_and_register_source_file(rel_filename, v_import->loc); - file->imports.push_back(SrcFile::ImportStatement{imported}); + const SrcFile* imported = G.all_src_files.locate_and_register_source_file(rel_filename, v_import->loc); + file->imports.push_back(SrcFile::ImportDirective{imported}); v_import->mutate()->assign_src_file(imported); } } } // todo #ifdef TOLK_PROFILING - // lexer_measure_performance(G.all_src_files.get_all_files()); - - return G.all_src_files.get_all_files(); + lexer_measure_performance(G.all_src_files); } } // namespace tolk diff --git a/tolk/pipe-find-unused-symbols.cpp b/tolk/pipe-find-unused-symbols.cpp index 815905e6..29584cbf 100644 --- a/tolk/pipe-find-unused-symbols.cpp +++ b/tolk/pipe-find-unused-symbols.cpp @@ -37,7 +37,7 @@ namespace tolk { static void mark_function_used_dfs(const std::unique_ptr& op); static void mark_function_used(const FunctionData* fun_ref) { - if (!fun_ref->is_regular_function() || fun_ref->is_really_used()) { // already handled + if (!fun_ref->is_code_function() || fun_ref->is_really_used()) { // already handled return; } @@ -66,7 +66,7 @@ static void mark_function_used_dfs(const std::unique_ptr& op) { } void pipeline_find_unused_symbols() { - for (const FunctionData* fun_ref : G.all_code_functions) { + for (const FunctionData* fun_ref : G.all_functions) { if (fun_ref->is_method_id_not_empty()) { // get methods, main and other entrypoints, regular functions with @method_id mark_function_used(fun_ref); } diff --git a/tolk/pipe-generate-fif-output.cpp b/tolk/pipe-generate-fif-output.cpp index 5c0f1647..9092e564 100644 --- a/tolk/pipe-generate-fif-output.cpp +++ b/tolk/pipe-generate-fif-output.cpp @@ -40,20 +40,15 @@ void FunctionBodyAsm::set_code(std::vector&& code) { static void generate_output_func(const FunctionData* 
fun_ref) { - tolk_assert(fun_ref->is_regular_function()); + tolk_assert(fun_ref->is_code_function()); if (G.is_verbosity(2)) { - std::cerr << "\n\n=========================\nfunction " << fun_ref->name << " : " << fun_ref->full_type << std::endl; + std::cerr << "\n\n=========================\nfunction " << fun_ref->name << " : " << fun_ref->inferred_return_type << std::endl; } CodeBlob* code = std::get(fun_ref->body)->code; if (G.is_verbosity(3)) { code->print(std::cerr, 9); } - code->simplify_var_types(); - if (G.is_verbosity(5)) { - std::cerr << "after simplify_var_types: \n"; - code->print(std::cerr, 0); - } code->prune_unreachable_code(); if (G.is_verbosity(5)) { std::cerr << "after prune_unreachable: \n"; @@ -112,11 +107,11 @@ static void generate_output_func(const FunctionData* fun_ref) { } } -void pipeline_generate_fif_output_to_std_cout(const AllSrcFiles& all_src_files) { +void pipeline_generate_fif_output_to_std_cout() { std::cout << "\"Asm.fif\" include\n"; std::cout << "// automatically generated from "; bool need_comma = false; - for (const SrcFile* file : all_src_files) { + for (const SrcFile* file : G.all_src_files) { if (!file->is_stdlib_file()) { if (need_comma) { std::cout << ", "; @@ -129,9 +124,9 @@ void pipeline_generate_fif_output_to_std_cout(const AllSrcFiles& all_src_files) std::cout << "PROGRAM{\n"; bool has_main_procedure = false; - for (const FunctionData* fun_ref : G.all_code_functions) { + for (const FunctionData* fun_ref : G.all_functions) { if (!fun_ref->does_need_codegen()) { - if (G.is_verbosity(2)) { + if (G.is_verbosity(2) && fun_ref->is_code_function()) { std::cerr << fun_ref->name << ": code not generated, function does not need codegen\n"; } continue; @@ -164,7 +159,7 @@ void pipeline_generate_fif_output_to_std_cout(const AllSrcFiles& all_src_files) std::cout << std::string(2, ' ') << "DECLGLOBVAR " << var_ref->name << "\n"; } - for (const FunctionData* fun_ref : G.all_code_functions) { + for (const FunctionData* fun_ref : 
G.all_functions) { if (!fun_ref->does_need_codegen()) { continue; } diff --git a/tolk/pipe-infer-check-types.cpp b/tolk/pipe-infer-check-types.cpp deleted file mode 100644 index 8c18bae9..00000000 --- a/tolk/pipe-infer-check-types.cpp +++ /dev/null @@ -1,524 +0,0 @@ -/* - This file is part of TON Blockchain Library. - - TON Blockchain Library is free software: you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - TON Blockchain Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with TON Blockchain Library. If not, see . -*/ -#include "tolk.h" -#include "src-file.h" -#include "ast.h" -#include "ast-visitor.h" - -/* - * This pipe does type inferring. - * It will be fully rewritten, because current type system is based on Hindley-Milner (unifying usages), - * and I am going to introduce a static type system, drop TypeExpr completely, etc. - * Currently, after this inferring, lots of `te_Indirect` and partially complete types still exist, - * whey are partially refined during converting AST to legacy. 
- */ - -namespace tolk { - -class InferAndCheckTypesInsideFunctionVisitor final : public ASTVisitorFunctionBody { - const FunctionData* current_function = nullptr; - - static bool expect_integer(TypeExpr* inferred) { - try { - TypeExpr* t_int = TypeExpr::new_atomic(TypeExpr::_Int); - unify(inferred, t_int); - return true; - } catch (UnifyError&) { - return false; - } - } - - static bool expect_integer(AnyExprV v_inferred) { - return expect_integer(v_inferred->inferred_type); - } - - static bool is_expr_valid_as_return_self(AnyExprV return_expr) { - // `return self` - if (return_expr->type == ast_self_keyword) { - return true; - } - // `return self.someMethod()` - if (auto v_call = return_expr->try_as()) { - return v_call->fun_ref->does_return_self() && is_expr_valid_as_return_self(v_call->get_obj()); - } - // `return cond ? ... : ...` - if (auto v_ternary = return_expr->try_as()) { - return is_expr_valid_as_return_self(v_ternary->get_when_true()) && is_expr_valid_as_return_self(v_ternary->get_when_false()); - } - return false; - } - - void visit(V v) override { - parent::visit(v->get_expr()); - v->mutate()->assign_inferred_type(v->get_expr()->inferred_type); - } - - void visit(V v) override { - if (v->empty()) { - v->mutate()->assign_inferred_type(TypeExpr::new_unit()); - return; - } - std::vector types_list; - types_list.reserve(v->get_items().size()); - for (AnyExprV item : v->get_items()) { - parent::visit(item); - types_list.emplace_back(item->inferred_type); - } - v->mutate()->assign_inferred_type(TypeExpr::new_tensor(std::move(types_list))); - } - - void visit(V v) override { - if (v->empty()) { - v->mutate()->assign_inferred_type(TypeExpr::new_tuple(TypeExpr::new_unit())); - return; - } - std::vector types_list; - types_list.reserve(v->get_items().size()); - for (AnyExprV item : v->get_items()) { - parent::visit(item); - types_list.emplace_back(item->inferred_type); - } - 
v->mutate()->assign_inferred_type(TypeExpr::new_tuple(TypeExpr::new_tensor(std::move(types_list), false))); - } - - void visit(V v) override { - if (const auto* glob_ref = v->sym->try_as()) { - v->mutate()->assign_inferred_type(glob_ref->declared_type); - } else if (const auto* const_ref = v->sym->try_as()) { - v->mutate()->assign_inferred_type(const_ref->inferred_type); - } else if (const auto* fun_ref = v->sym->try_as()) { - v->mutate()->assign_inferred_type(fun_ref->full_type); - } else if (const auto* var_ref = v->sym->try_as()) { - v->mutate()->assign_inferred_type(var_ref->declared_type); - } - } - - void visit(V v) override { - v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); - } - - void visit(V v) override { - switch (v->modifier) { - case 0: - case 's': - case 'a': - v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Slice)); - break; - case 'u': - case 'h': - case 'H': - case 'c': - v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); - break; - default: - break; - } - } - - void visit(V v) override { - v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); - } - - void visit(V v) override { - const FunctionData* fun_ref = lookup_global_symbol("__null")->as(); - TypeExpr* fun_type = TypeExpr::new_map(TypeExpr::new_unit(), TypeExpr::new_hole()); - TypeExpr* sym_type = fun_ref->full_type; - try { - unify(fun_type, sym_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "cannot apply function " << fun_ref->name << " : " << fun_ref->full_type << " to arguments of type " - << fun_type->args[0] << ": " << ue; - v->error(os.str()); - } - TypeExpr* e_type = fun_type->args[1]; - TypeExpr::remove_indirect(e_type); - v->mutate()->assign_inferred_type(e_type); - } - - void visit(V v) override { - v->mutate()->assign_inferred_type(v->param_ref->declared_type); - } - - void visit(V v) override { - parent::visit(v->get_expr()); - 
v->mutate()->assign_inferred_type(v->get_expr()->inferred_type); - } - - void visit(V v) override { - if (v->empty()) { - v->mutate()->assign_inferred_type(TypeExpr::new_unit()); - return; - } - std::vector types_list; - types_list.reserve(v->size()); - for (AnyExprV item : v->get_arguments()) { - parent::visit(item); - types_list.emplace_back(item->inferred_type); - } - v->mutate()->assign_inferred_type(TypeExpr::new_tensor(std::move(types_list))); - } - - void visit(V v) override { - // special error for "null()" which is a FunC syntax - if (v->get_called_f()->type == ast_null_keyword) { - v->error("null is not a function: use `null`, not `null()`"); - } - - parent::visit(v->get_called_f()); - visit(v->get_arg_list()); - - // most likely it's a global function, but also may be `some_var(args)` or even `getF()(args)` - const FunctionData* fun_ref = v->fun_maybe; - if (!fun_ref) { - TypeExpr* arg_tensor = v->get_arg_list()->inferred_type; - TypeExpr* lhs_type = v->get_called_f()->inferred_type; - TypeExpr* fun_type = TypeExpr::new_map(arg_tensor, TypeExpr::new_hole()); - try { - unify(fun_type, lhs_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "cannot apply expression of type " << lhs_type << " to an expression of type " << arg_tensor - << ": " << ue; - v->error(os.str()); - } - TypeExpr* e_type = fun_type->args[1]; - TypeExpr::remove_indirect(e_type); - v->mutate()->assign_inferred_type(e_type); - return; - } - - TypeExpr* arg_tensor = v->get_arg_list()->inferred_type; - TypeExpr* fun_type = TypeExpr::new_map(arg_tensor, TypeExpr::new_hole()); - TypeExpr* sym_type = fun_ref->full_type; - try { - unify(fun_type, sym_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "cannot apply function " << fun_ref->name << " : " << fun_ref->full_type << " to arguments of type " - << fun_type->args[0] << ": " << ue; - v->error(os.str()); - } - TypeExpr* e_type = fun_type->args[1]; - TypeExpr::remove_indirect(e_type); - - if 
(fun_ref->has_mutate_params()) { - tolk_assert(e_type->constr == TypeExpr::te_Tensor); - e_type = e_type->args[e_type->args.size() - 1]; - } - - v->mutate()->assign_inferred_type(e_type); - } - - void visit(V v) override { - parent::visit(v->get_obj()); - visit(v->get_arg_list()); - std::vector arg_types; - arg_types.reserve(1 + v->get_num_args()); - arg_types.push_back(v->get_obj()->inferred_type); - for (int i = 0; i < v->get_num_args(); ++i) { - arg_types.push_back(v->get_arg(i)->inferred_type); - } - - TypeExpr* arg_tensor = TypeExpr::new_tensor(std::move(arg_types)); - TypeExpr* fun_type = TypeExpr::new_map(arg_tensor, TypeExpr::new_hole()); - TypeExpr* sym_type = v->fun_ref->full_type; - try { - unify(fun_type, sym_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "cannot apply function " << v->fun_ref->name << " : " << v->fun_ref->full_type << " to arguments of type " - << fun_type->args[0] << ": " << ue; - v->error(os.str()); - } - TypeExpr* e_type = fun_type->args[1]; - TypeExpr::remove_indirect(e_type); - - if (v->fun_ref->has_mutate_params()) { - tolk_assert(e_type->constr == TypeExpr::te_Tensor); - e_type = e_type->args[e_type->args.size() - 1]; - } - if (v->fun_ref->does_return_self()) { - e_type = v->get_obj()->inferred_type; - TypeExpr::remove_indirect(e_type); - } - - v->mutate()->assign_inferred_type(e_type); - } - - void visit(V v) override { - v->mutate()->assign_inferred_type(TypeExpr::new_hole()); - } - - void visit(V v) override { - parent::visit(v->get_rhs()); - if (!expect_integer(v->get_rhs())) { - v->error("operator `" + static_cast(v->operator_name) + "` expects integer operand"); - } - v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); - } - - void visit(V v) override { - parent::visit(v->get_lhs()); - parent::visit(v->get_rhs()); - switch (v->tok) { - case tok_assign: { - TypeExpr* lhs_type = v->get_lhs()->inferred_type; - TypeExpr* rhs_type = v->get_rhs()->inferred_type; - try { - 
unify(lhs_type, rhs_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "cannot assign an expression of type " << rhs_type << " to a variable or pattern of type " - << lhs_type << ": " << ue; - v->error(os.str()); - } - TypeExpr* e_type = lhs_type; - TypeExpr::remove_indirect(e_type); - v->mutate()->assign_inferred_type(e_type); - break; - } - case tok_eq: - case tok_neq: - case tok_spaceship: { - if (!expect_integer(v->get_lhs()) || !expect_integer(v->get_rhs())) { - v->error("comparison operators `== !=` can compare only integers"); - } - v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); - break; - } - case tok_logical_and: - case tok_logical_or: { - if (!expect_integer(v->get_lhs()) || !expect_integer(v->get_rhs())) { - v->error("logical operators `&& ||` expect integer operands"); - } - v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); - break; - } - default: - if (!expect_integer(v->get_lhs()) || !expect_integer(v->get_rhs())) { - v->error("operator `" + static_cast(v->operator_name) + "` expects integer operands"); - } - v->mutate()->assign_inferred_type(TypeExpr::new_atomic(TypeExpr::_Int)); - } - } - - void visit(V v) override { - parent::visit(v->get_cond()); - if (!expect_integer(v->get_cond())) { - v->get_cond()->error("condition of ternary ?: operator must be an integer"); - } - parent::visit(v->get_when_true()); - parent::visit(v->get_when_false()); - - TypeExpr* res = TypeExpr::new_hole(); - TypeExpr *ttrue = v->get_when_true()->inferred_type; - TypeExpr *tfals = v->get_when_false()->inferred_type; - unify(res, ttrue); - unify(res, tfals); - v->mutate()->assign_inferred_type(res); - } - - void visit(V v) override { - parent::visit(v->get_cond()); - parent::visit(v->get_if_body()); - parent::visit(v->get_else_body()); - TypeExpr* flag_type = TypeExpr::new_atomic(TypeExpr::_Int); - TypeExpr* cond_type = v->get_cond()->inferred_type; - try { - - unify(cond_type, flag_type); - } catch 
(UnifyError& ue) { - std::ostringstream os; - os << "`if` condition value of type " << cond_type << " is not an integer: " << ue; - v->get_cond()->error(os.str()); - } - v->get_cond()->mutate()->assign_inferred_type(cond_type); - } - - void visit(V v) override { - parent::visit(v->get_cond()); - parent::visit(v->get_body()); - TypeExpr* cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); - TypeExpr* cond_type = v->get_cond()->inferred_type; - try { - unify(cond_type, cnt_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "repeat count value of type " << cond_type << " is not an integer: " << ue; - v->get_cond()->error(os.str()); - } - v->get_cond()->mutate()->assign_inferred_type(cond_type); - } - - void visit(V v) override { - parent::visit(v->get_cond()); - parent::visit(v->get_body()); - TypeExpr* cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); - TypeExpr* cond_type = v->get_cond()->inferred_type; - try { - unify(cond_type, cnt_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "`while` condition value of type " << cond_type << " is not an integer: " << ue; - v->get_cond()->error(os.str()); - } - v->get_cond()->mutate()->assign_inferred_type(cond_type); - } - - void visit(V v) override { - parent::visit(v->get_body()); - parent::visit(v->get_cond()); - TypeExpr* cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); - TypeExpr* cond_type = v->get_cond()->inferred_type; - try { - unify(cond_type, cnt_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "`while` condition value of type " << cond_type << " is not an integer: " << ue; - v->get_cond()->error(os.str()); - } - v->get_cond()->mutate()->assign_inferred_type(cond_type); - } - - void visit(V v) override { - parent::visit(v->get_return_value()); - if (current_function->does_return_self()) { - if (!is_expr_valid_as_return_self(v->get_return_value())) { - v->error("invalid return from `self` function"); - } - return; - } - TypeExpr* expr_type = 
v->get_return_value()->inferred_type; - TypeExpr* ret_type = current_function->full_type; - if (ret_type->constr == TypeExpr::te_ForAll) { - ret_type = ret_type->args[0]; - } - tolk_assert(ret_type->constr == TypeExpr::te_Map); - ret_type = ret_type->args[1]; - if (current_function->has_mutate_params()) { - tolk_assert(ret_type->constr == TypeExpr::te_Tensor); - ret_type = ret_type->args[ret_type->args.size() - 1]; - } - try { - unify(expr_type, ret_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "previous function return type " << ret_type - << " cannot be unified with return statement expression type " << expr_type << ": " << ue; - v->error(os.str()); - } - } - - void visit(V v) override { - if (v->var_maybe) { // not underscore - if (const auto* var_ref = v->var_maybe->try_as()) { - v->mutate()->assign_inferred_type(var_ref->declared_type); - } else if (const auto* glob_ref = v->var_maybe->try_as()) { - v->mutate()->assign_inferred_type(glob_ref->declared_type); - } else { - tolk_assert(0); - } - } else if (v->declared_type) { // underscore with type - v->mutate()->assign_inferred_type(v->declared_type); - } else { // just underscore - v->mutate()->assign_inferred_type(TypeExpr::new_hole()); - } - v->get_identifier()->mutate()->assign_inferred_type(v->inferred_type); - } - - void visit(V v) override { - parent::visit(v->get_lhs()); - parent::visit(v->get_assigned_val()); - TypeExpr* lhs = v->get_lhs()->inferred_type; - TypeExpr* rhs = v->get_assigned_val()->inferred_type; - try { - unify(lhs, rhs); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "cannot assign an expression of type " << rhs << " to a variable or pattern of type " << lhs << ": " << ue; - v->error(os.str()); - } - } - - void visit(V v) override { - parent::visit(v->get_try_body()); - parent::visit(v->get_catch_expr()); - - TypeExpr* tvm_error_type = TypeExpr::new_tensor(TypeExpr::new_var(), TypeExpr::new_atomic(TypeExpr::_Int)); - 
tolk_assert(v->get_catch_expr()->size() == 2); - TypeExpr* type1 = v->get_catch_expr()->get_item(0)->inferred_type; - unify(type1, tvm_error_type->args[1]); - TypeExpr* type2 = v->get_catch_expr()->get_item(1)->inferred_type; - unify(type2, tvm_error_type->args[0]); - - parent::visit(v->get_catch_body()); - } - - void visit(V v) override { - parent::visit(v->get_thrown_code()); - if (!expect_integer(v->get_thrown_code())) { - v->get_thrown_code()->error("excNo of `throw` must be an integer"); - } - if (v->has_thrown_arg()) { - parent::visit(v->get_thrown_arg()); - } - } - - void visit(V v) override { - parent::visit(v->get_cond()); - if (!expect_integer(v->get_cond())) { - v->get_cond()->error("condition of `assert` must be an integer"); - } - parent::visit(v->get_thrown_code()); - } - -public: - void start_visiting_function(V v_function) override { - current_function = v_function->fun_ref; - parent::visit(v_function->get_body()); - if (current_function->is_implicit_return()) { - if (current_function->does_return_self()) { - throw ParseError(v_function->get_body()->as()->loc_end, "missing return; forgot `return self`?"); - } - TypeExpr* expr_type = TypeExpr::new_unit(); - TypeExpr* ret_type = current_function->full_type; - if (ret_type->constr == TypeExpr::te_ForAll) { - ret_type = ret_type->args[0]; - } - tolk_assert(ret_type->constr == TypeExpr::te_Map); - ret_type = ret_type->args[1]; - if (current_function->has_mutate_params()) { - ret_type = ret_type->args[ret_type->args.size() - 1]; - } - try { - unify(expr_type, ret_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "implicit function return type " << expr_type - << " cannot be unified with inferred return type " << ret_type << ": " << ue; - v_function->error(os.str()); - } - } - } -}; - -void pipeline_infer_and_check_types(const AllSrcFiles& all_src_files) { - visit_ast_of_all_functions(all_src_files); -} - -} // namespace tolk diff --git a/tolk/pipe-infer-types-and-calls.cpp 
b/tolk/pipe-infer-types-and-calls.cpp new file mode 100644 index 00000000..1d6fbcb0 --- /dev/null +++ b/tolk/pipe-infer-types-and-calls.cpp @@ -0,0 +1,1149 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>. +*/ +#include "tolk.h" +#include "src-file.h" +#include "ast.h" +#include "ast-visitor.h" +#include "generics-helpers.h" +#include "type-system.h" + +/* + * This is a complicated and crucial part of the pipeline. It simultaneously does the following: + * * infers types of all expressions; example: `2 + 3` both are TypeDataInt, result is also + * * AND checks types for assignment, arguments passing, etc.; example: `fInt(cs)` is error passing slice to int + * * AND binds function/method calls (assigns fun_ref); example: `globalF()`, fun_ref is assigned to `globalF` (unless generic) + * * AND instantiates generic functions; example: `t.tuplePush(2)` creates `tuplePush<int>` and assigns fun_ref to dot field + * * AND infers return type of functions if it's omitted (`fun f() { ... }` means "auto infer", not "void") + * + * It's important to do all these parts simultaneously, they can't be split or separated. + * For example, we can't bind `f(2)` earlier, because if `f` is a generic `f<T>`, we should instantiate it, + * and in order to do it, we need to know argument types. 
+ * For example, we can't bind `c.cellHash()` earlier, because in the future we'll have overloads (`cell.hash()` and `slice.hash()`), + * and in order to bind it, we need to know object type. + * And vice versa, to infer type of expression in the middle, we need to have inferred all expressions preceding it, + * which may also include generics, etc. + * + * About generics. They are more like "C++ templates". If `f<int>` and `f<slice>` called from somewhere, + * there will be TWO new functions, inserted into symtable, and both will be code generated to Fift. + * Body of a generic function is NOT analyzed. Hence, `fun f<T>(v: T) { v.method(); }` we don't know + * whether `v.method()` is a valid call until instantiate it with `f<slice>` for example. + * Same for `v + 2`, we don't know whether + operator can be applied until instantiation. + * In other words, we have a closed type system, not open. + * That's why generic functions' bodies aren't traversed here (and in most following pipes). + * Instead, when an instantiated function is created, it follows all the preceding pipeline (registering symbols, etc.), + * and type inferring is done inside instantiated functions (which can recursively instantiate another, etc.). + * + * A noticeable part of inferring is "hints". + * Example: `var a: User = { id: 3, name: "" }`. To infer type of `{...}` we need to know it's `User`. This hint is taken from lhs. + * Example: `fun tupleAt<T>(t: tuple, idx: int):T`, just `t.tupleGet(2)` can't be deduced (T left unspecified), + * but for assignment with left-defined type, or a call to `fInt(t.tupleGet(2))` hint "int" helps deduce T. + * + * Unlike other pipes, inferring can dig recursively on demand. + * Example: + * fun getInt() { return 1; } + * fun main() { var i = getInt(); } + * If `main` is handled the first, it should know the return type of `getInt`. It's not declared, so we need + * to launch type inferring for `getInt` and then proceed back to `main`. 
+ * When a generic function is instantiated, type inferring inside it is also run. + */ + +namespace tolk { + +static void infer_and_save_return_type_of_function(const FunctionData* fun_ref); /* forward declaration only: get_or_infer_return_type() right below needs to call it */ + +static TypePtr get_or_infer_return_type(const FunctionData* fun_ref) { /* returns fun_ref->inferred_return_type, launching inference first when that field is still null */ + if (!fun_ref->inferred_return_type) { + infer_and_save_return_type_of_function(fun_ref); /* presumably fills inferred_return_type as a side effect - NOTE(review): confirm at its definition */ + } + return fun_ref->inferred_return_type; +} + +GNU_ATTRIBUTE_NOINLINE +static std::string to_string(TypePtr type) { /* overload set used when composing error messages: the type's as_human_readable() text wrapped in backticks */ + return "`" + type->as_human_readable() + "`"; +} + +GNU_ATTRIBUTE_NOINLINE +static std::string to_string(AnyExprV v_with_type) { /* same, taken from the expression's inferred_type; dereferences it, so the expression must already be inferred */ + return "`" + v_with_type->inferred_type->as_human_readable() + "`"; +} + +GNU_ATTRIBUTE_NOINLINE +static std::string to_string(const LocalVarData& var_ref) { /* same, taken from the variable's / parameter's declared_type */ + return "`" + var_ref.declared_type->as_human_readable() + "`"; +} + +GNU_ATTRIBUTE_NOINLINE +static std::string to_string(const FunctionData* fun_ref) { /* the function's own as_human_readable() text wrapped in backticks */ + return "`" + fun_ref->as_human_readable() + "`"; +} + +// fire an error when `fun f(...) 
asm ...` is called with T=(int,int) or other non-1 width on stack +// asm functions generally can't handle it, they expect T to be a TVM primitive +// (in FunC, `forall` type just couldn't be unified with non-primitives; in Tolk, generic T is expectedly inferred) +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_calling_asm_function_with_non1_stack_width_arg(SrcLocation loc, const FunctionData* fun_ref, const std::vector& substitutions, int arg_idx) { /* always throws ParseError at loc; arg_idx indexes both fun_ref->genericTs and substitutions. NOTE(review): std::vector has no element type in this copy (template arguments look stripped) - confirm against upstream */ + throw ParseError(loc, "can not call `" + fun_ref->as_human_readable() + "` with " + fun_ref->genericTs->get_nameT(arg_idx) + "=" + substitutions[arg_idx]->as_human_readable() + ", because it occupies " + std::to_string(substitutions[arg_idx]->calc_width_on_stack()) + " stack slots in TVM, not 1"); +} + +// fire an error on `var n = null` +// technically it's correct, type of `n` is TypeDataNullLiteral, but it's not what the user wanted +// so, it's better to see an error on assignment than later, on `n` usage and types mismatch +// (most common is situation above, but generally, `var (x,n) = xn` where xn is a tensor with 2-nd always-null, can be) +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_assign_always_null_to_variable(SrcLocation loc, const LocalVarData* assigned_var, bool is_assigned_null_literal) { /* always throws ParseError at loc, naming assigned_var */ + std::string var_name = assigned_var->name; + throw ParseError(loc, "can not infer type of `" + var_name + "`, it's always null; specify its type with `" + var_name + ": `" + (is_assigned_null_literal ? " or use `null as `" : "")); /* the " or use ..." hint is appended only when is_assigned_null_literal is true. NOTE(review): a type placeholder after ": " and after "null as " seems to be missing in this copy - confirm against upstream */ +} + +// check correctness of called arguments counts and their type matching +static void check_function_arguments(const FunctionData* fun_ref, V v, AnyExprV lhs_of_dot_call) { /* v is the argument list of the call; lhs_of_dot_call is the object of `obj.f(...)`, or null for a plain call. NOTE(review): `V` has no template argument in this copy - confirm against upstream */ + int delta_self = lhs_of_dot_call ? 
1 : 0; /* for a dot call the object is matched against parameter 0, so explicit arguments start at parameter 1 */ + int n_arguments = v->size() + delta_self; + int n_parameters = fun_ref->get_num_params(); + + // Tolk doesn't have optional parameters currently, so just compare counts + if (!n_parameters && lhs_of_dot_call) { + v->error("`" + fun_ref->name + "` has no parameters and can not be called as method"); /* NOTE(review): the code below indexes parameters[] and so relies on error() not returning - confirm */ + } + if (n_parameters < n_arguments) { + v->error("too many arguments in call to `" + fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); /* both numbers in the message exclude the dot-call object */ + } + if (n_arguments < n_parameters) { + v->error("too few arguments in call to `" + fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); + } + + if (lhs_of_dot_call) { /* the object of a dot call must be assignable to the first parameter */ + if (!fun_ref->parameters[0].declared_type->can_rhs_be_assigned(lhs_of_dot_call->inferred_type)) { + lhs_of_dot_call->error("can not call method for " + to_string(fun_ref->parameters[0]) + " with object of type " + to_string(lhs_of_dot_call)); + } + } + for (int i = 0; i < v->size(); ++i) { /* explicit argument i is checked against parameter i + delta_self; the error is reported on the argument itself */ + if (!fun_ref->parameters[i + delta_self].declared_type->can_rhs_be_assigned(v->get_arg(i)->inferred_type)) { + v->get_arg(i)->error("can not pass " + to_string(v->get_arg(i)) + " to " + to_string(fun_ref->parameters[i + delta_self])); + } + } +} + +/* + * TypeInferringUnifyStrategy unifies types from various branches to a common result (lca). + * It's used to auto infer function return type based on return statements, like in TypeScript. + * Example: `fun f() { ... return 1; ... return null; }` inferred as `int`. + * + * Besides function returns, it's also useful for ternary `return cond ? 1 : null` and `match` expression. + * If types can't be unified (a function returns int and cell, for example), `unify_with()` returns false, handled outside. + * BTW, don't confuse this way of inferring with Hindley-Milner, they have nothing in common.
+ */ +class TypeInferringUnifyStrategy { + TypePtr unified_result = nullptr; /* common type of everything unified so far; stays nullptr until the first unify call */ + + static TypePtr calculate_type_lca(TypePtr t1, TypePtr t2) { /* returns a type that both t1 and t2 fit into, or nullptr when none is found */ + if (t1 == t2) { + return t1; + } + if (t1->can_rhs_be_assigned(t2)) { /* t2 is assignable to t1, so t1 is the common type */ + return t1; + } + if (t2->can_rhs_be_assigned(t1)) { /* and the other way round */ + return t2; + } + + const auto* tensor1 = t1->try_as(); /* NOTE(review): try_as() has no template argument in this copy; the create() calls below suggest TypeDataTensor here and TypeDataTypedTuple further down - confirm against upstream */ + const auto* tensor2 = t2->try_as(); + if (tensor1 && tensor2 && tensor1->size() == tensor2->size()) { /* two tensors of equal size: combine item by item, give up as soon as one pair has no common type */ + std::vector types_lca; + types_lca.reserve(tensor1->size()); + for (int i = 0; i < tensor1->size(); ++i) { + TypePtr next = calculate_type_lca(tensor1->items[i], tensor2->items[i]); + if (next == nullptr) { + return nullptr; + } + types_lca.push_back(next); + } + return TypeDataTensor::create(std::move(types_lca)); + } + + const auto* tuple1 = t1->try_as(); + const auto* tuple2 = t2->try_as(); + if (tuple1 && tuple2 && tuple1->size() == tuple2->size()) { /* same item-by-item rule for two typed tuples of equal size */ + std::vector types_lca; + types_lca.reserve(tuple1->size()); + for (int i = 0; i < tuple1->size(); ++i) { + TypePtr next = calculate_type_lca(tuple1->items[i], tuple2->items[i]); + if (next == nullptr) { + return nullptr; + } + types_lca.push_back(next); + } + return TypeDataTypedTuple::create(std::move(types_lca)); + } + + return nullptr; + } + +public: + bool unify_with(TypePtr next) { /* folds `next` into the accumulated result; returns false, leaving the result untouched, when no common type exists */ + if (unified_result == nullptr) { /* first type seen becomes the result as is */ + unified_result = next; + return true; + } + if (unified_result == next) { + return true; + } + + TypePtr combined = calculate_type_lca(unified_result, next); + if (!combined) { + return false; + } + + unified_result = combined; + return true; + } + + bool unify_with_implicit_return_void() { /* accounts for a path without an explicit return: sets the result to void if nothing was unified yet, otherwise succeeds only when the result already equals TypeDataVoid::create() */ + if (unified_result == nullptr) { + unified_result = TypeDataVoid::create(); + return true; + } + + return unified_result == TypeDataVoid::create(); + } + + TypePtr get_result() const { return unified_result; } /* nullptr when nothing has been unified */ +}; + +/* + * This class handles all types of AST vertices and traverses them, filling all AnyExprV::inferred_type.
+ * Note, that it isn't derived from ASTVisitor, it has manual `switch` over all existing vertex types. + * There are two reasons for this: + * 1) when a new AST node type is introduced, I want it to fail here, not to be left un-inferred with UB at next steps + * 2) easy to maintain a hint (see comments at the top of the file) + */ +class InferCheckTypesAndCallsAndFieldsVisitor final { + const FunctionData* current_function = nullptr; + TypeInferringUnifyStrategy return_unifier; + + GNU_ATTRIBUTE_ALWAYS_INLINE + static void assign_inferred_type(AnyExprV dst, AnyExprV src) { +#ifdef TOLK_DEBUG + tolk_assert(src->inferred_type != nullptr && !src->inferred_type->has_unresolved_inside() && !src->inferred_type->has_genericT_inside()); +#endif + dst->mutate()->assign_inferred_type(src->inferred_type); + } + + GNU_ATTRIBUTE_ALWAYS_INLINE + static void assign_inferred_type(AnyExprV dst, TypePtr inferred_type) { +#ifdef TOLK_DEBUG + tolk_assert(inferred_type != nullptr && !inferred_type->has_unresolved_inside() && !inferred_type->has_genericT_inside()); +#endif + dst->mutate()->assign_inferred_type(inferred_type); + } + + static void assign_inferred_type(const LocalVarData* local_var_or_param, TypePtr inferred_type) { +#ifdef TOLK_DEBUG + tolk_assert(inferred_type != nullptr && !inferred_type->has_unresolved_inside() && !inferred_type->has_genericT_inside()); +#endif + local_var_or_param->mutate()->assign_inferred_type(inferred_type); + } + + static void assign_inferred_type(const FunctionData* fun_ref, TypePtr inferred_return_type, TypePtr inferred_full_type) { +#ifdef TOLK_DEBUG + tolk_assert(inferred_return_type != nullptr && !inferred_return_type->has_unresolved_inside() && !inferred_return_type->has_genericT_inside()); +#endif + fun_ref->mutate()->assign_inferred_type(inferred_return_type, inferred_full_type); + } + + // traverse children in any statement + void process_any_statement(AnyV v) { + switch (v->type) { + case ast_sequence: + return 
process_sequence(v->as()); + case ast_return_statement: + return process_return_statement(v->as()); + case ast_if_statement: + return process_if_statement(v->as()); + case ast_repeat_statement: + return process_repeat_statement(v->as()); + case ast_while_statement: + return process_while_statement(v->as()); + case ast_do_while_statement: + return process_do_while_statement(v->as()); + case ast_throw_statement: + return process_throw_statement(v->as()); + case ast_assert_statement: + return process_assert_statement(v->as()); + case ast_try_catch_statement: + return process_try_catch_statement(v->as()); + case ast_empty_statement: + return; + default: + infer_any_expr(reinterpret_cast(v)); + } + } + + // assigns inferred_type for any expression (by calling assign_inferred_type) + void infer_any_expr(AnyExprV v, TypePtr hint = nullptr) { + switch (v->type) { + case ast_int_const: + return infer_int_const(v->as()); + case ast_string_const: + return infer_string_const(v->as()); + case ast_bool_const: + return infer_bool_const(v->as()); + case ast_local_vars_declaration: + return infer_local_vars_declaration(v->as()); + case ast_assign: + return infer_assignment(v->as()); + case ast_set_assign: + return infer_set_assign(v->as()); + case ast_unary_operator: + return infer_unary_operator(v->as()); + case ast_binary_operator: + return infer_binary_operator(v->as()); + case ast_ternary_operator: + return infer_ternary_operator(v->as(), hint); + case ast_cast_as_operator: + return infer_cast_as_operator(v->as()); + case ast_parenthesized_expression: + return infer_parenthesized(v->as(), hint); + case ast_reference: + return infer_reference(v->as()); + case ast_dot_access: + return infer_dot_access(v->as(), hint); + case ast_function_call: + return infer_function_call(v->as(), hint); + case ast_tensor: + return infer_tensor(v->as(), hint); + case ast_typed_tuple: + return infer_typed_tuple(v->as(), hint); + case ast_null_keyword: + return infer_null_keyword(v->as()); + case 
ast_underscore: + return infer_underscore(v->as(), hint); + case ast_empty_expression: + return infer_empty_expression(v->as()); + default: + throw UnexpectedASTNodeType(v, "infer_any_expr"); + } + } + + static bool expect_integer(AnyExprV v_inferred) { + return v_inferred->inferred_type == TypeDataInt::create(); + } + + static void infer_int_const(V v) { + assign_inferred_type(v, TypeDataInt::create()); + } + + static void infer_string_const(V v) { + if (v->is_bitslice()) { + assign_inferred_type(v, TypeDataSlice::create()); + } else { + assign_inferred_type(v, TypeDataInt::create()); + } + } + + static void infer_bool_const(V v) { + // currently, Tolk has no `bool` type; `true` and `false` are integers (-1 and 0) + assign_inferred_type(v, TypeDataInt::create()); + } + + static void infer_local_vars_declaration(V) { + // it can not appear as a standalone expression + // `var ... = rhs` is handled by ast_assign + tolk_assert(false); + } + + void infer_assignment(V v) { + // v is assignment: `x = 5` / `var x = 5` / `var x: slice = 5` / `(cs,_) = f()` / `val (a,[b],_) = (a,t,0)` + // it's a tricky node to handle, because to infer rhs, at first we need to create hint from lhs + // and then to apply/check inferred rhs onto lhs + // about a hint: `var i: int = t.tupleAt(0)` is ok, but `var i = t.tupleAt(0)` not, since `tupleAt(t,i): T` + AnyExprV lhs = v->get_lhs(); + AnyExprV rhs = v->get_rhs(); + infer_any_expr(rhs, calc_hint_from_assignment_lhs(lhs)); + process_assignment_lhs_after_infer_rhs(lhs, rhs->inferred_type, rhs); + assign_inferred_type(v, lhs); + } + + // having assignment like `var (i: int, s) = rhs` (its lhs is local vars declaration), + // create a contextual infer hint for rhs, `(int, unknown)` in this case + // this hint helps to deduce generics and to resolve unknown types while inferring rhs + static TypePtr calc_hint_from_assignment_lhs(AnyExprV lhs) { + // `var ... 
= rhs` - dig into left part + if (auto lhs_decl = lhs->try_as()) { + return calc_hint_from_assignment_lhs(lhs_decl->get_expr()); + } + + // inside `var v: int = rhs` / `var _ = rhs` / `var v redef = rhs` (lhs is "v" / "_" / "v") + if (auto lhs_var = lhs->try_as()) { + if (lhs_var->marked_as_redef) { + return lhs_var->var_ref->declared_type; + } + if (lhs_var->declared_type) { + return lhs_var->declared_type; + } + return TypeDataUnknown::create(); + } + + // `v = rhs` / `(c1, c2) = rhs` (lhs is "v" / "_" / "c1" / "c2" after recursion) + if (auto lhs_ref = lhs->try_as()) { + if (const auto* var_ref = lhs_ref->sym->try_as()) { + return var_ref->declared_type; + } + if (const auto* glob_ref = lhs_ref->sym->try_as()) { + return glob_ref->declared_type; + } + return TypeDataUnknown::create(); + } + + // `(v1, v2) = rhs` / `var (v1, v2) = rhs` + if (auto lhs_tensor = lhs->try_as()) { + std::vector sub_hints; + sub_hints.reserve(lhs_tensor->size()); + for (AnyExprV item : lhs_tensor->get_items()) { + sub_hints.push_back(calc_hint_from_assignment_lhs(item)); + } + return TypeDataTensor::create(std::move(sub_hints)); + } + + // `[v1, v2] = rhs` / `var [v1, v2] = rhs` + if (auto lhs_tuple = lhs->try_as()) { + std::vector sub_hints; + sub_hints.reserve(lhs_tuple->size()); + for (AnyExprV item : lhs_tuple->get_items()) { + sub_hints.push_back(calc_hint_from_assignment_lhs(item)); + } + return TypeDataTypedTuple::create(std::move(sub_hints)); + } + + return TypeDataUnknown::create(); + } + + // handle (and dig recursively) into `var lhs = rhs` + // examples: `var z = 5`, `var (x, [y]) = (2, [3])`, `var (x, [y]) = xy` + // while recursing, keep track of rhs if lhs and rhs have common shape (5 for z, 2 for x, [3] for [y], 3 for y) + // (so that on type mismatch, point to corresponding rhs, example: `var (x, y:slice) = (1, 2)` point to 2 + void process_assignment_lhs_after_infer_rhs(AnyExprV lhs, TypePtr rhs_type, AnyExprV corresponding_maybe_rhs) { + AnyExprV err_loc = 
corresponding_maybe_rhs ? corresponding_maybe_rhs : lhs; + + // `var ... = rhs` - dig into left part + if (auto lhs_decl = lhs->try_as()) { + process_assignment_lhs_after_infer_rhs(lhs_decl->get_expr(), rhs_type, corresponding_maybe_rhs); + assign_inferred_type(lhs, lhs_decl->get_expr()->inferred_type); + return; + } + + // inside `var v: int = rhs` / `var _ = rhs` / `var v redef = rhs` (lhs is "v" / "_" / "v") + if (auto lhs_var = lhs->try_as()) { + TypePtr declared_type = lhs_var->declared_type; // `var v: int = rhs` (otherwise, nullptr) + if (lhs_var->marked_as_redef) { + tolk_assert(lhs_var->var_ref && lhs_var->var_ref->declared_type); + declared_type = lhs_var->var_ref->declared_type; + } + if (declared_type) { + if (!declared_type->can_rhs_be_assigned(rhs_type)) { + err_loc->error("can not assign " + to_string(rhs_type) + " to variable of type " + to_string(declared_type)); + } + assign_inferred_type(lhs, declared_type); + } else { + if (rhs_type == TypeDataNullLiteral::create()) { + fire_error_assign_always_null_to_variable(err_loc->loc, lhs_var->var_ref->try_as(), corresponding_maybe_rhs && corresponding_maybe_rhs->type == ast_null_keyword); + } + assign_inferred_type(lhs, rhs_type); + assign_inferred_type(lhs_var->var_ref, lhs_var->inferred_type); + } + return; + } + + // `v = rhs` / `(c1, c2) = rhs` (lhs is "v" / "_" / "c1" / "c2" after recursion) + if (lhs->try_as()) { + infer_any_expr(lhs); + if (!lhs->inferred_type->can_rhs_be_assigned(rhs_type)) { + err_loc->error("can not assign " + to_string(rhs_type) + " to variable of type " + to_string(lhs)); + } + return; + } + + // `(v1, v2) = rhs` / `var (v1, v2) = rhs` (rhs may be `(1,2)` or `tensorVar` or `someF()`, doesn't matter) + // dig recursively into v1 and v2 with corresponding rhs i-th item of a tensor + if (auto lhs_tensor = lhs->try_as()) { + const TypeDataTensor* rhs_type_tensor = rhs_type->try_as(); + if (!rhs_type_tensor) { + err_loc->error("can not assign " + to_string(rhs_type) + " to a 
tensor"); + } + if (lhs_tensor->size() != rhs_type_tensor->size()) { + err_loc->error("can not assign " + to_string(rhs_type) + ", sizes mismatch"); + } + V rhs_tensor_maybe = corresponding_maybe_rhs ? corresponding_maybe_rhs->try_as() : nullptr; + std::vector types_list; + types_list.reserve(lhs_tensor->size()); + for (int i = 0; i < lhs_tensor->size(); ++i) { + process_assignment_lhs_after_infer_rhs(lhs_tensor->get_item(i), rhs_type_tensor->items[i], rhs_tensor_maybe ? rhs_tensor_maybe->get_item(i) : nullptr); + types_list.push_back(lhs_tensor->get_item(i)->inferred_type); + } + assign_inferred_type(lhs, TypeDataTensor::create(std::move(types_list))); + return; + } + + // `[v1, v2] = rhs` / `var [v1, v2] = rhs` (rhs may be `[1,2]` or `tupleVar` or `someF()`, doesn't matter) + // dig recursively into v1 and v2 with corresponding rhs i-th item of a tuple + if (auto lhs_tuple = lhs->try_as()) { + const TypeDataTypedTuple* rhs_type_tuple = rhs_type->try_as(); + if (!rhs_type_tuple) { + err_loc->error("can not assign " + to_string(rhs_type) + " to a tuple"); + } + if (lhs_tuple->size() != rhs_type_tuple->size()) { + err_loc->error("can not assign " + to_string(rhs_type) + ", sizes mismatch"); + } + V rhs_tuple_maybe = corresponding_maybe_rhs ? corresponding_maybe_rhs->try_as() : nullptr; + std::vector types_list; + types_list.reserve(lhs_tuple->size()); + for (int i = 0; i < lhs_tuple->size(); ++i) { + process_assignment_lhs_after_infer_rhs(lhs_tuple->get_item(i), rhs_type_tuple->items[i], rhs_tuple_maybe ? 
rhs_tuple_maybe->get_item(i) : nullptr); + types_list.push_back(lhs_tuple->get_item(i)->inferred_type); + } + assign_inferred_type(lhs, TypeDataTypedTuple::create(std::move(types_list))); + return; + } + + // `_ = rhs` + if (lhs->type == ast_underscore) { + assign_inferred_type(lhs, TypeDataUnknown::create()); + return; + } + + // here is something strange and unhandled, like `f() = rhs` + // it will fail on later compilation steps (like rvalue/lvalue checks), but type inferring should pass + infer_any_expr(lhs, rhs_type); + if (!lhs->inferred_type->can_rhs_be_assigned(rhs_type)) { + err_loc->error("can not assign " + to_string(rhs_type) + " to " + to_string(lhs)); + } + } + + void infer_set_assign(V v) { + AnyExprV lhs = v->get_lhs(); + AnyExprV rhs = v->get_rhs(); + infer_any_expr(lhs); + infer_any_expr(rhs, lhs->inferred_type); + + // almost all operators implementation is hardcoded by built-in functions `_+_` and similar + std::string_view builtin_func = v->operator_name; // "+" for operator += + + if (!expect_integer(lhs) || !expect_integer(rhs)) { + v->error("can not apply operator `" + static_cast(v->operator_name) + "` to " + to_string(lhs) + " and " + to_string(rhs)); + } + + assign_inferred_type(v, lhs); + if (!builtin_func.empty()) { + const FunctionData* builtin_sym = lookup_global_symbol("_" + static_cast(builtin_func) + "_")->as(); + tolk_assert(builtin_sym); + v->mutate()->assign_fun_ref(builtin_sym); + } + } + + void infer_unary_operator(V v) { + AnyExprV rhs = v->get_rhs(); + infer_any_expr(rhs); + + // all operators implementation is hardcoded by built-in functions `~_` and similar + std::string_view builtin_func = v->operator_name; + + if (!expect_integer(rhs)) { + v->error("can not apply operator `" + static_cast(v->operator_name) + "` to " + to_string(rhs)); + } + assign_inferred_type(v, TypeDataInt::create()); + + if (!builtin_func.empty()) { + const FunctionData* builtin_sym = lookup_global_symbol(static_cast(builtin_func) + "_")->as(); + 
tolk_assert(builtin_sym); + v->mutate()->assign_fun_ref(builtin_sym); + } + } + + void infer_binary_operator(V v) { + AnyExprV lhs = v->get_lhs(); + AnyExprV rhs = v->get_rhs(); + infer_any_expr(lhs); + infer_any_expr(rhs); + + // almost all operators implementation is hardcoded by built-in functions `_+_` and similar + std::string_view builtin_func = v->operator_name; + + switch (v->tok) { + // == != can compare both integers and booleans, (int == bool) is NOT allowed + case tok_eq: + case tok_neq: + case tok_spaceship: { + if (!expect_integer(lhs) || !expect_integer(rhs)) { + v->error("comparison operators `== !=` can compare only integers, got " + to_string(lhs) + " and " + to_string(rhs)); + } + assign_inferred_type(v, TypeDataInt::create()); + break; + } + case tok_logical_and: + case tok_logical_or: { + if (!expect_integer(lhs) || !expect_integer(rhs)) { + v->error("logical operators `&& ||` expect integer operands, got " + to_string(lhs) + " and " + to_string(rhs)); + } + assign_inferred_type(v, TypeDataInt::create()); + builtin_func = {}; + break; + } + default: + if (!expect_integer(lhs) || !expect_integer(rhs)) { + v->error("can not apply operator `" + static_cast(v->operator_name) + "` to " + to_string(lhs) + " and " + to_string(rhs)); + } + assign_inferred_type(v, TypeDataInt::create()); + } + + if (!builtin_func.empty()) { + const FunctionData* builtin_sym = lookup_global_symbol("_" + static_cast(builtin_func) + "_")->as(); + tolk_assert(builtin_sym); + v->mutate()->assign_fun_ref(builtin_sym); + } + } + + void infer_ternary_operator(V v, TypePtr hint) { + infer_any_expr(v->get_cond()); + if (!expect_integer(v->get_cond())) { + v->get_cond()->error("condition of ternary operator must be an integer, got " + to_string(v->get_cond())); + } + infer_any_expr(v->get_when_true(), hint); + infer_any_expr(v->get_when_false(), hint); + + TypeInferringUnifyStrategy tern_type; + tern_type.unify_with(v->get_when_true()->inferred_type); + if 
(!tern_type.unify_with(v->get_when_false()->inferred_type)) { + v->error("types of ternary branches are incompatible"); + } + assign_inferred_type(v, tern_type.get_result()); + } + + void infer_cast_as_operator(V v) { + // for `expr as `, use this type for hint, so that `t.tupleAt(0) as int` is ok + infer_any_expr(v->get_expr(), v->cast_to_type); + if (!v->get_expr()->inferred_type->can_be_casted_with_as_operator(v->cast_to_type)) { + v->error("type " + to_string(v->get_expr()) + " can not be cast to " + to_string(v->cast_to_type)); + } + assign_inferred_type(v, v->cast_to_type); + } + + void infer_parenthesized(V v, TypePtr hint) { + infer_any_expr(v->get_expr(), hint); + assign_inferred_type(v, v->get_expr()); + } + + static void infer_reference(V v) { + if (const auto* var_ref = v->sym->try_as()) { + assign_inferred_type(v, var_ref->declared_type); + + } else if (const auto* const_ref = v->sym->try_as()) { + assign_inferred_type(v, const_ref->is_int_const() ? TypeDataInt::create() : TypeDataSlice::create()); + + } else if (const auto* glob_ref = v->sym->try_as()) { + assign_inferred_type(v, glob_ref->declared_type); + + } else if (const auto* fun_ref = v->sym->try_as()) { + // it's `globalF` / `globalF` - references to functions used as non-call + V v_instantiationTs = v->get_instantiationTs(); + + if (fun_ref->is_generic_function() && !v_instantiationTs) { + // `genericFn` is invalid as non-call, can't be used without + v->error("can not use a generic function " + to_string(fun_ref) + " as non-call"); + + } else if (fun_ref->is_generic_function()) { + // `genericFn` is valid, it's a reference to instantiation + std::vector substitutions = collect_fun_generic_substitutions_from_manually_specified(v->loc, fun_ref, v_instantiationTs); + fun_ref = check_and_instantiate_generic_function(v->loc, fun_ref, std::move(substitutions)); + v->mutate()->assign_sym(fun_ref); + + } else if (UNLIKELY(v_instantiationTs != nullptr)) { + // non-generic function referenced like 
`return beginCell;` + v_instantiationTs->error("not generic function used with generic T"); + } + + fun_ref->mutate()->assign_is_used_as_noncall(); + get_or_infer_return_type(fun_ref); + assign_inferred_type(v, fun_ref->inferred_full_type); + return; + + } else { + tolk_assert(false); + } + + // for non-functions: `local_var` and similar not allowed + if (UNLIKELY(v->has_instantiationTs())) { + v->get_instantiationTs()->error("generic T not expected here"); + } + } + + // given `genericF` / `t.tupleFirst` (the user manually specified instantiation Ts), + // validate and collect them + // returns: [int, slice] / [cell] + static std::vector collect_fun_generic_substitutions_from_manually_specified(SrcLocation loc, const FunctionData* fun_ref, V instantiationT_list) { + if (fun_ref->genericTs->size() != instantiationT_list->get_items().size()) { + throw ParseError(loc, "wrong count of generic T: expected " + std::to_string(fun_ref->genericTs->size()) + ", got " + std::to_string(instantiationT_list->size())); + } + + std::vector substitutions; + substitutions.reserve(instantiationT_list->size()); + for (int i = 0; i < instantiationT_list->size(); ++i) { + substitutions.push_back(instantiationT_list->get_item(i)->substituted_type); + } + + return substitutions; + } + + // when generic Ts have been collected from user-specified or deduced from arguments, + // instantiate a generic function + // example: was `t.tuplePush(2)`, deduced , instantiate `tuplePush` + // example: was `t.tuplePush(2)`, read , instantiate `tuplePush` (will later fail type check) + // example: was `var cb = t.tupleFirst;` (used as reference, as non-call), instantiate `tupleFirst` + // returns fun_ref to instantiated function + static const FunctionData* check_and_instantiate_generic_function(SrcLocation loc, const FunctionData* fun_ref, std::vector&& substitutionTs) { + // T for asm function must be a TVM primitive (width 1), otherwise, asm would act incorrectly + if (fun_ref->is_asm_function() || 
fun_ref->is_builtin_function()) { + for (int i = 0; i < static_cast(substitutionTs.size()); ++i) { + if (substitutionTs[i]->calc_width_on_stack() != 1) { + fire_error_calling_asm_function_with_non1_stack_width_arg(loc, fun_ref, substitutionTs, i); + } + } + } + + std::string inst_name = generate_instantiated_name(fun_ref->name, substitutionTs); + try { + // make deep clone of `f` with substitutionTs + // (if `f` was already instantiated, it will be immediately returned from a symbol table) + return instantiate_generic_function(loc, fun_ref, inst_name, std::move(substitutionTs)); + } catch (const ParseError& ex) { + throw ParseError(ex.where, "while instantiating generic function `" + inst_name + "` at " + loc.to_string() + ": " + ex.message); + } + } + + void infer_dot_access(V v, TypePtr hint) { + // it's NOT a method call `t.tupleSize()` (since such cases are handled by infer_function_call) + // it's `t.0`, `getUser().id`, and `t.tupleSize` (as a reference, not as a call) + infer_any_expr(v->get_obj()); + // our goal is to fill v->target knowing type of obj + V v_ident = v->get_identifier(); // field/method name vertex + V v_instantiationTs = v->get_instantiationTs(); + std::string_view field_name = v_ident->name; + + // for now, Tolk doesn't have structures, properties, and object-scoped methods + // so, only `t.tupleSize` is allowed, look up a global function + const Symbol* sym = lookup_global_symbol(field_name); + if (!sym) { + v_ident->error("undefined symbol `" + static_cast(field_name) + "`"); + } + const FunctionData* fun_ref = sym->try_as(); + if (!fun_ref) { + v_ident->error("referencing a non-function"); + } + + // `t.tupleSize` is ok, `cs.tupleSize` not + if (!fun_ref->parameters[0].declared_type->can_rhs_be_assigned(v->get_obj()->inferred_type)) { + v_ident->error("referencing a method for " + to_string(fun_ref->parameters[0]) + " with an object of type " + to_string(v->get_obj())); + } + + if (fun_ref->is_generic_function() && !v_instantiationTs) { 
+ // `genericFn` and `t.tupleAt` are invalid as non-call, they can't be used without + v->error("can not use a generic function " + to_string(fun_ref) + " as non-call"); + + } else if (fun_ref->is_generic_function()) { + // `t.tupleAt` is valid, it's a reference to instantiation + std::vector substitutions = collect_fun_generic_substitutions_from_manually_specified(v->loc, fun_ref, v_instantiationTs); + fun_ref = check_and_instantiate_generic_function(v->loc, fun_ref, std::move(substitutions)); + + } else if (UNLIKELY(v_instantiationTs != nullptr)) { + // non-generic method referenced like `var cb = c.cellHash;` + v_instantiationTs->error("not generic function used with generic T"); + } + + fun_ref->mutate()->assign_is_used_as_noncall(); + v->mutate()->assign_target(fun_ref); + get_or_infer_return_type(fun_ref); + assign_inferred_type(v, fun_ref->inferred_full_type); // type of `t.tupleSize` is TypeDataFunCallable + } + + void infer_function_call(V v, TypePtr hint) { + AnyExprV callee = v->get_callee(); + + // v is `globalF(args)` / `globalF(args)` / `obj.method(args)` / `local_var(args)` / `getF()(args)` + int delta_self = 0; + AnyExprV dot_obj = nullptr; + const FunctionData* fun_ref = nullptr; + V v_instantiationTs = nullptr; + + if (auto v_ref = callee->try_as()) { + // `globalF()` / `globalF()` / `local_var()` / `SOME_CONST()` + fun_ref = v_ref->sym->try_as(); // not null for `globalF` + v_instantiationTs = v_ref->get_instantiationTs(); // present for `globalF()` + + } else if (auto v_dot = callee->try_as()) { + // `obj.someMethod()` / `obj.someMethod()` / `getF().someMethod()` / `obj.SOME_CONST()` + delta_self = 1; + dot_obj = v_dot->get_obj(); + v_instantiationTs = v_dot->get_instantiationTs(); // present for `obj.someMethod()` + infer_any_expr(dot_obj); + + // for now, Tolk doesn't have object-scoped methods, so method resolving doesn't depend on obj type + // (in other words, `globalFunction(a)` = `a.globalFunction()`) + std::string_view method_name = 
v_dot->get_field_name(); + const Symbol* sym = lookup_global_symbol(method_name); + if (!sym) { + v_dot->get_identifier()->error("undefined symbol `" + static_cast(method_name) + "`"); + } + fun_ref = sym->try_as(); + if (!fun_ref) { + v_dot->get_identifier()->error("calling a non-function"); + } + + } else { + // `getF()()` / `5()` + // fun_ref remains nullptr + } + + // infer argument types, looking at fun_ref's parameters as hints + for (int i = 0; i < v->get_num_args(); ++i) { + TypePtr param_type = fun_ref && i < fun_ref->get_num_params() - delta_self ? fun_ref->parameters[delta_self + i].declared_type : nullptr; + auto arg_i = v->get_arg(i); + infer_any_expr(arg_i->get_expr(), param_type && !param_type->has_genericT_inside() ? param_type : nullptr); + assign_inferred_type(arg_i, arg_i->get_expr()); + } + + // handle `local_var()` / `getF()()` / `5()` / `SOME_CONST()` / `obj.method()()()` + if (!fun_ref) { + // treat callee like a usual expression, which must have "callable" inferred type + infer_any_expr(callee); + const TypeDataFunCallable* f_callable = callee->inferred_type->try_as(); + if (!f_callable) { // `5()` / `SOME_CONST()` / `null()` + v->error("calling a non-function"); + } + // check arguments count and their types + if (v->get_num_args() != static_cast(f_callable->params_types.size())) { + v->error("expected " + std::to_string(f_callable->params_types.size()) + " arguments, got " + std::to_string(v->get_arg_list()->size())); + } + for (int i = 0; i < v->get_num_args(); ++i) { + if (!f_callable->params_types[i]->can_rhs_be_assigned(v->get_arg(i)->inferred_type)) { + v->get_arg(i)->error("can not pass " + to_string(v->get_arg(i)) + " to " + to_string(f_callable->params_types[i])); + } + } + v->mutate()->assign_fun_ref(nullptr); // no fun_ref to a global function + assign_inferred_type(v, f_callable->return_type); + return; + } + + // so, we have a call `f(args)` or `obj.f(args)`, f is a global function (fun_ref) (code / asm / builtin) + // if it's 
a generic function `f`, we need to instantiate it, like `f` + // same for generic methods `t.tupleAt`, need to achieve `t.tupleAt` + + if (fun_ref->is_generic_function() && v_instantiationTs) { + // if Ts are specified by a user like `f(args)` / `t.tupleAt()`, take them + std::vector substitutions = collect_fun_generic_substitutions_from_manually_specified(v->loc, fun_ref, v_instantiationTs); + fun_ref = check_and_instantiate_generic_function(v->loc, fun_ref, std::move(substitutions)); + + } else if (fun_ref->is_generic_function()) { + // if `f` called like `f(args)`, deduce T from arg types + std::vector arg_types; + arg_types.reserve(delta_self + v->get_num_args()); + if (dot_obj) { + arg_types.push_back(dot_obj->inferred_type); + } + for (int i = 0; i < v->get_num_args(); ++i) { + arg_types.push_back(v->get_arg(i)->inferred_type); + } + + td::Result> deduced = deduce_substitutionTs_on_generic_func_call(fun_ref, std::move(arg_types), hint); + if (deduced.is_error()) { + v->error(deduced.error().message().str() + " for generic function " + to_string(fun_ref)); + } + fun_ref = check_and_instantiate_generic_function(v->loc, fun_ref, deduced.move_as_ok()); + + } else if (UNLIKELY(v_instantiationTs != nullptr)) { + // non-generic function/method called with type arguments, like `c.cellHash()` / `beginCell()` + v_instantiationTs->error("calling a not generic function with generic T"); + } + + v->mutate()->assign_fun_ref(fun_ref); + // since for `t.tupleAt()`, infer_dot_access() not called for callee = "t.tupleAt", assign its target here + if (v->is_dot_call()) { + v->get_callee()->as()->mutate()->assign_target(fun_ref); + v->get_callee()->as()->mutate()->assign_inferred_type(fun_ref->inferred_full_type); + } + // check arguments count and their types + check_function_arguments(fun_ref, v->get_arg_list(), dot_obj); + // get return type either from user-specified declaration or infer here on demand traversing its body + get_or_infer_return_type(fun_ref); + TypePtr 
inferred_type = dot_obj && fun_ref->does_return_self() ? dot_obj->inferred_type : fun_ref->inferred_return_type; + assign_inferred_type(v, inferred_type); + assign_inferred_type(callee, fun_ref->inferred_full_type); + // note, that mutate params don't affect typing, they are handled when converting to IR + } + + void infer_tensor(V v, TypePtr hint) { + const TypeDataTensor* tensor_hint = hint ? hint->try_as() : nullptr; + std::vector types_list; + types_list.reserve(v->get_items().size()); + for (int i = 0; i < v->size(); ++i) { + AnyExprV item = v->get_item(i); + infer_any_expr(item, tensor_hint && i < tensor_hint->size() ? tensor_hint->items[i] : nullptr); + types_list.emplace_back(item->inferred_type); + } + assign_inferred_type(v, TypeDataTensor::create(std::move(types_list))); + } + + void infer_typed_tuple(V v, TypePtr hint) { + const TypeDataTypedTuple* tuple_hint = hint ? hint->try_as() : nullptr; + std::vector types_list; + types_list.reserve(v->get_items().size()); + for (int i = 0; i < v->size(); ++i) { + AnyExprV item = v->get_item(i); + infer_any_expr(item, tuple_hint && i < tuple_hint->size() ? tuple_hint->items[i] : nullptr); + types_list.emplace_back(item->inferred_type); + } + assign_inferred_type(v, TypeDataTypedTuple::create(std::move(types_list))); + } + + static void infer_null_keyword(V v) { + assign_inferred_type(v, TypeDataNullLiteral::create()); + } + + static void infer_underscore(V v, TypePtr hint) { + // if execution is here, underscore is either used as lhs of assignment, or incorrectly, like `f(_)` + // more precise is to always set unknown here, but for incorrect usages, instead of an error + // "can not pass unknown to X" would better be an error it can't be used as a value, at later steps + assign_inferred_type(v, hint ? 
hint : TypeDataUnknown::create()); + } + + static void infer_empty_expression(V v) { + assign_inferred_type(v, TypeDataUnknown::create()); + } + + void process_sequence(V v) { + for (AnyV item : v->get_items()) { + process_any_statement(item); + } + } + + static bool is_expr_valid_as_return_self(AnyExprV return_expr) { + // `return self` + if (return_expr->type == ast_reference && return_expr->as()->get_name() == "self") { + return true; + } + // `return self.someMethod()` + if (auto v_call = return_expr->try_as(); v_call && v_call->is_dot_call()) { + return v_call->fun_maybe && v_call->fun_maybe->does_return_self() && is_expr_valid_as_return_self(v_call->get_dot_obj()); + } + // `return cond ? ... : ...` + if (auto v_ternary = return_expr->try_as()) { + return is_expr_valid_as_return_self(v_ternary->get_when_true()) && is_expr_valid_as_return_self(v_ternary->get_when_false()); + } + return false; + } + + void process_return_statement(V v) { + if (v->has_return_value()) { + infer_any_expr(v->get_return_value(), current_function->declared_return_type); + } else { + assign_inferred_type(v->get_return_value(), TypeDataVoid::create()); + } + if (current_function->does_return_self()) { + return_unifier.unify_with(current_function->parameters[0].declared_type); + if (!is_expr_valid_as_return_self(v->get_return_value())) { + v->error("invalid return from `self` function"); + } + return; + } + + TypePtr expr_type = v->get_return_value()->inferred_type; + if (current_function->declared_return_type) { + if (!current_function->declared_return_type->can_rhs_be_assigned(expr_type)) { + v->get_return_value()->error("can not convert type " + to_string(expr_type) + " to return type " + to_string(current_function->declared_return_type)); + } + } else { + if (!return_unifier.unify_with(expr_type)) { + v->get_return_value()->error("can not unify type " + to_string(expr_type) + " with previous return type " + to_string(return_unifier.get_result())); + } + } + } + + void 
process_if_statement(V v) { + infer_any_expr(v->get_cond()); + if (!expect_integer(v->get_cond())) { + v->get_cond()->error("condition of `if` must be an integer, got " + to_string(v->get_cond())); + } + process_any_statement(v->get_if_body()); + process_any_statement(v->get_else_body()); + } + + void process_repeat_statement(V v) { + infer_any_expr(v->get_cond()); + if (!expect_integer(v->get_cond())) { + v->get_cond()->error("condition of `repeat` must be an integer, got " + to_string(v->get_cond())); + } + process_any_statement(v->get_body()); + } + + void process_while_statement(V v) { + infer_any_expr(v->get_cond()); + if (!expect_integer(v->get_cond())) { + v->get_cond()->error("condition of `while` must be an integer, got " + to_string(v->get_cond())); + } + process_any_statement(v->get_body()); + } + + void process_do_while_statement(V v) { + process_any_statement(v->get_body()); + infer_any_expr(v->get_cond()); + if (!expect_integer(v->get_cond())) { + v->get_cond()->error("condition of `while` must be an integer, got " + to_string(v->get_cond())); + } + } + + void process_throw_statement(V v) { + infer_any_expr(v->get_thrown_code()); + if (!expect_integer(v->get_thrown_code())) { + v->get_thrown_code()->error("excNo of `throw` must be an integer, got " + to_string(v->get_thrown_code())); + } + infer_any_expr(v->get_thrown_arg()); + if (v->has_thrown_arg() && v->get_thrown_arg()->inferred_type->calc_width_on_stack() != 1) { + v->get_thrown_arg()->error("can not throw " + to_string(v->get_thrown_arg()) + ", exception arg must occupy exactly 1 stack slot"); + } + } + + void process_assert_statement(V v) { + infer_any_expr(v->get_cond()); + if (!expect_integer(v->get_cond())) { + v->get_cond()->error("condition of `assert` must be an integer, got " + to_string(v->get_cond())); + } + infer_any_expr(v->get_thrown_code()); + if (!expect_integer(v->get_thrown_code())) { + v->get_cond()->error("thrown excNo of `assert` must be an integer, got " + 
to_string(v->get_cond())); + } + } + + static void process_catch_variable(AnyExprV catch_var, TypePtr catch_var_type) { + if (auto v_ref = catch_var->try_as(); v_ref && v_ref->sym) { // not underscore + assign_inferred_type(v_ref->sym->as(), catch_var_type); + } + assign_inferred_type(catch_var, catch_var_type); + } + + void process_try_catch_statement(V v) { + process_any_statement(v->get_try_body()); + + // `catch` has exactly 2 variables: excNo and arg (when missing, they are implicit underscores) + // `arg` is a curious thing, it can be any TVM primitive, so assign unknown to it + // hence, using `fInt(arg)` (int from parameter is a hint) or `arg as slice` works well + // it's not truly correct, because `arg as (int,int)` also compiles, but can never happen, but let it be user responsibility + tolk_assert(v->get_catch_expr()->size() == 2); + std::vector types_list = {TypeDataInt::create(), TypeDataUnknown::create()}; + process_catch_variable(v->get_catch_expr()->get_item(0), types_list[0]); + process_catch_variable(v->get_catch_expr()->get_item(1), types_list[1]); + assign_inferred_type(v->get_catch_expr(), TypeDataTensor::create(std::move(types_list))); + + process_any_statement(v->get_catch_body()); + } + +public: + static void assign_fun_full_type(const FunctionData* fun_ref, TypePtr inferred_return_type) { + // calculate function full type `fun(params) -> ret_type` + std::vector params_types; + params_types.reserve(fun_ref->get_num_params()); + for (const LocalVarData& param : fun_ref->parameters) { + params_types.push_back(param.declared_type); + } + assign_inferred_type(fun_ref, inferred_return_type, TypeDataFunCallable::create(std::move(params_types), inferred_return_type)); + } + + void start_visiting_function(const FunctionData* fun_ref, V v_function) { + if (fun_ref->is_code_function()) { + current_function = fun_ref; + process_any_statement(v_function->get_body()); + current_function = nullptr; + + if (fun_ref->is_implicit_return()) { + bool 
is_ok_with_void = fun_ref->declared_return_type + ? fun_ref->declared_return_type->can_rhs_be_assigned(TypeDataVoid::create()) + : return_unifier.unify_with_implicit_return_void(); + if (!is_ok_with_void || fun_ref->does_return_self()) { + throw ParseError(v_function->get_body()->as()->loc_end, "missing return"); + } + } + } else { + // asm functions should be strictly typed, this was checked earlier + tolk_assert(fun_ref->declared_return_type); + } + + TypePtr inferred_return_type = fun_ref->declared_return_type ? fun_ref->declared_return_type : return_unifier.get_result(); + assign_fun_full_type(fun_ref, inferred_return_type); + fun_ref->mutate()->assign_is_type_inferring_done(); + } +}; + +class LaunchInferTypesAndMethodsOnce final { +public: + static bool should_visit_function(const FunctionData* fun_ref) { + // since inferring can be requested on demand, prevent second execution from a regular pipeline launcher + return !fun_ref->is_type_inferring_done() && !fun_ref->is_generic_function(); + } + + static void start_visiting_function(const FunctionData* fun_ref, V v_function) { + InferCheckTypesAndCallsAndFieldsVisitor visitor; + visitor.start_visiting_function(fun_ref, v_function); + } +}; + +// infer return type "on demand" +// example: `fun f() { return g(); } fun g() { ... }` +// when analyzing `f()`, we need to infer what fun_ref=g returns +// (if `g` is generic, it was already instantiated, so fun_ref=g is here) +static void infer_and_save_return_type_of_function(const FunctionData* fun_ref) { + static std::vector called_stack; + + tolk_assert(!fun_ref->is_generic_function() && !fun_ref->is_type_inferring_done()); + // if `g` has return type declared, like `fun g(): int { ... 
}`, don't traverse its body + if (fun_ref->declared_return_type) { + InferCheckTypesAndCallsAndFieldsVisitor::assign_fun_full_type(fun_ref, fun_ref->declared_return_type); + return; + } + + // prevent recursion of untyped functions, like `fun f() { return g(); } fun g() { return f(); }` + bool contains = std::find(called_stack.begin(), called_stack.end(), fun_ref) != called_stack.end(); + if (contains) { + fun_ref->ast_root->error("could not infer return type of " + to_string(fun_ref) + ", because it appears in a recursive call chain; specify `: ` manually"); + } + + // dig into g's body; it's safe, since the compiler is single-threaded + // on finish, fun_ref->inferred_return_type is filled, and won't be called anymore + called_stack.push_back(fun_ref); + InferCheckTypesAndCallsAndFieldsVisitor visitor; + visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); + called_stack.pop_back(); +} + +void pipeline_infer_types_and_calls_and_fields() { + visit_ast_of_all_functions(); +} + +void pipeline_infer_types_and_calls_and_fields(const FunctionData* fun_ref) { + InferCheckTypesAndCallsAndFieldsVisitor visitor; + visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); +} + +} // namespace tolk diff --git a/tolk/pipe-refine-lvalue-for-mutate.cpp b/tolk/pipe-refine-lvalue-for-mutate.cpp index c4c31b51..45dd3a94 100644 --- a/tolk/pipe-refine-lvalue-for-mutate.cpp +++ b/tolk/pipe-refine-lvalue-for-mutate.cpp @@ -34,8 +34,8 @@ namespace tolk { GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_invalid_mutate_arg_passed(AnyV v, const FunctionData* fun_ref, const LocalVarData& p_sym, bool called_as_method, bool arg_passed_as_mutate, AnyV arg_expr) { - std::string arg_str(arg_expr->type == ast_identifier ? 
arg_expr->as()->name : "obj"); +static void fire_error_invalid_mutate_arg_passed(AnyExprV v, const FunctionData* fun_ref, const LocalVarData& p_sym, bool called_as_method, bool arg_passed_as_mutate, AnyV arg_expr) { + std::string arg_str(arg_expr->type == ast_reference ? arg_expr->as()->get_name() : "obj"); // case: `loadInt(cs, 32)`; suggest: `cs.loadInt(32)` if (p_sym.is_mutate_parameter() && !arg_passed_as_mutate && !called_as_method && p_sym.idx == 0 && fun_ref->does_accept_self()) { @@ -59,7 +59,7 @@ static void fire_error_invalid_mutate_arg_passed(AnyV v, const FunctionData* fun class RefineLvalueForMutateArgumentsVisitor final : public ASTVisitorFunctionBody { void visit(V v) override { - // most likely it's a global function, but also may be `some_var(args)` or even `getF()(args)` + // v is `globalF(args)` / `globalF(args)` / `obj.method(args)` / `local_var(args)` / `getF()(args)` const FunctionData* fun_ref = v->fun_maybe; if (!fun_ref) { parent::visit(v); @@ -72,47 +72,55 @@ class RefineLvalueForMutateArgumentsVisitor final : public ASTVisitorFunctionBod return; } - tolk_assert(static_cast(fun_ref->parameters.size()) == v->get_num_args()); + int delta_self = v->is_dot_call(); + tolk_assert(fun_ref->get_num_params() == delta_self + v->get_num_args()); + + if (v->is_dot_call()) { + if (fun_ref->does_mutate_self()) { + // for `b.storeInt()`, `b` should become lvalue, since `storeInt` is a method mutating self + // but: `beginCell().storeInt()`, then `beginCell()` is not lvalue + // (it will be extracted as tmp var when transforming AST to IR) + AnyExprV leftmost_obj = v->get_dot_obj(); + while (true) { + if (auto as_par = leftmost_obj->try_as()) { + leftmost_obj = as_par->get_expr(); + } else if (auto as_cast = leftmost_obj->try_as()) { + leftmost_obj = as_cast->get_expr(); + } else { + break; + } + } + bool will_be_extracted_as_tmp_var = leftmost_obj->type == ast_function_call; + if (!will_be_extracted_as_tmp_var) { + 
leftmost_obj->mutate()->assign_lvalue_true(); + v->get_dot_obj()->mutate()->assign_lvalue_true(); + } + } + + if (!fun_ref->does_accept_self() && fun_ref->parameters[0].is_mutate_parameter()) { + fire_error_invalid_mutate_arg_passed(v, fun_ref, fun_ref->parameters[0], true, false, v->get_dot_obj()); + } + } for (int i = 0; i < v->get_num_args(); ++i) { - const LocalVarData& p_sym = fun_ref->parameters[i]; + const LocalVarData& p_sym = fun_ref->parameters[delta_self + i]; auto arg_i = v->get_arg(i); if (p_sym.is_mutate_parameter() != arg_i->passed_as_mutate) { fire_error_invalid_mutate_arg_passed(arg_i, fun_ref, p_sym, false, arg_i->passed_as_mutate, arg_i->get_expr()); } parent::visit(arg_i); } + parent::visit(v->get_callee()); } - void visit(V v) override { - parent::visit(v); - - const FunctionData* fun_ref = v->fun_ref; - tolk_assert(static_cast(fun_ref->parameters.size()) == 1 + v->get_num_args()); - - if (fun_ref->does_mutate_self()) { - bool will_be_extracted_as_tmp_var = v->get_obj()->type == ast_function_call || v->get_obj()->type == ast_dot_method_call; - if (!will_be_extracted_as_tmp_var) { - v->get_obj()->mutate()->assign_lvalue_true(); - } - } - - if (!fun_ref->does_accept_self() && fun_ref->parameters[0].is_mutate_parameter()) { - fire_error_invalid_mutate_arg_passed(v, fun_ref, fun_ref->parameters[0], true, false, v->get_obj()); - } - - for (int i = 0; i < v->get_num_args(); ++i) { - const LocalVarData& p_sym = fun_ref->parameters[1 + i]; - auto arg_i = v->get_arg(i); - if (p_sym.is_mutate_parameter() != arg_i->passed_as_mutate) { - fire_error_invalid_mutate_arg_passed(arg_i, fun_ref, p_sym, false, arg_i->passed_as_mutate, arg_i->get_expr()); - } - } +public: + bool should_visit_function(const FunctionData* fun_ref) override { + return fun_ref->is_code_function() && !fun_ref->is_generic_function(); } }; -void pipeline_refine_lvalue_for_mutate_arguments(const AllSrcFiles& all_src_files) { - visit_ast_of_all_functions(all_src_files); +void 
pipeline_refine_lvalue_for_mutate_arguments() { + visit_ast_of_all_functions(); } } // namespace tolk diff --git a/tolk/pipe-register-symbols.cpp b/tolk/pipe-register-symbols.cpp index 478bc727..2dae0d23 100644 --- a/tolk/pipe-register-symbols.cpp +++ b/tolk/pipe-register-symbols.cpp @@ -20,7 +20,9 @@ #include "ast.h" #include "compiler-state.h" #include "constant-evaluator.h" +#include "generics-helpers.h" #include "td/utils/crypto.h" +#include "type-system.h" #include /* @@ -59,67 +61,69 @@ static int calculate_method_id_by_func_name(std::string_view func_name) { return static_cast(crc & 0xffff) | 0x10000; } -static void calc_arg_ret_order_of_asm_function(V v_body, V param_list, TypeExpr* ret_type, - std::vector& arg_order, std::vector& ret_order) { - int cnt = param_list->size(); - int width = ret_type->get_width(); - if (width < 0 || width > 16) { - v_body->error("return type of an assembler built-in function must have a well-defined fixed width"); +static void validate_arg_ret_order_of_asm_function(V v_body, int n_params, TypePtr ret_type) { + if (!ret_type) { + v_body->error("asm function must declare return type (before asm instructions)"); } - if (cnt > 16) { - v_body->error("assembler built-in function must have at most 16 arguments"); - } - std::vector cum_arg_width; - cum_arg_width.push_back(0); - int tot_width = 0; - for (int i = 0; i < cnt; ++i) { - V v_param = param_list->get_param(i); - int arg_width = v_param->declared_type->get_width(); - if (arg_width < 0 || arg_width > 16) { - v_param->error("parameters of an assembler built-in function must have a well-defined fixed width"); - } - cum_arg_width.push_back(tot_width += arg_width); + if (n_params > 16) { + v_body->error("asm function can have at most 16 parameters"); } + + // asm(param1 ... 
paramN), param names were previously mapped into indices if (!v_body->arg_order.empty()) { - if (static_cast(v_body->arg_order.size()) != cnt) { + if (static_cast(v_body->arg_order.size()) != n_params) { v_body->error("arg_order of asm function must specify all parameters"); } - std::vector visited(cnt, false); - for (int i = 0; i < cnt; ++i) { - int j = v_body->arg_order[i]; + std::vector visited(v_body->arg_order.size(), false); + for (int j : v_body->arg_order) { if (visited[j]) { v_body->error("arg_order of asm function contains duplicates"); } visited[j] = true; - int c1 = cum_arg_width[j], c2 = cum_arg_width[j + 1]; - while (c1 < c2) { - arg_order.push_back(c1++); - } } - tolk_assert(arg_order.size() == (unsigned)tot_width); } + + // asm(-> 0 2 1 3), check for a shuffled range 0...N + // correctness of N (actual return width onto a stack) will be checked after type inferring and generics instantiation if (!v_body->ret_order.empty()) { - if (static_cast(v_body->ret_order.size()) != width) { - v_body->error("ret_order of this asm function expected to be width = " + std::to_string(width)); - } - std::vector visited(width, false); - for (int i = 0; i < width; ++i) { - int j = v_body->ret_order[i]; - if (j < 0 || j >= width || visited[j]) { - v_body->error("ret_order contains invalid integer, not in range 0 .. width-1"); + std::vector visited(v_body->ret_order.size(), false); + for (int j : v_body->ret_order) { + if (j < 0 || j >= static_cast(v_body->ret_order.size()) || visited[j]) { + v_body->error("ret_order contains invalid integer, not in range 0 .. 
N"); } visited[j] = true; } - ret_order = v_body->ret_order; } } +static const GenericsDeclaration* construct_genericTs(V v_list) { + std::vector itemsT; + itemsT.reserve(v_list->size()); + + for (int i = 0; i < v_list->size(); ++i) { + auto v_item = v_list->get_item(i); + auto it_existing = std::find_if(itemsT.begin(), itemsT.end(), [v_item](const GenericsDeclaration::GenericsItem& prev) { + return prev.nameT == v_item->nameT; + }); + if (it_existing != itemsT.end()) { + v_item->error("duplicate generic parameter `" + static_cast(v_item->nameT) + "`"); + } + itemsT.emplace_back(v_item->nameT); + } + + return new GenericsDeclaration(std::move(itemsT)); +} + static void register_constant(V v) { ConstantValue init_value = eval_const_init_value(v->get_init_value()); - GlobalConstData* c_sym = new GlobalConstData(static_cast(v->get_identifier()->name), v->loc, std::move(init_value)); + GlobalConstData* c_sym = new GlobalConstData(static_cast(v->get_identifier()->name), v->loc, v->declared_type, std::move(init_value)); - if (v->declared_type && !v->declared_type->equals_to(c_sym->inferred_type)) { - v->error("expression type does not match declared type"); + if (v->declared_type) { + bool ok = (c_sym->is_int_const() && (v->declared_type == TypeDataInt::create())) + || (c_sym->is_slice_const() && (v->declared_type == TypeDataSlice::create())); + if (!ok) { + v->error("expression type does not match declared type"); + } } G.symtable.add_global_const(c_sym); @@ -137,124 +141,82 @@ static void register_global_var(V v) { static LocalVarData register_parameter(V v, int idx) { if (v->is_underscore()) { - return {"", v->loc, idx, v->declared_type}; + return {"", v->loc, v->declared_type, 0, idx}; } - LocalVarData p_sym(static_cast(v->param_name), v->loc, idx, v->declared_type); + int flags = 0; if (v->declared_as_mutate) { - p_sym.flags |= LocalVarData::flagMutateParameter; + flags |= LocalVarData::flagMutateParameter; } if (!v->declared_as_mutate && idx == 0 && v->param_name 
== "self") { - p_sym.flags |= LocalVarData::flagImmutable; + flags |= LocalVarData::flagImmutable; } - return p_sym; + return LocalVarData(static_cast(v->param_name), v->loc, v->declared_type, flags, idx); } static void register_function(V v) { std::string_view func_name = v->get_identifier()->name; - // calculate TypeExpr of a function: it's a map (params -> ret), probably surrounded by forall - TypeExpr* params_tensor_type = nullptr; + // calculate TypeData of a function + std::vector arg_types; + std::vector parameters; int n_params = v->get_num_params(); int n_mutate_params = 0; - std::vector parameters; - if (n_params) { - std::vector param_tensor_items; - param_tensor_items.reserve(n_params); - parameters.reserve(n_params); - for (int i = 0; i < n_params; ++i) { - auto v_param = v->get_param(i); - n_mutate_params += static_cast(v_param->declared_as_mutate); - param_tensor_items.emplace_back(v_param->declared_type); - parameters.emplace_back(register_parameter(v_param, i)); - } - params_tensor_type = TypeExpr::new_tensor(std::move(param_tensor_items)); - } else { - params_tensor_type = TypeExpr::new_unit(); + arg_types.reserve(n_params); + parameters.reserve(n_params); + for (int i = 0; i < n_params; ++i) { + auto v_param = v->get_param(i); + arg_types.emplace_back(v_param->declared_type); + parameters.emplace_back(register_parameter(v_param, i)); + n_mutate_params += static_cast(v_param->declared_as_mutate); } - TypeExpr* function_type = TypeExpr::new_map(params_tensor_type, v->ret_type); + const GenericsDeclaration* genericTs = nullptr; if (v->genericsT_list) { - std::vector type_vars; - type_vars.reserve(v->genericsT_list->size()); - for (int idx = 0; idx < v->genericsT_list->size(); ++idx) { - type_vars.emplace_back(v->genericsT_list->get_item(idx)->created_type); - } - function_type = TypeExpr::new_forall(std::move(type_vars), function_type); + genericTs = construct_genericTs(v->genericsT_list); } - if (v->marked_as_builtin) { + if 
(v->is_builtin_function()) { const Symbol* builtin_func = lookup_global_symbol(func_name); - const FunctionData* func_val = builtin_func ? builtin_func->as() : nullptr; - if (!func_val || !func_val->is_builtin_function()) { + const FunctionData* fun_ref = builtin_func ? builtin_func->as() : nullptr; + if (!fun_ref || !fun_ref->is_builtin_function()) { v->error("`builtin` used for non-builtin function"); } -#ifdef TOLK_DEBUG - // in release, we don't need this check, since `builtin` is used only in stdlib, which is our responsibility - if (!func_val->full_type->equals_to(function_type) || func_val->is_marked_as_pure() != v->marked_as_pure) { - v->error("declaration for `builtin` function doesn't match an actual one"); - } -#endif + v->mutate()->assign_fun_ref(fun_ref); return; } - if (G.is_verbosity(1)) { - std::cerr << "fun " << func_name << " : " << function_type << std::endl; - } - if (v->marked_as_pure && v->ret_type->get_width() == 0) { - v->error("a pure function should return something, otherwise it will be optimized out anyway"); + if (G.is_verbosity(1) && v->is_code_function()) { + std::cerr << "fun " << func_name << " : " << v->declared_return_type << std::endl; } FunctionBody f_body = v->get_body()->type == ast_sequence ? 
static_cast(new FunctionBodyCode) : static_cast(new FunctionBodyAsm); - FunctionData* f_sym = new FunctionData(static_cast(func_name), v->loc, function_type, std::move(parameters), 0, f_body); + FunctionData* f_sym = new FunctionData(static_cast(func_name), v->loc, v->declared_return_type, std::move(parameters), 0, genericTs, nullptr, f_body, v); if (const auto* v_asm = v->get_body()->try_as()) { - calc_arg_ret_order_of_asm_function(v_asm, v->get_param_list(), v->ret_type, f_sym->arg_order, f_sym->ret_order); + validate_arg_ret_order_of_asm_function(v_asm, v->get_num_params(), v->declared_return_type); + f_sym->arg_order = v_asm->arg_order; + f_sym->ret_order = v_asm->ret_order; } - if (v->method_id) { - if (v->method_id->intval.is_null() || !v->method_id->intval->signed_fits_bits(32)) { - v->method_id->error("invalid integer constant"); - } - f_sym->method_id = static_cast(v->method_id->intval->to_long()); - } else if (v->marked_as_get_method) { + if (v->method_id.not_null()) { + f_sym->method_id = static_cast(v->method_id->to_long()); + } else if (v->flags & FunctionData::flagGetMethod) { f_sym->method_id = calculate_method_id_by_func_name(func_name); for (const FunctionData* other : G.all_get_methods) { if (other->method_id == f_sym->method_id) { v->error(PSTRING() << "GET methods hash collision: `" << other->name << "` and `" << f_sym->name << "` produce the same hash. 
Consider renaming one of these functions."); } } - } else if (v->is_entrypoint) { + } else if (v->flags & FunctionData::flagIsEntrypoint) { f_sym->method_id = calculate_method_id_for_entrypoint(func_name); } - if (v->marked_as_pure) { - f_sym->flags |= FunctionData::flagMarkedAsPure; - } - if (v->marked_as_inline) { - f_sym->flags |= FunctionData::flagInline; - } - if (v->marked_as_inline_ref) { - f_sym->flags |= FunctionData::flagInlineRef; - } - if (v->marked_as_get_method) { - f_sym->flags |= FunctionData::flagGetMethod; - } - if (v->is_entrypoint) { - f_sym->flags |= FunctionData::flagIsEntrypoint; - } + f_sym->flags |= v->flags; if (n_mutate_params) { f_sym->flags |= FunctionData::flagHasMutateParams; } - if (v->accepts_self) { - f_sym->flags |= FunctionData::flagAcceptsSelf; - } - if (v->returns_self) { - f_sym->flags |= FunctionData::flagReturnsSelf; - } G.symtable.add_function(f_sym); - if (f_sym->is_regular_function()) { - G.all_code_functions.push_back(f_sym); - } + G.all_functions.push_back(f_sym); if (f_sym->is_get_method()) { G.all_get_methods.push_back(f_sym); } @@ -270,10 +232,10 @@ static void iterate_through_file_symbols(const SrcFile* file) { for (AnyV v : file->ast->as()->get_toplevel_declarations()) { switch (v->type) { - case ast_import_statement: + case ast_import_directive: // on `import "another-file.tolk"`, register symbols from that file at first // (for instance, it can calculate constants, which are used in init_val of constants in current file below import) - iterate_through_file_symbols(v->as()->file); + iterate_through_file_symbols(v->as()->file); break; case ast_constant_declaration: @@ -291,8 +253,8 @@ static void iterate_through_file_symbols(const SrcFile* file) { } } -void pipeline_register_global_symbols(const AllSrcFiles& all_src_files) { - for (const SrcFile* file : all_src_files) { +void pipeline_register_global_symbols() { + for (const SrcFile* file : G.all_src_files) { iterate_through_file_symbols(file); } } diff --git 
a/tolk/pipe-resolve-identifiers.cpp b/tolk/pipe-resolve-identifiers.cpp new file mode 100644 index 00000000..299f33be --- /dev/null +++ b/tolk/pipe-resolve-identifiers.cpp @@ -0,0 +1,350 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "tolk.h" +#include "platform-utils.h" +#include "compiler-state.h" +#include "src-file.h" +#include "generics-helpers.h" +#include "ast.h" +#include "ast-visitor.h" +#include "type-system.h" +#include + +/* + * This pipe resolves identifiers (local variables and types) in all functions bodies. + * It happens before type inferring, but after all global symbols are registered. + * It means, that for any symbol `x` we can look up whether it's a global name or not. + * + * About resolving variables. + * Example: `var x = 10; x = 20;` both `x` point to one LocalVarData. + * Example: `x = 20` undefined symbol `x` is also here (unless it's a global) + * Variables scoping and redeclaration are also here. + * Note, that `x` is stored as `ast_reference (ast_identifier "x")`. More formally, "references" are resolved. + * "Reference" in AST, besides the identifier, stores optional generics instantiation. `x` is grammar-valid. + * + * About resolving types. At the moment of parsing, `int`, `cell` and other predefined are parsed as TypeDataInt, etc. 
+ * All the others are stored as TypeDataUnresolved, to be resolved here, after global symtable is filled. + * Example: `var x: T = 0` unresolved "T" is replaced by TypeDataGenericT inside `f`. + * Example: `f()` unresolved "MyAlias" is replaced by TypeDataAlias inside the reference. + * Example: `fun f(): KKK` unresolved "KKK" fires an error "unknown type name". + * When structures and type aliases are implemented, their resolving will also be done here. + * See finalize_type_data(). + * + * Note, that functions/methods binding is NOT here. + * In other words, for ast_function_call `beginCell()` and `t.tupleAt(0)`, their fun_ref is NOT filled here. + * Functions/methods binding is done later, simultaneously with type inferring and generics instantiation. + * For instance, to call a generic function `t.tuplePush(1)`, we need types of `t` and `1` to be inferred, + * as well as `tuplePush` to be instantiated, and fun_ref to point at that exact instantiations. + * + * As a result of this step, + * * every V::sym is filled, pointing either to a local var/parameter, or to a global symbol + * (exceptional for function calls and methods, their references are bound later) + * * all TypeData in all symbols is ready for analyzing, TypeDataUnresolved won't occur later in pipeline + */ + +namespace tolk { + +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_undefined_symbol(V v) { + if (v->name == "self") { + v->error("using `self` in a non-member function (it does not accept the first `self` parameter)"); + } else { + v->error("undefined symbol `" + static_cast(v->name) + "`"); + } +} + +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_unknown_type_name(SrcLocation loc, const std::string &text) { + throw ParseError(loc, "unknown type name `" + text + "`"); +} + +static void check_import_exists_when_using_sym(AnyV v_usage, const Symbol* used_sym) { + SrcLocation sym_loc = used_sym->loc; + if 
(!v_usage->loc.is_symbol_from_same_or_builtin_file(sym_loc)) { + const SrcFile* declared_in = sym_loc.get_src_file(); + bool has_import = false; + for (const SrcFile::ImportDirective& import : v_usage->loc.get_src_file()->imports) { + if (import.imported_file == declared_in) { + has_import = true; + } + } + if (!has_import) { + v_usage->error("Using a non-imported symbol `" + used_sym->name + "`. Forgot to import \"" + declared_in->rel_filename + "\"?"); + } + } +} + +struct NameAndScopeResolver { + std::vector> scopes; + + static uint64_t key_hash(std::string_view name_key) { + return std::hash{}(name_key); + } + + void open_scope([[maybe_unused]] SrcLocation loc) { + // std::cerr << "open_scope " << scopes.size() + 1 << " at " << loc << std::endl; + scopes.emplace_back(); + } + + void close_scope([[maybe_unused]] SrcLocation loc) { + // std::cerr << "close_scope " << scopes.size() << " at " << loc << std::endl; + if (UNLIKELY(scopes.empty())) { + throw Fatal{"cannot close the outer scope"}; + } + scopes.pop_back(); + } + + const Symbol* lookup_symbol(std::string_view name) const { + uint64_t key = key_hash(name); + for (auto it = scopes.rbegin(); it != scopes.rend(); ++it) { // NOLINT(*-loop-convert) + const auto& scope = *it; + if (auto it_sym = scope.find(key); it_sym != scope.end()) { + return it_sym->second; + } + } + return G.symtable.lookup(name); + } + + void add_local_var(const LocalVarData* v_sym) { + if (UNLIKELY(scopes.empty())) { + throw Fatal("unexpected scope_level = 0"); + } + if (v_sym->name.empty()) { // underscore + return; + } + + uint64_t key = key_hash(v_sym->name); + const auto& [_, inserted] = scopes.rbegin()->emplace(key, v_sym); + if (UNLIKELY(!inserted)) { + throw ParseError(v_sym->loc, "redeclaration of local variable `" + v_sym->name + "`"); + } + } +}; + +struct TypeDataResolver { + GNU_ATTRIBUTE_NOINLINE + static TypePtr resolve_identifiers_in_type_data(TypePtr type_data, const GenericsDeclaration* genericTs) { + return 
type_data->replace_children_custom([genericTs](TypePtr child) { + if (const TypeDataUnresolved* un = child->try_as()) { + if (genericTs && genericTs->has_nameT(un->text)) { + std::string nameT = un->text; + return TypeDataGenericT::create(std::move(nameT)); + } + if (un->text == "auto") { + throw ParseError(un->loc, "`auto` type does not exist; just omit a type for local variable (will be inferred from assignment); parameters should always be typed"); + } + if (un->text == "self") { + throw ParseError(un->loc, "`self` type can be used only as a return type of a function (enforcing it to be chainable)"); + } + if (un->text == "bool") { + throw ParseError(un->loc, "bool type is not supported yet"); + } + fire_error_unknown_type_name(un->loc, un->text); + } + return child; + }); + } +}; + +static TypePtr finalize_type_data(TypePtr type_data, const GenericsDeclaration* genericTs) { + if (!type_data || !type_data->has_unresolved_inside()) { + return type_data; + } + return TypeDataResolver::resolve_identifiers_in_type_data(type_data, genericTs); +} + + +class AssignSymInsideFunctionVisitor final : public ASTVisitorFunctionBody { + // more correctly this field shouldn't be static, but currently there is no need to make it a part of state + static NameAndScopeResolver current_scope; + static const FunctionData* current_function; + + static const LocalVarData* create_local_var_sym(std::string_view name, SrcLocation loc, TypePtr declared_type, bool immutable) { + LocalVarData* v_sym = new LocalVarData(static_cast(name), loc, declared_type, immutable * LocalVarData::flagImmutable, -1); + current_scope.add_local_var(v_sym); + return v_sym; + } + + static void process_catch_variable(AnyExprV catch_var) { + if (auto v_ref = catch_var->try_as()) { + const LocalVarData* var_ref = create_local_var_sym(v_ref->get_name(), catch_var->loc, nullptr, true); + v_ref->mutate()->assign_sym(var_ref); + } + } + +protected: + void visit(V v) override { + if (v->marked_as_redef) { + const 
Symbol* sym = current_scope.lookup_symbol(v->get_name()); + if (sym == nullptr) { + v->error("`redef` for unknown variable"); + } + const LocalVarData* var_ref = sym->try_as(); + if (!var_ref) { + v->error("`redef` for unknown variable"); + } + v->mutate()->assign_var_ref(var_ref); + } else { + TypePtr declared_type = finalize_type_data(v->declared_type, current_function->genericTs); + const LocalVarData* var_ref = create_local_var_sym(v->get_name(), v->loc, declared_type, v->is_immutable); + v->mutate()->assign_resolved_type(declared_type); + v->mutate()->assign_var_ref(var_ref); + } + } + + void visit(V v) override { + parent::visit(v->get_rhs()); // in this order, so that `var x = x` is invalid, "x" on the right unknown + parent::visit(v->get_lhs()); + } + + void visit(V v) override { + const Symbol* sym = current_scope.lookup_symbol(v->get_name()); + if (!sym) { + fire_error_undefined_symbol(v->get_identifier()); + } + v->mutate()->assign_sym(sym); + + // for global functions, global vars and constants, `import` must exist + if (!sym->try_as()) { + check_import_exists_when_using_sym(v, sym); + } + + // for `f` / `f`, resolve "MyAlias" and "T" + // (for function call `f()`, this v (ast_reference `f`) is callee) + if (auto v_instantiationTs = v->get_instantiationTs()) { + for (int i = 0; i < v_instantiationTs->size(); ++i) { + TypePtr substituted_type = finalize_type_data(v_instantiationTs->get_item(i)->substituted_type, current_function->genericTs); + v_instantiationTs->get_item(i)->mutate()->assign_resolved_type(substituted_type); + } + } + } + + void visit(V v) override { + // for `t.tupleAt` / `obj.method`, resolve "MyAlias" and "T" + // (for function call `t.tupleAt()`, this v (ast_dot_access `t.tupleAt`) is callee) + if (auto v_instantiationTs = v->get_instantiationTs()) { + for (int i = 0; i < v_instantiationTs->size(); ++i) { + TypePtr substituted_type = finalize_type_data(v_instantiationTs->get_item(i)->substituted_type, current_function->genericTs); + 
v_instantiationTs->get_item(i)->mutate()->assign_resolved_type(substituted_type); + } + } + parent::visit(v->get_obj()); + } + + void visit(V v) override { + TypePtr cast_to_type = finalize_type_data(v->cast_to_type, current_function->genericTs); + v->mutate()->assign_resolved_type(cast_to_type); + parent::visit(v->get_expr()); + } + + void visit(V v) override { + if (v->empty()) { + return; + } + current_scope.open_scope(v->loc); + parent::visit(v); + current_scope.close_scope(v->loc_end); + } + + void visit(V v) override { + current_scope.open_scope(v->loc); + parent::visit(v->get_body()); + parent::visit(v->get_cond()); // in 'while' condition it's ok to use variables declared inside do + current_scope.close_scope(v->get_body()->loc_end); + } + + void visit(V v) override { + visit(v->get_try_body()); + current_scope.open_scope(v->get_catch_body()->loc); + const std::vector& catch_items = v->get_catch_expr()->get_items(); + tolk_assert(catch_items.size() == 2); + process_catch_variable(catch_items[1]); + process_catch_variable(catch_items[0]); + parent::visit(v->get_catch_body()); + current_scope.close_scope(v->get_catch_body()->loc_end); + } + +public: + bool should_visit_function(const FunctionData* fun_ref) override { + // this pipe is done just after parsing + // visit both asm and code functions, resolve identifiers in parameter/return types everywhere + // for generic functions, unresolved "T" will be replaced by TypeDataGenericT + return true; + } + + void start_visiting_function(const FunctionData* fun_ref, V v) override { + current_function = fun_ref; + + for (int i = 0; i < v->get_num_params(); ++i) { + const LocalVarData& param_var = fun_ref->parameters[i]; + TypePtr declared_type = finalize_type_data(param_var.declared_type, fun_ref->genericTs); + v->get_param(i)->mutate()->assign_param_ref(¶m_var); + v->get_param(i)->mutate()->assign_resolved_type(declared_type); + param_var.mutate()->assign_resolved_type(declared_type); + } + TypePtr return_type = 
finalize_type_data(fun_ref->declared_return_type, fun_ref->genericTs); + v->mutate()->assign_resolved_type(return_type); + fun_ref->mutate()->assign_resolved_type(return_type); + + if (fun_ref->is_code_function()) { + auto v_seq = v->get_body()->as(); + current_scope.open_scope(v->loc); + for (int i = 0; i < v->get_num_params(); ++i) { + current_scope.add_local_var(&fun_ref->parameters[i]); + } + parent::visit(v_seq); + current_scope.close_scope(v_seq->loc_end); + tolk_assert(current_scope.scopes.empty()); + } + + current_function = nullptr; + } +}; + +NameAndScopeResolver AssignSymInsideFunctionVisitor::current_scope; +const FunctionData* AssignSymInsideFunctionVisitor::current_function = nullptr; + +void pipeline_resolve_identifiers_and_assign_symbols() { + AssignSymInsideFunctionVisitor visitor; + for (const SrcFile* file : G.all_src_files) { + for (AnyV v : file->ast->as()->get_toplevel_declarations()) { + if (auto v_func = v->try_as()) { + tolk_assert(v_func->fun_ref); + visitor.start_visiting_function(v_func->fun_ref, v_func); + + } else if (auto v_global = v->try_as()) { + TypePtr declared_type = finalize_type_data(v_global->var_ref->declared_type, nullptr); + v_global->mutate()->assign_resolved_type(declared_type); + v_global->var_ref->mutate()->assign_resolved_type(declared_type); + + } else if (auto v_const = v->try_as(); v_const && v_const->declared_type) { + TypePtr declared_type = finalize_type_data(v_const->const_ref->declared_type, nullptr); + v_const->mutate()->assign_resolved_type(declared_type); + v_const->const_ref->mutate()->assign_resolved_type(declared_type); + } + } + } +} + +void pipeline_resolve_identifiers_and_assign_symbols(const FunctionData* fun_ref) { + AssignSymInsideFunctionVisitor visitor; + if (visitor.should_visit_function(fun_ref)) { + visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); + } +} + +} // namespace tolk diff --git a/tolk/pipe-resolve-symbols.cpp b/tolk/pipe-resolve-symbols.cpp deleted file mode 100644 
index 31d25229..00000000 --- a/tolk/pipe-resolve-symbols.cpp +++ /dev/null @@ -1,272 +0,0 @@ -/* - This file is part of TON Blockchain Library. - - TON Blockchain Library is free software: you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - TON Blockchain Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with TON Blockchain Library. If not, see . -*/ -#include "tolk.h" -#include "platform-utils.h" -#include "src-file.h" -#include "ast.h" -#include "ast-visitor.h" -#include "compiler-state.h" -#include - -/* - * This pipe resolves identifiers (local variables) in all functions bodies. - * It happens before type inferring, but after all global symbols are registered. - * It means, that for any symbol `x` we can look up whether it's a global name or not. - * - * Example: `var x = 10; x = 20;` both `x` point to one LocalVarData. - * Example: `x = 20` undefined symbol `x` is also here (unless it's a global) - * Variables scoping and redeclaration are also here. - * - * As a result of this step, every V::sym is filled, pointing either to a local var/parameter, - * or to a global var / constant / function. 
- */ - -namespace tolk { - -static void check_import_exists_when_using_sym(AnyV v_usage, const Symbol* used_sym) { - SrcLocation sym_loc = used_sym->loc; - if (!v_usage->loc.is_symbol_from_same_or_builtin_file(sym_loc)) { - const SrcFile* declared_in = sym_loc.get_src_file(); - bool has_import = false; - for (const SrcFile::ImportStatement& import_stmt : v_usage->loc.get_src_file()->imports) { - if (import_stmt.imported_file == declared_in) { - has_import = true; - } - } - if (!has_import) { - v_usage->error("Using a non-imported symbol `" + used_sym->name + "`. Forgot to import \"" + declared_in->rel_filename + "\"?"); - } - } -} - -struct NameAndScopeResolver { - std::vector> scopes; - - static uint64_t key_hash(std::string_view name_key) { - return std::hash{}(name_key); - } - - void open_scope([[maybe_unused]] SrcLocation loc) { - // std::cerr << "open_scope " << scopes.size() + 1 << " at " << loc << std::endl; - scopes.emplace_back(); - } - - void close_scope([[maybe_unused]] SrcLocation loc) { - // std::cerr << "close_scope " << scopes.size() << " at " << loc << std::endl; - if (UNLIKELY(scopes.empty())) { - throw Fatal{"cannot close the outer scope"}; - } - scopes.pop_back(); - } - - const Symbol* lookup_symbol(std::string_view name) const { - uint64_t key = key_hash(name); - for (auto it = scopes.rbegin(); it != scopes.rend(); ++it) { // NOLINT(*-loop-convert) - const auto& scope = *it; - if (auto it_sym = scope.find(key); it_sym != scope.end()) { - return it_sym->second; - } - } - return G.symtable.lookup(name); - } - - const Symbol* add_local_var(const LocalVarData* v_sym) { - if (UNLIKELY(scopes.empty())) { - throw Fatal("unexpected scope_level = 0"); - } - if (v_sym->name.empty()) { // underscore - return v_sym; - } - - uint64_t key = key_hash(v_sym->name); - const auto& [_, inserted] = scopes.rbegin()->emplace(key, v_sym); - if (UNLIKELY(!inserted)) { - throw ParseError(v_sym->loc, "redeclaration of local variable `" + v_sym->name + "`"); - } - return 
v_sym; - } -}; - - -class AssignSymInsideFunctionVisitor final : public ASTVisitorFunctionBody { - // more correctly this field shouldn't be static, but currently there is no need to make it a part of state - static NameAndScopeResolver current_scope; - - static const Symbol* create_local_var_sym(std::string_view name, SrcLocation loc, TypeExpr* var_type, bool immutable) { - LocalVarData* v_sym = new LocalVarData(static_cast(name), loc, -1, var_type); - if (immutable) { - v_sym->flags |= LocalVarData::flagImmutable; - } - return current_scope.add_local_var(v_sym); - } - - static void process_catch_variable(AnyV catch_var) { - if (auto v_ident = catch_var->try_as()) { - const Symbol* sym = create_local_var_sym(v_ident->name, catch_var->loc, TypeExpr::new_hole(), true); - v_ident->mutate()->assign_sym(sym); - } - } - - static void process_function_arguments(const FunctionData* fun_ref, V v, AnyExprV lhs_of_dot_call) { - int delta_self = lhs_of_dot_call ? 1 : 0; - int n_arguments = static_cast(v->get_arguments().size()) + delta_self; - int n_parameters = static_cast(fun_ref->parameters.size()); - - // Tolk doesn't have optional parameters currently, so just compare counts - if (n_parameters < n_arguments) { - v->error("too many arguments in call to `" + fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); - } - if (n_arguments < n_parameters) { - v->error("too few arguments in call to `" + fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); - } - } - - void visit(V v) override { - if (v->marked_as_redef) { - auto v_ident = v->get_identifier()->as(); - const Symbol* sym = current_scope.lookup_symbol(v_ident->name); - if (sym == nullptr) { - v->error("`redef` for unknown variable"); - } - if (!sym->try_as() && !sym->try_as()) { - v->error("`redef` for unknown variable"); - } - v->mutate()->assign_var_ref(sym); - 
v_ident->mutate()->assign_sym(sym); - } else if (auto v_ident = v->get_identifier()->try_as()) { - TypeExpr* var_type = v->declared_type ? v->declared_type : TypeExpr::new_hole(); - const Symbol* sym = create_local_var_sym(v_ident->name, v->loc, var_type, v->is_immutable); - v->mutate()->assign_var_ref(sym); - v_ident->mutate()->assign_sym(sym); - } else { - // underscore, do nothing, v->sym remains nullptr - } - } - - void visit(V v) override { - parent::visit(v->get_assigned_val()); - parent::visit(v->get_lhs()); - } - - void visit(V v) override { - const Symbol* sym = current_scope.lookup_symbol(v->name); - if (!sym) { - v->error("undefined symbol `" + static_cast(v->name) + "`"); - } - v->mutate()->assign_sym(sym); - - // for global functions, global vars and constants, `import` must exist - if (!sym->try_as()) { - check_import_exists_when_using_sym(v, sym); - } - } - - void visit(V v) override { - parent::visit(v->get_called_f()); - parent::visit(v->get_arg_list()); - - // most likely it's a global function, but also may be `some_var(args)` or even `getF()(args)` - // for such corner cases, sym remains nullptr - if (auto v_ident = v->get_called_f()->try_as()) { - if (const auto* fun_ref = v_ident->sym->try_as()) { - v->mutate()->assign_fun_ref(fun_ref); - process_function_arguments(fun_ref, v->get_arg_list(), nullptr); - } - } - // for `some_var(args)`, if it's called with wrong arguments count, the error is not here - // it will be fired later, it's a type checking error - } - - void visit(V v) override { - const Symbol* sym = lookup_global_symbol(v->method_name); - if (!sym) { - v->error("undefined symbol `" + static_cast(v->method_name) + "`"); - } - const auto* fun_ref = sym->try_as(); - if (!fun_ref) { - v->error("`" + static_cast(v->method_name) + "` is not a method"); - } - - if (fun_ref->parameters.empty()) { - v->error("`" + static_cast(v->method_name) + "` has no parameters and can not be called as method"); - } - - 
v->mutate()->assign_fun_ref(fun_ref); - parent::visit(v); - process_function_arguments(fun_ref, v->get_arg_list(), v->get_obj()); - } - - void visit(V v) override { - const Symbol* sym = current_scope.lookup_symbol("self"); - if (!sym) { - v->error("using `self` in a non-member function (it does not accept the first `self` parameter)"); - } - v->mutate()->assign_param_ref(sym->as()); - } - - void visit(V v) override { - if (v->empty()) { - return; - } - current_scope.open_scope(v->loc); - parent::visit(v); - current_scope.close_scope(v->loc_end); - } - - void visit(V v) override { - current_scope.open_scope(v->loc); - parent::visit(v->get_body()); - parent::visit(v->get_cond()); // in 'while' condition it's ok to use variables declared inside do - current_scope.close_scope(v->get_body()->loc_end); - } - - void visit(V v) override { - visit(v->get_try_body()); - current_scope.open_scope(v->get_catch_body()->loc); - const std::vector& catch_items = v->get_catch_expr()->get_items(); - tolk_assert(catch_items.size() == 2); - process_catch_variable(catch_items[1]); - process_catch_variable(catch_items[0]); - parent::visit(v->get_catch_body()); - current_scope.close_scope(v->get_catch_body()->loc_end); - } - -public: - void start_visiting_function(V v_function) override { - auto v_seq = v_function->get_body()->try_as(); - tolk_assert(v_seq != nullptr); - - current_scope.open_scope(v_function->loc); - - for (int i = 0; i < v_function->get_num_params(); ++i) { - current_scope.add_local_var(&v_function->fun_ref->parameters[i]); - v_function->get_param(i)->mutate()->assign_param_ref(&v_function->fun_ref->parameters[i]); - } - parent::visit(v_seq); - - current_scope.close_scope(v_seq->loc_end); - tolk_assert(current_scope.scopes.empty()); - } -}; - -NameAndScopeResolver AssignSymInsideFunctionVisitor::current_scope; - -void pipeline_resolve_identifiers_and_assign_symbols(const AllSrcFiles& all_src_files) { - visit_ast_of_all_functions(all_src_files); -} - -} // namespace tolk 
diff --git a/tolk/pipeline.h b/tolk/pipeline.h index 5c0fe656..afea66d6 100644 --- a/tolk/pipeline.h +++ b/tolk/pipeline.h @@ -25,25 +25,33 @@ */ #pragma once -#include "src-file.h" +#include "fwd-declarations.h" #include namespace tolk { -AllSrcFiles pipeline_discover_and_parse_sources(const std::string& stdlib_filename, const std::string& entrypoint_filename); +void pipeline_discover_and_parse_sources(const std::string& stdlib_filename, const std::string& entrypoint_filename); -void pipeline_register_global_symbols(const AllSrcFiles&); -void pipeline_resolve_identifiers_and_assign_symbols(const AllSrcFiles&); -void pipeline_calculate_rvalue_lvalue(const AllSrcFiles&); -void pipeline_detect_unreachable_statements(const AllSrcFiles&); -void pipeline_infer_and_check_types(const AllSrcFiles&); -void pipeline_refine_lvalue_for_mutate_arguments(const AllSrcFiles&); -void pipeline_check_rvalue_lvalue(const AllSrcFiles&); -void pipeline_check_pure_impure_operations(const AllSrcFiles&); -void pipeline_constant_folding(const AllSrcFiles&); -void pipeline_convert_ast_to_legacy_Expr_Op(const AllSrcFiles&); +void pipeline_register_global_symbols(); +void pipeline_resolve_identifiers_and_assign_symbols(); +void pipeline_calculate_rvalue_lvalue(); +void pipeline_detect_unreachable_statements(); +void pipeline_infer_types_and_calls_and_fields(); +void pipeline_refine_lvalue_for_mutate_arguments(); +void pipeline_check_rvalue_lvalue(); +void pipeline_check_pure_impure_operations(); +void pipeline_constant_folding(); +void pipeline_convert_ast_to_legacy_Expr_Op(); void pipeline_find_unused_symbols(); -void pipeline_generate_fif_output_to_std_cout(const AllSrcFiles&); +void pipeline_generate_fif_output_to_std_cout(); + +// these pipes also can be called per-function individually +// they are called for instantiated generics functions, when `f` is deeply cloned as `f` +void pipeline_resolve_identifiers_and_assign_symbols(const FunctionData*); +void 
pipeline_calculate_rvalue_lvalue(const FunctionData*); +void pipeline_detect_unreachable_statements(const FunctionData*); +void pipeline_infer_types_and_calls_and_fields(const FunctionData*); + } // namespace tolk diff --git a/tolk/platform-utils.h b/tolk/platform-utils.h index 7b16226e..5ab01220 100644 --- a/tolk/platform-utils.h +++ b/tolk/platform-utils.h @@ -27,11 +27,15 @@ #if __GNUC__ #define GNU_ATTRIBUTE_COLD [[gnu::cold]] +#define GNU_ATTRIBUTE_FLATTEN [[gnu::flatten]] #define GNU_ATTRIBUTE_NORETURN [[gnu::noreturn]] +#define GNU_ATTRIBUTE_NOINLINE [[gnu::noinline]] #define GNU_ATTRIBUTE_ALWAYS_INLINE [[gnu::always_inline]] #else #define GNU_ATTRIBUTE_COLD +#define GNU_ATTRIBUTE_FLATTEN #define GNU_ATTRIBUTE_NORETURN [[noreturn]] +#define GNU_ATTRIBUTE_NOINLINE [[noinline]] #define GNU_ATTRIBUTE_ALWAYS_INLINE #endif diff --git a/tolk/src-file.cpp b/tolk/src-file.cpp index e5533f69..52ac3821 100644 --- a/tolk/src-file.cpp +++ b/tolk/src-file.cpp @@ -23,8 +23,8 @@ namespace tolk { static_assert(sizeof(SrcLocation) == 8); -SrcFile* AllRegisteredSrcFiles::find_file(int file_id) const { - for (SrcFile* file : all_src_files) { +const SrcFile* AllRegisteredSrcFiles::find_file(int file_id) const { + for (const SrcFile* file : all_src_files) { if (file->file_id == file_id) { return file; } @@ -32,8 +32,8 @@ SrcFile* AllRegisteredSrcFiles::find_file(int file_id) const { return nullptr; } -SrcFile* AllRegisteredSrcFiles::find_file(const std::string& abs_filename) const { - for (SrcFile* file : all_src_files) { +const SrcFile* AllRegisteredSrcFiles::find_file(const std::string& abs_filename) const { + for (const SrcFile* file : all_src_files) { if (file->abs_filename == abs_filename) { return file; } @@ -41,7 +41,7 @@ SrcFile* AllRegisteredSrcFiles::find_file(const std::string& abs_filename) const return nullptr; } -SrcFile* AllRegisteredSrcFiles::locate_and_register_source_file(const std::string& rel_filename, SrcLocation included_from) { +const SrcFile* 
AllRegisteredSrcFiles::locate_and_register_source_file(const std::string& rel_filename, SrcLocation included_from) { td::Result path = G.settings.read_callback(CompilerSettings::FsReadCallbackKind::Realpath, rel_filename.c_str()); if (path.is_error()) { if (included_from.is_defined()) { @@ -51,7 +51,7 @@ SrcFile* AllRegisteredSrcFiles::locate_and_register_source_file(const std::strin } std::string abs_filename = path.move_as_ok(); - if (SrcFile* file = find_file(abs_filename)) { + if (const SrcFile* file = find_file(abs_filename)) { return file; } @@ -75,16 +75,7 @@ SrcFile* AllRegisteredSrcFiles::get_next_unparsed_file() { if (last_parsed_file_id >= last_registered_file_id) { return nullptr; } - return all_src_files[++last_parsed_file_id]; -} - -AllSrcFiles AllRegisteredSrcFiles::get_all_files() const { - AllSrcFiles src_files_immutable; - src_files_immutable.reserve(all_src_files.size()); - for (const SrcFile* file : all_src_files) { - src_files_immutable.push_back(file); - } - return src_files_immutable; + return const_cast(all_src_files[++last_parsed_file_id]); } bool SrcFile::is_stdlib_file() const { diff --git a/tolk/src-file.h b/tolk/src-file.h index 9fbbfbb4..0c82bf18 100644 --- a/tolk/src-file.h +++ b/tolk/src-file.h @@ -30,7 +30,7 @@ struct SrcFile { std::string_view line_str; }; - struct ImportStatement { + struct ImportDirective { const SrcFile* imported_file; }; @@ -39,7 +39,7 @@ struct SrcFile { std::string abs_filename; // absolute from root std::string text; // file contents loaded into memory, every Token::str_val points inside it AnyV ast = nullptr; // when a file has been parsed, its ast_tolk_file is kept here - std::vector imports; // to check strictness (can't use a symbol without importing its file) + std::vector imports; // to check strictness (can't use a symbol without importing its file) SrcFile(int file_id, std::string rel_filename, std::string abs_filename, std::string&& text) : file_id(file_id) @@ -95,21 +95,20 @@ public: std::ostream& 
operator<<(std::ostream& os, SrcLocation loc); -using AllSrcFiles = std::vector; - class AllRegisteredSrcFiles { - std::vector all_src_files; + std::vector all_src_files; int last_registered_file_id = -1; int last_parsed_file_id = -1; public: - SrcFile *find_file(int file_id) const; - SrcFile* find_file(const std::string& abs_filename) const; + const SrcFile* find_file(int file_id) const; + const SrcFile* find_file(const std::string& abs_filename) const; - SrcFile* locate_and_register_source_file(const std::string& rel_filename, SrcLocation included_from); + const SrcFile* locate_and_register_source_file(const std::string& rel_filename, SrcLocation included_from); SrcFile* get_next_unparsed_file(); - AllSrcFiles get_all_files() const; + auto begin() const { return all_src_files.begin(); } + auto end() const { return all_src_files.end(); } }; struct Fatal final : std::exception { diff --git a/tolk/symtable.cpp b/tolk/symtable.cpp index dc715031..918fdab3 100644 --- a/tolk/symtable.cpp +++ b/tolk/symtable.cpp @@ -17,28 +17,48 @@ #include "symtable.h" #include "compiler-state.h" #include "platform-utils.h" -#include -#include +#include "generics-helpers.h" namespace tolk { +std::string FunctionData::as_human_readable() const { + if (!genericTs) { + return name; // if it's generic instantiation like `f`, its name is "f", not "f" + } + return name + genericTs->as_human_readable(); +} + bool FunctionData::does_need_codegen() const { // when a function is declared, but not referenced from code in any way, don't generate its body if (!is_really_used() && G.settings.remove_unused_functions) { return false; } + // functions with asm body don't need code generation + // (even if used as non-call: `var a = beginCell;` inserts TVM continuation inline) + if (is_asm_function() || is_builtin_function()) { + return false; + } // when a function is referenced like `var a = some_fn;` (or in some other non-call way), its continuation should exist if (is_used_as_noncall()) { return 
true; } + // generic functions also don't need code generation, only generic instantiations do + if (is_generic_function()) { + return false; + } // currently, there is no inlining, all functions are codegenerated // (but actually, unused ones are later removed by Fift) // in the future, we may want to implement a true AST inlining for "simple" functions return true; } -void FunctionData::assign_is_really_used() { - this->flags |= flagReallyUsed; +void FunctionData::assign_resolved_type(TypePtr declared_return_type) { + this->declared_return_type = declared_return_type; +} + +void FunctionData::assign_inferred_type(TypePtr inferred_return_type, TypePtr inferred_full_type) { + this->inferred_return_type = inferred_return_type; + this->inferred_full_type = inferred_full_type; } void FunctionData::assign_is_used_as_noncall() { @@ -49,14 +69,45 @@ void FunctionData::assign_is_implicit_return() { this->flags |= flagImplicitReturn; } +void FunctionData::assign_is_type_inferring_done() { + this->flags |= flagTypeInferringDone; +} + +void FunctionData::assign_is_really_used() { + this->flags |= flagReallyUsed; +} + +void FunctionData::assign_arg_order(std::vector&& arg_order) { + this->arg_order = std::move(arg_order); +} + +void GlobalVarData::assign_resolved_type(TypePtr declared_type) { + this->declared_type = declared_type; +} + void GlobalVarData::assign_is_really_used() { this->flags |= flagReallyUsed; } +void GlobalConstData::assign_resolved_type(TypePtr declared_type) { + this->declared_type = declared_type; +} + void LocalVarData::assign_idx(int idx) { this->idx = idx; } +void LocalVarData::assign_resolved_type(TypePtr declared_type) { + this->declared_type = declared_type; +} + +void LocalVarData::assign_inferred_type(TypePtr inferred_type) { +#ifdef TOLK_DEBUG + assert(this->declared_type == nullptr); // called when type declaration omitted, inferred from assigned value +#endif + this->declared_type = inferred_type; +} + GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD 
static void fire_error_redefinition_of_symbol(SrcLocation loc, const Symbol* previous) { SrcLocation prev_loc = previous->loc; diff --git a/tolk/symtable.h b/tolk/symtable.h index 53a5e52e..3cda24ed 100644 --- a/tolk/symtable.h +++ b/tolk/symtable.h @@ -17,7 +17,7 @@ #pragma once #include "src-file.h" -#include "type-expr.h" +#include "fwd-declarations.h" #include "constant-evaluator.h" #include "crypto/common/refint.h" #include @@ -57,27 +57,31 @@ struct LocalVarData final : Symbol { flagImmutable = 2, // variable was declared via `val` (not `var`) }; - TypeExpr* declared_type; - int flags = 0; + TypePtr declared_type; // either at declaration `var x:int`, or if omitted, from assigned value `var x=2` + int flags; int idx; - LocalVarData(std::string name, SrcLocation loc, int idx, TypeExpr* declared_type) + LocalVarData(std::string name, SrcLocation loc, TypePtr declared_type, int flags, int idx) : Symbol(std::move(name), loc) , declared_type(declared_type) + , flags(flags) , idx(idx) { } - bool is_underscore() const { return name.empty(); } bool is_immutable() const { return flags & flagImmutable; } bool is_mutate_parameter() const { return flags & flagMutateParameter; } LocalVarData* mutate() const { return const_cast(this); } void assign_idx(int idx); + void assign_resolved_type(TypePtr declared_type); + void assign_inferred_type(TypePtr inferred_type); }; struct FunctionBodyCode; struct FunctionBodyAsm; struct FunctionBodyBuiltin; +struct GenericsDeclaration; +struct GenericsInstantiation; typedef std::variant< FunctionBodyCode*, @@ -91,7 +95,7 @@ struct FunctionData final : Symbol { enum { flagInline = 1, // marked `@inline` flagInlineRef = 2, // marked `@inline_ref` - flagReallyUsed = 4, // calculated via dfs from used functions; declared but unused functions are not codegenerated + flagTypeInferringDone = 4, // type inferring step of function's body (all AST nodes assigning v->inferred_type) is done flagUsedAsNonCall = 8, // used not only as `f()`, but as a 
1-st class function (assigned to var, pushed to tuple, etc.) flagMarkedAsPure = 16, // declared as `pure`, can't call impure and access globals, unused invocations are optimized out flagImplicitReturn = 32, // control flow reaches end of function, so it needs implicit return at the end @@ -100,25 +104,36 @@ struct FunctionData final : Symbol { flagHasMutateParams = 256, // has parameters declared as `mutate` flagAcceptsSelf = 512, // is a member function (has `self` first parameter) flagReturnsSelf = 1024, // return type is `self` (returns the mutated 1st argument), calls can be chainable + flagReallyUsed = 2048, // calculated via dfs from used functions; declared but unused functions are not codegenerated }; int method_id = EMPTY_METHOD_ID; int flags; - TypeExpr* full_type; // currently, TypeExpr::_Map, probably wrapped with forall std::vector parameters; std::vector arg_order, ret_order; + TypePtr declared_return_type; // may be nullptr, meaning "auto infer" + TypePtr inferred_return_type = nullptr; // assigned on type inferring + TypePtr inferred_full_type = nullptr; // assigned on type inferring, it's TypeDataFunCallable(params -> return) + const GenericsDeclaration* genericTs; + const GenericsInstantiation* instantiationTs; FunctionBody body; + AnyV ast_root; // V for user-defined (not builtin) - FunctionData(std::string name, SrcLocation loc, TypeExpr* full_type, std::vector parameters, int initial_flags, FunctionBody body) + FunctionData(std::string name, SrcLocation loc, TypePtr declared_return_type, std::vector parameters, int initial_flags, const GenericsDeclaration* genericTs, const GenericsInstantiation* instantiationTs, FunctionBody body, AnyV ast_root) : Symbol(std::move(name), loc) , flags(initial_flags) - , full_type(full_type) , parameters(std::move(parameters)) - , body(body) { + , declared_return_type(declared_return_type) + , genericTs(genericTs) + , instantiationTs(instantiationTs) + , body(body) + , ast_root(ast_root) { } + std::string 
as_human_readable() const; + const std::vector* get_arg_order() const { return arg_order.empty() ? nullptr : &arg_order; } @@ -126,13 +141,19 @@ struct FunctionData final : Symbol { return ret_order.empty() ? nullptr : &ret_order; } - bool is_regular_function() const { return std::holds_alternative(body); } + int get_num_params() const { return static_cast(parameters.size()); } + const LocalVarData& get_param(int idx) const { return parameters[idx]; } + + bool is_code_function() const { return std::holds_alternative(body); } bool is_asm_function() const { return std::holds_alternative(body); } - bool is_builtin_function() const { return std::holds_alternative(body); } + bool is_builtin_function() const { return ast_root == nullptr; } + + bool is_generic_function() const { return genericTs != nullptr; } + bool is_instantiation_of_generic_function() const { return instantiationTs != nullptr; } bool is_inline() const { return flags & flagInline; } bool is_inline_ref() const { return flags & flagInlineRef; } - bool is_really_used() const { return flags & flagReallyUsed; } + bool is_type_inferring_done() const { return flags & flagTypeInferringDone; } bool is_used_as_noncall() const { return flags & flagUsedAsNonCall; } bool is_marked_as_pure() const { return flags & flagMarkedAsPure; } bool is_implicit_return() const { return flags & flagImplicitReturn; } @@ -143,13 +164,18 @@ struct FunctionData final : Symbol { bool does_accept_self() const { return flags & flagAcceptsSelf; } bool does_return_self() const { return flags & flagReturnsSelf; } bool does_mutate_self() const { return (flags & flagAcceptsSelf) && parameters[0].is_mutate_parameter(); } + bool is_really_used() const { return flags & flagReallyUsed; } bool does_need_codegen() const; FunctionData* mutate() const { return const_cast(this); } - void assign_is_really_used(); + void assign_resolved_type(TypePtr declared_return_type); + void assign_inferred_type(TypePtr inferred_return_type, TypePtr 
inferred_full_type); void assign_is_used_as_noncall(); void assign_is_implicit_return(); + void assign_is_type_inferring_done(); + void assign_is_really_used(); + void assign_arg_order(std::vector&& arg_order); }; struct GlobalVarData final : Symbol { @@ -157,10 +183,10 @@ struct GlobalVarData final : Symbol { flagReallyUsed = 1, // calculated via dfs from used functions; unused globals are not codegenerated }; - TypeExpr* declared_type; + TypePtr declared_type; // always exists, declaring globals without type is prohibited int flags = 0; - GlobalVarData(std::string name, SrcLocation loc, TypeExpr* declared_type) + GlobalVarData(std::string name, SrcLocation loc, TypePtr declared_type) : Symbol(std::move(name), loc) , declared_type(declared_type) { } @@ -168,17 +194,18 @@ struct GlobalVarData final : Symbol { bool is_really_used() const { return flags & flagReallyUsed; } GlobalVarData* mutate() const { return const_cast(this); } + void assign_resolved_type(TypePtr declared_type); void assign_is_really_used(); }; struct GlobalConstData final : Symbol { ConstantValue value; - TypeExpr* inferred_type; + TypePtr declared_type; // may be nullptr - GlobalConstData(std::string name, SrcLocation loc, ConstantValue&& value) + GlobalConstData(std::string name, SrcLocation loc, TypePtr declared_type, ConstantValue&& value) : Symbol(std::move(name), loc) , value(std::move(value)) - , inferred_type(TypeExpr::new_atomic(this->value.is_int() ? 
TypeExpr::_Int : TypeExpr::_Slice)) { + , declared_type(declared_type) { } bool is_int_const() const { return value.is_int(); } @@ -186,6 +213,9 @@ struct GlobalConstData final : Symbol { td::RefInt256 as_int_const() const { return value.as_int(); } const std::string& as_slice_const() const { return value.as_slice(); } + + GlobalConstData* mutate() const { return const_cast(this); } + void assign_resolved_type(TypePtr declared_type); }; class GlobalSymbolTable { diff --git a/tolk/tolk.cpp b/tolk/tolk.cpp index f2255ce3..d1159d3c 100644 --- a/tolk/tolk.cpp +++ b/tolk/tolk.cpp @@ -28,6 +28,7 @@ #include "compiler-state.h" #include "lexer.h" #include "ast.h" +#include "type-system.h" namespace tolk { @@ -45,27 +46,28 @@ void on_assertion_failed(const char *description, const char *file_name, int lin } int tolk_proceed(const std::string &entrypoint_filename) { + type_system_init(); define_builtins(); lexer_init(); // on any error, an exception is thrown, and the message is printed out below // (currently, only a single error can be printed) try { - AllSrcFiles all_files = pipeline_discover_and_parse_sources("@stdlib/common.tolk", entrypoint_filename); + pipeline_discover_and_parse_sources("@stdlib/common.tolk", entrypoint_filename); - pipeline_register_global_symbols(all_files); - pipeline_resolve_identifiers_and_assign_symbols(all_files); - pipeline_calculate_rvalue_lvalue(all_files); - pipeline_detect_unreachable_statements(all_files); - pipeline_infer_and_check_types(all_files); - pipeline_refine_lvalue_for_mutate_arguments(all_files); - pipeline_check_rvalue_lvalue(all_files); - pipeline_check_pure_impure_operations(all_files); - pipeline_constant_folding(all_files); - pipeline_convert_ast_to_legacy_Expr_Op(all_files); + pipeline_register_global_symbols(); + pipeline_resolve_identifiers_and_assign_symbols(); + pipeline_calculate_rvalue_lvalue(); + pipeline_detect_unreachable_statements(); + pipeline_infer_types_and_calls_and_fields(); + 
pipeline_refine_lvalue_for_mutate_arguments(); + pipeline_check_rvalue_lvalue(); + pipeline_check_pure_impure_operations(); + pipeline_constant_folding(); + pipeline_convert_ast_to_legacy_Expr_Op(); pipeline_find_unused_symbols(); - pipeline_generate_fif_output_to_std_cout(all_files); + pipeline_generate_fif_output_to_std_cout(); return 0; } catch (Fatal& fatal) { @@ -74,11 +76,6 @@ int tolk_proceed(const std::string &entrypoint_filename) { } catch (ParseError& error) { std::cerr << error << std::endl; return 2; - } catch (UnifyError& unif_err) { - std::cerr << "fatal: "; - unif_err.print_message(std::cerr); - std::cerr << std::endl; - return 2; } catch (UnexpectedASTNodeType& error) { std::cerr << "fatal: " << error.what() << std::endl; std::cerr << "It's a compiler bug, please report to developers" << std::endl; diff --git a/tolk/tolk.h b/tolk/tolk.h index 0408648f..5ec4d3e0 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h @@ -18,7 +18,6 @@ #include "platform-utils.h" #include "src-file.h" -#include "type-expr.h" #include "symtable.h" #include "crypto/common/refint.h" #include "td/utils/Status.h" @@ -35,30 +34,6 @@ namespace tolk { GNU_ATTRIBUTE_COLD GNU_ATTRIBUTE_NORETURN void on_assertion_failed(const char *description, const char *file_name, int line_number); -/* - * - * TYPE EXPRESSIONS - * - */ - -struct UnifyError : std::exception { - TypeExpr* te1; - TypeExpr* te2; - std::string msg; - - UnifyError(TypeExpr* _te1, TypeExpr* _te2, std::string _msg = "") : te1(_te1), te2(_te2), msg(std::move(_msg)) { - } - - void print_message(std::ostream& os) const; - const char* what() const noexcept override { - return msg.c_str(); - } -}; - -std::ostream& operator<<(std::ostream& os, const UnifyError& ue); - -void unify(TypeExpr*& te1, TypeExpr*& te2); - /* * * ABSTRACT CODE @@ -69,15 +44,15 @@ typedef int var_idx_t; typedef int const_idx_t; struct TmpVar { - TypeExpr* v_type; + TypePtr v_type; var_idx_t idx; const LocalVarData* v_sym; // points to var defined in code; 
nullptr for implicitly created tmp vars int coord; SrcLocation where; std::vector> on_modification; - TmpVar(var_idx_t _idx, TypeExpr* _type, const LocalVarData* v_sym, SrcLocation loc) - : v_type(_type) + TmpVar(var_idx_t _idx, TypePtr type, const LocalVarData* v_sym, SrcLocation loc) + : v_type(type) , idx(_idx) , v_sym(v_sym) , coord(0) @@ -410,13 +385,13 @@ inline ListIterator end(const Op* op_list) { return ListIterator{}; } -typedef std::tuple FormalArg; +typedef std::tuple FormalArg; typedef std::vector FormalArgList; struct AsmOpList; struct FunctionBodyCode { - CodeBlob* code; + CodeBlob* code = nullptr; void set_code(CodeBlob* code); }; @@ -597,6 +572,7 @@ inline std::ostream& operator<<(std::ostream& os, const AsmOp& op) { } std::ostream& operator<<(std::ostream& os, AsmOp::SReg stack_reg); +std::ostream& operator<<(std::ostream& os, TypePtr type_data); struct AsmOpList { std::vector list_; @@ -1116,7 +1092,6 @@ struct FunctionBodyAsm { struct CodeBlob { int var_cnt, in_var_cnt; - TypeExpr* ret_type; const FunctionData* fun_ref; std::string name; SrcLocation loc; @@ -1128,8 +1103,8 @@ struct CodeBlob { #endif std::stack*> cur_ops_stack; bool require_callxargs = false; - CodeBlob(std::string name, SrcLocation loc, const FunctionData* fun_ref, TypeExpr* ret_type) - : var_cnt(0), in_var_cnt(0), ret_type(ret_type), fun_ref(fun_ref), name(std::move(name)), loc(loc), cur_ops(&ops) { + CodeBlob(std::string name, SrcLocation loc, const FunctionData* fun_ref) + : var_cnt(0), in_var_cnt(0), fun_ref(fun_ref), name(std::move(name)), loc(loc), cur_ops(&ops) { } template Op& emplace_back(Args&&... 
args) { @@ -1141,8 +1116,8 @@ struct CodeBlob { return res; } bool import_params(FormalArgList&& arg_list); - var_idx_t create_var(TypeExpr* var_type, const LocalVarData* v_sym, SrcLocation loc); - var_idx_t create_tmp_var(TypeExpr* var_type, SrcLocation loc) { + var_idx_t create_var(TypePtr var_type, const LocalVarData* v_sym, SrcLocation loc); + var_idx_t create_tmp_var(TypePtr var_type, SrcLocation loc) { return create_var(var_type, nullptr, loc); } int split_vars(bool strict = false); @@ -1164,7 +1139,6 @@ struct CodeBlob { close_blk(location); pop_cur(); } - void simplify_var_types(); void prune_unreachable_code(); void fwd_analyze(); void mark_noreturn(); diff --git a/tolk/type-expr.h b/tolk/type-expr.h deleted file mode 100644 index 21a35a8e..00000000 --- a/tolk/type-expr.h +++ /dev/null @@ -1,131 +0,0 @@ -#pragma once - -#include -#include - -namespace tolk { - -struct TypeExpr { - enum Kind { te_Unknown, te_Var, te_Indirect, te_Atomic, te_Tensor, te_Tuple, te_Map, te_ForAll }; - enum AtomicType { _Int, _Cell, _Slice, _Builder, _Continutaion, _Tuple }; - Kind constr; - int value; - int minw, maxw; - static constexpr int w_inf = 1023; - std::vector args; - bool was_forall_var = false; - - explicit TypeExpr(Kind _constr, int _val = 0) : constr(_constr), value(_val), minw(0), maxw(w_inf) { - } - TypeExpr(Kind _constr, int _val, int width) : constr(_constr), value(_val), minw(width), maxw(width) { - } - TypeExpr(Kind _constr, std::vector list) - : constr(_constr), value((int)list.size()), args(std::move(list)) { - compute_width(); - } - TypeExpr(Kind _constr, std::initializer_list list) - : constr(_constr), value((int)list.size()), args(std::move(list)) { - compute_width(); - } - TypeExpr(Kind _constr, TypeExpr* elem0) : constr(_constr), value(1), args{elem0} { - compute_width(); - } - TypeExpr(Kind _constr, TypeExpr* elem0, std::vector list) - : constr(_constr), value((int)list.size() + 1), args{elem0} { - args.insert(args.end(), list.begin(), list.end()); - 
compute_width(); - } - TypeExpr(Kind _constr, TypeExpr* elem0, std::initializer_list list) - : constr(_constr), value((int)list.size() + 1), args{elem0} { - args.insert(args.end(), list.begin(), list.end()); - compute_width(); - } - - bool is_atomic() const { - return constr == te_Atomic; - } - bool is_atomic(int v) const { - return constr == te_Atomic && value == v; - } - bool is_int() const { - return is_atomic(_Int); - } - bool is_var() const { - return constr == te_Var; - } - bool is_map() const { - return constr == te_Map; - } - bool is_tuple() const { - return constr == te_Tuple; - } - bool has_fixed_width() const { - return minw == maxw; - } - int get_width() const { - return has_fixed_width() ? minw : -1; - } - void compute_width(); - bool recompute_width(); - void show_width(std::ostream& os); - std::ostream& print(std::ostream& os, int prio = 0) const; - void replace_with(TypeExpr* te2); - int extract_components(std::vector& comp_list); - bool equals_to(const TypeExpr* rhs) const; - bool has_unknown_inside() const; - static int holes, type_vars; - static TypeExpr* new_hole() { - return new TypeExpr{te_Unknown, ++holes}; - } - static TypeExpr* new_hole(int width) { - return new TypeExpr{te_Unknown, ++holes, width}; - } - static TypeExpr* new_unit() { - return new TypeExpr{te_Tensor, 0, 0}; - } - static TypeExpr* new_atomic(int value) { - return new TypeExpr{te_Atomic, value, 1}; - } - static TypeExpr* new_map(TypeExpr* from, TypeExpr* to); - static TypeExpr* new_func() { - return new_map(new_hole(), new_hole()); - } - static TypeExpr* new_tensor(std::vector list, bool red = true) { - return red && list.size() == 1 ? 
list[0] : new TypeExpr{te_Tensor, std::move(list)}; - } - static TypeExpr* new_tensor(std::initializer_list list) { - return new TypeExpr{te_Tensor, std::move(list)}; - } - static TypeExpr* new_tensor(TypeExpr* te1, TypeExpr* te2) { - return new_tensor({te1, te2}); - } - static TypeExpr* new_tensor(TypeExpr* te1, TypeExpr* te2, TypeExpr* te3) { - return new_tensor({te1, te2, te3}); - } - static TypeExpr* new_tuple(TypeExpr* arg0) { - return new TypeExpr{te_Tuple, arg0}; - } - static TypeExpr* new_tuple(std::vector list, bool red = false) { - return new_tuple(new_tensor(std::move(list), red)); - } - static TypeExpr* new_tuple(std::initializer_list list) { - return new_tuple(new_tensor(list)); - } - static TypeExpr* new_var() { - return new TypeExpr{te_Var, --type_vars, 1}; - } - static TypeExpr* new_var(int idx) { - return new TypeExpr{te_Var, idx, 1}; - } - static TypeExpr* new_forall(std::vector list, TypeExpr* body) { - return new TypeExpr{te_ForAll, body, std::move(list)}; - } - - static bool remove_indirect(TypeExpr*& te, TypeExpr* forbidden = nullptr); - static std::vector remove_forall(TypeExpr*& te); - static bool remove_forall_in(TypeExpr*& te, TypeExpr* te2, const std::vector& new_vars); -}; - -std::ostream& operator<<(std::ostream& os, TypeExpr* type_expr); - -} // namespace tolk diff --git a/tolk/type-system.cpp b/tolk/type-system.cpp new file mode 100644 index 00000000..31f84e75 --- /dev/null +++ b/tolk/type-system.cpp @@ -0,0 +1,684 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "type-system.h" +#include "lexer.h" +#include "platform-utils.h" +#include "compiler-state.h" +#include + +namespace tolk { + +/* + * This class stores a big hashtable [hash => TypeData] + * Every non-trivial TypeData*::create() method at first looks here, and allocates an object only if not found. + * That's why all allocated TypeData objects are unique, storing unique type_id. + */ +class TypeDataTypeIdCalculation { + uint64_t cur_hash; + int children_flags_mask = 0; + + static std::unordered_map all_unique_occurred_types; + +public: + explicit TypeDataTypeIdCalculation(uint64_t initial_arbitrary_unique_number) + : cur_hash(initial_arbitrary_unique_number) {} + + void feed_hash(uint64_t val) { + cur_hash = cur_hash * 56235515617499ULL + val; + } + + void feed_string(const std::string& s) { + feed_hash(std::hash{}(s)); + } + + void feed_child(TypePtr inner) { + feed_hash(inner->type_id); + children_flags_mask |= inner->flags; + } + + uint64_t type_id() const { + return cur_hash; + } + + int children_flags() const { + return children_flags_mask; + } + + GNU_ATTRIBUTE_FLATTEN + TypePtr get_existing() const { + auto it = all_unique_occurred_types.find(cur_hash); + return it != all_unique_occurred_types.end() ? 
it->second : nullptr; + } + + GNU_ATTRIBUTE_NOINLINE + TypePtr register_unique(TypePtr newly_created) const { +#ifdef TOLK_DEBUG + assert(newly_created->type_id == cur_hash); +#endif + all_unique_occurred_types[cur_hash] = newly_created; + return newly_created; + } +}; + +std::unordered_map TypeDataTypeIdCalculation::all_unique_occurred_types; +TypePtr TypeDataInt::singleton; +TypePtr TypeDataCell::singleton; +TypePtr TypeDataSlice::singleton; +TypePtr TypeDataBuilder::singleton; +TypePtr TypeDataTuple::singleton; +TypePtr TypeDataContinuation::singleton; +TypePtr TypeDataNullLiteral::singleton; +TypePtr TypeDataUnknown::singleton; +TypePtr TypeDataVoid::singleton; + +void type_system_init() { + TypeDataInt::singleton = new TypeDataInt; + TypeDataCell::singleton = new TypeDataCell; + TypeDataSlice::singleton = new TypeDataSlice; + TypeDataBuilder::singleton = new TypeDataBuilder; + TypeDataTuple::singleton = new TypeDataTuple; + TypeDataContinuation::singleton = new TypeDataContinuation; + TypeDataNullLiteral::singleton = new TypeDataNullLiteral; + TypeDataUnknown::singleton = new TypeDataUnknown; + TypeDataVoid::singleton = new TypeDataVoid; +} + + +// -------------------------------------------- +// create() +// +// all constructors of TypeData classes are private, only TypeData*::create() is allowed +// each non-trivial create() method calculates hash (type_id) +// and creates an object only if it isn't found in a global hashtable +// + +TypePtr TypeDataFunCallable::create(std::vector&& params_types, TypePtr return_type) { + TypeDataTypeIdCalculation hash(3184039965511020991ULL); + for (TypePtr param : params_types) { + hash.feed_child(param); + hash.feed_hash(767721); + } + hash.feed_child(return_type); + hash.feed_hash(767722); + + if (TypePtr existing = hash.get_existing()) { + return existing; + } + return hash.register_unique(new TypeDataFunCallable(hash.type_id(), hash.children_flags(), std::move(params_types), return_type)); +} + +TypePtr 
TypeDataGenericT::create(std::string&& nameT) { + TypeDataTypeIdCalculation hash(9145033724911680012ULL); + hash.feed_string(nameT); + + if (TypePtr existing = hash.get_existing()) { + return existing; + } + return hash.register_unique(new TypeDataGenericT(hash.type_id(), std::move(nameT))); +} + +TypePtr TypeDataTensor::create(std::vector&& items) { + TypeDataTypeIdCalculation hash(3159238551239480381ULL); + for (TypePtr item : items) { + hash.feed_child(item); + hash.feed_hash(819613); + } + + if (TypePtr existing = hash.get_existing()) { + return existing; + } + return hash.register_unique(new TypeDataTensor(hash.type_id(), hash.children_flags(), std::move(items))); +} + +TypePtr TypeDataTypedTuple::create(std::vector&& items) { + TypeDataTypeIdCalculation hash(9189266157349499320ULL); + for (TypePtr item : items) { + hash.feed_child(item); + hash.feed_hash(735911); + } + + if (TypePtr existing = hash.get_existing()) { + return existing; + } + return hash.register_unique(new TypeDataTypedTuple(hash.type_id(), hash.children_flags(), std::move(items))); +} + +TypePtr TypeDataUnresolved::create(std::string&& text, SrcLocation loc) { + TypeDataTypeIdCalculation hash(3680147223540048162ULL); + hash.feed_string(text); + // hash.feed_hash(*reinterpret_cast(&loc)); + + if (TypePtr existing = hash.get_existing()) { + return existing; + } + return hash.register_unique(new TypeDataUnresolved(hash.type_id(), std::move(text), loc)); +} + + +// -------------------------------------------- +// as_human_readable() +// +// is used only for error messages and debugging, therefore no optimizations for simplicity +// only non-trivial implementations are here; trivial are defined in .h file +// + +std::string TypeDataFunCallable::as_human_readable() const { + std::string result = "("; + for (TypePtr param : params_types) { + if (result.size() > 1) { + result += ", "; + } + result += param->as_human_readable(); + } + result += ") -> "; + result += return_type->as_human_readable(); + 
return result; +} + +std::string TypeDataTensor::as_human_readable() const { + std::string result = "("; + for (TypePtr item : items) { + if (result.size() > 1) { + result += ", "; + } + result += item->as_human_readable(); + } + result += ')'; + return result; +} + +std::string TypeDataTypedTuple::as_human_readable() const { + std::string result = "["; + for (TypePtr item : items) { + if (result.size() > 1) { + result += ", "; + } + result += item->as_human_readable(); + } + result += ']'; + return result; +} + + +// -------------------------------------------- +// traverse() +// +// invokes a callback for TypeData itself and all its children +// only non-trivial implementations are here; by default (no children), `callback(this)` is executed +// + +void TypeDataFunCallable::traverse(const TraverserCallbackT& callback) const { + callback(this); + for (TypePtr param : params_types) { + param->traverse(callback); + } + return_type->traverse(callback); +} + +void TypeDataTensor::traverse(const TraverserCallbackT& callback) const { + callback(this); + for (TypePtr item : items) { + item->traverse(callback); + } +} + +void TypeDataTypedTuple::traverse(const TraverserCallbackT& callback) const { + callback(this); + for (TypePtr item : items) { + item->traverse(callback); + } +} + + +// -------------------------------------------- +// replace_children_custom() +// +// returns new TypeData with children replaced by a custom callback +// used to replace generic T on generics expansion — e.g. to convert `f<T>` to `f<int>` +// only non-trivial implementations are here; by default (no children), `return callback(this)` is executed +// + +TypePtr TypeDataFunCallable::replace_children_custom(const ReplacerCallbackT& callback) const { + std::vector mapped; + mapped.reserve(params_types.size()); + for (TypePtr param : params_types) { + mapped.push_back(param->replace_children_custom(callback)); + } + return callback(create(std::move(mapped), return_type->replace_children_custom(callback)));
+} + +TypePtr TypeDataTensor::replace_children_custom(const ReplacerCallbackT& callback) const { + std::vector mapped; + mapped.reserve(items.size()); + for (TypePtr item : items) { + mapped.push_back(item->replace_children_custom(callback)); + } + return callback(create(std::move(mapped))); +} + +TypePtr TypeDataTypedTuple::replace_children_custom(const ReplacerCallbackT& callback) const { + std::vector mapped; + mapped.reserve(items.size()); + for (TypePtr item : items) { + mapped.push_back(item->replace_children_custom(callback)); + } + return callback(create(std::move(mapped))); +} + + +// -------------------------------------------- +// calc_width_on_stack() +// +// returns the number of stack slots occupied by a variable of this type +// only non-trivial implementations are here; by default (most types) occupy 1 stack slot +// + +int TypeDataGenericT::calc_width_on_stack() const { + // this function is invoked only in functions with generics already instantiated + assert(false); + return -999999; +} + +int TypeDataTensor::calc_width_on_stack() const { + int sum = 0; + for (TypePtr item : items) { + sum += item->calc_width_on_stack(); + } + return sum; +} + +int TypeDataUnresolved::calc_width_on_stack() const { + // since early pipeline stages, no unresolved types left + assert(false); + return -999999; +} + +int TypeDataVoid::calc_width_on_stack() const { + return 0; +} + + +// -------------------------------------------- +// can_rhs_be_assigned() +// +// on `var lhs: <type> = rhs`, having inferred rhs_type, check that it can be assigned without any casts +// the same goes for passing arguments, returning values, etc. — where the "receiver" (lhs) checks "applier" (rhs) +// for now, `null` can be assigned to any TVM primitive, but later we'll have T?
types and null safety +// + +bool TypeDataInt::can_rhs_be_assigned(TypePtr rhs) const { + if (rhs == this) { + return true; + } + if (rhs == TypeDataNullLiteral::create()) { + return true; + } + return false; +} + +bool TypeDataCell::can_rhs_be_assigned(TypePtr rhs) const { + if (rhs == this) { + return true; + } + if (rhs == TypeDataNullLiteral::create()) { + return true; + } + return false; +} + +bool TypeDataSlice::can_rhs_be_assigned(TypePtr rhs) const { + if (rhs == this) { + return true; + } + if (rhs == TypeDataNullLiteral::create()) { + return true; + } + return false; +} + +bool TypeDataBuilder::can_rhs_be_assigned(TypePtr rhs) const { + if (rhs == this) { + return true; + } + if (rhs == TypeDataNullLiteral::create()) { + return true; + } + return false; +} + +bool TypeDataTuple::can_rhs_be_assigned(TypePtr rhs) const { + if (rhs == this) { + return true; + } + if (rhs == TypeDataNullLiteral::create()) { + return true; + } + return false; +} + +bool TypeDataContinuation::can_rhs_be_assigned(TypePtr rhs) const { + if (rhs == this) { + return true; + } + if (rhs == TypeDataNullLiteral::create()) { + return true; + } + return false; +} + +bool TypeDataNullLiteral::can_rhs_be_assigned(TypePtr rhs) const { + return rhs == this; +} + +bool TypeDataFunCallable::can_rhs_be_assigned(TypePtr rhs) const { + return rhs == this; +} + +bool TypeDataGenericT::can_rhs_be_assigned(TypePtr rhs) const { + assert(false); + return false; +} + +bool TypeDataTensor::can_rhs_be_assigned(TypePtr rhs) const { + if (const auto* as_tensor = rhs->try_as(); as_tensor && as_tensor->size() == size()) { + for (int i = 0; i < size(); ++i) { + if (!items[i]->can_rhs_be_assigned(as_tensor->items[i])) { + return false; + } + } + return true; + } + // note, that tensors can not accept null + return false; +} + +bool TypeDataTypedTuple::can_rhs_be_assigned(TypePtr rhs) const { + if (const auto* as_tuple = rhs->try_as(); as_tuple && as_tuple->size() == size()) { + for (int i = 0; i < size(); 
++i) { + if (!items[i]->can_rhs_be_assigned(as_tuple->items[i])) { + return false; + } + } + return true; + } + if (rhs == TypeDataNullLiteral::create()) { + return true; + } + return false; +} + +bool TypeDataUnknown::can_rhs_be_assigned(TypePtr rhs) const { + return true; +} + +bool TypeDataUnresolved::can_rhs_be_assigned(TypePtr rhs) const { + assert(false); + return false; +} + +bool TypeDataVoid::can_rhs_be_assigned(TypePtr rhs) const { + return rhs == this; +} + + +// -------------------------------------------- +// can_be_casted_with_as_operator() +// +// on `expr as <type>`, check whether casting is applicable +// note that this is not the auto-cast of `var lhs: <type> = rhs`, it's the explicit expression `rhs as <type>` +// + +bool TypeDataInt::can_be_casted_with_as_operator(TypePtr cast_to) const { + return cast_to == this; +} + +bool TypeDataCell::can_be_casted_with_as_operator(TypePtr cast_to) const { + return cast_to == this; +} + +bool TypeDataSlice::can_be_casted_with_as_operator(TypePtr cast_to) const { + return cast_to == this; +} + +bool TypeDataBuilder::can_be_casted_with_as_operator(TypePtr cast_to) const { + return cast_to == this; +} + +bool TypeDataTuple::can_be_casted_with_as_operator(TypePtr cast_to) const { + return cast_to == this; +} + +bool TypeDataContinuation::can_be_casted_with_as_operator(TypePtr cast_to) const { + return cast_to == this; +} + +bool TypeDataNullLiteral::can_be_casted_with_as_operator(TypePtr cast_to) const { + return cast_to == this + || cast_to == TypeDataInt::create() || cast_to == TypeDataCell::create() || cast_to == TypeDataSlice::create() + || cast_to == TypeDataBuilder::create() || cast_to == TypeDataContinuation::create() || cast_to == TypeDataTuple::create() + || cast_to->try_as(); +} + +bool TypeDataFunCallable::can_be_casted_with_as_operator(TypePtr cast_to) const { + return this == cast_to; +} + +bool TypeDataGenericT::can_be_casted_with_as_operator(TypePtr cast_to) const { + return true; +} + +bool
TypeDataTensor::can_be_casted_with_as_operator(TypePtr cast_to) const { + if (const auto* to_tensor = cast_to->try_as(); to_tensor && to_tensor->size() == size()) { + for (int i = 0; i < size(); ++i) { + if (!items[i]->can_be_casted_with_as_operator(to_tensor->items[i])) { + return false; + } + } + return true; + } + return false; +} + +bool TypeDataTypedTuple::can_be_casted_with_as_operator(TypePtr cast_to) const { + if (const auto* to_tuple = cast_to->try_as(); to_tuple && to_tuple->size() == size()) { + for (int i = 0; i < size(); ++i) { + if (!items[i]->can_be_casted_with_as_operator(to_tuple->items[i])) { + return false; + } + } + return true; + } + return false; +} + +bool TypeDataUnknown::can_be_casted_with_as_operator(TypePtr cast_to) const { + // 'unknown' can be cast to any type + // (though it's not valid for exception arguments when casting them to non-1 stack width, + // but to ensure it, we need a special type "unknown TVM primitive", which is overwhelming I think) + return true; +} + +bool TypeDataUnresolved::can_be_casted_with_as_operator(TypePtr cast_to) const { + return false; +} + +bool TypeDataVoid::can_be_casted_with_as_operator(TypePtr cast_to) const { + return cast_to == this; +} + + +// -------------------------------------------- +// extract_components() +// +// used in code generation (transforming Ops to other Ops) +// to be removed in the future +// + +void TypeDataGenericT::extract_components(std::vector& comp_types) const { + assert(false); +} + +void TypeDataTensor::extract_components(std::vector& comp_types) const { + for (TypePtr item : items) { + item->extract_components(comp_types); + } +} + +void TypeDataUnresolved::extract_components(std::vector& comp_types) const { + assert(false); +} + +void TypeDataVoid::extract_components(std::vector& comp_types) const { +} + + +// -------------------------------------------- +// parsing type from tokens +// +// here we implement parsing types (mostly after colon) to TypeData +// example: 
`var v: int` is TypeDataInt +// example: `var v: (builder, [cell])` is TypeDataTensor(TypeDataBuilder, TypeDataTypedTuple(TypeDataCell)) +// example: `fun f(): ()` is TypeDataTensor() (an empty one) +// +// note, that unrecognized type names (MyEnum, MyStruct, T) are parsed as TypeDataUnresolved, +// and later, when all files are parsed and all symbols registered, such identifiers are resolved +// example: `fun f(v: T)` at first v is TypeDataUnresolved("T"), later becomes TypeDataGenericT +// see finalize_type_data() +// +// note, that `self` does not name a type, it can appear only as a return value of a function (parsed specially) +// when `self` appears as a type, it's parsed as TypeDataUnresolved, and later an error is emitted +// + +static TypePtr parse_type_expression(Lexer& lex); + +std::vector parse_nested_type_list(Lexer& lex, TokenType tok_op, const char* s_op, TokenType tok_cl, const char* s_cl) { + lex.expect(tok_op, s_op); + std::vector sub_types; + while (true) { + if (lex.tok() == tok_cl) { // empty lists allowed + lex.next(); + break; + } + + sub_types.emplace_back(parse_type_expression(lex)); + if (lex.tok() == tok_comma) { + lex.next(); + } else if (lex.tok() != tok_cl) { + lex.unexpected(s_cl); + } + } + return sub_types; +} + +std::vector parse_nested_type_list_in_parenthesis(Lexer& lex) { + return parse_nested_type_list(lex, tok_oppar, "`(`", tok_clpar, "`)` or `,`"); +} + +static TypePtr parse_simple_type(Lexer& lex) { + switch (lex.tok()) { + case tok_int: + lex.next(); + return TypeDataInt::create(); + case tok_cell: + lex.next(); + return TypeDataCell::create(); + case tok_builder: + lex.next(); + return TypeDataBuilder::create(); + case tok_slice: + lex.next(); + return TypeDataSlice::create(); + case tok_tuple: + lex.next(); + return TypeDataTuple::create(); + case tok_continuation: + lex.next(); + return TypeDataContinuation::create(); + case tok_null: + lex.next(); + return TypeDataNullLiteral::create(); + case tok_void: + lex.next(); + 
return TypeDataVoid::create(); + case tok_bool: + case tok_self: + case tok_identifier: { + SrcLocation loc = lex.cur_location(); + std::string text = static_cast(lex.cur_str()); + lex.next(); + return TypeDataUnresolved::create(std::move(text), loc); + } + case tok_oppar: { + std::vector items = parse_nested_type_list_in_parenthesis(lex); + if (items.size() == 1) { + return items.front(); + } + return TypeDataTensor::create(std::move(items)); + } + case tok_opbracket: { + std::vector items = parse_nested_type_list(lex, tok_opbracket, "`[`", tok_clbracket, "`]` or `,`"); + return TypeDataTypedTuple::create(std::move(items)); + } + case tok_fun: { + lex.next(); + std::vector params_types = parse_nested_type_list_in_parenthesis(lex); + lex.expect(tok_arrow, "`->`"); + } + default: + lex.unexpected(""); + } +} + +static TypePtr parse_type_nullable(Lexer& lex) { + TypePtr result = parse_simple_type(lex); + + if (lex.tok() == tok_question) { + lex.error("nullable types are not supported yet"); + } + + return result; +} + +static TypePtr parse_type_expression(Lexer& lex) { + TypePtr result = parse_type_nullable(lex); + + if (lex.tok() == tok_arrow) { // `int -> int`, `(cell, slice) -> void` + lex.next(); + TypePtr return_type = parse_type_expression(lex); + std::vector params_types = {result}; + if (const auto* as_tensor = result->try_as()) { + params_types = as_tensor->items; + } + return TypeDataFunCallable::create(std::move(params_types), return_type); + } + + if (lex.tok() != tok_bitwise_or) { + return result; + } + + lex.error("union types are not supported yet"); +} + +TypePtr parse_type_from_tokens(Lexer& lex) { + return parse_type_expression(lex); +} + +std::ostream& operator<<(std::ostream& os, TypePtr type_data) { + return os << (type_data ? 
type_data->as_human_readable() : "(nullptr-type)"); +} + +} // namespace tolk diff --git a/tolk/type-system.h b/tolk/type-system.h new file mode 100644 index 00000000..2805bb34 --- /dev/null +++ b/tolk/type-system.h @@ -0,0 +1,405 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +#include "src-file.h" +#include +#include +#include + +namespace tolk { + +/* + * TypeData is both a user-given and an inferred type representation. + * `int`, `cell`, `T`, `(int, [tuple])` are instances of TypeData. + * Every unique TypeData is created only once, so for example TypeDataTensor::create(int, int) + * returns one and the same pointer always. This "uniqueness" is called type_id, calculated before creation. + * + * In Tolk code, types after colon `var v: (int, T)` are parsed to TypeData. + * See parse_type_from_tokens(). + * So, AST nodes which can have declared types (local/global variables and others) store a pointer to TypeData. + * + * Type inferring also creates TypeData for inferred expressions. All AST expression nodes have inferred_type. + * For example, `1 + 2`, both operands are TypeDataInt, its result is also TypeDataInt. + * Type checking also uses TypeData. 
For example, `var i: slice = 1 + 2`, at first rhs (TypeDataInt) is inferred, + * then lhs (TypeDataSlice from declaration) is checked whether rhs can be assigned. + * See can_rhs_be_assigned(). + * + * Note, that while initial parsing Tolk files to AST, known types (`int`, `cell`, etc.) are created as-is, + * but user-defined types (`T`, `MyStruct`, `MyAlias`) are saved as TypeDataUnresolved. + * After all symbols have been registered, resolving identifiers step is executed, where particularly + * all TypeDataUnresolved instances are converted to a resolved type. At inferring, no unresolved remain. + * For instance, `fun f(v: T)`, at first "T" of `v` is unresolved, and then converted to TypeDataGenericT. + */ +class TypeData { + // all unique types have unique type_id; it's used both for allocating memory once and for tagged unions + const uint64_t type_id; + // bits of flag_mask, to store often-used properties and return them without tree traversing + const int flags; + + friend class TypeDataTypeIdCalculation; + +protected: + enum flag_mask { + flag_contains_unknown_inside = 1 << 1, + flag_contains_genericT_inside = 1 << 2, + flag_contains_unresolved_inside = 1 << 3, + }; + + explicit TypeData(uint64_t type_id, int flags_with_children) + : type_id(type_id) + , flags(flags_with_children) { + } + +public: + virtual ~TypeData() = default; + + template + const Derived* try_as() const { + return dynamic_cast(this); + } + + uint64_t get_type_id() const { return type_id; } + + bool has_unknown_inside() const { return flags & flag_contains_unknown_inside; } + bool has_genericT_inside() const { return flags & flag_contains_genericT_inside; } + bool has_unresolved_inside() const { return flags & flag_contains_unresolved_inside; } + + using TraverserCallbackT = std::function; + using ReplacerCallbackT = std::function; + + virtual std::string as_human_readable() const = 0; + virtual bool can_rhs_be_assigned(TypePtr rhs) const = 0; + virtual bool 
can_be_casted_with_as_operator(TypePtr cast_to) const = 0; + + virtual void traverse(const TraverserCallbackT& callback) const { + callback(this); + } + + virtual TypePtr replace_children_custom(const ReplacerCallbackT& callback) const { + return callback(this); + } + + virtual int calc_width_on_stack() const { + return 1; + } + + virtual void extract_components(std::vector& comp_types) const { + comp_types.push_back(this); + } +}; + +/* + * `int` is TypeDataInt, representation of TVM int. + */ +class TypeDataInt final : public TypeData { + TypeDataInt() : TypeData(1ULL, 0) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "int"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; +}; + +/* + * `cell` is TypeDataCell, representation of TVM cell. + */ +class TypeDataCell final : public TypeData { + TypeDataCell() : TypeData(3ULL, 0) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "cell"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; +}; + +/* + * `slice` is TypeDataSlice, representation of TVM slice. + */ +class TypeDataSlice final : public TypeData { + TypeDataSlice() : TypeData(4ULL, 0) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "slice"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; +}; + +/* + * `builder` is TypeDataBuilder, representation of TVM builder. 
+ */ +class TypeDataBuilder final : public TypeData { + TypeDataBuilder() : TypeData(5ULL, 0) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "builder"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; +}; + +/* + * `tuple` is TypeDataTuple, representation of TVM tuple. + * Note, that it's UNTYPED tuple. It occupies 1 stack slot in TVM. Its elements are any TVM values at runtime, + * so getting its element results in TypeDataUnknown (which must be assigned/cast explicitly). + */ +class TypeDataTuple final : public TypeData { + TypeDataTuple() : TypeData(6ULL, 0) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "tuple"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; +}; + +/* + * `continuation` is TypeDataContinuation, representation of TVM continuation. + * It's like "untyped callable", not compatible with other types. + */ +class TypeDataContinuation final : public TypeData { + TypeDataContinuation() : TypeData(7ULL, 0) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "continuation"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; +}; + +/* + * `null` has TypeDataNullLiteral type. + * Currently, it can be assigned to int/slice/etc., but later Tolk will have T? types and null safety. 
+ * Note, that `var i = null`, though valid (i would be constant null), fires an "always-null" compilation error + * (it's much better for user to see an error here than when he passes this variable somewhere). + */ +class TypeDataNullLiteral final : public TypeData { + TypeDataNullLiteral() : TypeData(8ULL, 0) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "null"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; +}; + +/* + * `fun(int, int) -> void` is TypeDataFunCallable, think of is as a typed continuation. + * A type of function `fun f(x: int) { return x; }` is actually `fun(int) -> int`. + * So, when assigning it to a variable `var cb = f`, this variable also has this type. + */ +class TypeDataFunCallable final : public TypeData { + TypeDataFunCallable(uint64_t type_id, int children_flags, std::vector&& params_types, TypePtr return_type) + : TypeData(type_id, children_flags) + , params_types(std::move(params_types)) + , return_type(return_type) {} + +public: + const std::vector params_types; + const TypePtr return_type; + + static TypePtr create(std::vector&& params_types, TypePtr return_type); + + int params_size() const { return static_cast(params_types.size()); } + + std::string as_human_readable() const override; + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; + void traverse(const TraverserCallbackT& callback) const override; + TypePtr replace_children_custom(const ReplacerCallbackT& callback) const override; +}; + +/* + * `T` inside generic functions is TypeDataGenericT. + * Example: `fun f(a: X, b: Y): [X, Y]` (here X and Y are). + * On instantiation like `f(1,"")`, a new function `f` is created with type `fun(int,slice)->[int,slice]`. 
+ */ +class TypeDataGenericT final : public TypeData { + TypeDataGenericT(uint64_t type_id, std::string&& nameT) + : TypeData(type_id, flag_contains_genericT_inside) + , nameT(std::move(nameT)) {} + +public: + const std::string nameT; + + static TypePtr create(std::string&& nameT); + + std::string as_human_readable() const override { return nameT; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; + int calc_width_on_stack() const override; + void extract_components(std::vector& comp_types) const override; +}; + +/* + * `(int, slice)` is TypeDataTensor of 2 elements. Tensor of N elements occupies N stack slots. + * Of course, there may be nested tensors, like `(int, (int, slice), cell)`. + * Arguments, variables, globals, return values, etc. can be tensors. + * A tensor can be empty. + */ +class TypeDataTensor final : public TypeData { + TypeDataTensor(uint64_t type_id, int children_flags, std::vector&& items) + : TypeData(type_id, children_flags) + , items(std::move(items)) {} + +public: + const std::vector items; + + static TypePtr create(std::vector&& items); + + int size() const { return static_cast(items.size()); } + + std::string as_human_readable() const override; + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; + void traverse(const TraverserCallbackT& callback) const override; + TypePtr replace_children_custom(const ReplacerCallbackT& callback) const override; + int calc_width_on_stack() const override; + void extract_components(std::vector& comp_types) const override; +}; + +/* + * `[int, slice]` is TypeDataTypedTuple, a TVM 'tuple' under the hood, contained in 1 stack slot. + * Unlike TypeDataTuple (untyped tuples), it has a predefined inner structure and can be assigned as + * `var [i, cs] = [0, ""]` (where a and b become two separate variables on a stack, int and slice). 
+ */ +class TypeDataTypedTuple final : public TypeData { + TypeDataTypedTuple(uint64_t type_id, int children_flags, std::vector&& items) + : TypeData(type_id, children_flags) + , items(std::move(items)) {} + +public: + const std::vector items; + + static TypePtr create(std::vector&& items); + + int size() const { return static_cast(items.size()); } + + std::string as_human_readable() const override; + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; + void traverse(const TraverserCallbackT& callback) const override; + TypePtr replace_children_custom(const ReplacerCallbackT& callback) const override; +}; + +/* + * `unknown` is a special type, which can appear in corner cases. + * The type of exception argument (which can hold any TVM value at runtime) is unknown. + * The type of `_` used as rvalue is unknown. + * The only thing available to do with unknown is to cast it: `catch (excNo, arg) { var i = arg as int; }` + */ +class TypeDataUnknown final : public TypeData { + TypeDataUnknown() : TypeData(20ULL, flag_contains_unknown_inside) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "unknown"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; +}; + +/* + * "Unresolved" is not actually a type — it's an intermediate state between parsing and resolving. + * At parsing to AST, unrecognized type names (MyEnum, MyStruct, T) are parsed as TypeDataUnresolved, + * and after all source files parsed and global symbols registered, they are replaced by actual ones. + * Example: `fun f(v: T)` at first v is TypeDataUnresolved("T"), later becomes TypeDataGenericT. 
+ */ +class TypeDataUnresolved final : public TypeData { + TypeDataUnresolved(uint64_t type_id, std::string&& text, SrcLocation loc) + : TypeData(type_id, flag_contains_unresolved_inside) + , text(std::move(text)) + , loc(loc) {} + +public: + const std::string text; + const SrcLocation loc; + + static TypePtr create(std::string&& text, SrcLocation loc); + + std::string as_human_readable() const override { return text + "*"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; + int calc_width_on_stack() const override; + void extract_components(std::vector& comp_types) const override; +}; + +/* + * `void` is TypeDataVoid. + * From the type system point of view, `void` functions return nothing. + * Empty tensor is not compatible with void, although at IR level they are similar, 0 stack slots. + */ +class TypeDataVoid final : public TypeData { + TypeDataVoid() : TypeData(10ULL, 0) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "void"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; + int calc_width_on_stack() const override; + void extract_components(std::vector& comp_types) const override; +}; + + +// -------------------------------------------- + + +class Lexer; +TypePtr parse_type_from_tokens(Lexer& lex); + +void type_system_init(); + +} // namespace tolk diff --git a/tolk/unify-types.cpp b/tolk/unify-types.cpp deleted file mode 100644 index 3712c6f5..00000000 --- a/tolk/unify-types.cpp +++ /dev/null @@ -1,454 +0,0 @@ -/* - This file is part of TON Blockchain Library. 
- - TON Blockchain Library is free software: you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - TON Blockchain Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with TON Blockchain Library. If not, see . -*/ -#include "tolk.h" - -namespace tolk { - -/* - * - * TYPE EXPRESSIONS - * - */ - -int TypeExpr::holes = 0, TypeExpr::type_vars = 0; // not thread safe, but it is ok for now - -void TypeExpr::compute_width() { - switch (constr) { - case te_Atomic: - case te_Map: - minw = maxw = 1; - break; - case te_Tensor: - minw = maxw = 0; - for (TypeExpr* arg : args) { - minw += arg->minw; - maxw += arg->maxw; - } - if (minw > w_inf) { - minw = w_inf; - } - if (maxw > w_inf) { - maxw = w_inf; - } - break; - case te_Tuple: - minw = maxw = 1; - for (TypeExpr* arg : args) { - arg->compute_width(); - } - break; - case te_Indirect: - minw = args[0]->minw; - maxw = args[0]->maxw; - break; - default: - minw = 0; - maxw = w_inf; - break; - } -} - -bool TypeExpr::recompute_width() { - switch (constr) { - case te_Tensor: - case te_Indirect: { - int min = 0, max = 0; - for (TypeExpr* arg : args) { - min += arg->minw; - max += arg->maxw; - } - if (min > maxw || max < minw) { - return false; - } - if (min > w_inf) { - min = w_inf; - } - if (max > w_inf) { - max = w_inf; - } - if (minw < min) { - minw = min; - } - if (maxw > max) { - maxw = max; - } - return true; - } - case te_Tuple: { - for (TypeExpr* arg : args) { - if (arg->minw > 1 || arg->maxw < 1 || arg->minw > arg->maxw) { - return false; - } - } - return true; - } - default: - return 
false; - } -} - -int TypeExpr::extract_components(std::vector& comp_list) { - if (constr != te_Indirect && constr != te_Tensor) { - comp_list.push_back(this); - return 1; - } - int res = 0; - for (TypeExpr* arg : args) { - res += arg->extract_components(comp_list); - } - return res; -} - -bool TypeExpr::equals_to(const TypeExpr *rhs) const { - const TypeExpr *l = this; - const TypeExpr *r = rhs; - while (l->constr == te_Indirect) - l = l->args[0]; - while (r->constr == te_Indirect) - r = r->args[0]; - - bool eq = l->constr == r->constr && (l->constr == te_Unknown || l->value == r->value) && - l->minw == r->minw && l->maxw == r->maxw && - l->was_forall_var == r->was_forall_var && - l->args.size() == r->args.size(); - if (!eq) - return false; - - for (int i = 0; i < static_cast(l->args.size()); ++i) { - if (!l->args[i]->equals_to(r->args[i])) - return false; - } - return true; -} - -bool TypeExpr::has_unknown_inside() const { - if (constr == te_Unknown) - return true; - - for (const TypeExpr* inner : args) { - if (inner->has_unknown_inside()) - return true; - } - return false; -} - -TypeExpr* TypeExpr::new_map(TypeExpr* from, TypeExpr* to) { - return new TypeExpr{te_Map, std::vector{from, to}}; -} - -void TypeExpr::replace_with(TypeExpr* te2) { - if (te2 == this) { - return; - } - constr = te_Indirect; - value = 0; - minw = te2->minw; - maxw = te2->maxw; - args.clear(); - args.push_back(te2); -} - -bool TypeExpr::remove_indirect(TypeExpr*& te, TypeExpr* forbidden) { - tolk_assert(te); - while (te->constr == te_Indirect) { - te = te->args[0]; - } - if (te->constr == te_Unknown) { - return te != forbidden; - } - bool res = true; - for (auto& x : te->args) { - res &= remove_indirect(x, forbidden); - } - return res; -} - -std::vector TypeExpr::remove_forall(TypeExpr*& te) { - tolk_assert(te && te->constr == te_ForAll); - tolk_assert(te->args.size() >= 1); - std::vector new_vars; - for (std::size_t i = 1; i < te->args.size(); i++) { - new_vars.push_back(new_hole(1)); - } 
- TypeExpr* te2 = te; - // std::cerr << "removing universal quantifier in " << te << std::endl; - te = te->args[0]; - remove_forall_in(te, te2, new_vars); - // std::cerr << "-> " << te << std::endl; - return new_vars; -} - -bool TypeExpr::remove_forall_in(TypeExpr*& te, TypeExpr* te2, const std::vector& new_vars) { - tolk_assert(te); - tolk_assert(te2 && te2->constr == te_ForAll); - if (te->constr == te_Var) { - for (std::size_t i = 0; i < new_vars.size(); i++) { - if (te == te2->args[i + 1]) { - te = new_vars[i]; - return true; - } - } - return false; - } - if (te->constr == te_ForAll) { - return false; - } - if (te->args.empty()) { - return false; - } - auto te1 = new TypeExpr(*te); - bool res = false; - for (auto& arg : te1->args) { - res |= remove_forall_in(arg, te2, new_vars); - } - if (res) { - te = te1; - } else { - delete te1; - } - return res; -} - -void TypeExpr::show_width(std::ostream& os) { - os << minw; - if (maxw != minw) { - os << ".."; - if (maxw < w_inf) { - os << maxw; - } - } -} - -std::ostream& operator<<(std::ostream& os, TypeExpr* type_expr) { - if (!type_expr) { - return os << "(null-type-ptr)"; - } - return type_expr->print(os); -} - -std::ostream& TypeExpr::print(std::ostream& os, int lex_level) const { - switch (constr) { - case te_Unknown: - return os << "??" 
<< value; - case te_Var: - if (value >= -26 && value < 0) { - return os << "_" << (char)(91 + value); - } else if (value >= 0 && value < 26) { - return os << (char)(65 + value); - } else { - return os << "TVAR" << value; - } - case te_Indirect: - return os << args[0]; - case te_Atomic: { - switch (value) { - case _Int: - return os << "int"; - case _Cell: - return os << "cell"; - case _Slice: - return os << "slice"; - case _Builder: - return os << "builder"; - case _Continutaion: - return os << "cont"; - case _Tuple: - return os << "tuple"; - default: - return os << "atomic-type-" << value; - } - } - case te_Tensor: { - if (lex_level > -127) { - os << "("; - } - auto c = args.size(); - if (c) { - for (const auto& x : args) { - x->print(os); - if (--c) { - os << ", "; - } - } - } - if (lex_level > -127) { - os << ")"; - } - return os; - } - case te_Tuple: { - os << "["; - auto c = args.size(); - if (c == 1 && args[0]->constr == te_Tensor) { - args[0]->print(os, -127); - } else if (c) { - for (const auto& x : args) { - x->print(os); - if (--c) { - os << ", "; - } - } - } - return os << "]"; - } - case te_Map: { - tolk_assert(args.size() == 2); - if (lex_level > 0) { - os << "("; - } - args[0]->print(os, 1); - os << " -> "; - args[1]->print(os); - if (lex_level > 0) { - os << ")"; - } - return os; - } - case te_ForAll: { - tolk_assert(args.size() >= 1); - if (lex_level > 0) { - os << '('; - } - os << "Forall "; - for (std::size_t i = 1; i < args.size(); i++) { - os << (i > 1 ? 
' ' : '('); - args[i]->print(os); - } - os << ") "; - args[0]->print(os); - if (lex_level > 0) { - os << ')'; - } - return os; - } - default: - return os << "unknown-type-expr-" << constr; - } -} - -void UnifyError::print_message(std::ostream& os) const { - os << "cannot unify type " << te1 << " with " << te2; - if (!msg.empty()) { - os << ": " << msg; - } -} - -std::ostream& operator<<(std::ostream& os, const UnifyError& ue) { - ue.print_message(os); - return os; -} - -void check_width_compat(TypeExpr* te1, TypeExpr* te2) { - if (te1->minw > te2->maxw || te2->minw > te1->maxw) { - std::ostringstream os{"cannot unify types of widths ", std::ios_base::ate}; - te1->show_width(os); - os << " and "; - te2->show_width(os); - throw UnifyError{te1, te2, os.str()}; - } -} - -void check_update_widths(TypeExpr* te1, TypeExpr* te2) { - check_width_compat(te1, te2); - te1->minw = te2->minw = std::max(te1->minw, te2->minw); - te1->maxw = te2->maxw = std::min(te1->maxw, te2->maxw); - tolk_assert(te1->minw <= te1->maxw); -} - -void unify(TypeExpr*& te1, TypeExpr*& te2) { - tolk_assert(te1 && te2); - // std::cerr << "unify( " << te1 << " , " << te2 << " )\n"; - while (te1->constr == TypeExpr::te_Indirect) { - te1 = te1->args[0]; - } - while (te2->constr == TypeExpr::te_Indirect) { - te2 = te2->args[0]; - } - if (te1 == te2) { - return; - } - if (te1->constr == TypeExpr::te_ForAll) { - TypeExpr* te = te1; - std::vector new_vars = TypeExpr::remove_forall(te); - for (TypeExpr* t : new_vars) { - t->was_forall_var = true; - } - unify(te, te2); - for (TypeExpr* t : new_vars) { - t->was_forall_var = false; - } - return; - } - if (te2->constr == TypeExpr::te_ForAll) { - TypeExpr* te = te2; - std::vector new_vars = TypeExpr::remove_forall(te); - for (TypeExpr* t : new_vars) { - t->was_forall_var = true; - } - unify(te1, te); - for (TypeExpr* t : new_vars) { - t->was_forall_var = false; - } - return; - } - if (te1->was_forall_var && te2->constr == TypeExpr::te_Tensor) { - throw 
UnifyError{te1, te2, "cannot unify generic type and tensor"}; - } - if (te2->was_forall_var && te1->constr == TypeExpr::te_Tensor) { - throw UnifyError{te2, te1, "cannot unify generic type and tensor"}; - } - if (te1->constr == TypeExpr::te_Unknown) { - if (te2->constr == TypeExpr::te_Unknown) { - tolk_assert(te1->value != te2->value); - } - if (!TypeExpr::remove_indirect(te2, te1)) { - throw UnifyError{te1, te2, "type unification results in an infinite cyclic type"}; - } - check_update_widths(te1, te2); - te1->replace_with(te2); - te1 = te2; - return; - } - if (te2->constr == TypeExpr::te_Unknown) { - if (!TypeExpr::remove_indirect(te1, te2)) { - throw UnifyError{te2, te1, "type unification results in an infinite cyclic type"}; - } - check_update_widths(te2, te1); - te2->replace_with(te1); - te2 = te1; - return; - } - if (te1->constr != te2->constr || te1->value != te2->value || te1->args.size() != te2->args.size()) { - throw UnifyError{te1, te2}; - } - for (std::size_t i = 0; i < te1->args.size(); i++) { - unify(te1->args[i], te2->args[i]); - } - if (te1->constr == TypeExpr::te_Tensor) { - if (!te1->recompute_width()) { - throw UnifyError{te1, te2, "type unification incompatible with known width of first type"}; - } - if (!te2->recompute_width()) { - throw UnifyError{te2, te1, "type unification incompatible with known width of first type"}; - } - check_update_widths(te1, te2); - } - te1->replace_with(te2); - te1 = te2; -} - -} // namespace tolk From 974d76c5f6e7597cde562219bc069c05e9313ea3 Mon Sep 17 00:00:00 2001 From: tolk-vm Date: Mon, 13 Jan 2025 15:21:24 +0700 Subject: [PATCH 17/61] [Tolk] `bool` type (-1/0 int under the hood) Comparison operators `== / >= /...` return `bool`. Logical operators `&& ||` return bool. Constants `true` and `false` have the `bool` type. Lots of stdlib functions return `bool`, not `int`. Operator `!x` supports both `int` and `bool`. Condition of `if` accepts both `int` and `bool`. Arithmetic operators are restricted to integers. 
Logical `&&` and `||` accept both `bool` and `int`. No arithmetic operations with bools allowed (only bitwise and logical). --- crypto/smartcont/tolk-stdlib/common.tolk | 28 +-- crypto/smartcont/tolk-stdlib/tvm-dicts.tolk | 106 +++++------ .../tests/allow_post_modification.tolk | 2 +- tolk-tester/tests/bit-operators.tolk | 163 ++++++++++++++++- tolk-tester/tests/c2.tolk | 5 +- tolk-tester/tests/c2_1.tolk | 2 +- tolk-tester/tests/cells-slices.tolk | 10 +- tolk-tester/tests/codegen_check_demo.tolk | 4 +- tolk-tester/tests/imports/use-dicts.tolk | 2 +- tolk-tester/tests/invalid-typing-10.tolk | 8 + tolk-tester/tests/invalid-typing-11.tolk | 11 ++ tolk-tester/tests/invalid-typing-2.tolk | 4 +- tolk-tester/tests/invalid-typing-6.tolk | 2 +- tolk-tester/tests/logical-operators.tolk | 71 +++++++- tolk-tester/tests/no-spaces.tolk | 2 +- tolk-tester/tests/null-keyword.tolk | 4 +- .../{op_priority.tolk => op-priority.tolk} | 30 +-- tolk-tester/tests/test-math.tolk | 6 +- tolk-tester/tests/unbalanced_ret_loops.tolk | 2 +- tolk-tester/tests/w6.tolk | 2 +- tolk-tester/tests/w7.tolk | 2 +- tolk/CMakeLists.txt | 1 + tolk/ast-from-tokens.cpp | 13 +- tolk/builtins.cpp | 105 ++++++----- tolk/pipe-ast-to-legacy.cpp | 2 + tolk/pipe-constant-folding.cpp | 19 +- tolk/pipe-infer-types-and-calls.cpp | 153 ++++++++++++---- tolk/pipe-optimize-boolean-expr.cpp | 172 ++++++++++++++++++ tolk/pipe-resolve-identifiers.cpp | 3 - tolk/pipeline.h | 1 + tolk/tolk.cpp | 1 + tolk/type-system.cpp | 22 ++- tolk/type-system.h | 18 ++ 33 files changed, 764 insertions(+), 212 deletions(-) create mode 100644 tolk-tester/tests/invalid-typing-10.tolk create mode 100644 tolk-tester/tests/invalid-typing-11.tolk rename tolk-tester/tests/{op_priority.tolk => op-priority.tolk} (73%) create mode 100644 tolk/pipe-optimize-boolean-expr.cpp diff --git a/crypto/smartcont/tolk-stdlib/common.tolk b/crypto/smartcont/tolk-stdlib/common.tolk index 03638f0a..3aae9d3e 100644 --- a/crypto/smartcont/tolk-stdlib/common.tolk +++ 
b/crypto/smartcont/tolk-stdlib/common.tolk @@ -205,7 +205,7 @@ fun stringHash(s: slice): int /// That is, if [hash] is computed as the hash of some data, these data are hashed twice, /// the second hashing occurring inside `CHKSIGNS`. @pure -fun isSignatureValid(hash: int, signature: slice, publicKey: int): int +fun isSignatureValid(hash: int, signature: slice, publicKey: int): bool asm "CHKSIGNU"; /// Checks whether [signature] is a valid Ed25519-signature of the data portion of `slice data` using `publicKey`, @@ -214,7 +214,7 @@ fun isSignatureValid(hash: int, signature: slice, publicKey: int): int /// The verification of Ed25519 signatures is the standard one, /// with sha256 used to reduce [data] to the 256-bit number that is actually signed. @pure -fun isSliceSignatureValid(data: slice, signature: slice, publicKey: int): int +fun isSliceSignatureValid(data: slice, signature: slice, publicKey: int): bool asm "CHKSIGNS"; /// Generates a new pseudo-random unsigned 256-bit integer x. @@ -259,14 +259,14 @@ fun randomizeByLogicalTime(): void /// otherwise the computation is aborted before visiting the `(maxCells + 1)`-st cell and /// a zero flag is returned to indicate failure. If [c] is `null`, returns `x = y = z = 0`. @pure -fun calculateCellSize(c: cell, maxCells: int): (int, int, int, int) +fun calculateCellSize(c: cell, maxCells: int): (int, int, int, bool) asm "CDATASIZEQ NULLSWAPIFNOT2 NULLSWAPIFNOT"; /// Similar to [calculateCellSize], but accepting a `slice` [s] instead of a `cell`. /// The returned value of `x` does not take into account the cell that contains the `slice` [s] itself; /// however, the data bits and the cell references of [s] are accounted for in `y` and `z`. 
@pure -fun calculateSliceSize(s: slice, maxCells: int): (int, int, int, int) +fun calculateSliceSize(s: slice, maxCells: int): (int, int, int, bool) asm "SDATASIZEQ NULLSWAPIFNOT2 NULLSWAPIFNOT"; /// A non-quiet version of [calculateCellSize] that throws a cell overflow exception (`8`) on failure. @@ -382,7 +382,7 @@ fun loadCoins(mutate self: slice): int /// Loads bool (-1 or 0) from a slice @pure -fun loadBool(mutate self: slice): int +fun loadBool(mutate self: slice): bool asm( -> 1 0) "1 LDI"; /// Shifts a slice pointer to [len] bits forward, mutating the slice. @@ -482,7 +482,7 @@ fun storeCoins(mutate self: builder, x: int): self /// Stores bool (-1 or 0) into a builder. /// Attention: true value is `-1`, not 1! If you pass `1` here, TVM will throw an exception. @pure -fun storeBool(mutate self: builder, x: int): self +fun storeBool(mutate self: builder, x: bool): self asm(x self) "1 STI"; /// Stores dictionary (represented by TVM `cell` or `null`) into a builder. @@ -529,22 +529,22 @@ fun getRemainingBitsAndRefsCount(self: slice): (int, int) /// Checks whether a slice is empty (i.e., contains no bits of data and no cell references). @pure -fun isEndOfSlice(self: slice): int +fun isEndOfSlice(self: slice): bool asm "SEMPTY"; /// Checks whether a slice has no bits of data. @pure -fun isEndOfSliceBits(self: slice): int +fun isEndOfSliceBits(self: slice): bool asm "SDEMPTY"; /// Checks whether a slice has no references. @pure -fun isEndOfSliceRefs(self: slice): int +fun isEndOfSliceRefs(self: slice): bool asm "SREMPTY"; /// Checks whether data parts of two slices coinside. @pure -fun isSliceBitsEqual(self: slice, b: slice): int +fun isSliceBitsEqual(self: slice, b: slice): bool asm "SDEQ"; /// Returns the number of cell references already stored in a builder. 
@@ -621,10 +621,10 @@ fun parseStandardAddress(s: slice): (int, int) fun createAddressNone(): slice asm "b{00} PUSHSLICE"; -/// Returns if a slice pointer contains an empty address (`-1` for true, `0` for false, as always). +/// Returns if a slice pointer contains an empty address. /// In other words, a slice starts with two `0` bits (TL addr_none$00). @pure -fun addressIsNone(s: slice): int +fun addressIsNone(s: slice): bool asm "2 PLDU" "0 EQINT"; @@ -677,8 +677,8 @@ fun loadMessageFlags(mutate self: slice): int /// Having msgFlags (4 bits), check that a message is bounced. /// Effectively, it's `msgFlags & 1` (the lowest bit present). @pure -fun isMessageBounced(msgFlags: int): int - asm "1 PUSHINT" "AND"; +fun isMessageBounced(msgFlags: int): bool + asm "2 PUSHINT" "MODR"; /// Skip 0xFFFFFFFF prefix (when a message is bounced). @pure diff --git a/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk b/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk index 9fba24d9..5c9c3152 100644 --- a/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk +++ b/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk @@ -19,20 +19,20 @@ fun createEmptyDict(): cell /// Checks whether a dictionary is empty. 
@pure -fun dictIsEmpty(self: cell): int +fun dictIsEmpty(self: cell): bool asm "DICTEMPTY"; @pure -fun iDictGet(self: cell, keyLen: int, key: int): (slice, int) +fun iDictGet(self: cell, keyLen: int, key: int): (slice, bool) asm(key self keyLen) "DICTIGET" "NULLSWAPIFNOT"; @pure -fun uDictGet(self: cell, keyLen: int, key: int): (slice, int) +fun uDictGet(self: cell, keyLen: int, key: int): (slice, bool) asm(key self keyLen) "DICTUGET" "NULLSWAPIFNOT"; @pure -fun sDictGet(self: cell, keyLen: int, key: slice): (slice, int) +fun sDictGet(self: cell, keyLen: int, key: slice): (slice, bool) asm(key self keyLen) "DICTGET" "NULLSWAPIFNOT"; @@ -63,33 +63,33 @@ fun sDictSetRef(mutate self: cell, keyLen: int, key: slice, value: cell): void @pure -fun iDictSetIfNotExists(mutate self: cell, keyLen: int, key: int, value: slice): int +fun iDictSetIfNotExists(mutate self: cell, keyLen: int, key: int, value: slice): bool asm(value key self keyLen) "DICTIADD"; @pure -fun uDictSetIfNotExists(mutate self: cell, keyLen: int, key: int, value: slice): int +fun uDictSetIfNotExists(mutate self: cell, keyLen: int, key: int, value: slice): bool asm(value key self keyLen) "DICTUADD"; @pure -fun iDictSetIfExists(mutate self: cell, keyLen: int, key: int, value: slice): int +fun iDictSetIfExists(mutate self: cell, keyLen: int, key: int, value: slice): bool asm(value key self keyLen) "DICTIREPLACE"; @pure -fun uDictSetIfExists(mutate self: cell, keyLen: int, key: int, value: slice): int +fun uDictSetIfExists(mutate self: cell, keyLen: int, key: int, value: slice): bool asm(value key self keyLen) "DICTUREPLACE"; @pure -fun iDictGetRef(self: cell, keyLen: int, key: int): (cell, int) +fun iDictGetRef(self: cell, keyLen: int, key: int): (cell, bool) asm(key self keyLen) "DICTIGETREF" "NULLSWAPIFNOT"; @pure -fun uDictGetRef(self: cell, keyLen: int, key: int): (cell, int) +fun uDictGetRef(self: cell, keyLen: int, key: int): (cell, bool) asm(key self keyLen) "DICTUGETREF" "NULLSWAPIFNOT"; @pure -fun 
sDictGetRef(self: cell, keyLen: int, key: slice): (cell, int) +fun sDictGetRef(self: cell, keyLen: int, key: slice): (cell, bool) asm(key self keyLen) "DICTGETREF" "NULLSWAPIFNOT"; @@ -107,28 +107,28 @@ fun sDictGetRefOrNull(self: cell, keyLen: int, key: slice): cell @pure -fun iDictDelete(mutate self: cell, keyLen: int, key: int): int +fun iDictDelete(mutate self: cell, keyLen: int, key: int): bool asm(key self keyLen) "DICTIDEL"; @pure -fun uDictDelete(mutate self: cell, keyLen: int, key: int): int +fun uDictDelete(mutate self: cell, keyLen: int, key: int): bool asm(key self keyLen) "DICTUDEL"; @pure -fun sDictDelete(mutate self: cell, keyLen: int, key: slice): int +fun sDictDelete(mutate self: cell, keyLen: int, key: slice): bool asm(key self keyLen) "DICTDEL"; @pure -fun iDictSetAndGet(mutate self: cell, keyLen: int, key: int, value: slice): (slice, int) +fun iDictSetAndGet(mutate self: cell, keyLen: int, key: int, value: slice): (slice, bool) asm(value key self keyLen) "DICTISETGET" "NULLSWAPIFNOT"; @pure -fun uDictSetAndGet(mutate self: cell, keyLen: int, key: int, value: slice): (slice, int) +fun uDictSetAndGet(mutate self: cell, keyLen: int, key: int, value: slice): (slice, bool) asm(value key self keyLen) "DICTUSETGET" "NULLSWAPIFNOT"; @pure -fun sDictSetAndGet(mutate self: cell, keyLen: int, key: slice, value: slice): (slice, int) +fun sDictSetAndGet(mutate self: cell, keyLen: int, key: slice, value: slice): (slice, bool) asm(value key self keyLen) "DICTSETGET" "NULLSWAPIFNOT"; @@ -142,15 +142,15 @@ fun uDictSetAndGetRefOrNull(mutate self: cell, keyLen: int, key: int, value: cel @pure -fun iDictDeleteAndGet(mutate self: cell, keyLen: int, key: int): (slice, int) +fun iDictDeleteAndGet(mutate self: cell, keyLen: int, key: int): (slice, bool) asm(key self keyLen) "DICTIDELGET" "NULLSWAPIFNOT"; @pure -fun uDictDeleteAndGet(mutate self: cell, keyLen: int, key: int): (slice, int) +fun uDictDeleteAndGet(mutate self: cell, keyLen: int, key: int): (slice, bool) 
asm(key self keyLen) "DICTUDELGET" "NULLSWAPIFNOT"; @pure -fun sDictDeleteAndGet(mutate self: cell, keyLen: int, key: slice): (slice, int) +fun sDictDeleteAndGet(mutate self: cell, keyLen: int, key: slice): (slice, bool) asm(key self keyLen) "DICTDELGET" "NULLSWAPIFNOT"; @@ -168,129 +168,129 @@ fun sDictSetBuilder(mutate self: cell, keyLen: int, key: slice, value: builder): @pure -fun iDictSetBuilderIfNotExists(mutate self: cell, keyLen: int, key: int, value: builder): int +fun iDictSetBuilderIfNotExists(mutate self: cell, keyLen: int, key: int, value: builder): bool asm(value key self keyLen) "DICTIADDB"; @pure -fun uDictSetBuilderIfNotExists(mutate self: cell, keyLen: int, key: int, value: builder): int +fun uDictSetBuilderIfNotExists(mutate self: cell, keyLen: int, key: int, value: builder): bool asm(value key self keyLen) "DICTUADDB"; @pure -fun iDictSetBuilderIfExists(mutate self: cell, keyLen: int, key: int, value: builder): int +fun iDictSetBuilderIfExists(mutate self: cell, keyLen: int, key: int, value: builder): bool asm(value key self keyLen) "DICTIREPLACEB"; @pure -fun uDictSetBuilderIfExists(mutate self: cell, keyLen: int, key: int, value: builder): int +fun uDictSetBuilderIfExists(mutate self: cell, keyLen: int, key: int, value: builder): bool asm(value key self keyLen) "DICTUREPLACEB"; @pure -fun iDictDeleteFirstAndGet(mutate self: cell, keyLen: int): (int, slice, int) +fun iDictDeleteFirstAndGet(mutate self: cell, keyLen: int): (int, slice, bool) asm(-> 0 2 1 3) "DICTIREMMIN" "NULLSWAPIFNOT2"; @pure -fun uDictDeleteFirstAndGet(mutate self: cell, keyLen: int): (int, slice, int) +fun uDictDeleteFirstAndGet(mutate self: cell, keyLen: int): (int, slice, bool) asm(-> 0 2 1 3) "DICTUREMMIN" "NULLSWAPIFNOT2"; @pure -fun sDictDeleteFirstAndGet(mutate self: cell, keyLen: int): (slice, slice, int) +fun sDictDeleteFirstAndGet(mutate self: cell, keyLen: int): (slice, slice, bool) asm(-> 0 2 1 3) "DICTREMMIN" "NULLSWAPIFNOT2"; @pure -fun 
iDictDeleteLastAndGet(mutate self: cell, keyLen: int): (int, slice, int) +fun iDictDeleteLastAndGet(mutate self: cell, keyLen: int): (int, slice, bool) asm(-> 0 2 1 3) "DICTIREMMAX" "NULLSWAPIFNOT2"; @pure -fun uDictDeleteLastAndGet(mutate self: cell, keyLen: int): (int, slice, int) +fun uDictDeleteLastAndGet(mutate self: cell, keyLen: int): (int, slice, bool) asm(-> 0 2 1 3) "DICTUREMMAX" "NULLSWAPIFNOT2"; @pure -fun sDictDeleteLastAndGet(mutate self: cell, keyLen: int): (slice, slice, int) +fun sDictDeleteLastAndGet(mutate self: cell, keyLen: int): (slice, slice, bool) asm(-> 0 2 1 3) "DICTREMMAX" "NULLSWAPIFNOT2"; @pure -fun iDictGetFirst(self: cell, keyLen: int): (int, slice, int) +fun iDictGetFirst(self: cell, keyLen: int): (int, slice, bool) asm (-> 1 0 2) "DICTIMIN" "NULLSWAPIFNOT2"; @pure -fun uDictGetFirst(self: cell, keyLen: int): (int, slice, int) +fun uDictGetFirst(self: cell, keyLen: int): (int, slice, bool) asm (-> 1 0 2) "DICTUMIN" "NULLSWAPIFNOT2"; @pure -fun sDictGetFirst(self: cell, keyLen: int): (slice, slice, int) +fun sDictGetFirst(self: cell, keyLen: int): (slice, slice, bool) asm (-> 1 0 2) "DICTMIN" "NULLSWAPIFNOT2"; @pure -fun iDictGetFirstAsRef(self: cell, keyLen: int): (int, cell, int) +fun iDictGetFirstAsRef(self: cell, keyLen: int): (int, cell, bool) asm (-> 1 0 2) "DICTIMINREF" "NULLSWAPIFNOT2"; @pure -fun uDictGetFirstAsRef(self: cell, keyLen: int): (int, cell, int) +fun uDictGetFirstAsRef(self: cell, keyLen: int): (int, cell, bool) asm (-> 1 0 2) "DICTUMINREF" "NULLSWAPIFNOT2"; @pure -fun sDictGetFirstAsRef(self: cell, keyLen: int): (slice, cell, int) +fun sDictGetFirstAsRef(self: cell, keyLen: int): (slice, cell, bool) asm (-> 1 0 2) "DICTMINREF" "NULLSWAPIFNOT2"; @pure -fun iDictGetLast(self: cell, keyLen: int): (int, slice, int) +fun iDictGetLast(self: cell, keyLen: int): (int, slice, bool) asm (-> 1 0 2) "DICTIMAX" "NULLSWAPIFNOT2"; @pure -fun uDictGetLast(self: cell, keyLen: int): (int, slice, int) +fun uDictGetLast(self: cell, 
keyLen: int): (int, slice, bool) asm (-> 1 0 2) "DICTUMAX" "NULLSWAPIFNOT2"; @pure -fun sDictGetLast(self: cell, keyLen: int): (slice, slice, int) +fun sDictGetLast(self: cell, keyLen: int): (slice, slice, bool) asm (-> 1 0 2) "DICTMAX" "NULLSWAPIFNOT2"; @pure -fun iDictGetLastAsRef(self: cell, keyLen: int): (int, cell, int) +fun iDictGetLastAsRef(self: cell, keyLen: int): (int, cell, bool) asm (-> 1 0 2) "DICTIMAXREF" "NULLSWAPIFNOT2"; @pure -fun uDictGetLastAsRef(self: cell, keyLen: int): (int, cell, int) +fun uDictGetLastAsRef(self: cell, keyLen: int): (int, cell, bool) asm (-> 1 0 2) "DICTUMAXREF" "NULLSWAPIFNOT2"; @pure -fun sDictGetLastAsRef(self: cell, keyLen: int): (slice, cell, int) +fun sDictGetLastAsRef(self: cell, keyLen: int): (slice, cell, bool) asm (-> 1 0 2) "DICTMAXREF" "NULLSWAPIFNOT2"; @pure -fun iDictGetNext(self: cell, keyLen: int, pivot: int): (int, slice, int) +fun iDictGetNext(self: cell, keyLen: int, pivot: int): (int, slice, bool) asm(pivot self keyLen -> 1 0 2) "DICTIGETNEXT" "NULLSWAPIFNOT2"; @pure -fun uDictGetNext(self: cell, keyLen: int, pivot: int): (int, slice, int) +fun uDictGetNext(self: cell, keyLen: int, pivot: int): (int, slice, bool) asm(pivot self keyLen -> 1 0 2) "DICTUGETNEXT" "NULLSWAPIFNOT2"; @pure -fun iDictGetNextOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, int) +fun iDictGetNextOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, bool) asm(pivot self keyLen -> 1 0 2) "DICTIGETNEXTEQ" "NULLSWAPIFNOT2"; @pure -fun uDictGetNextOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, int) +fun uDictGetNextOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, bool) asm(pivot self keyLen -> 1 0 2) "DICTUGETNEXTEQ" "NULLSWAPIFNOT2"; @pure -fun iDictGetPrev(self: cell, keyLen: int, pivot: int): (int, slice, int) +fun iDictGetPrev(self: cell, keyLen: int, pivot: int): (int, slice, bool) asm(pivot self keyLen -> 1 0 2) "DICTIGETPREV" "NULLSWAPIFNOT2"; @pure -fun uDictGetPrev(self: cell, keyLen: int, 
pivot: int): (int, slice, int) +fun uDictGetPrev(self: cell, keyLen: int, pivot: int): (int, slice, bool) asm(pivot self keyLen -> 1 0 2) "DICTUGETPREV" "NULLSWAPIFNOT2"; @pure -fun iDictGetPrevOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, int) +fun iDictGetPrevOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, bool) asm(pivot self keyLen -> 1 0 2) "DICTIGETPREVEQ" "NULLSWAPIFNOT2"; @pure -fun uDictGetPrevOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, int) +fun uDictGetPrevOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, bool) asm(pivot self keyLen -> 1 0 2) "DICTUGETPREVEQ" "NULLSWAPIFNOT2"; @@ -299,13 +299,13 @@ fun uDictGetPrevOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, int) */ @pure -fun prefixDictGet(self: cell, keyLen: int, key: slice): (slice, slice, slice, int) +fun prefixDictGet(self: cell, keyLen: int, key: slice): (slice, slice, slice, bool) asm(key self keyLen) "PFXDICTGETQ" "NULLSWAPIFNOT2"; @pure -fun prefixDictSet(mutate self: cell, keyLen: int, key: slice, value: slice): int +fun prefixDictSet(mutate self: cell, keyLen: int, key: slice, value: slice): bool asm(value key self keyLen) "PFXDICTSET"; @pure -fun prefixDictDelete(mutate self: cell, keyLen: int, key: slice): int +fun prefixDictDelete(mutate self: cell, keyLen: int, key: slice): bool asm(key self keyLen) "PFXDICTDEL"; diff --git a/tolk-tester/tests/allow_post_modification.tolk b/tolk-tester/tests/allow_post_modification.tolk index 191bcf08..e374f62b 100644 --- a/tolk-tester/tests/allow_post_modification.tolk +++ b/tolk-tester/tests/allow_post_modification.tolk @@ -83,7 +83,7 @@ fun test_if_else(x: int): (int, int, int, int, int) { return (x.`~inc`(8), x + 1, x = 1, x <<= 3, x); } else { xx = 9; - return (x, x.`~inc`(-4), x.`~inc`(-1), x >= 1, x = x + xx); + return (x, x.`~inc`(-4), x.`~inc`(-1), (x >= 1) as int, x = x + xx); } } diff --git a/tolk-tester/tests/bit-operators.tolk b/tolk-tester/tests/bit-operators.tolk index 
049406af..4cb8e1ba 100644 --- a/tolk-tester/tests/bit-operators.tolk +++ b/tolk-tester/tests/bit-operators.tolk @@ -1,20 +1,20 @@ -fun lshift(): int { +fun lshift(): bool { return (1 << 0) == 1; } -fun rshift(): int { +fun rshift(): bool { return (1 >> 0) == 1; } -fun lshift_var(i: int): int { +fun lshift_var(i: int): bool { return (1 << i) == 1; } -fun rshift_var(i: int): int { +fun rshift_var(i: int): bool { return (1 >> i) == 1; } -fun main(x: int): int { +fun main(x: int): bool { if (x == 0) { return lshift(); } else if (x == 1) { @@ -31,12 +31,71 @@ fun main(x: int): int { } @method_id(11) -fun is_claimed(index: int): int { +fun is_claimed(index: int): bool { var claim_bit_index: int = index % 256; var mask: int = 1 << claim_bit_index; return (255 & mask) == mask; } +@method_id(12) +fun bit_not(i: int, b: bool): (int, bool, bool, bool, int, bool) { + var i2 = ~i; + var b2 = !b; + var (i3: int, b3: bool) = (i2, b2); + return (i3, b3, !i, !b, ~~~i, !!!b); +} + +@method_id(13) +fun boolWithBitwiseConst() { + var found = true; + return (found & false, found | true, found ^ true, found & found); +} + +global g14: int; +fun getBool() { return (g14 += 1) > 2; } + +@method_id(14) +fun boolWithBitwise(b: bool) { + g14 = 0; + return (b & getBool(), !b & getBool(), b | getBool(), !b | getBool(), b ^ getBool(), !b & getBool(), g14); +} + +@method_id(15) +fun boolWithBitwiseSet(b1: bool, b2: bool) { + b1 &= b2; + b2 |= true; + b1 |= b1 == false; + b2 ^= (b1 ^= b2); + return (b1, b2); +} + +@method_id(16) +fun testDoUntilCodegen(i: bool, n: int) { + var cnt = 0; + do { cnt += 1; } while (i); + do { cnt += 1; } while (!!i); + do { cnt += 1; } while (n); + return (cnt, !i, !n); +} + +@method_id(17) +fun testConstNegateCodegen() { + return (!0, !1, !true, !false, !!true, !!false); +} + +@method_id(18) +fun testBoolNegateOptimized(x: bool) { + return (x, !x, !!x, !!!x, !!!!true); +} + +fun eqX(x: bool) { return x; } + +@method_id(19) +fun testBoolCompareOptimized(x: bool) { + 
return (x == true, x != true, eqX(x) == false, eqX(x) != false, !!(x == !false)); +} + + /** method_id | in | out @@ -50,4 +109,96 @@ fun is_claimed(index: int): int { @testcase | 11 | 1 | -1 @testcase | 11 | 256 | -1 @testcase | 11 | 8 | 0 +@testcase | 12 | 0 0 | -1 -1 -1 -1 -1 -1 +@testcase | 12 | -1 -1 | 0 0 0 0 0 0 +@testcase | 12 | 7 0 | -8 -1 0 -1 -8 -1 +@testcase | 14 | -1 | 0 0 -1 -1 0 0 6 +@testcase | 14 | 0 | 0 0 -1 -1 -1 -1 6 +@testcase | 15 | -1 -1 | 0 -1 +@testcase | 15 | -1 0 | 0 -1 +@testcase | 16 | 0 0 | 3 -1 -1 +@testcase | 17 | | -1 0 0 -1 -1 0 +@testcase | 18 | 0 | 0 -1 0 -1 -1 +@testcase | 18 | -1 | -1 0 -1 0 -1 +@testcase | 19 | 0 | 0 -1 -1 0 0 +@testcase | 19 | -1 | -1 0 0 -1 -1 + +@fif_codegen +""" + boolWithBitwiseConst PROC:<{ + // + 0 PUSHINT // _3 + -1 PUSHINT // _3 _5 + 0 PUSHINT // _3 _5 _7 + -1 PUSHINT // _3 _5 _7 _8 + }> +""" + +@fif_codegen +""" + testDoUntilCodegen PROC:<{ + // i n + 0 PUSHINT // i n cnt=0 + UNTIL:<{ + INC // i n cnt + s2 PUSH // i n cnt i + NOT // i n cnt _6 + }> // i n cnt + UNTIL:<{ + INC // i n cnt + s2 PUSH // i n cnt i + NOT // i n cnt _9 + }> // i n cnt + UNTIL:<{ + INC // i n cnt + OVER // i n cnt n + 0 EQINT // i n cnt _12 + }> // i n cnt + s0 s2 XCHG // cnt n i + NOT // cnt n _13 + SWAP // cnt _13 n + 0 EQINT // cnt _13 _14 + }> +""" + +@fif_codegen +""" + testConstNegateCodegen PROC:<{ + // + TRUE // _0 + FALSE // _0 _1 + FALSE // _0 _1 _2 + TRUE // _0 _1 _2 _3 + TRUE // _0 _1 _2 _3 _4 + FALSE // _0 _1 _2 _3 _4 _5 + }> +""" + +@fif_codegen +""" + testBoolNegateOptimized PROC:<{ + // x + DUP // x x + NOT // x _1 + OVER // x _1 x + NOT // x _1 _2 + s2 s(-1) PUXC + TRUE // x _1 x _2 _3 + }> +""" + +@fif_codegen +""" + testBoolCompareOptimized PROC:<{ + // x + DUP // x x + NOT // x _1 + OVER // x _1 x + eqX CALLDICT // x _1 _2 + NOT // x _1 _3 + s2 PUSH // x _1 _3 x + eqX CALLDICT // x _1 _3 _4 + s3 PUSH // x _1 _3 _4 x + }> +""" */ diff --git a/tolk-tester/tests/c2.tolk b/tolk-tester/tests/c2.tolk index 
9b56a9c5..257aba5b 100644 --- a/tolk-tester/tests/c2.tolk +++ b/tolk-tester/tests/c2.tolk @@ -1,6 +1,6 @@ global op: (int, int) -> int; -fun check_assoc(a: int, b: int, c: int): int { +fun check_assoc(a: int, b: int, c: int): bool { return op(op(a, b), c) == op(a, op(b, c)); } @@ -8,8 +8,9 @@ fun unnamed_args(_: int, _: slice, _: int) { return true; } -fun main(x: int, y: int, z: int): int { +fun main(x: int, y: int, z: int): bool { op = `_+_`; + if (0) { return null; } return check_assoc(x, y, z); } diff --git a/tolk-tester/tests/c2_1.tolk b/tolk-tester/tests/c2_1.tolk index fc16b436..ef1e589a 100644 --- a/tolk-tester/tests/c2_1.tolk +++ b/tolk-tester/tests/c2_1.tolk @@ -2,7 +2,7 @@ fun check_assoc(op: (int, int) -> int, a: int, b: int, c: int) { return op(op(a, b), c) == op(a, op(b, c)); } -fun main(x: int, y: int, z: int): int { +fun main(x: int, y: int, z: int): bool { return check_assoc(`_+_`, x, y, z); } diff --git a/tolk-tester/tests/cells-slices.tolk b/tolk-tester/tests/cells-slices.tolk index adb5ad22..6f316f2e 100644 --- a/tolk-tester/tests/cells-slices.tolk +++ b/tolk-tester/tests/cells-slices.tolk @@ -162,8 +162,8 @@ fun test13() { } @method_id(114) -fun test110(x: int) { - var s = beginCell().storeBool(x < 0).storeBool(0).storeBool(x).endCell().beginParse(); +fun test110(x: bool) { + var s = beginCell().storeBool(x == true).storeBool(false).storeBool(x).endCell().beginParse(); return (s.loadBool(), s.loadBool(), s.loadBool()); } @@ -179,15 +179,15 @@ fun test111() { if (s.addressIsNone()) { s.skipBits(2); } - if (s.loadBool() == 0) { - assert(s.loadBool() == 0) throw 444; + if (s.loadBool() == false) { + assert(!s.loadBool()) throw 444; s.skipBouncedPrefix(); } var op2 = s.loadMessageOp(); var q2 = s.loadMessageQueryId(); s.skipBits(64); s.assertEndOfSlice(); - assert(isMessageBounced(0x001)) throw 444; + assert(isMessageBounced(0x001) && !isMessageBounced(0x002)) throw 444; return (op1, q1, op2, q2); } diff --git 
a/tolk-tester/tests/codegen_check_demo.tolk b/tolk-tester/tests/codegen_check_demo.tolk index dc78abf8..e40f0377 100644 --- a/tolk-tester/tests/codegen_check_demo.tolk +++ b/tolk-tester/tests/codegen_check_demo.tolk @@ -1,7 +1,7 @@ @method_id(101) fun test1(): int { - var x = false; - if (x == true) { + var x: int = false as int; + if (x == true as int) { x= 100500; } return x; diff --git a/tolk-tester/tests/imports/use-dicts.tolk b/tolk-tester/tests/imports/use-dicts.tolk index 26a9a9cc..c9d5dcfe 100644 --- a/tolk-tester/tests/imports/use-dicts.tolk +++ b/tolk-tester/tests/imports/use-dicts.tolk @@ -11,7 +11,7 @@ fun prepareDict_3_30_4_40_5_x(valueAt5: int): cell { fun lookupIdxByValue(idict32: cell, value: int): int { var cur_key = -1; do { - var (cur_key redef, cs: slice, found: int) = idict32.iDictGetNext(32, cur_key); + var (cur_key redef, cs: slice, found: bool) = idict32.iDictGetNext(32, cur_key); // one-line condition (via &) doesn't work, since right side is calculated immediately if (found) { if (cs.loadInt(32) == value) { diff --git a/tolk-tester/tests/invalid-typing-10.tolk b/tolk-tester/tests/invalid-typing-10.tolk new file mode 100644 index 00000000..8c1df4a2 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-10.tolk @@ -0,0 +1,8 @@ +fun failMathOnBoolean(c: cell) { + return (null == c) * 10; +} + +/** +@compilation_should_fail +@stderr can not apply operator `*` to `bool` and `int` + */ diff --git a/tolk-tester/tests/invalid-typing-11.tolk b/tolk-tester/tests/invalid-typing-11.tolk new file mode 100644 index 00000000..d6aa09c3 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-11.tolk @@ -0,0 +1,11 @@ +fun failBitwiseNotOnBool() { + var eq = 1 == 0; + if (~eq) { + return 0; + } +} + +/** +@compilation_should_fail +@stderr can not apply operator `~` to `bool` + */ diff --git a/tolk-tester/tests/invalid-typing-2.tolk b/tolk-tester/tests/invalid-typing-2.tolk index d7c6745f..052596e4 100644 --- a/tolk-tester/tests/invalid-typing-2.tolk +++ 
b/tolk-tester/tests/invalid-typing-2.tolk @@ -1,9 +1,9 @@ fun main() { - var tri: (int, bool) = (10, false); + var tri: (int, int) = (10, false); return; } /** @compilation_should_fail -@stderr bool type is not supported yet +@stderr can not assign `(int, bool)` to variable of type `(int, int)` */ diff --git a/tolk-tester/tests/invalid-typing-6.tolk b/tolk-tester/tests/invalid-typing-6.tolk index da0ac9bb..f2e99c7d 100644 --- a/tolk-tester/tests/invalid-typing-6.tolk +++ b/tolk-tester/tests/invalid-typing-6.tolk @@ -4,5 +4,5 @@ fun failWhenTernaryConditionNotInt(cs: slice) { /** @compilation_should_fail -@stderr condition of ternary operator must be an integer +@stderr can not use `slice` as a boolean condition */ diff --git a/tolk-tester/tests/logical-operators.tolk b/tolk-tester/tests/logical-operators.tolk index 9e21a968..fb437bb3 100644 --- a/tolk-tester/tests/logical-operators.tolk +++ b/tolk-tester/tests/logical-operators.tolk @@ -1,14 +1,14 @@ import "imports/use-dicts.tolk" fun simpleAllConst() { - return (!0, !!0 & !false, !!!0, !1, !!1, !-1, !!-1, (!5 == 0) == !0, !0 == true); + return (!0, !!0 & !false, !!!0, !1, !!1, !-1, !!-1, (!5 as int == 0) == !0, !0 == true); } fun compileTimeEval1(x: int) { // todo now compiler doesn't understand that bool can't be equal to number other than 0/-1 // (but understands that it can't be positive) // that's why for now, the last condition is evaluated at runtime - return (!x, !x > 10, !x < 10, !!x == 5, !x == -10); + return (!x, !x as int > 10, (!x as int) < 10, !!x as int == 5, !x as int == -10); } @method_id(101) @@ -23,13 +23,13 @@ fun withAndOr(x: int, y: int, z: int) { var return_at_end = -1; if (!x & !y) { if (!z & !y) { return 10; } - else if (z | !!y) { return_at_end = 20; } + else if ((z != 0) | !!y) { return_at_end = 20; } } else if (!!x & !!y & !z) { if (!z & (x > 10)) { return_at_end = 30; } if ((x != 11) & !z) { return 40; } return_at_end = 50; } else { - return_at_end = !x ? 
!y : !z | 1; + return_at_end = !x ? !y as int : (!z as int) | 1; } return return_at_end; } @@ -124,6 +124,31 @@ fun testLogicalOps2(first: int) { return (s.getRemainingBitsCount(), sum); } +@method_id(112) +fun mixLogicalIntsAndBools(first: int, cond: bool) { + return ( + (first && cond) || (!first && cond), + ((first & -1) & cond as int) == ((first && true) && cond) as int, + 7 && cond, + first || cond || !cond || alwaysThrows(), + cond || first || !first || alwaysThrows() + ); +} + +@method_id(113) +fun testConvertIfToIfnot(x: bool) { + assert(!!(x == false), 100); + assert(!x, 100); + if (x == !!false) { + return 1; + } + if (!!(x != !false)) { + return 1; + } + assert(!!x, 100); + return -4; +} + fun main() { } @@ -160,18 +185,21 @@ fun main() { @testcase | 110 | 500 | -1 -1 0 -1 -1 3 @testcase | 111 | 0 | 32 4 @testcase | 111 | -1 | 0 8 +@testcase | 112 | 5 0 | 0 -1 0 -1 -1 +@testcase | 112 | 0 -1 | -1 -1 -1 -1 -1 +@testcase | 113 | 0 | 1 @fif_codegen """ simpleAllConst PROC:<{ // - -1 PUSHINT + TRUE 0 PUSHINT - -1 PUSHINT - 0 PUSHINT - -1 PUSHINT - 0 PUSHINT - -1 PUSHINT + TRUE + FALSE + TRUE + FALSE + TRUE TRUE TRUE }> @@ -293,4 +321,27 @@ These are moments of future optimizations. For now, it's more than enough. 
}> """ +@fif_codegen +""" + testConvertIfToIfnot PROC:<{ + // x + DUP // x x + 100 THROWIF + DUP // x x + 100 THROWIF + DUP // x x + IFNOTJMP:<{ // x + DROP // + 1 PUSHINT // _7=1 + }> // x + DUP // x x + IFNOTJMP:<{ // x + DROP // + 1 PUSHINT // _8=1 + }> // x + 100 THROWIFNOT + -4 PUSHINT // _12=-4 + }> +""" + */ diff --git a/tolk-tester/tests/no-spaces.tolk b/tolk-tester/tests/no-spaces.tolk index 0d4c3b67..da733898 100644 --- a/tolk-tester/tests/no-spaces.tolk +++ b/tolk-tester/tests/no-spaces.tolk @@ -22,7 +22,7 @@ global `some()var`:int; return `a`*-1*-(1)*---(1)*+just10()+-`just10`()*m1*-m1+-eq(m1)----0x1; } -@method_id(112) fun `bitwise~ops`(flags:int):[int,int] { +@method_id(112) fun `bitwise~ops`(flags:int):[bool,bool] { return[ (just10()-3==just10()-(4)--1)|((2==2)&(eq(eq(10)) -3==just10()--13)), ((flags&0xFF)!=0) diff --git a/tolk-tester/tests/null-keyword.tolk b/tolk-tester/tests/null-keyword.tolk index c4bd0acc..9ace9995 100644 --- a/tolk-tester/tests/null-keyword.tolk +++ b/tolk-tester/tests/null-keyword.tolk @@ -73,7 +73,7 @@ fun test7() { var b = beginCell().storeMaybeRef(null); var s = b.endCell().beginParse(); var c = s.loadMaybeRef(); - return (null == c) * 10 + (b != null); + return (null == c) as int * 10 + (b != null) as int; } fun main() { @@ -139,7 +139,7 @@ fun main() { 10 MULCONST // b _13 SWAP // _13 b ISNULL // _13 _14 - 0 EQINT // _13 _15 + NOT // _13 _15 ADD // _16 }> """ diff --git a/tolk-tester/tests/op_priority.tolk b/tolk-tester/tests/op-priority.tolk similarity index 73% rename from tolk-tester/tests/op_priority.tolk rename to tolk-tester/tests/op-priority.tolk index 95209c99..8a57b394 100644 --- a/tolk-tester/tests/op_priority.tolk +++ b/tolk-tester/tests/op-priority.tolk @@ -1,4 +1,4 @@ -fun justTrue(): int { return true; } +fun justTrue(): bool { return true; } fun unary_minus_1(a: int, b: int, c: int): int{return -(a+b) *c;} fun unary_minus_2(a: int, b: int, c: int): int{return(-(a+b))*c;} @@ -6,17 +6,17 @@ fun 
unary_minus_3(a: int, b: int, c: int): int{return-((a+b) *c);} @method_id(101) -fun test1(x: int, y: int, z: int): int { +fun test1(x: int, y: int, z: int): bool { return (x > 0) & (y > 0) & (z > 0); } @method_id(102) -fun test2(x: int, y: int, z: int): int { - return x > (0 & (y > 0) & (z > 0)); +fun test2(x: int, y: int, z: int): bool { + return x > (0 & (y > 0) as int & (z > 0) as int); } @method_id(103) -fun test3(x: int, y: int, z: int): int { +fun test3(x: int, y: int, z: int): bool { if ((x < 0) | (y < 0)) { return z < 0; } @@ -24,29 +24,29 @@ fun test3(x: int, y: int, z: int): int { } @method_id(104) -fun test4(x: int, y: int, mode: int): int { +fun test4(x: int, y: int, mode: int): bool { if (mode == 1) { return (x == 10) | (y == 20); } if (mode == 2) { return (x == 10) | (y == 20); } else { - return x == (10 | (y == 20)); + return x == (10 | (y == 20) as int); } } @method_id(105) -fun test5(status: int): int { - return justTrue() & (status == 1) & ((justTrue() & status) == 1); +fun test5(status: int): bool { + return justTrue() & (status == 1) & ((justTrue() as int & status) == 1); } @method_id(106) -fun test6(a: int, b: int, c: int): int { +fun test6(a: int, b: int, c: int): bool { return (unary_minus_1(a,b,c) == unary_minus_2(a,b,c)) & (unary_minus_1(a,b,c) == unary_minus_3(a,b,c)); } @method_id(107) fun test7(b: int): int { - var a = b == 3 ? 3 : b == 4 ? 4 : (b == 5) & 1 ? 5 : 100; + var a = b == 3 ? 3 : b == 4 ? 4 : (b == 5) & true ? 
5 : 100; return a; } @@ -56,14 +56,14 @@ fun test8(b: int): int { return a; } -fun `_ 0, 3 & (3 > 0), 3 & (`_<_`(3, 0)), - 3 & `_ 0, 3 & (3 > 0) as int, 3 & (`_<_`(3, 0)), + 3 & `_> bad; // 0^y = 0 if x=0 and y>=0; "out of range" exception otherwise } var (l, s) = log2_aux_f256(x); @@ -677,7 +677,7 @@ fun fixed248_pow(x: int, y: int): int { // now log_2(x^y) = y*log_2(x) = q + ll, ss integer, ll fixed257, -1/2<=ll<1/2 var sq: int = q + 248; if (sq <= 0) { - return -(sq == 0); // underflow + return -((sq == 0) as int); // underflow } y = expm1_f257(mulrshiftr256(ll, log2_const_f256())); return (y ~>> (9 - q)) - (-1 << sq); @@ -986,7 +986,7 @@ fun tset(mutate self: tuple, idx: int, value: X): void // fixed256 acos_prepare_slow(fixed255 x); @inline fun acos_prepare_slow_f255(x: int): int { - x -= (x == 0); + x -= (x == 0) as int; var t: int = 1; repeat (255) { t = t * sign(x) * 2 + 1; // decode Gray code (sign(x_0), sign(x_1), ...) diff --git a/tolk-tester/tests/unbalanced_ret_loops.tolk b/tolk-tester/tests/unbalanced_ret_loops.tolk index 9b59339d..292b48da 100644 --- a/tolk-tester/tests/unbalanced_ret_loops.tolk +++ b/tolk-tester/tests/unbalanced_ret_loops.tolk @@ -38,7 +38,7 @@ fun foo_until(x: int): int { } @method_id(4) -fun test4(x: int): (int, int) { +fun test4(x: int): (int, bool) { var s = 0; var reached = false; do { diff --git a/tolk-tester/tests/w6.tolk b/tolk-tester/tests/w6.tolk index 2f895644..489ffa8c 100644 --- a/tolk-tester/tests/w6.tolk +++ b/tolk-tester/tests/w6.tolk @@ -6,7 +6,7 @@ fun main(x: int): int { if (i > 5) { return 1; } - var f: int = (i * i == 64); + var f: bool = (i * i == 64); } while (!f); return -1; } diff --git a/tolk-tester/tests/w7.tolk b/tolk-tester/tests/w7.tolk index 85081fbb..3d68c775 100644 --- a/tolk-tester/tests/w7.tolk +++ b/tolk-tester/tests/w7.tolk @@ -4,7 +4,7 @@ fun test(y: int): int { if (y > 0) { return 1; } - return x > 0; + return x > 0 ? 
-1 : 0; } @method_id(2) diff --git a/tolk/CMakeLists.txt b/tolk/CMakeLists.txt index 2ee69686..9d720024 100644 --- a/tolk/CMakeLists.txt +++ b/tolk/CMakeLists.txt @@ -18,6 +18,7 @@ set(TOLK_SOURCE pipe-check-rvalue-lvalue.cpp pipe-check-pure-impure.cpp pipe-constant-folding.cpp + pipe-optimize-boolean-expr.cpp pipe-ast-to-legacy.cpp pipe-find-unused-symbols.cpp pipe-generate-fif-output.cpp diff --git a/tolk/ast-from-tokens.cpp b/tolk/ast-from-tokens.cpp index 767e6066..58592011 100644 --- a/tolk/ast-from-tokens.cpp +++ b/tolk/ast-from-tokens.cpp @@ -673,25 +673,20 @@ static AnyV parse_return_statement(Lexer& lex) { return createV(loc, child); } -static AnyV parse_if_statement(Lexer& lex, bool is_ifnot) { +static AnyV parse_if_statement(Lexer& lex) { SrcLocation loc = lex.cur_location(); lex.expect(tok_if, "`if`"); lex.expect(tok_oppar, "`(`"); AnyExprV cond = parse_expr(lex); lex.expect(tok_clpar, "`)`"); - // replace if(!expr) with ifnot(expr) (this should be done later, but for now, let this be right at parsing time) - if (auto v_not = cond->try_as(); v_not && v_not->tok == tok_logical_not) { - is_ifnot = !is_ifnot; - cond = v_not->get_rhs(); - } V if_body = parse_sequence(lex); V else_body = nullptr; if (lex.tok() == tok_else) { // else if(e) { } or else { } lex.next(); if (lex.tok() == tok_if) { - AnyV v_inner_if = parse_if_statement(lex, false); + AnyV v_inner_if = parse_if_statement(lex); else_body = createV(v_inner_if->loc, lex.cur_location(), {v_inner_if}); } else { else_body = parse_sequence(lex); @@ -699,7 +694,7 @@ static AnyV parse_if_statement(Lexer& lex, bool is_ifnot) { } else { // no 'else', create empty block else_body = createV(lex.cur_location(), lex.cur_location(), {}); } - return createV(loc, is_ifnot, cond, if_body, else_body); + return createV(loc, false, cond, if_body, else_body); } static AnyV parse_repeat_statement(Lexer& lex) { @@ -838,7 +833,7 @@ AnyV parse_statement(Lexer& lex) { case tok_return: return parse_return_statement(lex); case 
tok_if: - return parse_if_statement(lex, false); + return parse_if_statement(lex); case tok_repeat: return parse_repeat_statement(lex); case tok_do: diff --git a/tolk/builtins.cpp b/tolk/builtins.cpp index 68c3b998..d704ec4d 100644 --- a/tolk/builtins.cpp +++ b/tolk/builtins.cpp @@ -475,7 +475,7 @@ AsmOp compile_unary_plus(std::vector& res, std::vector& args return AsmOp::Nop(); } -AsmOp compile_logical_not(std::vector& res, std::vector& args, SrcLocation where) { +AsmOp compile_logical_not(std::vector& res, std::vector& args, SrcLocation where, bool for_int_arg) { tolk_assert(res.size() == 1 && args.size() == 1); VarDescr &r = res[0], &x = args[0]; if (x.is_int_const()) { @@ -484,7 +484,9 @@ AsmOp compile_logical_not(std::vector& res, std::vector& arg return push_const(r.int_const); } r.val = VarDescr::ValBool; - return exec_op("0 EQINT", 1); + // for integers, `!var` is `var == 0` + // for booleans, `!var` can be shortened to `~var` (works the same for 0/-1 but consumes less) + return for_int_arg ? exec_op("0 EQINT", 1) : exec_op("NOT", 1); } AsmOp compile_bitwise_and(std::vector& res, std::vector& args, SrcLocation where) { @@ -1047,7 +1049,7 @@ AsmOp compile_fetch_slice(std::vector& res, std::vector& arg return exec_op(fetch ?
"LDSLICEX" : "PLDSLICEX", 2, 1 + (unsigned)fetch); } -// fun at(t: tuple, index: int): X asm "INDEXVAR"; +// fun tupleAt(t: tuple, index: int): X asm "INDEXVAR"; AsmOp compile_tuple_at(std::vector& res, std::vector& args, SrcLocation) { tolk_assert(args.size() == 2 && res.size() == 1); auto& y = args[1]; @@ -1058,7 +1060,7 @@ AsmOp compile_tuple_at(std::vector& res, std::vector& args, return exec_op("INDEXVAR", 2, 1); } -// fun __isNull(X arg): int +// fun __isNull(X arg): bool AsmOp compile_is_null(std::vector& res, std::vector& args, SrcLocation) { tolk_assert(args.size() == 1 && res.size() == 1); res[0].val = VarDescr::ValBool; @@ -1071,6 +1073,7 @@ void define_builtins() { TypePtr Unit = TypeDataVoid::create(); TypePtr Int = TypeDataInt::create(); + TypePtr Bool = TypeDataBool::create(); TypePtr Slice = TypeDataSlice::create(); TypePtr Builder = TypeDataBuilder::create(); TypePtr Tuple = TypeDataTuple::create(); @@ -1085,18 +1088,36 @@ void define_builtins() { std::vector ParamsInt3 = {Int, Int, Int}; std::vector ParamsSliceInt = {Slice, Int}; - define_builtin_func("_+_", ParamsInt2, Int, nullptr, - compile_add, - FunctionData::flagMarkedAsPure); - define_builtin_func("_-_", ParamsInt2, Int, nullptr, - compile_sub, - FunctionData::flagMarkedAsPure); + // builtin operators + // they are internally stored as functions, because at IR level, there is no difference + // between calling `userAdd(a,b)` and `_+_(a,b)` + // since they are registered in a global symtable, technically, they can even be referenced from Tolk code, + // though it's a "hidden feature" and won't work well for overloads (`==` for int and bool, for example) + + // unary operators define_builtin_func("-_", ParamsInt1, Int, nullptr, compile_unary_minus, FunctionData::flagMarkedAsPure); define_builtin_func("+_", ParamsInt1, Int, nullptr, compile_unary_plus, FunctionData::flagMarkedAsPure); + define_builtin_func("!_", ParamsInt1, Bool, nullptr, + std::bind(compile_logical_not, _1, _2, _3, true), + 
FunctionData::flagMarkedAsPure); + define_builtin_func("!b_", {Bool}, Bool, nullptr, // "overloaded" separate version for bool + std::bind(compile_logical_not, _1, _2, _3, false), + FunctionData::flagMarkedAsPure); + define_builtin_func("~_", ParamsInt1, Int, nullptr, + compile_bitwise_not, + FunctionData::flagMarkedAsPure); + + // binary operators + define_builtin_func("_+_", ParamsInt2, Int, nullptr, + compile_add, + FunctionData::flagMarkedAsPure); + define_builtin_func("_-_", ParamsInt2, Int, nullptr, + compile_sub, + FunctionData::flagMarkedAsPure); define_builtin_func("_*_", ParamsInt2, Int, nullptr, compile_mul, FunctionData::flagMarkedAsPure); @@ -1124,25 +1145,19 @@ void define_builtins() { define_builtin_func("_^>>_", ParamsInt2, Int, nullptr, std::bind(compile_rshift, _1, _2, _3, 1), FunctionData::flagMarkedAsPure); - define_builtin_func("!_", ParamsInt1, Int, nullptr, - compile_logical_not, - FunctionData::flagMarkedAsPure); - define_builtin_func("~_", ParamsInt1, Int, nullptr, - compile_bitwise_not, - FunctionData::flagMarkedAsPure); - define_builtin_func("_&_", ParamsInt2, Int, nullptr, + define_builtin_func("_&_", ParamsInt2, Int, nullptr, // also works for bool compile_bitwise_and, FunctionData::flagMarkedAsPure); - define_builtin_func("_|_", ParamsInt2, Int, nullptr, + define_builtin_func("_|_", ParamsInt2, Int, nullptr, // also works for bool compile_bitwise_or, FunctionData::flagMarkedAsPure); - define_builtin_func("_^_", ParamsInt2, Int, nullptr, + define_builtin_func("_^_", ParamsInt2, Int, nullptr, // also works for bool compile_bitwise_xor, FunctionData::flagMarkedAsPure); - define_builtin_func("_==_", ParamsInt2, Int, nullptr, + define_builtin_func("_==_", ParamsInt2, Int, nullptr, // also works for bool std::bind(compile_cmp_int, _1, _2, 2), FunctionData::flagMarkedAsPure); - define_builtin_func("_!=_", ParamsInt2, Int, nullptr, + define_builtin_func("_!=_", ParamsInt2, Int, nullptr, // also works for bool std::bind(compile_cmp_int, _1, _2, 
5), FunctionData::flagMarkedAsPure); define_builtin_func("_<_", ParamsInt2, Int, nullptr, @@ -1160,6 +1175,33 @@ void define_builtins() { define_builtin_func("_<=>_", ParamsInt2, Int, nullptr, std::bind(compile_cmp_int, _1, _2, 7), FunctionData::flagMarkedAsPure); + + // special function used for internal compilation of some lexical constructs + // for example, `throw 123;` is actually calling `__throw(123)` + define_builtin_func("__true", {}, Bool, nullptr, /* AsmOp::Const("TRUE") */ + std::bind(compile_bool_const, _1, _2, true), + FunctionData::flagMarkedAsPure); + define_builtin_func("__false", {}, Bool, nullptr, /* AsmOp::Const("FALSE") */ + std::bind(compile_bool_const, _1, _2, false), + FunctionData::flagMarkedAsPure); + define_builtin_func("__null", {}, typeT, declGenericT, + AsmOp::Const("PUSHNULL"), + FunctionData::flagMarkedAsPure); + define_builtin_func("__isNull", {typeT}, Bool, declGenericT, + compile_is_null, + FunctionData::flagMarkedAsPure); + define_builtin_func("__throw", ParamsInt1, Unit, nullptr, + compile_throw, + 0); + define_builtin_func("__throw_arg", {typeT, Int}, Unit, declGenericT, + compile_throw_arg, + 0); + define_builtin_func("__throw_if_unless", ParamsInt3, Unit, nullptr, + compile_throw_if_unless, + 0); + + // functions from stdlib marked as `builtin`, implemented at compiler level for optimizations + // (for example, `loadInt(1)` is `1 LDI`, but `loadInt(n)` for non-constant requires it be on a stack and `LDIX`) define_builtin_func("mulDivFloor", ParamsInt3, Int, nullptr, std::bind(compile_muldiv, _1, _2, _3, -1), FunctionData::flagMarkedAsPure); @@ -1172,27 +1214,6 @@ void define_builtins() { define_builtin_func("mulDivMod", ParamsInt3, TypeDataTensor::create({Int, Int}), nullptr, AsmOp::Custom("MULDIVMOD", 3, 2), FunctionData::flagMarkedAsPure); - define_builtin_func("__true", {}, Int, nullptr, /* AsmOp::Const("TRUE") */ - std::bind(compile_bool_const, _1, _2, true), - FunctionData::flagMarkedAsPure); - 
define_builtin_func("__false", {}, Int, nullptr, /* AsmOp::Const("FALSE") */ - std::bind(compile_bool_const, _1, _2, false), - FunctionData::flagMarkedAsPure); - define_builtin_func("__null", {}, typeT, declGenericT, - AsmOp::Const("PUSHNULL"), - FunctionData::flagMarkedAsPure); - define_builtin_func("__isNull", {typeT}, Int, declGenericT, - compile_is_null, - FunctionData::flagMarkedAsPure); - define_builtin_func("__throw", ParamsInt1, Unit, nullptr, - compile_throw, - 0); - define_builtin_func("__throw_arg", {typeT, Int}, Unit, declGenericT, - compile_throw_arg, - 0); - define_builtin_func("__throw_if_unless", ParamsInt3, Unit, nullptr, - compile_throw_if_unless, - 0); define_builtin_func("loadInt", ParamsSliceInt, Int, nullptr, std::bind(compile_fetch_int, _1, _2, true, true), FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf, diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index 77dbee41..d60bb8b3 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -586,6 +586,8 @@ static void process_do_while_statement(V v, CodeBlob& co until_cond = createV(cond->loc, "<", tok_lt, v_geq->get_lhs(), v_geq->get_rhs()); } else if (auto v_gt = cond->try_as(); v_gt && v_gt->tok == tok_gt) { until_cond = createV(cond->loc, "<=", tok_geq, v_gt->get_lhs(), v_gt->get_rhs()); + } else if (cond->inferred_type == TypeDataBool::create()) { + until_cond = createV(cond->loc, "!b", tok_logical_not, cond); } else { until_cond = createV(cond->loc, "!", tok_logical_not, cond); } diff --git a/tolk/pipe-constant-folding.cpp b/tolk/pipe-constant-folding.cpp index 4090d247..98996c28 100644 --- a/tolk/pipe-constant-folding.cpp +++ b/tolk/pipe-constant-folding.cpp @@ -23,7 +23,8 @@ * This pipe is supposed to do constant folding, like replacing `2 + 3` with `5`. * It happens after type inferring and validity checks, one of the last ones. 
* - * Currently, it just replaces `-1` (ast_unary_operator ast_int_const) with a number -1. + * Currently, it just replaces `-1` (ast_unary_operator ast_int_const) with a number -1 + * and `!true` with false. * More rich constant folding should be done some day, but even without this, IR optimizations * (operating low-level stack variables) pretty manage to do all related optimizations. * Constant folding in the future, done at AST level, just would slightly reduce amount of work for optimizer. @@ -39,6 +40,13 @@ class ConstantFoldingReplacer final : public ASTReplacerInFunctionBody { return v_int; } + static V create_bool_const(SrcLocation loc, bool bool_val) { + auto v_bool = createV(loc, bool_val); + v_bool->assign_inferred_type(TypeDataBool::create()); + v_bool->assign_rvalue_true(); + return v_bool; + } + AnyExprV replace(V v) override { parent::replace(v); @@ -58,6 +66,15 @@ class ConstantFoldingReplacer final : public ASTReplacerInFunctionBody { return v->get_rhs(); } + // `!true` / `!false` + if (t == tok_logical_not && v->get_rhs()->type == ast_bool_const) { + return create_bool_const(v->loc, !v->get_rhs()->as()->bool_val); + } + // `!0` + if (t == tok_logical_not && v->get_rhs()->type == ast_int_const) { + return create_bool_const(v->loc, v->get_rhs()->as()->intval == 0); + } + return v; } diff --git a/tolk/pipe-infer-types-and-calls.cpp b/tolk/pipe-infer-types-and-calls.cpp index 1d6fbcb0..d8a7d41b 100644 --- a/tolk/pipe-infer-types-and-calls.cpp +++ b/tolk/pipe-infer-types-and-calls.cpp @@ -110,6 +110,20 @@ static void fire_error_assign_always_null_to_variable(SrcLocation loc, const Loc throw ParseError(loc, "can not infer type of `" + var_name + "`, it's always null; specify its type with `" + var_name + ": `" + (is_assigned_null_literal ? 
" or use `null as `" : "")); } +// fire an error on `!cell` / `+slice` +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_cannot_apply_operator(SrcLocation loc, std::string_view operator_name, AnyExprV unary_expr) { + std::string op = static_cast(operator_name); + throw ParseError(loc, "can not apply operator `" + op + "` to " + to_string(unary_expr->inferred_type)); +} + +// fire an error on `int + cell` / `slice & int` +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_cannot_apply_operator(SrcLocation loc, std::string_view operator_name, AnyExprV lhs, AnyExprV rhs) { + std::string op = static_cast(operator_name); + throw ParseError(loc, "can not apply operator `" + op + "` to " + to_string(lhs->inferred_type) + " and " + to_string(rhs->inferred_type)); +} + // check correctness of called arguments counts and their type matching static void check_function_arguments(const FunctionData* fun_ref, V v, AnyExprV lhs_of_dot_call) { int delta_self = lhs_of_dot_call ? 
1 : 0; @@ -345,6 +359,10 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { return v_inferred->inferred_type == TypeDataInt::create(); } + static bool expect_boolean(AnyExprV v_inferred) { + return v_inferred->inferred_type == TypeDataBool::create(); + } + static void infer_int_const(V v) { assign_inferred_type(v, TypeDataInt::create()); } @@ -358,8 +376,7 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { } static void infer_bool_const(V v) { - // currently, Tolk has no `bool` type; `true` and `false` are integers (-1 and 0) - assign_inferred_type(v, TypeDataInt::create()); + assign_inferred_type(v, TypeDataBool::create()); } static void infer_local_vars_declaration(V) { @@ -544,8 +561,23 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { // almost all operators implementation is hardcoded by built-in functions `_+_` and similar std::string_view builtin_func = v->operator_name; // "+" for operator += - if (!expect_integer(lhs) || !expect_integer(rhs)) { - v->error("can not apply operator `" + static_cast(v->operator_name) + "` to " + to_string(lhs) + " and " + to_string(rhs)); + switch (v->tok) { + // &= |= ^= are "overloaded" both for integers and booleans, (int &= bool) is NOT allowed + case tok_set_bitwise_and: + case tok_set_bitwise_or: + case tok_set_bitwise_xor: { + bool both_int = expect_integer(lhs) && expect_integer(rhs); + bool both_bool = expect_boolean(lhs) && expect_boolean(rhs); + if (!both_int && !both_bool) { + fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); + } + break; + } + // others are mathematical: += *= ... 
+ default: + if (!expect_integer(lhs) || !expect_integer(rhs)) { + fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); + } } assign_inferred_type(v, lhs); @@ -563,10 +595,26 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { // all operators implementation is hardcoded by built-in functions `~_` and similar std::string_view builtin_func = v->operator_name; - if (!expect_integer(rhs)) { - v->error("can not apply operator `" + static_cast(v->operator_name) + "` to " + to_string(rhs)); + switch (v->tok) { + case tok_minus: + case tok_plus: + case tok_bitwise_not: + if (!expect_integer(rhs)) { + fire_error_cannot_apply_operator(v->loc, v->operator_name, rhs); + } + assign_inferred_type(v, TypeDataInt::create()); + break; + case tok_logical_not: + if (expect_boolean(rhs)) { + builtin_func = "!b"; // "overloaded" for bool + } else if (!expect_integer(rhs)) { + fire_error_cannot_apply_operator(v->loc, v->operator_name, rhs); + } + assign_inferred_type(v, TypeDataBool::create()); + break; + default: + tolk_assert(false); } - assign_inferred_type(v, TypeDataInt::create()); if (!builtin_func.empty()) { const FunctionData* builtin_sym = lookup_global_symbol(static_cast(builtin_func) + "_")->as(); @@ -587,26 +635,59 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { switch (v->tok) { // == != can compare both integers and booleans, (int == bool) is NOT allowed case tok_eq: - case tok_neq: + case tok_neq: { + bool both_int = expect_integer(lhs) && expect_integer(rhs); + bool both_bool = expect_boolean(lhs) && expect_boolean(rhs); + if (!both_int && !both_bool) { + if (lhs->inferred_type == rhs->inferred_type) { // compare slice with slice + v->error("type " + to_string(lhs) + " can not be compared with `== !=`"); + } else { + fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); + } + } + assign_inferred_type(v, TypeDataBool::create()); + break; + } + // < > can compare only integers + case tok_lt: + case tok_gt: + case tok_leq: + 
case tok_geq: case tok_spaceship: { if (!expect_integer(lhs) || !expect_integer(rhs)) { - v->error("comparison operators `== !=` can compare only integers, got " + to_string(lhs) + " and " + to_string(rhs)); + fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); } - assign_inferred_type(v, TypeDataInt::create()); + assign_inferred_type(v, TypeDataBool::create()); break; } + // & | ^ are "overloaded" both for integers and booleans, (int & bool) is NOT allowed + case tok_bitwise_and: + case tok_bitwise_or: + case tok_bitwise_xor: { + bool both_int = expect_integer(lhs) && expect_integer(rhs); + bool both_bool = expect_boolean(lhs) && expect_boolean(rhs); + if (!both_int && !both_bool) { + fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); + } + assign_inferred_type(v, rhs); // (int & int) is int, (bool & bool) is bool + break; + } + // && || can work with integers and booleans, (int && bool) is allowed case tok_logical_and: case tok_logical_or: { - if (!expect_integer(lhs) || !expect_integer(rhs)) { - v->error("logical operators `&& ||` expect integer operands, got " + to_string(lhs) + " and " + to_string(rhs)); + bool lhs_ok = expect_integer(lhs) || expect_boolean(lhs); + bool rhs_ok = expect_integer(rhs) || expect_boolean(rhs); + if (!lhs_ok || !rhs_ok) { + fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); } - assign_inferred_type(v, TypeDataInt::create()); - builtin_func = {}; + assign_inferred_type(v, TypeDataBool::create()); + builtin_func = {}; // no built-in functions, logical operators are expressed as IFs at IR level break; } + // others are mathematical: + * ... 
default: if (!expect_integer(lhs) || !expect_integer(rhs)) { - v->error("can not apply operator `" + static_cast(v->operator_name) + "` to " + to_string(lhs) + " and " + to_string(rhs)); + fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); } assign_inferred_type(v, TypeDataInt::create()); } @@ -619,9 +700,10 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { } void infer_ternary_operator(V v, TypePtr hint) { - infer_any_expr(v->get_cond()); - if (!expect_integer(v->get_cond())) { - v->get_cond()->error("condition of ternary operator must be an integer, got " + to_string(v->get_cond())); + AnyExprV cond = v->get_cond(); + infer_any_expr(cond); + if (!expect_integer(cond) && !expect_boolean(cond)) { + cond->error("can not use " + to_string(cond) + " as a boolean condition"); } infer_any_expr(v->get_when_true(), hint); infer_any_expr(v->get_when_false(), hint); @@ -983,35 +1065,39 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { } void process_if_statement(V v) { - infer_any_expr(v->get_cond()); - if (!expect_integer(v->get_cond())) { - v->get_cond()->error("condition of `if` must be an integer, got " + to_string(v->get_cond())); + AnyExprV cond = v->get_cond(); + infer_any_expr(cond); + if (!expect_integer(cond) && !expect_boolean(cond)) { + cond->error("can not use " + to_string(cond) + " as a boolean condition"); } process_any_statement(v->get_if_body()); process_any_statement(v->get_else_body()); } void process_repeat_statement(V v) { - infer_any_expr(v->get_cond()); - if (!expect_integer(v->get_cond())) { - v->get_cond()->error("condition of `repeat` must be an integer, got " + to_string(v->get_cond())); + AnyExprV cond = v->get_cond(); + infer_any_expr(cond); + if (!expect_integer(cond)) { + cond->error("condition of `repeat` must be an integer, got " + to_string(cond)); } process_any_statement(v->get_body()); } void process_while_statement(V v) { - infer_any_expr(v->get_cond()); - if (!expect_integer(v->get_cond())) { - 
v->get_cond()->error("condition of `while` must be an integer, got " + to_string(v->get_cond())); + AnyExprV cond = v->get_cond(); + infer_any_expr(cond); + if (!expect_integer(cond) && !expect_boolean(cond)) { + cond->error("can not use " + to_string(cond) + " as a boolean condition"); } process_any_statement(v->get_body()); } void process_do_while_statement(V v) { process_any_statement(v->get_body()); - infer_any_expr(v->get_cond()); - if (!expect_integer(v->get_cond())) { - v->get_cond()->error("condition of `while` must be an integer, got " + to_string(v->get_cond())); + AnyExprV cond = v->get_cond(); + infer_any_expr(cond); + if (!expect_integer(cond) && !expect_boolean(cond)) { + cond->error("can not use " + to_string(cond) + " as a boolean condition"); } } @@ -1027,9 +1113,10 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { } void process_assert_statement(V v) { - infer_any_expr(v->get_cond()); - if (!expect_integer(v->get_cond())) { - v->get_cond()->error("condition of `assert` must be an integer, got " + to_string(v->get_cond())); + AnyExprV cond = v->get_cond(); + infer_any_expr(cond); + if (!expect_integer(cond) && !expect_boolean(cond)) { + cond->error("can not use " + to_string(cond) + " as a boolean condition"); } infer_any_expr(v->get_thrown_code()); if (!expect_integer(v->get_thrown_code())) { diff --git a/tolk/pipe-optimize-boolean-expr.cpp b/tolk/pipe-optimize-boolean-expr.cpp new file mode 100644 index 00000000..03750256 --- /dev/null +++ b/tolk/pipe-optimize-boolean-expr.cpp @@ -0,0 +1,172 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. 
+ + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . +*/ +#include "tolk.h" +#include "ast.h" +#include "ast-replacer.h" +#include "type-system.h" + +/* + * This pipe does some optimizations related to booleans. + * It happens after type inferring, when we know types of all expressions. + * + * Example: `boolVar == true` -> `boolVar`. + * Example: `!!boolVar` -> `boolVar`. + * Also, it unwraps parentheses inside if condition and similar: `assert(((x)), 404)` -> `assert(x, 404)` + * + * todo some day, replace && || with & | when it's safe (currently, && always produces IFs in Fift) + * It's tricky to implement whether replacing is safe. + * For example, safe: `a > 0 && a < 10` / `a != 3 && a != 5` + * For example, unsafe: `cached && calc()` / `a > 0 && log(a)` / `b != 0 && a / b > 1` / `i >= 0 && arr[idx]` / `f != null && close(f)` + */ + +namespace tolk { + +static AnyExprV unwrap_parenthesis(AnyExprV v) { + while (v->type == ast_parenthesized_expression) { + v = v->as()->get_expr(); + } + return v; +} + +struct OptimizerBooleanExpressionsReplacer final : ASTReplacerInFunctionBody { + static V create_int_const(SrcLocation loc, td::RefInt256&& intval) { + auto v_int = createV(loc, std::move(intval), {}); + v_int->assign_inferred_type(TypeDataInt::create()); + v_int->assign_rvalue_true(); + return v_int; + } + + static V create_bool_const(SrcLocation loc, bool bool_val) { + auto v_bool = createV(loc, bool_val); + v_bool->assign_inferred_type(TypeDataBool::create()); + v_bool->assign_rvalue_true(); + return v_bool; + } + + static V create_logical_not_for_bool(SrcLocation loc, AnyExprV rhs) { + auto v_not = createV(loc, "!", tok_logical_not, rhs); +
v_not->assign_inferred_type(TypeDataBool::create()); + v_not->assign_rvalue_true(); + v_not->assign_fun_ref(lookup_global_symbol("!b_")->as()); + return v_not; + } + +protected: + + AnyExprV replace(V v) override { + parent::replace(v); + + if (v->tok == tok_logical_not) { + if (auto inner_not = v->get_rhs()->try_as(); inner_not && inner_not->tok == tok_logical_not) { + AnyExprV cond_not_not = inner_not->get_rhs(); + // `!!boolVar` => `boolVar` + if (cond_not_not->inferred_type == TypeDataBool::create()) { + return cond_not_not; + } + // `!!intVar` => `intVar != 0` + if (cond_not_not->inferred_type == TypeDataInt::create()) { + auto v_zero = create_int_const(v->loc, td::make_refint(0)); + auto v_neq = createV(v->loc, "!=", tok_neq, cond_not_not, v_zero); + v_neq->mutate()->assign_rvalue_true(); + v_neq->mutate()->assign_inferred_type(TypeDataBool::create()); + v_neq->mutate()->assign_fun_ref(lookup_global_symbol("_!=_")->as()); + return v_neq; + } + } + if (auto inner_bool = v->get_rhs()->try_as()) { + // `!true` / `!false` + return create_bool_const(v->loc, !inner_bool->bool_val); + } + } + + return v; + } + + AnyExprV replace(V v) override { + parent::replace(v); + + if (v->tok == tok_eq || v->tok == tok_neq) { + AnyExprV lhs = v->get_lhs(); + AnyExprV rhs = v->get_rhs(); + if (lhs->inferred_type == TypeDataBool::create() && rhs->type == ast_bool_const) { + // `boolVar == true` / `boolVar != false` + if (rhs->as()->bool_val ^ (v->tok == tok_neq)) { + return lhs; + } + // `boolVar != true` / `boolVar == false` + return create_logical_not_for_bool(v->loc, lhs); + } + } + + return v; + } + + AnyV replace(V v) override { + parent::replace(v); + if (v->get_cond()->type == ast_parenthesized_expression) { + v = createV(v->loc, v->is_ifnot, unwrap_parenthesis(v->get_cond()), v->get_if_body(), v->get_else_body()); + } + + // `if (!x)` -> ifnot(x) + while (auto v_cond_unary = v->get_cond()->try_as()) { + if (v_cond_unary->tok != tok_logical_not) { + break; + } + v = 
createV(v->loc, !v->is_ifnot, v_cond_unary->get_rhs(), v->get_if_body(), v->get_else_body()); + } + + return v; + } + + AnyV replace(V v) override { + parent::replace(v); + + if (v->get_cond()->type == ast_parenthesized_expression) { + v = createV(v->loc, unwrap_parenthesis(v->get_cond()), v->get_body()); + } + return v; + } + + AnyV replace(V v) override { + parent::replace(v); + + if (v->get_cond()->type == ast_parenthesized_expression) { + v = createV(v->loc, v->get_body(), unwrap_parenthesis(v->get_cond())); + } + return v; + } + + AnyV replace(V v) override { + parent::replace(v); + + if (v->get_cond()->type == ast_parenthesized_expression) { + v = createV(v->loc, unwrap_parenthesis(v->get_cond()), v->get_thrown_code()); + } + return v; + } + +public: + bool should_visit_function(const FunctionData* fun_ref) override { + return fun_ref->is_code_function() && !fun_ref->is_generic_function(); + } +}; + +void pipeline_optimize_boolean_expressions() { + replace_ast_of_all_functions(); +} + +} // namespace tolk diff --git a/tolk/pipe-resolve-identifiers.cpp b/tolk/pipe-resolve-identifiers.cpp index 299f33be..03b23c3c 100644 --- a/tolk/pipe-resolve-identifiers.cpp +++ b/tolk/pipe-resolve-identifiers.cpp @@ -150,9 +150,6 @@ struct TypeDataResolver { if (un->text == "self") { throw ParseError(un->loc, "`self` type can be used only as a return type of a function (enforcing it to be chainable)"); } - if (un->text == "bool") { - throw ParseError(un->loc, "bool type is not supported yet"); - } fire_error_unknown_type_name(un->loc, un->text); } return child; diff --git a/tolk/pipeline.h b/tolk/pipeline.h index afea66d6..6aec2b5e 100644 --- a/tolk/pipeline.h +++ b/tolk/pipeline.h @@ -41,6 +41,7 @@ void pipeline_refine_lvalue_for_mutate_arguments(); void pipeline_check_rvalue_lvalue(); void pipeline_check_pure_impure_operations(); void pipeline_constant_folding(); +void pipeline_optimize_boolean_expressions(); void pipeline_convert_ast_to_legacy_Expr_Op(); void 
pipeline_find_unused_symbols(); diff --git a/tolk/tolk.cpp b/tolk/tolk.cpp index d1159d3c..cc867c52 100644 --- a/tolk/tolk.cpp +++ b/tolk/tolk.cpp @@ -64,6 +64,7 @@ int tolk_proceed(const std::string &entrypoint_filename) { pipeline_check_rvalue_lvalue(); pipeline_check_pure_impure_operations(); pipeline_constant_folding(); + pipeline_optimize_boolean_expressions(); pipeline_convert_ast_to_legacy_Expr_Op(); pipeline_find_unused_symbols(); diff --git a/tolk/type-system.cpp b/tolk/type-system.cpp index 31f84e75..b21bd0ee 100644 --- a/tolk/type-system.cpp +++ b/tolk/type-system.cpp @@ -76,6 +76,7 @@ public: std::unordered_map TypeDataTypeIdCalculation::all_unique_occurred_types; TypePtr TypeDataInt::singleton; +TypePtr TypeDataBool::singleton; TypePtr TypeDataCell::singleton; TypePtr TypeDataSlice::singleton; TypePtr TypeDataBuilder::singleton; @@ -87,6 +88,7 @@ TypePtr TypeDataVoid::singleton; void type_system_init() { TypeDataInt::singleton = new TypeDataInt; + TypeDataBool::singleton = new TypeDataBool; TypeDataCell::singleton = new TypeDataCell; TypeDataSlice::singleton = new TypeDataSlice; TypeDataBuilder::singleton = new TypeDataBuilder; @@ -330,6 +332,16 @@ bool TypeDataInt::can_rhs_be_assigned(TypePtr rhs) const { return false; } +bool TypeDataBool::can_rhs_be_assigned(TypePtr rhs) const { + if (rhs == this) { + return true; + } + if (rhs == TypeDataNullLiteral::create()) { + return true; + } + return false; +} + bool TypeDataCell::can_rhs_be_assigned(TypePtr rhs) const { if (rhs == this) { return true; @@ -446,6 +458,10 @@ bool TypeDataInt::can_be_casted_with_as_operator(TypePtr cast_to) const { return cast_to == this; } +bool TypeDataBool::can_be_casted_with_as_operator(TypePtr cast_to) const { + return cast_to == this || cast_to == TypeDataInt::create(); +} + bool TypeDataCell::can_be_casted_with_as_operator(TypePtr cast_to) const { return cast_to == this; } @@ -468,7 +484,7 @@ bool TypeDataContinuation::can_be_casted_with_as_operator(TypePtr cast_to) const 
bool TypeDataNullLiteral::can_be_casted_with_as_operator(TypePtr cast_to) const { return cast_to == this - || cast_to == TypeDataInt::create() || cast_to == TypeDataCell::create() || cast_to == TypeDataSlice::create() + || cast_to == TypeDataInt::create() || cast_to == TypeDataBool::create() || cast_to == TypeDataCell::create() || cast_to == TypeDataSlice::create() || cast_to == TypeDataBuilder::create() || cast_to == TypeDataContinuation::create() || cast_to == TypeDataTuple::create() || cast_to->try_as(); } @@ -593,6 +609,9 @@ static TypePtr parse_simple_type(Lexer& lex) { case tok_int: lex.next(); return TypeDataInt::create(); + case tok_bool: + lex.next(); + return TypeDataBool::create(); case tok_cell: lex.next(); return TypeDataCell::create(); @@ -614,7 +633,6 @@ static TypePtr parse_simple_type(Lexer& lex) { case tok_void: lex.next(); return TypeDataVoid::create(); - case tok_bool: case tok_self: case tok_identifier: { SrcLocation loc = lex.cur_location(); diff --git a/tolk/type-system.h b/tolk/type-system.h index 2805bb34..13c0e4b0 100644 --- a/tolk/type-system.h +++ b/tolk/type-system.h @@ -120,6 +120,24 @@ public: bool can_be_casted_with_as_operator(TypePtr cast_to) const override; }; +/* + * `bool` is TypeDataBool. TVM has no bool, only integers. Under the hood, -1 is true, 0 is false. + * From the type system point of view, int and bool are different, not-autocastable types. + */ +class TypeDataBool final : public TypeData { + TypeDataBool() : TypeData(2ULL, 0) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "bool"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; +}; + /* * `cell` is TypeDataCell, representation of TVM cell. 
*/ From 2997c027a26c73820d1fe76fe2a39aed25bf94eb Mon Sep 17 00:00:00 2001 From: tolk-vm Date: Mon, 13 Jan 2025 15:37:00 +0700 Subject: [PATCH 18/61] [Tolk] Bump version to v0.7 Totally, v0.7 will include: - AST-level semantic kernel, transform AST to Ops directly - fully rewritten type system, drop Hindley-Milner - `bool` type support --- crypto/smartcont/tolk-stdlib/common.tolk | 2 +- crypto/smartcont/tolk-stdlib/gas-payments.tolk | 2 +- crypto/smartcont/tolk-stdlib/lisp-lists.tolk | 2 +- crypto/smartcont/tolk-stdlib/tvm-dicts.tolk | 2 +- crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk | 2 +- tolk/tolk-version.h | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/crypto/smartcont/tolk-stdlib/common.tolk b/crypto/smartcont/tolk-stdlib/common.tolk index 3aae9d3e..46068a20 100644 --- a/crypto/smartcont/tolk-stdlib/common.tolk +++ b/crypto/smartcont/tolk-stdlib/common.tolk @@ -1,7 +1,7 @@ // Standard library for Tolk (LGPL licence). // It contains common functions that are available out of the box, the user doesn't have to import anything. // More specific functions are required to be imported explicitly, like "@stdlib/tvm-dicts". -tolk 0.6 +tolk 0.7 /** Tuple manipulation primitives. diff --git a/crypto/smartcont/tolk-stdlib/gas-payments.tolk b/crypto/smartcont/tolk-stdlib/gas-payments.tolk index 1965cc6a..83893354 100644 --- a/crypto/smartcont/tolk-stdlib/gas-payments.tolk +++ b/crypto/smartcont/tolk-stdlib/gas-payments.tolk @@ -1,5 +1,5 @@ // A part of standard library for Tolk -tolk 0.6 +tolk 0.7 /** Gas and payment related primitives. diff --git a/crypto/smartcont/tolk-stdlib/lisp-lists.tolk b/crypto/smartcont/tolk-stdlib/lisp-lists.tolk index f7a72191..429f0cbf 100644 --- a/crypto/smartcont/tolk-stdlib/lisp-lists.tolk +++ b/crypto/smartcont/tolk-stdlib/lisp-lists.tolk @@ -1,5 +1,5 @@ // A part of standard library for Tolk -tolk 0.6 +tolk 0.7 /** Lisp-style lists are nested 2-elements tuples: `(1, (2, (3, null)))` represents list `[1, 2, 3]`. 
diff --git a/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk b/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk index 5c9c3152..a47fe542 100644 --- a/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk +++ b/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk @@ -1,5 +1,5 @@ // A part of standard library for Tolk -tolk 0.6 +tolk 0.7 /** Dictionaries are represented as `cell` data type (cells can store anything, dicts in particular). diff --git a/crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk b/crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk index 91b35f2b..ef7c2afe 100644 --- a/crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk +++ b/crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk @@ -1,5 +1,5 @@ // A part of standard library for Tolk -tolk 0.6 +tolk 0.7 /// Usually `c3` has a continuation initialized by the whole code of the contract. It is used for function calls. /// The primitive returns the current value of `c3`. diff --git a/tolk/tolk-version.h b/tolk/tolk-version.h index 6e5b764c..7eaf55a7 100644 --- a/tolk/tolk-version.h +++ b/tolk/tolk-version.h @@ -18,6 +18,6 @@ namespace tolk { -constexpr const char* TOLK_VERSION = "0.6.0"; +constexpr const char* TOLK_VERSION = "0.7.0"; } // namespace tolk From 710514b8f1dbbace316b4677c08121e6d7f776fb Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Thu, 16 Jan 2025 06:42:05 +0000 Subject: [PATCH 19/61] Validate Merkle proofs and updates in TLB validate (#1479) * Validate Merkle proofs and updates in TLB validate * Fix out-of-bound access in tl_jni_object.cpp --- crypto/block/block.tlb | 2 +- crypto/tl/tlbc-gen-cpp.cpp | 2 +- crypto/tl/tlblib.cpp | 8 +++++++- tl/tl/tl_jni_object.cpp | 3 ++- 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/crypto/block/block.tlb b/crypto/block/block.tlb index 560433bf..b8b40827 100644 --- a/crypto/block/block.tlb +++ b/crypto/block/block.tlb @@ -296,7 +296,7 @@ transaction$0111 account_addr:bits256 lt:uint64 total_fees:CurrencyCollection state_update:^(HASH_UPDATE Account) description:^TransactionDescr = 
Transaction; -!merkle_update#02 {X:Type} old_hash:bits256 new_hash:bits256 +!merkle_update#04 {X:Type} old_hash:bits256 new_hash:bits256 old_depth:uint16 new_depth:uint16 old:^X new:^X = MERKLE_UPDATE X; update_hashes#72 {X:Type} old_hash:bits256 new_hash:bits256 = HASH_UPDATE X; diff --git a/crypto/tl/tlbc-gen-cpp.cpp b/crypto/tl/tlbc-gen-cpp.cpp index 55b4a1c0..5730f169 100644 --- a/crypto/tl/tlbc-gen-cpp.cpp +++ b/crypto/tl/tlbc-gen-cpp.cpp @@ -2074,7 +2074,7 @@ void CppTypeCode::generate_skip_field(const Constructor& constr, const Field& fi output_cpp_expr(ss, expr, 100); ss << '.'; } - ss << "validate_skip_ref(ops, cs, weak)" << tail; + ss << "validate_skip_ref(ops, cs, " << (constr.is_special ? "true" : "weak") << ")" << tail; actions += Action{ss.str()}; } diff --git a/crypto/tl/tlblib.cpp b/crypto/tl/tlblib.cpp index 0e0e5626..ee05d371 100644 --- a/crypto/tl/tlblib.cpp +++ b/crypto/tl/tlblib.cpp @@ -133,7 +133,13 @@ bool TLB::validate_ref_internal(int* ops, Ref cell_ref, bool weak) con } bool is_special; auto cs = load_cell_slice_special(std::move(cell_ref), is_special); - return always_special() ? is_special : (is_special ? 
weak : (validate_skip(ops, cs) && cs.empty_ext())); + if (cs.special_type() == vm::Cell::SpecialType::PrunnedBranch && weak) { + return true; + } + if (always_special() != is_special) { + return false; + } + return validate_skip(ops, cs, weak) && cs.empty_ext(); } bool TLB::print_skip(PrettyPrinter& pp, vm::CellSlice& cs) const { diff --git a/tl/tl/tl_jni_object.cpp b/tl/tl/tl_jni_object.cpp index e7e69789..26a94d62 100644 --- a/tl/tl/tl_jni_object.cpp +++ b/tl/tl/tl_jni_object.cpp @@ -115,8 +115,9 @@ static size_t get_utf8_from_utf16_length(const jchar *p, jsize len) { for (jsize i = 0; i < len; i++) { unsigned int cur = p[i]; if ((cur & 0xF800) == 0xD800) { + ++i; if (i < len) { - unsigned int next = p[++i]; + unsigned int next = p[i]; if ((next & 0xFC00) == 0xDC00 && (cur & 0x400) == 0) { result += 4; continue; From d3485e42b9e7b21a0fb432bbf3f3dc2e41b2b673 Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Fri, 17 Jan 2025 12:26:53 +0300 Subject: [PATCH 20/61] Temporary increase gas limit for certain accounts --- crypto/block/transaction.cpp | 61 ++++++++++++++++++++++++++++-------- doc/GlobalVersions.md | 5 +-- 2 files changed, 51 insertions(+), 15 deletions(-) diff --git a/crypto/block/transaction.cpp b/crypto/block/transaction.cpp index 92043376..363524b0 100644 --- a/crypto/block/transaction.cpp +++ b/crypto/block/transaction.cpp @@ -1145,31 +1145,64 @@ td::RefInt256 ComputePhaseConfig::compute_gas_price(td::uint64 gas_used) const { namespace transaction { /** - * Checks if it is required to increase gas_limit (from GasLimitsPrices config) to special_gas_limit * 2 - * from masterchain GasLimitsPrices config for the transaction. + * Checks if it is required to increase gas_limit (from GasLimitsPrices config) for the transaction * * In January 2024 a highload wallet of @wallet Telegram bot in mainnet was stuck because current gas limit (1M) is * not enough to clean up old queires, thus locking funds inside. 
* See comment in crypto/smartcont/highload-wallet-v2-code.fc for details on why this happened. * Account address: EQD_v9j1rlsuHHw2FIhcsCFFSD367ldfDdCKcsNmNpIRzUlu - * It was proposed to validators to increase gas limit for this account for a limited amount of time (until 2024-02-29). + * It was proposed to validators to increase gas limit for this account to 70M for a limited amount + * of time (until 2024-02-29). * It is activated by setting global version to 5 in ConfigParam 8. * This config change also activates new behavior for special accounts in masterchain. * + * In Augost 2024 it was decided to unlock other old highload wallets that got into the same situation. + * See https://t.me/tondev_news/129 + * It is activated by setting global version to 9. + * * @param cfg The compute phase configuration. * @param now The Unix time of the transaction. * @param account The account of the transaction. * - * @returns True if gas_limit override is required, false otherwise + * @returns Overridden gas limit or empty td::optional */ -static bool override_gas_limit(const ComputePhaseConfig& cfg, ton::UnixTime now, const Account& account) { - if (!cfg.special_gas_full) { - return false; +static td::optional override_gas_limit(const ComputePhaseConfig& cfg, ton::UnixTime now, + const Account& account) { + struct OverridenGasLimit { + td::uint64 new_limit; + int from_version; + ton::UnixTime until; + }; + static std::map, OverridenGasLimit> accounts = []() { + auto parse_addr = [](const char* s) -> std::pair { + auto r_addr = StdAddress::parse(td::Slice(s)); + r_addr.ensure(); + return {r_addr.ok().workchain, r_addr.ok().addr}; + }; + std::map, OverridenGasLimit> accounts; + + // Increase limit for EQD_v9j1rlsuHHw2FIhcsCFFSD367ldfDdCKcsNmNpIRzUlu until 2024-02-29 00:00:00 UTC + accounts[parse_addr("0:FFBFD8F5AE5B2E1C7C3614885CB02145483DFAEE575F0DD08A72C366369211CD")] = { + .new_limit = 70'000'000, .from_version = 5, .until = 1709164800}; + + // Increase limit for multiple 
accounts (https://t.me/tondev_news/129) until 2025-03-01 00:00:00 UTC + accounts[parse_addr("UQBeSl-dumOHieZ3DJkNKVkjeso7wZ0VpzR4LCbLGTQ8xr57")] = { + .new_limit = 70'000'000, .from_version = 9, .until = 1740787200}; + accounts[parse_addr("EQC3VcQ-43klww9UfimR58TBjBzk7GPupXQ3CNuthoNp-uTR")] = { + .new_limit = 70'000'000, .from_version = 9, .until = 1740787200}; + accounts[parse_addr("EQBhwBb8jvokGvfreHRRoeVxI237PrOJgyrsAhLA-4rBC_H5")] = { + .new_limit = 70'000'000, .from_version = 9, .until = 1740787200}; + accounts[parse_addr("EQCkoRp4OE-SFUoMEnYfL3vF43T3AzNfW8jyTC4yzk8cJqMS")] = { + .new_limit = 70'000'000, .from_version = 9, .until = 1740787200}; + accounts[parse_addr("EQBDanbCeUqI4_v-xrnAN0_I2wRvEIaLg1Qg2ZN5c6Zl1KOh")] = { + .new_limit = 70'000'000, .from_version = 9, .until = 1740787200}; + return accounts; + }(); + auto it = accounts.find({account.workchain, account.addr}); + if (it == accounts.end() || cfg.global_version < it->second.from_version || now >= it->second.until) { + return {}; } - ton::UnixTime until = 1709164800; // 2024-02-29 00:00:00 UTC - ton::WorkchainId wc = 0; - const char* addr_hex = "FFBFD8F5AE5B2E1C7C3614885CB02145483DFAEE575F0DD08A72C366369211CD"; - return now < until && account.workchain == wc && account.addr.to_hex() == addr_hex; + return it->second.new_limit; } /** @@ -1183,10 +1216,12 @@ static bool override_gas_limit(const ComputePhaseConfig& cfg, ton::UnixTime now, * @returns The amount of gas. 
*/ td::uint64 Transaction::gas_bought_for(const ComputePhaseConfig& cfg, td::RefInt256 nanograms) { - if (override_gas_limit(cfg, now, account)) { + if (auto new_limit = override_gas_limit(cfg, now, account)) { gas_limit_overridden = true; // Same as ComputePhaseConfig::gas_bought for, but with other gas_limit and max_gas_threshold - auto gas_limit = cfg.mc_gas_prices.special_gas_limit * 2; + auto gas_limit = new_limit.value(); + LOG(INFO) << "overridding gas limit for account " << account.workchain << ":" << account.addr.to_hex() << " to " + << gas_limit; auto max_gas_threshold = compute_max_gas_threshold(cfg.gas_price256, gas_limit, cfg.flat_gas_limit, cfg.flat_gas_price); if (nanograms.is_null() || sgn(nanograms) < 0) { diff --git a/doc/GlobalVersions.md b/doc/GlobalVersions.md index 64b2342a..2048eee2 100644 --- a/doc/GlobalVersions.md +++ b/doc/GlobalVersions.md @@ -48,7 +48,7 @@ Version 5 enables higher gas limits for special contracts. Previously only ticktock transactions had this limit, while ordinary transactions had a default limit of `gas_limit` gas (1M). * Gas usage of special contracts is not taken into account when checking block limits. This allows keeping masterchain block limits low while having high gas limits for elector. -* Gas limit on `EQD_v9j1rlsuHHw2FIhcsCFFSD367ldfDdCKcsNmNpIRzUlu` is increased to `special_gas_limit * 2` until 2024-02-29. +* Gas limit on `EQD_v9j1rlsuHHw2FIhcsCFFSD367ldfDdCKcsNmNpIRzUlu` is increased to 70M (`special_gas_limit * 2`) until 2024-02-29. See [this post](https://t.me/tonstatus/88) for details. ### Loading libraries @@ -131,4 +131,5 @@ Example: if the last masterchain block seqno is `19071` then the list contains b - Jumps to nested continuations of depth more than 8 consume 1 gas for eact subsequent continuation (this does not affect most of TVM code). - Fix exception code in some TVM instructions: now `stk_und` has priority over other error codes. 
- `PFXDICTADD`, `PFXDICTSET`, `PFXDICTREPLACE`, `PFXDICTDEL`, `GETGASFEE`, `GETSTORAGEFEE`, `GETFORWARDFEE`, `GETORIGINALFWDFEE`, `GETGASFEESIMPLE`, `GETFORWARDFEESIMPLE`, `HASHEXT` -- Now setting the contract code to a library cell does not consume additional gas on execution of the code. \ No newline at end of file +- Now setting the contract code to a library cell does not consume additional gas on execution of the code. +- Temporary increase gas limit for some accounts (see [this post](https://t.me/tondev_news/129) for details, `override_gas_limit` in `transaction.cpp` for the list of accounts). \ No newline at end of file From 2d603f1f479529ad611b268988f9150ee1f87e93 Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Fri, 17 Jan 2025 12:46:58 +0300 Subject: [PATCH 21/61] Adjust overridden gas limit --- crypto/block/transaction.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/block/transaction.cpp b/crypto/block/transaction.cpp index 836657f1..92e20fb0 100644 --- a/crypto/block/transaction.cpp +++ b/crypto/block/transaction.cpp @@ -1195,7 +1195,7 @@ static td::optional override_gas_limit(const ComputePhaseConfig& cfg accounts[parse_addr("EQCkoRp4OE-SFUoMEnYfL3vF43T3AzNfW8jyTC4yzk8cJqMS")] = { .new_limit = 70'000'000, .from_version = 9, .until = 1740787200}; accounts[parse_addr("EQBDanbCeUqI4_v-xrnAN0_I2wRvEIaLg1Qg2ZN5c6Zl1KOh")] = { - .new_limit = 70'000'000, .from_version = 9, .until = 1740787200}; + .new_limit = 225'000'000, .from_version = 9, .until = 1740787200}; return accounts; }(); auto it = accounts.find({account.workchain, account.addr}); From a224491179d7a2613a9929e491fe7d84bf8ca7e6 Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Fri, 17 Jan 2025 12:58:15 +0000 Subject: [PATCH 22/61] Fix error processing in StaticBagOfCellsDb (#1481) --- crypto/vm/db/StaticBagOfCellsDb.cpp | 46 +++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/crypto/vm/db/StaticBagOfCellsDb.cpp 
b/crypto/vm/db/StaticBagOfCellsDb.cpp index c667f334..80dbfbf0 100644 --- a/crypto/vm/db/StaticBagOfCellsDb.cpp +++ b/crypto/vm/db/StaticBagOfCellsDb.cpp @@ -309,7 +309,9 @@ class StaticBagOfCellsDbLazyImpl : public StaticBagOfCellsDb { return 0; } td::Slice offset_view; - CHECK(info_.offset_byte_size <= 8); + if (info_.offset_byte_size > 8) { + return td::Status::Error(PSTRING() << "bag-of-cell error: invalid offset_byte_size " << info_.offset_byte_size); + } char arr[8]; td::RwMutex::ReadLock guard; if (info_.has_index) { @@ -321,19 +323,25 @@ class StaticBagOfCellsDbLazyImpl : public StaticBagOfCellsDb { offset_view = td::Slice(index_data_).substr((td::int64)idx * info_.offset_byte_size, info_.offset_byte_size); } - CHECK(offset_view.size() == (size_t)info_.offset_byte_size); + if (offset_view.size() != (size_t)info_.offset_byte_size) { + return td::Status::Error(PSTRING() << "bag-of-cell error: invalid offset view size" << offset_view.size()); + } return td::narrow_cast(info_.read_offset(offset_view.ubegin())); } td::Result load_root_idx(int root_i) { - CHECK(root_i >= 0 && root_i < info_.root_count); + if (root_i < 0 || root_i >= info_.root_count) { + return td::Status::Error(PSTRING() << "bag-of-cell error: invalid root index " << root_i); + } if (!info_.has_roots) { return 0; } char arr[8]; TRY_RESULT(idx_view, data_.view(td::MutableSlice(arr, info_.ref_byte_size), info_.roots_offset + (td::int64)root_i * info_.ref_byte_size)); - CHECK(idx_view.size() == (size_t)info_.ref_byte_size); + if (idx_view.size() != (size_t)info_.ref_byte_size) { + return td::Status::Error(PSTRING() << "bag-of-cell error: invalid idx_view size" << idx_view.size()); + } return info_.read_ref(idx_view.ubegin()); } @@ -343,8 +351,9 @@ class StaticBagOfCellsDbLazyImpl : public StaticBagOfCellsDb { bool should_cache; }; td::Result get_cell_location(int idx) { - CHECK(idx >= 0); - CHECK(idx < info_.cell_count); + if (idx < 0 || idx >= info_.cell_count) { + return 
td::Status::Error(PSTRING() << "bag-of-cell error: invalid cell index " << idx); + } TRY_STATUS(preload_index(idx)); TRY_RESULT(from, load_idx_offset(idx - 1)); TRY_RESULT(till, load_idx_offset(idx)); @@ -357,10 +366,15 @@ class StaticBagOfCellsDbLazyImpl : public StaticBagOfCellsDb { res.should_cache = res.end % 2 == 1; res.end /= 2; } - CHECK(std::numeric_limits::max() - res.begin >= info_.data_offset); - CHECK(std::numeric_limits::max() - res.end >= info_.data_offset); + if (std::numeric_limits::max() - res.begin < info_.data_offset || + std::numeric_limits::max() - res.end < info_.data_offset) { + return td::Status::Error(PSTRING() << "bag-of-cell error: invalid cell location (1) " << res.begin << ":" << res.end); + } res.begin += static_cast(info_.data_offset); res.end += static_cast(info_.data_offset); + if (res.begin > res.end) { + return td::Status::Error(PSTRING() << "bag-of-cell error: invalid cell location (2) " << res.begin << ":" << res.end); + } return res; } @@ -396,8 +410,6 @@ class StaticBagOfCellsDbLazyImpl : public StaticBagOfCellsDb { if (info_.has_index) { return td::Status::OK(); } - - CHECK(idx < info_.cell_count); if (index_i_.load(std::memory_order_relaxed) > idx) { return td::Status::OK(); } @@ -407,12 +419,17 @@ class StaticBagOfCellsDbLazyImpl : public StaticBagOfCellsDb { auto buf_slice = td::MutableSlice(buf.data(), buf.size()); for (; index_i_ <= idx; index_i_++) { auto offset = td::narrow_cast(info_.data_offset + index_offset_); - CHECK(data_.size() >= offset); + if (data_.size() < offset) { + return td::Status::Error(PSLICE() << "bag-of-cells error: invalid offset " << offset + << " (size=" << data_.size() << ")"); + } TRY_RESULT(cell, data_.view(buf_slice.copy().truncate(data_.size() - offset), offset)); CellSerializationInfo cell_info; TRY_STATUS(cell_info.init(cell, info_.ref_byte_size)); index_offset_ += cell_info.end_offset; - LOG_CHECK((unsigned)info_.offset_byte_size <= 8) << info_.offset_byte_size; + if 
((unsigned)info_.offset_byte_size > 8) { + return td::Status::Error(PSTRING() << "bag-of-cell error: invalid offset_byte_size " << info_.offset_byte_size); + } td::uint8 tmp[8]; info_.write_offset(tmp, index_offset_); auto guard = index_data_rw_mutex_.lock_write(); @@ -488,7 +505,10 @@ class StaticBagOfCellsDbLazyImpl : public StaticBagOfCellsDb { bool should_cache) { deserialize_cell_cnt_.add(1); Ref refs[4]; - CHECK(cell_info.refs_cnt <= 4); + if (cell_info.refs_cnt > 4) { + return td::Status::Error(PSLICE() << "invalid bag-of-cells cell #" << idx << " has " << cell_info.refs_cnt + << " refs"); + } auto* ref_ptr = cell_slice.ubegin() + cell_info.refs_offset; for (int k = 0; k < cell_info.refs_cnt; k++, ref_ptr += info_.ref_byte_size) { int ref_idx = td::narrow_cast(info_.read_ref(ref_ptr)); From e0605156defc570bebf41b8d0032e9cbb1ec46b1 Mon Sep 17 00:00:00 2001 From: neodix42 Date: Tue, 21 Jan 2025 12:27:25 +0400 Subject: [PATCH 23/61] Reworked TON portable artifacts (#1486) * improve windows builds * install nasm for openssl compilation on win * install nasm for openssl compilation on win for github * add create-state, proxy-liteserver, rldp-http-proxy, http-proxy, adnl-proxy, dht-server, libtonlibjson.so and libemulator.so to docker image * build new artifacts inside Docker * add files smartcont/auto/* to docker image * build arm64 in docker branch build * improve secp256k1 build * adding natively portable binaries (all statically linked with libc++, without nixpkgs help) for x86-64 linux * install missing headers on ubuntu 20.04 * use clang-16 on ubuntu 20.04 * remove gsl for portable artifacts; add -f key to generate-random-id in order to read addr_list from file; * typo * decode json * decode json * add github workflow for appimages creation * add missing dependencies * use libc++ for appimages artifacts * typo * appimages wihtout libc++ * appimages with libc++ and some checks * add appimages to release (for testing) * add appimages to release (for testing) * 
add appimages to release (for testing) * add appimages to release (for testing) 2 * add appimages to release (for testing) 3 * appimages only on ubuntu 22 with ssl-3 for now * appimages only on ubuntu 20 with ssl-3 for now * appimages only on ubuntu 20 with ssl-3 for now * add export LD_LIBRARY_PATH="${APPDIR}/usr/lib:${LD_LIBRARY_PATH}" to appimage AppRun * create release * appimages without jemalloc * bind specific libraries to appimages * add libreadline * add plain portable libs * add proper /lib/x86_64-linux-gnu/libreadline.so.8 * app images build with libc * try to ensure ABI compatibility with older glibc * try to ensure ABI compatibility with older glibc for shared libraries * shared lib without libc but with D_GLIBCXX_USE_CXX11_ABI and -static-libgcc -static-libstdc++ * add -fPIC -fcommon * add /lib/x86_64-linux-gnu/libstdc++.so.6 to static binaries * add -static-libgcc -static-libstdc++ to libtonlibjson and emulator when PORTABLE=1 * add -static-libgcc -static-libstdc++ to libtonlibjson and emulator when PORTABLE=1 * update emulator portable * Update CMakeLists.txt * test portable macos binaries * do not use -static-libgcc -static-libstdc++ on mac for shared libs * do not use -static-libgcc -static-libstdc++ on mac for shared libs * adjust create-release.yml * minor fixes, typos * minor fixes * linux apps double check * avoid infinite loop when place in system bin dir * avoid infinite loop when place in system bin dir 2 * test compilation on linux arm64 * test appimages on arm64 linux * test appimages on arm64 linux 2 * add portable linux arm64 to release * clean up * update README.md --- .github/script/amd64-20.04.Dockerfile | 20 -- .github/script/amd64-22.04.Dockerfile | 20 -- .github/script/arm64-20.04.Dockerfile | 20 -- .github/script/arm64-22.04.Dockerfile | 20 -- .../build-ton-linux-arm64-appimage.yml | 57 ++++ .../build-ton-linux-arm64-shared.yml | 43 +++ .../build-ton-linux-x86-64-appimage.yml | 63 +++++ .../build-ton-macos-13-x86-64-portable.yml 
| 27 ++ .../build-ton-macos-14-arm64-portable.yml | 27 ++ .../workflows/build-ton-wasm-emscripten.yml | 2 +- .github/workflows/create-release.yml | 244 +++++++++++++----- .github/workflows/ton-arm64-macos.yml | 40 --- .github/workflows/ton-x86-64-linux.yml | 44 ---- .github/workflows/ton-x86-64-macos.yml | 40 --- .github/workflows/ton-x86-64-windows.yml | 2 +- README.md | 18 +- assembly/appimage/AppRun | 3 + assembly/appimage/create-appimages.sh | 50 ++++ assembly/appimage/ton.png | Bin 0 -> 5571 bytes assembly/cicd/jenkins/test-builds.groovy | 237 ----------------- assembly/native/build-ubuntu-appimages.sh | 109 ++++++++ assembly/native/build-ubuntu-portable-libs.sh | 132 ++++++++++ assembly/native/build-ubuntu-portable.sh | 8 +- emulator/CMakeLists.txt | 7 +- tonlib/CMakeLists.txt | 7 +- 25 files changed, 718 insertions(+), 522 deletions(-) delete mode 100644 .github/script/amd64-20.04.Dockerfile delete mode 100644 .github/script/amd64-22.04.Dockerfile delete mode 100644 .github/script/arm64-20.04.Dockerfile delete mode 100644 .github/script/arm64-22.04.Dockerfile create mode 100644 .github/workflows/build-ton-linux-arm64-appimage.yml create mode 100644 .github/workflows/build-ton-linux-arm64-shared.yml create mode 100644 .github/workflows/build-ton-linux-x86-64-appimage.yml create mode 100644 .github/workflows/build-ton-macos-13-x86-64-portable.yml create mode 100644 .github/workflows/build-ton-macos-14-arm64-portable.yml delete mode 100644 .github/workflows/ton-arm64-macos.yml delete mode 100644 .github/workflows/ton-x86-64-linux.yml delete mode 100644 .github/workflows/ton-x86-64-macos.yml create mode 100644 assembly/appimage/AppRun create mode 100644 assembly/appimage/create-appimages.sh create mode 100644 assembly/appimage/ton.png delete mode 100644 assembly/cicd/jenkins/test-builds.groovy create mode 100644 assembly/native/build-ubuntu-appimages.sh create mode 100644 assembly/native/build-ubuntu-portable-libs.sh diff --git 
a/.github/script/amd64-20.04.Dockerfile b/.github/script/amd64-20.04.Dockerfile deleted file mode 100644 index 1ec89ebd..00000000 --- a/.github/script/amd64-20.04.Dockerfile +++ /dev/null @@ -1,20 +0,0 @@ -FROM ubuntu:20.04 - -RUN apt update -RUN DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata -RUN apt install -y build-essential cmake clang openssl libssl-dev zlib1g-dev gperf wget git curl libreadline-dev ccache libmicrohttpd-dev ninja-build libsecp256k1-dev libsodium-dev pkg-config - -WORKDIR / - -ARG BRANCH -ARG REPO -RUN git clone --recurse-submodules https://github.com/$REPO ton && cd ton && git checkout $BRANCH && git submodule update - -WORKDIR /ton -RUN mkdir /ton/build -WORKDIR /ton/build -ENV CC clang -ENV CXX clang++ -ENV CCACHE_DISABLE 1 -RUN cmake -GNinja -DCMAKE_BUILD_TYPE=Release -DPORTABLE=1 -DTON_ARCH= -DCMAKE_CXX_FLAGS="-mavx2" .. -RUN ninja storage-daemon storage-daemon-cli tonlibjson blockchain-explorer fift func tolk validator-engine validator-engine-console create-state generate-random-id create-hardfork dht-server lite-client diff --git a/.github/script/amd64-22.04.Dockerfile b/.github/script/amd64-22.04.Dockerfile deleted file mode 100644 index 6134d167..00000000 --- a/.github/script/amd64-22.04.Dockerfile +++ /dev/null @@ -1,20 +0,0 @@ -FROM ubuntu:22.04 - -RUN apt update -RUN DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata -RUN apt install -y build-essential cmake clang openssl libssl-dev zlib1g-dev gperf wget git curl libreadline-dev ccache libmicrohttpd-dev ninja-build libsecp256k1-dev libsodium-dev pkg-config - -WORKDIR / - -ARG BRANCH -ARG REPO -RUN git clone --recurse-submodules https://github.com/$REPO ton && cd ton && git checkout $BRANCH && git submodule update - -WORKDIR /ton -RUN mkdir /ton/build -WORKDIR /ton/build -ENV CC clang -ENV CXX clang++ -ENV CCACHE_DISABLE 1 -RUN cmake -GNinja -DCMAKE_BUILD_TYPE=Release -DPORTABLE=1 -DTON_ARCH= -DCMAKE_CXX_FLAGS="-mavx2" .. 
-RUN ninja storage-daemon storage-daemon-cli tonlibjson blockchain-explorer fift func tolk validator-engine validator-engine-console create-state generate-random-id create-hardfork dht-server lite-client diff --git a/.github/script/arm64-20.04.Dockerfile b/.github/script/arm64-20.04.Dockerfile deleted file mode 100644 index 5e350534..00000000 --- a/.github/script/arm64-20.04.Dockerfile +++ /dev/null @@ -1,20 +0,0 @@ -FROM ubuntu:20.04 - -RUN apt update -RUN DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata -RUN apt install -y build-essential cmake clang openssl libssl-dev zlib1g-dev gperf wget git curl libreadline-dev ccache libmicrohttpd-dev ninja-build libsecp256k1-dev libsodium-dev pkg-config - -WORKDIR / - -ARG BRANCH -ARG REPO -RUN git clone --recurse-submodules https://github.com/$REPO ton && cd ton && git checkout $BRANCH && git submodule update - -WORKDIR /ton -RUN mkdir /ton/build -WORKDIR /ton/build -ENV CC clang -ENV CXX clang++ -ENV CCACHE_DISABLE 1 -RUN cmake -GNinja -DCMAKE_BUILD_TYPE=Release -DPORTABLE=1 -DTON_ARCH= .. 
-RUN ninja storage-daemon storage-daemon-cli tonlibjson blockchain-explorer fift func tolk validator-engine validator-engine-console create-state generate-random-id dht-server lite-client diff --git a/.github/script/arm64-22.04.Dockerfile b/.github/script/arm64-22.04.Dockerfile deleted file mode 100644 index f9805849..00000000 --- a/.github/script/arm64-22.04.Dockerfile +++ /dev/null @@ -1,20 +0,0 @@ -FROM ubuntu:22.04 - -RUN apt update -RUN DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata -RUN apt install -y build-essential cmake clang openssl libssl-dev zlib1g-dev gperf wget git curl libreadline-dev ccache libmicrohttpd-dev ninja-build libsecp256k1-dev libsodium-dev pkg-config - -WORKDIR / - -ARG BRANCH -ARG REPO -RUN git clone --recurse-submodules https://github.com/$REPO ton && cd ton && git checkout $BRANCH && git submodule update - -WORKDIR /ton -RUN mkdir /ton/build -WORKDIR /ton/build -ENV CC clang -ENV CXX clang++ -ENV CCACHE_DISABLE 1 -RUN cmake -GNinja -DCMAKE_BUILD_TYPE=Release -DPORTABLE=1 -DTON_ARCH= .. 
-RUN ninja storage-daemon storage-daemon-cli tonlibjson blockchain-explorer fift func tolk validator-engine validator-engine-console create-state generate-random-id dht-server lite-client diff --git a/.github/workflows/build-ton-linux-arm64-appimage.yml b/.github/workflows/build-ton-linux-arm64-appimage.yml new file mode 100644 index 00000000..d464d8a2 --- /dev/null +++ b/.github/workflows/build-ton-linux-arm64-appimage.yml @@ -0,0 +1,57 @@ +name: Ubuntu TON build (AppImages, arm64) + +on: [push,workflow_dispatch,workflow_call] + +jobs: + build: + runs-on: ubuntu-22.04-arm + + steps: + - name: Check out repository + uses: actions/checkout@v3 + with: + submodules: 'recursive' + + - name: Install system libraries + run: | + sudo apt update + sudo apt install -y build-essential git cmake ninja-build zlib1g-dev libsecp256k1-dev libmicrohttpd-dev libsodium-dev liblz4-dev + sudo apt remove libgsl-dev + + - name: Install clang-16 + run: | + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh 16 all + + - name: Build TON + run: | + git submodule sync --recursive + git submodule update + cp assembly/native/build-ubuntu-appimages.sh . + chmod +x build-ubuntu-appimages.sh + ./build-ubuntu-appimages.sh -a + + - name: Make AppImages + run: | + cp assembly/appimage/create-appimages.sh . + cp assembly/appimage/AppRun . + cp assembly/appimage/ton.png . + chmod +x create-appimages.sh + ./create-appimages.sh aarch64 + rm -rf artifacts + + + - name: Build TON libs + run: | + cp assembly/native/build-ubuntu-portable-libs.sh . 
+ chmod +x build-ubuntu-portable-libs.sh + ./build-ubuntu-portable-libs.sh -a + cp ./artifacts/libtonlibjson.so appimages/artifacts/ + cp ./artifacts/libemulator.so appimages/artifacts/ + + - name: Upload artifacts + uses: actions/upload-artifact@master + with: + name: ton-arm64-linux + path: appimages/artifacts diff --git a/.github/workflows/build-ton-linux-arm64-shared.yml b/.github/workflows/build-ton-linux-arm64-shared.yml new file mode 100644 index 00000000..6433df0b --- /dev/null +++ b/.github/workflows/build-ton-linux-arm64-shared.yml @@ -0,0 +1,43 @@ +name: Ubuntu TON build (shared, arm64) + +on: [push,workflow_dispatch,workflow_call] + +jobs: + build: + strategy: + fail-fast: false + matrix: + os: [ubuntu-22.04-arm, ubuntu-24.04-arm] + runs-on: ${{ matrix.os }} + + steps: + - name: Check out repository + uses: actions/checkout@v3 + with: + submodules: 'recursive' + + - name: Install system libraries + run: | + sudo apt-get update + sudo apt-get install -y build-essential git cmake ninja-build zlib1g-dev libsecp256k1-dev libmicrohttpd-dev libsodium-dev liblz4-dev libjemalloc-dev + + - if: matrix.os != 'ubuntu-24.04-arm' + name: Install llvm-16 + run: | + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh 16 all + + - name: Build TON + run: | + git submodule sync --recursive + git submodule update + cp assembly/native/build-ubuntu-shared.sh . 
+ chmod +x build-ubuntu-shared.sh + ./build-ubuntu-shared.sh -t -a + + - name: Upload artifacts + uses: actions/upload-artifact@master + with: + name: ton-binaries-${{ matrix.os }} + path: artifacts diff --git a/.github/workflows/build-ton-linux-x86-64-appimage.yml b/.github/workflows/build-ton-linux-x86-64-appimage.yml new file mode 100644 index 00000000..4f78ece9 --- /dev/null +++ b/.github/workflows/build-ton-linux-x86-64-appimage.yml @@ -0,0 +1,63 @@ +name: Ubuntu TON build (AppImages, x86-64) + +on: [push,workflow_dispatch,workflow_call] + +jobs: + build: + runs-on: ubuntu-20.04 + + steps: + - name: Check out repository + uses: actions/checkout@v3 + with: + submodules: 'recursive' + + - name: Install system libraries + run: | + sudo apt update + sudo apt install -y build-essential git cmake ninja-build zlib1g-dev libsecp256k1-dev libmicrohttpd-dev libsodium-dev liblz4-dev + sudo apt remove libgsl-dev + + - name: Install gcc-11 g++-11 + run: | + sudo apt install -y manpages-dev software-properties-common + sudo add-apt-repository ppa:ubuntu-toolchain-r/test + sudo apt update && sudo apt install gcc-11 g++-11 + + - name: Install clang-16 + run: | + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh 16 all + + - name: Build TON + run: | + git submodule sync --recursive + git submodule update + cp assembly/native/build-ubuntu-appimages.sh . + chmod +x build-ubuntu-appimages.sh + ./build-ubuntu-appimages.sh -a + + - name: Make AppImages + run: | + cp assembly/appimage/create-appimages.sh . + cp assembly/appimage/AppRun . + cp assembly/appimage/ton.png . + chmod +x create-appimages.sh + ./create-appimages.sh x86_64 + rm -rf artifacts + + + - name: Build TON libs + run: | + cp assembly/native/build-ubuntu-portable-libs.sh . 
+ chmod +x build-ubuntu-portable-libs.sh + ./build-ubuntu-portable-libs.sh -a + cp ./artifacts/libtonlibjson.so appimages/artifacts/ + cp ./artifacts/libemulator.so appimages/artifacts/ + + - name: Upload artifacts + uses: actions/upload-artifact@master + with: + name: ton-x86_64-linux + path: appimages/artifacts diff --git a/.github/workflows/build-ton-macos-13-x86-64-portable.yml b/.github/workflows/build-ton-macos-13-x86-64-portable.yml new file mode 100644 index 00000000..5e50a468 --- /dev/null +++ b/.github/workflows/build-ton-macos-13-x86-64-portable.yml @@ -0,0 +1,27 @@ +name: MacOS-13 TON build (portable, x86-64) + +on: [push,workflow_dispatch,workflow_call] + +jobs: + build: + runs-on: macos-13 + + steps: + - name: Check out repository + uses: actions/checkout@v3 + with: + submodules: 'recursive' + + - name: Build TON + run: | + git submodule sync --recursive + git submodule update + cp assembly/native/build-macos-portable.sh . + chmod +x build-macos-portable.sh + ./build-macos-portable.sh -t -a + + - name: Upload artifacts + uses: actions/upload-artifact@master + with: + name: ton-x86_64-macos + path: artifacts diff --git a/.github/workflows/build-ton-macos-14-arm64-portable.yml b/.github/workflows/build-ton-macos-14-arm64-portable.yml new file mode 100644 index 00000000..8eb3af70 --- /dev/null +++ b/.github/workflows/build-ton-macos-14-arm64-portable.yml @@ -0,0 +1,27 @@ +name: MacOS-14 TON build (portable, arm64) + +on: [push,workflow_dispatch,workflow_call] + +jobs: + build: + runs-on: macos-14 + + steps: + - name: Check out repository + uses: actions/checkout@v3 + with: + submodules: 'recursive' + + - name: Build TON + run: | + git submodule sync --recursive + git submodule update + cp assembly/native/build-macos-portable.sh . 
+ chmod +x build-macos-portable.sh + ./build-macos-portable.sh -t -a + + - name: Upload artifacts + uses: actions/upload-artifact@master + with: + name: ton-arm64-macos + path: artifacts diff --git a/.github/workflows/build-ton-wasm-emscripten.yml b/.github/workflows/build-ton-wasm-emscripten.yml index 66fe5e93..2ac1a224 100644 --- a/.github/workflows/build-ton-wasm-emscripten.yml +++ b/.github/workflows/build-ton-wasm-emscripten.yml @@ -28,5 +28,5 @@ jobs: - name: Upload artifacts uses: actions/upload-artifact@master with: - name: ton-wasm-binaries + name: ton-wasm path: artifacts diff --git a/.github/workflows/create-release.yml b/.github/workflows/create-release.yml index 04f81a24..3063ce06 100644 --- a/.github/workflows/create-release.yml +++ b/.github/workflows/create-release.yml @@ -11,10 +11,26 @@ jobs: steps: - uses: actions/checkout@v3 + - name: Download Linux arm64 artifacts + uses: dawidd6/action-download-artifact@v6 + with: + workflow: build-ton-linux-arm64-appimage.yml + path: artifacts + workflow_conclusion: success + skip_unpack: true + + - name: Download and unzip Linux arm64 artifacts + uses: dawidd6/action-download-artifact@v6 + with: + workflow: build-ton-linux-arm64-appimage.yml + path: artifacts + workflow_conclusion: success + skip_unpack: false + - name: Download Linux x86-64 artifacts uses: dawidd6/action-download-artifact@v6 with: - workflow: ton-x86-64-linux.yml + workflow: build-ton-linux-x86-64-appimage.yml path: artifacts workflow_conclusion: success skip_unpack: true @@ -22,7 +38,7 @@ jobs: - name: Download and unzip Linux x86-64 artifacts uses: dawidd6/action-download-artifact@v6 with: - workflow: ton-x86-64-linux.yml + workflow: build-ton-linux-x86-64-appimage.yml path: artifacts workflow_conclusion: success skip_unpack: false @@ -30,7 +46,7 @@ jobs: - name: Download Mac x86-64 artifacts uses: dawidd6/action-download-artifact@v6 with: - workflow: ton-x86-64-macos.yml + workflow: build-ton-macos-13-x86-64-portable.yml path: artifacts 
workflow_conclusion: success skip_unpack: true @@ -38,7 +54,7 @@ jobs: - name: Download Mac arm64 artifacts uses: dawidd6/action-download-artifact@v6 with: - workflow: ton-arm64-macos.yml + workflow: build-ton-macos-14-arm64-portable.yml path: artifacts workflow_conclusion: success skip_unpack: true @@ -46,7 +62,7 @@ jobs: - name: Download and unzip Mac x86-64 artifacts uses: dawidd6/action-download-artifact@v6 with: - workflow: ton-x86-64-macos.yml + workflow: build-ton-macos-13-x86-64-portable.yml path: artifacts workflow_conclusion: success skip_unpack: false @@ -54,7 +70,7 @@ jobs: - name: Download and unzip arm64 artifacts uses: dawidd6/action-download-artifact@v6 with: - workflow: ton-arm64-macos.yml + workflow: build-ton-macos-14-arm64-portable.yml path: artifacts workflow_conclusion: success skip_unpack: false @@ -147,7 +163,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-win-binaries.zip + file: artifacts/ton-x86-64-windows.zip asset_name: ton-win-x86-64.zip tag: ${{ steps.tag.outputs.TAG }} @@ -155,7 +171,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-win-binaries/fift.exe + file: artifacts/ton-x86-64-windows/fift.exe asset_name: fift.exe tag: ${{ steps.tag.outputs.TAG }} @@ -163,7 +179,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-win-binaries/func.exe + file: artifacts/ton-x86-64-windows/func.exe asset_name: func.exe tag: ${{ steps.tag.outputs.TAG }} @@ -171,7 +187,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-win-binaries/tolk.exe + file: artifacts/ton-x86-64-windows/tolk.exe asset_name: tolk.exe tag: ${{ steps.tag.outputs.TAG }} @@ -179,7 +195,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: 
artifacts/ton-win-binaries/lite-client.exe + file: artifacts/ton-x86-64-windows/lite-client.exe asset_name: lite-client.exe tag: ${{ steps.tag.outputs.TAG }} @@ -187,7 +203,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-win-binaries/proxy-liteserver.exe + file: artifacts/ton-x86-64-windows/proxy-liteserver.exe asset_name: proxy-liteserver.exe tag: ${{ steps.tag.outputs.TAG }} @@ -195,7 +211,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-win-binaries/rldp-http-proxy.exe + file: artifacts/ton-x86-64-windows/rldp-http-proxy.exe asset_name: rldp-http-proxy.exe tag: ${{ steps.tag.outputs.TAG }} @@ -203,7 +219,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-win-binaries/http-proxy.exe + file: artifacts/ton-x86-64-windows/http-proxy.exe asset_name: http-proxy.exe tag: ${{ steps.tag.outputs.TAG }} @@ -211,7 +227,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-win-binaries/storage-daemon-cli.exe + file: artifacts/ton-x86-64-windows/storage-daemon-cli.exe asset_name: storage-daemon-cli.exe tag: ${{ steps.tag.outputs.TAG }} @@ -219,7 +235,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-win-binaries/storage-daemon.exe + file: artifacts/ton-x86-64-windows/storage-daemon.exe asset_name: storage-daemon.exe tag: ${{ steps.tag.outputs.TAG }} @@ -227,7 +243,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-win-binaries/tonlibjson.dll + file: artifacts/ton-x86-64-windows/tonlibjson.dll asset_name: tonlibjson.dll tag: ${{ steps.tag.outputs.TAG }} @@ -235,7 +251,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - 
file: artifacts/ton-win-binaries/emulator.dll + file: artifacts/ton-x86-64-windows/emulator.dll asset_name: libemulator.dll tag: ${{ steps.tag.outputs.TAG }} @@ -243,7 +259,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-win-binaries/tonlib-cli.exe + file: artifacts/ton-x86-64-windows/tonlib-cli.exe asset_name: tonlib-cli.exe tag: ${{ steps.tag.outputs.TAG }} @@ -253,7 +269,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-x86_64-macos-binaries.zip + file: artifacts/ton-x86_64-macos.zip asset_name: ton-mac-x86-64.zip tag: ${{ steps.tag.outputs.TAG }} @@ -261,7 +277,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-x86_64-macos-binaries/fift + file: artifacts/ton-x86_64-macos/fift asset_name: fift-mac-x86-64 tag: ${{ steps.tag.outputs.TAG }} @@ -269,7 +285,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-x86_64-macos-binaries/func + file: artifacts/ton-x86_64-macos/func asset_name: func-mac-x86-64 tag: ${{ steps.tag.outputs.TAG }} @@ -277,7 +293,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-x86_64-macos-binaries/tolk + file: artifacts/ton-x86_64-macos/tolk asset_name: tolk-mac-x86-64 tag: ${{ steps.tag.outputs.TAG }} @@ -285,7 +301,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-x86_64-macos-binaries/lite-client + file: artifacts/ton-x86_64-macos/lite-client asset_name: lite-client-mac-x86-64 tag: ${{ steps.tag.outputs.TAG }} @@ -293,7 +309,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-x86_64-macos-binaries/proxy-liteserver + file: 
artifacts/ton-x86_64-macos/proxy-liteserver asset_name: proxy-liteserver-mac-x86-64 tag: ${{ steps.tag.outputs.TAG }} @@ -301,7 +317,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-x86_64-macos-binaries/rldp-http-proxy + file: artifacts/ton-x86_64-macos/rldp-http-proxy asset_name: rldp-http-proxy-mac-x86-64 tag: ${{ steps.tag.outputs.TAG }} @@ -309,7 +325,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-x86_64-macos-binaries/http-proxy + file: artifacts/ton-x86_64-macos/http-proxy asset_name: http-proxy-mac-x86-64 tag: ${{ steps.tag.outputs.TAG }} @@ -317,7 +333,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-x86_64-macos-binaries/storage-daemon-cli + file: artifacts/ton-x86_64-macos/storage-daemon-cli asset_name: storage-daemon-cli-mac-x86-64 tag: ${{ steps.tag.outputs.TAG }} @@ -325,7 +341,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-x86_64-macos-binaries/storage-daemon + file: artifacts/ton-x86_64-macos/storage-daemon asset_name: storage-daemon-mac-x86-64 tag: ${{ steps.tag.outputs.TAG }} @@ -333,7 +349,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-x86_64-macos-binaries/libtonlibjson.dylib + file: artifacts/ton-x86_64-macos/libtonlibjson.dylib asset_name: tonlibjson-mac-x86-64.dylib tag: ${{ steps.tag.outputs.TAG }} @@ -341,7 +357,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-x86_64-macos-binaries/libemulator.dylib + file: artifacts/ton-x86_64-macos/libemulator.dylib asset_name: libemulator-mac-x86-64.dylib tag: ${{ steps.tag.outputs.TAG }} @@ -349,7 +365,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ 
secrets.GITHUB_TOKEN }} - file: artifacts/ton-x86_64-macos-binaries/tonlib-cli + file: artifacts/ton-x86_64-macos/tonlib-cli asset_name: tonlib-cli-mac-x86-64 tag: ${{ steps.tag.outputs.TAG }} @@ -360,7 +376,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-arm64-macos-binaries.zip + file: artifacts/ton-arm64-macos.zip asset_name: ton-mac-arm64.zip tag: ${{ steps.tag.outputs.TAG }} @@ -368,7 +384,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-arm64-macos-binaries/fift + file: artifacts/ton-arm64-macos/fift asset_name: fift-mac-arm64 tag: ${{ steps.tag.outputs.TAG }} @@ -376,7 +392,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-arm64-macos-binaries/func + file: artifacts/ton-arm64-macos/func asset_name: func-mac-arm64 tag: ${{ steps.tag.outputs.TAG }} @@ -384,7 +400,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-arm64-macos-binaries/tolk + file: artifacts/ton-arm64-macos/tolk asset_name: tolk-mac-arm64 tag: ${{ steps.tag.outputs.TAG }} @@ -392,7 +408,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-arm64-macos-binaries/lite-client + file: artifacts/ton-arm64-macos/lite-client asset_name: lite-client-mac-arm64 tag: ${{ steps.tag.outputs.TAG }} @@ -400,7 +416,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-arm64-macos-binaries/proxy-liteserver + file: artifacts/ton-arm64-macos/proxy-liteserver asset_name: proxy-liteserver-mac-arm64 tag: ${{ steps.tag.outputs.TAG }} @@ -408,7 +424,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-arm64-macos-binaries/rldp-http-proxy + file: 
artifacts/ton-arm64-macos/rldp-http-proxy asset_name: rldp-http-proxy-mac-arm64 tag: ${{ steps.tag.outputs.TAG }} @@ -416,7 +432,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-arm64-macos-binaries/http-proxy + file: artifacts/ton-arm64-macos/http-proxy asset_name: http-proxy-mac-arm64 tag: ${{ steps.tag.outputs.TAG }} @@ -424,7 +440,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-arm64-macos-binaries/storage-daemon-cli + file: artifacts/ton-arm64-macos/storage-daemon-cli asset_name: storage-daemon-cli-mac-arm64 tag: ${{ steps.tag.outputs.TAG }} @@ -432,7 +448,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-arm64-macos-binaries/storage-daemon + file: artifacts/ton-arm64-macos/storage-daemon asset_name: storage-daemon-mac-arm64 tag: ${{ steps.tag.outputs.TAG }} @@ -440,7 +456,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-arm64-macos-binaries/libtonlibjson.dylib + file: artifacts/ton-arm64-macos/libtonlibjson.dylib asset_name: tonlibjson-mac-arm64.dylib tag: ${{ steps.tag.outputs.TAG }} @@ -448,7 +464,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-arm64-macos-binaries/libemulator.dylib + file: artifacts/ton-arm64-macos/libemulator.dylib asset_name: libemulator-mac-arm64.dylib tag: ${{ steps.tag.outputs.TAG }} @@ -456,7 +472,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-arm64-macos-binaries/tonlib-cli + file: artifacts/ton-arm64-macos/tonlib-cli asset_name: tonlib-cli-mac-arm64 tag: ${{ steps.tag.outputs.TAG }} @@ -466,7 +482,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: 
artifacts/ton-x86_64-linux-binaries.zip + file: artifacts/ton-x86_64-linux.zip asset_name: ton-linux-x86_64.zip tag: ${{ steps.tag.outputs.TAG }} @@ -474,7 +490,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-x86_64-linux-binaries/fift + file: artifacts/ton-x86_64-linux/fift asset_name: fift-linux-x86_64 tag: ${{ steps.tag.outputs.TAG }} @@ -482,7 +498,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-x86_64-linux-binaries/func + file: artifacts/ton-x86_64-linux/func asset_name: func-linux-x86_64 tag: ${{ steps.tag.outputs.TAG }} @@ -490,7 +506,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-x86_64-linux-binaries/tolk + file: artifacts/ton-x86_64-linux/tolk asset_name: tolk-linux-x86_64 tag: ${{ steps.tag.outputs.TAG }} @@ -498,7 +514,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-x86_64-linux-binaries/lite-client + file: artifacts/ton-x86_64-linux/lite-client asset_name: lite-client-linux-x86_64 tag: ${{ steps.tag.outputs.TAG }} @@ -506,7 +522,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-x86_64-linux-binaries/proxy-liteserver + file: artifacts/ton-x86_64-linux/proxy-liteserver asset_name: proxy-liteserver-linux-x86_64 tag: ${{ steps.tag.outputs.TAG }} @@ -514,7 +530,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-x86_64-linux-binaries/rldp-http-proxy + file: artifacts/ton-x86_64-linux/rldp-http-proxy asset_name: rldp-http-proxy-linux-x86_64 tag: ${{ steps.tag.outputs.TAG }} @@ -522,7 +538,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-x86_64-linux-binaries/http-proxy 
+ file: artifacts/ton-x86_64-linux/http-proxy asset_name: http-proxy-linux-x86_64 tag: ${{ steps.tag.outputs.TAG }} @@ -530,7 +546,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-x86_64-linux-binaries/storage-daemon-cli + file: artifacts/ton-x86_64-linux/storage-daemon-cli asset_name: storage-daemon-cli-linux-x86_64 tag: ${{ steps.tag.outputs.TAG }} @@ -538,7 +554,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-x86_64-linux-binaries/storage-daemon + file: artifacts/ton-x86_64-linux/storage-daemon asset_name: storage-daemon-linux-x86_64 tag: ${{ steps.tag.outputs.TAG }} @@ -546,7 +562,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-x86_64-linux-binaries/libtonlibjson.so + file: artifacts/ton-x86_64-linux/libtonlibjson.so asset_name: tonlibjson-linux-x86_64.so tag: ${{ steps.tag.outputs.TAG }} @@ -554,7 +570,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-x86_64-linux-binaries/libemulator.so + file: artifacts/ton-x86_64-linux/libemulator.so asset_name: libemulator-linux-x86_64.so tag: ${{ steps.tag.outputs.TAG }} @@ -562,16 +578,124 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-x86_64-linux-binaries/tonlib-cli + file: artifacts/ton-x86_64-linux/tonlib-cli asset_name: tonlib-cli-linux-x86_64 tag: ${{ steps.tag.outputs.TAG }} + + # linux arm64 + + - name: Upload Linux arm64 artifacts + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-arm64-linux.zip + asset_name: ton-linux-arm64.zip + tag: ${{ steps.tag.outputs.TAG }} + + - name: Upload Linux arm64 single artifact - fift + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ 
secrets.GITHUB_TOKEN }} + file: artifacts/ton-arm64-linux/fift + asset_name: fift-linux-arm64 + tag: ${{ steps.tag.outputs.TAG }} + + - name: Upload Linux arm64 single artifact - func + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-arm64-linux/func + asset_name: func-linux-arm64 + tag: ${{ steps.tag.outputs.TAG }} + + - name: Upload Linux arm64 single artifact - tolk + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-arm64-linux/tolk + asset_name: tolk-linux-arm64 + tag: ${{ steps.tag.outputs.TAG }} + + - name: Upload Linux arm64 single artifact - lite-client + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-arm64-linux/lite-client + asset_name: lite-client-linux-arm64 + tag: ${{ steps.tag.outputs.TAG }} + + - name: Upload Linux arm64 single artifact - proxy-liteserver + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-arm64-linux/proxy-liteserver + asset_name: proxy-liteserver-linux-arm64 + tag: ${{ steps.tag.outputs.TAG }} + + - name: Upload Linux arm64 single artifact - rldp-http-proxy + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-arm64-linux/rldp-http-proxy + asset_name: rldp-http-proxy-linux-arm64 + tag: ${{ steps.tag.outputs.TAG }} + + - name: Upload Linux arm64 single artifact - http-proxy + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-arm64-linux/http-proxy + asset_name: http-proxy-linux-arm64 + tag: ${{ steps.tag.outputs.TAG }} + + - name: Upload Linux arm64 single artifact - storage-daemon-cli + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-arm64-linux/storage-daemon-cli + asset_name: 
storage-daemon-cli-linux-arm64 + tag: ${{ steps.tag.outputs.TAG }} + + - name: Upload Linux arm64 single artifact - storage-daemon + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-arm64-linux/storage-daemon + asset_name: storage-daemon-linux-arm64 + tag: ${{ steps.tag.outputs.TAG }} + + - name: Upload Linux arm64 single artifact - tonlibjson + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-arm64-linux/libtonlibjson.so + asset_name: tonlibjson-linux-arm64.so + tag: ${{ steps.tag.outputs.TAG }} + + - name: Upload Linux arm64 single artifact - libemulator + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-arm64-linux/libemulator.so + asset_name: libemulator-linux-arm64.so + tag: ${{ steps.tag.outputs.TAG }} + + - name: Upload Linux arm64 single artifact - tonlib-cli + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-arm64-linux/tonlib-cli + asset_name: tonlib-cli-linux-arm64 + tag: ${{ steps.tag.outputs.TAG }} + + - name: Upload WASM artifacts uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/ton-wasm-binaries.zip - asset_name: ton-wasm-binaries.zip + file: artifacts/ton-wasm.zip + asset_name: ton-wasm.zip tag: ${{ steps.tag.outputs.TAG }} - name: Upload Android Tonlib artifacts diff --git a/.github/workflows/ton-arm64-macos.yml b/.github/workflows/ton-arm64-macos.yml deleted file mode 100644 index f128680c..00000000 --- a/.github/workflows/ton-arm64-macos.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: MacOS TON build (portable, arm64) - -on: [push,workflow_dispatch,workflow_call] - -jobs: - build: - runs-on: macos-14 - - steps: - - uses: actions/checkout@v3 - with: - submodules: 'recursive' - - - uses: cachix/install-nix-action@v23 - with: - extra_nix_config: | - 
access-tokens = github.com=${{ secrets.GITHUB_TOKEN }} - - - name: Build TON - run: | - git submodule sync --recursive - git submodule update - cp assembly/nix/build-macos-nix.sh . - chmod +x build-macos-nix.sh - ./build-macos-nix.sh -t - - - name: Simple binaries test - run: | - sudo mv /nix/store /nix/store2 - artifacts/validator-engine -V - artifacts/lite-client -V - artifacts/fift -V - artifacts/func -V - artifacts/tolk -v - - - name: Upload artifacts - uses: actions/upload-artifact@master - with: - name: ton-arm64-macos-binaries - path: artifacts diff --git a/.github/workflows/ton-x86-64-linux.yml b/.github/workflows/ton-x86-64-linux.yml deleted file mode 100644 index 4cdafa11..00000000 --- a/.github/workflows/ton-x86-64-linux.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: Ubuntu TON build (portable, x86-64) - -on: [push,workflow_dispatch,workflow_call] - -jobs: - build: - runs-on: ubuntu-22.04 - - steps: - - run: | - sudo apt update - sudo apt install -y apt-utils - - - uses: actions/checkout@v3 - with: - submodules: 'recursive' - - - uses: cachix/install-nix-action@v23 - with: - extra_nix_config: | - access-tokens = github.com=${{ secrets.GITHUB_TOKEN }} - - - name: Build TON - run: | - git submodule sync --recursive - git submodule update - cp assembly/nix/build-linux-x86-64-nix.sh . 
- chmod +x build-linux-x86-64-nix.sh - ./build-linux-x86-64-nix.sh -t - - - name: Simple binaries test - run: | - sudo mv /nix/store /nix/store2 - artifacts/validator-engine -V - artifacts/lite-client -V - artifacts/fift -V - artifacts/func -V - artifacts/tolk -v - - - name: Upload artifacts - uses: actions/upload-artifact@master - with: - name: ton-x86_64-linux-binaries - path: artifacts diff --git a/.github/workflows/ton-x86-64-macos.yml b/.github/workflows/ton-x86-64-macos.yml deleted file mode 100644 index 41b8fa23..00000000 --- a/.github/workflows/ton-x86-64-macos.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: MacOS TON build (portable, x86-64) - -on: [push,workflow_dispatch,workflow_call] - -jobs: - build: - runs-on: macos-13 - - steps: - - uses: actions/checkout@v3 - with: - submodules: 'recursive' - - - uses: cachix/install-nix-action@v23 - with: - extra_nix_config: | - access-tokens = github.com=${{ secrets.GITHUB_TOKEN }} - - - name: Build TON - run: | - git submodule sync --recursive - git submodule update - cp assembly/nix/build-macos-nix.sh . 
- chmod +x build-macos-nix.sh - ./build-macos-nix.sh -t - - - name: Simple binaries test - run: | - sudo mv /nix/store /nix/store2 - artifacts/validator-engine -V - artifacts/lite-client -V - artifacts/fift -V - artifacts/func -V - artifacts/tolk -v - - - name: Upload artifacts - uses: actions/upload-artifact@master - with: - name: ton-x86_64-macos-binaries - path: artifacts diff --git a/.github/workflows/ton-x86-64-windows.yml b/.github/workflows/ton-x86-64-windows.yml index c3c06f90..baaad778 100644 --- a/.github/workflows/ton-x86-64-windows.yml +++ b/.github/workflows/ton-x86-64-windows.yml @@ -32,5 +32,5 @@ jobs: - name: Upload artifacts uses: actions/upload-artifact@master with: - name: ton-win-binaries + name: ton-x86-64-windows path: artifacts diff --git a/README.md b/README.md index 96516d44..d0aa8cb3 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ Usually, the response to your pull request will indicate which section it falls ## Build TON blockchain -### Ubuntu 20.4, 22.04 (x86-64, aarch64) +### Ubuntu 20.4, 22.04, 24.04 (x86-64, aarch64) Install additional system libraries ```bash sudo apt-get update @@ -141,18 +141,10 @@ Compile TON tonlib library ./build-android-tonlib.sh ``` -### Build TON portable binaries with Nix package manager -You need to install Nix first. -```bash - sh <(curl -L https://nixos.org/nix/install) --daemon -``` -Then compile TON with Nix by executing below command from the root folder: -```bash - cp -r assembly/nix/* . - export NIX_PATH=nixpkgs=https://github.com/nixOS/nixpkgs/archive/23.05.tar.gz - nix-build linux-x86-64-static.nix -``` -More examples for other platforms can be found under `assembly/nix`. +### TON portable binaries + +Linux portable binaries are wrapped into AppImages, at the same time MacOS portable binaries are statically linked executables. +Linux and MacOS binaries are available for both x86-64 and arm64 architectures. 
## Running tests diff --git a/assembly/appimage/AppRun b/assembly/appimage/AppRun new file mode 100644 index 00000000..c7f147b3 --- /dev/null +++ b/assembly/appimage/AppRun @@ -0,0 +1,3 @@ +#!/bin/sh +export LD_LIBRARY_PATH="${APPDIR}/usr/lib:${LD_LIBRARY_PATH}" +exec "$(dirname $0)"/usr/bin/app "$@" diff --git a/assembly/appimage/create-appimages.sh b/assembly/appimage/create-appimages.sh new file mode 100644 index 00000000..2a8cd0ec --- /dev/null +++ b/assembly/appimage/create-appimages.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +if [ ! -d "artifacts" ]; then + echo "No artifacts found." + exit 2 +fi +# x86_64 or aarch64 +ARCH=$1 + +rm -rf appimages + +mkdir -p appimages/artifacts + +wget -nc https://github.com/AppImage/appimagetool/releases/download/continuous/appimagetool-$ARCH.AppImage +chmod +x ./appimagetool-$ARCH.AppImage + +cd appimages +for file in ../artifacts/*; do + if [[ -f "$file" && "$file" != *.so ]]; then + appName=$(basename "$file") + echo $appName + # prepare AppDir + mkdir -p $appName.AppDir/usr/{bin,lib} + cp ../AppRun $appName.AppDir/AppRun + sed -i "s/app/$appName/g" $appName.AppDir/AppRun + chmod +x ./$appName.AppDir/AppRun + printf '[Desktop Entry]\nName='$appName'\nExec='$appName'\nIcon='$appName'\nType=Application\nCategories=Utility;\n' > $appName.AppDir/$appName.desktop + cp ../ton.png $appName.AppDir/$appName.png + cp $file $appName.AppDir/usr/bin/ + cp ../build/openssl_3/libcrypto.so.3 \ + /lib/$ARCH-linux-gnu/libatomic.so.1 \ + /lib/$ARCH-linux-gnu/libsodium.so.23 \ + /lib/$ARCH-linux-gnu/libz.so.1 \ + /lib/$ARCH-linux-gnu/liblz4.so.1 \ + /lib/$ARCH-linux-gnu/libmicrohttpd.so.12 \ + /lib/$ARCH-linux-gnu/libreadline.so.8 \ + /lib/$ARCH-linux-gnu/libstdc++.so.6 \ + $appName.AppDir/usr/lib/ + + chmod +x ./$appName.AppDir/usr/bin/$appName + # create AppImage + ./../appimagetool-$ARCH.AppImage -l $appName.AppDir + mv $appName-$ARCH.AppImage artifacts/$appName + fi +done + +ls -larth artifacts +cp -r ../artifacts/{smartcont,lib} artifacts/ +pwd 
+ls -larth artifacts diff --git a/assembly/appimage/ton.png b/assembly/appimage/ton.png new file mode 100644 index 0000000000000000000000000000000000000000..2a25c863dbab04619e4ff51dd67612439d84f6f7 GIT binary patch literal 5571 zcmV;!6+G&RP)Px#1ZP1_K>z@;j|==^1poj532;bRa{vGf6951U69E94oEQKA6h5>%>9^eF z{EAB5>y)Iood3J`o_p>&ny%{}#0I+Jsh3a1{_~Ug>7lSE5%cIN@_$gGc|F1Kf>7*x zw};NB$kL6cj+A#D-Iwj}5$bKIKUclYepmum8chzMV@;N`hYUF9-+}05d;t zA6nAdM>;&Qe`V>~c}1jy(18M=qI}oYUI)MLg;VPH^fgW=9V9m>fSlC4*l{T?!^5cw zYweFzlMXTw6u<=DZ%f>LpqF%HNz+~8V3cC=A{#{M~)4UjvDM(QnF=Hm~<3I zX#t$>8D98m59z4N@%t+lSNTatJrosyw*4w;A=l8iskmrXO8~3h?mc`lK{`k_meu%= zuB{*)1yDr*T63>w1WlhBUqIhdPTEotf8!Nw5EN3v1gaT;mPtMwlrf)r`AFQ~kTsD7^R08N3N>;pprf)B88!Bf?6q+iJ z0MN7PB+~dyWe|Y&{1uO;kq+V*hUq+Z18ED3*aFa6qMpPc=^(BU4D0nJq%9z#dbnCY zkEiE6X#K8}h#4>9;JG;IiiVIQ0M2=MM9dkHU;>!4xBEi>2&8u&%B5ls#Q~>zlmk&OtL)=Y)PIZr<^-GEIT0iUJ&XvK}2oP-Q z7#6`Rg@6fYdSWX_`(+#n06+Zdz|VAumnjfhzf$n?y15VlTEBvD^3q)vKs1p;>sJ=z z)%PY`o`Jh5fcGyBm$Y<}4x%LGEggq043Ul-xV8vh^>)wUix}U7q^YY7ytBrgG(_$Q zpsJ;_D}lSb$T$^2Z&$qwyTvX{fo|N}ht`iwqC1hAvB!nP^iGceCA+$!!%5OXkckur zd;<-Yq~i#VjsTS{ooM|?9t`uSdcje@h;t)AMN3DI+deNu2+I6=Z=*xu5)O?3bq9LT z`VmU{MzrOv4izqOLI6j;h#zW0k0-*w(e~kY&pWX{VEYtkAdxD?-7W;gKz&uQ&6U7y z7XU^&AcbIe+^Eg+LhDBg!)_C@Q2^TZE2M=W1z@Wo*(HEc;bQhe#1b_7#sOOdP}Gc7 zDiDhaH{0S$v0eZtdq##vaJv_=nD|KQM7Q>> z(3(0(3qfqdS~Igu0B@a(dz_7|MifQY51g|sO2V==08V}oC$VZVV9f}C){i)hRVHVF z04iE=>W{cUWs3zGus{G9=#RKUcY-tup-=(fkS{fgbl0)Z#1_|5}Q=mkQG!TRz5`(0{0{o=2{?o~ z;pQ;uBiH!p$ymdO1JV?)q8tRM1aK*utl1-WuB-T#>W~lpSF|LOdT2+7(5d|{ZLFG6 zY(BxE5`e&;|3pfEY6fW|>}bzi_Jr7d<$ORh2$%~XKI{qa5>fzGots7k{Q|}tt3!U1 zQOsQ2B3l2~s(onv7GgZi#g#tN5r?8?^RqJMBY<$~*SA$0S4}#Ib)-_d&sZ1;h3R+D zWCYN5I#jF7DpFg~`t8E&)spGQD-xy#Y&Hc-id5XLdvleBp-Oh*)wEL&RgjJlm>FBCi|jo3`;F^8&H zTf#4{@B@|-E zr?IUc36lB8xO%AnNBhYYkAvYYtk>yi_Kl=Re-io_#c`H2FD-4jwTN`Ck!1uB#`-;; z4GSmV4-JNGrC*z0OD7R<>_1i}{kX0F`O~G?fsz{*#uwE1VLxeIhuBk~-OEZzi<?)U=Kw5$z{?Kir(uE$_p=g z?Y|-KAe}rSg$D_)kxBuxY50Y=AMr;$AMh6R$MGNm0rn^s?(HNkks;P|5L#!L2SQE2 
z$l;0*X{by&G^GD@zraK2;V;Pc1rflScLfdsO6b7&wyw>1nAp=@4|xcV9#^n|RR?=B zjj{QI*fEmYbHLTuv)?QxEhTy8FQB(Cd{8a9e(~1;8NA+MP?nA-Z&x}v;;B0zw=Yx5 z99ab5@dS4Q1DT?YitdXIf^NKLNGk7HS^$Hx)P?%@L;aYmbUq z#n9XIO89&v6u_v98sZW?w?T;rxeFUXac_d}eC0w$0T^#s-GV1SxA78Ptvj80CsDi^-Aj?-gjom`yaoHk)C`6fD=3^_r+j0 zCFq$iN}m4hp!dv>%(Gs2*p^V0K+OT)j?{4?Cx;?lPeR@iV3Yw+q?2T2CtV?0;QRq}Be%UO zOr5wwP(lD_pr==Vdw)4;3Cr>CKyIOEhn)MqQ&Bj+7a3%3Uqx+8upVFWdvZ{Dl|YyIpp06oQgsMF!=b&bxHB45p)m$e*UF* zeNIFz0es`l9@1iEM6_@K&;>VtCi$k&RMZlH!R$SdiID!JVCThP7rf;|Nlu5;qnrS~ zhypkM>S~4Y2(1KNYROKuCO;XJ6MzBiZ0kE~gxM1~1iI9yoN@6cs3-vN+16;I07nI2 zdA`88B}GR`0YH5n5x)TbK|r3bbXG~PqL!fOZ5w=W5praVbC)RW^1xXoB|u#PEPET8 ze7CP_&MhP?}ll-vED05tIHhhO99!|_jF z4U-lNN=1EU<(5#)`~5p4PX%QP%q9AxuX9ve>BoOoLRw6y6!n>v@7)>pJ|KH8G#Y;R zYsf;76PGAPeS*1vUdTH=BFWM3Lip$ZQ_B^2FDP%Md{jrERi@&6E`=NOw zmXq?F*Zm_QspHX^uM6=9fbL zT%=jrO#cAZ&rNTGq)or&TawoWV&Z>59+Hy}IhsfD%zEZOck|0={S#>Z)->aP(*lTq zW5Of_79+4}VTiPBX5#|LA3D{C^PdS6jZXa0^s}L9I(tI>-OI{I%Vu7>oB0o6*iSA{ z0NL?2K~}@tmw;k&SZMl7=0A}Zv+`j-xfl}wucw^c<7$j~&aib6Br21MM{i^P6EHE$ z4l#HlfP;0o4xY>nyCJJaSUPe_jY(NCCdXDX%E)n_WTg*iw zVCl#ySwfNOw!{PHrSOi*vVEiO^OubsOGU7seP089YX2`vlS zvMC1mwdbSg`ZQr&0A&~tkrhy#(2++W>DlrC_=UA&3~p@Y5- zslxm)>^}~0TPa1`rU^!lQy>Naeii-kf(}eVejMbsQWktzEWrFK;B3eX;Gs;!90~`o&Eplf0a)24X_lD( zECOg?KXRcOL!8B!q3e@|@7|gv4>V4JfN0P2Gzg&U6H4|6_sIhlq=n={^NjFOEf3ee zG@N=cyGdskz?UmAHrkxP4)@R6CP29y6=Xqqc9X`TvmpNMxXVk=kisQ?Eg`1>I4OU^ zIsfpno6g4vj$qIm6?u#qM}R^JZFepDVBc8`doGlG0=T;dR|4iFTq?ATVLz1IH6x!H z=M%ttYta`(h4Z$E$=>Ms=-zy0Y&HcN&7qE%plQhh#H?n@gTt^Xr1A$u!@Vbmu* zSLiZ4Pzhj0v7;5>gvMT%C`Nr!Go3ES5A6dz|1hou=+Ly3CHb(XfIDCGTv$)Lg+(iX z*=6|cp~Yu|lm1W96z+V{Gpm#~h=IYZFK@z?04=w^VzH<4M$Gh2O~)e)vlRvb+z`Q) z04*nat=y|B-ib$E7^n&}ST)?Nw?UY{ z?2A0JycTGX@C6pSMCs96{y-2(A-F$KC_%N9$d>&rV6exr7 zQb`Lj<7!+tOnkvqk_0 z`&VLlNyH%z)>&q3mI+|(ypTt;?6)CEPBd-x9Ls8%T4}QSRy;S1*p9xHD%}Gm7JsvG zF@V^L!;F?~&CeWoi&)D(}>lM(i(BYP$domd_5ZuEk<|2m@3_D0T{HeFSX<}Cn6JuNnG0C!kH=;1Ypov zU%hs=Nez2M9ah%{43i)oXK{KBa6KPf9A5S|mVH*8L#xYH&bI7?7b|WGz#y7Pm9${> 
zD|P9wuPULbXKfoU3m^l=POAwIp<-@|Zw!o?QxHyGIu`;kKn*AbKd+k?0T>t$CB)I? zAhr)^#U+pFxL@rLOE5Esdudqn&1dsuj#qz=DESFOO7-&tM9v#c1 zNP*_%Hfb8a0x)>5ZD_>-EEFd;Z>%W0Z@z2$T&8d>fDAYt5*PlRdjhy!jE7i>aYLdp zEu`^p{RRSz0At`#APPd5eg=^o@5W=DUxsIkxn^!zKp^Wk5NQM$1M4XAh+#>OiI)mZ zNC6leIv=llx1V$nw>h$=6kMy_T|^Z?2CXIPNeq$>;tGMV-cUl?0wT5mGT@BJ9mJgT zOhN_$WI!L0lZds_&4WAwFu?O=4$xCyJx!vV22u$iLtFpIti2ddYCWgdmEXi!EN9_J zD1Z!?qscGr=^`DZFfMPZx*;OxSq*6gkRg@w_;%ph08Zl3=>e%vDM>DXF|?h%o@9b_ z5L0{seM>oMON}Z5$gp&O_s=ekkPecKyJq-5y4U(@q}-yG0LDO{=~SfgY%-y!05Y8I zOWpLc>Tc6B4_C}Dv%eAxuP7~mF@Ab7zVQRKB8!@r6gMo2kdDHj048v(E%EIGy`&=} z2kS~#J5qW`6i@&;nYp{8Jto7$nHkaAHo22~nSW3KR8;J`+A~brLSW45d+o0=Dgh_} zW`5p2w4{~CTowCQmad(L(;GHW0EIF%lG^-1-z(=mo}`0ggxCMjT&?-rWf4DSlPm}n zfL(OPlKajkp8oydiFhP^YKJw+m*({ZBe#SGzkg@sujd4BEb)%EYSb} literal 0 HcmV?d00001 diff --git a/assembly/cicd/jenkins/test-builds.groovy b/assembly/cicd/jenkins/test-builds.groovy deleted file mode 100644 index 0b5ab7a3..00000000 --- a/assembly/cicd/jenkins/test-builds.groovy +++ /dev/null @@ -1,237 +0,0 @@ -pipeline { - - agent none - stages { - stage('Run Builds') { - parallel { - stage('Ubuntu 20.04 x86-64 (shared)') { - agent { - label 'Ubuntu_x86-64' - } - steps { - timeout(time: 180, unit: 'MINUTES') { - sh ''' - cp assembly/native/build-ubuntu-shared.sh . - chmod +x build-ubuntu-shared.sh - ./build-ubuntu-shared.sh -a - ''' - sh ''' - cd artifacts - zip -9r ton-x86_64-linux-shared ./* - ''' - archiveArtifacts artifacts: 'artifacts/ton-x86_64-linux-shared.zip' - } - } - } - stage('Ubuntu 20.04 x86-64 (portable)') { - agent { - label 'Ubuntu_x86-64' - } - steps { - timeout(time: 180, unit: 'MINUTES') { - sh ''' - cp assembly/nix/build-linux-x86-64-nix.sh . 
- chmod +x build-linux-x86-64-nix.sh - ./build-linux-x86-64-nix.sh - ''' - sh ''' - cd artifacts - zip -9r ton-x86-64-linux-portable ./* - ''' - archiveArtifacts artifacts: 'artifacts/ton-x86-64-linux-portable.zip' - } - } - } - stage('Ubuntu 20.04 aarch64 (shared)') { - agent { - label 'Ubuntu_arm64' - } - steps { - timeout(time: 180, unit: 'MINUTES') { - sh ''' - cp assembly/native/build-ubuntu-shared.sh . - chmod +x build-ubuntu-shared.sh - ./build-ubuntu-shared.sh -a - ''' - sh ''' - cd artifacts - zip -9r ton-arm64-linux-shared ./* - ''' - archiveArtifacts artifacts: 'artifacts/ton-arm64-linux-shared.zip' - } - } - } - stage('Ubuntu 20.04 aarch64 (portable)') { - agent { - label 'Ubuntu_arm64' - } - steps { - timeout(time: 180, unit: 'MINUTES') { - sh ''' - cp assembly/nix/build-linux-arm64-nix.sh . - chmod +x build-linux-arm64-nix.sh - ./build-linux-arm64-nix.sh - ''' - sh ''' - cd artifacts - zip -9r ton-arm64-linux-portable ./* - ''' - archiveArtifacts artifacts: 'artifacts/ton-arm64-linux-portable.zip' - } - } - } - stage('macOS 12.7 x86-64 (shared)') { - agent { - label 'macOS_12.7_x86-64' - } - steps { - timeout(time: 180, unit: 'MINUTES') { - sh ''' - cp assembly/native/build-macos-shared.sh . - chmod +x build-macos-shared.sh - ./build-macos-shared.sh -a - ''' - sh ''' - cd artifacts - zip -9r ton-x86-64-macos-shared ./* - ''' - archiveArtifacts artifacts: 'artifacts/ton-x86-64-macos-shared.zip' - } - } - } - stage('macOS 12.7 x86-64 (portable)') { - agent { - label 'macOS_12.7_x86-64' - } - steps { - timeout(time: 180, unit: 'MINUTES') { - sh ''' - cp assembly/nix/build-macos-nix.sh . 
- chmod +x build-macos-nix.sh - ./build-macos-nix.sh - ''' - sh ''' - cd artifacts - zip -9r ton-x86-64-macos-portable ./* - ''' - archiveArtifacts artifacts: 'artifacts/ton-x86-64-macos-portable.zip' - } - } - } - stage('macOS 12.6 aarch64 (shared)') { - agent { - label 'macOS_12.6-arm64-m1' - } - steps { - timeout(time: 180, unit: 'MINUTES') { - sh ''' - cp assembly/native/build-macos-shared.sh . - chmod +x build-macos-shared.sh - ./build-macos-shared.sh -a - ''' - sh ''' - cd artifacts - zip -9r ton-arm64-macos-m1-shared ./* - ''' - archiveArtifacts artifacts: 'artifacts/ton-arm64-macos-m1-shared.zip' - } - } - } - stage('macOS 12.6 aarch64 (portable)') { - agent { - label 'macOS_12.6-arm64-m1' - } - steps { - timeout(time: 180, unit: 'MINUTES') { - sh ''' - cp assembly/nix/build-macos-nix.sh . - chmod +x build-macos-nix.sh - ./build-macos-nix.sh - ''' - sh ''' - cd artifacts - zip -9r ton-arm64-macos-portable ./* - ''' - archiveArtifacts artifacts: 'artifacts/ton-arm64-macos-portable.zip' - } - } - } - stage('macOS 13.2 aarch64 (shared)') { - agent { - label 'macOS_13.2-arm64-m2' - } - steps { - timeout(time: 180, unit: 'MINUTES') { - sh ''' - cp assembly/native/build-macos-shared.sh . - chmod +x build-macos-shared.sh - ./build-macos-shared.sh -a - ''' - sh ''' - cd artifacts - zip -9r ton-arm64-macos-m2-shared ./* - ''' - archiveArtifacts artifacts: 'artifacts/ton-arm64-macos-m2-shared.zip' - } - } - } - stage('Windows Server 2022 x86-64') { - agent { - label 'Windows_x86-64' - } - steps { - timeout(time: 180, unit: 'MINUTES') { - bat ''' - copy assembly\\native\\build-windows.bat . - build-windows.bat - ''' - bat ''' - cd artifacts - zip -9r ton-x86-64-windows ./* - ''' - archiveArtifacts artifacts: 'artifacts/ton-x86-64-windows.zip' - } - } - } - stage('Android Tonlib') { - agent { - label 'Ubuntu_x86-64' - } - steps { - timeout(time: 180, unit: 'MINUTES') { - sh ''' - cp assembly/android/build-android-tonlib.sh . 
- chmod +x build-android-tonlib.sh - ./build-android-tonlib.sh -a - ''' - sh ''' - cd artifacts/tonlib-android-jni - zip -9r ton-android-tonlib ./* - ''' - archiveArtifacts artifacts: 'artifacts/tonlib-android-jni/ton-android-tonlib.zip' - } - } - } - stage('WASM fift func emulator') { - agent { - label 'Ubuntu_x86-64' - } - steps { - timeout(time: 180, unit: 'MINUTES') { - sh ''' - cp assembly/wasm/fift-func-wasm-build-ubuntu.sh . - chmod +x fift-func-wasm-build-ubuntu.sh - ./fift-func-wasm-build-ubuntu.sh -a - ''' - sh ''' - cd artifacts - zip -9r ton-wasm-binaries ./* - ''' - archiveArtifacts artifacts: 'artifacts/ton-wasm-binaries.zip' - } - } - } - } - } - } -} diff --git a/assembly/native/build-ubuntu-appimages.sh b/assembly/native/build-ubuntu-appimages.sh new file mode 100644 index 00000000..4e63234d --- /dev/null +++ b/assembly/native/build-ubuntu-appimages.sh @@ -0,0 +1,109 @@ +#/bin/bash + +with_tests=false +with_artifacts=false + + +while getopts 'ta' flag; do + case "${flag}" in + t) with_tests=true ;; + a) with_artifacts=true ;; + *) break + ;; + esac +done + +if [ ! -d "build" ]; then + mkdir build + cd build +else + cd build + rm -rf .ninja* CMakeCache.txt +fi + +export CC=$(which clang-16) +export CXX=$(which clang++-16) +export CCACHE_DISABLE=1 + +if [ ! -d "openssl_3" ]; then + git clone https://github.com/openssl/openssl openssl_3 + cd openssl_3 + opensslPath=`pwd` + git checkout openssl-3.1.4 + ./config + make build_libs -j12 + test $? -eq 0 || { echo "Can't compile openssl_3"; exit 1; } + cd .. +else + opensslPath=$(pwd)/openssl_3 + echo "Using compiled openssl_3" +fi + +cmake -GNinja .. \ +-DCMAKE_BUILD_TYPE=Release \ +-DPORTABLE=1 \ +-DOPENSSL_ROOT_DIR=$opensslPath \ +-DOPENSSL_INCLUDE_DIR=$opensslPath/include \ +-DOPENSSL_CRYPTO_LIBRARY=$opensslPath/libcrypto.so + + +test $? 
-eq 0 || { echo "Can't configure ton"; exit 1; } + +if [ "$with_tests" = true ]; then +ninja storage-daemon storage-daemon-cli fift func tolk tonlib tonlibjson tonlib-cli \ + validator-engine lite-client pow-miner validator-engine-console blockchain-explorer \ + generate-random-id json2tlo dht-server http-proxy rldp-http-proxy \ + adnl-proxy create-state emulator test-ed25519 test-ed25519-crypto test-bigint \ + test-vm test-fift test-cells test-smartcont test-net test-tdactor test-tdutils \ + test-tonlib-offline test-adnl test-dht test-rldp test-rldp2 test-catchain \ + test-fec test-tddb test-db test-validator-session-state test-emulator proxy-liteserver + test $? -eq 0 || { echo "Can't compile ton"; exit 1; } +else +ninja storage-daemon storage-daemon-cli fift func tolk tonlib tonlibjson tonlib-cli \ + validator-engine lite-client pow-miner validator-engine-console blockchain-explorer \ + generate-random-id json2tlo dht-server http-proxy rldp-http-proxy \ + adnl-proxy create-state emulator proxy-liteserver + test $? -eq 0 || { echo "Can't compile ton"; exit 1; } +fi + +# simple binaries' test +./storage/storage-daemon/storage-daemon -V || exit 1 +./validator-engine/validator-engine -V || exit 1 +./lite-client/lite-client -V || exit 1 +./crypto/fift -V || exit 1 + +echo validator-engine +ldd ./validator-engine/validator-engine || exit 1 +ldd ./validator-engine-console/validator-engine-console || exit 1 +ldd ./crypto/fift || exit 1 +echo blockchain-explorer +ldd ./blockchain-explorer/blockchain-explorer || exit 1 +echo libtonlibjson.so +ldd ./tonlib/libtonlibjson.so.0.5 || exit 1 +echo libemulator.so +ldd ./emulator/libemulator.so || exit 1 + +cd .. 
+ +if [ "$with_artifacts" = true ]; then + rm -rf artifacts + mkdir artifacts + mv build/tonlib/libtonlibjson.so.0.5 build/tonlib/libtonlibjson.so + cp build/storage/storage-daemon/storage-daemon build/storage/storage-daemon/storage-daemon-cli \ + build/crypto/fift build/crypto/tlbc build/crypto/func build/tolk/tolk build/crypto/create-state build/blockchain-explorer/blockchain-explorer \ + build/validator-engine-console/validator-engine-console build/tonlib/tonlib-cli build/utils/proxy-liteserver \ + build/tonlib/libtonlibjson.so build/http/http-proxy build/rldp-http-proxy/rldp-http-proxy \ + build/dht-server/dht-server build/lite-client/lite-client build/validator-engine/validator-engine \ + build/utils/generate-random-id build/utils/json2tlo build/adnl/adnl-proxy build/emulator/libemulator.so \ + artifacts + test $? -eq 0 || { echo "Can't copy final binaries"; exit 1; } + cp -R crypto/smartcont artifacts + cp -R crypto/fift/lib artifacts + chmod -R +x artifacts/* +fi + +if [ "$with_tests" = true ]; then + cd build +# ctest --output-on-failure -E "test-catchain|test-actors|test-smartcont|test-adnl|test-validator-session-state|test-dht|test-rldp" + ctest --output-on-failure --timeout 1800 +fi diff --git a/assembly/native/build-ubuntu-portable-libs.sh b/assembly/native/build-ubuntu-portable-libs.sh new file mode 100644 index 00000000..2f0a1ba4 --- /dev/null +++ b/assembly/native/build-ubuntu-portable-libs.sh @@ -0,0 +1,132 @@ +#/bin/bash + +#sudo apt-get update +#sudo apt-get install -y build-essential git cmake ninja-build automake libtool texinfo autoconf libc++-dev libc++abi-dev + +with_artifacts=false + +while getopts 'ta' flag; do + case "${flag}" in + a) with_artifacts=true ;; + *) break + ;; + esac +done + +if [ ! -d "build" ]; then + mkdir build + cd build +else + cd build + rm -rf .ninja* CMakeCache.txt +fi + +export CC=$(which clang) +export CXX=$(which clang++) +export CCACHE_DISABLE=1 + +if [ ! 
-d "lz4" ]; then +git clone https://github.com/lz4/lz4.git +cd lz4 +lz4Path=`pwd` +git checkout v1.9.4 +CFLAGS="-fPIC" make -j12 +test $? -eq 0 || { echo "Can't compile lz4"; exit 1; } +cd .. +# ./lib/liblz4.a +# ./lib +else + lz4Path=$(pwd)/lz4 + echo "Using compiled lz4" +fi + +if [ ! -d "libsodium" ]; then + export LIBSODIUM_FULL_BUILD=1 + git clone https://github.com/jedisct1/libsodium.git + cd libsodium + sodiumPath=`pwd` + git checkout 1.0.18 + ./autogen.sh + ./configure --with-pic --enable-static + make -j12 + test $? -eq 0 || { echo "Can't compile libsodium"; exit 1; } + cd .. +else + sodiumPath=$(pwd)/libsodium + echo "Using compiled libsodium" +fi + +if [ ! -d "openssl_3" ]; then + git clone https://github.com/openssl/openssl openssl_3 + cd openssl_3 + opensslPath=`pwd` + git checkout openssl-3.1.4 + ./config + make build_libs -j12 + test $? -eq 0 || { echo "Can't compile openssl_3"; exit 1; } + cd .. +else + opensslPath=$(pwd)/openssl_3 + echo "Using compiled openssl_3" +fi + +if [ ! -d "zlib" ]; then + git clone https://github.com/madler/zlib.git + cd zlib + zlibPath=`pwd` + ./configure --static + make -j12 + test $? -eq 0 || { echo "Can't compile zlib"; exit 1; } + cd .. +else + zlibPath=$(pwd)/zlib + echo "Using compiled zlib" +fi + +if [ ! -d "libmicrohttpd" ]; then + git clone https://git.gnunet.org/libmicrohttpd.git + cd libmicrohttpd + libmicrohttpdPath=`pwd` + ./autogen.sh + ./configure --enable-static --disable-tests --disable-benchmark --disable-shared --disable-https --with-pic + make -j12 + test $? -eq 0 || { echo "Can't compile libmicrohttpd"; exit 1; } + cd .. +else + libmicrohttpdPath=$(pwd)/libmicrohttpd + echo "Using compiled libmicrohttpd" +fi + +cmake -GNinja .. 
\ +-DPORTABLE=1 \ +-DCMAKE_BUILD_TYPE=Release \ +-DOPENSSL_FOUND=1 \ +-DOPENSSL_INCLUDE_DIR=$opensslPath/include \ +-DOPENSSL_CRYPTO_LIBRARY=$opensslPath/libcrypto.a \ +-DZLIB_FOUND=1 \ +-DZLIB_INCLUDE_DIR=$zlibPath \ +-DZLIB_LIBRARIES=$zlibPath/libz.a \ +-DSODIUM_FOUND=1 \ +-DSODIUM_INCLUDE_DIR=$sodiumPath/src/libsodium/include \ +-DSODIUM_LIBRARY_RELEASE=$sodiumPath/src/libsodium/.libs/libsodium.a \ +-DMHD_FOUND=1 \ +-DMHD_INCLUDE_DIR=$libmicrohttpdPath/src/include \ +-DMHD_LIBRARY=$libmicrohttpdPath/src/microhttpd/.libs/libmicrohttpd.a \ +-DLZ4_FOUND=1 \ +-DLZ4_INCLUDE_DIRS=$lz4Path/lib \ +-DLZ4_LIBRARIES=$lz4Path/lib/liblz4.a + + + +test $? -eq 0 || { echo "Can't configure ton"; exit 1; } + +ninja tonlibjson emulator +test $? -eq 0 || { echo "Can't compile ton"; exit 1; } + +cd .. + +mkdir artifacts +mv build/tonlib/libtonlibjson.so.0.5 build/tonlib/libtonlibjson.so +cp build/tonlib/libtonlibjson.so \ + build/emulator/libemulator.so \ + artifacts diff --git a/assembly/native/build-ubuntu-portable.sh b/assembly/native/build-ubuntu-portable.sh index 389c114e..16e77ac8 100644 --- a/assembly/native/build-ubuntu-portable.sh +++ b/assembly/native/build-ubuntu-portable.sh @@ -1,7 +1,7 @@ #/bin/bash #sudo apt-get update -#sudo apt-get install -y build-essential git cmake ninja-build automake libtool texinfo autoconf +#sudo apt-get install -y build-essential git cmake ninja-build automake libtool texinfo autoconf libc++-dev libc++abi-dev with_tests=false with_artifacts=false @@ -24,8 +24,8 @@ else rm -rf .ninja* CMakeCache.txt fi -export CC=$(which clang-16) -export CXX=$(which clang++-16) +export CC=$(which clang) +export CXX=$(which clang++) export CCACHE_DISABLE=1 if [ ! -d "lz4" ]; then @@ -33,7 +33,7 @@ git clone https://github.com/lz4/lz4.git cd lz4 lz4Path=`pwd` git checkout v1.9.4 -make -j12 +CFLAGS="-fPIC" make -j12 test $? -eq 0 || { echo "Can't compile lz4"; exit 1; } cd .. 
# ./lib/liblz4.a diff --git a/emulator/CMakeLists.txt b/emulator/CMakeLists.txt index 61de96d3..66d8309a 100644 --- a/emulator/CMakeLists.txt +++ b/emulator/CMakeLists.txt @@ -35,7 +35,12 @@ else() add_library(emulator STATIC ${EMULATOR_SOURCE} ${EMULATOR_HEADERS}) endif() -target_link_libraries(emulator PUBLIC emulator_static git) +if (PORTABLE AND NOT APPLE) + target_link_libraries(emulator PUBLIC emulator_static git -static-libgcc -static-libstdc++) +else() + target_link_libraries(emulator PUBLIC emulator_static git) +endif() + generate_export_header(emulator EXPORT_FILE_NAME ${CMAKE_CURRENT_BINARY_DIR}/emulator_export.h) target_include_directories(emulator PUBLIC $ diff --git a/tonlib/CMakeLists.txt b/tonlib/CMakeLists.txt index 0855012c..9a56e511 100644 --- a/tonlib/CMakeLists.txt +++ b/tonlib/CMakeLists.txt @@ -98,7 +98,12 @@ else() add_library(tonlibjson STATIC ${TONLIB_JSON_SOURCE} ${TONLIB_JSON_HEADERS}) endif() -target_link_libraries(tonlibjson PRIVATE tonlibjson_private) +if (PORTABLE AND NOT APPLE) + target_link_libraries(tonlibjson PRIVATE tonlibjson_private -static-libgcc -static-libstdc++) +else() + target_link_libraries(tonlibjson PRIVATE tonlibjson_private) +endif() + generate_export_header(tonlibjson EXPORT_FILE_NAME ${CMAKE_CURRENT_BINARY_DIR}/tonlib/tonlibjson_export.h) if (!BUILD_SHARED_LIBS) target_compile_definitions(tonlibjson PUBLIC TONLIBJSON_STATIC_DEFINE) From 0f6cf13d45ce45a6ee011caa81020d542ff1b833 Mon Sep 17 00:00:00 2001 From: Ivan Kalinin Date: Fri, 24 Jan 2025 15:41:43 +0100 Subject: [PATCH 24/61] fix(vm): fix saving `ret` on deep jump (#1487) --- crypto/vm/vm.cpp | 9 +++++++-- crypto/vm/vm.h | 9 +++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/crypto/vm/vm.cpp b/crypto/vm/vm.cpp index 77d5d8f8..3c1118c6 100644 --- a/crypto/vm/vm.cpp +++ b/crypto/vm/vm.cpp @@ -247,6 +247,11 @@ int VmState::jump(Ref cont) { // general jump to continuation cont int VmState::jump(Ref cont, int pass_args) { + cont = 
adjust_jump_cont(std::move(cont), pass_args); + return jump_to(std::move(cont)); +} + +Ref VmState::adjust_jump_cont(Ref cont, int pass_args) { const ControlData* cont_data = cont->get_cdata(); if (cont_data) { // first do the checks @@ -287,7 +292,7 @@ int VmState::jump(Ref cont, int pass_args) { consume_stack_gas(copy); } } - return jump_to(std::move(cont)); + return cont; } else { // have no continuation data, situation is somewhat simpler if (pass_args >= 0) { @@ -299,7 +304,7 @@ int VmState::jump(Ref cont, int pass_args) { consume_stack_gas(pass_args); } } - return jump_to(std::move(cont)); + return cont; } } diff --git a/crypto/vm/vm.h b/crypto/vm/vm.h index 04c5e576..7aaf1e91 100644 --- a/crypto/vm/vm.h +++ b/crypto/vm/vm.h @@ -347,6 +347,7 @@ class VmState final : public VmStateInterface { int call(Ref cont, int pass_args, int ret_args = -1); int jump(Ref cont); int jump(Ref cont, int pass_args); + Ref adjust_jump_cont(Ref cont, int pass_args); int ret(); int ret(int ret_args); int ret_alt(); @@ -374,6 +375,14 @@ class VmState final : public VmStateInterface { if (cnt > free_nested_cont_jump && global_version >= 9) { consume_gas(1); } + + if (cont.not_null()) { + const ControlData* cont_data = cont->get_cdata(); + if (cont_data && (cont_data->stack.not_null() || cont_data->nargs >= 0)) { + // if cont has non-empty stack or expects fixed number of arguments, jump is not simple + cont = adjust_jump_cont(std::move(cont), -1); + } + } } return res; } From da5644e758ff5f0bff504636dd20e1f8f6e257d6 Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Fri, 24 Jan 2025 14:48:05 +0000 Subject: [PATCH 25/61] Enable VmState::jump_to bugfix in version 9 (#1491) --- crypto/block/transaction.cpp | 2 +- crypto/vm/vm.h | 3 +-- doc/GlobalVersions.md | 3 ++- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/crypto/block/transaction.cpp b/crypto/block/transaction.cpp index 92e20fb0..63e9065b 100644 --- a/crypto/block/transaction.cpp +++ b/crypto/block/transaction.cpp @@ 
-1156,7 +1156,7 @@ namespace transaction { * It is activated by setting global version to 5 in ConfigParam 8. * This config change also activates new behavior for special accounts in masterchain. * - * In Augost 2024 it was decided to unlock other old highload wallets that got into the same situation. + * In August 2024 it was decided to unlock other old highload wallets that got into the same situation. * See https://t.me/tondev_news/129 * It is activated by setting global version to 9. * diff --git a/crypto/vm/vm.h b/crypto/vm/vm.h index 7aaf1e91..a171ef27 100644 --- a/crypto/vm/vm.h +++ b/crypto/vm/vm.h @@ -375,8 +375,7 @@ class VmState final : public VmStateInterface { if (cnt > free_nested_cont_jump && global_version >= 9) { consume_gas(1); } - - if (cont.not_null()) { + if (cont.not_null() && global_version >= 9) { const ControlData* cont_data = cont->get_cdata(); if (cont_data && (cont_data->stack.not_null() || cont_data->nargs >= 0)) { // if cont has non-empty stack or expects fixed number of arguments, jump is not simple diff --git a/doc/GlobalVersions.md b/doc/GlobalVersions.md index 3849072f..f4156ca0 100644 --- a/doc/GlobalVersions.md +++ b/doc/GlobalVersions.md @@ -133,4 +133,5 @@ Example: if the last masterchain block seqno is `19071` then the list contains b - Fix exception code in some TVM instructions: now `stk_und` has priority over other error codes. - `PFXDICTADD`, `PFXDICTSET`, `PFXDICTREPLACE`, `PFXDICTDEL`, `GETGASFEE`, `GETSTORAGEFEE`, `GETFORWARDFEE`, `GETORIGINALFWDFEE`, `GETGASFEESIMPLE`, `GETFORWARDFEESIMPLE`, `HASHEXT` - Now setting the contract code to a library cell does not consume additional gas on execution of the code. -- Temporary increase gas limit for some accounts (see [this post](https://t.me/tondev_news/129) for details, `override_gas_limit` in `transaction.cpp` for the list of accounts). 
\ No newline at end of file +- Temporary increase gas limit for some accounts (see [this post](https://t.me/tondev_news/129) for details, `override_gas_limit` in `transaction.cpp` for the list of accounts). +- Fix recursive jump to continuations with non-null control data. \ No newline at end of file From e7e57f8e6dac998039d1cfbd910320f3afa15959 Mon Sep 17 00:00:00 2001 From: Marat <98183742+dungeon-master-666@users.noreply.github.com> Date: Sun, 26 Jan 2025 14:39:05 +0100 Subject: [PATCH 26/61] add extra currencies support to emulator (#1494) --- emulator/emulator-emscripten.cpp | 31 ++++++++++++++++-- emulator/emulator-extern.cpp | 53 ++++++++++++++++++++++++++++++ emulator/emulator-extern.h | 8 +++++ emulator/test/emulator-tests.cpp | 55 ++++++++++++++++++++++++++++++++ emulator/tvm-emulator.hpp | 4 +++ 5 files changed, 149 insertions(+), 2 deletions(-) diff --git a/emulator/emulator-emscripten.cpp b/emulator/emulator-emscripten.cpp index 17639d28..e5d4e42d 100644 --- a/emulator/emulator-emscripten.cpp +++ b/emulator/emulator-emscripten.cpp @@ -65,6 +65,7 @@ struct GetMethodParams { std::string address; uint32_t unixtime; uint64_t balance; + std::string extra_currencies; std::string rand_seed_hex; int64_t gas_limit; int method_id; @@ -108,6 +109,32 @@ td::Result decode_get_method_params(const char* json) { TRY_RESULT(balance, td::to_integer_safe(balance_field.get_string())); params.balance = balance; + TRY_RESULT(ec_field, td::get_json_object_field(obj, "extra_currencies", td::JsonValue::Type::Object, true)); + if (ec_field.type() != td::JsonValue::Type::Null) { + if (ec_field.type() != td::JsonValue::Type::Object) { + return td::Status::Error("EC must be of type Object"); + } + td::StringBuilder ec_builder; + auto ec_obj = ec_field.get_object(); + bool is_first = true; + for (auto &field_value : ec_obj) { + auto currency_id = field_value.first; + if (field_value.second.type() != td::JsonValue::Type::String) { + return td::Status::Error(PSLICE() << "EC amount 
must be of type String"); + } + auto amount = field_value.second.get_string(); + if (!is_first) { + ec_builder << " "; + is_first = false; + } + ec_builder << currency_id << "=" << amount; + } + if (ec_builder.is_error()) { + return td::Status::Error(PSLICE() << "Error building extra currencies string"); + } + params.extra_currencies = ec_builder.as_cslice().str(); + } + TRY_RESULT(rand_seed_str, td::get_json_object_string_field(obj, "rand_seed", false)); params.rand_seed_hex = rand_seed_str; @@ -228,8 +255,8 @@ const char *run_get_method(const char *params, const char* stack, const char* co if ((decoded_params.libs && !tvm_emulator_set_libraries(tvm, decoded_params.libs.value().c_str())) || !tvm_emulator_set_c7(tvm, decoded_params.address.c_str(), decoded_params.unixtime, decoded_params.balance, decoded_params.rand_seed_hex.c_str(), config) || - (decoded_params.prev_blocks_info && - !tvm_emulator_set_prev_blocks_info(tvm, decoded_params.prev_blocks_info.value().c_str())) || + (decoded_params.extra_currencies.size() > 0 && !tvm_emulator_set_extra_currencies(tvm, decoded_params.extra_currencies.c_str())) || + (decoded_params.prev_blocks_info && !tvm_emulator_set_prev_blocks_info(tvm, decoded_params.prev_blocks_info.value().c_str())) || (decoded_params.gas_limit > 0 && !tvm_emulator_set_gas_limit(tvm, decoded_params.gas_limit)) || !tvm_emulator_set_debug_enabled(tvm, decoded_params.debug_enabled)) { tvm_emulator_destroy(tvm); diff --git a/emulator/emulator-extern.cpp b/emulator/emulator-extern.cpp index 4e5f17bf..eb5ff9f9 100644 --- a/emulator/emulator-extern.cpp +++ b/emulator/emulator-extern.cpp @@ -496,6 +496,59 @@ bool tvm_emulator_set_c7(void *tvm_emulator, const char *address, uint32_t unixt return true; } +bool tvm_emulator_set_extra_currencies(void *tvm_emulator, const char *extra_currencies) { + auto emulator = static_cast(tvm_emulator); + vm::Dictionary dict{32}; + td::Slice extra_currencies_str{extra_currencies}; + while (true) { + auto next_space_pos = 
extra_currencies_str.find(' '); + auto currency_id_amount = next_space_pos == td::Slice::npos ? + extra_currencies_str.substr(0) : extra_currencies_str.substr(0, next_space_pos); + + if (!currency_id_amount.empty()) { + auto delim_pos = currency_id_amount.find('='); + if (delim_pos == td::Slice::npos) { + LOG(ERROR) << "Invalid extra currency format, missing '='"; + return false; + } + + auto currency_id_str = currency_id_amount.substr(0, delim_pos); + auto amount_str = currency_id_amount.substr(delim_pos + 1); + + auto currency_id = td::to_integer_safe(currency_id_str); + if (currency_id.is_error()) { + LOG(ERROR) << "Invalid extra currency id: " << currency_id_str; + return false; + } + auto amount = td::dec_string_to_int256(amount_str); + if (amount.is_null()) { + LOG(ERROR) << "Invalid extra currency amount: " << amount_str; + return false; + } + if (amount == 0) { + continue; + } + if (amount < 0) { + LOG(ERROR) << "Negative extra currency amount: " << amount_str; + return false; + } + + vm::CellBuilder cb; + block::tlb::t_VarUInteger_32.store_integer_value(cb, *amount); + if (!dict.set_builder(td::BitArray<32>(currency_id.ok()), cb, vm::DictionaryBase::SetMode::Add)) { + LOG(ERROR) << "Duplicate extra currency id"; + return false; + } + } + if (next_space_pos == td::Slice::npos) { + break; + } + extra_currencies_str.remove_prefix(next_space_pos + 1); + } + emulator->set_extra_currencies(std::move(dict).extract_root_cell()); + return true; +} + bool tvm_emulator_set_config_object(void* tvm_emulator, void* config) { auto emulator = static_cast(tvm_emulator); auto global_config = std::shared_ptr(static_cast(config), config_deleter); diff --git a/emulator/emulator-extern.h b/emulator/emulator-extern.h index e69a9cb0..14879e1e 100644 --- a/emulator/emulator-extern.h +++ b/emulator/emulator-extern.h @@ -182,6 +182,14 @@ EMULATOR_EXPORT bool tvm_emulator_set_libraries(void *tvm_emulator, const char * */ EMULATOR_EXPORT bool tvm_emulator_set_c7(void *tvm_emulator, 
const char *address, uint32_t unixtime, uint64_t balance, const char *rand_seed_hex, const char *config); +/** + * @brief Set extra currencies balance + * @param tvm_emulator Pointer to TVM emulator + * @param extra_currencies String with extra currencies balance in format "currency_id1=balance1 currency_id2=balance2 ..." + * @return true in case of success, false in case of error + */ +EMULATOR_EXPORT bool tvm_emulator_set_extra_currencies(void *tvm_emulator, const char *extra_currencies); + /** * @brief Set config for TVM emulator * @param tvm_emulator Pointer to TVM emulator diff --git a/emulator/test/emulator-tests.cpp b/emulator/test/emulator-tests.cpp index 24394b49..a0be447f 100644 --- a/emulator/test/emulator-tests.cpp +++ b/emulator/test/emulator-tests.cpp @@ -400,3 +400,58 @@ TEST(Emulator, tvm_emulator) { CHECK(stack_res->depth() == 1); CHECK(stack_res.write().pop_int()->to_long() == init_data.seqno); } + +TEST(Emulator, tvm_emulator_extra_currencies) { + void *tvm_emulator = tvm_emulator_create("te6cckEBBAEAHgABFP8A9KQT9LzyyAsBAgFiAgMABtBfBAAJofpP8E8XmGlj", "te6cckEBAQEAAgAAAEysuc0=", 1); + std::string addr = "0:" + std::string(64, 'F'); + tvm_emulator_set_c7(tvm_emulator, addr.c_str(), 1337, 1000, std::string(64, 'F').c_str(), nullptr); + CHECK(tvm_emulator_set_extra_currencies(tvm_emulator, "100=20000 200=1")); + unsigned method_crc = td::crc16("get_balance"); + unsigned method_id = (method_crc & 0xffff) | 0x10000; + + auto stack = td::make_ref(); + vm::CellBuilder stack_cb; + CHECK(stack->serialize(stack_cb)); + auto stack_cell = stack_cb.finalize(); + auto stack_boc = td::base64_encode(std_boc_serialize(stack_cell).move_as_ok()); + + std::string tvm_res = tvm_emulator_run_get_method(tvm_emulator, method_id, stack_boc.c_str()); + + auto result_json = td::json_decode(td::MutableSlice(tvm_res)); + auto result = result_json.move_as_ok(); + auto& result_obj = result.get_object(); + + auto success_field = td::get_json_object_field(result_obj, "success", 
td::JsonValue::Type::Boolean, false); + auto success = success_field.move_as_ok().get_boolean(); + CHECK(success); + + auto stack_field = td::get_json_object_field(result_obj, "stack", td::JsonValue::Type::String, false); + auto stack_val = stack_field.move_as_ok(); + auto& stack_obj = stack_val.get_string(); + auto stack_res_boc = td::base64_decode(stack_obj); + auto stack_res_cell = vm::std_boc_deserialize(stack_res_boc.move_as_ok()); + td::Ref stack_res; + auto stack_res_cs = vm::load_cell_slice(stack_res_cell.move_as_ok()); + CHECK(vm::Stack::deserialize_to(stack_res_cs, stack_res)); + CHECK(stack_res->depth() == 1); + auto tuple = stack_res.write().pop_tuple(); + CHECK(tuple->size() == 2); + + auto ton_balance = tuple->at(0).as_int(); + CHECK(ton_balance == 1000); + + auto cell = tuple->at(1).as_cell(); + auto dict = vm::Dictionary{cell, 32}; + auto it = dict.begin(); + std::map ec_balance; + while (!it.eof()) { + auto id = td::BitArray<32>(it.cur_pos()).to_ulong(); + auto value_cs = it.cur_value(); + auto value = block::tlb::t_VarUInteger_32.as_integer(value_cs); + ec_balance[id] = value; + ++it; + } + CHECK(ec_balance.size() == 2); + CHECK(ec_balance[100] == 20000); + CHECK(ec_balance[200] == 1); +} diff --git a/emulator/tvm-emulator.hpp b/emulator/tvm-emulator.hpp index 413298c9..acc13627 100644 --- a/emulator/tvm-emulator.hpp +++ b/emulator/tvm-emulator.hpp @@ -33,6 +33,10 @@ public: } } + void set_extra_currencies(td::Ref extra_currencies) { + args_.set_extra_currencies(std::move(extra_currencies)); + } + void set_c7_raw(td::Ref c7) { args_.set_c7(std::move(c7)); } From 59a8cf0ae5c3062d14ec4c89a04fee80b5fd05c1 Mon Sep 17 00:00:00 2001 From: neodix42 Date: Sun, 26 Jan 2025 17:41:17 +0400 Subject: [PATCH 27/61] create tolk release github action (#1490) * add create-tolk-release.yml * adjust create-tolk-release for old workflows * use custom tag * use old names * use old names --- .github/workflows/create-release.yml | 10 ++ 
.github/workflows/create-tolk-release.yml | 134 ++++++++++++++++++++++ 2 files changed, 144 insertions(+) create mode 100644 .github/workflows/create-tolk-release.yml diff --git a/.github/workflows/create-release.yml b/.github/workflows/create-release.yml index 04f81a24..871c7aef 100644 --- a/.github/workflows/create-release.yml +++ b/.github/workflows/create-release.yml @@ -17,6 +17,7 @@ jobs: workflow: ton-x86-64-linux.yml path: artifacts workflow_conclusion: success + branch: master skip_unpack: true - name: Download and unzip Linux x86-64 artifacts @@ -25,6 +26,7 @@ jobs: workflow: ton-x86-64-linux.yml path: artifacts workflow_conclusion: success + branch: master skip_unpack: false - name: Download Mac x86-64 artifacts @@ -33,6 +35,7 @@ jobs: workflow: ton-x86-64-macos.yml path: artifacts workflow_conclusion: success + branch: master skip_unpack: true - name: Download Mac arm64 artifacts @@ -41,6 +44,7 @@ jobs: workflow: ton-arm64-macos.yml path: artifacts workflow_conclusion: success + branch: master skip_unpack: true - name: Download and unzip Mac x86-64 artifacts @@ -49,6 +53,7 @@ jobs: workflow: ton-x86-64-macos.yml path: artifacts workflow_conclusion: success + branch: master skip_unpack: false - name: Download and unzip arm64 artifacts @@ -57,6 +62,7 @@ jobs: workflow: ton-arm64-macos.yml path: artifacts workflow_conclusion: success + branch: master skip_unpack: false - name: Download Windows artifacts @@ -65,6 +71,7 @@ jobs: workflow: ton-x86-64-windows.yml path: artifacts workflow_conclusion: success + branch: master skip_unpack: true - name: Download and unzip Windows artifacts @@ -73,6 +80,7 @@ jobs: workflow: ton-x86-64-windows.yml path: artifacts workflow_conclusion: success + branch: master skip_unpack: false - name: Download WASM artifacts @@ -81,6 +89,7 @@ jobs: workflow: build-ton-wasm-emscripten.yml path: artifacts workflow_conclusion: success + branch: master skip_unpack: true - name: Download Android Tonlib artifacts @@ -89,6 +98,7 @@ jobs: 
workflow: build-ton-linux-android-tonlib.yml path: artifacts workflow_conclusion: success + branch: master skip_unpack: true - name: Show all artifacts diff --git a/.github/workflows/create-tolk-release.yml b/.github/workflows/create-tolk-release.yml new file mode 100644 index 00000000..bddb5b65 --- /dev/null +++ b/.github/workflows/create-tolk-release.yml @@ -0,0 +1,134 @@ +name: Create tolk release + +on: + workflow_dispatch: + inputs: + tag: + description: 'tolk release and tag name' + required: true + +permissions: write-all + +jobs: + create-release: + runs-on: ubuntu-22.04 + + steps: + - uses: actions/checkout@v3 + + - name: Download and unzip Linux x86-64 artifacts + uses: dawidd6/action-download-artifact@v6 + with: + workflow: ton-x86-64-linux.yml + path: artifacts + workflow_conclusion: success + branch: master + skip_unpack: false + + - name: Download and unzip Mac x86-64 artifacts + uses: dawidd6/action-download-artifact@v6 + with: + workflow: ton-x86-64-macos.yml + path: artifacts + workflow_conclusion: success + branch: master + skip_unpack: false + + - name: Download and unzip arm64 artifacts + uses: dawidd6/action-download-artifact@v6 + with: + workflow: ton-arm64-macos.yml + path: artifacts + workflow_conclusion: success + branch: master + skip_unpack: false + + - name: Download and unzip Windows artifacts + uses: dawidd6/action-download-artifact@v6 + with: + workflow: ton-x86-64-windows.yml + path: artifacts + workflow_conclusion: success + branch: master + skip_unpack: false + + - name: Download WASM artifacts + uses: dawidd6/action-download-artifact@v6 + with: + workflow: build-ton-wasm-emscripten.yml + path: artifacts + workflow_conclusion: success + branch: master + skip_unpack: true + + - name: Show all artifacts + run: | + tree artifacts + + + # create release + - name: Get registration token + id: getRegToken + run: | + curl -X POST -H \"Accept: application/vnd.github+json\" -H 'Authorization: token ${{ secrets.GITHUB_TOKEN }}' 
https://api.github.com/repos/ton-blockchain/ton/actions/runners/registration-token + + - name: Create release + id: create_release + uses: actions/create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: ${{ inputs.tag }} + release_name: ${{ inputs.tag }} + draft: false + prerelease: false + + # upload + + # win + + - name: Upload Windows 2019 single artifact - tolk + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-win-binaries/tolk.exe + asset_name: tolk.exe + tag: ${{ inputs.tag }} + + # mac x86-64 + + - name: Upload Mac x86-64 single artifact - tolk + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-x86_64-macos-binaries/tolk + asset_name: tolk-mac-x86-64 + tag: ${{ inputs.tag }} + + # mac arm64 + + - name: Upload Mac arm64 single artifact - tolk + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-arm64-macos-binaries/tolk + asset_name: tolk-mac-arm64 + tag: ${{ inputs.tag }} + + # linux x86-64 + + - name: Upload Linux x86-64 single artifact - tolk + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-x86_64-linux-binaries/tolk + asset_name: tolk-linux-x86_64 + tag: ${{ inputs.tag }} + + - name: Upload WASM artifacts + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-wasm-binaries.zip + asset_name: ton-wasm.zip + tag: ${{ inputs.tag }} From 7d9ef6e0bfcd41143f2cea3327d199dcb1c806be Mon Sep 17 00:00:00 2001 From: neodix42 Date: Mon, 27 Jan 2025 11:18:51 +0400 Subject: [PATCH 28/61] Fix wasm artifacts (#1499) * put back emscripten 3.1.19 * add create-tolk-release.yml * filter out master branch only --- .../workflows/build-ton-wasm-emscripten.yml | 19 + .github/workflows/create-release.yml | 11 + 
.github/workflows/create-tolk-release.yml | 153 ++++ assembly/wasm/fift-func-wasm-build-ubuntu.sh | 4 +- assembly/wasm/intrinsics.fc | 61 ++ assembly/wasm/stdlib.fc | 681 ++++++++++++++++++ 6 files changed, 927 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/create-tolk-release.yml create mode 100644 assembly/wasm/intrinsics.fc create mode 100644 assembly/wasm/stdlib.fc diff --git a/.github/workflows/build-ton-wasm-emscripten.yml b/.github/workflows/build-ton-wasm-emscripten.yml index 2ac1a224..bac0cf98 100644 --- a/.github/workflows/build-ton-wasm-emscripten.yml +++ b/.github/workflows/build-ton-wasm-emscripten.yml @@ -25,6 +25,25 @@ jobs: chmod +x fift-func-wasm-build-ubuntu.sh ./fift-func-wasm-build-ubuntu.sh -a + - name: Prepare test + run: | + cp assembly/wasm/*.fc . + git clone https://github.com/ton-community/func-js.git + cd func-js + npm install + npm run build + npm link + + - name: Test TON WASM artifacts + run: | + base64 -w 0 artifacts/funcfiftlib.wasm > artifacts/funcfiftlib.wasm.js + printf "module.exports = { FuncFiftLibWasm: '" | cat - artifacts/funcfiftlib.wasm.js > temp.txt && mv temp.txt artifacts/funcfiftlib.wasm.js + echo "'}" >> artifacts/funcfiftlib.wasm.js + cp artifacts/funcfiftlib.wasm.js func-js/node_modules/@ton-community/func-js-bin/dist/funcfiftlib.wasm.js + cp artifacts/funcfiftlib.js func-js/node_modules/@ton-community/func-js-bin/dist/funcfiftlib.js + npx func-js stdlib.fc intrinsics.fc --fift ./output.f + + - name: Upload artifacts uses: actions/upload-artifact@master with: diff --git a/.github/workflows/create-release.yml b/.github/workflows/create-release.yml index 3063ce06..1b86d045 100644 --- a/.github/workflows/create-release.yml +++ b/.github/workflows/create-release.yml @@ -17,6 +17,7 @@ jobs: workflow: build-ton-linux-arm64-appimage.yml path: artifacts workflow_conclusion: success + branch: master skip_unpack: true - name: Download and unzip Linux arm64 artifacts @@ -25,6 +26,7 @@ jobs: workflow: 
build-ton-linux-arm64-appimage.yml path: artifacts workflow_conclusion: success + branch: master skip_unpack: false - name: Download Linux x86-64 artifacts @@ -33,6 +35,7 @@ jobs: workflow: build-ton-linux-x86-64-appimage.yml path: artifacts workflow_conclusion: success + branch: master skip_unpack: true - name: Download and unzip Linux x86-64 artifacts @@ -41,6 +44,7 @@ jobs: workflow: build-ton-linux-x86-64-appimage.yml path: artifacts workflow_conclusion: success + branch: master skip_unpack: false - name: Download Mac x86-64 artifacts @@ -49,6 +53,7 @@ jobs: workflow: build-ton-macos-13-x86-64-portable.yml path: artifacts workflow_conclusion: success + branch: master skip_unpack: true - name: Download Mac arm64 artifacts @@ -57,6 +62,7 @@ jobs: workflow: build-ton-macos-14-arm64-portable.yml path: artifacts workflow_conclusion: success + branch: master skip_unpack: true - name: Download and unzip Mac x86-64 artifacts @@ -73,6 +79,7 @@ jobs: workflow: build-ton-macos-14-arm64-portable.yml path: artifacts workflow_conclusion: success + branch: master skip_unpack: false - name: Download Windows artifacts @@ -81,6 +88,7 @@ jobs: workflow: ton-x86-64-windows.yml path: artifacts workflow_conclusion: success + branch: master skip_unpack: true - name: Download and unzip Windows artifacts @@ -89,6 +97,7 @@ jobs: workflow: ton-x86-64-windows.yml path: artifacts workflow_conclusion: success + branch: master skip_unpack: false - name: Download WASM artifacts @@ -97,6 +106,7 @@ jobs: workflow: build-ton-wasm-emscripten.yml path: artifacts workflow_conclusion: success + branch: master skip_unpack: true - name: Download Android Tonlib artifacts @@ -105,6 +115,7 @@ jobs: workflow: build-ton-linux-android-tonlib.yml path: artifacts workflow_conclusion: success + branch: master skip_unpack: true - name: Show all artifacts diff --git a/.github/workflows/create-tolk-release.yml b/.github/workflows/create-tolk-release.yml new file mode 100644 index 00000000..370f0d79 --- /dev/null 
+++ b/.github/workflows/create-tolk-release.yml @@ -0,0 +1,153 @@ +name: Create tolk release + +on: + workflow_dispatch: + inputs: + tag: + description: 'tolk release and tag name' + required: true + +permissions: write-all + +jobs: + create-release: + runs-on: ubuntu-22.04 + + steps: + - uses: actions/checkout@v3 + + - name: Download and unzip Linux arm64 artifacts + uses: dawidd6/action-download-artifact@v6 + with: + workflow: build-ton-linux-arm64-appimage.yml + path: artifacts + workflow_conclusion: success + branch: master + skip_unpack: false + + - name: Download and unzip Linux x86-64 artifacts + uses: dawidd6/action-download-artifact@v6 + with: + workflow: build-ton-linux-x86-64-appimage.yml + path: artifacts + workflow_conclusion: success + branch: master + skip_unpack: false + + - name: Download and unzip Mac x86-64 artifacts + uses: dawidd6/action-download-artifact@v6 + with: + workflow: build-ton-macos-13-x86-64-portable.yml + path: artifacts + workflow_conclusion: success + branch: master + skip_unpack: false + + - name: Download and unzip arm64 artifacts + uses: dawidd6/action-download-artifact@v6 + with: + workflow: build-ton-macos-14-arm64-portable.yml + path: artifacts + workflow_conclusion: success + branch: master + skip_unpack: false + + - name: Download and unzip Windows artifacts + uses: dawidd6/action-download-artifact@v6 + with: + workflow: ton-x86-64-windows.yml + path: artifacts + workflow_conclusion: success + branch: master + skip_unpack: false + + - name: Download WASM artifacts + uses: dawidd6/action-download-artifact@v6 + with: + workflow: build-ton-wasm-emscripten.yml + path: artifacts + workflow_conclusion: success + branch: master + skip_unpack: true + + - name: Show all artifacts + run: | + tree artifacts + + + # create release + - name: Get registration token + id: getRegToken + run: | + curl -X POST -H \"Accept: application/vnd.github+json\" -H 'Authorization: token ${{ secrets.GITHUB_TOKEN }}' 
https://api.github.com/repos/ton-blockchain/ton/actions/runners/registration-token + + - name: Create release + id: create_release + uses: actions/create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: ${{ inputs.tag }} + release_name: ${{ inputs.tag }} + draft: false + prerelease: false + + # upload + + # win + + - name: Upload Windows 2019 single artifact - tolk + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-x86-64-windows/tolk.exe + asset_name: tolk.exe + tag: ${{ inputs.tag }} + + # mac x86-64 + + - name: Upload Mac x86-64 single artifact - tolk + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-x86_64-macos/tolk + asset_name: tolk-mac-x86-64 + tag: ${{ inputs.tag }} + + # mac arm64 + + - name: Upload Mac arm64 single artifact - tolk + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-arm64-macos/tolk + asset_name: tolk-mac-arm64 + tag: ${{ inputs.tag }} + + # linux x86-64 + + - name: Upload Linux x86-64 single artifact - tolk + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-x86_64-linux/tolk + asset_name: tolk-linux-x86_64 + tag: ${{ inputs.tag }} + + # linux arm64 + + - name: Upload Linux arm64 single artifact - tolk + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-arm64-linux/tolk + asset_name: tolk-linux-arm64 + tag: ${{ inputs.tag }} + + - name: Upload WASM artifacts + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-wasm.zip + asset_name: ton-wasm.zip + tag: ${{ inputs.tag }} diff --git a/assembly/wasm/fift-func-wasm-build-ubuntu.sh b/assembly/wasm/fift-func-wasm-build-ubuntu.sh index 8c0069f3..a463c02a 100644 --- 
a/assembly/wasm/fift-func-wasm-build-ubuntu.sh +++ b/assembly/wasm/fift-func-wasm-build-ubuntu.sh @@ -71,8 +71,8 @@ echo fi cd emsdk -./emsdk install 3.1.40 -./emsdk activate 3.1.40 +./emsdk install 3.1.19 +./emsdk activate 3.1.19 EMSDK_DIR=`pwd` . $EMSDK_DIR/emsdk_env.sh diff --git a/assembly/wasm/intrinsics.fc b/assembly/wasm/intrinsics.fc new file mode 100644 index 00000000..14a4498d --- /dev/null +++ b/assembly/wasm/intrinsics.fc @@ -0,0 +1,61 @@ +#pragma allow-post-modification; +#pragma compute-asm-ltr; + +(slice, slice) __tact_load_address(slice cs) inline { + slice raw = cs~load_msg_addr(); + return (cs, raw); +} + +slice __gen_slice1 () asm """ + B{b5ee9c72410101010005000006abcdefe1e98884} B>boc boc boc boc boc tuple cons(X head, tuple tail) asm "CONS"; + +;;; Extracts the head and the tail of lisp-style list. +forall X -> (X, tuple) uncons(tuple list) asm "UNCONS"; + +;;; Extracts the tail and the head of lisp-style list. +forall X -> (tuple, X) list_next(tuple list) asm(-> 1 0) "UNCONS"; + +;;; Returns the head of lisp-style list. +forall X -> X car(tuple list) asm "CAR"; + +;;; Returns the tail of lisp-style list. +tuple cdr(tuple list) asm "CDR"; + +;;; Creates tuple with zero elements. +tuple empty_tuple() asm "NIL"; + +;;; Appends a value `x` to a `Tuple t = (x1, ..., xn)`, but only if the resulting `Tuple t' = (x1, ..., xn, x)` +;;; is of length at most 255. Otherwise throws a type check exception. +forall X -> tuple tpush(tuple t, X value) asm "TPUSH"; +forall X -> (tuple, ()) ~tpush(tuple t, X value) asm "TPUSH"; + +;;; Creates a tuple of length one with given argument as element. +forall X -> [X] single(X x) asm "SINGLE"; + +;;; Unpacks a tuple of length one +forall X -> X unsingle([X] t) asm "UNSINGLE"; + +;;; Creates a tuple of length two with given arguments as elements. 
+forall X, Y -> [X, Y] pair(X x, Y y) asm "PAIR"; + +;;; Unpacks a tuple of length two +forall X, Y -> (X, Y) unpair([X, Y] t) asm "UNPAIR"; + +;;; Creates a tuple of length three with given arguments as elements. +forall X, Y, Z -> [X, Y, Z] triple(X x, Y y, Z z) asm "TRIPLE"; + +;;; Unpacks a tuple of length three +forall X, Y, Z -> (X, Y, Z) untriple([X, Y, Z] t) asm "UNTRIPLE"; + +;;; Creates a tuple of length four with given arguments as elements. +forall X, Y, Z, W -> [X, Y, Z, W] tuple4(X x, Y y, Z z, W w) asm "4 TUPLE"; + +;;; Unpacks a tuple of length four +forall X, Y, Z, W -> (X, Y, Z, W) untuple4([X, Y, Z, W] t) asm "4 UNTUPLE"; + +;;; Returns the first element of a tuple (with unknown element types). +forall X -> X first(tuple t) asm "FIRST"; + +;;; Returns the second element of a tuple (with unknown element types). +forall X -> X second(tuple t) asm "SECOND"; + +;;; Returns the third element of a tuple (with unknown element types). +forall X -> X third(tuple t) asm "THIRD"; + +;;; Returns the fourth element of a tuple (with unknown element types). +forall X -> X fourth(tuple t) asm "3 INDEX"; + +;;; Returns the first element of a pair tuple. +forall X, Y -> X pair_first([X, Y] p) asm "FIRST"; + +;;; Returns the second element of a pair tuple. +forall X, Y -> Y pair_second([X, Y] p) asm "SECOND"; + +;;; Returns the first element of a triple tuple. +forall X, Y, Z -> X triple_first([X, Y, Z] p) asm "FIRST"; + +;;; Returns the second element of a triple tuple. +forall X, Y, Z -> Y triple_second([X, Y, Z] p) asm "SECOND"; + +;;; Returns the third element of a triple tuple. +forall X, Y, Z -> Z triple_third([X, Y, Z] p) asm "THIRD"; + + +;;; Push null element (casted to given type) +;;; By the TVM type `Null` FunC represents absence of a value of some atomic type. +;;; So `null` can actually have any atomic type. 
+forall X -> X null() asm "PUSHNULL"; + +;;; Moves a variable [x] to the top of the stack +forall X -> (X, ()) ~impure_touch(X x) impure asm "NOP"; + + + +;;; Returns the current Unix time as an Integer +int now() asm "NOW"; + +;;; Returns the internal address of the current smart contract as a Slice with a `MsgAddressInt`. +;;; If necessary, it can be parsed further using primitives such as [parse_std_addr]. +slice my_address() asm "MYADDR"; + +;;; Returns the balance of the smart contract as a tuple consisting of an int +;;; (balance in nanotoncoins) and a `cell` +;;; (a dictionary with 32-bit keys representing the balance of "extra currencies") +;;; at the start of Computation Phase. +;;; Note that RAW primitives such as [send_raw_message] do not update this field. +[int, cell] get_balance() asm "BALANCE"; + +;;; Returns the logical time of the current transaction. +int cur_lt() asm "LTIME"; + +;;; Returns the starting logical time of the current block. +int block_lt() asm "BLOCKLT"; + +;;; Computes the representation hash of a `cell` [c] and returns it as a 256-bit unsigned integer `x`. +;;; Useful for signing and checking signatures of arbitrary entities represented by a tree of cells. +int cell_hash(cell c) asm "HASHCU"; + +;;; Computes the hash of a `slice s` and returns it as a 256-bit unsigned integer `x`. +;;; The result is the same as if an ordinary cell containing only data and references from `s` had been created +;;; and its hash computed by [cell_hash]. +int slice_hash(slice s) asm "HASHSU"; + +;;; Computes sha256 of the data bits of `slice` [s]. If the bit length of `s` is not divisible by eight, +;;; throws a cell underflow exception. The hash value is returned as a 256-bit unsigned integer `x`. 
+int string_hash(slice s) asm "SHA256U"; + +{- + # Signature checks +-} + +;;; Checks the Ed25519-`signature` of a `hash` (a 256-bit unsigned integer, usually computed as the hash of some data) +;;; using [public_key] (also represented by a 256-bit unsigned integer). +;;; The signature must contain at least 512 data bits; only the first 512 bits are used. +;;; The result is `−1` if the signature is valid, `0` otherwise. +;;; Note that `CHKSIGNU` creates a 256-bit slice with the hash and calls `CHKSIGNS`. +;;; That is, if [hash] is computed as the hash of some data, these data are hashed twice, +;;; the second hashing occurring inside `CHKSIGNS`. +int check_signature(int hash, slice signature, int public_key) asm "CHKSIGNU"; + +;;; Checks whether [signature] is a valid Ed25519-signature of the data portion of `slice data` using `public_key`, +;;; similarly to [check_signature]. +;;; If the bit length of [data] is not divisible by eight, throws a cell underflow exception. +;;; The verification of Ed25519 signatures is the standard one, +;;; with sha256 used to reduce [data] to the 256-bit number that is actually signed. +int check_data_signature(slice data, slice signature, int public_key) asm "CHKSIGNS"; + +{--- + # Computation of boc size + The primitives below may be useful for computing storage fees of user-provided data. +-} + +;;; Returns `(x, y, z, -1)` or `(null, null, null, 0)`. +;;; Recursively computes the count of distinct cells `x`, data bits `y`, and cell references `z` +;;; in the DAG rooted at `cell` [c], effectively returning the total storage used by this DAG taking into account +;;; the identification of equal cells. +;;; The values of `x`, `y`, and `z` are computed by a depth-first traversal of this DAG, +;;; with a hash table of visited cell hashes used to prevent visits of already-visited cells. 
+;;; The total count of visited cells `x` cannot exceed non-negative [max_cells]; +;;; otherwise the computation is aborted before visiting the `(max_cells + 1)`-st cell and +;;; a zero flag is returned to indicate failure. If [c] is `null`, returns `x = y = z = 0`. +(int, int, int) compute_data_size(cell c, int max_cells) impure asm "CDATASIZE"; + +;;; Similar to [compute_data_size?], but accepting a `slice` [s] instead of a `cell`. +;;; The returned value of `x` does not take into account the cell that contains the `slice` [s] itself; +;;; however, the data bits and the cell references of [s] are accounted for in `y` and `z`. +(int, int, int) slice_compute_data_size(slice s, int max_cells) impure asm "SDATASIZE"; + +;;; A non-quiet version of [compute_data_size?] that throws a cell overflow exception (`8`) on failure. +(int, int, int, int) compute_data_size?(cell c, int max_cells) asm "CDATASIZEQ NULLSWAPIFNOT2 NULLSWAPIFNOT"; + +;;; A non-quiet version of [slice_compute_data_size?] that throws a cell overflow exception (8) on failure. +(int, int, int, int) slice_compute_data_size?(cell c, int max_cells) asm "SDATASIZEQ NULLSWAPIFNOT2 NULLSWAPIFNOT"; + +;;; Throws an exception with exit_code excno if cond is not 0 (commented since implemented in compilator) +;; () throw_if(int excno, int cond) impure asm "THROWARGIF"; + +{-- + # Debug primitives + Only works for local TVM execution with debug level verbosity +-} +;;; Dumps the stack (at most the top 255 values) and shows the total stack depth. +() dump_stack() impure asm "DUMPSTK"; + +{- + # Persistent storage save and load +-} + +;;; Returns the persistent contract storage cell. It can be parsed or modified with slice and builder primitives later. +cell get_data() asm "c4 PUSH"; + +;;; Sets `cell` [c] as persistent contract data. You can update persistent contract storage with this primitive. 
+() set_data(cell c) impure asm "c4 POP"; + +{- + # Continuation primitives +-} +;;; Usually `c3` has a continuation initialized by the whole code of the contract. It is used for function calls. +;;; The primitive returns the current value of `c3`. +cont get_c3() impure asm "c3 PUSH"; + +;;; Updates the current value of `c3`. Usually, it is used for updating smart contract code in run-time. +;;; Note that after execution of this primitive the current code +;;; (and the stack of recursive function calls) won't change, +;;; but any other function call will use a function from the new code. +() set_c3(cont c) impure asm "c3 POP"; + +;;; Transforms a `slice` [s] into a simple ordinary continuation `c`, with `c.code = s` and an empty stack and savelist. +cont bless(slice s) impure asm "BLESS"; + +{--- + # Gas related primitives +-} + +;;; Sets current gas limit `gl` to its maximal allowed value `gm`, and resets the gas credit `gc` to zero, +;;; decreasing the value of `gr` by `gc` in the process. +;;; In other words, the current smart contract agrees to buy some gas to finish the current transaction. +;;; This action is required to process external messages, which bring no value (hence no gas) with themselves. +;;; +;;; For more details check [accept_message effects](https://ton.org/docs/#/smart-contracts/accept). +() accept_message() impure asm "ACCEPT"; + +;;; Sets current gas limit `gl` to the minimum of limit and `gm`, and resets the gas credit `gc` to zero. +;;; If the gas consumed so far (including the present instruction) exceeds the resulting value of `gl`, +;;; an (unhandled) out of gas exception is thrown before setting new gas limits. +;;; Notice that [set_gas_limit] with an argument `limit ≥ 2^63 − 1` is equivalent to [accept_message]. 
+() set_gas_limit(int limit) impure asm "SETGASLIMIT"; + +;;; Commits the current state of registers `c4` (“persistent data”) and `c5` (“actions”) +;;; so that the current execution is considered “successful” with the saved values even if an exception +;;; in Computation Phase is thrown later. +() commit() impure asm "COMMIT"; + +;;; Not implemented +;;; Computes the amount of gas that can be bought for `amount` nanoTONs, +;;; and sets `gl` accordingly in the same way as [set_gas_limit]. +;;() buy_gas(int amount) impure asm "BUYGAS"; + +;;; Computes the minimum of two integers [x] and [y]. +int min(int x, int y) asm "MIN"; + +;;; Computes the maximum of two integers [x] and [y]. +int max(int x, int y) asm "MAX"; + +;;; Sorts two integers. +(int, int) minmax(int x, int y) asm "MINMAX"; + +;;; Computes the absolute value of an integer [x]. +int abs(int x) asm "ABS"; + +{- + # Slice primitives + + It is said that a primitive _loads_ some data, + if it returns the data and the remainder of the slice + (so it can also be used as [modifying method](https://ton.org/docs/#/func/statements?id=modifying-methods)). + + It is said that a primitive _preloads_ some data, if it returns only the data + (it can be used as [non-modifying method](https://ton.org/docs/#/func/statements?id=non-modifying-methods)). + + Unless otherwise stated, loading and preloading primitives read the data from a prefix of the slice. +-} + + +;;; Converts a `cell` [c] into a `slice`. Notice that [c] must be either an ordinary cell, +;;; or an exotic cell (see [TVM.pdf](https://ton-blockchain.github.io/docs/tvm.pdf), 3.1.2) +;;; which is automatically loaded to yield an ordinary cell `c'`, converted into a `slice` afterwards. +slice begin_parse(cell c) asm "CTOS"; + +;;; Checks if [s] is empty. If not, throws an exception. +() end_parse(slice s) impure asm "ENDS"; + +;;; Loads the first reference from the slice. 
+(slice, cell) load_ref(slice s) asm(-> 1 0) "LDREF"; + +;;; Preloads the first reference from the slice. +cell preload_ref(slice s) asm "PLDREF"; + +{- Functions below are commented because are implemented on compilator level for optimisation -} + +;;; Loads a signed [len]-bit integer from a slice [s]. +;; (slice, int) ~load_int(slice s, int len) asm(s len -> 1 0) "LDIX"; + +;;; Loads an unsigned [len]-bit integer from a slice [s]. +;; (slice, int) ~load_uint(slice s, int len) asm( -> 1 0) "LDUX"; + +;;; Preloads a signed [len]-bit integer from a slice [s]. +;; int preload_int(slice s, int len) asm "PLDIX"; + +;;; Preloads an unsigned [len]-bit integer from a slice [s]. +;; int preload_uint(slice s, int len) asm "PLDUX"; + +;;; Loads the first `0 ≤ len ≤ 1023` bits from slice [s] into a separate `slice s''`. +;; (slice, slice) load_bits(slice s, int len) asm(s len -> 1 0) "LDSLICEX"; + +;;; Preloads the first `0 ≤ len ≤ 1023` bits from slice [s] into a separate `slice s''`. +;; slice preload_bits(slice s, int len) asm "PLDSLICEX"; + +;;; Loads serialized amount of TonCoins (any unsigned integer up to `2^120 - 1`). +(slice, int) load_grams(slice s) asm(-> 1 0) "LDGRAMS"; +(slice, int) load_coins(slice s) asm(-> 1 0) "LDVARUINT16"; + +(slice, int) load_varint16(slice s) asm(-> 1 0) "LDVARINT16"; +(slice, int) load_varint32(slice s) asm(-> 1 0) "LDVARINT32"; +(slice, int) load_varuint16(slice s) asm(-> 1 0) "LDVARUINT16"; +(slice, int) load_varuint32(slice s) asm(-> 1 0) "LDVARUINT32"; + +;;; Returns all but the first `0 ≤ len ≤ 1023` bits of `slice` [s]. +slice skip_bits(slice s, int len) asm "SDSKIPFIRST"; +(slice, ()) ~skip_bits(slice s, int len) asm "SDSKIPFIRST"; + +;;; Returns the first `0 ≤ len ≤ 1023` bits of `slice` [s]. +slice first_bits(slice s, int len) asm "SDCUTFIRST"; + +;;; Returns all but the last `0 ≤ len ≤ 1023` bits of `slice` [s]. 
+slice skip_last_bits(slice s, int len) asm "SDSKIPLAST"; +(slice, ()) ~skip_last_bits(slice s, int len) asm "SDSKIPLAST"; + +;;; Returns the last `0 ≤ len ≤ 1023` bits of `slice` [s]. +slice slice_last(slice s, int len) asm "SDCUTLAST"; + +;;; Loads a dictionary `D` (HashMapE) from `slice` [s]. +;;; (returns `null` if `nothing` constructor is used). +(slice, cell) load_dict(slice s) asm(-> 1 0) "LDDICT"; + +;;; Preloads a dictionary `D` from `slice` [s]. +cell preload_dict(slice s) asm "PLDDICT"; + +;;; Loads a dictionary as [load_dict], but returns only the remainder of the slice. +slice skip_dict(slice s) asm "SKIPDICT"; +(slice, ()) ~skip_dict(slice s) asm "SKIPDICT"; + +;;; Loads (Maybe ^Cell) from `slice` [s]. +;;; In other words loads 1 bit and if it is true +;;; loads first ref and return it with slice remainder +;;; otherwise returns `null` and slice remainder +(slice, cell) load_maybe_ref(slice s) asm(-> 1 0) "LDOPTREF"; + +;;; Preloads (Maybe ^Cell) from `slice` [s]. +cell preload_maybe_ref(slice s) asm "PLDOPTREF"; + + +;;; Returns the depth of `cell` [c]. +;;; If [c] has no references, then return `0`; +;;; otherwise the returned value is one plus the maximum of depths of cells referred to from [c]. +;;; If [c] is a `null` instead of a cell, returns zero. +int cell_depth(cell c) asm "CDEPTH"; + + +{- + # Slice size primitives +-} + +;;; Returns the number of references in `slice` [s]. +int slice_refs(slice s) asm "SREFS"; + +;;; Returns the number of data bits in `slice` [s]. +int slice_bits(slice s) asm "SBITS"; + +;;; Returns both the number of data bits and the number of references in `slice` [s]. +(int, int) slice_bits_refs(slice s) asm "SBITREFS"; + +;;; Checks whether a `slice` [s] is empty (i.e., contains no bits of data and no cell references). +int slice_empty?(slice s) asm "SEMPTY"; + +;;; Checks whether `slice` [s] has no bits of data. +int slice_data_empty?(slice s) asm "SDEMPTY"; + +;;; Checks whether `slice` [s] has no references. 
+int slice_refs_empty?(slice s) asm "SREMPTY"; + +;;; Returns the depth of `slice` [s]. +;;; If [s] has no references, then returns `0`; +;;; otherwise the returned value is one plus the maximum of depths of cells referred to from [s]. +int slice_depth(slice s) asm "SDEPTH"; + +{- + # Builder size primitives +-} + +;;; Returns the number of cell references already stored in `builder` [b] +int builder_refs(builder b) asm "BREFS"; + +;;; Returns the number of data bits already stored in `builder` [b]. +int builder_bits(builder b) asm "BBITS"; + +;;; Returns the depth of `builder` [b]. +;;; If no cell references are stored in [b], then returns 0; +;;; otherwise the returned value is one plus the maximum of depths of cells referred to from [b]. +int builder_depth(builder b) asm "BDEPTH"; + +{- + # Builder primitives + It is said that a primitive _stores_ a value `x` into a builder `b` + if it returns a modified version of the builder `b'` with the value `x` stored at the end of it. + It can be used as [non-modifying method](https://ton.org/docs/#/func/statements?id=non-modifying-methods). + + All the primitives below first check whether there is enough space in the `builder`, + and only then check the range of the value being serialized. +-} + +;;; Creates a new empty `builder`. +builder begin_cell() asm "NEWC"; + +;;; Converts a `builder` into an ordinary `cell`. +cell end_cell(builder b) asm "ENDC"; + +;;; Stores a reference to `cell` [c] into `builder` [b]. +builder store_ref(builder b, cell c) asm(c b) "STREF"; + +;;; Stores an unsigned [len]-bit integer `x` into `b` for `0 ≤ len ≤ 256`. +;; builder store_uint(builder b, int x, int len) asm(x b len) "STUX"; + +;;; Stores a signed [len]-bit integer `x` into `b` for `0 ≤ len ≤ 257`. 
+;; builder store_int(builder b, int x, int len) asm(x b len) "STIX"; + + +;;; Stores `slice` [s] into `builder` [b] +builder store_slice(builder b, slice s) asm "STSLICER"; + +;;; Stores (serializes) an integer [x] in the range `0..2^120 − 1` into `builder` [b]. +;;; The serialization of [x] consists of a 4-bit unsigned big-endian integer `l`, +;;; which is the smallest integer `l ≥ 0`, such that `x < 2^8l`, +;;; followed by an `8l`-bit unsigned big-endian representation of [x]. +;;; If [x] does not belong to the supported range, a range check exception is thrown. +;;; +;;; Store amounts of TonCoins to the builder as VarUInteger 16 +builder store_grams(builder b, int x) asm "STGRAMS"; +builder store_coins(builder b, int x) asm "STVARUINT16"; + +builder store_varint16(builder b, int x) asm "STVARINT16"; +builder store_varint32(builder b, int x) asm "STVARINT32"; +builder store_varuint16(builder b, int x) asm "STVARUINT16"; +builder store_varuint32(builder b, int x) asm "STVARUINT32"; + +;;; Stores dictionary `D` represented by `cell` [c] or `null` into `builder` [b]. +;;; In other words, stores a `1`-bit and a reference to [c] if [c] is not `null` and `0`-bit otherwise. 
+builder store_dict(builder b, cell c) asm(c b) "STDICT"; + +;;; Stores (Maybe ^Cell) to builder: +;;; if cell is null store 1 zero bit +;;; otherwise store 1 true bit and ref to cell +builder store_maybe_ref(builder b, cell c) asm(c b) "STOPTREF"; + + +{- + # Address manipulation primitives + The address manipulation primitives listed below serialize and deserialize values according to the following TL-B scheme: + ```TL-B + addr_none$00 = MsgAddressExt; + addr_extern$01 len:(## 8) external_address:(bits len) + = MsgAddressExt; + anycast_info$_ depth:(#<= 30) { depth >= 1 } + rewrite_pfx:(bits depth) = Anycast; + addr_std$10 anycast:(Maybe Anycast) + workchain_id:int8 address:bits256 = MsgAddressInt; + addr_var$11 anycast:(Maybe Anycast) addr_len:(## 9) + workchain_id:int32 address:(bits addr_len) = MsgAddressInt; + _ _:MsgAddressInt = MsgAddress; + _ _:MsgAddressExt = MsgAddress; + + int_msg_info$0 ihr_disabled:Bool bounce:Bool bounced:Bool + src:MsgAddress dest:MsgAddressInt + value:CurrencyCollection ihr_fee:Grams fwd_fee:Grams + created_lt:uint64 created_at:uint32 = CommonMsgInfoRelaxed; + ext_out_msg_info$11 src:MsgAddress dest:MsgAddressExt + created_lt:uint64 created_at:uint32 = CommonMsgInfoRelaxed; + ``` + A deserialized `MsgAddress` is represented by a tuple `t` as follows: + + - `addr_none` is represented by `t = (0)`, + i.e., a tuple containing exactly one integer equal to zero. + - `addr_extern` is represented by `t = (1, s)`, + where slice `s` contains the field `external_address`. In other words, ` + t` is a pair (a tuple consisting of two entries), containing an integer equal to one and slice `s`. + - `addr_std` is represented by `t = (2, u, x, s)`, + where `u` is either a `null` (if `anycast` is absent) or a slice `s'` containing `rewrite_pfx` (if anycast is present). + Next, integer `x` is the `workchain_id`, and slice `s` contains the address. 
+ - `addr_var` is represented by `t = (3, u, x, s)`, + where `u`, `x`, and `s` have the same meaning as for `addr_std`. +-} + +;;; Loads from slice [s] the only prefix that is a valid `MsgAddress`, +;;; and returns both this prefix `s'` and the remainder `s''` of [s] as slices. +(slice, slice) load_msg_addr(slice s) asm(-> 1 0) "LDMSGADDR"; + +;;; Decomposes slice [s] containing a valid `MsgAddress` into a `tuple t` with separate fields of this `MsgAddress`. +;;; If [s] is not a valid `MsgAddress`, a cell deserialization exception is thrown. +tuple parse_addr(slice s) asm "PARSEMSGADDR"; + +;;; Parses slice [s] containing a valid `MsgAddressInt` (usually a `msg_addr_std`), +;;; applies rewriting from the anycast (if present) to the same-length prefix of the address, +;;; and returns both the workchain and the 256-bit address as integers. +;;; If the address is not 256-bit, or if [s] is not a valid serialization of `MsgAddressInt`, +;;; throws a cell deserialization exception. +(int, int) parse_std_addr(slice s) asm "REWRITESTDADDR"; + +;;; A variant of [parse_std_addr] that returns the (rewritten) address as a slice [s], +;;; even if it is not exactly 256 bit long (represented by a `msg_addr_var`). +(int, slice) parse_var_addr(slice s) asm "REWRITEVARADDR"; + +{- + # Dictionary primitives +-} + + +;;; Sets the value associated with [key_len]-bit key signed index in dictionary [dict] to [value] (cell), +;;; and returns the resulting dictionary. +cell idict_set_ref(cell dict, int key_len, int index, cell value) asm(value index dict key_len) "DICTISETREF"; +(cell, ()) ~idict_set_ref(cell dict, int key_len, int index, cell value) asm(value index dict key_len) "DICTISETREF"; + +;;; Sets the value associated with [key_len]-bit key unsigned index in dictionary [dict] to [value] (cell), +;;; and returns the resulting dictionary. 
+cell udict_set_ref(cell dict, int key_len, int index, cell value) asm(value index dict key_len) "DICTUSETREF"; +(cell, ()) ~udict_set_ref(cell dict, int key_len, int index, cell value) asm(value index dict key_len) "DICTUSETREF"; + +cell idict_get_ref(cell dict, int key_len, int index) asm(index dict key_len) "DICTIGETOPTREF"; +(cell, int) idict_get_ref?(cell dict, int key_len, int index) asm(index dict key_len) "DICTIGETREF" "NULLSWAPIFNOT"; +(cell, int) udict_get_ref?(cell dict, int key_len, int index) asm(index dict key_len) "DICTUGETREF" "NULLSWAPIFNOT"; +(cell, cell) idict_set_get_ref(cell dict, int key_len, int index, cell value) asm(value index dict key_len) "DICTISETGETOPTREF"; +(cell, cell) udict_set_get_ref(cell dict, int key_len, int index, cell value) asm(value index dict key_len) "DICTUSETGETOPTREF"; +(cell, int) idict_delete?(cell dict, int key_len, int index) asm(index dict key_len) "DICTIDEL"; +(cell, int) udict_delete?(cell dict, int key_len, int index) asm(index dict key_len) "DICTUDEL"; +(slice, int) idict_get?(cell dict, int key_len, int index) asm(index dict key_len) "DICTIGET" "NULLSWAPIFNOT"; +(slice, int) udict_get?(cell dict, int key_len, int index) asm(index dict key_len) "DICTUGET" "NULLSWAPIFNOT"; +(cell, slice, int) idict_delete_get?(cell dict, int key_len, int index) asm(index dict key_len) "DICTIDELGET" "NULLSWAPIFNOT"; +(cell, slice, int) udict_delete_get?(cell dict, int key_len, int index) asm(index dict key_len) "DICTUDELGET" "NULLSWAPIFNOT"; +(cell, (slice, int)) ~idict_delete_get?(cell dict, int key_len, int index) asm(index dict key_len) "DICTIDELGET" "NULLSWAPIFNOT"; +(cell, (slice, int)) ~udict_delete_get?(cell dict, int key_len, int index) asm(index dict key_len) "DICTUDELGET" "NULLSWAPIFNOT"; +(cell, cell, int) idict_delete_get_ref?(cell dict, int key_len, int index) asm(index dict key_len) "DICTIDELGETREF" "NULLSWAPIFNOT"; +(cell, cell, int) udict_delete_get_ref?(cell dict, int key_len, int index) asm(index dict key_len) 
"DICTUDELGETREF" "NULLSWAPIFNOT"; +(cell, (cell, int)) ~idict_delete_get_ref?(cell dict, int key_len, int index) asm(index dict key_len) "DICTIDELGETREF" "NULLSWAPIFNOT"; +(cell, (cell, int)) ~udict_delete_get_ref?(cell dict, int key_len, int index) asm(index dict key_len) "DICTUDELGETREF" "NULLSWAPIFNOT"; +cell udict_set(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTUSET"; +(cell, ()) ~udict_set(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTUSET"; +cell idict_set(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTISET"; +(cell, ()) ~idict_set(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTISET"; +cell dict_set(cell dict, int key_len, slice index, slice value) asm(value index dict key_len) "DICTSET"; +(cell, ()) ~dict_set(cell dict, int key_len, slice index, slice value) asm(value index dict key_len) "DICTSET"; +(cell, int) udict_add?(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTUADD"; +(cell, int) udict_replace?(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTUREPLACE"; +(cell, int) udict_replace_ref?(cell dict, int key_len, int index, cell value) asm(value index dict key_len) "DICTUREPLACEREF"; +(cell, slice, int) udict_replaceget?(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTUREPLACEGET" "NULLSWAPIFNOT"; +(cell, cell, int) udict_replaceget_ref?(cell dict, int key_len, int index, cell value) asm(value index dict key_len) "DICTUREPLACEGETREF" "NULLSWAPIFNOT"; +(cell, (slice, int)) ~udict_replaceget?(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTUREPLACEGET" "NULLSWAPIFNOT"; +(cell, (cell, int)) ~udict_replaceget_ref?(cell dict, int key_len, int index, cell value) asm(value index dict key_len) "DICTUREPLACEGETREF" "NULLSWAPIFNOT"; +(cell, int) idict_add?(cell dict, int key_len, 
int index, slice value) asm(value index dict key_len) "DICTIADD"; +(cell, int) idict_replace?(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTIREPLACE"; +(cell, int) idict_replace_ref?(cell dict, int key_len, int index, cell value) asm(value index dict key_len) "DICTIREPLACEREF"; +(cell, slice, int) idict_replaceget?(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTIREPLACEGET" "NULLSWAPIFNOT"; +(cell, cell, int) idict_replaceget_ref?(cell dict, int key_len, int index, cell value) asm(value index dict key_len) "DICTIREPLACEGETREF" "NULLSWAPIFNOT"; +(cell, (slice, int)) ~idict_replaceget?(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTIREPLACEGET" "NULLSWAPIFNOT"; +(cell, (cell, int)) ~idict_replaceget_ref?(cell dict, int key_len, int index, cell value) asm(value index dict key_len) "DICTIREPLACEGETREF" "NULLSWAPIFNOT"; +cell udict_set_builder(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTUSETB"; +(cell, ()) ~udict_set_builder(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTUSETB"; +cell idict_set_builder(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTISETB"; +(cell, ()) ~idict_set_builder(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTISETB"; +cell dict_set_builder(cell dict, int key_len, slice index, builder value) asm(value index dict key_len) "DICTSETB"; +(cell, ()) ~dict_set_builder(cell dict, int key_len, slice index, builder value) asm(value index dict key_len) "DICTSETB"; +(cell, int) dict_replace_builder?(cell dict, int key_len, slice index, builder value) asm(value index dict key_len) "DICTREPLACEB"; +(cell, builder, int) dict_replaceget_builder?(cell dict, int key_len, slice index, builder value) asm(value index dict key_len) "DICTREPLACEGETB" "NULLSWAPIFNOT"; +(cell, slice, int) dict_replaceget?(cell 
dict, int key_len, slice index, slice value) asm(value index dict key_len) "DICTREPLACEGET" "NULLSWAPIFNOT"; +(cell, (builder, int)) ~dict_replaceget_builder?(cell dict, int key_len, slice index, builder value) asm(value index dict key_len) "DICTREPLACEGETB" "NULLSWAPIFNOT"; +(cell, (slice, int)) ~dict_replaceget?(cell dict, int key_len, slice index, slice value) asm(value index dict key_len) "DICTREPLACEGET" "NULLSWAPIFNOT"; +(cell, int) udict_add_builder?(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTUADDB"; +(cell, int) udict_replace_builder?(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTUREPLACEB"; +(cell, builder, int) udict_replaceget_builder?(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTUREPLACEGETB" "NULLSWAPIFNOT"; +(cell, (builder, int)) ~udict_replaceget_builder?(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTUREPLACEGETB" "NULLSWAPIFNOT"; +(cell, int) idict_add_builder?(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTIADDB"; +(cell, int) idict_replace_builder?(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTIREPLACEB"; +(cell, builder, int) idict_replaceget_builder?(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTIREPLACEGETB" "NULLSWAPIFNOT"; +(cell, (builder, int)) ~idict_replaceget_builder?(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTIREPLACEGETB" "NULLSWAPIFNOT"; +(cell, int, slice, int) udict_delete_get_min(cell dict, int key_len) asm(-> 0 2 1 3) "DICTUREMMIN" "NULLSWAPIFNOT2"; +(cell, (int, slice, int)) ~udict::delete_get_min(cell dict, int key_len) asm(-> 0 2 1 3) "DICTUREMMIN" "NULLSWAPIFNOT2"; +(cell, int, slice, int) idict_delete_get_min(cell dict, int key_len) asm(-> 0 2 1 3) "DICTIREMMIN" "NULLSWAPIFNOT2"; +(cell, (int, slice, int)) 
~idict::delete_get_min(cell dict, int key_len) asm(-> 0 2 1 3) "DICTIREMMIN" "NULLSWAPIFNOT2"; +(cell, slice, slice, int) dict_delete_get_min(cell dict, int key_len) asm(-> 0 2 1 3) "DICTREMMIN" "NULLSWAPIFNOT2"; +(cell, (slice, slice, int)) ~dict::delete_get_min(cell dict, int key_len) asm(-> 0 2 1 3) "DICTREMMIN" "NULLSWAPIFNOT2"; +(cell, int, slice, int) udict_delete_get_max(cell dict, int key_len) asm(-> 0 2 1 3) "DICTUREMMAX" "NULLSWAPIFNOT2"; +(cell, (int, slice, int)) ~udict::delete_get_max(cell dict, int key_len) asm(-> 0 2 1 3) "DICTUREMMAX" "NULLSWAPIFNOT2"; +(cell, int, slice, int) idict_delete_get_max(cell dict, int key_len) asm(-> 0 2 1 3) "DICTIREMMAX" "NULLSWAPIFNOT2"; +(cell, (int, slice, int)) ~idict::delete_get_max(cell dict, int key_len) asm(-> 0 2 1 3) "DICTIREMMAX" "NULLSWAPIFNOT2"; +(cell, slice, slice, int) dict_delete_get_max(cell dict, int key_len) asm(-> 0 2 1 3) "DICTREMMAX" "NULLSWAPIFNOT2"; +(cell, (slice, slice, int)) ~dict::delete_get_max(cell dict, int key_len) asm(-> 0 2 1 3) "DICTREMMAX" "NULLSWAPIFNOT2"; +(int, slice, int) udict_get_min?(cell dict, int key_len) asm (-> 1 0 2) "DICTUMIN" "NULLSWAPIFNOT2"; +(int, slice, int) udict_get_max?(cell dict, int key_len) asm (-> 1 0 2) "DICTUMAX" "NULLSWAPIFNOT2"; +(int, cell, int) udict_get_min_ref?(cell dict, int key_len) asm (-> 1 0 2) "DICTUMINREF" "NULLSWAPIFNOT2"; +(int, cell, int) udict_get_max_ref?(cell dict, int key_len) asm (-> 1 0 2) "DICTUMAXREF" "NULLSWAPIFNOT2"; +(int, slice, int) idict_get_min?(cell dict, int key_len) asm (-> 1 0 2) "DICTIMIN" "NULLSWAPIFNOT2"; +(int, slice, int) idict_get_max?(cell dict, int key_len) asm (-> 1 0 2) "DICTIMAX" "NULLSWAPIFNOT2"; +(int, cell, int) idict_get_min_ref?(cell dict, int key_len) asm (-> 1 0 2) "DICTIMINREF" "NULLSWAPIFNOT2"; +(int, cell, int) idict_get_max_ref?(cell dict, int key_len) asm (-> 1 0 2) "DICTIMAXREF" "NULLSWAPIFNOT2"; +(int, slice, int) udict_get_next?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) 
"DICTUGETNEXT" "NULLSWAPIFNOT2"; +(int, slice, int) udict_get_nexteq?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTUGETNEXTEQ" "NULLSWAPIFNOT2"; +(int, slice, int) udict_get_prev?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTUGETPREV" "NULLSWAPIFNOT2"; +(int, slice, int) udict_get_preveq?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTUGETPREVEQ" "NULLSWAPIFNOT2"; +(int, slice, int) idict_get_next?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTIGETNEXT" "NULLSWAPIFNOT2"; +(int, slice, int) idict_get_nexteq?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTIGETNEXTEQ" "NULLSWAPIFNOT2"; +(int, slice, int) idict_get_prev?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTIGETPREV" "NULLSWAPIFNOT2"; +(int, slice, int) idict_get_preveq?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTIGETPREVEQ" "NULLSWAPIFNOT2"; + +;;; Creates an empty dictionary, which is actually a null value. Equivalent to PUSHNULL +cell new_dict() asm "NEWDICT"; +;;; Checks whether a dictionary is empty. Equivalent to cell_null?. +int dict_empty?(cell c) asm "DICTEMPTY"; + + +{- Prefix dictionary primitives -} +(slice, slice, slice, int) pfxdict_get?(cell dict, int key_len, slice key) asm(key dict key_len) "PFXDICTGETQ" "NULLSWAPIFNOT2"; +(cell, int) pfxdict_set?(cell dict, int key_len, slice key, slice value) asm(value key dict key_len) "PFXDICTSET"; +(cell, int) pfxdict_delete?(cell dict, int key_len, slice key) asm(key dict key_len) "PFXDICTDEL"; + +;;; Returns the value of the global configuration parameter with integer index `i` as a `cell` or `null` value. +cell config_param(int x) asm "CONFIGOPTPARAM"; +;;; Checks whether c is a null. Note, that FunC also has polymorphic null? built-in. 
+int cell_null?(cell c) asm "ISNULL"; + +;;; Creates an output action which would reserve exactly amount nanotoncoins (if mode = 0), at most amount nanotoncoins (if mode = 2), or all but amount nanotoncoins (if mode = 1 or mode = 3), from the remaining balance of the account. It is roughly equivalent to creating an outbound message carrying amount nanotoncoins (or b − amount nanotoncoins, where b is the remaining balance) to oneself, so that the subsequent output actions would not be able to spend more money than the remainder. Bit +2 in mode means that the external action does not fail if the specified amount cannot be reserved; instead, all remaining balance is reserved. Bit +8 in mode means `amount <- -amount` before performing any further actions. Bit +4 in mode means that amount is increased by the original balance of the current account (before the compute phase), including all extra currencies, before performing any other checks and actions. Currently, amount must be a non-negative integer, and mode must be in the range 0..15. +() raw_reserve(int amount, int mode) impure asm "RAWRESERVE"; +;;; Similar to raw_reserve, but also accepts a dictionary extra_amount (represented by a cell or null) with extra currencies. In this way currencies other than TonCoin can be reserved. +() raw_reserve_extra(int amount, cell extra_amount, int mode) impure asm "RAWRESERVEX"; +;;; Sends a raw message contained in msg, which should contain a correctly serialized object Message X, with the only exception that the source address is allowed to have dummy value addr_none (to be automatically replaced with the current smart contract address), and ihr_fee, fwd_fee, created_lt and created_at fields can have arbitrary values (to be rewritten with correct values during the action phase of the current transaction). Integer parameter mode contains the flags. 
Currently mode = 0 is used for ordinary messages; mode = 128 is used for messages that are to carry all the remaining balance of the current smart contract (instead of the value originally indicated in the message); mode = 64 is used for messages that carry all the remaining value of the inbound message in addition to the value initially indicated in the new message (if bit 0 is not set, the gas fees are deducted from this amount); mode' = mode + 1 means that the sender wants to pay transfer fees separately; mode' = mode + 2 means that any errors arising while processing this message during the action phase should be ignored. Finally, mode' = mode + 32 means that the current account must be destroyed if its resulting balance is zero. This flag is usually employed together with +128. +() send_raw_message(cell msg, int mode) impure asm "SENDRAWMSG"; +;;; Creates an output action that would change this smart contract code to that given by cell new_code. Notice that this change will take effect only after the successful termination of the current run of the smart contract +() set_code(cell new_code) impure asm "SETCODE"; + +;;; Generates a new pseudo-random unsigned 256-bit integer x. The algorithm is as follows: if r is the old value of the random seed, considered as a 32-byte array (by constructing the big-endian representation of an unsigned 256-bit integer), then its sha512(r) is computed; the first 32 bytes of this hash are stored as the new value r' of the random seed, and the remaining 32 bytes are returned as the next random value x. +int random() impure asm "RANDU256"; +;;; Generates a new pseudo-random integer z in the range 0..range−1 (or range..−1, if range < 0). More precisely, an unsigned random value x is generated as in random; then z := x * range / 2^256 is computed. +int rand(int range) impure asm "RAND"; +;;; Returns the current random seed as an unsigned 256-bit Integer. 
+int get_seed() impure asm "RANDSEED"; +;;; Sets the random seed to unsigned 256-bit seed. +() set_seed(int x) impure asm "SETRAND"; +;;; Mixes unsigned 256-bit integer x into the random seed r by setting the random seed to sha256 of the concatenation of two 32-byte strings: the first with the big-endian representation of the old seed r, and the second with the big-endian representation of x. +() randomize(int x) impure asm "ADDRAND"; +;;; Equivalent to randomize(cur_lt());. +() randomize_lt() impure asm "LTIME" "ADDRAND"; + +;;; Checks whether the data parts of two slices coincide +int equal_slices_bits(slice a, slice b) asm "SDEQ"; +;;; Checks whether b is a null. Note, that FunC also has polymorphic null? built-in. +int builder_null?(builder b) asm "ISNULL"; +;;; Concatenates two builders +builder store_builder(builder to, builder from) asm "STBR"; + +;; CUSTOM: + +;; TVM UPGRADE 2023-07 https://docs.ton.org/learn/tvm-instructions/tvm-upgrade-2023-07 +;; In mainnet since 20 Dec 2023 https://t.me/tonblockchain/226 + +;;; Retrieves code of smart-contract from c7 + +cell my_code() asm "MYCODE"; From 99b78f78d7d05f3e7971d976e9fdb16db3e1c3b2 Mon Sep 17 00:00:00 2001 From: Marat <98183742+dungeon-master-666@users.noreply.github.com> Date: Mon, 27 Jan 2025 08:20:20 +0100 Subject: [PATCH 29/61] build fix (#1495) --- emulator/emulator-emscripten.cpp | 2 +- emulator/emulator_export_list | 1 + emulator/test/emulator-tests.cpp | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/emulator/emulator-emscripten.cpp b/emulator/emulator-emscripten.cpp index e5d4e42d..efb14eff 100644 --- a/emulator/emulator-emscripten.cpp +++ b/emulator/emulator-emscripten.cpp @@ -115,7 +115,7 @@ td::Result decode_get_method_params(const char* json) { return td::Status::Error("EC must be of type Object"); } td::StringBuilder ec_builder; - auto ec_obj = ec_field.get_object(); + auto& ec_obj = ec_field.get_object(); bool is_first = true; for (auto &field_value : ec_obj) { auto 
currency_id = field_value.first; diff --git a/emulator/emulator_export_list b/emulator/emulator_export_list index feb653e2..bd991cd7 100644 --- a/emulator/emulator_export_list +++ b/emulator/emulator_export_list @@ -17,6 +17,7 @@ _emulator_config_destroy _tvm_emulator_create _tvm_emulator_set_libraries _tvm_emulator_set_c7 +_tvm_emulator_set_extra_currencies _tvm_emulator_set_config_object _tvm_emulator_set_prev_blocks_info _tvm_emulator_set_gas_limit diff --git a/emulator/test/emulator-tests.cpp b/emulator/test/emulator-tests.cpp index a0be447f..ae273ddf 100644 --- a/emulator/test/emulator-tests.cpp +++ b/emulator/test/emulator-tests.cpp @@ -445,7 +445,7 @@ TEST(Emulator, tvm_emulator_extra_currencies) { auto it = dict.begin(); std::map ec_balance; while (!it.eof()) { - auto id = td::BitArray<32>(it.cur_pos()).to_ulong(); + auto id = static_cast(td::BitArray<32>(it.cur_pos()).to_ulong()); auto value_cs = it.cur_value(); auto value = block::tlb::t_VarUInteger_32.as_integer(value_cs); ec_balance[id] = value; From ed88f55a3daedef0c4bc607b6594c060a05bd0e4 Mon Sep 17 00:00:00 2001 From: tuminzee <58180803+tuminzee@users.noreply.github.com> Date: Mon, 27 Jan 2025 13:05:56 +0530 Subject: [PATCH 30/61] fix broken link (#1497) --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d0aa8cb3..897ba809 100644 --- a/README.md +++ b/README.md @@ -47,9 +47,9 @@ Main TON monorepo, which includes the code of the node/validator, lite-client, t __The Open Network (TON)__ is a fast, secure, scalable blockchain focused on handling _millions of transactions per second_ (TPS) with the goal of reaching hundreds of millions of blockchain users. 
- To learn more about different aspects of TON blockchain and its underlying ecosystem check [documentation](https://ton.org/docs) - To run node, validator or lite-server check [Participate section](https://ton.org/docs/participate/nodes/run-node) -- To develop decentralised apps check [Tutorials](https://ton.org/docs/develop/smart-contracts/), [FunC docs](https://ton.org/docs/develop/func/overview) and [DApp tutorials](https://ton.org/docs/develop/dapps/) +- To develop decentralised apps check [Tutorials](https://docs.ton.org/v3/guidelines/smart-contracts/guidelines), [FunC docs](https://ton.org/docs/develop/func/overview) and [DApp tutorials](https://docs.ton.org/v3/guidelines/dapps/overview) - To work on TON check [wallets](https://ton.app/wallets), [explorers](https://ton.app/explorers), [DEXes](https://ton.app/dex) and [utilities](https://ton.app/utilities) -- To interact with TON check [APIs](https://ton.org/docs/develop/dapps/apis/) +- To interact with TON check [APIs](https://docs.ton.org/v3/guidelines/dapps/apis-sdks/overview) ## Updates flow From 2a02b547868a4eb2fa69f44d4197ec46e919dc8d Mon Sep 17 00:00:00 2001 From: Andrey Tvorozhkov Date: Mon, 27 Jan 2025 14:22:00 +0500 Subject: [PATCH 31/61] Fix `advance_ext` (#746) --- crypto/vm/cells/CellSlice.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/vm/cells/CellSlice.cpp b/crypto/vm/cells/CellSlice.cpp index 466bcd8d..4d8c3c5a 100644 --- a/crypto/vm/cells/CellSlice.cpp +++ b/crypto/vm/cells/CellSlice.cpp @@ -264,7 +264,7 @@ bool CellSlice::advance_ext(unsigned bits, unsigned refs) { } bool CellSlice::advance_ext(unsigned bits_refs) { - return advance_ext(bits_refs >> 16, bits_refs & 0xffff); + return advance_ext(bits_refs & 0xffff, bits_refs >> 16); } // (PRIVATE) From 8ffa3dd9dcab9135fe14a02f712c315bdd3079ed Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Mon, 27 Jan 2025 09:55:00 +0000 Subject: [PATCH 32/61] Fix printing TLB NatWidth (#1501) --- crypto/tl/tlblib.cpp | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/tl/tlblib.cpp b/crypto/tl/tlblib.cpp index ee05d371..05ea8e1c 100644 --- a/crypto/tl/tlblib.cpp +++ b/crypto/tl/tlblib.cpp @@ -45,7 +45,7 @@ bool Bool::print_skip(PrettyPrinter& pp, vm::CellSlice& cs) const { } bool NatWidth::print_skip(PrettyPrinter& pp, vm::CellSlice& cs) const { - long long value = (long long)cs.fetch_ulong(32); + long long value = (long long)cs.fetch_ulong(n); return value >= 0 && pp.out_int(value); } From 6f1feb43d5941f964e40691be8185c747f385793 Mon Sep 17 00:00:00 2001 From: EmelyanenkoK Date: Mon, 27 Jan 2025 12:58:54 +0300 Subject: [PATCH 33/61] Update Changelogs --- Changelog.md | 14 ++++++++++++++ recent_changelog.md | 21 ++++++++++----------- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/Changelog.md b/Changelog.md index fd513bc8..fa713e7e 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,3 +1,17 @@ +## 2025.02 Update +1. Series of improvement/fixes for `Config8.version >= 9`, check [GlobalVersion.md](./doc/GlobalVersion.md) +2. Fix for better discovery of updated nodes' (validators') IPs: retry dht queries +3. Series of improvements for extra currency adoption: fixed c7 in rungetmethod, reserve modes +4. TVM Fix: saving ret on deep jump +5. A few fixes of tl-b schemes: crc computation, incorrect tag for merkle proofs, advance_ext, NatWidth print +6. Emulator improvements: fix setting libraries, extracurrency support +7. Increase of gas limit for unlocking highload-v2 wallets locked in the beginning of 2024 +8. Validator console improvement: dashed names, better shard formats + + +Besides the work of the core team, this update is based on the efforts of @dbaranovstonfi from StonFi(libraries in emulator), @Rexagon (ret on deep jumps), @tvorogme from DTon (`advance_ext`), Nan from Zellic (`stk_und` and JNI) + + ## 2024.12 Update 1. 
FunC 0.4.6: Fix in try/catch handling, fixing pure flag for functions stored in variables diff --git a/recent_changelog.md b/recent_changelog.md index cc877c2c..0ec08583 100644 --- a/recent_changelog.md +++ b/recent_changelog.md @@ -1,13 +1,12 @@ -## 2024.12 Update +## 2025.02 Update +1. Series of improvement/fixes for `Config8.version >= 9`, check [GlobalVersion.md](./doc/GlobalVersion.md) +2. Fix for better discovery of updated nodes' (validators') IPs: retry dht queries +3. Series of improvements for extra currency adoption: fixed c7 in rungetmethod, reserve modes +4. TVM Fix: saving ret on deep jump +5. A few fixes of tl-b schemes: crc computation, incorrect tag for merkle proofs, advance_ext, NatWidth print +6. Emulator improvements: fix setting libraries, extracurrency support +7. Increase of gas limit for unlocking highload-v2 wallets locked in the beginning of 2024 +8. Validator console improvement: dashed names, better shard formats -1. FunC 0.4.6: Fix in try/catch handling, fixing pure flag for functions stored in variables -2. Merging parts of Accelerator: support of specific shard monitoring, archive/liteserver slice format, support for partial liteservers, proxy liteserver, on-demand neighbour queue loading -3. Fix of asynchronous cell loading -4. Various improvements: caching certificates checks, better block overloading detection, `_malloc` in emulator -5. Introduction of telemetry in overlays -6. Use non-null local-id for tonlib-LS interaction - mitigates MitM attack. -7. Adding `SECP256K1_XONLY_PUBKEY_TWEAK_ADD`, `SETCONTCTRMANY` instructions to TVM (activated by `Config8.version >= 9`) -8. Private keys export via validator-engine-console - required for better backups -9. 
Fix proof checking in tonlib, `hash` in `raw.Message` in tonlib_api -Besides the work of the core team, this update is based on the efforts of OtterSec and LayerZero (FunC), tg:@throwunless (FunC), Aviv Frenkel and Dima Kogan from Fordefi (LS MitM), @hacker-volodya (Tonlib), OKX team (async cell loading), @krigga (emulator) +Besides the work of the core team, this update is based on the efforts of @dbaranovstonfi from StonFi(libraries in emulator), @Rexagon (ret on deep jumps), @tvorogme from DTon (`advance_ext`), Nan from Zellic (`stk_und` and JNI) From 294db6922717b88689fae4797804f953008cdb19 Mon Sep 17 00:00:00 2001 From: EmelyanenkoK Date: Mon, 27 Jan 2025 14:33:52 +0300 Subject: [PATCH 34/61] Fix typos in changelog --- Changelog.md | 5 ++--- recent_changelog.md | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/Changelog.md b/Changelog.md index fa713e7e..34195f74 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,8 +1,8 @@ ## 2025.02 Update -1. Series of improvement/fixes for `Config8.version >= 9`, check [GlobalVersion.md](./doc/GlobalVersion.md) +1. Series of improvement/fixes for `Config8.version >= 9`, check [GlobalVersions.md](./doc/GlobalVersions.md) 2. Fix for better discovery of updated nodes' (validators') IPs: retry dht queries 3. Series of improvements for extra currency adoption: fixed c7 in rungetmethod, reserve modes -4. TVM Fix: saving ret on deep jump +4. TVM: Fix processing continuation control data on deep jump 5. A few fixes of tl-b schemes: crc computation, incorrect tag for merkle proofs, advance_ext, NatWidth print 6. Emulator improvements: fix setting libraries, extracurrency support 7. 
Increase of gas limit for unlocking highload-v2 wallets locked in the beginning of 2024 @@ -11,7 +11,6 @@ Besides the work of the core team, this update is based on the efforts of @dbaranovstonfi from StonFi(libraries in emulator), @Rexagon (ret on deep jumps), @tvorogme from DTon (`advance_ext`), Nan from Zellic (`stk_und` and JNI) - ## 2024.12 Update 1. FunC 0.4.6: Fix in try/catch handling, fixing pure flag for functions stored in variables diff --git a/recent_changelog.md b/recent_changelog.md index 0ec08583..dfa39aa6 100644 --- a/recent_changelog.md +++ b/recent_changelog.md @@ -1,8 +1,8 @@ ## 2025.02 Update -1. Series of improvement/fixes for `Config8.version >= 9`, check [GlobalVersion.md](./doc/GlobalVersion.md) +1. Series of improvement/fixes for `Config8.version >= 9`, check [GlobalVersions.md](./doc/GlobalVersions.md) 2. Fix for better discovery of updated nodes' (validators') IPs: retry dht queries 3. Series of improvements for extra currency adoption: fixed c7 in rungetmethod, reserve modes -4. TVM Fix: saving ret on deep jump +4. TVM: Fix processing continuation control data on deep jump 5. A few fixes of tl-b schemes: crc computation, incorrect tag for merkle proofs, advance_ext, NatWidth print 6. Emulator improvements: fix setting libraries, extracurrency support 7. 
Increase of gas limit for unlocking highload-v2 wallets locked in the beginning of 2024 From c720204199d1615f1cffa00db6d376830dc8603d Mon Sep 17 00:00:00 2001 From: Marat <98183742+dungeon-master-666@users.noreply.github.com> Date: Mon, 27 Jan 2025 12:34:21 +0100 Subject: [PATCH 35/61] Fix BUILD_SHARED_LIBS issue (#1496) --- CMakeLists.txt | 1 + emulator/CMakeLists.txt | 16 ++++++---------- tonlib/CMakeLists.txt | 12 +++++------- 3 files changed, 12 insertions(+), 17 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index af264036..cea3fc7e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -84,6 +84,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED TRUE) set(CMAKE_CXX_EXTENSIONS FALSE) #BEGIN internal +option(BUILD_SHARED_LIBS "Use \"ON\" to build shared libraries instead of static where it's not specified (not recommended)" OFF) option(USE_EMSCRIPTEN "Use \"ON\" for config building wasm." OFF) option(TON_ONLY_TONLIB "Use \"ON\" to build only tonlib." OFF) if (USE_EMSCRIPTEN) diff --git a/emulator/CMakeLists.txt b/emulator/CMakeLists.txt index 66d8309a..a0799541 100644 --- a/emulator/CMakeLists.txt +++ b/emulator/CMakeLists.txt @@ -1,7 +1,5 @@ cmake_minimum_required(VERSION 3.5 FATAL_ERROR) -option(BUILD_SHARED_LIBS "Use \"OFF\" for a static build." 
ON) - if (NOT OPENSSL_FOUND) find_package(OpenSSL REQUIRED) endif() @@ -11,11 +9,6 @@ set(EMULATOR_STATIC_SOURCE tvm-emulator.hpp ) -set(EMULATOR_HEADERS - transaction-emulator.h - emulator-extern.h -) - set(EMULATOR_SOURCE emulator-extern.cpp ) @@ -29,10 +22,10 @@ include(GenerateExportHeader) add_library(emulator_static STATIC ${EMULATOR_STATIC_SOURCE}) target_link_libraries(emulator_static PUBLIC ton_crypto smc-envelope) -if (NOT USE_EMSCRIPTEN AND BUILD_SHARED_LIBS) - add_library(emulator SHARED ${EMULATOR_SOURCE} ${EMULATOR_HEADERS}) +if (USE_EMSCRIPTEN) + add_library(emulator STATIC ${EMULATOR_SOURCE}) else() - add_library(emulator STATIC ${EMULATOR_SOURCE} ${EMULATOR_HEADERS}) + add_library(emulator SHARED ${EMULATOR_SOURCE}) endif() if (PORTABLE AND NOT APPLE) @@ -42,6 +35,9 @@ else() endif() generate_export_header(emulator EXPORT_FILE_NAME ${CMAKE_CURRENT_BINARY_DIR}/emulator_export.h) +if (USE_EMSCRIPTEN) + target_compile_definitions(emulator PUBLIC EMULATOR_STATIC_DEFINE) +endif() target_include_directories(emulator PUBLIC $ $) diff --git a/tonlib/CMakeLists.txt b/tonlib/CMakeLists.txt index 9a56e511..eb538361 100644 --- a/tonlib/CMakeLists.txt +++ b/tonlib/CMakeLists.txt @@ -1,7 +1,5 @@ cmake_minimum_required(VERSION 3.5 FATAL_ERROR) -option(BUILD_SHARED_LIBS "Use \"OFF\" for a static build." 
ON) - if (NOT OPENSSL_FOUND) find_package(OpenSSL REQUIRED) endif() @@ -92,10 +90,10 @@ set(TONLIB_JSON_HEADERS tonlib/tonlib_client_json.h) set(TONLIB_JSON_SOURCE tonlib/tonlib_client_json.cpp) include(GenerateExportHeader) -if (NOT USE_EMSCRIPTEN AND BUILD_SHARED_LIBS) - add_library(tonlibjson SHARED ${TONLIB_JSON_SOURCE} ${TONLIB_JSON_HEADERS}) +if (USE_EMSCRIPTEN) + add_library(tonlibjson STATIC ${TONLIB_JSON_SOURCE}) else() - add_library(tonlibjson STATIC ${TONLIB_JSON_SOURCE} ${TONLIB_JSON_HEADERS}) + add_library(tonlibjson SHARED ${TONLIB_JSON_SOURCE}) endif() if (PORTABLE AND NOT APPLE) @@ -105,7 +103,7 @@ else() endif() generate_export_header(tonlibjson EXPORT_FILE_NAME ${CMAKE_CURRENT_BINARY_DIR}/tonlib/tonlibjson_export.h) -if (!BUILD_SHARED_LIBS) +if (USE_EMSCRIPTEN) target_compile_definitions(tonlibjson PUBLIC TONLIBJSON_STATIC_DEFINE) endif() target_include_directories(tonlibjson PUBLIC @@ -159,7 +157,7 @@ endif() install(FILES ${TONLIB_JSON_HEADERS} ${CMAKE_CURRENT_BINARY_DIR}/tonlib/tonlibjson_export.h DESTINATION include/tonlib/) -if (NOT USE_EMSCRIPTEN AND BUILD_SHARED_LIBS) +if (NOT USE_EMSCRIPTEN) install(EXPORT Tonlib FILE TonlibTargets.cmake NAMESPACE Tonlib:: From 989629a832bd37faa89dedb9b0db29b0e0edc168 Mon Sep 17 00:00:00 2001 From: tolk-vm Date: Wed, 15 Jan 2025 01:41:15 +0700 Subject: [PATCH 36/61] [Tolk] Compiler built-in `__expect_type()` for testing purposes Currently, tolk-tester can test various "output" of the compiler: pass input and check output, validate fif codegen, etc. But it can not test compiler internals and AST representation. I've added an ability to have special functions to check/expose internal compiler state. The first (and the only now) is: > __expect_type(some_expr, ""); Such a call has special treatment in a compilation process. Compilation fails if this expression doesn't have requested type. It's intended to be used in tests only. Not present in stdlib. 
--- tolk-tester/tests/a6.tolk | 3 + tolk-tester/tests/generics-1.tolk | 5 +- tolk-tester/tests/inference-tests.tolk | 94 ++++++++++++++++++++++++++ tolk/builtins.cpp | 7 ++ tolk/lexer.cpp | 11 ++- tolk/lexer.h | 1 + tolk/pipe-infer-types-and-calls.cpp | 21 ++++++ tolk/type-system.cpp | 11 +-- tolk/type-system.h | 1 + 9 files changed, 147 insertions(+), 7 deletions(-) create mode 100644 tolk-tester/tests/inference-tests.tolk diff --git a/tolk-tester/tests/a6.tolk b/tolk-tester/tests/a6.tolk index 32fd3364..94494546 100644 --- a/tolk-tester/tests/a6.tolk +++ b/tolk-tester/tests/a6.tolk @@ -1,6 +1,9 @@ fun f(a: int, b: int, c: int, d: int, e: int, f: int): (int, int) { // solve a 2x2 linear equation var D: int = a*d - b*c;;;; var Dx: int = e*d-b*f ;;;; var Dy: int = a * f - e * c; + __expect_type(D, "int"); + __expect_type(D*D, "int"); + __expect_type(calc_phi, "() -> int"); return (Dx/D,Dy/D); };;;; diff --git a/tolk-tester/tests/generics-1.tolk b/tolk-tester/tests/generics-1.tolk index 0d872cc1..5a649a44 100644 --- a/tolk-tester/tests/generics-1.tolk +++ b/tolk-tester/tests/generics-1.tolk @@ -41,10 +41,12 @@ fun manyEq(a: T1, b: T2, c: T3): [T1, T2, T3] { @method_id(104) fun test104(f: int) { - return ( + var result = ( manyEq(1 ? 1 : 1, f ? 0 : null, !f ? getTwo() as int : null), manyEq((f ? 
null as int : eq2(2), beginCell().storeBool(true).endCell().beginParse().loadBool()), 0, eq4(f)) ); + __expect_type(result, "([int, int, int], [(int, bool), int, int])"); + return result; } fun calcSum(x: X, y: X) { return x + y; } @@ -68,6 +70,7 @@ fun abstractTransform(xToY: (X) -> Y, yToR: (((Y))) -> R, initialX: X): @method_id(106) fun test106() { var c = beginCell().storeInt(106, 32).endCell(); + __expect_type(calcYPlus1, "(int) -> int"); return [ abstractTransform(cellToSlice, calcLoad32, c), abstractTransform(calcYPlus1, calcYPlus1, 0), diff --git a/tolk-tester/tests/inference-tests.tolk b/tolk-tester/tests/inference-tests.tolk new file mode 100644 index 00000000..3d451581 --- /dev/null +++ b/tolk-tester/tests/inference-tests.tolk @@ -0,0 +1,94 @@ +// the goal of this file is not only to @testcase results — +// but to check that this file compiles + +fun eq(value: X): X { return value; } + +fun test1(x: int, y: int) { + __expect_type(0, "int"); + __expect_type("0"c, "int"); + __expect_type(x, "int"); + __expect_type(x + y, "int"); + __expect_type(x * y, "int"); + __expect_type(x & y, "int"); + __expect_type(x << y, "int"); + __expect_type((((x))), "int"); + __expect_type(x = x, "int"); + __expect_type(x += x, "int"); + __expect_type(x &= x, "int"); + __expect_type(random() ? x : y, "int"); + __expect_type(eq(x), "int"); + __expect_type(eq(x), "int"); + __expect_type(eq(null), "int"); + __expect_type(x as int, "int"); + __expect_type(+x, "int"); + __expect_type(~x, "int"); + { + var x: slice = beginCell().endCell().beginParse(); + __expect_type(x, "slice"); + __expect_type(beginCell(), "builder"); + __expect_type(beginCell().endCell(), "cell"); + } +} + +fun test2(x: int, y: bool) { + __expect_type(!x, "bool"); + __expect_type(x != x, "bool"); + __expect_type(x <= x, "bool"); + __expect_type(x <=> x, "bool"); + __expect_type(x <=> x, "bool"); + __expect_type(!random(), "bool"); + __expect_type(!!(x != null), "bool"); + __expect_type(x ? 
x != null : null == x, "bool"); + __expect_type(y & true, "bool"); + __expect_type(y ^= false, "bool"); + __expect_type(x && y, "bool"); + __expect_type(true && false && true, "bool"); + __expect_type(x || x, "bool"); + __expect_type(x || !x || (true & false), "bool"); +} + +fun test3() { + __expect_type(true as int, "int"); + __expect_type(!random() as int, "int"); +} + +fun test4(x: int) { + __expect_type((), "()"); + __expect_type((x, x), "(int, int)"); + __expect_type((x, (x, x), x), "(int, (int, int), int)"); +} + +fun test5(x: int) { + __expect_type([], "[]"); + __expect_type([x], "[int]"); + __expect_type([x, x >= 1], "[int, bool]"); + __expect_type([x, x >= 1, null as slice], "[int, bool, slice]"); + __expect_type((x, [x], [[x], x]), "(int, [int], [[int], int])"); + __expect_type(getMyOriginalBalanceWithExtraCurrencies(), "[int, cell]"); +} + +fun test6() { + var t = createEmptyTuple(); + __expect_type(t, "tuple"); + t.tuplePush(1); + __expect_type(t, "tuple"); +} + +fun test7() { + __expect_type(test3(), "void"); + __expect_type(test3, "() -> void"); + var cb = test1; + __expect_type(cb, "(int, int) -> void"); + var t = createEmptyTuple(); + __expect_type(beginCell().endCell, "(builder) -> cell"); + // __expect_type(eq<(int, slice)>, "(int, slice) -> (int, slice)"); +} + + +fun main() { + return 0; +} + +/** +@testcase | 0 | | 0 +*/ diff --git a/tolk/builtins.cpp b/tolk/builtins.cpp index d704ec4d..73aef2d9 100644 --- a/tolk/builtins.cpp +++ b/tolk/builtins.cpp @@ -1255,6 +1255,13 @@ void define_builtins() { define_builtin_func("debugDumpStack", {}, Unit, nullptr, AsmOp::Custom("DUMPSTK", 0, 0), 0); + + // functions not presented in stdlib at all + // used in tolk-tester to check/expose internal compiler state + // each of them is handled in a special way, search by its name + define_builtin_func("__expect_type", {TypeDataUnknown::create(), Slice}, Unit, nullptr, + AsmOp::Nop(), + FunctionData::flagMarkedAsPure); } } // namespace tolk diff --git 
a/tolk/lexer.cpp b/tolk/lexer.cpp index 7e8c8fb2..78ec991e 100644 --- a/tolk/lexer.cpp +++ b/tolk/lexer.cpp @@ -555,6 +555,15 @@ Lexer::Lexer(const SrcFile* file) next(); } +Lexer::Lexer(std::string_view text) + : file(nullptr) + , p_start(text.data()) + , p_end(p_start + text.size()) + , p_next(p_start) + , location() { + next(); +} + void Lexer::next() { while (cur_token_idx == last_token_idx && !is_eof()) { update_location(); @@ -563,7 +572,7 @@ void Lexer::next() { } } if (is_eof()) { - add_token(tok_eof, file->text); + add_token(tok_eof, ""); } cur_token = tokens_circularbuf[++cur_token_idx & 7]; } diff --git a/tolk/lexer.h b/tolk/lexer.h index 81d579db..9dbfe3b6 100644 --- a/tolk/lexer.h +++ b/tolk/lexer.h @@ -173,6 +173,7 @@ public: }; explicit Lexer(const SrcFile* file); + explicit Lexer(std::string_view text); Lexer(const Lexer&) = delete; Lexer &operator=(const Lexer&) = delete; diff --git a/tolk/pipe-infer-types-and-calls.cpp b/tolk/pipe-infer-types-and-calls.cpp index d8a7d41b..011c83d7 100644 --- a/tolk/pipe-infer-types-and-calls.cpp +++ b/tolk/pipe-infer-types-and-calls.cpp @@ -240,6 +240,24 @@ public: TypePtr get_result() const { return unified_result; } }; +// handle __expect_type(expr, "type") call +// this is used in compiler tests +GNU_ATTRIBUTE_NOINLINE GNU_ATTRIBUTE_COLD +static void handle_possible_compiler_internal_call(const FunctionData* current_function, V v) { + const FunctionData* fun_ref = v->fun_maybe; + tolk_assert(fun_ref && fun_ref->is_builtin_function()); + static_cast(current_function); + + if (fun_ref->name == "__expect_type") { + tolk_assert(v->get_num_args() == 2); + TypePtr expected_type = parse_type_from_string(v->get_arg(1)->get_expr()->as()->str_val); + TypePtr expr_type = v->get_arg(0)->inferred_type; + if (expected_type != expr_type) { + v->error("__expect_type failed: expected " + to_string(expected_type) + ", got " + to_string(expr_type)); + } + } +} + /* * This class handles all types of AST vertices and traverses 
them, filling all AnyExprV::inferred_type. * Note, that it isn't derived from ASTVisitor, it has manual `switch` over all existing vertex types. @@ -974,6 +992,9 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { TypePtr inferred_type = dot_obj && fun_ref->does_return_self() ? dot_obj->inferred_type : fun_ref->inferred_return_type; assign_inferred_type(v, inferred_type); assign_inferred_type(callee, fun_ref->inferred_full_type); + if (fun_ref->is_builtin_function() && fun_ref->name[0] == '_') { + handle_possible_compiler_internal_call(current_function, v); + } // note, that mutate params don't affect typing, they are handled when converting to IR } diff --git a/tolk/type-system.cpp b/tolk/type-system.cpp index b21bd0ee..72419d8c 100644 --- a/tolk/type-system.cpp +++ b/tolk/type-system.cpp @@ -651,11 +651,6 @@ static TypePtr parse_simple_type(Lexer& lex) { std::vector items = parse_nested_type_list(lex, tok_opbracket, "`[`", tok_clbracket, "`]` or `,`"); return TypeDataTypedTuple::create(std::move(items)); } - case tok_fun: { - lex.next(); - std::vector params_types = parse_nested_type_list_in_parenthesis(lex); - lex.expect(tok_arrow, "`->`"); - } default: lex.unexpected(""); } @@ -695,6 +690,12 @@ TypePtr parse_type_from_tokens(Lexer& lex) { return parse_type_expression(lex); } +// for internal usage only +TypePtr parse_type_from_string(std::string_view text) { + Lexer lex(text); + return parse_type_expression(lex); +} + std::ostream& operator<<(std::ostream& os, TypePtr type_data) { return os << (type_data ? 
type_data->as_human_readable() : "(nullptr-type)"); } diff --git a/tolk/type-system.h b/tolk/type-system.h index 13c0e4b0..0db42709 100644 --- a/tolk/type-system.h +++ b/tolk/type-system.h @@ -417,6 +417,7 @@ public: class Lexer; TypePtr parse_type_from_tokens(Lexer& lex); +TypePtr parse_type_from_string(std::string_view text); void type_system_init(); From 565bc597356481d5de047625fa94ae1b66658539 Mon Sep 17 00:00:00 2001 From: tolk-vm Date: Wed, 18 Dec 2024 19:26:26 +0300 Subject: [PATCH 37/61] [Tolk] Refactor: get rid of split_vars, construct valid LET ops In FunC (and in Tolk before), tensor vars (actually occupying several stack slots) were represented as a single var in terms or IR vars (Ops): > var a = (1, 2); > LET (_i) = (_1, _2) Now, every tensor of N stack slots is represented as N IR vars. > LET (_i, _j) = (_1, _2) This will give an ability to control access to parts of a tensor when implementing `tensorVar.0` syntax. --- tolk-tester/tests/a10.tolk | 4 +- tolk-tester/tests/cells-slices.tolk | 8 +-- tolk-tester/tests/logical-operators.tolk | 6 +- tolk-tester/tests/mutate-methods.tolk | 6 +- tolk-tester/tests/null-keyword.tolk | 4 +- tolk/abscode.cpp | 87 +++++------------------- tolk/analyzer.cpp | 34 --------- tolk/generics-helpers.cpp | 6 +- tolk/pipe-ast-to-legacy.cpp | 84 ++++++++++++++--------- tolk/pipe-check-rvalue-lvalue.cpp | 2 +- tolk/pipe-generate-fif-output.cpp | 5 -- tolk/pipe-refine-lvalue-for-mutate.cpp | 4 +- tolk/symtable.cpp | 4 +- tolk/symtable.h | 11 +-- tolk/tolk.h | 19 ++---- tolk/type-system.cpp | 25 ------- tolk/type-system.h | 8 --- 17 files changed, 100 insertions(+), 217 deletions(-) diff --git a/tolk-tester/tests/a10.tolk b/tolk-tester/tests/a10.tolk index 7301f1d5..af104248 100644 --- a/tolk-tester/tests/a10.tolk +++ b/tolk-tester/tests/a10.tolk @@ -104,9 +104,9 @@ fun testStartBalanceCodegen2() { testDumpDontPolluteStack PROC:<{ ... 
DUMPSTK - x{6d79} PUSHSLICE // f s _9 + x{6d79} PUSHSLICE // f s _5 STRDUMP DROP - SBITS // f _11 + SBITS // f _6 }> """ diff --git a/tolk-tester/tests/cells-slices.tolk b/tolk-tester/tests/cells-slices.tolk index 6f316f2e..0a2807e6 100644 --- a/tolk-tester/tests/cells-slices.tolk +++ b/tolk-tester/tests/cells-slices.tolk @@ -220,11 +220,11 @@ Note, that since 'compute-asm-ltr' became on be default, chaining methods codege 1 PUSHINT // _0 _1=1 SWAP // _1=1 _0 32 STU // _0 - 2 PUSHINT // _0 _5=2 - SWAP // _5=2 _0 + 2 PUSHINT // _0 _4=2 + SWAP // _4=2 _0 32 STU // _0 - 3 PUSHINT // _0 _9=3 - SWAP // _9=3 _0 + 3 PUSHINT // _0 _7=3 + SWAP // _7=3 _0 32 STU // _0 }> """ diff --git a/tolk-tester/tests/logical-operators.tolk b/tolk-tester/tests/logical-operators.tolk index fb437bb3..9ef4858d 100644 --- a/tolk-tester/tests/logical-operators.tolk +++ b/tolk-tester/tests/logical-operators.tolk @@ -332,15 +332,15 @@ These are moments of future optimizations. For now, it's more than enough. DUP // x x IFNOTJMP:<{ // x DROP // - 1 PUSHINT // _7=1 + 1 PUSHINT // _5=1 }> // x DUP // x x IFNOTJMP:<{ // x DROP // - 1 PUSHINT // _8=1 + 1 PUSHINT // _6=1 }> // x 100 THROWIFNOT - -4 PUSHINT // _12=-4 + -4 PUSHINT // _9=-4 }> """ diff --git a/tolk-tester/tests/mutate-methods.tolk b/tolk-tester/tests/mutate-methods.tolk index 816e4c8d..0a5ca191 100644 --- a/tolk-tester/tests/mutate-methods.tolk +++ b/tolk-tester/tests/mutate-methods.tolk @@ -307,7 +307,7 @@ fun main(){} ... 
incrementTwoInPlace CALLDICT // x y sum1 -ROT - 10 PUSHINT // sum1 x y _8=10 + 10 PUSHINT // sum1 x y _10=10 incrementTwoInPlace CALLDICT // sum1 x y sum2 s1 s3 s0 XCHG3 // x y sum1 sum2 }> @@ -317,8 +317,8 @@ fun main(){} """ load_next PROC:<{ // cs - 32 LDI // _3 cs - SWAP // cs _3 + 32 LDI // _4 cs + SWAP // cs _4 }> """ diff --git a/tolk-tester/tests/null-keyword.tolk b/tolk-tester/tests/null-keyword.tolk index 9ace9995..40e391c8 100644 --- a/tolk-tester/tests/null-keyword.tolk +++ b/tolk-tester/tests/null-keyword.tolk @@ -113,7 +113,7 @@ fun main() { CONS // numbers UNCONS // h numbers DUP // h numbers numbers - CAR // h numbers _12 + CAR // h numbers _13 """ @fif_codegen @@ -133,7 +133,7 @@ fun main() { """ test7 PROC:<{ ... - LDOPTREF // b _18 _17 + LDOPTREF // b _8 _7 DROP // b c ISNULL // b _11 10 MULCONST // b _13 diff --git a/tolk/abscode.cpp b/tolk/abscode.cpp index 7bcb0f84..25ddd10e 100644 --- a/tolk/abscode.cpp +++ b/tolk/abscode.cpp @@ -31,16 +31,6 @@ void TmpVar::dump(std::ostream& os) const { os << " : " << v_type << " (width "; os << v_type->calc_width_on_stack(); os << ")"; - if (coord > 0) { - os << " = _" << (coord >> 8) << '.' 
<< (coord & 255); - } else if (coord < 0) { - int n = (~coord >> 8), k = (~coord & 0xff); - if (k) { - os << " = (_" << n << ".._" << (n + k - 1) << ")"; - } else { - os << " = ()"; - } - } os << std::endl; } @@ -51,7 +41,7 @@ void TmpVar::show(std::ostream& os, int omit_idx) const { return; } } - os << '_' << idx; + os << '_' << ir_idx; } std::ostream& operator<<(std::ostream& os, const TmpVar& var) { @@ -182,47 +172,6 @@ void VarDescrList::show(std::ostream& os) const { os << " ]\n"; } -void Op::split_vars(const std::vector& vars) { - split_var_list(left, vars); - split_var_list(right, vars); - for (auto& op : block0) { - op.split_vars(vars); - } - for (auto& op : block1) { - op.split_vars(vars); - } -} - -void Op::split_var_list(std::vector& var_list, const std::vector& vars) { - int new_size = 0, changes = 0; - for (var_idx_t v : var_list) { - int c = vars.at(v).coord; - if (c < 0) { - ++changes; - new_size += (~c & 0xff); - } else { - ++new_size; - } - } - if (!changes) { - return; - } - std::vector new_var_list; - new_var_list.reserve(new_size); - for (var_idx_t v : var_list) { - int c = vars.at(v).coord; - if (c < 0) { - int n = (~c >> 8), k = (~c & 0xff); - while (k-- > 0) { - new_var_list.push_back(n++); - } - } else { - new_var_list.push_back(v); - } - } - var_list = std::move(new_var_list); -} - void Op::show(std::ostream& os, const std::vector& vars, std::string pfx, int mode) const { if (mode & 2) { os << pfx << " ["; @@ -444,26 +393,22 @@ void CodeBlob::print(std::ostream& os, int flags) const { os << "-------- END ---------\n\n"; } -var_idx_t CodeBlob::create_var(TypePtr var_type, const LocalVarData* v_sym, SrcLocation location) { - vars.emplace_back(var_cnt, var_type, v_sym, location); - return var_cnt++; -} - -bool CodeBlob::import_params(FormalArgList&& arg_list) { - if (var_cnt || in_var_cnt) { - return false; +std::vector CodeBlob::create_var(TypePtr var_type, const LocalVarData* v_sym, SrcLocation loc) { + std::vector ir_idx; + 
ir_idx.reserve(var_type->calc_width_on_stack()); + if (const TypeDataTensor* t_tensor = var_type->try_as()) { + for (TypePtr item : t_tensor->items) { + std::vector nested = create_var(item, v_sym, loc); + ir_idx.insert(ir_idx.end(), nested.begin(), nested.end()); + } + } else if (var_type != TypeDataVoid::create()) { + tolk_assert(var_type->calc_width_on_stack() == 1); + vars.emplace_back(var_cnt, var_type, v_sym, loc); + ir_idx.emplace_back(var_cnt); + var_cnt++; } - std::vector list; - for (const auto& par : arg_list) { - TypePtr arg_type; - const LocalVarData* arg_sym; - SrcLocation arg_loc; - std::tie(arg_type, arg_sym, arg_loc) = par; - list.push_back(create_var(arg_type, arg_sym, arg_loc)); - } - emplace_back(loc, Op::_Import, list); - in_var_cnt = var_cnt; - return true; + tolk_assert(static_cast(ir_idx.size()) == var_type->calc_width_on_stack()); + return ir_idx; } } // namespace tolk diff --git a/tolk/analyzer.cpp b/tolk/analyzer.cpp index 8539afdd..9303bc83 100644 --- a/tolk/analyzer.cpp +++ b/tolk/analyzer.cpp @@ -26,40 +26,6 @@ namespace tolk { * */ -int CodeBlob::split_vars(bool strict) { - int n = var_cnt, changes = 0; - for (int j = 0; j < var_cnt; j++) { - TmpVar& var = vars[j]; - int width_j = var.v_type->calc_width_on_stack(); - if (strict && width_j < 0) { - throw ParseError{var.where, "variable does not have fixed width, cannot manipulate it"}; - } - if (width_j == 1) { - continue; - } - std::vector comp_types; - var.v_type->extract_components(comp_types); - tolk_assert(width_j <= 254 && n <= 0x7fff00); - tolk_assert((unsigned)width_j == comp_types.size()); - var.coord = ~((n << 8) + width_j); - for (int i = 0; i < width_j; i++) { - auto v = create_var(comp_types[i], vars[j].v_sym, vars[j].where); - tolk_assert(v == n + i); - tolk_assert(vars[v].idx == v); - vars[v].coord = ((int)j << 8) + i + 1; - } - n += width_j; - ++changes; - } - if (!changes) { - return 0; - } - for (auto& op : ops) { - op.split_vars(vars); - } - return changes; -} - bool 
CodeBlob::compute_used_code_vars() { VarDescrList empty_var_info; return compute_used_code_vars(ops, empty_var_info, true); diff --git a/tolk/generics-helpers.cpp b/tolk/generics-helpers.cpp index 3d353cc4..7a2dd83f 100644 --- a/tolk/generics-helpers.cpp +++ b/tolk/generics-helpers.cpp @@ -202,8 +202,8 @@ td::Result> deduce_substitutionTs_on_generic_func_call(cons try { GenericSubstitutionsDeduceForFunctionCall deducing(called_fun); for (const LocalVarData& param : called_fun->parameters) { - if (param.declared_type->has_genericT_inside() && param.idx < static_cast(arg_types.size())) { - deducing.consider_next_condition(param.declared_type, arg_types[param.idx]); + if (param.declared_type->has_genericT_inside() && param.param_idx < static_cast(arg_types.size())) { + deducing.consider_next_condition(param.declared_type, arg_types[param.param_idx]); } } int idx = deducing.get_first_not_deduced_idx(); @@ -233,7 +233,7 @@ const FunctionData* instantiate_generic_function(SrcLocation loc, const Function std::vector parameters; parameters.reserve(fun_ref->get_num_params()); for (const LocalVarData& orig_p : fun_ref->parameters) { - parameters.emplace_back(orig_p.name, orig_p.loc, replace_genericT_with_deduced(orig_p.declared_type, fun_ref->genericTs, substitutionTs), orig_p.flags, orig_p.idx); + parameters.emplace_back(orig_p.name, orig_p.loc, replace_genericT_with_deduced(orig_p.declared_type, fun_ref->genericTs, substitutionTs), orig_p.flags, orig_p.param_idx); } TypePtr declared_return_type = replace_genericT_with_deduced(fun_ref->declared_return_type, fun_ref->genericTs, substitutionTs); const GenericsInstantiation* instantiationTs = new GenericsInstantiation(loc, std::move(substitutionTs)); diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index d60bb8b3..c64fe751 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -34,15 +34,15 @@ namespace tolk { struct LValGlobs { - std::vector> globs; + std::vector>> globs; - void 
add_modified_glob(const GlobalVarData* g_sym, var_idx_t local_ir_idx) { - globs.emplace_back(g_sym, local_ir_idx); + void add_modified_glob(const GlobalVarData* g_sym, std::vector local_ir_idx) { + globs.emplace_back(g_sym, std::move(local_ir_idx)); } void gen_ops_set_globs(CodeBlob& code, SrcLocation loc) const { for (const auto& [g_sym, ir_idx] : globs) { - Op& op = code.emplace_back(loc, Op::_SetGlob, std::vector{}, std::vector{ ir_idx }, g_sym); + Op& op = code.emplace_back(loc, Op::_SetGlob, std::vector{}, ir_idx, g_sym); op.set_impure_flag(); } } @@ -93,7 +93,9 @@ static std::vector> pre_compile_tensor_inner(CodeBlob& co void on_var_modified(var_idx_t ir_idx, SrcLocation loc, CodeBlob& code) { tolk_assert(is_watched(ir_idx)); - var_idx_t tmp_idx = code.create_tmp_var(code.vars[ir_idx].v_type, loc); + std::vector tmp_idx_arr = code.create_tmp_var(code.vars[ir_idx].v_type, loc); + tolk_assert(tmp_idx_arr.size() == 1); + var_idx_t tmp_idx = tmp_idx_arr[0]; code.emplace_back(loc, Op::_Let, std::vector{tmp_idx}, std::vector{ir_idx}); for (std::vector& prev_vars : res_lists) { std::replace(prev_vars.begin(), prev_vars.end(), ir_idx, tmp_idx); @@ -143,7 +145,7 @@ static std::vector pre_compile_let(CodeBlob& code, AnyExprV lhs, AnyE std::vector right = pre_compile_expr(rhs, code); const TypeDataTypedTuple* inferred_tuple = rhs->inferred_type->try_as(); std::vector types_list = inferred_tuple->items; - std::vector rvect = {code.create_tmp_var(TypeDataTensor::create(std::move(types_list)), rhs->loc)}; + std::vector rvect = code.create_tmp_var(TypeDataTensor::create(std::move(types_list)), rhs->loc); code.emplace_back(lhs->loc, Op::_UnTuple, rvect, std::move(right)); LValGlobs globs; std::vector left = pre_compile_tensor(code, lhs->as()->get_items(), &globs); @@ -164,7 +166,7 @@ static std::vector pre_compile_let(CodeBlob& code, AnyExprV lhs, AnyE static std::vector gen_op_call(CodeBlob& code, TypePtr ret_type, SrcLocation here, std::vector&& args_vars, const 
FunctionData* fun_ref) { - std::vector rvect = {code.create_tmp_var(ret_type, here)}; + std::vector rvect = code.create_tmp_var(ret_type, here); Op& op = code.emplace_back(here, Op::_Call, rvect, std::move(args_vars), fun_ref); if (!fun_ref->is_marked_as_pure()) { op.set_impure_flag(); @@ -175,9 +177,9 @@ static std::vector gen_op_call(CodeBlob& code, TypePtr ret_type, SrcL static std::vector process_symbol(SrcLocation loc, const Symbol* sym, CodeBlob& code, LValGlobs* lval_globs) { if (const auto* glob_ref = sym->try_as()) { - std::vector rvect = {code.create_tmp_var(glob_ref->declared_type, loc)}; + std::vector rvect = code.create_tmp_var(glob_ref->declared_type, loc); if (lval_globs) { - lval_globs->add_modified_glob(glob_ref, rvect[0]); + lval_globs->add_modified_glob(glob_ref, rvect); return rvect; } else { code.emplace_back(loc, Op::_GlobVar, rvect, std::vector{}, glob_ref); @@ -186,22 +188,25 @@ static std::vector process_symbol(SrcLocation loc, const Symbol* sym, } if (const auto* const_ref = sym->try_as()) { if (const_ref->is_int_const()) { - std::vector rvect = {code.create_tmp_var(TypeDataInt::create(), loc)}; + std::vector rvect = code.create_tmp_var(TypeDataInt::create(), loc); code.emplace_back(loc, Op::_IntConst, rvect, const_ref->as_int_const()); return rvect; } else { - std::vector rvect = {code.create_tmp_var(TypeDataSlice::create(), loc)}; + std::vector rvect = code.create_tmp_var(TypeDataSlice::create(), loc); code.emplace_back(loc, Op::_SliceConst, rvect, const_ref->as_slice_const()); return rvect; } } if (const auto* fun_ref = sym->try_as()) { - std::vector rvect = {code.create_tmp_var(fun_ref->inferred_full_type, loc)}; + std::vector rvect = code.create_tmp_var(fun_ref->inferred_full_type, loc); code.emplace_back(loc, Op::_GlobVar, rvect, std::vector{}, fun_ref); return rvect; } if (const auto* var_ref = sym->try_as()) { - return {var_ref->idx}; +#ifdef TOLK_DEBUG + tolk_assert(static_cast(var_ref->ir_idx.size()) == 
var_ref->declared_type->calc_width_on_stack()); +#endif + return var_ref->ir_idx; } throw Fatal("process_symbol"); } @@ -244,7 +249,7 @@ static std::vector process_binary_operator(V v, v_b_ne_0->mutate()->assign_fun_ref(lookup_global_symbol("_!=_")->as()); std::vector cond = pre_compile_expr(v->get_lhs(), code); tolk_assert(cond.size() == 1); - std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; + std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc); Op& if_op = code.emplace_back(v->loc, Op::_If, cond); code.push_set_cur(if_op.block0); code.emplace_back(v->loc, Op::_Let, rvect, pre_compile_expr(t == tok_logical_and ? v_b_ne_0 : v_1, code)); @@ -266,7 +271,7 @@ static std::vector process_unary_operator(V v, Co static std::vector process_ternary_operator(V v, CodeBlob& code) { std::vector cond = pre_compile_expr(v->get_cond(), code); tolk_assert(cond.size() == 1); - std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; + std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc); Op& if_op = code.emplace_back(v->loc, Op::_If, cond); code.push_set_cur(if_op.block0); code.emplace_back(v->get_when_true()->loc, Op::_Let, rvect, pre_compile_expr(v->get_when_true(), code)); @@ -299,7 +304,7 @@ static std::vector process_function_call(V v, Code std::vector tfunc = pre_compile_expr(v->get_callee(), code); tolk_assert(tfunc.size() == 1); args_vars.push_back(tfunc[0]); - std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; + std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc); Op& op = code.emplace_back(v->loc, Op::_CallInd, rvect, std::move(args_vars)); op.set_impure_flag(); return rvect; @@ -361,8 +366,8 @@ static std::vector process_function_call(V v, Code } } } - std::vector rvect = {code.create_tmp_var(real_ret_type, v->loc)}; - left.push_back(rvect[0]); + std::vector rvect = code.create_tmp_var(real_ret_type, v->loc); + left.insert(left.end(), rvect.begin(), rvect.end()); 
code.on_var_modification(left, v->loc); code.emplace_back(v->loc, Op::_Let, std::move(left), rvect_apply); local_globs.gen_ops_set_globs(code, v->loc); @@ -388,21 +393,21 @@ static std::vector process_typed_tuple(V v, CodeBlob if (lval_globs) { // todo some time, make "var (a, [b,c]) = (1, [2,3])" work v->error("[...] can not be used as lvalue here"); } - std::vector left = std::vector{code.create_tmp_var(v->inferred_type, v->loc)}; + std::vector left = code.create_tmp_var(v->inferred_type, v->loc); std::vector right = pre_compile_tensor(code, v->get_items()); code.emplace_back(v->loc, Op::_Tuple, left, std::move(right)); return left; } static std::vector process_int_const(V v, CodeBlob& code) { - std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; + std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc); code.emplace_back(v->loc, Op::_IntConst, rvect, v->intval); return rvect; } static std::vector process_string_const(V v, CodeBlob& code) { ConstantValue value = eval_const_init_value(v); - std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; + std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc); if (value.is_int()) { code.emplace_back(v->loc, Op::_IntConst, rvect, value.as_int()); } else { @@ -426,9 +431,9 @@ static std::vector process_local_var(V v, CodeBlob return process_symbol(v->loc, v->var_ref, code, nullptr); } - tolk_assert(v->var_ref->idx == -1); - v->var_ref->mutate()->assign_idx(code.create_var(v->inferred_type, v->var_ref, v->loc)); - return {v->var_ref->idx}; + tolk_assert(v->var_ref->ir_idx.empty()); + v->var_ref->mutate()->assign_ir_idx(code.create_var(v->inferred_type, v->var_ref, v->loc)); + return v->var_ref->ir_idx; } static std::vector process_local_vars_declaration(V, CodeBlob&) { @@ -439,7 +444,7 @@ static std::vector process_local_vars_declaration(V process_underscore(V v, CodeBlob& code) { // when _ is used as left side of assignment, like `(cs, _) = cs.loadAndReturn()` - return 
{code.create_tmp_var(v->inferred_type, v->loc)}; + return code.create_tmp_var(v->inferred_type, v->loc); } std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, LValGlobs* lval_globs) { @@ -516,8 +521,8 @@ static void process_assert_statement(V v, CodeBlob& code) static void process_catch_variable(AnyExprV v_catch_var, CodeBlob& code) { if (auto v_ref = v_catch_var->try_as(); v_ref && v_ref->sym) { // not underscore const LocalVarData* var_ref = v_ref->sym->as(); - tolk_assert(var_ref->idx == -1); - var_ref->mutate()->assign_idx(code.create_var(v_catch_var->inferred_type, var_ref, v_catch_var->loc)); + tolk_assert(var_ref->ir_idx.empty()); + var_ref->mutate()->assign_ir_idx(code.create_var(v_catch_var->inferred_type, var_ref, v_catch_var->loc)); } } @@ -627,14 +632,13 @@ static void process_throw_statement(V v, CodeBlob& code) { static void process_return_statement(V v, CodeBlob& code) { std::vector return_vars = v->has_return_value() ? pre_compile_expr(v->get_return_value(), code) : std::vector{}; if (code.fun_ref->does_return_self()) { - tolk_assert(return_vars.size() == 1); return_vars = {}; } if (code.fun_ref->has_mutate_params()) { std::vector mutated_vars; for (const LocalVarData& p_sym: code.fun_ref->parameters) { if (p_sym.is_mutate_parameter()) { - mutated_vars.push_back(p_sym.idx); + mutated_vars.insert(mutated_vars.end(), p_sym.ir_idx.begin(), p_sym.ir_idx.end()); } } return_vars.insert(return_vars.begin(), mutated_vars.begin(), mutated_vars.end()); @@ -647,7 +651,7 @@ static void append_implicit_return_statement(SrcLocation loc_end, CodeBlob& code if (code.fun_ref->has_mutate_params()) { for (const LocalVarData& p_sym: code.fun_ref->parameters) { if (p_sym.is_mutate_parameter()) { - mutated_vars.push_back(p_sym.idx); + mutated_vars.insert(mutated_vars.end(), p_sym.ir_idx.begin(), p_sym.ir_idx.end()); } } } @@ -685,11 +689,23 @@ void process_any_statement(AnyV v, CodeBlob& code) { static void convert_function_body_to_CodeBlob(const FunctionData* 
fun_ref, FunctionBodyCode* code_body) { auto v_body = fun_ref->ast_root->as()->get_body()->as(); CodeBlob* blob = new CodeBlob{fun_ref->name, fun_ref->loc, fun_ref}; - FormalArgList legacy_arg_list; - for (const LocalVarData& param : fun_ref->parameters) { - legacy_arg_list.emplace_back(param.declared_type, &param, param.loc); + + std::vector rvect_import; + int total_arg_width = 0; + for (int i = 0; i < fun_ref->get_num_params(); ++i) { + total_arg_width += fun_ref->parameters[i].declared_type->calc_width_on_stack(); } - blob->import_params(std::move(legacy_arg_list)); + rvect_import.reserve(total_arg_width); + + for (int i = 0; i < fun_ref->get_num_params(); ++i) { + const LocalVarData& param_i = fun_ref->parameters[i]; + std::vector ir_idx = blob->create_var(param_i.declared_type, &param_i, param_i.loc); + rvect_import.insert(rvect_import.end(), ir_idx.begin(), ir_idx.end()); + param_i.mutate()->assign_ir_idx(std::move(ir_idx)); + } + blob->emplace_back(fun_ref->loc, Op::_Import, rvect_import); + blob->in_var_cnt = blob->var_cnt; + tolk_assert(blob->var_cnt == total_arg_width); for (AnyV item : v_body->get_items()) { process_any_statement(item, *blob); diff --git a/tolk/pipe-check-rvalue-lvalue.cpp b/tolk/pipe-check-rvalue-lvalue.cpp index 038b0999..943dfb96 100644 --- a/tolk/pipe-check-rvalue-lvalue.cpp +++ b/tolk/pipe-check-rvalue-lvalue.cpp @@ -38,7 +38,7 @@ static void fire_error_cannot_be_used_as_lvalue(AnyV v, const std::string& detai GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD static void fire_error_modifying_immutable_variable(AnyExprV v, const LocalVarData* var_ref) { - if (var_ref->idx == 0 && var_ref->name == "self") { + if (var_ref->param_idx == 0 && var_ref->name == "self") { v->error("modifying `self`, which is immutable by default; probably, you want to declare `mutate self`"); } else { v->error("modifying immutable variable `" + var_ref->name + "`"); diff --git a/tolk/pipe-generate-fif-output.cpp b/tolk/pipe-generate-fif-output.cpp index 9092e564..7ef6ba7b
100644 --- a/tolk/pipe-generate-fif-output.cpp +++ b/tolk/pipe-generate-fif-output.cpp @@ -54,11 +54,6 @@ static void generate_output_func(const FunctionData* fun_ref) { std::cerr << "after prune_unreachable: \n"; code->print(std::cerr, 0); } - code->split_vars(true); - if (G.is_verbosity(5)) { - std::cerr << "after split_vars: \n"; - code->print(std::cerr, 0); - } for (int i = 0; i < 8; i++) { code->compute_used_code_vars(); if (G.is_verbosity(4)) { diff --git a/tolk/pipe-refine-lvalue-for-mutate.cpp b/tolk/pipe-refine-lvalue-for-mutate.cpp index 45dd3a94..540d7413 100644 --- a/tolk/pipe-refine-lvalue-for-mutate.cpp +++ b/tolk/pipe-refine-lvalue-for-mutate.cpp @@ -38,11 +38,11 @@ static void fire_error_invalid_mutate_arg_passed(AnyExprV v, const FunctionData* std::string arg_str(arg_expr->type == ast_reference ? arg_expr->as()->get_name() : "obj"); // case: `loadInt(cs, 32)`; suggest: `cs.loadInt(32)` - if (p_sym.is_mutate_parameter() && !arg_passed_as_mutate && !called_as_method && p_sym.idx == 0 && fun_ref->does_accept_self()) { + if (p_sym.is_mutate_parameter() && !arg_passed_as_mutate && !called_as_method && p_sym.param_idx == 0 && fun_ref->does_accept_self()) { v->error("`" + fun_ref->name + "` is a mutating method; consider calling `" + arg_str + "." + fun_ref->name + "()`, not `" + fun_ref->name + "(" + arg_str + ")`"); } // case: `cs.mutating_function()`; suggest: `mutating_function(mutate cs)` or make it a method - if (p_sym.is_mutate_parameter() && called_as_method && p_sym.idx == 0 && !fun_ref->does_accept_self()) { + if (p_sym.is_mutate_parameter() && called_as_method && p_sym.param_idx == 0 && !fun_ref->does_accept_self()) { v->error("function `" + fun_ref->name + "` mutates parameter `" + p_sym.name + "`; consider calling `" + fun_ref->name + "(mutate " + arg_str + ")`, not `" + arg_str + "." 
+ fun_ref->name + "`(); alternatively, rename parameter to `self` to make it a method"); } // case: `mutating_function(arg)`; suggest: `mutate arg` diff --git a/tolk/symtable.cpp b/tolk/symtable.cpp index 918fdab3..c56dc6ed 100644 --- a/tolk/symtable.cpp +++ b/tolk/symtable.cpp @@ -93,8 +93,8 @@ void GlobalConstData::assign_resolved_type(TypePtr declared_type) { this->declared_type = declared_type; } -void LocalVarData::assign_idx(int idx) { - this->idx = idx; +void LocalVarData::assign_ir_idx(std::vector&& ir_idx) { + this->ir_idx = std::move(ir_idx); } void LocalVarData::assign_resolved_type(TypePtr declared_type) { diff --git a/tolk/symtable.h b/tolk/symtable.h index 3cda24ed..27753ceb 100644 --- a/tolk/symtable.h +++ b/tolk/symtable.h @@ -59,20 +59,23 @@ struct LocalVarData final : Symbol { TypePtr declared_type; // either at declaration `var x:int`, or if omitted, from assigned value `var x=2` int flags; - int idx; + int param_idx; // 0...N for function parameters, -1 for local vars + std::vector ir_idx; - LocalVarData(std::string name, SrcLocation loc, TypePtr declared_type, int flags, int idx) + LocalVarData(std::string name, SrcLocation loc, TypePtr declared_type, int flags, int param_idx) : Symbol(std::move(name), loc) , declared_type(declared_type) , flags(flags) - , idx(idx) { + , param_idx(param_idx) { } + bool is_parameter() const { return param_idx >= 0; } + bool is_immutable() const { return flags & flagImmutable; } bool is_mutate_parameter() const { return flags & flagMutateParameter; } LocalVarData* mutate() const { return const_cast(this); } - void assign_idx(int idx); + void assign_ir_idx(std::vector&& ir_idx); void assign_resolved_type(TypePtr declared_type); void assign_inferred_type(TypePtr inferred_type); }; diff --git a/tolk/tolk.h b/tolk/tolk.h index 5ec4d3e0..7b44931e 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h @@ -45,17 +45,15 @@ typedef int const_idx_t; struct TmpVar { TypePtr v_type; - var_idx_t idx; + var_idx_t ir_idx; const 
LocalVarData* v_sym; // points to var defined in code; nullptr for implicitly created tmp vars - int coord; SrcLocation where; std::vector> on_modification; - TmpVar(var_idx_t _idx, TypePtr type, const LocalVarData* v_sym, SrcLocation loc) + TmpVar(var_idx_t ir_idx, TypePtr type, const LocalVarData* v_sym, SrcLocation loc) : v_type(type) - , idx(_idx) + , ir_idx(ir_idx) , v_sym(v_sym) - , coord(0) , where(loc) { } @@ -345,8 +343,6 @@ struct Op { void show_var_list(std::ostream& os, const std::vector& list, const std::vector& vars) const; static void show_block(std::ostream& os, const Op* block, const std::vector& vars, std::string pfx = "", int mode = 0); - void split_vars(const std::vector& vars); - static void split_var_list(std::vector& var_list, const std::vector& vars); bool compute_used_vars(const CodeBlob& code, bool edit); bool std_compute_used_vars(bool disabled = false); bool set_var_info(const VarDescrList& new_var_info); @@ -385,9 +381,6 @@ inline ListIterator end(const Op* op_list) { return ListIterator{}; } -typedef std::tuple FormalArg; -typedef std::vector FormalArgList; - struct AsmOpList; struct FunctionBodyCode { @@ -1115,12 +1108,10 @@ struct CodeBlob { #endif return res; } - bool import_params(FormalArgList&& arg_list); - var_idx_t create_var(TypePtr var_type, const LocalVarData* v_sym, SrcLocation loc); - var_idx_t create_tmp_var(TypePtr var_type, SrcLocation loc) { + std::vector create_var(TypePtr var_type, const LocalVarData* v_sym, SrcLocation loc); + std::vector create_tmp_var(TypePtr var_type, SrcLocation loc) { return create_var(var_type, nullptr, loc); } - int split_vars(bool strict = false); bool compute_used_code_vars(); bool compute_used_code_vars(std::unique_ptr& ops, const VarDescrList& var_info, bool edit) const; void print(std::ostream& os, int flags = 0) const; diff --git a/tolk/type-system.cpp b/tolk/type-system.cpp index 72419d8c..401c72af 100644 --- a/tolk/type-system.cpp +++ b/tolk/type-system.cpp @@ -537,31 +537,6 @@ bool 
TypeDataVoid::can_be_casted_with_as_operator(TypePtr cast_to) const { } -// -------------------------------------------- -// extract_components() -// -// used in code generation (transforming Ops to other Ops) -// to be removed in the future -// - -void TypeDataGenericT::extract_components(std::vector& comp_types) const { - assert(false); -} - -void TypeDataTensor::extract_components(std::vector& comp_types) const { - for (TypePtr item : items) { - item->extract_components(comp_types); - } -} - -void TypeDataUnresolved::extract_components(std::vector& comp_types) const { - assert(false); -} - -void TypeDataVoid::extract_components(std::vector& comp_types) const { -} - - // -------------------------------------------- // parsing type from tokens // diff --git a/tolk/type-system.h b/tolk/type-system.h index 0db42709..482039e6 100644 --- a/tolk/type-system.h +++ b/tolk/type-system.h @@ -97,10 +97,6 @@ public: virtual int calc_width_on_stack() const { return 1; } - - virtual void extract_components(std::vector& comp_types) const { - comp_types.push_back(this); - } }; /* @@ -291,7 +287,6 @@ public: bool can_rhs_be_assigned(TypePtr rhs) const override; bool can_be_casted_with_as_operator(TypePtr cast_to) const override; int calc_width_on_stack() const override; - void extract_components(std::vector& comp_types) const override; }; /* @@ -318,7 +313,6 @@ public: void traverse(const TraverserCallbackT& callback) const override; TypePtr replace_children_custom(const ReplacerCallbackT& callback) const override; int calc_width_on_stack() const override; - void extract_components(std::vector& comp_types) const override; }; /* @@ -387,7 +381,6 @@ public: bool can_rhs_be_assigned(TypePtr rhs) const override; bool can_be_casted_with_as_operator(TypePtr cast_to) const override; int calc_width_on_stack() const override; - void extract_components(std::vector& comp_types) const override; }; /* @@ -408,7 +401,6 @@ public: bool can_rhs_be_assigned(TypePtr rhs) const override; bool 
can_be_casted_with_as_operator(TypePtr cast_to) const override; int calc_width_on_stack() const override; - void extract_components(std::vector& comp_types) const override; }; From 7a1602f591cc7db4ea9ce12a84dc3c4bf90dd32e Mon Sep 17 00:00:00 2001 From: tolk-vm Date: Mon, 27 Jan 2025 10:29:17 +0300 Subject: [PATCH 38/61] [Tolk] Support syntax `tensorVar.0` and `tupleVar.0` It works both for reading and writing: > var t = (1, 2); > t.0; // 1 > t.0 = 5; > t; // (5, 2) It also works for typed/untyped tuples, producing INDEX and SETINDEX. Global tensors and tuples work. Nesting `t.0.1.2` works. `mutate` works. Even mixing tuples inside tensors inside a global for writing works. --- crypto/smartcont/tolk-stdlib/common.tolk | 17 +- tolk-tester/tests/a10.tolk | 32 +- tolk-tester/tests/a6_1.tolk | 4 +- .../tests/allow_post_modification.tolk | 13 +- tolk-tester/tests/bit-operators.tolk | 54 +-- tolk-tester/tests/cells-slices.tolk | 20 +- tolk-tester/tests/codegen_check_demo.tolk | 6 +- tolk-tester/tests/generics-1.tolk | 19 +- tolk-tester/tests/if_stmt.tolk | 6 +- tolk-tester/tests/indexed-access.tolk | 287 ++++++++++++ tolk-tester/tests/invalid-assign-1.tolk | 9 + tolk-tester/tests/invalid-assign-2.tolk | 11 + tolk-tester/tests/invalid-assign-3.tolk | 10 + tolk-tester/tests/invalid-assign-4.tolk | 10 + tolk-tester/tests/invalid-assign-5.tolk | 9 + tolk-tester/tests/invalid-assign-6.tolk | 9 + tolk-tester/tests/invalid-assign-7.tolk | 8 + tolk-tester/tests/invalid-call-1.tolk | 2 +- tolk-tester/tests/invalid-call-10.tolk | 11 + tolk-tester/tests/invalid-call-11.tolk | 11 + tolk-tester/tests/invalid-call-7.tolk | 2 +- tolk-tester/tests/invalid-call-8.tolk | 4 +- tolk-tester/tests/invalid-generics-12.tolk | 15 + tolk-tester/tests/invalid-typing-13.tolk | 9 + tolk-tester/tests/known-bugs.tolk | 27 -- tolk-tester/tests/logical-operators.tolk | 40 +- tolk-tester/tests/mutate-methods.tolk | 12 +- tolk-tester/tests/no-spaces.tolk | 8 +- tolk-tester/tests/null-keyword.tolk | 38 +-
tolk-tester/tests/op-priority.tolk | 24 +- tolk-tester/tests/use-before-declare.tolk | 2 +- tolk-tester/tests/var-apply.tolk | 10 + tolk/abscode.cpp | 63 +-- tolk/asmops.cpp | 19 +- tolk/ast-from-tokens.cpp | 5 +- tolk/ast.h | 10 +- tolk/builtins.cpp | 14 + tolk/codegen.cpp | 2 +- tolk/pipe-ast-to-legacy.cpp | 437 ++++++++++++++---- tolk/pipe-check-rvalue-lvalue.cpp | 4 +- tolk/pipe-infer-types-and-calls.cpp | 110 ++++- tolk/tolk.h | 54 +-- 42 files changed, 1119 insertions(+), 338 deletions(-) create mode 100644 tolk-tester/tests/indexed-access.tolk create mode 100644 tolk-tester/tests/invalid-assign-1.tolk create mode 100644 tolk-tester/tests/invalid-assign-2.tolk create mode 100644 tolk-tester/tests/invalid-assign-3.tolk create mode 100644 tolk-tester/tests/invalid-assign-4.tolk create mode 100644 tolk-tester/tests/invalid-assign-5.tolk create mode 100644 tolk-tester/tests/invalid-assign-6.tolk create mode 100644 tolk-tester/tests/invalid-assign-7.tolk create mode 100644 tolk-tester/tests/invalid-call-10.tolk create mode 100644 tolk-tester/tests/invalid-call-11.tolk create mode 100644 tolk-tester/tests/invalid-generics-12.tolk create mode 100644 tolk-tester/tests/invalid-typing-13.tolk delete mode 100644 tolk-tester/tests/known-bugs.tolk diff --git a/crypto/smartcont/tolk-stdlib/common.tolk b/crypto/smartcont/tolk-stdlib/common.tolk index 46068a20..4c0c4007 100644 --- a/crypto/smartcont/tolk-stdlib/common.tolk +++ b/crypto/smartcont/tolk-stdlib/common.tolk @@ -21,23 +21,32 @@ fun tuplePush(mutate self: tuple, value: T): void asm "TPUSH"; /// Returns the first element of a non-empty tuple. +/// `t.0` is actually the same as `t.tupleFirst()` @pure -fun tupleFirst(t: tuple): T +fun tupleFirst(self: tuple): T asm "FIRST"; /// Returns the [`index`]-th element of a tuple. 
+/// `t.i` is actually the same as `t.tupleAt(i)` @pure -fun tupleAt(t: tuple, index: int): T +fun tupleAt(self: tuple, index: int): T + builtin; + +/// Sets the [`index`]-th element of a tuple to a specified value +/// (element with this index must already exist, a new element isn't created). +/// `t.i = value` is actually the same as `t.tupleSetAt(value, i)` +@pure +fun tupleSetAt(mutate self: tuple, value: T, index: int): void builtin; /// Returns the size of a tuple (elements count in it). @pure -fun tupleSize(t: tuple): int +fun tupleSize(self: tuple): int asm "TLEN"; /// Returns the last element of a non-empty tuple. @pure -fun tupleLast(t: tuple): T +fun tupleLast(self: tuple): T asm "LAST"; diff --git a/tolk-tester/tests/a10.tolk b/tolk-tester/tests/a10.tolk index af104248..755a3bfb 100644 --- a/tolk-tester/tests/a10.tolk +++ b/tolk-tester/tests/a10.tolk @@ -78,6 +78,17 @@ fun testStartBalanceCodegen2() { return first; } +global cur: [int, int, int]; +global next: [int, int, int]; + +@method_id(95) +fun test95() { + cur = [1, 2, 3]; + next = [2, 3, 4]; + (cur, next) = (next, [3, 4, 5]); + return (cur, next); +} + /** method_id | in | out @testcase | 0 | 101 15 | 100 1 @@ -90,6 +101,7 @@ fun testStartBalanceCodegen2() { @testcase | 89 | 4 | 1 4 1 4 @testcase | 91 | | 10 @testcase | 92 | | 10 32 +@testcase | 95 | | [ 2 3 4 ] [ 3 4 5 ] @fif_codegen """ @@ -104,9 +116,9 @@ fun testStartBalanceCodegen2() { testDumpDontPolluteStack PROC:<{ ... DUMPSTK - x{6d79} PUSHSLICE // f s _5 + x{6d79} PUSHSLICE // f s '5 STRDUMP DROP - SBITS // f _6 + SBITS // f '6 }> """ @@ -127,4 +139,20 @@ fun testStartBalanceCodegen2() { FIRST // first }> """ + +@fif_codegen +""" + test95 PROC:<{ + ... 
+ next GETGLOB // '10 + 3 PUSHINT // '10 '12=3 + 4 PUSHINT // '10 '12=3 '13=4 + 5 PUSHINT // '10 '12=3 '13=4 '14=5 + TRIPLE // '15 '16 + next SETGLOB + cur SETGLOB + cur GETGLOB // '17 + next GETGLOB // '17 '18 + }> +""" */ diff --git a/tolk-tester/tests/a6_1.tolk b/tolk-tester/tests/a6_1.tolk index 4995c42d..8079972b 100644 --- a/tolk-tester/tests/a6_1.tolk +++ b/tolk-tester/tests/a6_1.tolk @@ -7,7 +7,7 @@ fun main(a: int, b: int, c: int, d: int, e: int, f: int): (int, int) { @method_id(101) fun testDivMod(x: int, y: int) { - return [divMod(x, y), modDiv(x, y), mulDivMod(x, y, 10)]; + return (divMod(x, y), modDiv(x, y), mulDivMod(x, y, 10)); } /** @@ -18,5 +18,5 @@ fun testDivMod(x: int, y: int) { @testcase | 0 | 448 -433 -444 792 150012 -356232 | -218 -572 @testcase | 0 | -40 -821 433 -734 -721629 -741724 | -206 889 @testcase | 0 | -261 -98 -494 868 -166153 733738 | 263 995 -@testcase | 101 | 112 3 | [ 37 1 1 37 33 6 ] +@testcase | 101 | 112 3 | 37 1 1 37 33 6 */ diff --git a/tolk-tester/tests/allow_post_modification.tolk b/tolk-tester/tests/allow_post_modification.tolk index e374f62b..df758a1e 100644 --- a/tolk-tester/tests/allow_post_modification.tolk +++ b/tolk-tester/tests/allow_post_modification.tolk @@ -89,12 +89,14 @@ fun test_if_else(x: int): (int, int, int, int, int) { @method_id(21) fun test_assign_with_inner(x: int) { - return (x, x += 10, [(x, x += 20, eq(x -= 50), x)], eq2((x, x *= eq(x /= 2)))); + var result = (x, x += 10, [x, x += 20, eq(x -= 50), x], eq2((x, x *= eq(x /= 2)))); + return result; } @method_id(22) fun test_assign_with_mutate(x: int) { - return (x, mul2(mutate x, x += 5), x.`~inc`(mul2(mutate x, x)), x); + var (result, _) = ((x, mul2(mutate x, x += 5), x.`~inc`(mul2(mutate x, x)), x), 0); + return result; } @method_id(23) @@ -138,5 +140,12 @@ fun main() { inc CALLDICT // self newY }> """ + +@fif_codegen +""" + test_assign_tensor_global PROC:<{ + // x.0 x.1 +""" + @code_hash 
7627024945492125068389905298530400936797031708759561372406088054030801992712 */ diff --git a/tolk-tester/tests/bit-operators.tolk b/tolk-tester/tests/bit-operators.tolk index 4cb8e1ba..b0106883 100644 --- a/tolk-tester/tests/bit-operators.tolk +++ b/tolk-tester/tests/bit-operators.tolk @@ -127,10 +127,10 @@ fun testBoolCompareOptimized(x: bool) { """ boolWithBitwiseConst PROC:<{ // - 0 PUSHINT // _3 - -1 PUSHINT // _3 _5 - 0 PUSHINT // _3 _5 _7 - -1 PUSHINT // _3 _5 _7 _8 + 0 PUSHINT // '3 + -1 PUSHINT // '3 '5 + 0 PUSHINT // '3 '5 '7 + -1 PUSHINT // '3 '5 '7 '8 }> """ @@ -142,22 +142,22 @@ fun testBoolCompareOptimized(x: bool) { UNTIL:<{ INC // i n cnt s2 PUSH // i n cnt i - NOT // i n cnt _6 + NOT // i n cnt '6 }> // i n cnt UNTIL:<{ INC // i n cnt s2 PUSH // i n cnt i - NOT // i n cnt _9 + NOT // i n cnt '9 }> // i n cnt UNTIL:<{ INC // i n cnt OVER // i n cnt n - 0 EQINT // i n cnt _12 + 0 EQINT // i n cnt '12 }> // i n cnt s0 s2 XCHG // cnt n i - NOT // cnt n _13 - SWAP // cnt _13 n - 0 EQINT // cnt _13 _14 + NOT // cnt n '13 + SWAP // cnt '13 n + 0 EQINT // cnt '13 '14 }> """ @@ -165,12 +165,12 @@ fun testBoolCompareOptimized(x: bool) { """ testConstNegateCodegen PROC:<{ // - TRUE // _0 - FALSE // _0 _1 - FALSE // _0 _1 _2 - TRUE // _0 _1 _2 _3 - TRUE // _0 _1 _2 _3 _4 - FALSE // _0 _1 _2 _3 _4 _5 + TRUE // '0 + FALSE // '0 '1 + FALSE // '0 '1 '2 + TRUE // '0 '1 '2 '3 + TRUE // '0 '1 '2 '3 '4 + FALSE // '0 '1 '2 '3 '4 '5 }> """ @@ -179,11 +179,11 @@ fun testBoolCompareOptimized(x: bool) { testBoolNegateOptimized PROC:<{ // x DUP // x x - NOT // x _1 - OVER // x _1 x - NOT // x _1 _2 + NOT // x '1 + OVER // x '1 x + NOT // x '1 '2 s2 s(-1) PUXC - TRUE // x _1 x _2 _3 + TRUE // x '1 x '2 '3 }> """ @@ -192,13 +192,13 @@ fun testBoolCompareOptimized(x: bool) { testBoolCompareOptimized PROC:<{ // x DUP // x x - NOT // x _1 - OVER // x _1 x - eqX CALLDICT // x _1 _2 - NOT // x _1 _3 - s2 PUSH // x _1 _3 x - eqX CALLDICT // x _1 _3 _4 - s3 PUSH // x _1 _3 _4 x + NOT 
// x '1 + OVER // x '1 x + eqX CALLDICT // x '1 '2 + NOT // x '1 '3 + s2 PUSH // x '1 '3 x + eqX CALLDICT // x '1 '3 '4 + s3 PUSH // x '1 '3 '4 x }> """ */ diff --git a/tolk-tester/tests/cells-slices.tolk b/tolk-tester/tests/cells-slices.tolk index 0a2807e6..19e2e215 100644 --- a/tolk-tester/tests/cells-slices.tolk +++ b/tolk-tester/tests/cells-slices.tolk @@ -216,16 +216,16 @@ Note, that since 'compute-asm-ltr' became on be default, chaining methods codege """ test6 PROC:<{ // - NEWC // _0 - 1 PUSHINT // _0 _1=1 - SWAP // _1=1 _0 - 32 STU // _0 - 2 PUSHINT // _0 _4=2 - SWAP // _4=2 _0 - 32 STU // _0 - 3 PUSHINT // _0 _7=3 - SWAP // _7=3 _0 - 32 STU // _0 + NEWC // '0 + 1 PUSHINT // '0 '1=1 + SWAP // '1=1 '0 + 32 STU // '0 + 2 PUSHINT // '0 '4=2 + SWAP // '4=2 '0 + 32 STU // '0 + 3 PUSHINT // '0 '7=3 + SWAP // '7=3 '0 + 32 STU // '0 }> """ */ diff --git a/tolk-tester/tests/codegen_check_demo.tolk b/tolk-tester/tests/codegen_check_demo.tolk index e40f0377..b355a9b7 100644 --- a/tolk-tester/tests/codegen_check_demo.tolk +++ b/tolk-tester/tests/codegen_check_demo.tolk @@ -35,7 +35,7 @@ Below, I just give examples of @fif_codegen tag: """ main PROC:<{ // s - 17 PUSHINT // s _1=17 + 17 PUSHINT // s '1=17 OVER // s z=17 t WHILE:<{ ... @@ -63,7 +63,7 @@ main PROC:<{ @fif_codegen """ OVER - 0 GTINT // s z t _5 + 0 GTINT // s z t '5 """ @fif_codegen @@ -83,7 +83,7 @@ FALSE }> """ -@fif_codegen NOT // _8 +@fif_codegen NOT // '8 @fif_codegen main PROC:<{ @fif_codegen_avoid PROCINLINE diff --git a/tolk-tester/tests/generics-1.tolk b/tolk-tester/tests/generics-1.tolk index 5a649a44..453ec282 100644 --- a/tolk-tester/tests/generics-1.tolk +++ b/tolk-tester/tests/generics-1.tolk @@ -14,13 +14,19 @@ fun getTwo(): X { return 2 as X; } fun takeInt(a: int) { return a; } @method_id(102) -fun test102(): (int, int, int, [(int, int)]) { +fun test102(): (int, int, int, [int, int]) { var a: int = getTwo(); var _: int = getTwo(); var b = getTwo() as int; var c: int = 1 ? 
getTwo() : getTwo(); var c redef = getTwo(); - return (eq1(a), eq2(b), takeInt(getTwo()), [(getTwo(), getTwo())]); + var ab_tens = (0, (1, 2)); + ab_tens.0 = getTwo(); + ab_tens.1.1 = getTwo(); + var ab_tup = [0, [1, 2]]; + ab_tup.0 = getTwo(); + ab_tup.1.1 = getTwo(); + return (eq1(a), eq2(b), takeInt(getTwo()), [getTwo(), ab_tens.1.1]); } @method_id(103) @@ -43,9 +49,9 @@ fun manyEq(a: T1, b: T2, c: T3): [T1, T2, T3] { fun test104(f: int) { var result = ( manyEq(1 ? 1 : 1, f ? 0 : null, !f ? getTwo() as int : null), - manyEq((f ? null as int : eq2(2), beginCell().storeBool(true).endCell().beginParse().loadBool()), 0, eq4(f)) + manyEq(f ? null as int : eq2(2), beginCell().storeBool(true).endCell().beginParse().loadBool(), eq4(f)) ); - __expect_type(result, "([int, int, int], [(int, bool), int, int])"); + __expect_type(result, "([int, int, int], [int, bool, int])"); return result; } @@ -74,7 +80,8 @@ fun test106() { return [ abstractTransform(cellToSlice, calcLoad32, c), abstractTransform(calcYPlus1, calcYPlus1, 0), - abstractTransform(calcTensorPlus1, calcTensorMul2, (2, 2)) + abstractTransform(calcTensorPlus1, calcTensorMul2, (2, 2)).0, + abstractTransform(calcTensorPlus1, calcTensorMul2, (2, 2)).1 ]; } @@ -135,7 +142,7 @@ fun main(x: int): (int, [[int, int]]) { @testcase | 101 | 0 | 0 0 0 [ 0 0 ] 0 0 0 [ 0 0 ] 0 0 0 [] @testcase | 102 | | 2 2 2 [ 2 2 ] @testcase | 103 | 0 | 0 100 100 -@testcase | 104 | 0 | [ 1 (null) 2 ] [ 2 -1 0 0 ] +@testcase | 104 | 0 | [ 1 (null) 2 ] [ 2 -1 0 ] @testcase | 105 | | 3 @testcase | 106 | | [ 106 2 6 6 ] @testcase | 107 | | 6 6 1 1 6 6 diff --git a/tolk-tester/tests/if_stmt.tolk b/tolk-tester/tests/if_stmt.tolk index 2c51ac51..0f78a516 100644 --- a/tolk-tester/tests/if_stmt.tolk +++ b/tolk-tester/tests/if_stmt.tolk @@ -54,13 +54,13 @@ fun main() { test3 PROC:<{ // x DUP // x x - 20 NEQINT // x _2 + 20 NEQINT // x '2 IFNOTJMP:<{ // x DROP // - 20 PUSHINT // _3=20 + 20 PUSHINT // '3=20 }> // x DUP // x x - 50 EQINT // x _5 + 50 
EQINT // x '5 IFNOTJMP:<{ // x """ */ diff --git a/tolk-tester/tests/indexed-access.tolk b/tolk-tester/tests/indexed-access.tolk new file mode 100644 index 00000000..38094fa5 --- /dev/null +++ b/tolk-tester/tests/indexed-access.tolk @@ -0,0 +1,287 @@ + +fun increment(mutate self: int) { + self += 1; +} + +fun increment2(mutate a: int, mutate b: int) { + a += 1; + b += 1; +} + +fun assign1020(mutate a: int, mutate b: int) { + a = 10; + b = 20; +} + +fun plus(mutate self: int, y: int): int { + val newVals = (self + y, y * 10); + self = newVals.0; + return newVals.1; +} + +fun eq(v: X): X { return v; } + +@method_id(101) +fun test101() { + var t = (1, (2, 3), [4, 5, [6, 7]], 8); + (t.0, t.1.0, t.2.0) = (2, 3, 5); + t.3.increment(); + t.2.1 += (t.1.1 += 1) - t.1.1 + 1; + increment2(mutate t.2.2.0, mutate t.2.2.1); + return t; +} + +global t102: (int, (int, int), [int, int, [int, int]], int); + +@method_id(102) +fun test102() { + t102 = (1, (2, 3), [4, 5, [6, 7]], 8); + (t102.0, t102.1.0, t102.2.0) = (2, 3, 5); + t102.3.increment(); + t102.2.1 += (t102.1.1 += 1) - t102.1.1 + 1; + increment2(mutate t102.2.2.0, mutate t102.2.2.1); + return t102; +} + +global t103: (int, int); + +@method_id(103) +fun test103() { + t103 = (5, 5); + assign1020(mutate t103.0, mutate t103.1); + var t = (5, 5); + assign1020(mutate t.0, mutate t.1); + return (t103, t); +} + +global t104: [[int, int]]; + +@method_id(104) +fun test104() { + var m = [[5, 5]]; + (m.0.0, m.0.1) = (10, 20); + t104 = [[5, 5]]; + (t104.0.0, t104.0.1) = (10, 20); + return (t104, m); +} + +@method_id(105) +fun test105(x: int, y: int): (tuple, int, (int, int), int, int) { + var ab = (createEmptyTuple(), (x, y), tupleSize); + ab.0.tuplePush(1); + tuplePush(mutate ab.0, 2); + ab.1.0 = null; + ab.1.1 += 10; + var cb = ab.2; + return (ab.0, ab.0.1, ab.1, cb(ab.0), ab.2(ab.0)); +} + +@method_id(106) +fun test106(x: int, y: int) { + var ab = [createEmptyTuple(), [x, y], tupleSize]; + ab.0.tuplePush(1); + tuplePush(mutate ab.0, 
2); + ab.1.0 = null; + ab.1.1 += 10; + var cb = ab.2; + return (ab.0, ab.1, cb(ab.0), ab.2(ab.0)); +} + +@method_id(107) +fun test107() { + var ab = createEmptyTuple(); + ab.tuplePush(1); + ab.tuplePush(beginCell().storeInt(1, 32)); + return (ab.0 as int, getBuilderBitsCount(ab.1)); +} + +global t108: [int, [int, [int]]]; + +@method_id(108) +fun test108(last: int) { + t108 = [1, [2, [last]]]; + t108.1.1.0.increment(); + var t = [1, [2, [last]]]; + t.1.1.0.increment(); + return (t108, t); +} + +@method_id(109) +fun test109(x: (int, int)): (int, int, int, int, int, int, int) { + return (x.1, x.1.plus(x.1 / 20), x.1, x.1 = x.1 * 2, x.1, x.1 += 1, x.1); +} + +@method_id(110) +fun test110(f: int, s: int) { + var x = [f, s]; + return (x, x.1, x.1.plus(x.1 / 20), x.1, x.1 = x.1 * 2, x.1, x.1 += 1, x.1, x); +} + +global xx: (int, int); + +@method_id(111) +fun test111(x: (int, int)) { + xx = x; + return (x, xx.1, xx.1.plus(xx.1 / 20), eq(xx.1 += (x.1 *= 0)), xx.1 = xx.1 * 2, xx.1, xx.1 += 1, xx.1, x); +} + +global yy: [int, int]; + +@method_id(112) +fun test112(f: int, s: int) { + yy = [f, s]; + return (yy, yy.1, yy.1.plus(yy.1 / 20), eq(yy.1 += (yy.1 *= 0)), yy.1 = yy.1 * 2, yy.1, yy.1 += 1, yy.1, yy); +} + +@pure +fun getConstTuple() { + return [1,2]; +} + +fun testCodegenNoPureIndexedAccess() { + (getConstTuple().1, getConstTuple().0) = (3, 4); + return 0; +} + +@method_id(113) +fun test113() { + var x = [[1, 2]]; + return (x, x.0, plus(mutate x.0.0, 10), x.0, x, x.0 = [10, 20], x); +} + +@method_id(114) +fun test114(f: int, s: int) { + var x = ((), (f, s), ()); + return (x, x.1, plus(mutate x.1.0, 10), x.1, x, x.1 = (10, 20), x); +} + +@method_id(115) +fun test115() { + var y = [[[[true]]]]; + return (y, y.0.0.0.0 = !y.0.0.0.0, y.0); +} + +@method_id(116) +fun test116() { + var t = createEmptyTuple(); + t.tuplePush(1); + try { + return t.100500 as int; + } catch(excNo) { + return excNo; + } +} + +@method_id(117) +fun test117() { + var t = createEmptyTuple(); + 
t.tuplePush(1); + try { + return (t.0 as tuple).0 as int; + } catch(excNo) { + return excNo; + } +} + +@method_id(118) +fun testCodegenIndexPostfix1(x: (int, int)) { + var ab = (x.1, x.0); + return ab; +} + +@method_id(119) +fun testCodegenIndexPostfix2(x: (int, (int, int), int)) { + var y = x; + return (y.2, y.0, y.1.1); +} + +fun getT() { return (1, 2); } + +@method_id(120) +fun test120() { + return (getT().0 = 3, getT().0 = 4, [getT().0 = 5, getT().0 = 6]); +} + +@method_id(121) +fun test121(zero: int) { + var t = createEmptyTuple(); + t.tuplePush(-100); + t.tupleSetAt(0, zero); + (t.0 as int).increment(); + (((t.0) as int) as int).increment(); + increment(mutate t.0 as int); + return t; +} + +fun main(){} + + +/** +@testcase | 101 | | 2 3 4 [ 5 6 [ 7 8 ] ] 9 +@testcase | 102 | | 2 3 4 [ 5 6 [ 7 8 ] ] 9 +@testcase | 103 | | 10 20 10 20 +@testcase | 104 | | [ [ 10 20 ] ] [ [ 10 20 ] ] +@testcase | 105 | 5 6 | [ 1 2 ] 2 (null) 16 2 2 +@testcase | 106 | 5 6 | [ 1 2 ] [ (null) 16 ] 2 2 +@testcase | 107 | | 1 32 +@testcase | 108 | 3 | [ 1 [ 2 [ 4 ] ] ] [ 1 [ 2 [ 4 ] ] ] +@testcase | 109 | 0 100 | 100 50 105 210 210 211 211 +@testcase | 110 | 0 100 | [ 0 100 ] 100 50 105 210 210 211 211 [ 0 211 ] +@testcase | 111 | 0 100 | 0 100 100 50 105 210 210 211 211 0 0 +@testcase | 112 | 0 100 | [ 0 100 ] 100 50 105 210 210 211 211 [ 0 211 ] +@testcase | 113 | | [ [ 1 2 ] ] [ 1 2 ] 100 [ 11 2 ] [ [ 11 2 ] ] [ 10 20 ] [ [ 10 20 ] ] +@testcase | 114 | 1 2 | 1 2 1 2 100 11 2 11 2 10 20 10 20 +@testcase | 115 | | [ [ [ [ -1 ] ] ] ] 0 [ [ [ 0 ] ] ] +@testcase | 116 | | 5 +@testcase | 117 | | 7 +@testcase | 118 | 1 2 | 2 1 +@testcase | 119 | 1 2 3 4 | 4 1 3 +@testcase | 120 | | 3 4 [ 5 6 ] +@testcase | 121 | 0 | [ 3 ] + +@fif_codegen +""" + testCodegenNoPureIndexedAccess PROC:<{ + // + 0 PUSHINT // '8=0 + }> +""" + +@fif_codegen +""" + test104 PROC:<{ + // + 5 PUSHINT // '2=5 + DUP // '2=5 '3=5 + PAIR // '1 + SINGLE // m + 10 PUSHINT // m '5=10 + 20 PUSHINT // m '5=10 '6=20 + s2 PUSH 
// m '5=10 '6=20 m + 0 INDEX // m '10=10 '12=20 '8 + SWAP // m '10=10 '8 '12=20 + 1 SETINDEX // m '10=10 '8 + SWAP // m '8 '10=10 + 0 SETINDEX // m '8 + 0 SETINDEX // m + ... +""" + +@fif_codegen +""" + testCodegenIndexPostfix1 PROC:<{ + // x.0 x.1 + // ab.1 ab.0 + SWAP // ab.0 ab.1 + }> +""" + +@fif_codegen +""" + testCodegenIndexPostfix2 PROC:<{ + // x.0 x.1.0 x.1.1 x.2 + s2 POP // y.0 y.2 y.1.1 + s1 s2 XCHG // y.2 y.0 y.1.1 + }> +""" + */ diff --git a/tolk-tester/tests/invalid-assign-1.tolk b/tolk-tester/tests/invalid-assign-1.tolk new file mode 100644 index 00000000..f605056e --- /dev/null +++ b/tolk-tester/tests/invalid-assign-1.tolk @@ -0,0 +1,9 @@ +fun main() { + var c = 1; + (c, c) = (2, 3); +} + +/** +@compilation_should_fail +@stderr one variable modified twice inside the same expression +*/ diff --git a/tolk-tester/tests/invalid-assign-2.tolk b/tolk-tester/tests/invalid-assign-2.tolk new file mode 100644 index 00000000..2838ed9a --- /dev/null +++ b/tolk-tester/tests/invalid-assign-2.tolk @@ -0,0 +1,11 @@ +fun incThree(mutate a: int, mutate b: int, mutate c: int) {} + +fun main() { + var c = [[[1, 2]]]; + incThree(mutate c.0.0.0, mutate c.0.0.1, mutate c.0.0.0); +} + +/** +@compilation_should_fail +@stderr one variable modified twice inside the same expression +*/ diff --git a/tolk-tester/tests/invalid-assign-3.tolk b/tolk-tester/tests/invalid-assign-3.tolk new file mode 100644 index 00000000..d3f5d1f1 --- /dev/null +++ b/tolk-tester/tests/invalid-assign-3.tolk @@ -0,0 +1,10 @@ +global gg: (int, int); + +fun main() { + [gg.0, gg.1, gg.0] = [0, 1, 0]; +} + +/** +@compilation_should_fail +@stderr one variable modified twice inside the same expression +*/ diff --git a/tolk-tester/tests/invalid-assign-4.tolk b/tolk-tester/tests/invalid-assign-4.tolk new file mode 100644 index 00000000..67340b20 --- /dev/null +++ b/tolk-tester/tests/invalid-assign-4.tolk @@ -0,0 +1,10 @@ +global gg: (int, [int, int]); + +fun main() { + (gg.1.0, gg.1, gg.1.1) = (0, [1, 2], 3); 
+} + +/** +@compilation_should_fail +@stderr one variable both modified and read inside the same expression +*/ diff --git a/tolk-tester/tests/invalid-assign-5.tolk b/tolk-tester/tests/invalid-assign-5.tolk new file mode 100644 index 00000000..f3fe59f7 --- /dev/null +++ b/tolk-tester/tests/invalid-assign-5.tolk @@ -0,0 +1,9 @@ +fun main() { + var ab = (1, 2); + (ab, ab.1) = ((2, 3), 4); +} + +/** +@compilation_should_fail +@stderr one variable both modified and read inside the same expression +*/ diff --git a/tolk-tester/tests/invalid-assign-6.tolk b/tolk-tester/tests/invalid-assign-6.tolk new file mode 100644 index 00000000..59d769e9 --- /dev/null +++ b/tolk-tester/tests/invalid-assign-6.tolk @@ -0,0 +1,9 @@ +fun main() { + var t = createEmptyTuple(); + t.0 = (1, 2); +} + +/** +@compilation_should_fail +@stderr can not put `(int, int)` into a tuple, because it occupies 2 stack slots in TVM, not 1 +*/ diff --git a/tolk-tester/tests/invalid-assign-7.tolk b/tolk-tester/tests/invalid-assign-7.tolk new file mode 100644 index 00000000..6a33e696 --- /dev/null +++ b/tolk-tester/tests/invalid-assign-7.tolk @@ -0,0 +1,8 @@ +fun main(cs: slice) { + var cb = cs.tupleSize; +} + +/** +@compilation_should_fail +@stderr referencing a method for `tuple` with object of type `slice` +*/ diff --git a/tolk-tester/tests/invalid-call-1.tolk b/tolk-tester/tests/invalid-call-1.tolk index 3542f580..7435bb3c 100644 --- a/tolk-tester/tests/invalid-call-1.tolk +++ b/tolk-tester/tests/invalid-call-1.tolk @@ -6,5 +6,5 @@ fun main(x: int) { /** @compilation_should_fail -@stderr calling a non-function +@stderr non-existing method `asdf` of type `int` */ diff --git a/tolk-tester/tests/invalid-call-10.tolk b/tolk-tester/tests/invalid-call-10.tolk new file mode 100644 index 00000000..9a28c004 --- /dev/null +++ b/tolk-tester/tests/invalid-call-10.tolk @@ -0,0 +1,11 @@ +fun takeInvalidTuple(t: [int, (int, builder), int]) { +} + +fun main() { + takeInvalidTuple([1, (2, beginCell()), 0]); +} + +/** 
+@compilation_should_fail +@stderr can not put `(int, builder)` into a tuple, because it occupies 2 stack slots in TVM, not 1 + */ diff --git a/tolk-tester/tests/invalid-call-11.tolk b/tolk-tester/tests/invalid-call-11.tolk new file mode 100644 index 00000000..f631c546 --- /dev/null +++ b/tolk-tester/tests/invalid-call-11.tolk @@ -0,0 +1,11 @@ +fun main() { + var functions = (beginCell, beginCell); + var b = functions.1(); // ok + var c = functions.2(); // error +} + +/** +@compilation_should_fail +@stderr invalid tensor index, expected 0..1 +@stderr functions.2() + */ diff --git a/tolk-tester/tests/invalid-call-7.tolk b/tolk-tester/tests/invalid-call-7.tolk index 4ad038c9..cf8c788c 100644 --- a/tolk-tester/tests/invalid-call-7.tolk +++ b/tolk-tester/tests/invalid-call-7.tolk @@ -9,6 +9,6 @@ fun main() { /** @compilation_should_fail -@stderr undefined symbol `storeUnexisting` +@stderr non-existing method `storeUnexisting` of type `builder` @stderr .storeUnexisting() */ diff --git a/tolk-tester/tests/invalid-call-8.tolk b/tolk-tester/tests/invalid-call-8.tolk index c613d7d9..199aa681 100644 --- a/tolk-tester/tests/invalid-call-8.tolk +++ b/tolk-tester/tests/invalid-call-8.tolk @@ -1,8 +1,10 @@ +fun get_incoming_value() { return 3; } + fun main() { var incoming_ton: int = get_incoming_value().3(); } /** @compilation_should_fail -@stderr expected method name, got `3` +@stderr type `int` is not indexable */ diff --git a/tolk-tester/tests/invalid-generics-12.tolk b/tolk-tester/tests/invalid-generics-12.tolk new file mode 100644 index 00000000..62a6f5da --- /dev/null +++ b/tolk-tester/tests/invalid-generics-12.tolk @@ -0,0 +1,15 @@ +fun getTwo(): X { return 2; } + +fun cantDeduceNonArgumentGeneric() { + var t1: [int] = [0]; + t1.0 = getTwo(); // ok + var t2 = createEmptyTuple(); + t2.tuplePush(0); + t2.0 = getTwo(); // error, can't deduce X +} + +/** +@compilation_should_fail +@stderr can not deduce X for generic function `getTwo` +@stderr t2.0 = getTwo(); + */ diff
--git a/tolk-tester/tests/invalid-typing-13.tolk b/tolk-tester/tests/invalid-typing-13.tolk new file mode 100644 index 00000000..e356d0f3 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-13.tolk @@ -0,0 +1,9 @@ +fun failAssignToInvalidTupleIndex() { + var ab = [1, 2]; + ab.100500 = 5; +} + +/** +@compilation_should_fail +@stderr invalid tuple index, expected 0..1 + */ diff --git a/tolk-tester/tests/known-bugs.tolk b/tolk-tester/tests/known-bugs.tolk deleted file mode 100644 index 4de6a375..00000000 --- a/tolk-tester/tests/known-bugs.tolk +++ /dev/null @@ -1,27 +0,0 @@ -fun increment(mutate x: int): int { - x = x + 1; - return x; -} - -@method_id(101) -fun bugWithModifyingMethodInsideSameExpression() { - /* - The same bug existed in FunC: -#pragma allow-post-modification; -(int, int) ~increment(int x) { x = x + 5; return (x, x); } -int main() { int x = 0; x += x~increment(); return x; } - It's related to using a variable modified by ~method inside the same expression. - */ - var x = 0; - x = x + increment(mutate x); - return x; -} - -fun main() { - -} - -/** -// correct: 2 -@testcase | 101 | | 1 - */ diff --git a/tolk-tester/tests/logical-operators.tolk b/tolk-tester/tests/logical-operators.tolk index 9ef4858d..29cd1d10 100644 --- a/tolk-tester/tests/logical-operators.tolk +++ b/tolk-tester/tests/logical-operators.tolk @@ -210,15 +210,15 @@ fun main() { compileTimeEval1 PROC:<{ // x DUP // x x - 0 EQINT // x _1 - FALSE // x _1 _4 - TRUE // x _1 _4 _7 - FALSE // x _1 _4 _7 _11 - s0 s4 XCHG // _11 _1 _4 _7 x - 0 EQINT // _11 _1 _4 _7 _12 - -10 EQINT // _11 _1 _4 _7 _14 + 0 EQINT // x '1 + FALSE // x '1 '4 + TRUE // x '1 '4 '7 + FALSE // x '1 '4 '7 '11 + s0 s4 XCHG // '11 '1 '4 '7 x + 0 EQINT // '11 '1 '4 '7 '12 + -10 EQINT // '11 '1 '4 '7 '14 s3 s4 XCHG - s1 s3 s0 XCHG3 // _1 _4 _7 _11 _14 + s1 s3 s0 XCHG3 // '1 '4 '7 '11 '14 }> """ @@ -230,15 +230,15 @@ fun main() { OVER // x y x IFNOTJMP:<{ // x y 2DROP // - 10 PUSHINT // _2=10 + 10 PUSHINT // '2=10 }> // x y 
DUP // x y y IFNOTJMP:<{ // x y 2DROP // - 20 PUSHINT // _3=20 + 20 PUSHINT // '3=20 RETALT }> // x y - ADD // _4 + ADD // '4 }> """ @@ -297,10 +297,10 @@ These are moments of future optimizations. For now, it's more than enough. // a b SWAP // b a IF:<{ // b - 0 NEQINT // _2 + 0 NEQINT // '2 }>ELSE<{ // b DROP // - 0 PUSHINT // _2=0 + 0 PUSHINT // '2=0 }> }> """ @@ -310,13 +310,13 @@ These are moments of future optimizations. For now, it's more than enough. testOrSimpleCodegen PROC:<{ // a b SWAP // b a - 0 GTINT // b _3 + 0 GTINT // b '3 IF:<{ // b DROP // - -1 PUSHINT // _4=-1 + -1 PUSHINT // '4=-1 }>ELSE<{ // b - 0 GTINT // _7 - 0 NEQINT // _4 + 0 GTINT // '7 + 0 NEQINT // '4 }> }> """ @@ -332,15 +332,15 @@ These are moments of future optimizations. For now, it's more than enough. DUP // x x IFNOTJMP:<{ // x DROP // - 1 PUSHINT // _5=1 + 1 PUSHINT // '5=1 }> // x DUP // x x IFNOTJMP:<{ // x DROP // - 1 PUSHINT // _6=1 + 1 PUSHINT // '6=1 }> // x 100 THROWIFNOT - -4 PUSHINT // _9=-4 + -4 PUSHINT // '9=-4 }> """ diff --git a/tolk-tester/tests/mutate-methods.tolk b/tolk-tester/tests/mutate-methods.tolk index 0a5ca191..ebd07aca 100644 --- a/tolk-tester/tests/mutate-methods.tolk +++ b/tolk-tester/tests/mutate-methods.tolk @@ -307,7 +307,7 @@ fun main(){} ... 
incrementTwoInPlace CALLDICT // x y sum1 -ROT - 10 PUSHINT // sum1 x y _10=10 + 10 PUSHINT // sum1 x y '10=10 incrementTwoInPlace CALLDICT // sum1 x y sum2 s1 s3 s0 XCHG3 // x y sum1 sum2 }> @@ -317,8 +317,8 @@ fun main(){} """ load_next PROC:<{ // cs - 32 LDI // _4 cs - SWAP // cs _4 + 32 LDI // '4 cs + SWAP // cs '4 }> """ @@ -326,7 +326,7 @@ fun main(){} """ testStoreUintPureUnusedResult PROC:<{ // - 0 PUSHINT // _11=0 + 0 PUSHINT // '11=0 }> """ @@ -335,9 +335,9 @@ fun main(){} testStoreUintImpureUnusedResult PROC:<{ // NEWC // b - STIX // _2 + STIX // '2 DROP // - 0 PUSHINT // _11=0 + 0 PUSHINT // '11=0 }> """ diff --git a/tolk-tester/tests/no-spaces.tolk b/tolk-tester/tests/no-spaces.tolk index da733898..409bb342 100644 --- a/tolk-tester/tests/no-spaces.tolk +++ b/tolk-tester/tests/no-spaces.tolk @@ -104,10 +104,10 @@ fun`main`(){} DUP // fst1=-1 snd1=-1 2 PUSHINT // fst1=-1 snd1=-1 trd1=2 s1 s1 s0 PUSH3 // fst1=-1 snd1=-1 trd1=2 fst2=-1 snd2=-1 trd2=2 - add3 CALLDICT // fst1=-1 snd1=-1 trd1=2 _13 - 3 -ROLL // _13 fst1=-1 snd1=-1 trd1=2 - add3 CALLDICT // _13 _14 - PAIR // _12 + add3 CALLDICT // fst1=-1 snd1=-1 trd1=2 '13 + 3 -ROLL // '13 fst1=-1 snd1=-1 trd1=2 + add3 CALLDICT // '13 '14 + PAIR // '12 }> """ diff --git a/tolk-tester/tests/null-keyword.tolk b/tolk-tester/tests/null-keyword.tolk index 40e391c8..69678434 100644 --- a/tolk-tester/tests/null-keyword.tolk +++ b/tolk-tester/tests/null-keyword.tolk @@ -99,21 +99,21 @@ fun main() { test1 PROC:<{ // PUSHNULL // numbers - 1 PUSHINT // numbers _2=1 - SWAP // _2=1 numbers + 1 PUSHINT // numbers '2=1 + SWAP // '2=1 numbers CONS // numbers - 2 PUSHINT // numbers _4=2 - SWAP // _4=2 numbers + 2 PUSHINT // numbers '4=2 + SWAP // '4=2 numbers CONS // numbers - 3 PUSHINT // numbers _6=3 - SWAP // _6=3 numbers + 3 PUSHINT // numbers '6=3 + SWAP // '6=3 numbers CONS // numbers - 4 PUSHINT // numbers _8=4 - SWAP // _8=4 numbers + 4 PUSHINT // numbers '8=4 + SWAP // '8=4 numbers CONS // numbers UNCONS // h numbers 
DUP // h numbers numbers - CAR // h numbers _13 + CAR // h numbers '13 """ @fif_codegen @@ -121,11 +121,11 @@ fun main() { main PROC:<{ // PUSHNULL // i - ISNULL // _2 + ISNULL // '2 IFJMP:<{ // - 1 PUSHINT // _3=1 + 1 PUSHINT // '3=1 }> // - 10 PUSHINT // _4=10 + 10 PUSHINT // '4=10 }> """ @@ -133,14 +133,14 @@ fun main() { """ test7 PROC:<{ ... - LDOPTREF // b _8 _7 + LDOPTREF // b '8 '7 DROP // b c - ISNULL // b _11 - 10 MULCONST // b _13 - SWAP // _13 b - ISNULL // _13 _14 - NOT // _13 _15 - ADD // _16 + ISNULL // b '11 + 10 MULCONST // b '13 + SWAP // '13 b + ISNULL // '13 '14 + NOT // '13 '15 + ADD // '16 }> """ */ diff --git a/tolk-tester/tests/op-priority.tolk b/tolk-tester/tests/op-priority.tolk index 8a57b394..e3c6bb6e 100644 --- a/tolk-tester/tests/op-priority.tolk +++ b/tolk-tester/tests/op-priority.tolk @@ -95,26 +95,26 @@ fun main() { unary_minus_1 PROC:<{ // a b c -ROT // c a b - ADD // c _3 - NEGATE // c _4 - SWAP // _4 c - MUL // _5 + ADD // c '3 + NEGATE // c '4 + SWAP // '4 c + MUL // '5 }> unary_minus_2 PROC:<{ // a b c -ROT // c a b - ADD // c _3 - NEGATE // c _4 - SWAP // _4 c - MUL // _5 + ADD // c '3 + NEGATE // c '4 + SWAP // '4 c + MUL // '5 }> unary_minus_3 PROC:<{ // a b c -ROT // c a b - ADD // c _3 - SWAP // _3 c - MUL // _4 - NEGATE // _5 + ADD // c '3 + SWAP // '3 c + MUL // '4 + NEGATE // '5 }> """ diff --git a/tolk-tester/tests/use-before-declare.tolk b/tolk-tester/tests/use-before-declare.tolk index 384569b9..d3e6b165 100644 --- a/tolk-tester/tests/use-before-declare.tolk +++ b/tolk-tester/tests/use-before-declare.tolk @@ -43,7 +43,7 @@ const demo_20: int = 20; """ test1 PROC:<{ // - 30 PUSHINT // _10 + 30 PUSHINT // '10 }> """ */ diff --git a/tolk-tester/tests/var-apply.tolk b/tolk-tester/tests/var-apply.tolk index a0918c18..16863560 100644 --- a/tolk-tester/tests/var-apply.tolk +++ b/tolk-tester/tests/var-apply.tolk @@ -129,6 +129,15 @@ fun testVarApply3() { return (getIntAt(t, 0), getTupleFirstInt(t), getTupleLastTuple(t), 
getTupleLastGetter()(t)); } +@method_id(107) +fun testIndexedAccessApply() { + var functions1 = (beginCell, endCell); + var functions2 = [beginParse]; + var b = functions1.0().storeInt(1, 16); + b.storeInt(1, 16); + return functions2.0(functions1.1(b)).loadInt(32); +} + fun main() {} /** @@ -138,4 +147,5 @@ fun main() {} @testcase | 104 | | 240 @testcase | 105 | | 1 @testcase | 106 | | 1 1 [ 2 ] [ 2 ] +@testcase | 107 | | 65537 */ diff --git a/tolk/abscode.cpp b/tolk/abscode.cpp index 25ddd10e..b465b72b 100644 --- a/tolk/abscode.cpp +++ b/tolk/abscode.cpp @@ -26,22 +26,28 @@ namespace tolk { * */ -void TmpVar::dump(std::ostream& os) const { - show(os); - os << " : " << v_type << " (width "; - os << v_type->calc_width_on_stack(); - os << ")"; - os << std::endl; +void TmpVar::show_as_stack_comment(std::ostream& os) const { + if (!name.empty()) { + os << name; + } else { + os << '\'' << ir_idx; + } +#ifdef TOLK_DEBUG + // uncomment for detailed stack output, like `'15(binary-op) '16(glob-var)` + // if (desc) os << desc; +#endif } -void TmpVar::show(std::ostream& os, int omit_idx) const { - if (v_sym) { - os << v_sym->name; - if (omit_idx >= 2) { - return; - } +void TmpVar::show(std::ostream& os) const { + os << '\'' << ir_idx; // vars are printed out as `'1 '2` (in stack comments, debug info, etc.) 
+ if (!name.empty()) { + os << '_' << name; } - os << '_' << ir_idx; +#ifdef TOLK_DEBUG + if (desc) { + os << ' ' << desc; // "origin" of implicitly created tmp var, like `'15 (binary-op) '16 (glob-var)` + } +#endif } std::ostream& operator<<(std::ostream& os, const TmpVar& var) { @@ -95,7 +101,7 @@ void VarDescr::show(std::ostream& os, const char* name) const { if (name) { os << name; } - os << '_' << idx; + os << '\'' << idx; show_value(os); } @@ -333,7 +339,7 @@ void Op::show_var_list(std::ostream& os, const std::vector& idx_list, } else { os << "(" << vars.at(idx_list[0]); for (std::size_t i = 1; i < idx_list.size(); i++) { - os << "," << vars.at(idx_list[i]); + os << ", " << vars.at(idx_list[i]); } os << ")"; } @@ -378,11 +384,12 @@ void CodeBlob::print(std::ostream& os, int flags) const { os << "CODE BLOB: " << var_cnt << " variables, " << in_var_cnt << " input\n"; if ((flags & 8) != 0) { for (const auto& var : vars) { - var.dump(os); - if (var.where.is_defined() && (flags & 1) != 0) { - var.where.show(os); + var.show(os); + os << " : " << var.v_type << std::endl; + if (var.loc.is_defined() && (flags & 1) != 0) { + var.loc.show(os); os << " defined here:\n"; - var.where.show_context(os); + var.loc.show_context(os); } } } @@ -393,21 +400,25 @@ void CodeBlob::print(std::ostream& os, int flags) const { os << "-------- END ---------\n\n"; } -std::vector CodeBlob::create_var(TypePtr var_type, const LocalVarData* v_sym, SrcLocation loc) { +std::vector CodeBlob::create_var(TypePtr var_type, SrcLocation loc, std::string name) { std::vector ir_idx; - ir_idx.reserve(var_type->calc_width_on_stack()); + int stack_w = var_type->calc_width_on_stack(); + ir_idx.reserve(stack_w); if (const TypeDataTensor* t_tensor = var_type->try_as()) { - for (TypePtr item : t_tensor->items) { - std::vector nested = create_var(item, v_sym, loc); + for (int i = 0; i < t_tensor->size(); ++i) { + std::string sub_name = name.empty() ? name : name + "." 
+ std::to_string(i); + std::vector nested = create_var(t_tensor->items[i], loc, std::move(sub_name)); ir_idx.insert(ir_idx.end(), nested.begin(), nested.end()); } } else if (var_type != TypeDataVoid::create()) { - tolk_assert(var_type->calc_width_on_stack() == 1); - vars.emplace_back(var_cnt, var_type, v_sym, loc); +#ifdef TOLK_DEBUG + tolk_assert(stack_w == 1); +#endif + vars.emplace_back(var_cnt, var_type, std::move(name), loc); ir_idx.emplace_back(var_cnt); var_cnt++; } - tolk_assert(static_cast(ir_idx.size()) == var_type->calc_width_on_stack()); + tolk_assert(static_cast(ir_idx.size()) == stack_w); return ir_idx; } diff --git a/tolk/asmops.cpp b/tolk/asmops.cpp index 547922da..2618ed26 100644 --- a/tolk/asmops.cpp +++ b/tolk/asmops.cpp @@ -302,24 +302,13 @@ Const AsmOpList::get_const(const_idx_t idx) { } } -void AsmOpList::show_var(std::ostream& os, var_idx_t idx) const { - if (!var_names_ || (unsigned)idx >= var_names_->size()) { - os << '_' << idx; - } else { - var_names_->at(idx).show(os, 2); - } -} - void AsmOpList::show_var_ext(std::ostream& os, std::pair idx_pair) const { - auto i = idx_pair.first; - auto j = idx_pair.second; + var_idx_t i = idx_pair.first; + const_idx_t j = idx_pair.second; if (!var_names_ || (unsigned)i >= var_names_->size()) { - os << '_' << i; + os << '\'' << i; } else { - var_names_->at(i).show(os, 2); - // if (!var_names_->at(i).v_type->is_int()) { - // os << '<'; var_names_->at(i).v_type->print(os); os << '>'; - // } + var_names_->at(i).show_as_stack_comment(os); } if ((unsigned)j < constants_.size() && constants_[j].not_null()) { os << '=' << constants_[j]; diff --git a/tolk/ast-from-tokens.cpp b/tolk/ast-from-tokens.cpp index 58592011..3eb4385a 100644 --- a/tolk/ast-from-tokens.cpp +++ b/tolk/ast-from-tokens.cpp @@ -405,12 +405,15 @@ static AnyExprV parse_expr80(Lexer& lex) { lex.next(); V v_ident = nullptr; V v_instantiationTs = nullptr; - if (lex.tok() == tok_identifier) { + if (lex.tok() == tok_identifier) { // obj.field / 
obj.method v_ident = createV(lex.cur_location(), lex.cur_str()); lex.next(); if (lex.tok() == tok_lt) { v_instantiationTs = parse_maybe_instantiationTs_after_identifier(lex); } + } else if (lex.tok() == tok_int_const) { // obj.0 (indexed access) + v_ident = createV(lex.cur_location(), lex.cur_str()); + lex.next(); } else { lex.unexpected("method name"); } diff --git a/tolk/ast.h b/tolk/ast.h index b90507e7..d2db49f8 100644 --- a/tolk/ast.h +++ b/tolk/ast.h @@ -529,8 +529,14 @@ private: public: - typedef const FunctionData* DotTarget; // for `t.tupleAt` target is `tupleAt` global function - DotTarget target = nullptr; // filled at type inferring + typedef std::variant< + const FunctionData*, // for `t.tupleAt` target is `tupleAt` global function + int // for `t.0` target is "indexed access" 0 + > DotTarget; + DotTarget target = static_cast(nullptr); // filled at type inferring + + bool is_target_fun_ref() const { return std::holds_alternative(target); } + bool is_target_indexed_access() const { return std::holds_alternative(target); } AnyExprV get_obj() const { return child; } auto get_identifier() const { return identifier; } diff --git a/tolk/builtins.cpp b/tolk/builtins.cpp index 73aef2d9..2b207c25 100644 --- a/tolk/builtins.cpp +++ b/tolk/builtins.cpp @@ -1060,6 +1060,17 @@ AsmOp compile_tuple_at(std::vector& res, std::vector& args, return exec_op("INDEXVAR", 2, 1); } +// fun tupleSetAt(mutate self: tuple, value: X, index: int): void asm "SETINDEXVAR"; +AsmOp compile_tuple_set_at(std::vector& res, std::vector& args, SrcLocation) { + tolk_assert(args.size() == 3 && res.size() == 1); + auto& y = args[2]; + if (y.is_int_const() && y.int_const >= 0 && y.int_const < 16) { + y.unused(); + return exec_arg_op("SETINDEX", y.int_const, 1, 1); + } + return exec_op("SETINDEXVAR", 2, 1); +} + // fun __isNull(X arg): bool AsmOp compile_is_null(std::vector& res, std::vector& args, SrcLocation) { tolk_assert(args.size() == 1 && res.size() == 1); @@ -1246,6 +1257,9 @@ void 
define_builtins() { define_builtin_func("tupleAt", {Tuple, Int}, typeT, declGenericT, compile_tuple_at, FunctionData::flagMarkedAsPure | FunctionData::flagAcceptsSelf); + define_builtin_func("tupleSetAt", {Tuple, typeT, Int}, Unit, declGenericT, + compile_tuple_set_at, + FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf); define_builtin_func("debugPrint", {typeT}, Unit, declGenericT, AsmOp::Custom("s0 DUMP DROP", 1, 1), 0); diff --git a/tolk/codegen.cpp b/tolk/codegen.cpp index 3830f7ae..ad61b8a5 100644 --- a/tolk/codegen.cpp +++ b/tolk/codegen.cpp @@ -132,7 +132,7 @@ int Stack::drop_vars_except(const VarDescrList& var_info, int excl_var) { return dropped; } -void Stack::show(int flags) { +void Stack::show() { std::ostringstream os; for (auto i : s) { os << ' '; diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index c64fe751..f5eca22c 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -21,6 +21,7 @@ #include "type-system.h" #include "common/refint.h" #include "constant-evaluator.h" +#include /* * This pipe is the last one operating AST: it transforms AST to IR. @@ -28,38 +29,218 @@ * kernel (initially forked from FunC) comes into play. * Up to this point, all types have been inferred, all validity checks have been passed, etc. * All properties in AST nodes are assigned and can be safely used (fun_ref, etc.). - * So, if execution reaches this pass, the input is correct, and code generation should succeed. + * So, if execution reaches this pass, the input is (almost) correct, and code generation should succeed. + * The only thing additionally checked during this pass is tricky lvalue, like one and the same variable + * assigned/mutated multiple times in same expression, e.g. `(t.0, t.0) = rhs` / `f(mutate x.1.2, mutate x)`. 
+ */ namespace tolk { -struct LValGlobs { - std::vector>> globs; +// fire error on cases like `(a, a) = rhs` / `f(mutate t.1.0, mutate t.1.0)` +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_variable_modified_twice_inside_same_expression(SrcLocation loc) { + throw ParseError(loc, "one variable modified twice inside the same expression"); +} - void add_modified_glob(const GlobalVarData* g_sym, std::vector local_ir_idx) { - globs.emplace_back(g_sym, std::move(local_ir_idx)); +// fire error on cases like `(m.1.0, m.1) = rhs` (m.1 inside m.1.0 is "rval inside lval") +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_variable_modified_and_read_inside_same_expression(SrcLocation loc) { + throw ParseError(loc, "one variable both modified and read inside the same expression"); +} + +// Main goal of LValContext is to handle non-primitive lvalues. At IR level, a usual local variable +// exists, but on its change, something non-trivial should happen. +// Example: `globalVar = 9` actually does `Const $5 = 9` + `Let $6 = $5` + `SetGlob "globalVar" = $6` +// Example: `tupleVar.0 = 9` actually does `Const $5 = 9` + `Let $6 = $5` + `Const $7 = 0` + `Call tupleSetAt($4, $6, $7)` +// Of course, mixing globals with tuples should also be supported. +// To achieve this, treat tupleObj inside "tupleObj.i" like "rvalue inside lvalue". +// For instance, `globalTuple.0 = 9` reads global (like rvalue), assigns 9 to tmp var, modifies tuple, writes global. +// A challenging thing is handling "unique" parts, to be read/updated only once. +// Example: `f(mutate globalTensor.0, mutate globalTensor.1)`, then globalTensor should be read/written once. +// Example: `(t.0.0, t.0.1) = rhs` (t is [[int, int]]), then t.0 should be read/updated once. +// Solving this by calculating hashes of every lvalue or rvalue inside lvalue automatically gives an ability +// to detect and fire "multiple writes inside expression", like `(a, a) = rhs` / `[t.0, (t.0.1, c)] = rhs`.
+// Note, that tensors (not tuples) `tensorVar.0 = 9` do not emit anything special (unless global). +class LValContext { + // every global variable used as lvalue is registered here + // example: `globalInt = 9`, implicit var is created `$tmp = 9`, and `SetGlob "globalInt" $tmp` is done after + // global tensors are stored as tuples (unpacked on reading, packed on writing), then multiple tmp vars are created + struct ModifiedGlob { + const GlobalVarData* glob_ref; + std::vector local_ir_idx; // typically 1, generally calc_width_on_stack() of global var (tensors) + + void apply(CodeBlob& code, SrcLocation loc) const { + Op& op = code.emplace_back(loc, Op::_SetGlob, std::vector{}, local_ir_idx, glob_ref); + op.set_impure_flag(); + } + }; + + // every tuple index used as lvalue is registered here + // example: `t.0 = 9`, implicit var is created `$tmp = 9`, as well as `$tmp_idx = 0` and `tupleSetAt()` is done after + // for `t.0.0` if t is `[[int, ...]]`, `tupleAt()` for it is done since it's rvalue, and `tupleSetAt()` is done 2 times + struct ModifiedTupleIndex { + uint64_t hash; + var_idx_t tuple_ir_idx; + var_idx_t index_ir_idx; + var_idx_t field_ir_idx; + + void apply(CodeBlob& code, SrcLocation loc) const { + const FunctionData* builtin_sym = lookup_global_symbol("tupleSetAt")->as(); + code.emplace_back(loc, Op::_Call, std::vector{tuple_ir_idx}, std::vector{tuple_ir_idx, field_ir_idx, index_ir_idx}, builtin_sym); + } + }; + + int level_rval_inside_lval = 0; + std::vector> modifications; + std::unordered_set all_modified_hashes; + + void fire_if_one_variable_modified_twice(SrcLocation loc, uint64_t modified_hash) { + if (!is_rval_inside_lval()) { + if (!all_modified_hashes.insert(modified_hash).second) { + fire_error_variable_modified_twice_inside_same_expression(loc); + } + if (all_modified_hashes.contains(~modified_hash)) { + fire_error_variable_modified_and_read_inside_same_expression(loc); + } + } else { + all_modified_hashes.insert(~modified_hash); + if 
(all_modified_hashes.contains(modified_hash)) { + fire_error_variable_modified_and_read_inside_same_expression(loc); + } + } } - void gen_ops_set_globs(CodeBlob& code, SrcLocation loc) const { - for (const auto& [g_sym, ir_idx] : globs) { - Op& op = code.emplace_back(loc, Op::_SetGlob, std::vector{}, ir_idx, g_sym); - op.set_impure_flag(); +public: + void enter_rval_inside_lval() { level_rval_inside_lval++; } + void exit_rval_inside_lval() { level_rval_inside_lval--; } + bool is_rval_inside_lval() const { return level_rval_inside_lval > 0; } + + uint64_t register_lval(SrcLocation loc, const LocalVarData* var_ref) { + uint64_t hash = reinterpret_cast(var_ref); + fire_if_one_variable_modified_twice(loc, hash); + return hash; + } + + uint64_t register_lval(SrcLocation loc, const GlobalVarData* glob_ref) { + uint64_t hash = reinterpret_cast(glob_ref); + fire_if_one_variable_modified_twice(loc, hash); + return hash; + } + + uint64_t register_lval(SrcLocation loc, V v) { + uint64_t hash = 7; + AnyExprV leftmost_obj = v; + while (auto v_dot = leftmost_obj->try_as()) { + if (!v_dot->is_target_indexed_access()) { + break; + } + hash = hash * 1915239017 + std::get(v_dot->target); + leftmost_obj = v_dot->get_obj(); + } + if (auto v_ref = leftmost_obj->try_as()) { + hash *= reinterpret_cast(v_ref->sym); // `v.0` and `v.0` in 2 places is the same + } else { + hash *= reinterpret_cast(leftmost_obj); // unlike `f().0` and `f().0` (pointers to AST nodes differ) + } + fire_if_one_variable_modified_twice(loc, hash); + return hash; + } + + const std::vector* exists_already_known_global(const GlobalVarData* glob_ref) const { + for (const auto& m : modifications) { + if (const auto* m_glob = std::get_if(&m); m_glob && m_glob->glob_ref == glob_ref) { + return &m_glob->local_ir_idx; + } + } + return nullptr; + } + + const var_idx_t* exists_already_known_tuple_index(uint64_t hash) const { + for (const auto& m : modifications) { + if (const auto* m_tup = std::get_if(&m); m_tup && 
m_tup->hash == hash) { + return &m_tup->field_ir_idx; + } + } + return nullptr; + } + + void register_modified_global(const GlobalVarData* glob_ref, std::vector local_ir_idx) { + modifications.emplace_back(ModifiedGlob{glob_ref, std::move(local_ir_idx)}); + } + + void register_modified_tuple_index(uint64_t hash, var_idx_t tuple_ir_idx, var_idx_t index_ir_idx, var_idx_t field_ir_idx) { + modifications.emplace_back(ModifiedTupleIndex{hash, tuple_ir_idx, index_ir_idx, field_ir_idx}); + } + + void gen_ops_if_nonempty(CodeBlob& code, SrcLocation loc) const { + for (auto it = modifications.rbegin(); it != modifications.rend(); ++it) { // reverse, it's important + if (const auto* m_glob = std::get_if(&*it)) { + m_glob->apply(code, loc); + } else if (const auto* m_tup = std::get_if(&*it)) { + m_tup->apply(code, loc); + } } } }; -std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, LValGlobs* lval_globs = nullptr); +// The goal of VarsModificationWatcher is to detect such cases: `return (x, x += y, x)`. +// Without any changes, ops will be { _Call $2 = +($0_x, $1_y); _Return $0_x, $2, $0_x } - incorrect +// Correct will be to introduce tmp var: { _Let $3 = $0_x; _Call $2 = ...; _Return $3, $2, $0_x } +// This "introducing" is done when compiling tensors, whereas this class allows to watch vars for modification. 
+class VarsModificationWatcher { + struct WatchedVar { + var_idx_t ir_idx; + std::function on_modification_callback; + + WatchedVar(var_idx_t ir_idx, std::function on_modification_callback) + : ir_idx(ir_idx), on_modification_callback(std::move(on_modification_callback)) {} + }; + + std::vector all_callbacks; + +public: + + bool empty() const { return all_callbacks.empty(); } + + void push_callback(var_idx_t ir_idx, std::function callback) { + all_callbacks.emplace_back(ir_idx, std::move(callback)); + } + + void pop_callback(var_idx_t ir_idx) { + for (auto it = all_callbacks.rbegin(); it != all_callbacks.rend(); ++it) { + if (it->ir_idx == ir_idx) { + all_callbacks.erase((it + 1).base()); + return; + } + } + tolk_assert(false); + } + + void trigger_callbacks(const std::vector& left_lval_indices, SrcLocation loc) const { + for (const WatchedVar& w : all_callbacks) { + for (var_idx_t changed_var : left_lval_indices) { + if (w.ir_idx == changed_var) { + w.on_modification_callback(loc, w.ir_idx); + } + } + } + } +}; + +static VarsModificationWatcher vars_modification_watcher; + +std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, LValContext* lval_ctx = nullptr); void process_any_statement(AnyV v, CodeBlob& code); static std::vector> pre_compile_tensor_inner(CodeBlob& code, const std::vector& args, - LValGlobs* lval_globs) { + LValContext* lval_ctx) { const int n = static_cast(args.size()); if (n == 0) { // just `()` return {}; } if (n == 1) { // just `(x)`: even if x is modified (e.g. 
`f(x=x+2)`), there are no next arguments - return {pre_compile_expr(args[0], code, lval_globs)}; + return {pre_compile_expr(args[0], code, lval_ctx)}; } // the purpose is to handle such cases: `return (x, x += y, x)` @@ -81,9 +262,9 @@ static std::vector> pre_compile_tensor_inner(CodeBlob& co void add_and_watch_modifications(std::vector&& vars_of_ith_arg, CodeBlob& code) { for (var_idx_t ir_idx : vars_of_ith_arg) { - if (code.vars[ir_idx].v_sym && !is_watched(ir_idx)) { + if (!code.vars[ir_idx].name.empty() && !is_watched(ir_idx)) { watched_vars.emplace_back(ir_idx); - code.vars[ir_idx].on_modification.emplace_back([this, &code, ir_idx](SrcLocation loc) { + vars_modification_watcher.push_callback(ir_idx, [this, &code](SrcLocation loc, var_idx_t ir_idx) { on_var_modified(ir_idx, loc, code); }); } @@ -93,7 +274,7 @@ static std::vector> pre_compile_tensor_inner(CodeBlob& co void on_var_modified(var_idx_t ir_idx, SrcLocation loc, CodeBlob& code) { tolk_assert(is_watched(ir_idx)); - std::vector tmp_idx_arr = code.create_tmp_var(code.vars[ir_idx].v_type, loc); + std::vector tmp_idx_arr = code.create_tmp_var(code.vars[ir_idx].v_type, loc, "(pre-modified)"); tolk_assert(tmp_idx_arr.size() == 1); var_idx_t tmp_idx = tmp_idx_arr[0]; code.emplace_back(loc, Op::_Let, std::vector{tmp_idx}, std::vector{ir_idx}); @@ -102,9 +283,9 @@ static std::vector> pre_compile_tensor_inner(CodeBlob& co } } - std::vector> clear_and_stop_watching(CodeBlob& code) { + std::vector> clear_and_stop_watching() { for (var_idx_t ir_idx : watched_vars) { - code.vars[ir_idx].on_modification.pop_back(); + vars_modification_watcher.pop_callback(ir_idx); } watched_vars.clear(); return std::move(res_lists); @@ -113,15 +294,15 @@ static std::vector> pre_compile_tensor_inner(CodeBlob& co WatchingVarList watched_vars(n); for (int arg_idx = 0; arg_idx < n; ++arg_idx) { - std::vector vars_of_ith_arg = pre_compile_expr(args[arg_idx], code, lval_globs); + std::vector vars_of_ith_arg = 
pre_compile_expr(args[arg_idx], code, lval_ctx); watched_vars.add_and_watch_modifications(std::move(vars_of_ith_arg), code); } - return watched_vars.clear_and_stop_watching(code); + return watched_vars.clear_and_stop_watching(); } static std::vector pre_compile_tensor(CodeBlob& code, const std::vector& args, - LValGlobs* lval_globs = nullptr) { - std::vector> res_lists = pre_compile_tensor_inner(code, args, lval_globs); + LValContext* lval_ctx = nullptr) { + std::vector> res_lists = pre_compile_tensor_inner(code, args, lval_ctx); std::vector res; for (const std::vector& list : res_lists) { res.insert(res.end(), list.cbegin(), list.cend()); @@ -133,11 +314,11 @@ static std::vector pre_compile_let(CodeBlob& code, AnyExprV lhs, AnyE // [lhs] = [rhs]; since type checking is ok, it's the same as "lhs = rhs" if (lhs->type == ast_typed_tuple && rhs->type == ast_typed_tuple) { std::vector right = pre_compile_tensor(code, rhs->as()->get_items()); - LValGlobs globs; - std::vector left = pre_compile_tensor(code, lhs->as()->get_items(), &globs); - code.on_var_modification(left, loc); + LValContext local_lval; + std::vector left = pre_compile_tensor(code, lhs->as()->get_items(), &local_lval); + vars_modification_watcher.trigger_callbacks(left, loc); code.emplace_back(loc, Op::_Let, std::move(left), right); - globs.gen_ops_set_globs(code, loc); + local_lval.gen_ops_if_nonempty(code, loc); return right; } // [lhs] = rhs; it's un-tuple to N left vars @@ -145,29 +326,37 @@ static std::vector pre_compile_let(CodeBlob& code, AnyExprV lhs, AnyE std::vector right = pre_compile_expr(rhs, code); const TypeDataTypedTuple* inferred_tuple = rhs->inferred_type->try_as(); std::vector types_list = inferred_tuple->items; - std::vector rvect = code.create_tmp_var(TypeDataTensor::create(std::move(types_list)), rhs->loc); + std::vector rvect = code.create_tmp_var(TypeDataTensor::create(std::move(types_list)), rhs->loc, "(unpack-tuple)"); code.emplace_back(lhs->loc, Op::_UnTuple, rvect, 
std::move(right)); - LValGlobs globs; - std::vector left = pre_compile_tensor(code, lhs->as()->get_items(), &globs); - code.on_var_modification(left, loc); + LValContext local_lval; + std::vector left = pre_compile_tensor(code, lhs->as()->get_items(), &local_lval); + vars_modification_watcher.trigger_callbacks(left, loc); code.emplace_back(loc, Op::_Let, std::move(left), rvect); - globs.gen_ops_set_globs(code, loc); + local_lval.gen_ops_if_nonempty(code, loc); return rvect; } + // small optimization: `var x = rhs` or `local_var = rhs` (90% cases), LValContext not needed actually + if (lhs->type == ast_local_var_lhs || (lhs->type == ast_reference && lhs->as()->sym->try_as())) { + std::vector right = pre_compile_expr(rhs, code); + std::vector left = pre_compile_expr(lhs, code); // effectively, local_var->ir_idx + vars_modification_watcher.trigger_callbacks(left, loc); + code.emplace_back(loc, Op::_Let, std::move(left), right); + return right; + } // lhs = rhs std::vector right = pre_compile_expr(rhs, code); - LValGlobs globs; - std::vector left = pre_compile_expr(lhs, code, &globs); - code.on_var_modification(left, loc); + LValContext local_lval; + std::vector left = pre_compile_expr(lhs, code, &local_lval); + vars_modification_watcher.trigger_callbacks(left, loc); code.emplace_back(loc, Op::_Let, std::move(left), right); - globs.gen_ops_set_globs(code, loc); + local_lval.gen_ops_if_nonempty(code, loc); return right; } -static std::vector gen_op_call(CodeBlob& code, TypePtr ret_type, SrcLocation here, - std::vector&& args_vars, const FunctionData* fun_ref) { - std::vector rvect = code.create_tmp_var(ret_type, here); - Op& op = code.emplace_back(here, Op::_Call, rvect, std::move(args_vars), fun_ref); +static std::vector gen_op_call(CodeBlob& code, TypePtr ret_type, SrcLocation loc, + std::vector&& args_vars, const FunctionData* fun_ref, const char* debug_desc) { + std::vector rvect = code.create_tmp_var(ret_type, loc, debug_desc); + Op& op = code.emplace_back(loc, 
Op::_Call, rvect, std::move(args_vars), fun_ref); if (!fun_ref->is_marked_as_pure()) { op.set_impure_flag(); } @@ -175,30 +364,42 @@ static std::vector gen_op_call(CodeBlob& code, TypePtr ret_type, SrcL } -static std::vector process_symbol(SrcLocation loc, const Symbol* sym, CodeBlob& code, LValGlobs* lval_globs) { +static std::vector pre_compile_symbol(SrcLocation loc, const Symbol* sym, CodeBlob& code, LValContext* lval_ctx) { if (const auto* glob_ref = sym->try_as()) { - std::vector rvect = code.create_tmp_var(glob_ref->declared_type, loc); - if (lval_globs) { - lval_globs->add_modified_glob(glob_ref, rvect); - return rvect; + if (!lval_ctx) { + // `globalVar` is used for reading, just create local IR var to represent its value, Op GlobVar will fill it + // note, that global tensors are stored as a tuple an unpacked to N vars on read, N determined by declared_type + std::vector local_ir_idx = code.create_tmp_var(glob_ref->declared_type, loc, "(glob-var)"); + code.emplace_back(loc, Op::_GlobVar, local_ir_idx, std::vector{}, glob_ref); + return local_ir_idx; } else { - code.emplace_back(loc, Op::_GlobVar, rvect, std::vector{}, glob_ref); - return rvect; + // `globalVar = rhs` / `mutate globalVar` / `globalTuple.0 = rhs` + lval_ctx->register_lval(loc, glob_ref); + if (const std::vector* local_ir_idx = lval_ctx->exists_already_known_global(glob_ref)) { + return *local_ir_idx; // `f(mutate g.0, mutate g.1)`, then g will be read only once + } + std::vector local_ir_idx = code.create_tmp_var(glob_ref->declared_type, loc, "(glob-var)"); + if (lval_ctx->is_rval_inside_lval()) { // for `globalVar.0` "globalVar" is rvalue inside lvalue + // for `globalVar = rhs` don't read a global actually, but for `globalVar.0 = rhs` do + code.emplace_back(loc, Op::_GlobVar, local_ir_idx, std::vector{}, glob_ref); + } + lval_ctx->register_modified_global(glob_ref, local_ir_idx); + return local_ir_idx; } } if (const auto* const_ref = sym->try_as()) { if (const_ref->is_int_const()) { - 
std::vector rvect = code.create_tmp_var(TypeDataInt::create(), loc); + std::vector rvect = code.create_tmp_var(TypeDataInt::create(), loc, "(glob-const)"); code.emplace_back(loc, Op::_IntConst, rvect, const_ref->as_int_const()); return rvect; } else { - std::vector rvect = code.create_tmp_var(TypeDataSlice::create(), loc); + std::vector rvect = code.create_tmp_var(TypeDataSlice::create(), loc, "(glob-const)"); code.emplace_back(loc, Op::_SliceConst, rvect, const_ref->as_slice_const()); return rvect; } } if (const auto* fun_ref = sym->try_as()) { - std::vector rvect = code.create_tmp_var(fun_ref->inferred_full_type, loc); + std::vector rvect = code.create_tmp_var(fun_ref->inferred_full_type, loc, "(glob-var-fun)"); code.emplace_back(loc, Op::_GlobVar, rvect, std::vector{}, fun_ref); return rvect; } @@ -206,9 +407,12 @@ static std::vector process_symbol(SrcLocation loc, const Symbol* sym, #ifdef TOLK_DEBUG tolk_assert(static_cast(var_ref->ir_idx.size()) == var_ref->declared_type->calc_width_on_stack()); #endif + if (lval_ctx) { + lval_ctx->register_lval(loc, var_ref); + } return var_ref->ir_idx; } - throw Fatal("process_symbol"); + throw Fatal("pre_compile_symbol"); } static std::vector process_assign(V v, CodeBlob& code) { @@ -234,7 +438,7 @@ static std::vector process_binary_operator(V v, if (v->fun_ref) { // almost all operators, fun_ref was assigned at type inferring std::vector args_vars = pre_compile_tensor(code, {v->get_lhs(), v->get_rhs()}); - return gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), v->fun_ref); + return gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), v->fun_ref, "(binary-op)"); } if (t == tok_logical_and || t == tok_logical_or) { // do the following transformations: @@ -249,7 +453,7 @@ static std::vector process_binary_operator(V v, v_b_ne_0->mutate()->assign_fun_ref(lookup_global_symbol("_!=_")->as()); std::vector cond = pre_compile_expr(v->get_lhs(), code); tolk_assert(cond.size() == 1); - std::vector 
rvect = code.create_tmp_var(v->inferred_type, v->loc); + std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc, "(cond)"); Op& if_op = code.emplace_back(v->loc, Op::_If, cond); code.push_set_cur(if_op.block0); code.emplace_back(v->loc, Op::_Let, rvect, pre_compile_expr(t == tok_logical_and ? v_b_ne_0 : v_1, code)); @@ -265,13 +469,13 @@ static std::vector process_binary_operator(V v, static std::vector process_unary_operator(V v, CodeBlob& code) { std::vector args_vars = pre_compile_tensor(code, {v->get_rhs()}); - return gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), v->fun_ref); + return gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), v->fun_ref, "(unary-op)"); } static std::vector process_ternary_operator(V v, CodeBlob& code) { std::vector cond = pre_compile_expr(v->get_cond(), code); tolk_assert(cond.size() == 1); - std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc); + std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc, "(cond)"); Op& if_op = code.emplace_back(v->loc, Op::_If, cond); code.push_set_cur(if_op.block0); code.emplace_back(v->get_when_true()->loc, Op::_Let, rvect, pre_compile_expr(v->get_when_true(), code)); @@ -282,13 +486,67 @@ static std::vector process_ternary_operator(V v return rvect; } -static std::vector process_dot_access(V v, CodeBlob& code, LValGlobs* lval_globs) { +static std::vector process_dot_access(V v, CodeBlob& code, LValContext* lval_ctx) { // it's NOT a method call `t.tupleSize()` (since such cases are handled by process_function_call) // it's `t.0`, `getUser().id`, and `t.tupleSize` (as a reference, not as a call) - // currently, nothing except a global function can be a target of dot access - const FunctionData* fun_ref = v->target; + if (!v->is_target_fun_ref()) { + TypePtr obj_type = v->get_obj()->inferred_type; + int index_at = std::get(v->target); + // `tensorVar.0`; since a tensor of N elems are N vars on a stack actually, calculate offset + if 
(const auto* t_tensor = obj_type->try_as()) { + if (lval_ctx) lval_ctx->register_lval(v->loc, v); + if (lval_ctx) lval_ctx->enter_rval_inside_lval(); + std::vector lhs_vars = pre_compile_expr(v->get_obj(), code, lval_ctx); + if (lval_ctx) lval_ctx->exit_rval_inside_lval(); + int stack_width = t_tensor->items[index_at]->calc_width_on_stack(); + int stack_offset = 0; + for (int i = 0; i < index_at; ++i) { + stack_offset += t_tensor->items[i]->calc_width_on_stack(); + } + return {lhs_vars.begin() + stack_offset, lhs_vars.begin() + stack_offset + stack_width}; + } + // `tupleVar.0`; not to mess up, separate rvalue and lvalue cases + if (obj_type->try_as() || obj_type->try_as()) { + if (!lval_ctx) { + // `tupleVar.0` as rvalue: the same as "tupleAt(tupleVar, 0)" written in terms of IR vars + std::vector tuple_ir_idx = pre_compile_expr(v->get_obj(), code); + std::vector index_ir_idx = code.create_tmp_var(TypeDataInt::create(), v->get_identifier()->loc, "(tuple-idx)"); + code.emplace_back(v->loc, Op::_IntConst, index_ir_idx, td::make_refint(index_at)); + std::vector field_ir_idx = code.create_tmp_var(v->inferred_type, v->loc, "(tuple-field)"); + tolk_assert(tuple_ir_idx.size() == 1 && field_ir_idx.size() == 1); // tuples contain only 1-slot values + const FunctionData* builtin_sym = lookup_global_symbol("tupleAt")->as(); + code.emplace_back(v->loc, Op::_Call, field_ir_idx, std::vector{tuple_ir_idx[0], index_ir_idx[0]}, builtin_sym); + return field_ir_idx; + } else { + // `tupleVar.0 = rhs`: finally "tupleSetAt(tupleVar, rhs, 0)" will be done + uint64_t hash = lval_ctx->register_lval(v->loc, v); + if (const var_idx_t* field_ir_idx = lval_ctx->exists_already_known_tuple_index(hash)) { + return {*field_ir_idx}; // `(t.0.0, t.0.1) = rhs`, then "t.0" will be read (tupleAt) once + } + lval_ctx->enter_rval_inside_lval(); + std::vector tuple_ir_idx = pre_compile_expr(v->get_obj(), code, lval_ctx); + lval_ctx->exit_rval_inside_lval(); + std::vector index_ir_idx = 
code.create_tmp_var(TypeDataInt::create(), v->get_identifier()->loc, "(tuple-idx)"); + code.emplace_back(v->loc, Op::_IntConst, index_ir_idx, td::make_refint(index_at)); + std::vector field_ir_idx = code.create_tmp_var(v->inferred_type, v->loc, "(tuple-field)"); + if (lval_ctx->is_rval_inside_lval()) { // for `t.0.1 = rhs` "t.0" is rvalue inside lvalue + // for `t.0 = rhs` don't call tupleAt, but for `t.0.1 = rhs` do for t.0 (still don't for t.0.1) + const FunctionData* builtin_sym = lookup_global_symbol("tupleAt")->as(); + code.emplace_back(v->loc, Op::_Call, field_ir_idx, std::vector{tuple_ir_idx[0], index_ir_idx[0]}, builtin_sym); + } + lval_ctx->register_modified_tuple_index(hash, tuple_ir_idx[0], index_ir_idx[0], field_ir_idx[0]); + vars_modification_watcher.trigger_callbacks(tuple_ir_idx, v->loc); + return field_ir_idx; + } + } + tolk_assert(false); + } + + // okay, v->target refs a function, like `obj.method`, filled at type inferring + // (currently, nothing except a global function can be referenced, no object-scope methods exist) + const FunctionData* fun_ref = std::get(v->target); tolk_assert(fun_ref); - return process_symbol(v->loc, fun_ref, code, lval_globs); + return pre_compile_symbol(v->loc, fun_ref, code, lval_ctx); } static std::vector process_function_call(V v, CodeBlob& code) { @@ -304,7 +562,7 @@ static std::vector process_function_call(V v, Code std::vector tfunc = pre_compile_expr(v->get_callee(), code); tolk_assert(tfunc.size() == 1); args_vars.push_back(tfunc[0]); - std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc); + std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc, "(call-ind)"); Op& op = code.emplace_back(v->loc, Op::_CallInd, rvect, std::move(args_vars)); op.set_impure_flag(); return rvect; @@ -349,28 +607,28 @@ static std::vector process_function_call(V v, Code for (const std::vector& list : vars_per_arg) { args_vars.insert(args_vars.end(), list.cbegin(), list.cend()); } - std::vector rvect_apply = 
gen_op_call(code, op_call_type, v->loc, std::move(args_vars), fun_ref); + std::vector rvect_apply = gen_op_call(code, op_call_type, v->loc, std::move(args_vars), fun_ref, "(fun-call)"); if (fun_ref->has_mutate_params()) { - LValGlobs local_globs; + LValContext local_lval; std::vector left; for (int i = 0; i < delta_self + v->get_num_args(); ++i) { if (fun_ref->parameters[i].is_mutate_parameter()) { AnyExprV arg_i = obj_leftmost && i == 0 ? obj_leftmost : args[i]; tolk_assert(arg_i->is_lvalue || i == 0); if (arg_i->is_lvalue) { - std::vector ith_var_idx = pre_compile_expr(arg_i, code, &local_globs); + std::vector ith_var_idx = pre_compile_expr(arg_i, code, &local_lval); left.insert(left.end(), ith_var_idx.begin(), ith_var_idx.end()); } else { left.insert(left.end(), vars_per_arg[0].begin(), vars_per_arg[0].end()); } } } - std::vector rvect = code.create_tmp_var(real_ret_type, v->loc); + std::vector rvect = code.create_tmp_var(real_ret_type, v->loc, "(fun-call)"); left.insert(left.end(), rvect.begin(), rvect.end()); - code.on_var_modification(left, v->loc); + vars_modification_watcher.trigger_callbacks(left, v->loc); code.emplace_back(v->loc, Op::_Let, std::move(left), rvect_apply); - local_globs.gen_ops_set_globs(code, v->loc); + local_lval.gen_ops_if_nonempty(code, v->loc); rvect_apply = rvect; } @@ -385,29 +643,29 @@ static std::vector process_function_call(V v, Code return rvect_apply; } -static std::vector process_tensor(V v, CodeBlob& code, LValGlobs* lval_globs) { - return pre_compile_tensor(code, v->get_items(), lval_globs); +static std::vector process_tensor(V v, CodeBlob& code, LValContext* lval_ctx) { + return pre_compile_tensor(code, v->get_items(), lval_ctx); } -static std::vector process_typed_tuple(V v, CodeBlob& code, LValGlobs* lval_globs) { - if (lval_globs) { // todo some time, make "var (a, [b,c]) = (1, [2,3])" work +static std::vector process_typed_tuple(V v, CodeBlob& code, LValContext* lval_ctx) { + if (lval_ctx) { // todo some time, make "var 
(a, [b,c]) = (1, [2,3])" work v->error("[...] can not be used as lvalue here"); } - std::vector left = code.create_tmp_var(v->inferred_type, v->loc); - std::vector right = pre_compile_tensor(code, v->get_items()); + std::vector left = code.create_tmp_var(v->inferred_type, v->loc, "(pack-tuple)"); + std::vector right = pre_compile_tensor(code, v->get_items(), lval_ctx); code.emplace_back(v->loc, Op::_Tuple, left, std::move(right)); return left; } static std::vector process_int_const(V v, CodeBlob& code) { - std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc); + std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc, "(int-const)"); code.emplace_back(v->loc, Op::_IntConst, rvect, v->intval); return rvect; } static std::vector process_string_const(V v, CodeBlob& code) { ConstantValue value = eval_const_init_value(v); - std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc); + std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc, "(str-const)"); if (value.is_int()) { code.emplace_back(v->loc, Op::_IntConst, rvect, value.as_int()); } else { @@ -418,21 +676,21 @@ static std::vector process_string_const(V v, CodeBl static std::vector process_bool_const(V v, CodeBlob& code) { const FunctionData* builtin_sym = lookup_global_symbol(v->bool_val ? 
"__true" : "__false")->as(); - return gen_op_call(code, v->inferred_type, v->loc, {}, builtin_sym); + return gen_op_call(code, v->inferred_type, v->loc, {}, builtin_sym, "(bool-const)"); } static std::vector process_null_keyword(V v, CodeBlob& code) { const FunctionData* builtin_sym = lookup_global_symbol("__null")->as(); - return gen_op_call(code, v->inferred_type, v->loc, {}, builtin_sym); + return gen_op_call(code, v->inferred_type, v->loc, {}, builtin_sym, "(null-literal)"); } static std::vector process_local_var(V v, CodeBlob& code) { if (v->marked_as_redef) { - return process_symbol(v->loc, v->var_ref, code, nullptr); + return pre_compile_symbol(v->loc, v->var_ref, code, nullptr); } tolk_assert(v->var_ref->ir_idx.empty()); - v->var_ref->mutate()->assign_ir_idx(code.create_var(v->inferred_type, v->var_ref, v->loc)); + v->var_ref->mutate()->assign_ir_idx(code.create_var(v->inferred_type, v->loc, v->var_ref->name)); return v->var_ref->ir_idx; } @@ -444,13 +702,13 @@ static std::vector process_local_vars_declaration(V process_underscore(V v, CodeBlob& code) { // when _ is used as left side of assignment, like `(cs, _) = cs.loadAndReturn()` - return code.create_tmp_var(v->inferred_type, v->loc); + return code.create_tmp_var(v->inferred_type, v->loc, "(underscore)"); } -std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, LValGlobs* lval_globs) { +std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, LValContext* lval_ctx) { switch (v->type) { case ast_reference: - return process_symbol(v->loc, v->as()->sym, code, lval_globs); + return pre_compile_symbol(v->loc, v->as()->sym, code, lval_ctx); case ast_assign: return process_assign(v->as(), code); case ast_set_assign: @@ -462,17 +720,17 @@ std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, LValGlobs* l case ast_ternary_operator: return process_ternary_operator(v->as(), code); case ast_cast_as_operator: - return pre_compile_expr(v->as()->get_expr(), code, lval_globs); + return 
pre_compile_expr(v->as()->get_expr(), code, lval_ctx); case ast_dot_access: - return process_dot_access(v->as(), code, lval_globs); + return process_dot_access(v->as(), code, lval_ctx); case ast_function_call: return process_function_call(v->as(), code); case ast_parenthesized_expression: - return pre_compile_expr(v->as()->get_expr(), code, lval_globs); + return pre_compile_expr(v->as()->get_expr(), code, lval_ctx); case ast_tensor: - return process_tensor(v->as(), code, lval_globs); + return process_tensor(v->as(), code, lval_ctx); case ast_typed_tuple: - return process_typed_tuple(v->as(), code, lval_globs); + return process_typed_tuple(v->as(), code, lval_ctx); case ast_int_const: return process_int_const(v->as(), code); case ast_string_const: @@ -515,14 +773,14 @@ static void process_assert_statement(V v, CodeBlob& code) const FunctionData* builtin_sym = lookup_global_symbol("__throw_if_unless")->as(); std::vector args_vars = pre_compile_tensor(code, args); - gen_op_call(code, TypeDataVoid::create(), v->loc, std::move(args_vars), builtin_sym); + gen_op_call(code, TypeDataVoid::create(), v->loc, std::move(args_vars), builtin_sym, "(throw-call)"); } static void process_catch_variable(AnyExprV v_catch_var, CodeBlob& code) { if (auto v_ref = v_catch_var->try_as(); v_ref && v_ref->sym) { // not underscore const LocalVarData* var_ref = v_ref->sym->as(); tolk_assert(var_ref->ir_idx.empty()); - var_ref->mutate()->assign_ir_idx(code.create_var(v_catch_var->inferred_type, var_ref, v_catch_var->loc)); + var_ref->mutate()->assign_ir_idx(code.create_var(v_catch_var->inferred_type, v_catch_var->loc, var_ref->name)); } } @@ -621,11 +879,11 @@ static void process_throw_statement(V v, CodeBlob& code) { if (v->has_thrown_arg()) { const FunctionData* builtin_sym = lookup_global_symbol("__throw_arg")->as(); std::vector args_vars = pre_compile_tensor(code, {v->get_thrown_arg(), v->get_thrown_code()}); - gen_op_call(code, TypeDataVoid::create(), v->loc, std::move(args_vars), 
builtin_sym); + gen_op_call(code, TypeDataVoid::create(), v->loc, std::move(args_vars), builtin_sym, "(throw-call)"); } else { const FunctionData* builtin_sym = lookup_global_symbol("__throw")->as(); std::vector args_vars = pre_compile_tensor(code, {v->get_thrown_code()}); - gen_op_call(code, TypeDataVoid::create(), v->loc, std::move(args_vars), builtin_sym); + gen_op_call(code, TypeDataVoid::create(), v->loc, std::move(args_vars), builtin_sym, "(throw-call)"); } } @@ -699,7 +957,7 @@ static void convert_function_body_to_CodeBlob(const FunctionData* fun_ref, Funct for (int i = 0; i < fun_ref->get_num_params(); ++i) { const LocalVarData& param_i = fun_ref->parameters[i]; - std::vector ir_idx = blob->create_var(param_i.declared_type, ¶m_i, param_i.loc); + std::vector ir_idx = blob->create_var(param_i.declared_type, param_i.loc, param_i.name); rvect_import.insert(rvect_import.end(), ir_idx.begin(), ir_idx.end()); param_i.mutate()->assign_ir_idx(std::move(ir_idx)); } @@ -716,6 +974,7 @@ static void convert_function_body_to_CodeBlob(const FunctionData* fun_ref, Funct blob->close_blk(v_body->loc_end); code_body->set_code(blob); + tolk_assert(vars_modification_watcher.empty()); } static void convert_asm_body_to_AsmOp(const FunctionData* fun_ref, FunctionBodyAsm* asm_body) { diff --git a/tolk/pipe-check-rvalue-lvalue.cpp b/tolk/pipe-check-rvalue-lvalue.cpp index 943dfb96..a824cc5d 100644 --- a/tolk/pipe-check-rvalue-lvalue.cpp +++ b/tolk/pipe-check-rvalue-lvalue.cpp @@ -123,8 +123,8 @@ class CheckRValueLvalueVisitor final : public ASTVisitorFunctionBody { void visit(V v) override { // a reference to a method used as rvalue, like `var v = t.tupleAt` - if (const FunctionData* fun_ref = v->target; v->is_rvalue) { - validate_function_used_as_noncall(v, fun_ref); + if (v->is_rvalue && v->is_target_fun_ref()) { + validate_function_used_as_noncall(v, std::get(v->target)); } } diff --git a/tolk/pipe-infer-types-and-calls.cpp b/tolk/pipe-infer-types-and-calls.cpp index 
011c83d7..ba5f77a7 100644 --- a/tolk/pipe-infer-types-and-calls.cpp +++ b/tolk/pipe-infer-types-and-calls.cpp @@ -124,6 +124,19 @@ static void fire_error_cannot_apply_operator(SrcLocation loc, std::string_view o throw ParseError(loc, "can not apply operator `" + op + "` to " + to_string(lhs->inferred_type) + " and " + to_string(rhs->inferred_type)); } +// fire an error on `untypedTupleVar.0` when used without a hint +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_cannot_deduce_untyped_tuple_access(SrcLocation loc, int index) { + std::string idx_access = "." + std::to_string(index); + throw ParseError(loc, "can not deduce type of `" + idx_access + "`; either assign it to variable like `var c: int = " + idx_access + "` or cast the result like `" + idx_access + " as int`"); +} + +// fire an error on `untypedTupleVar.0` when inferred as (int,int), or `[int, (int,int)]`, or other non-1 width in a tuple +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_cannot_put_non1_stack_width_arg_to_tuple(SrcLocation loc, TypePtr inferred_type) { + throw ParseError(loc, "can not put " + to_string(inferred_type) + " into a tuple, because it occupies " + std::to_string(inferred_type->calc_width_on_stack()) + " stack slots in TVM, not 1"); +} + // check correctness of called arguments counts and their type matching static void check_function_arguments(const FunctionData* fun_ref, V v, AnyExprV lhs_of_dot_call) { int delta_self = lhs_of_dot_call ? 
1 : 0; @@ -466,6 +479,22 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { return TypeDataTypedTuple::create(std::move(sub_hints)); } + // `a.0 = rhs` / `b.1.0 = rhs` (remember, its target is not assigned yet) + if (auto lhs_dot = lhs->try_as()) { + TypePtr obj_hint = calc_hint_from_assignment_lhs(lhs_dot->get_obj()); + std::string_view field_name = lhs_dot->get_field_name(); + if (field_name[0] >= '0' && field_name[0] <= '9') { + int index_at = std::stoi(std::string(field_name)); + if (const auto* t_tensor = obj_hint->try_as(); t_tensor && index_at < t_tensor->size()) { + return t_tensor->items[index_at]; + } + if (const auto* t_tuple = obj_hint->try_as(); t_tuple && index_at < t_tuple->size()) { + return t_tuple->items[index_at]; + } + } + return TypeDataUnknown::create(); + } + return TypeDataUnknown::create(); } @@ -562,8 +591,8 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { return; } - // here is something strange and unhandled, like `f() = rhs` - // it will fail on later compilation steps (like rvalue/lvalue checks), but type inferring should pass + // here is something unhandled like `a.0 = rhs`, run regular inferring on rhs + // for something strange like `f() = rhs` type inferring will pass, but will fail later infer_any_expr(lhs, rhs_type); if (!lhs->inferred_type->can_rhs_be_assigned(rhs_type)) { err_loc->error("can not assign " + to_string(rhs_type) + " to " + to_string(lhs)); @@ -839,25 +868,56 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { // it's NOT a method call `t.tupleSize()` (since such cases are handled by infer_function_call) // it's `t.0`, `getUser().id`, and `t.tupleSize` (as a reference, not as a call) infer_any_expr(v->get_obj()); + TypePtr obj_type = v->get_obj()->inferred_type; // our goal is to fill v->target knowing type of obj V v_ident = v->get_identifier(); // field/method name vertex V v_instantiationTs = v->get_instantiationTs(); std::string_view field_name = v_ident->name; - // for now, Tolk doesn't 
have structures, properties, and object-scoped methods - // so, only `t.tupleSize` is allowed, look up a global function - const Symbol* sym = lookup_global_symbol(field_name); - if (!sym) { - v_ident->error("undefined symbol `" + static_cast(field_name) + "`"); + // it can be indexed access (`tensorVar.0`, `tupleVar.1`) or a method (`t.tupleSize`) + // at first, check for indexed access + if (field_name[0] >= '0' && field_name[0] <= '9') { + int index_at = std::stoi(std::string(field_name)); + if (const auto* t_tensor = obj_type->try_as()) { + if (index_at >= t_tensor->size()) { + v_ident->error("invalid tensor index, expected 0.." + std::to_string(t_tensor->items.size() - 1)); + } + v->mutate()->assign_target(index_at); + assign_inferred_type(v, t_tensor->items[index_at]); + return; + } + if (const auto* t_tuple = obj_type->try_as()) { + if (index_at >= t_tuple->size()) { + v_ident->error("invalid tuple index, expected 0.." + std::to_string(t_tuple->items.size() - 1)); + } + v->mutate()->assign_target(index_at); + assign_inferred_type(v, t_tuple->items[index_at]); + return; + } + if (obj_type->try_as()) { + if (hint == nullptr) { + fire_error_cannot_deduce_untyped_tuple_access(v->loc, index_at); + } + if (hint->calc_width_on_stack() != 1) { + fire_error_cannot_put_non1_stack_width_arg_to_tuple(v->loc, hint); + } + v->mutate()->assign_target(index_at); + assign_inferred_type(v, hint); + return; + } + v_ident->error("type " + to_string(obj_type) + " is not indexable"); } - const FunctionData* fun_ref = sym->try_as(); + + // for now, Tolk doesn't have fields and object-scoped methods; `t.tupleSize` is a global function `tupleSize` + const Symbol* sym = lookup_global_symbol(field_name); + const FunctionData* fun_ref = sym ? 
sym->try_as() : nullptr; if (!fun_ref) { - v_ident->error("referencing a non-function"); + v_ident->error("non-existing field `" + static_cast(field_name) + "` of type " + to_string(obj_type)); } // `t.tupleSize` is ok, `cs.tupleSize` not - if (!fun_ref->parameters[0].declared_type->can_rhs_be_assigned(v->get_obj()->inferred_type)) { - v_ident->error("referencing a method for " + to_string(fun_ref->parameters[0]) + " with an object of type " + to_string(v->get_obj())); + if (!fun_ref->parameters[0].declared_type->can_rhs_be_assigned(obj_type)) { + v_ident->error("referencing a method for " + to_string(fun_ref->parameters[0]) + " with object of type " + to_string(obj_type)); } if (fun_ref->is_generic_function() && !v_instantiationTs) { @@ -896,21 +956,24 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { } else if (auto v_dot = callee->try_as()) { // `obj.someMethod()` / `obj.someMethod()` / `getF().someMethod()` / `obj.SOME_CONST()` + // note, that dot_obj->target is not filled yet, since callee was not inferred yet delta_self = 1; dot_obj = v_dot->get_obj(); v_instantiationTs = v_dot->get_instantiationTs(); // present for `obj.someMethod()` infer_any_expr(dot_obj); - // for now, Tolk doesn't have object-scoped methods, so method resolving doesn't depend on obj type - // (in other words, `globalFunction(a)` = `a.globalFunction()`) - std::string_view method_name = v_dot->get_field_name(); - const Symbol* sym = lookup_global_symbol(method_name); - if (!sym) { - v_dot->get_identifier()->error("undefined symbol `" + static_cast(method_name) + "`"); - } - fun_ref = sym->try_as(); - if (!fun_ref) { - v_dot->get_identifier()->error("calling a non-function"); + // it can be indexed access (`tensorVar.0()`, `tupleVar.1()`) or a method (`t.tupleSize()`) + std::string_view field_name = v_dot->get_field_name(); + if (field_name[0] >= '0' && field_name[0] <= '9') { + // indexed access `ab.2()`, then treat `ab.2` just like an expression, fun_ref remains nullptr + // 
infer_dot_access() will be called for a callee, it will check type, index correctness, etc. + } else { + // for now, Tolk doesn't have fields and object-scoped methods; `t.tupleSize` is a global function `tupleSize` + const Symbol* sym = lookup_global_symbol(field_name); + fun_ref = sym ? sym->try_as() : nullptr; + if (!fun_ref) { + v_dot->get_identifier()->error("non-existing method `" + static_cast(field_name) + "` of type " + to_string(dot_obj)); + } } } else { @@ -926,7 +989,7 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { assign_inferred_type(arg_i, arg_i->get_expr()); } - // handle `local_var()` / `getF()()` / `5()` / `SOME_CONST()` / `obj.method()()()` + // handle `local_var()` / `getF()()` / `5()` / `SOME_CONST()` / `obj.method()()()` / `tensorVar.0()` if (!fun_ref) { // treat callee like a usual expression, which must have "callable" inferred type infer_any_expr(callee); @@ -1017,6 +1080,9 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { for (int i = 0; i < v->size(); ++i) { AnyExprV item = v->get_item(i); infer_any_expr(item, tuple_hint && i < tuple_hint->size() ? 
tuple_hint->items[i] : nullptr); + if (item->inferred_type->calc_width_on_stack() != 1) { + fire_error_cannot_put_non1_stack_width_arg_to_tuple(v->get_item(i)->loc, item->inferred_type); + } types_list.emplace_back(item->inferred_type); } assign_inferred_type(v, TypeDataTypedTuple::create(std::move(types_list))); diff --git a/tolk/tolk.h b/tolk/tolk.h index 7b44931e..4086d7f7 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h @@ -44,21 +44,23 @@ typedef int var_idx_t; typedef int const_idx_t; struct TmpVar { - TypePtr v_type; - var_idx_t ir_idx; - const LocalVarData* v_sym; // points to var defined in code; nullptr for implicitly created tmp vars - SrcLocation where; - std::vector> on_modification; + var_idx_t ir_idx; // every var in IR represents 1 stack slot + TypePtr v_type; // calc_width_on_stack() is 1 + std::string name; // "x" for vars originated from user sources; "x.0" for tensor components; empty for implicitly created tmp vars + SrcLocation loc; // location of var declaration in sources or where a tmp var was originated +#ifdef TOLK_DEBUG + const char* desc = nullptr; // "origin" of tmp var, for debug output like `'15 (binary-op) '16 (glob-var)` +#endif - TmpVar(var_idx_t ir_idx, TypePtr type, const LocalVarData* v_sym, SrcLocation loc) - : v_type(type) - , ir_idx(ir_idx) - , v_sym(v_sym) - , where(loc) { + TmpVar(var_idx_t ir_idx, TypePtr v_type, std::string name, SrcLocation loc) + : ir_idx(ir_idx) + , v_type(v_type) + , name(std::move(name)) + , loc(loc) { } - void show(std::ostream& os, int omit_idx = 0) const; - void dump(std::ostream& os) const; + void show_as_stack_comment(std::ostream& os) const; + void show(std::ostream& os) const; }; struct VarDescr { @@ -602,7 +604,6 @@ struct AsmOpList { } const_idx_t register_const(Const new_const); Const get_const(const_idx_t idx); - void show_var(std::ostream& os, var_idx_t idx) const; void show_var_ext(std::ostream& os, std::pair idx_pair) const; void adjust_last() { if (list_.back().is_nop()) { @@ -1018,13 
+1019,10 @@ struct Stack { void rearrange_top(var_idx_t top, bool last); void merge_const(const Stack& req_stack); void merge_state(const Stack& req_stack); - void show(int _mode); - void show() { - show(mode); - } + void show(); void opt_show() { if ((mode & (_StkCmt | _Shown)) == _StkCmt) { - show(mode); + show(); } } bool operator==(const Stack& y) const & { @@ -1108,9 +1106,15 @@ struct CodeBlob { #endif return res; } - std::vector create_var(TypePtr var_type, const LocalVarData* v_sym, SrcLocation loc); - std::vector create_tmp_var(TypePtr var_type, SrcLocation loc) { - return create_var(var_type, nullptr, loc); + std::vector create_var(TypePtr var_type, SrcLocation loc, std::string name); + std::vector create_tmp_var(TypePtr var_type, SrcLocation loc, const char* desc) { + std::vector ir_idx = create_var(var_type, loc, {}); +#ifdef TOLK_DEBUG + for (var_idx_t v : ir_idx) { + vars[v].desc = desc; + } +#endif + return ir_idx; } bool compute_used_code_vars(); bool compute_used_code_vars(std::unique_ptr& ops, const VarDescrList& var_info, bool edit) const; @@ -1135,14 +1139,6 @@ struct CodeBlob { void mark_noreturn(); void generate_code(AsmOpList& out_list, int mode = 0); void generate_code(std::ostream& os, int mode = 0, int indent = 0); - - void on_var_modification(const std::vector& left_lval_indices, SrcLocation here) const { - for (var_idx_t ir_idx : left_lval_indices) { - for (auto& f : vars.at(ir_idx).on_modification) { - f(here); - } - } - } }; // defined in builtins.cpp From 5b44e01455eed921150e38898ad79d2407bb4e76 Mon Sep 17 00:00:00 2001 From: tolk-vm Date: Mon, 27 Jan 2025 10:33:24 +0300 Subject: [PATCH 39/61] [Tolk] Allow `cell` and `slice` be valid identifiers They are not keywords anymore. > var cell = ...; > var cell: cell = ...; Motivation: in the future, when structures are implemented, this obviously should be valid: > struct a { ... } > var a = ...; Struct fields will also be allowed to have names int/slice/cell. 
--- tolk-tester/tests/assignment-tests.tolk | 12 +++++ tolk-tester/tests/invalid-catch-1.tolk | 6 +-- tolk-tester/tests/invalid-declaration-2.tolk | 2 +- tolk-tester/tests/invalid-declaration-4.tolk | 2 +- tolk-tester/tests/method_id.tolk | 2 + tolk/ast-from-tokens.cpp | 18 +------ tolk/lexer.cpp | 10 ---- tolk/lexer.h | 8 --- tolk/type-system.cpp | 56 ++++++++++---------- 9 files changed, 47 insertions(+), 69 deletions(-) diff --git a/tolk-tester/tests/assignment-tests.tolk b/tolk-tester/tests/assignment-tests.tolk index 89de8cf4..40761939 100644 --- a/tolk-tester/tests/assignment-tests.tolk +++ b/tolk-tester/tests/assignment-tests.tolk @@ -14,6 +14,18 @@ fun autoInferIntNull(x: int) { return x; } +fun typesAsIdentifiers(builder: builder) { + var int = 1; + var cell = builder.endCell(); + var slice = cell.beginParse(); + { + var cell: cell = cell; + var tuple: tuple = createEmptyTuple(); + var bool: bool = tuple.tupleAt(0) > 0; + } + return int; +} + fun main(value: int) { var (x: int, y) = (autoInferIntNull(value), autoInferIntNull(value * 2)); if (x == null && y == null) { return null; } diff --git a/tolk-tester/tests/invalid-catch-1.tolk b/tolk-tester/tests/invalid-catch-1.tolk index 756722bb..54f6e182 100644 --- a/tolk-tester/tests/invalid-catch-1.tolk +++ b/tolk-tester/tests/invalid-catch-1.tolk @@ -1,12 +1,12 @@ fun main() { try { - } catch(int, arg) {} + } catch(if, arg) {} return 0; } /** @compilation_should_fail -@stderr expected identifier, got `int` -@stderr catch(int +@stderr expected identifier, got `if` +@stderr catch(if */ diff --git a/tolk-tester/tests/invalid-declaration-2.tolk b/tolk-tester/tests/invalid-declaration-2.tolk index 70063251..07ffb683 100644 --- a/tolk-tester/tests/invalid-declaration-2.tolk +++ b/tolk-tester/tests/invalid-declaration-2.tolk @@ -4,5 +4,5 @@ fun main(int): int { /** @compilation_should_fail -@stderr expected parameter name, got `int` +@stderr expected `: `, got `)` */ diff --git 
a/tolk-tester/tests/invalid-declaration-4.tolk b/tolk-tester/tests/invalid-declaration-4.tolk index 183dda96..62fd6c56 100644 --- a/tolk-tester/tests/invalid-declaration-4.tolk +++ b/tolk-tester/tests/invalid-declaration-4.tolk @@ -4,5 +4,5 @@ fun main() { /** @compilation_should_fail -@stderr probably, you use FunC-like declarations; valid syntax is `var x: int = ...` +@stderr expected `;`, got `x` */ diff --git a/tolk-tester/tests/method_id.tolk b/tolk-tester/tests/method_id.tolk index c2d0b9aa..e7e70d24 100644 --- a/tolk-tester/tests/method_id.tolk +++ b/tolk-tester/tests/method_id.tolk @@ -4,6 +4,8 @@ fun foo1(): int { return 111; } fun foo2(): int { return 222; } @method_id(10) fun foo3(): int { return 333; } +@method_id(11) +fun slice(slice: slice): slice { return slice; } fun main(): int { return 999; } /** diff --git a/tolk/ast-from-tokens.cpp b/tolk/ast-from-tokens.cpp index 3eb4385a..f5855bc1 100644 --- a/tolk/ast-from-tokens.cpp +++ b/tolk/ast-from-tokens.cpp @@ -111,16 +111,6 @@ static void diagnose_addition_in_bitshift(SrcLocation loc, std::string_view bits } } -// fire an error for FunC-style variable declaration, like "int i" -GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_FunC_style_var_declaration(Lexer& lex) { - SrcLocation loc = lex.cur_location(); - std::string type_str = static_cast(lex.cur_str()); // int / slice / etc. - lex.next(); - std::string var_name = lex.tok() == tok_identifier ? 
static_cast(lex.cur_str()) : "name"; - throw ParseError(loc, "can't parse; probably, you use FunC-like declarations; valid syntax is `var " + var_name + ": " + type_str + " = ...`"); -} - // replace (a == null) and similar to isNull(a) (call of a built-in function) static AnyExprV maybe_replace_eq_null_with_isNull_call(V v) { bool has_null = v->get_lhs()->type == ast_null_keyword || v->get_rhs()->type == ast_null_keyword; @@ -377,14 +367,8 @@ static AnyExprV parse_expr100(Lexer& lex) { } return createV(loc, v_ident, v_instantiationTs); } - default: { - // show a proper error for `int i` (FunC-style declarations) - TokenType t = lex.tok(); - if (t == tok_int || t == tok_cell || t == tok_slice || t == tok_builder || t == tok_tuple) { - fire_error_FunC_style_var_declaration(lex); - } + default: lex.unexpected(""); - } } } diff --git a/tolk/lexer.cpp b/tolk/lexer.cpp index 78ec991e..06913a5f 100644 --- a/tolk/lexer.cpp +++ b/tolk/lexer.cpp @@ -331,7 +331,6 @@ struct ChunkIdentifierOrKeyword final : ChunkLexerBase { if (str == "as") return tok_as; break; case 3: - if (str == "int") return tok_int; if (str == "var") return tok_var; if (str == "fun") return tok_fun; if (str == "asm") return tok_asm; @@ -342,18 +341,13 @@ struct ChunkIdentifierOrKeyword final : ChunkLexerBase { case 4: if (str == "else") return tok_else; if (str == "true") return tok_true; - if (str == "cell") return tok_cell; if (str == "null") return tok_null; - if (str == "void") return tok_void; - if (str == "bool") return tok_bool; if (str == "self") return tok_self; if (str == "tolk") return tok_tolk; if (str == "type") return tok_type; if (str == "enum") return tok_enum; break; case 5: - if (str == "slice") return tok_slice; - if (str == "tuple") return tok_tuple; if (str == "const") return tok_const; if (str == "false") return tok_false; if (str == "redef") return tok_redef; @@ -374,16 +368,12 @@ struct ChunkIdentifierOrKeyword final : ChunkLexerBase { if (str == "export") return tok_export; break; 
case 7: - if (str == "builder") return tok_builder; if (str == "builtin") return tok_builtin; break; case 8: if (str == "continue") return tok_continue; if (str == "operator") return tok_operator; break; - case 12: - if (str == "continuation") return tok_continuation; - break; default: break; } diff --git a/tolk/lexer.h b/tolk/lexer.h index 9dbfe3b6..58bc3640 100644 --- a/tolk/lexer.h +++ b/tolk/lexer.h @@ -118,14 +118,6 @@ enum TokenType { tok_if, tok_else, - tok_int, - tok_cell, - tok_bool, - tok_slice, - tok_builder, - tok_continuation, - tok_tuple, - tok_void, tok_arrow, tok_as, diff --git a/tolk/type-system.cpp b/tolk/type-system.cpp index 401c72af..c7122e10 100644 --- a/tolk/type-system.cpp +++ b/tolk/type-system.cpp @@ -581,40 +581,38 @@ std::vector parse_nested_type_list_in_parenthesis(Lexer& lex) { static TypePtr parse_simple_type(Lexer& lex) { switch (lex.tok()) { - case tok_int: - lex.next(); - return TypeDataInt::create(); - case tok_bool: - lex.next(); - return TypeDataBool::create(); - case tok_cell: - lex.next(); - return TypeDataCell::create(); - case tok_builder: - lex.next(); - return TypeDataBuilder::create(); - case tok_slice: - lex.next(); - return TypeDataSlice::create(); - case tok_tuple: - lex.next(); - return TypeDataTuple::create(); - case tok_continuation: - lex.next(); - return TypeDataContinuation::create(); - case tok_null: - lex.next(); - return TypeDataNullLiteral::create(); - case tok_void: - lex.next(); - return TypeDataVoid::create(); case tok_self: case tok_identifier: { SrcLocation loc = lex.cur_location(); - std::string text = static_cast(lex.cur_str()); + std::string_view str = lex.cur_str(); lex.next(); - return TypeDataUnresolved::create(std::move(text), loc); + switch (str.size()) { + case 3: + if (str == "int") return TypeDataInt::create(); + break; + case 4: + if (str == "cell") return TypeDataCell::create(); + if (str == "void") return TypeDataVoid::create(); + if (str == "bool") return TypeDataBool::create(); + break; + 
case 5: + if (str == "slice") return TypeDataSlice::create(); + if (str == "tuple") return TypeDataTuple::create(); + break; + case 7: + if (str == "builder") return TypeDataBuilder::create(); + break; + case 12: + if (str == "continuation") return TypeDataContinuation::create(); + break; + default: + break; + } + return TypeDataUnresolved::create(std::string(str), loc); } + case tok_null: + lex.next(); + return TypeDataNullLiteral::create(); case tok_oppar: { std::vector items = parse_nested_type_list_in_parenthesis(lex); if (items.size() == 1) { From e9d8f1611b3370257ed50de80627f88cdb4c8ff9 Mon Sep 17 00:00:00 2001 From: tolk-vm Date: Mon, 27 Jan 2025 10:34:23 +0300 Subject: [PATCH 40/61] [Tolk] Bump version to v0.8 --- crypto/smartcont/tolk-stdlib/common.tolk | 2 +- crypto/smartcont/tolk-stdlib/gas-payments.tolk | 2 +- crypto/smartcont/tolk-stdlib/lisp-lists.tolk | 2 +- crypto/smartcont/tolk-stdlib/tvm-dicts.tolk | 2 +- crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk | 2 +- tolk/tolk-version.h | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/crypto/smartcont/tolk-stdlib/common.tolk b/crypto/smartcont/tolk-stdlib/common.tolk index 4c0c4007..5311ec2f 100644 --- a/crypto/smartcont/tolk-stdlib/common.tolk +++ b/crypto/smartcont/tolk-stdlib/common.tolk @@ -1,7 +1,7 @@ // Standard library for Tolk (LGPL licence). // It contains common functions that are available out of the box, the user doesn't have to import anything. // More specific functions are required to be imported explicitly, like "@stdlib/tvm-dicts". -tolk 0.7 +tolk 0.8 /** Tuple manipulation primitives. diff --git a/crypto/smartcont/tolk-stdlib/gas-payments.tolk b/crypto/smartcont/tolk-stdlib/gas-payments.tolk index 83893354..2ac32f48 100644 --- a/crypto/smartcont/tolk-stdlib/gas-payments.tolk +++ b/crypto/smartcont/tolk-stdlib/gas-payments.tolk @@ -1,5 +1,5 @@ // A part of standard library for Tolk -tolk 0.7 +tolk 0.8 /** Gas and payment related primitives. 
diff --git a/crypto/smartcont/tolk-stdlib/lisp-lists.tolk b/crypto/smartcont/tolk-stdlib/lisp-lists.tolk index 429f0cbf..0cb17841 100644 --- a/crypto/smartcont/tolk-stdlib/lisp-lists.tolk +++ b/crypto/smartcont/tolk-stdlib/lisp-lists.tolk @@ -1,5 +1,5 @@ // A part of standard library for Tolk -tolk 0.7 +tolk 0.8 /** Lisp-style lists are nested 2-elements tuples: `(1, (2, (3, null)))` represents list `[1, 2, 3]`. diff --git a/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk b/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk index a47fe542..5c436239 100644 --- a/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk +++ b/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk @@ -1,5 +1,5 @@ // A part of standard library for Tolk -tolk 0.7 +tolk 0.8 /** Dictionaries are represented as `cell` data type (cells can store anything, dicts in particular). diff --git a/crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk b/crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk index ef7c2afe..72a54aac 100644 --- a/crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk +++ b/crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk @@ -1,5 +1,5 @@ // A part of standard library for Tolk -tolk 0.7 +tolk 0.8 /// Usually `c3` has a continuation initialized by the whole code of the contract. It is used for function calls. /// The primitive returns the current value of `c3`. diff --git a/tolk/tolk-version.h b/tolk/tolk-version.h index 7eaf55a7..84326012 100644 --- a/tolk/tolk-version.h +++ b/tolk/tolk-version.h @@ -18,6 +18,6 @@ namespace tolk { -constexpr const char* TOLK_VERSION = "0.7.0"; +constexpr const char* TOLK_VERSION = "0.8.0"; } // namespace tolk From b1c9466df40d6bbfbb4d977fabdf9628aec926b3 Mon Sep 17 00:00:00 2001 From: tolk-vm Date: Mon, 27 Jan 2025 17:09:21 +0300 Subject: [PATCH 41/61] Suppress clang warning "ATOMIC_FLAG_INIT marked deprecated" (#1502) In C++20, macro 'ATOMIC_FLAG_INIT' has been marked as deprecated. We need still to use it to be able to compile for C++17. For now, just suppress this warning. 
--- tdutils/td/utils/SpinLock.h | 3 +++ tdutils/td/utils/logging.cpp | 3 +++ tdutils/td/utils/logging.h | 3 +++ tdutils/td/utils/port/detail/PollableFd.h | 3 +++ 4 files changed, 12 insertions(+) diff --git a/tdutils/td/utils/SpinLock.h b/tdutils/td/utils/SpinLock.h index b5bb62db..f0856f0c 100644 --- a/tdutils/td/utils/SpinLock.h +++ b/tdutils/td/utils/SpinLock.h @@ -63,7 +63,10 @@ class SpinLock { } private: +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-pragma" std::atomic_flag flag_ = ATOMIC_FLAG_INIT; +#pragma clang diagnostic pop void unlock() { flag_.clear(std::memory_order_release); } diff --git a/tdutils/td/utils/logging.cpp b/tdutils/td/utils/logging.cpp index 345615f1..03d32ee2 100644 --- a/tdutils/td/utils/logging.cpp +++ b/tdutils/td/utils/logging.cpp @@ -176,7 +176,10 @@ void TsCerr::enterCritical() { void TsCerr::exitCritical() { lock_.clear(std::memory_order_release); } +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-pragma" TsCerr::Lock TsCerr::lock_ = ATOMIC_FLAG_INIT; +#pragma clang diagnostic pop class DefaultLog : public LogInterface { public: diff --git a/tdutils/td/utils/logging.h b/tdutils/td/utils/logging.h index 5c9a0621..dbf4c64b 100644 --- a/tdutils/td/utils/logging.h +++ b/tdutils/td/utils/logging.h @@ -343,7 +343,10 @@ class TsLog : public LogInterface { private: LogInterface *log_ = nullptr; +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-pragma" std::atomic_flag lock_ = ATOMIC_FLAG_INIT; +#pragma clang diagnostic pop void enter_critical() { while (lock_.test_and_set(std::memory_order_acquire)) { // spin diff --git a/tdutils/td/utils/port/detail/PollableFd.h b/tdutils/td/utils/port/detail/PollableFd.h index dceea4f3..6cb621e8 100644 --- a/tdutils/td/utils/port/detail/PollableFd.h +++ b/tdutils/td/utils/port/detail/PollableFd.h @@ -149,7 +149,10 @@ class PollableFdInfo : private ListNode { private: NativeFd fd_{}; +#pragma clang diagnostic 
push +#pragma clang diagnostic ignored "-Wdeprecated-pragma" std::atomic_flag lock_ = ATOMIC_FLAG_INIT; +#pragma clang diagnostic pop PollFlagsSet flags_; #if TD_PORT_WINDOWS SpinLock observer_lock_; From c7271d97ae1af53def8f9487dba44bb613662762 Mon Sep 17 00:00:00 2001 From: neodix42 Date: Mon, 3 Feb 2025 12:16:11 +0400 Subject: [PATCH 42/61] Add smartcont+lib folders to release (#1508) * add folders smartcont and lib only to release for having a small download link * allow usage of patter in file name * upgrade upload-release-action@v2 to v3 * Revert "upgrade upload-release-action@v2 to v3" This reverts commit 516126084a8bda7524c557197c357f0e95b05a55. * use gh cli for upload smartcont_lib * use gh cli for upload smartcont_lib * gh requires gh_token * clean up --- .github/workflows/create-release.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.github/workflows/create-release.yml b/.github/workflows/create-release.yml index 652aaef6..05a3db26 100644 --- a/.github/workflows/create-release.yml +++ b/.github/workflows/create-release.yml @@ -4,6 +4,9 @@ on: [workflow_dispatch] permissions: write-all +env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + jobs: create-release: runs-on: ubuntu-22.04 @@ -498,6 +501,14 @@ jobs: asset_name: ton-linux-x86_64.zip tag: ${{ steps.tag.outputs.TAG }} + - name: Upload generic smartcont+lib artifact + run: | + mkdir smartcont_lib + cd smartcont_lib + cp -r ../artifacts/ton-x86_64-linux/{smartcont,lib} . + zip -r smartcont_lib.zip . 
+ gh release upload ${{ steps.tag.outputs.TAG }} smartcont_lib.zip + - name: Upload Linux x86-64 single artifact - fift uses: svenstaro/upload-release-action@v2 with: From 3c245c614600df950c5ba1f44be146b16199a45e Mon Sep 17 00:00:00 2001 From: EmelyanenkoK Date: Mon, 3 Feb 2025 11:16:44 +0300 Subject: [PATCH 43/61] Add forgotten highload-v2 to unlock (#1511) --- crypto/block/transaction.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crypto/block/transaction.cpp b/crypto/block/transaction.cpp index 63e9065b..ba50c581 100644 --- a/crypto/block/transaction.cpp +++ b/crypto/block/transaction.cpp @@ -1194,6 +1194,8 @@ static td::optional override_gas_limit(const ComputePhaseConfig& cfg .new_limit = 70'000'000, .from_version = 9, .until = 1740787200}; accounts[parse_addr("EQCkoRp4OE-SFUoMEnYfL3vF43T3AzNfW8jyTC4yzk8cJqMS")] = { .new_limit = 70'000'000, .from_version = 9, .until = 1740787200}; + accounts[parse_addr("UQBN5ICras79U8FYEm71ws34n-ZNIQ0LRNpckOUsIV3OebnC")] = { + .new_limit = 70'000'000, .from_version = 9, .until = 1740787200}; accounts[parse_addr("EQBDanbCeUqI4_v-xrnAN0_I2wRvEIaLg1Qg2ZN5c6Zl1KOh")] = { .new_limit = 225'000'000, .from_version = 9, .until = 1740787200}; return accounts; From aef538114a948115e82e89ef0c94aa03f179042b Mon Sep 17 00:00:00 2001 From: Marat S Date: Thu, 6 Feb 2025 01:31:22 +0000 Subject: [PATCH 44/61] Fix get_prev_blocks_info() at LS getConfigParams --- validator/impl/liteserver.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/validator/impl/liteserver.cpp b/validator/impl/liteserver.cpp index 723dbfe9..50938cd5 100644 --- a/validator/impl/liteserver.cpp +++ b/validator/impl/liteserver.cpp @@ -1905,6 +1905,9 @@ void LiteQuery::continue_getConfigParams(int mode, std::vector param_list) } cfg = res.move_as_ok(); } else { + if (mode & block::ConfigInfo::needPrevBlocks) { + mode |= block::ConfigInfo::needCapabilities; + } auto res = block::ConfigInfo::extract_config(mpb.root(), mode); if (res.is_error()) { 
fatal_error(res.move_as_error()); From ce6c29941ea1e29a10c266ef615fb3302213642d Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Thu, 13 Feb 2025 14:25:04 +0300 Subject: [PATCH 45/61] Get rid of std::cerr logs in collator/validator --- crypto/block/block.cpp | 7 +- crypto/block/mc-config.cpp | 14 +- crypto/block/output-queue-merger.cpp | 1 - crypto/block/transaction.cpp | 95 ++++++---- crypto/tl/tlblib.cpp | 7 + crypto/tl/tlblib.hpp | 7 + crypto/vm/cells/CellSlice.cpp | 7 + crypto/vm/cells/CellSlice.h | 1 + tdutils/td/utils/logging.h | 4 +- validator/impl/accept-block.cpp | 20 +- validator/impl/collator.cpp | 265 ++++++++++++++++----------- validator/impl/signature-set.cpp | 3 - validator/impl/top-shard-descr.cpp | 8 +- validator/impl/validate-query.cpp | 71 ++++--- 14 files changed, 310 insertions(+), 200 deletions(-) diff --git a/crypto/block/block.cpp b/crypto/block/block.cpp index 302a2aa4..452d78a2 100644 --- a/crypto/block/block.cpp +++ b/crypto/block/block.cpp @@ -360,7 +360,6 @@ MsgProcessedUptoCollection::MsgProcessedUptoCollection(ton::ShardIdFull _owner, z.shard = key.get_uint(64); z.mc_seqno = (unsigned)((key + 64).get_uint(32)); z.last_inmsg_lt = value.write().fetch_ulong(64); - // std::cerr << "ProcessedUpto shard " << std::hex << z.shard << std::dec << std::endl; return value.write().fetch_bits_to(z.last_inmsg_hash) && z.shard && ton::shard_contains(owner.shard, z.shard); }); } @@ -862,8 +861,10 @@ td::Status ShardState::unpack_out_msg_queue_info(Ref out_msg_queue_inf out_msg_queue_ = std::make_unique(std::move(qinfo.out_queue), 352, block::tlb::aug_OutMsgQueue); if (verbosity >= 3 * 1) { - LOG(DEBUG) << "unpacking ProcessedUpto of our previous block " << id_.to_str(); - block::gen::t_ProcessedInfo.print(std::cerr, qinfo.proc_info); + FLOG(DEBUG) { + sb << "unpacking ProcessedUpto of our previous block " << id_.to_str(); + block::gen::t_ProcessedInfo.print(sb, qinfo.proc_info); + }; } if (!block::gen::t_ProcessedInfo.validate_csr(1024, 
qinfo.proc_info)) { return td::Status::Error( diff --git a/crypto/block/mc-config.cpp b/crypto/block/mc-config.cpp index 14881913..48a2d613 100644 --- a/crypto/block/mc-config.cpp +++ b/crypto/block/mc-config.cpp @@ -163,8 +163,11 @@ td::Status ConfigInfo::unpack() { } gen::McStateExtra::Record extra_info; if (!tlb::unpack_cell(state_extra_root_, extra_info)) { - vm::load_cell_slice(state_extra_root_).print_rec(std::cerr); - block::gen::t_McStateExtra.print_ref(std::cerr, state_extra_root_); + FLOG(WARNING) { + sb << "state extra information is invalid: "; + vm::load_cell_slice(state_extra_root_).print_rec(sb); + block::gen::t_McStateExtra.print_ref(sb, state_extra_root_); + }; return td::Status::Error("state extra information is invalid"); } gen::ValidatorInfo::Record validator_info; @@ -1067,7 +1070,6 @@ Ref ShardConfig::get_shard_hash(ton::ShardIdFull id, bool exact) co ton::ShardIdFull true_id; vm::CellSlice cs; if (get_shard_hash_raw(cs, id, true_id, exact)) { - // block::gen::t_ShardDescr.print(std::cerr, vm::CellSlice{cs}); return McShardHash::unpack(cs, true_id); } else { return {}; @@ -1637,8 +1639,10 @@ bool ShardConfig::set_shard_info(ton::ShardIdFull shard, Ref value) { if (!gen::t_BinTree_ShardDescr.validate_ref(1024, value)) { LOG(ERROR) << "attempting to store an invalid (BinTree ShardDescr) at shard configuration position " << shard.to_str(); - gen::t_BinTree_ShardDescr.print_ref(std::cerr, value); - vm::load_cell_slice(value).print_rec(std::cerr); + FLOG(WARNING) { + gen::t_BinTree_ShardDescr.print_ref(sb, value); + vm::load_cell_slice(value).print_rec(sb); + }; return false; } auto root = shard_hashes_dict_->lookup_ref(td::BitArray<32>{shard.workchain}); diff --git a/crypto/block/output-queue-merger.cpp b/crypto/block/output-queue-merger.cpp index aa425f6b..7d258cfe 100644 --- a/crypto/block/output-queue-merger.cpp +++ b/crypto/block/output-queue-merger.cpp @@ -138,7 +138,6 @@ bool OutputQueueMerger::add_root(int src, Ref outmsg_root) { if 
(outmsg_root.is_null()) { return true; } - //block::gen::HashmapAug{352, block::gen::t_EnqueuedMsg, block::gen::t_uint64}.print_ref(std::cerr, outmsg_root); auto kv = std::make_unique(src, std::move(outmsg_root)); if (kv->replace_by_prefix(common_pfx.cbits(), common_pfx_len)) { heap.push_back(std::move(kv)); diff --git a/crypto/block/transaction.cpp b/crypto/block/transaction.cpp index ba50c581..46c696f5 100644 --- a/crypto/block/transaction.cpp +++ b/crypto/block/transaction.cpp @@ -446,8 +446,10 @@ bool Account::unpack(Ref shard_account, ton::UnixTime now, bool s return false; } if (verbosity > 2) { - shard_account->print_rec(std::cerr, 2); - block::gen::t_ShardAccount.print(std::cerr, *shard_account); + FLOG(INFO) { + shard_account->print_rec(sb, 2); + block::gen::t_ShardAccount.print(sb, shard_account); + }; } block::gen::ShardAccount::Record acc_info; if (!(block::tlb::t_ShardAccount.validate_csr(shard_account) && tlb::unpack_exact(shard_account.write(), acc_info))) { @@ -737,9 +739,11 @@ bool Transaction::unpack_input_msg(bool ihr_delivered, const ActionPhaseConfig* return false; } if (verbosity > 2) { - fprintf(stderr, "unpacking inbound message for a new transaction: "); - block::gen::t_Message_Any.print_ref(std::cerr, in_msg); - load_cell_slice(in_msg).print_rec(std::cerr); + FLOG(INFO) { + sb << "unpacking inbound message for a new transaction: "; + block::gen::t_Message_Any.print_ref(sb, in_msg); + load_cell_slice(in_msg).print_rec(sb); + }; } auto cs = vm::load_cell_slice(in_msg); int tag = block::gen::t_CommonMsgInfo.get_tag(cs); @@ -1550,11 +1554,13 @@ bool Transaction::run_precompiled_contract(const ComputePhaseConfig& cfg, precom cp.actions = impl.get_c5(); int out_act_num = output_actions_count(cp.actions); if (verbosity > 2) { - std::cerr << "new smart contract data: "; - bool can_be_special = true; - load_cell_slice_special(cp.new_data, can_be_special).print_rec(std::cerr); - std::cerr << "output actions: "; - 
block::gen::OutList{out_act_num}.print_ref(std::cerr, cp.actions); + FLOG(INFO) { + sb << "new smart contract data: "; + bool can_be_special = true; + load_cell_slice_special(cp.new_data, can_be_special).print_rec(sb); + sb << "output actions: "; + block::gen::OutList{out_act_num}.print_ref(sb, cp.actions); + }; } } cp.mode = 0; @@ -1619,7 +1625,6 @@ bool Transaction::prepare_compute_phase(const ComputePhaseConfig& cfg) { if (in_msg_state.not_null()) { LOG(DEBUG) << "HASH(in_msg_state) = " << in_msg_state->get_hash().bits().to_hex(256) << ", account_state_hash = " << account.state_hash.to_hex(); - // vm::load_cell_slice(in_msg_state).print_rec(std::cerr); } else { LOG(DEBUG) << "in_msg_state is null"; } @@ -1775,11 +1780,13 @@ bool Transaction::prepare_compute_phase(const ComputePhaseConfig& cfg) { cp.actions = vm.get_committed_state().c5; // c5 -> action list int out_act_num = output_actions_count(cp.actions); if (verbosity > 2) { - std::cerr << "new smart contract data: "; - bool can_be_special = true; - load_cell_slice_special(cp.new_data, can_be_special).print_rec(std::cerr); - std::cerr << "output actions: "; - block::gen::OutList{out_act_num}.print_ref(std::cerr, cp.actions); + FLOG(INFO) { + sb << "new smart contract data: "; + bool can_be_special = true; + load_cell_slice_special(cp.new_data, can_be_special).print_rec(sb); + sb << "output actions: "; + block::gen::OutList{out_act_num}.print_ref(sb, cp.actions); + }; } } cp.mode = 0; @@ -2725,14 +2732,18 @@ int Transaction::try_action_send_msg(const vm::CellSlice& cs0, ActionPhase& ap, } if (!block::gen::t_Message_Any.validate_ref(new_msg)) { LOG(ERROR) << "generated outbound message is not a valid (Message Any) according to automated check"; - block::gen::t_Message_Any.print_ref(std::cerr, new_msg); - vm::load_cell_slice(new_msg).print_rec(std::cerr); + FLOG(INFO) { + block::gen::t_Message_Any.print_ref(sb, new_msg); + vm::load_cell_slice(new_msg).print_rec(sb); + }; collect_fine(); return -1; } if 
(verbosity > 2) { - std::cerr << "converted outbound message: "; - block::gen::t_Message_Any.print_ref(std::cerr, new_msg); + FLOG(INFO) { + sb << "converted outbound message: "; + block::gen::t_Message_Any.print_ref(sb, new_msg); + }; } ap.msgs_created++; @@ -3045,8 +3056,10 @@ bool Transaction::prepare_bounce_phase(const ActionPhaseConfig& cfg) { } CHECK(cb.finalize_to(bp.out_msg)); if (verbosity > 2) { - LOG(INFO) << "generated bounced message: "; - block::gen::t_Message_Any.print_ref(std::cerr, bp.out_msg); + FLOG(INFO) { + sb << "generated bounced message: "; + block::gen::t_Message_Any.print_ref(sb, bp.out_msg); + }; } out_msgs.push_back(bp.out_msg); bp.ok = true; @@ -3167,11 +3180,13 @@ bool Transaction::compute_state() { auto frozen_state = cb2.finalize(); frozen_hash = frozen_state->get_hash().bits(); if (verbosity >= 3 * 1) { // !!!DEBUG!!! - std::cerr << "freezing state of smart contract: "; - block::gen::t_StateInit.print_ref(std::cerr, frozen_state); - CHECK(block::gen::t_StateInit.validate_ref(frozen_state)); - CHECK(block::tlb::t_StateInit.validate_ref(frozen_state)); - std::cerr << "with hash " << frozen_hash.to_hex() << std::endl; + FLOG(INFO) { + sb << "freezing state of smart contract: "; + block::gen::t_StateInit.print_ref(sb, frozen_state); + CHECK(block::gen::t_StateInit.validate_ref(frozen_state)); + CHECK(block::tlb::t_StateInit.validate_ref(frozen_state)); + sb << "with hash " << frozen_hash.to_hex(); + }; } } new_code.clear(); @@ -3229,8 +3244,10 @@ bool Transaction::compute_state() { CHECK(cb.append_data_cell_bool(std::move(storage))); new_total_state = cb.finalize(); if (verbosity > 2) { - std::cerr << "new account state: "; - block::gen::t_Account.print_ref(std::cerr, new_total_state); + FLOG(INFO) { + sb << "new account state: "; + block::gen::t_Account.print_ref(sb, new_total_state); + }; } CHECK(block::tlb::t_Account.validate_ref(new_total_state)); return true; @@ -3322,22 +3339,28 @@ bool Transaction::serialize() { return false; } 
if (verbosity >= 3 * 1) { - std::cerr << "new transaction: "; - block::gen::t_Transaction.print_ref(std::cerr, root); - vm::load_cell_slice(root).print_rec(std::cerr); + FLOG(INFO) { + sb << "new transaction: "; + block::gen::t_Transaction.print_ref(sb, root); + vm::load_cell_slice(root).print_rec(sb); + }; } if (!block::gen::t_Transaction.validate_ref(4096, root)) { LOG(ERROR) << "newly-generated transaction failed to pass automated validation:"; - vm::load_cell_slice(root).print_rec(std::cerr); - block::gen::t_Transaction.print_ref(std::cerr, root); + FLOG(INFO) { + vm::load_cell_slice(root).print_rec(sb); + block::gen::t_Transaction.print_ref(sb, root); + }; root.clear(); return false; } if (!block::tlb::t_Transaction.validate_ref(4096, root)) { LOG(ERROR) << "newly-generated transaction failed to pass hand-written validation:"; - vm::load_cell_slice(root).print_rec(std::cerr); - block::gen::t_Transaction.print_ref(std::cerr, root); + FLOG(INFO) { + vm::load_cell_slice(root).print_rec(sb); + block::gen::t_Transaction.print_ref(sb, root); + }; root.clear(); return false; } diff --git a/crypto/tl/tlblib.cpp b/crypto/tl/tlblib.cpp index 05ea8e1c..de5a483c 100644 --- a/crypto/tl/tlblib.cpp +++ b/crypto/tl/tlblib.cpp @@ -196,6 +196,13 @@ bool TLB::print_ref(std::ostream& os, Ref cell_ref, int indent, int re return pp.fail_unless(print_ref(pp, std::move(cell_ref))); } +bool TLB::print_ref(td::StringBuilder& sb, Ref cell_ref, int indent, int rec_limit) const { + std::ostringstream ss; + auto result = print_ref(ss, std::move(cell_ref), indent, rec_limit); + sb << ss.str(); + return result; +} + std::string TLB::as_string_skip(vm::CellSlice& cs, int indent) const { std::ostringstream os; print_skip(os, cs, indent); diff --git a/crypto/tl/tlblib.hpp b/crypto/tl/tlblib.hpp index a6350ece..c10049a9 100644 --- a/crypto/tl/tlblib.hpp +++ b/crypto/tl/tlblib.hpp @@ -246,7 +246,14 @@ class TLB { bool print(std::ostream& os, Ref cs_ref, int indent = 0, int rec_limit = 0) const { 
return print(os, *cs_ref, indent, rec_limit); } + bool print(td::StringBuilder& sb, Ref cs_ref, int indent = 0, int rec_limit = 0) const { + std::ostringstream ss; + auto result = print(ss, *cs_ref, indent, rec_limit); + sb << ss.str(); + return result; + } bool print_ref(std::ostream& os, Ref cell_ref, int indent = 0, int rec_limit = 0) const; + bool print_ref(td::StringBuilder& sb, Ref cell_ref, int indent = 0, int rec_limit = 0) const; bool print_ref(int rec_limit, std::ostream& os, Ref cell_ref, int indent = 0) const { return print_ref(os, std::move(cell_ref), indent, rec_limit); } diff --git a/crypto/vm/cells/CellSlice.cpp b/crypto/vm/cells/CellSlice.cpp index 4d8c3c5a..9cd3e931 100644 --- a/crypto/vm/cells/CellSlice.cpp +++ b/crypto/vm/cells/CellSlice.cpp @@ -1026,6 +1026,13 @@ bool CellSlice::print_rec(std::ostream& os, int indent) const { return print_rec(os, &limit, indent); } +bool CellSlice::print_rec(td::StringBuilder& sb, int indent) const { + std::ostringstream ss; + auto result = print_rec(ss, indent); + sb << ss.str(); + return result; +} + bool CellSlice::print_rec(int limit, std::ostream& os, int indent) const { return print_rec(os, &limit, indent); } diff --git a/crypto/vm/cells/CellSlice.h b/crypto/vm/cells/CellSlice.h index 33fad741..ecce30f5 100644 --- a/crypto/vm/cells/CellSlice.h +++ b/crypto/vm/cells/CellSlice.h @@ -257,6 +257,7 @@ class CellSlice : public td::CntObject { void dump(std::ostream& os, int level = 0, bool endl = true) const; void dump_hex(std::ostream& os, int mode = 0, bool endl = false) const; bool print_rec(std::ostream& os, int indent = 0) const; + bool print_rec(td::StringBuilder& sb, int indent = 0) const; bool print_rec(std::ostream& os, int* limit, int indent = 0) const; bool print_rec(int limit, std::ostream& os, int indent = 0) const; void error() const { diff --git a/tdutils/td/utils/logging.h b/tdutils/td/utils/logging.h index dbf4c64b..bb28f6df 100644 --- a/tdutils/td/utils/logging.h +++ 
b/tdutils/td/utils/logging.h @@ -264,8 +264,8 @@ class Logger { sb_ << other; return *this; } - LambdaPrintHelper operator<<(const LambdaPrint &) { - return LambdaPrintHelper{*this}; + LambdaPrintHelper operator<<(const LambdaPrint &) { + return LambdaPrintHelper{sb_}; } MutableCSlice as_cslice() { diff --git a/validator/impl/accept-block.cpp b/validator/impl/accept-block.cpp index a9dd7fe2..de48626d 100644 --- a/validator/impl/accept-block.cpp +++ b/validator/impl/accept-block.cpp @@ -308,8 +308,11 @@ bool AcceptBlockQuery::create_new_proof() { } // 10. check resulting object if (!block::gen::t_BlockProof.validate_ref(bs_cell)) { - block::gen::t_BlockProof.print_ref(std::cerr, bs_cell); - vm::load_cell_slice(bs_cell).print_rec(std::cerr); + FLOG(WARNING) { + sb << "BlockProof object just created failed to pass automated consistency checks: "; + block::gen::t_BlockProof.print_ref(sb, bs_cell); + vm::load_cell_slice(bs_cell).print_rec(sb); + }; return fatal_error("BlockProof object just created failed to pass automated consistency checks"); } // 11. 
create a proof object from this cell @@ -851,15 +854,12 @@ bool AcceptBlockQuery::create_top_shard_block_description() { && (root.is_null() || cb.store_ref_bool(std::move(root))) && cb.finalize_to(td_cell))) { return fatal_error("cannot serialize ShardTopBlockDescription for the newly-accepted block "s + id_.to_str()); } - if (false) { - // debug output - std::cerr << "new ShardTopBlockDescription: "; - block::gen::t_TopBlockDescr.print_ref(std::cerr, td_cell); - vm::load_cell_slice(td_cell).print_rec(std::cerr); - } if (!block::gen::t_TopBlockDescr.validate_ref(td_cell)) { - block::gen::t_TopBlockDescr.print_ref(std::cerr, td_cell); - vm::load_cell_slice(td_cell).print_rec(std::cerr); + FLOG(WARNING) { + sb << "just created ShardTopBlockDescription is invalid: "; + block::gen::t_TopBlockDescr.print_ref(sb, td_cell); + vm::load_cell_slice(td_cell).print_rec(sb); + }; return fatal_error("just created ShardTopBlockDescription for "s + id_.to_str() + " is invalid"); } auto res = vm::std_boc_serialize(td_cell, 0); diff --git a/validator/impl/collator.cpp b/validator/impl/collator.cpp index e171eaa1..d3378cd8 100644 --- a/validator/impl/collator.cpp +++ b/validator/impl/collator.cpp @@ -53,16 +53,6 @@ static constexpr int HIGH_PRIORITY_EXTERNAL = 10; // don't skip high priority e static constexpr int MAX_ATTEMPTS = 5; -#define DBG(__n) dbg(__n)&& -#define DSTART int __dcnt = 0; -#define DEB DBG(++__dcnt) - -static inline bool dbg(int c) TD_UNUSED; -static inline bool dbg(int c) { - std::cerr << '[' << (char)('0' + c / 10) << (char)('0' + c % 10) << ']'; - return true; -} - /** * Constructs a Collator object. 
* @@ -761,8 +751,6 @@ bool Collator::unpack_last_mc_state() { << " (upgrade validator software?)"; } // TODO: extract start_lt and end_lt from prev_mc_block as well - // std::cerr << " block::gen::ShardState::print_ref(mc_state_root) = "; - // block::gen::t_ShardState.print_ref(std::cerr, mc_state_root, 2); return true; } @@ -888,8 +876,10 @@ void Collator::got_neighbor_out_queue(int i, td::Result> res) // unpack ProcessedUpto LOG(DEBUG) << "unpacking ProcessedUpto of neighbor " << descr.blk_.to_str(); if (verbosity >= 2) { - block::gen::t_ProcessedInfo.print(std::cerr, qinfo.proc_info); - qinfo.proc_info->print_rec(std::cerr); + FLOG(INFO) { + block::gen::t_ProcessedInfo.print(sb, qinfo.proc_info); + qinfo.proc_info->print_rec(sb); + }; } descr.processed_upto = block::MsgProcessedUptoCollection::unpack(descr.shard(), qinfo.proc_info); if (!descr.processed_upto) { @@ -1756,9 +1746,11 @@ bool Collator::import_new_shard_top_blocks() { shard_conf_adjusted_ = true; } if (tb_act && verbosity >= 0) { // DEBUG - LOG(INFO) << "updated shard block configuration to "; - auto csr = shard_conf_->get_root_csr(); - block::gen::t_ShardHashes.print(std::cerr, csr.write()); + FLOG(INFO) { + sb << "updated shard block configuration to "; + auto csr = shard_conf_->get_root_csr(); + block::gen::t_ShardHashes.print(sb, csr); + }; } block::gen::ShardFeeCreated::Record fc; if (!(tlb::csr_unpack(fees_import_dict_->get_root_extra(), @@ -2279,10 +2271,12 @@ bool Collator::dequeue_message(Ref msg_envelope, ton::LogicalTime deli bool Collator::out_msg_queue_cleanup() { LOG(INFO) << "cleaning outbound queue from messages already imported by neighbors"; if (verbosity >= 2) { - auto rt = out_msg_queue_->get_root(); - std::cerr << "old out_msg_queue is "; - block::gen::t_OutMsgQueue.print(std::cerr, *rt); - rt->print_rec(std::cerr); + FLOG(INFO) { + auto rt = out_msg_queue_->get_root(); + sb << "old out_msg_queue is "; + block::gen::t_OutMsgQueue.print(sb, rt); + rt->print_rec(sb); + }; } if 
(after_merge_) { @@ -2422,10 +2416,12 @@ bool Collator::out_msg_queue_cleanup() { << out_msg_queue_size_; } if (verbosity >= 2) { - auto rt = out_msg_queue_->get_root(); - std::cerr << "new out_msg_queue is "; - block::gen::t_OutMsgQueue.print(std::cerr, *rt); - rt->print_rec(std::cerr); + FLOG(INFO) { + auto rt = out_msg_queue_->get_root(); + sb << "new out_msg_queue is "; + block::gen::t_OutMsgQueue.print(sb, rt); + rt->print_rec(sb); + }; } return register_out_msg_queue_op(true); } @@ -2524,19 +2520,27 @@ bool Collator::combine_account_transactions() { auto cell = cb.finalize(); auto csr = vm::load_cell_slice_ref(cell); if (verbosity > 2) { - std::cerr << "new AccountBlock for " << z.first.to_hex() << ": "; - block::gen::t_AccountBlock.print_ref(std::cerr, cell); - csr->print_rec(std::cerr); + FLOG(INFO) { + sb << "new AccountBlock for " << z.first.to_hex() << ": "; + block::gen::t_AccountBlock.print_ref(sb, cell); + csr->print_rec(sb); + }; } if (!block::gen::t_AccountBlock.validate_ref(100000, cell)) { - block::gen::t_AccountBlock.print_ref(std::cerr, cell); - csr->print_rec(std::cerr); + FLOG(WARNING) { + sb << "AccountBlock failed to pass automatic validation tests: "; + block::gen::t_AccountBlock.print_ref(sb, cell); + csr->print_rec(sb); + }; return fatal_error(std::string{"new AccountBlock for "} + z.first.to_hex() + " failed to pass automatic validation tests"); } if (!block::tlb::t_AccountBlock.validate_ref(100000, cell)) { - block::gen::t_AccountBlock.print_ref(std::cerr, cell); - csr->print_rec(std::cerr); + FLOG(WARNING) { + sb << "AccountBlock failed to pass handwritten validation tests: "; + block::gen::t_AccountBlock.print_ref(sb, cell); + csr->print_rec(sb); + }; return fatal_error(std::string{"new AccountBlock for "} + z.first.to_hex() + " failed to pass handwritten validation tests"); } @@ -2561,8 +2565,10 @@ bool Collator::combine_account_transactions() { } else if (acc.status == block::Account::acc_nonexist) { // account deleted if (verbosity 
> 2) { - std::cerr << "deleting account " << acc.addr.to_hex() << " with empty new value "; - block::gen::t_Account.print_ref(std::cerr, acc.total_state); + FLOG(INFO) { + sb << "deleting account " << acc.addr.to_hex() << " with empty new value "; + block::gen::t_Account.print_ref(sb, acc.total_state); + }; } if (account_dict->lookup_delete(acc.addr).is_null()) { return fatal_error(std::string{"cannot delete account "} + acc.addr.to_hex() + " from ShardAccounts"); @@ -2570,8 +2576,10 @@ bool Collator::combine_account_transactions() { } else { // existing account modified if (verbosity > 4) { - std::cerr << "modifying account " << acc.addr.to_hex() << " to "; - block::gen::t_Account.print_ref(std::cerr, acc.total_state); + FLOG(INFO) { + sb << "modifying account " << acc.addr.to_hex() << " to "; + block::gen::t_Account.print_ref(sb, acc.total_state); + }; } if (!(cb.store_ref_bool(acc.total_state) // account_descr$_ account:^Account && cb.store_bits_bool(acc.last_trans_hash_) // last_trans_hash:bits256 @@ -2594,9 +2602,11 @@ bool Collator::combine_account_transactions() { return fatal_error("cannot serialize ShardAccountBlocks"); } if (verbosity > 2) { - std::cerr << "new ShardAccountBlocks: "; - block::gen::t_ShardAccountBlocks.print_ref(std::cerr, shard_account_blocks_); - vm::load_cell_slice(shard_account_blocks_).print_rec(std::cerr); + FLOG(INFO) { + sb << "new ShardAccountBlocks: "; + block::gen::t_ShardAccountBlocks.print_ref(sb, shard_account_blocks_); + vm::load_cell_slice(shard_account_blocks_).print_rec(sb); + }; } if (!block::gen::t_ShardAccountBlocks.validate_ref(100000, shard_account_blocks_)) { return fatal_error("new ShardAccountBlocks failed to pass automatic validity tests"); @@ -2606,9 +2616,11 @@ bool Collator::combine_account_transactions() { } auto shard_accounts = account_dict->get_root(); if (verbosity > 2) { - std::cerr << "new ShardAccounts: "; - block::gen::t_ShardAccounts.print(std::cerr, *shard_accounts); - 
shard_accounts->print_rec(std::cerr); + FLOG(INFO) { + sb << "new ShardAccounts: "; + block::gen::t_ShardAccounts.print(sb, shard_accounts); + shard_accounts->print_rec(sb); + }; } if (verify >= 2) { LOG(INFO) << "verifying new ShardAccounts"; @@ -2659,7 +2671,9 @@ bool Collator::create_special_transaction(block::CurrencyCollection amount, Ref< addr.to_hex()); } if (verbosity >= 4) { - block::gen::t_Message_Any.print_ref(std::cerr, msg); + FLOG(INFO) { + block::gen::t_Message_Any.print_ref(sb, msg); + }; } CHECK(block::gen::t_Message_Any.validate_ref(msg)); CHECK(block::tlb::t_Message.validate_ref(msg)); @@ -3163,8 +3177,10 @@ int Collator::process_one_new_message(block::NewOutMsg msg, bool enqueue_only, R Ref msg_env; CHECK(block::tlb::pack_cell(msg_env, msg_env_rec)); if (verbosity > 2) { - std::cerr << "new (processed outbound) message envelope: "; - block::gen::t_MsgEnvelope.print_ref(std::cerr, msg_env); + FLOG(INFO) { + sb << "new (processed outbound) message envelope: "; + block::gen::t_MsgEnvelope.print_ref(sb, msg_env); + }; } // 3. create InMsg, referring to this MsgEnvelope and this Transaction vm::CellBuilder cb; @@ -3286,16 +3302,20 @@ bool Collator::enqueue_transit_message(Ref msg, Ref old_msg_ Ref out_msg = cb.finalize(); // 4.1. insert OutMsg into OutMsgDescr if (verbosity > 2) { - std::cerr << "OutMsg for a transit message: "; - block::gen::t_OutMsg.print_ref(std::cerr, out_msg); + FLOG(INFO) { + sb << "OutMsg for a transit message: "; + block::gen::t_OutMsg.print_ref(sb, out_msg); + }; } if (!insert_out_msg(out_msg)) { return fatal_error("cannot insert a new OutMsg into OutMsgDescr"); } // 4.2. 
insert InMsg into InMsgDescr if (verbosity > 2) { - std::cerr << "InMsg for a transit message: "; - block::gen::t_InMsg.print_ref(std::cerr, in_msg); + FLOG(INFO) { + sb << "InMsg for a transit message: "; + block::gen::t_InMsg.print_ref(sb, in_msg); + }; } if (!insert_in_msg(in_msg)) { return fatal_error("cannot insert a new InMsg into InMsgDescr"); @@ -3366,7 +3386,10 @@ bool Collator::process_inbound_message(Ref enq_msg, ton::LogicalT if (enq_msg.is_null() || enq_msg->size_ext() != 0x10040 || (enqueued_lt = enq_msg->prefetch_ulong(64)) < /* 0 */ 1 * lt) { // DEBUG if (enq_msg.not_null()) { - block::gen::t_EnqueuedMsg.print(std::cerr, *enq_msg); + FLOG(WARNING) { + sb << "inbound internal message is not a valid EnqueuedMsg: "; + block::gen::t_EnqueuedMsg.print(sb, enq_msg); + }; } LOG(ERROR) << "inbound internal message is not a valid EnqueuedMsg (created lt " << lt << ", enqueued " << enqueued_lt << ")"; @@ -3590,14 +3613,18 @@ bool Collator::process_inbound_internal_messages() { LOG(DEBUG) << "processing inbound message with (lt,hash)=(" << kv->lt << "," << kv->key.to_hex() << ") from neighbor #" << kv->source; if (verbosity > 2) { - std::cerr << "inbound message: lt=" << kv->lt << " from=" << kv->source << " key=" << kv->key.to_hex() << " msg="; - block::gen::t_EnqueuedMsg.print(std::cerr, *(kv->msg)); + FLOG(INFO) { + sb << "inbound message: lt=" << kv->lt << " from=" << kv->source << " key=" << kv->key.to_hex() << " msg="; + block::gen::t_EnqueuedMsg.print(sb, kv->msg); + }; } if (!process_inbound_message(kv->msg, kv->lt, kv->key.cbits(), neighbors_.at(kv->source))) { if (verbosity > 1) { - std::cerr << "invalid inbound message: lt=" << kv->lt << " from=" << kv->source << " key=" << kv->key.to_hex() - << " msg="; - block::gen::t_EnqueuedMsg.print(std::cerr, *(kv->msg)); + FLOG(INFO) { + sb << "invalid inbound message: lt=" << kv->lt << " from=" << kv->source << " key=" << kv->key.to_hex() + << " msg="; + block::gen::t_EnqueuedMsg.print(sb, kv->msg); + }; } 
return fatal_error("error processing inbound internal message"); } @@ -3884,7 +3911,10 @@ bool Collator::process_deferred_message(Ref enq_msg, StdSmcAddres LogicalTime enqueued_lt = 0; if (enq_msg.is_null() || enq_msg->size_ext() != 0x10040 || (enqueued_lt = enq_msg->prefetch_ulong(64)) != lt) { if (enq_msg.not_null()) { - block::gen::t_EnqueuedMsg.print(std::cerr, *enq_msg); + FLOG(WARNING) { + sb << "internal message in DispatchQueue is not a valid EnqueuedMsg: "; + block::gen::t_EnqueuedMsg.print(sb, enq_msg); + }; } LOG(ERROR) << "internal message in DispatchQueue is not a valid EnqueuedMsg (created lt " << lt << ", enqueued " << enqueued_lt << ")"; @@ -3986,8 +4016,10 @@ bool Collator::process_deferred_message(Ref enq_msg, StdSmcAddres */ bool Collator::insert_in_msg(Ref in_msg) { if (verbosity > 2) { - std::cerr << "InMsg being inserted into InMsgDescr: "; - block::gen::t_InMsg.print_ref(std::cerr, in_msg); + FLOG(INFO) { + sb << "InMsg being inserted into InMsgDescr: "; + block::gen::t_InMsg.print_ref(sb, in_msg); + }; } auto cs = load_cell_slice(in_msg); if (!cs.size_refs()) { @@ -4028,8 +4060,10 @@ bool Collator::insert_in_msg(Ref in_msg) { */ bool Collator::insert_out_msg(Ref out_msg) { if (verbosity > 2) { - std::cerr << "OutMsg being inserted into OutMsgDescr: "; - block::gen::t_OutMsg.print_ref(std::cerr, out_msg); + FLOG(INFO) { + sb << "OutMsg being inserted into OutMsgDescr: "; + block::gen::t_OutMsg.print_ref(sb, out_msg); + }; } auto cs = load_cell_slice(out_msg); if (!cs.size_refs()) { @@ -4125,8 +4159,10 @@ bool Collator::enqueue_message(block::NewOutMsg msg, td::RefInt256 fwd_fees_rema } // 4. 
insert OutMsg into OutMsgDescr if (verbosity > 2) { - std::cerr << "OutMsg for a newly-generated message: "; - block::gen::t_OutMsg.print_ref(std::cerr, out_msg); + FLOG(INFO) { + sb << "OutMsg for a newly-generated message: "; + block::gen::t_OutMsg.print_ref(sb, out_msg); + }; } if (!insert_out_msg(out_msg)) { return fatal_error("cannot insert a new OutMsg into OutMsgDescr"); @@ -4419,9 +4455,12 @@ bool Collator::create_mc_state_extra() { bool ignore_cfg_changes = false; Ref cfg0; if (!block::valid_config_data(cfg_smc_config, config_addr, true, true, old_mparams_)) { - block::gen::t_Hashmap_32_Ref_Cell.print_ref(std::cerr, cfg_smc_config); LOG(ERROR) << "configuration smart contract "s + config_addr.to_hex() + " contains an invalid configuration in its data, IGNORING CHANGES"; + FLOG(WARNING) { + sb << "ignored configuration: "; + block::gen::t_Hashmap_32_Ref_Cell.print_ref(sb, cfg_smc_config); + }; ignore_cfg_changes = true; } else { cfg0 = cfg_dict.lookup_ref(td::BitArray<32>{(long long)0}); @@ -4459,34 +4498,26 @@ bool Collator::create_mc_state_extra() { return fatal_error(wset_res.move_as_error()); } bool update_shard_cc = is_key_block_ || (now_ / ccvc.shard_cc_lifetime > prev_now_ / ccvc.shard_cc_lifetime); - // temp debug - if (verbosity >= 3 * 1) { - auto csr = shard_conf_->get_root_csr(); - LOG(INFO) << "new shard configuration before post-processing is"; - std::ostringstream os; - csr->print_rec(os); - block::gen::t_ShardHashes.print(os, csr.write()); - LOG(INFO) << os.str(); - } - // end (temp debug) if (!update_shard_config(wset_res.move_as_ok(), ccvc, update_shard_cc)) { auto csr = shard_conf_->get_root_csr(); if (csr.is_null()) { LOG(WARNING) << "new shard configuration is null (!)"; } else { LOG(WARNING) << "invalid new shard configuration is"; - std::ostringstream os; - csr->print_rec(os); - block::gen::t_ShardHashes.print(os, csr.write()); - LOG(WARNING) << os.str(); + FLOG(WARNING) { + csr->print_rec(sb); + block::gen::t_ShardHashes.print(sb, 
csr); + }; } return fatal_error("cannot post-process shard configuration"); } // 3. save new shard_hashes state_extra.shard_hashes = shard_conf_->get_root_csr(); - if (verbosity >= 3 * 0) { // DEBUG - std::cerr << "updated shard configuration to "; - block::gen::t_ShardHashes.print(std::cerr, *state_extra.shard_hashes); + if (verbosity >= 3) { + FLOG(INFO) { + sb << "updated shard configuration to "; + block::gen::t_ShardHashes.print(sb, state_extra.shard_hashes); + }; } if (!block::gen::t_ShardHashes.validate_upto(10000, *state_extra.shard_hashes)) { return fatal_error("new ShardHashes is invalid"); @@ -4587,13 +4618,18 @@ bool Collator::create_mc_state_extra() { if (verify >= 2) { LOG(INFO) << "verifying new BlockCreateStats"; if (!block::gen::t_BlockCreateStats.validate_csr(100000, cs)) { - cs->print_rec(std::cerr); - block::gen::t_BlockCreateStats.print(std::cerr, *cs); + FLOG(WARNING) { + sb << "BlockCreateStats in the new masterchain state failed to pass automated validity checks: "; + cs->print_rec(sb); + block::gen::t_BlockCreateStats.print(sb, cs); + }; return fatal_error("BlockCreateStats in the new masterchain state failed to pass automated validity checks"); } } if (verbosity >= 4 * 1) { - block::gen::t_BlockCreateStats.print(std::cerr, *cs); + FLOG(INFO) { + block::gen::t_BlockCreateStats.print(sb, cs); + }; } } else { state_extra.r1.block_create_stats.clear(); @@ -4628,7 +4664,6 @@ bool Collator::update_block_creator_count(td::ConstBitPtr key, unsigned shard_in if (!block::unpack_CreatorStats(std::move(cs), mc_cnt, shard_cnt)) { return fatal_error("cannot unpack CreatorStats for "s + key.to_hex(256) + " from previous masterchain state"); } - // std::cerr << mc_cnt.to_str() << " " << shard_cnt.to_str() << std::endl; if (mc_incr && !mc_cnt.increase_by(mc_incr, now_)) { return fatal_error(PSTRING() << "cannot increase masterchain block counter in CreatorStats for " << key.to_hex(256) << " by " << mc_incr << " (old value is " << mc_cnt.to_str() << ")"); 
@@ -4999,9 +5034,11 @@ bool Collator::update_public_libraries() { } } if (libraries_changed_ && verbosity >= 2) { - std::cerr << "New public libraries: "; - block::gen::t_HashmapE_256_LibDescr.print(std::cerr, shard_libraries_->get_root()); - shard_libraries_->get_root()->print_rec(std::cerr); + FLOG(INFO) { + sb << "New public libraries: "; + block::gen::t_HashmapE_256_LibDescr.print(sb, shard_libraries_->get_root()); + shard_libraries_->get_root()->print_rec(sb); + }; } return true; } @@ -5124,9 +5161,11 @@ bool Collator::create_shard_state() { } LOG(DEBUG) << "min_ref_mc_seqno is " << min_ref_mc_seqno_; if (verbosity > 2) { - std::cerr << "new ShardState: "; - block::gen::t_ShardState.print_ref(std::cerr, state_root); - vm::load_cell_slice(state_root).print_rec(std::cerr); + FLOG(INFO) { + sb << "new ShardState: "; + block::gen::t_ShardState.print_ref(sb, state_root); + vm::load_cell_slice(state_root).print_rec(sb); + }; } if (verify >= 2) { LOG(INFO) << "verifying new ShardState"; @@ -5139,9 +5178,11 @@ bool Collator::create_shard_state() { return fatal_error("cannot create Merkle update for ShardState"); } if (verbosity > 2) { - std::cerr << "Merkle Update for ShardState: "; - vm::CellSlice cs{vm::NoVm{}, state_update}; - cs.print_rec(std::cerr); + FLOG(INFO) { + sb << "Merkle Update for ShardState: "; + vm::CellSlice cs{vm::NoVm{}, state_update}; + cs.print_rec(sb); + }; } LOG(INFO) << "updating block profile statistics"; block_limit_status_->add_proof(state_root); @@ -5186,10 +5227,12 @@ bool Collator::update_processed_upto() { */ bool Collator::compute_out_msg_queue_info(Ref& out_msg_queue_info) { if (verbosity >= 2) { - auto rt = out_msg_queue_->get_root(); - std::cerr << "resulting out_msg_queue is "; - block::gen::t_OutMsgQueue.print(std::cerr, *rt); - rt->print_rec(std::cerr); + FLOG(INFO) { + auto rt = out_msg_queue_->get_root(); + sb << "resulting out_msg_queue is "; + block::gen::t_OutMsgQueue.print(sb, rt); + rt->print_rec(sb); + }; } 
vm::CellBuilder cb; // out_msg_queue_extra#0 dispatch_queue:DispatchQueue out_queue_size:(Maybe uint48) = OutMsgQueueExtra; @@ -5239,8 +5282,10 @@ bool Collator::compute_total_balance() { } vm::CellSlice cs{*(in_msg_dict->get_root_extra())}; if (verbosity > 2) { - block::gen::t_ImportFees.print(std::cerr, vm::CellSlice{*(in_msg_dict->get_root_extra())}); - cs.print_rec(std::cerr); + FLOG(INFO) { + block::gen::t_ImportFees.print(sb, in_msg_dict->get_root_extra()); + cs.print_rec(sb); + }; } auto new_import_fees = block::tlb::t_Grams.as_integer_skip(cs); if (new_import_fees.is_null()) { @@ -5468,9 +5513,11 @@ bool Collator::create_block() { return fatal_error("cannot create new Block"); } if (verbosity >= 3 * 1) { - std::cerr << "new Block: "; - block::gen::t_Block.print_ref(std::cerr, new_block); - vm::load_cell_slice(new_block).print_rec(std::cerr); + FLOG(INFO) { + sb << "new Block: "; + block::gen::t_Block.print_ref(sb, new_block); + vm::load_cell_slice(new_block).print_rec(sb); + }; } if (verify >= 1) { LOG(INFO) << "verifying new Block"; @@ -5508,9 +5555,11 @@ Ref Collator::collate_shard_block_descr_set() { return {}; } if (verbosity >= 4 * 1) { - std::cerr << "serialized TopBlockDescrSet for collated data is: "; - block::gen::t_TopBlockDescrSet.print_ref(std::cerr, cell); - vm::load_cell_slice(cell).print_rec(std::cerr); + FLOG(INFO) { + sb << "serialized TopBlockDescrSet for collated data is: "; + block::gen::t_TopBlockDescrSet.print_ref(sb, cell); + vm::load_cell_slice(cell).print_rec(sb); + }; } return cell; } @@ -5717,8 +5766,10 @@ td::Result Collator::register_external_message_cell(Ref ext_msg, return td::Status::Error("inbound external message has destination address not in this shard"); } if (verbosity > 2) { - std::cerr << "registered external message: "; - block::gen::t_Message_Any.print_ref(std::cerr, ext_msg); + FLOG(INFO) { + sb << "registered external message: "; + block::gen::t_Message_Any.print_ref(sb, ext_msg); + }; } ext_msg_map.emplace(hash, 
1); ext_msg_list_.push_back({std::move(ext_msg), ext_hash, priority}); diff --git a/validator/impl/signature-set.cpp b/validator/impl/signature-set.cpp index c7298216..0078a115 100644 --- a/validator/impl/signature-set.cpp +++ b/validator/impl/signature-set.cpp @@ -42,9 +42,6 @@ td::BufferSlice BlockSignatureSetQ::serialize() const { } Ref root; CHECK(serialize_to(root)); - //std::cerr << "serializing BlockSignatureSet: "; - //vm::CellSlice{vm::NoVm{}, root}.print_rec(std::cerr); - //std::cerr << std::endl; auto res = vm::std_boc_serialize(std::move(root)); LOG_CHECK(res.is_ok()) << res.move_as_error(); return res.move_as_ok(); diff --git a/validator/impl/top-shard-descr.cpp b/validator/impl/top-shard-descr.cpp index 8ff8862d..9eadeef3 100644 --- a/validator/impl/top-shard-descr.cpp +++ b/validator/impl/top-shard-descr.cpp @@ -175,9 +175,11 @@ td::Status ShardTopBlockDescrQ::unpack() { block::gen::TopBlockDescr::Record rec; if (!(block::gen::t_TopBlockDescr.force_validate_ref(root_) && tlb::unpack_cell(root_, rec) && block::tlb::t_BlockIdExt.unpack(rec.proof_for.write(), block_id_))) { - std::cerr << "invalid ShardTopBlockDescr: "; - block::gen::t_TopBlockDescr.print_ref(std::cerr, root_); - vm::load_cell_slice(root_).print_rec(std::cerr); + FLOG(INFO) { + sb << "invalid ShardTopBlockDescr: "; + block::gen::t_TopBlockDescr.print_ref(sb, root_); + vm::load_cell_slice(root_).print_rec(sb); + }; return td::Status::Error(-666, "Shard top block description is not a valid TopBlockDescr TL-B object"); } LOG(DEBUG) << "unpacking a ShardTopBlockDescr for " << block_id_.to_str() << " with " << rec.len << " links"; diff --git a/validator/impl/validate-query.cpp b/validator/impl/validate-query.cpp index 9e4d406e..583b1d86 100644 --- a/validator/impl/validate-query.cpp +++ b/validator/impl/validate-query.cpp @@ -1553,8 +1553,10 @@ void ValidateQuery::got_neighbor_out_queue(int i, td::Result> // unpack ProcessedUpto LOG(DEBUG) << "unpacking ProcessedUpto of neighbor " << 
descr.blk_.to_str(); if (verbosity >= 2) { - block::gen::t_ProcessedInfo.print(std::cerr, qinfo.proc_info); - qinfo.proc_info->print_rec(std::cerr); + FLOG(INFO) { + block::gen::t_ProcessedInfo.print(sb, qinfo.proc_info); + qinfo.proc_info->print_rec(sb); + }; } descr.processed_upto = block::MsgProcessedUptoCollection::unpack(descr.shard(), qinfo.proc_info); if (!descr.processed_upto) { @@ -2656,7 +2658,6 @@ bool ValidateQuery::unpack_precheck_value_flow(Ref value_flow_root) { " but the sum over all accounts present in the new state is " + cc.to_str()); } auto msg_extra = in_msg_dict_->get_root_extra(); - // block::gen::t_ImportFees.print(std::cerr, msg_extra); if (!(block::tlb::t_Grams.as_integer_skip_to(msg_extra.write(), import_fees_) && cc.unpack(std::move(msg_extra)))) { return reject_query("cannot unpack ImportFees from the augmentation of the InMsgDescr dictionary"); } @@ -2760,20 +2761,22 @@ bool ValidateQuery::precheck_one_account_update(td::ConstBitPtr acc_id, Reflookup(acc_id, 256); if (acc_blk_root.is_null()) { if (verbosity >= 3 * 0) { - std::cerr << "state of account " << workchain() << ":" << acc_id.to_hex(256) - << " in the old shardchain state:" << std::endl; - if (old_value.not_null()) { - block::gen::t_ShardAccount.print(std::cerr, *old_value); - } else { - std::cerr << "" << std::endl; - } - std::cerr << "state of account " << workchain() << ":" << acc_id.to_hex(256) - << " in the new shardchain state:" << std::endl; - if (new_value.not_null()) { - block::gen::t_ShardAccount.print(std::cerr, *new_value); - } else { - std::cerr << "" << std::endl; - } + FLOG(INFO) { + sb << "state of account " << workchain() << ":" << acc_id.to_hex(256) + << " in the old shardchain state:" << "\n"; + if (old_value.not_null()) { + block::gen::t_ShardAccount.print(sb, old_value); + } else { + sb << "" << "\n"; + } + sb << "state of account " << workchain() << ":" << acc_id.to_hex(256) + << " in the new shardchain state:" << "\n"; + if (new_value.not_null()) { + 
block::gen::t_ShardAccount.print(sb, new_value); + } else { + sb << "" << "\n"; + } + }; } return reject_query("the state of account "s + acc_id.to_hex(256) + " changed in the new state with respect to the old state, but the block contains no " @@ -2931,8 +2934,6 @@ bool ValidateQuery::precheck_one_account_block(td::ConstBitPtr acc_id, Refprint_rec(std::cerr); - // block::gen::t_AccountBlock.print(std::cerr, acc_blk_root); block::gen::AccountBlock::Record acc_blk; block::gen::HASH_UPDATE::Record hash_upd; if (!(tlb::csr_unpack(acc_blk_root, acc_blk) && @@ -3860,7 +3861,9 @@ bool ValidateQuery::check_in_msg(td::ConstBitPtr key, Ref in_msg) ton::LogicalTime trans_lt; CHECK(block::get_transaction_id(transaction, trans_addr, trans_lt)); if (dest_addr != trans_addr) { - block::gen::t_InMsg.print(std::cerr, *in_msg); + FLOG(INFO) { + block::gen::t_InMsg.print(sb, in_msg); + }; return reject_query(PSTRING() << "InMsg corresponding to inbound message with hash " << key.to_hex(256) << " and destination address " << dest_addr.to_hex() << " claims that the message is processed by transaction " << trans_lt @@ -4408,7 +4411,9 @@ bool ValidateQuery::check_out_msg(td::ConstBitPtr key, Ref out_ms ton::LogicalTime trans_lt; CHECK(block::get_transaction_id(transaction, trans_addr, trans_lt)); if (src_addr != trans_addr) { - block::gen::t_OutMsg.print(std::cerr, *out_msg); + FLOG(INFO) { + block::gen::t_OutMsg.print(sb, out_msg); + }; return reject_query(PSTRING() << "OutMsg corresponding to outbound message with hash " << key.to_hex(256) << " and source address " << src_addr.to_hex() << " claims that the message was created by transaction " << trans_lt @@ -5022,15 +5027,19 @@ bool ValidateQuery::check_in_queue() { LOG(DEBUG) << "processing inbound message with (lt,hash)=(" << kv->lt << "," << kv->key.to_hex() << ") from neighbor #" << kv->source; if (verbosity > 3) { - std::cerr << "inbound message: lt=" << kv->lt << " from=" << kv->source << " key=" << kv->key.to_hex() << " msg="; 
- block::gen::t_EnqueuedMsg.print(std::cerr, *(kv->msg)); + FLOG(INFO) { + sb << "inbound message: lt=" << kv->lt << " from=" << kv->source << " key=" << kv->key.to_hex() << " msg="; + block::gen::t_EnqueuedMsg.print(sb, kv->msg); + }; } bool unprocessed = false; if (!check_neighbor_outbound_message(kv->msg, kv->lt, kv->key.cbits(), neighbors_.at(kv->source), unprocessed)) { if (verbosity > 1) { - std::cerr << "invalid neighbor outbound message: lt=" << kv->lt << " from=" << kv->source - << " key=" << kv->key.to_hex() << " msg="; - block::gen::t_EnqueuedMsg.print(std::cerr, *(kv->msg)); + FLOG(INFO) { + sb << "invalid neighbor outbound message: lt=" << kv->lt << " from=" << kv->source + << " key=" << kv->key.to_hex() << " msg="; + block::gen::t_EnqueuedMsg.print(sb, kv->msg); + }; } return reject_query("error processing outbound internal message "s + kv->key.to_hex() + " of neighbor " + neighbors_.at(kv->source).blk_.to_str()); @@ -5636,10 +5645,12 @@ bool ValidateQuery::check_one_transaction(block::Account& account, ton::LogicalT // now compare the re-created transaction with the one we have if (trans_root2->get_hash() != trans_root->get_hash()) { if (verbosity >= 3 * 0) { - std::cerr << "original transaction " << lt << " of " << addr.to_hex() << ": "; - block::gen::t_Transaction.print_ref(std::cerr, trans_root); - std::cerr << "re-created transaction " << lt << " of " << addr.to_hex() << ": "; - block::gen::t_Transaction.print_ref(std::cerr, trans_root2); + FLOG(INFO) { + sb << "original transaction " << lt << " of " << addr.to_hex() << ": "; + block::gen::t_Transaction.print_ref(sb, trans_root); + sb << "re-created transaction " << lt << " of " << addr.to_hex() << ": "; + block::gen::t_Transaction.print_ref(sb, trans_root2); + }; } return reject_query(PSTRING() << "the transaction " << lt << " of " << addr.to_hex() << " has hash " << trans_root->get_hash().to_hex() From 9d94e04d2031ab0200699d7aec7f4a97fd1d0563 Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Mon, 
17 Feb 2025 10:13:17 +0300 Subject: [PATCH 46/61] Add more stats to validator getstats 1) Liteserver queries count 2) Collated/validated blocks count, number of active sessions 3) Persistent state sizes 4) Initial sync progress --- tdutils/td/utils/Time.h | 4 + validator-engine/validator-engine.cpp | 10 ++- validator-engine/validator-engine.hpp | 4 + validator/db/archive-manager.cpp | 24 ++++++ validator/db/archive-manager.hpp | 2 + validator/db/celldb.cpp | 19 +++- validator/db/rootdb.cpp | 1 + validator/downloaders/download-state.cpp | 7 ++ validator/downloaders/download-state.hpp | 3 + validator/impl/collator-impl.h | 2 +- validator/impl/collator.cpp | 2 + validator/impl/liteserver.cpp | 14 +-- validator/impl/liteserver.hpp | 1 - validator/impl/validate-query.cpp | 12 +-- validator/impl/validate-query.hpp | 2 +- validator/interfaces/validator-manager.h | 6 +- validator/manager-init.cpp | 6 ++ validator/manager-init.hpp | 4 + validator/manager.cpp | 82 +++++++++++++++--- validator/manager.hpp | 26 +++++- validator/net/download-state.cpp | 10 ++- validator/net/download-state.hpp | 4 + validator/state-serializer.cpp | 44 ++++++++++ validator/state-serializer.hpp | 8 ++ validator/stats-provider.h | 105 +++++++++++++++++++++++ validator/validator.h | 8 ++ 26 files changed, 365 insertions(+), 45 deletions(-) create mode 100644 validator/stats-provider.h diff --git a/tdutils/td/utils/Time.h b/tdutils/td/utils/Time.h index ece822d4..c7795ae4 100644 --- a/tdutils/td/utils/Time.h +++ b/tdutils/td/utils/Time.h @@ -128,6 +128,10 @@ inline Timestamp &operator+=(Timestamp &a, double b) { return a; } +inline double operator-(const Timestamp &a, const Timestamp &b) { + return a.at() - b.at(); +} + template void store(const Timestamp ×tamp, StorerT &storer) { storer.store_binary(timestamp.at() - Time::now() + Clocks::system()); diff --git a/validator-engine/validator-engine.cpp b/validator-engine/validator-engine.cpp index cc7c57b3..81b8278f 100644 --- 
a/validator-engine/validator-engine.cpp +++ b/validator-engine/validator-engine.cpp @@ -1957,7 +1957,8 @@ void ValidatorEngine::started_overlays() { void ValidatorEngine::start_validator() { validator_options_.write().set_allow_blockchain_init(config_.validators.size() > 0); - validator_options_.write().set_state_serializer_enabled(config_.state_serializer_enabled); + validator_options_.write().set_state_serializer_enabled(config_.state_serializer_enabled && + !state_serializer_disabled_flag_); load_collator_options(); validator_manager_ = ton::validator::ValidatorManagerFactory::create( @@ -3973,7 +3974,7 @@ void ValidatorEngine::run_control_query(ton::ton_api::engine_validator_setStateS promise.set_value(ton::create_serialize_tl_object()); return; } - validator_options_.write().set_state_serializer_enabled(query.enabled_); + validator_options_.write().set_state_serializer_enabled(query.enabled_ && !state_serializer_disabled_flag_); td::actor::send_closure(validator_manager_, &ton::validator::ValidatorManagerInterface::update_options, validator_options_); config_.state_serializer_enabled = query.enabled_; @@ -4556,6 +4557,11 @@ int main(int argc, char *argv[]) { td::actor::send_closure(x, &ValidatorEngine::set_validator_telemetry_filename, s); }); }); + p.add_option( + '\0', "disable-state-serializer", + "disable persistent state serializer (similar to set-state-serializer-enabled 0 in validator console)", [&]() { + acts.push_back([&x]() { td::actor::send_closure(x, &ValidatorEngine::set_state_serializer_disabled_flag); }); + }); auto S = p.run(argc, argv); if (S.is_error()) { LOG(ERROR) << "failed to parse options: " << S.move_as_error(); diff --git a/validator-engine/validator-engine.hpp b/validator-engine/validator-engine.hpp index b7abb0b1..6c2f5c4b 100644 --- a/validator-engine/validator-engine.hpp +++ b/validator-engine/validator-engine.hpp @@ -228,6 +228,7 @@ class ValidatorEngine : public td::actor::Actor { std::string validator_telemetry_filename_; bool 
not_all_shards_ = false; std::vector add_shard_cmds_; + bool state_serializer_disabled_flag_ = false; std::set unsafe_catchains_; std::map> unsafe_catchain_rotations_; @@ -325,6 +326,9 @@ class ValidatorEngine : public td::actor::Actor { void add_shard_cmd(ton::ShardIdFull shard) { add_shard_cmds_.push_back(shard); } + void set_state_serializer_disabled_flag() { + state_serializer_disabled_flag_ = true; + } void start_up() override; ValidatorEngine() { diff --git a/validator/db/archive-manager.cpp b/validator/db/archive-manager.cpp index d349f9d8..8c7cde17 100644 --- a/validator/db/archive-manager.cpp +++ b/validator/db/archive-manager.cpp @@ -1196,6 +1196,30 @@ void ArchiveManager::set_async_mode(bool mode, td::Promise promise) { } } +void ArchiveManager::prepare_stats(td::Promise>> promise) { + std::vector> stats; + { + std::map states; + for (auto &[key, file] : perm_states_) { + BlockSeqno seqno = key.first; + auto r_stat = td::stat(db_root_ + "/archive/states/" + file.filename_short()); + if (r_stat.is_error()) { + LOG(WARNING) << "Cannot stat persistent state file " << file.filename_short() << " : " << r_stat.move_as_error(); + } else { + states[seqno] += r_stat.move_as_ok().size_; + } + } + td::StringBuilder sb; + for (auto &[seqno, size] : states) { + sb << seqno << ":" << td::format::as_size(size) << " "; + } + if (!sb.as_cslice().empty()) { + stats.emplace_back("persistent_states", sb.as_cslice().str()); + } + } + promise.set_value(std::move(stats)); +} + void ArchiveManager::truncate(BlockSeqno masterchain_seqno, ConstBlockHandle handle, td::Promise promise) { index_->begin_transaction().ensure(); td::MultiPromise mp; diff --git a/validator/db/archive-manager.hpp b/validator/db/archive-manager.hpp index 90fc6a0b..d919e32e 100644 --- a/validator/db/archive-manager.hpp +++ b/validator/db/archive-manager.hpp @@ -81,6 +81,8 @@ class ArchiveManager : public td::actor::Actor { cur_shard_split_depth_ = value; } + void prepare_stats(td::Promise>> promise); + 
static constexpr td::uint32 archive_size() { return 20000; } diff --git a/validator/db/celldb.cpp b/validator/db/celldb.cpp index 9dcecdb3..e86a373d 100644 --- a/validator/db/celldb.cpp +++ b/validator/db/celldb.cpp @@ -158,6 +158,17 @@ void CellDbIn::start_up() { }, td::Timestamp::now()); } + + { + std::string key = "stats.last_deleted_mc_seqno", value; + auto R = cell_db_->get(td::as_slice(key), value); + R.ensure(); + if (R.ok() == td::KeyValue::GetStatus::Ok) { + auto r_value = td::to_integer_safe(value); + r_value.ensure(); + last_deleted_mc_state_ = r_value.move_as_ok(); + } + } } void CellDbIn::load_cell(RootHash hash, td::Promise> promise) { @@ -452,6 +463,11 @@ void CellDbIn::gc_cont2(BlockHandle handle) { cell_db_->erase(get_key(key_hash)).ensure(); set_block(F.prev, std::move(P)); set_block(F.next, std::move(N)); + if (handle->id().is_masterchain()) { + last_deleted_mc_state_ = handle->id().seqno(); + std::string key = "stats.last_deleted_mc_seqno", value = td::to_string(last_deleted_mc_state_); + cell_db_->set(td::as_slice(key), td::as_slice(value)); + } cell_db_->commit_write_batch().ensure(); alarm_timestamp() = td::Timestamp::now(); timer_write_batch.reset(); @@ -475,9 +491,6 @@ void CellDbIn::gc_cont2(BlockHandle handle) { if (!opts_->get_disable_rocksdb_stats()) { cell_db_statistics_.gc_cell_time_.insert(timer.elapsed() * 1e6); } - if (handle->id().is_masterchain()) { - last_deleted_mc_state_ = handle->id().seqno(); - } LOG(DEBUG) << "Deleted state " << handle->id().to_str(); timer_finish.reset(); timer_all.reset(); diff --git a/validator/db/rootdb.cpp b/validator/db/rootdb.cpp index e0579d57..8d83e7a7 100644 --- a/validator/db/rootdb.cpp +++ b/validator/db/rootdb.cpp @@ -438,6 +438,7 @@ void RootDb::allow_block_gc(BlockIdExt block_id, td::Promise promise) { void RootDb::prepare_stats(td::Promise>> promise) { auto merger = StatsMerger::create(std::move(promise)); td::actor::send_closure(cell_db_, &CellDb::prepare_stats, 
merger.make_promise("celldb.")); + td::actor::send_closure(archive_db_, &ArchiveManager::prepare_stats, merger.make_promise("archive.")); } void RootDb::truncate(BlockSeqno seqno, ConstBlockHandle handle, td::Promise promise) { diff --git a/validator/downloaders/download-state.cpp b/validator/downloaders/download-state.cpp index 32978ea5..8473cb22 100644 --- a/validator/downloaders/download-state.cpp +++ b/validator/downloaders/download-state.cpp @@ -38,6 +38,7 @@ DownloadShardState::DownloadShardState(BlockIdExt block_id, BlockIdExt mastercha } void DownloadShardState::start_up() { + status_ = ProcessStatus(manager_, "process.download_state"); alarm_timestamp() = timeout_; auto P = td::PromiseCreator::lambda([SelfId = actor_id(this)](td::Result R) { @@ -81,6 +82,7 @@ void DownloadShardState::download_state() { }); td::actor::send_closure(manager_, &ValidatorManager::send_get_block_proof_link_request, block_id_, priority_, std::move(P)); + status_.set_status(PSTRING() << block_id_.id.to_str() << " : downloading proof"); } void DownloadShardState::downloaded_proof_link(td::BufferSlice data) { @@ -123,6 +125,7 @@ void DownloadShardState::checked_proof_link() { td::actor::send_closure(manager_, &ValidatorManager::send_get_persistent_state_request, block_id_, masterchain_block_id_, priority_, std::move(P)); } + status_.set_status(PSTRING() << block_id_.id.to_str() << " : downloading state"); } void DownloadShardState::download_zero_state() { @@ -152,6 +155,7 @@ void DownloadShardState::downloaded_zero_state(td::BufferSlice data) { } void DownloadShardState::downloaded_shard_state(td::BufferSlice data) { + status_.set_status(PSTRING() << block_id_.id.to_str() << " : processing downloaded state"); auto S = create_shard_state(block_id_, data.clone()); if (S.is_error()) { fail_handler(actor_id(this), S.move_as_error()); @@ -174,6 +178,7 @@ void DownloadShardState::downloaded_shard_state(td::BufferSlice data) { } void DownloadShardState::checked_shard_state() { + 
status_.set_status(PSTRING() << block_id_.id.to_str() << " : storing state file"); LOG(WARNING) << "checked shard state " << block_id_.to_str(); auto P = td::PromiseCreator::lambda([SelfId = actor_id(this)](td::Result R) { R.ensure(); @@ -189,6 +194,7 @@ void DownloadShardState::checked_shard_state() { } void DownloadShardState::written_shard_state_file() { + status_.set_status(PSTRING() << block_id_.id.to_str() << " : storing state to celldb"); LOG(WARNING) << "written shard state file " << block_id_.to_str(); auto P = td::PromiseCreator::lambda([SelfId = actor_id(this)](td::Result> R) { R.ensure(); @@ -198,6 +204,7 @@ void DownloadShardState::written_shard_state_file() { } void DownloadShardState::written_shard_state(td::Ref state) { + status_.set_status(PSTRING() << block_id_.id.to_str() << " : finishing"); state_ = std::move(state); handle_->set_unix_time(state_->get_unix_time()); handle_->set_is_key_block(block_id_.is_masterchain()); diff --git a/validator/downloaders/download-state.hpp b/validator/downloaders/download-state.hpp index 02984c53..bde80aae 100644 --- a/validator/downloaders/download-state.hpp +++ b/validator/downloaders/download-state.hpp @@ -19,6 +19,7 @@ #pragma once #include "validator/interfaces/validator-manager.h" +#include "stats-provider.h" namespace ton { @@ -67,6 +68,8 @@ class DownloadShardState : public td::actor::Actor { td::BufferSlice data_; td::Ref state_; + + ProcessStatus status_; }; } // namespace validator diff --git a/validator/impl/collator-impl.h b/validator/impl/collator-impl.h index a781968d..ce21bc5e 100644 --- a/validator/impl/collator-impl.h +++ b/validator/impl/collator-impl.h @@ -50,7 +50,7 @@ class Collator final : public td::actor::Actor { using LtCellRef = block::LtCellRef; using NewOutMsg = block::NewOutMsg; const ShardIdFull shard_; - ton::BlockId new_id; + ton::BlockId new_id{workchainInvalid, 0, 0}; bool busy_{false}; bool before_split_{false}; bool after_split_{false}; diff --git a/validator/impl/collator.cpp 
b/validator/impl/collator.cpp index d3378cd8..d5c41853 100644 --- a/validator/impl/collator.cpp +++ b/validator/impl/collator.cpp @@ -352,6 +352,8 @@ bool Collator::fatal_error(td::Status error) { attempt_idx_ + 1); } else { main_promise(std::move(error)); + td::actor::send_closure(manager, &ValidatorManager::record_collate_query_stats, BlockIdExt{new_id, RootHash::zero(), FileHash::zero()}, + work_timer_.elapsed(), cpu_work_timer_.elapsed(), td::optional{}); } busy_ = false; } diff --git a/validator/impl/liteserver.cpp b/validator/impl/liteserver.cpp index 723dbfe9..83f39c45 100644 --- a/validator/impl/liteserver.cpp +++ b/validator/impl/liteserver.cpp @@ -85,19 +85,13 @@ void LiteQuery::abort_query(td::Status reason) { if (acc_state_promise_) { acc_state_promise_.set_error(std::move(reason)); } else if (promise_) { + td::actor::send_closure(manager_, &ValidatorManager::add_lite_query_stats, query_obj_ ? query_obj_->get_id() : 0, + false); promise_.set_error(std::move(reason)); } stop(); } -void LiteQuery::abort_query_ext(td::Status reason, std::string comment) { - LOG(INFO) << "aborted liteserver query: " << comment << " : " << reason.to_string(); - if (promise_) { - promise_.set_error(reason.move_as_error_prefix(comment + " : ")); - } - stop(); -} - bool LiteQuery::fatal_error(td::Status error) { abort_query(std::move(error)); return false; @@ -120,6 +114,8 @@ bool LiteQuery::finish_query(td::BufferSlice result, bool skip_cache_update) { td::actor::send_closure(cache_, &LiteServerCache::update, cache_key_, result.clone()); } if (promise_) { + td::actor::send_closure(manager_, &ValidatorManager::add_lite_query_stats, query_obj_ ? 
query_obj_->get_id() : 0, + true); promise_.set_result(std::move(result)); stop(); return true; @@ -139,7 +135,6 @@ void LiteQuery::start_up() { auto F = fetch_tl_object(query_, true); if (F.is_error()) { - td::actor::send_closure(manager_, &ValidatorManager::add_lite_query_stats, 0); // unknown abort_query(F.move_as_error()); return; } @@ -192,7 +187,6 @@ bool LiteQuery::use_cache() { } void LiteQuery::perform() { - td::actor::send_closure(manager_, &ValidatorManager::add_lite_query_stats, query_obj_->get_id()); lite_api::downcast_call( *query_obj_, td::overloaded( diff --git a/validator/impl/liteserver.hpp b/validator/impl/liteserver.hpp index 447e1dad..fc873533 100644 --- a/validator/impl/liteserver.hpp +++ b/validator/impl/liteserver.hpp @@ -97,7 +97,6 @@ class LiteQuery : public td::actor::Actor { bool fatal_error(std::string err_msg, int err_code = -400); bool fatal_error(int err_code, std::string err_msg = ""); void abort_query(td::Status reason); - void abort_query_ext(td::Status reason, std::string err_msg); bool finish_query(td::BufferSlice result, bool skip_cache_update = false); void alarm() override; void start_up() override; diff --git a/validator/impl/validate-query.cpp b/validator/impl/validate-query.cpp index 583b1d86..31c30e90 100644 --- a/validator/impl/validate-query.cpp +++ b/validator/impl/validate-query.cpp @@ -115,7 +115,7 @@ bool ValidateQuery::reject_query(std::string error, td::BufferSlice reason) { error = error_ctx() + error; LOG(ERROR) << "REJECT: aborting validation of block candidate for " << shard_.to_str() << " : " << error; if (main_promise) { - record_stats(); + record_stats(false); errorlog::ErrorLog::log(PSTRING() << "REJECT: aborting validation of block candidate for " << shard_.to_str() << " : " << error << ": data=" << block_candidate.id.file_hash.to_hex() << " collated_data=" << block_candidate.collated_file_hash.to_hex()); @@ -153,7 +153,7 @@ bool ValidateQuery::soft_reject_query(std::string error, td::BufferSlice reason) 
error = error_ctx() + error; LOG(ERROR) << "SOFT REJECT: aborting validation of block candidate for " << shard_.to_str() << " : " << error; if (main_promise) { - record_stats(); + record_stats(false); errorlog::ErrorLog::log(PSTRING() << "SOFT REJECT: aborting validation of block candidate for " << shard_.to_str() << " : " << error << ": data=" << block_candidate.id.file_hash.to_hex() << " collated_data=" << block_candidate.collated_file_hash.to_hex()); @@ -176,7 +176,7 @@ bool ValidateQuery::fatal_error(td::Status error) { error.ensure_error(); LOG(ERROR) << "aborting validation of block candidate for " << shard_.to_str() << " : " << error.to_string(); if (main_promise) { - record_stats(); + record_stats(false); auto c = error.code(); if (c <= -667 && c >= -670) { errorlog::ErrorLog::log(PSTRING() << "FATAL ERROR: aborting validation of block candidate for " << shard_.to_str() @@ -234,7 +234,7 @@ bool ValidateQuery::fatal_error(std::string err_msg, int err_code) { */ void ValidateQuery::finish_query() { if (main_promise) { - record_stats(); + record_stats(true); LOG(WARNING) << "validate query done"; main_promise.set_result(now_); } @@ -6928,13 +6928,13 @@ void ValidateQuery::written_candidate() { /** * Sends validation work time to manager. 
*/ -void ValidateQuery::record_stats() { +void ValidateQuery::record_stats(bool success) { double work_time = work_timer_.elapsed(); double cpu_work_time = cpu_work_timer_.elapsed(); LOG(WARNING) << "validation took " << perf_timer_.elapsed() << "s"; LOG(WARNING) << "Validate query work time = " << work_time << "s, cpu time = " << cpu_work_time << "s"; td::actor::send_closure(manager, &ValidatorManager::record_validate_query_stats, block_candidate.id, work_time, - cpu_work_time); + cpu_work_time, success); } } // namespace validator diff --git a/validator/impl/validate-query.hpp b/validator/impl/validate-query.hpp index 98cd2493..90c368ff 100644 --- a/validator/impl/validate-query.hpp +++ b/validator/impl/validate-query.hpp @@ -400,7 +400,7 @@ class ValidateQuery : public td::actor::Actor { td::Timer work_timer_{true}; td::ThreadCpuTimer cpu_work_timer_{true}; - void record_stats(); + void record_stats(bool success); }; } // namespace validator diff --git a/validator/interfaces/validator-manager.h b/validator/interfaces/validator-manager.h index 20d4bd62..00fb77e1 100644 --- a/validator/interfaces/validator-manager.h +++ b/validator/interfaces/validator-manager.h @@ -205,13 +205,13 @@ class ValidatorManager : public ValidatorManagerInterface { td::optional shard, td::Promise> promise) = 0; - virtual void add_lite_query_stats(int lite_query_id) { + virtual void add_lite_query_stats(int lite_query_id, bool success) { } virtual void record_collate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time, - CollationStats stats) { + td::optional stats) { } - virtual void record_validate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time) { + virtual void record_validate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time, bool success) { } virtual void add_persistent_state_description(td::Ref desc) = 0; diff --git a/validator/manager-init.cpp b/validator/manager-init.cpp index c2944b25..6f304680 100644 --- 
a/validator/manager-init.cpp +++ b/validator/manager-init.cpp @@ -32,6 +32,8 @@ namespace ton { namespace validator { void ValidatorManagerMasterchainReiniter::start_up() { + status_ = ProcessStatus(manager_, "process.initial_sync"); + status_.set_status(PSTRING() << "starting, init block seqno " << block_id_.seqno()); LOG(INFO) << "init_block_id=" << block_id_; CHECK(block_id_.is_masterchain()); CHECK(block_id_.id.shard == shardIdAll); @@ -58,6 +60,7 @@ void ValidatorManagerMasterchainReiniter::got_masterchain_handle(BlockHandle han key_blocks_.push_back(handle_); if (opts_->initial_sync_disabled()) { + status_.set_status(PSTRING() << "downloading masterchain state " << handle_->id().seqno()); auto P = td::PromiseCreator::lambda([SelfId = actor_id(this)](td::Result> R) { R.ensure(); td::actor::send_closure(SelfId, &ValidatorManagerMasterchainReiniter::download_masterchain_state); @@ -181,6 +184,7 @@ void ValidatorManagerMasterchainReiniter::got_next_key_blocks(std::vector(key_blocks_.size()); key_blocks_.resize(key_blocks_.size() + vec.size(), nullptr); @@ -247,6 +251,7 @@ void ValidatorManagerMasterchainReiniter::choose_masterchain_state() { } void ValidatorManagerMasterchainReiniter::download_masterchain_state() { + status_.set_status(PSTRING() << "downloading masterchain state " << block_id_.seqno()); auto P = td::PromiseCreator::lambda([SelfId = actor_id(this)](td::Result> R) { if (R.is_error()) { LOG(WARNING) << "failed to download masterchain state: " << R.move_as_error(); @@ -274,6 +279,7 @@ void ValidatorManagerMasterchainReiniter::downloaded_masterchain_state(td::Ref("shardclient", opts_, handle_, state_, manager_, std::move(P)); + status_.set_status(PSTRING() << "downloading all shard states, mc seqno " << block_id_.seqno()); } void ValidatorManagerMasterchainReiniter::downloaded_all_shards() { diff --git a/validator/manager-init.hpp b/validator/manager-init.hpp index 7dce4e47..901b826b 100644 --- a/validator/manager-init.hpp +++ 
b/validator/manager-init.hpp @@ -27,6 +27,8 @@ #include "manager-init.h" +#include + namespace ton { namespace validator { @@ -77,6 +79,8 @@ class ValidatorManagerMasterchainReiniter : public td::actor::Actor { td::uint32 pending_ = 0; td::actor::ActorOwn client_; + + ProcessStatus status_; }; class ValidatorManagerMasterchainStarter : public td::actor::Actor { diff --git a/validator/manager.cpp b/validator/manager.cpp index 068ea5eb..8dce764d 100644 --- a/validator/manager.cpp +++ b/validator/manager.cpp @@ -451,11 +451,9 @@ void ValidatorManagerImpl::check_external_message(td::BufferSlice data, td::Prom promise = [self = this, wc, addr, promise = std::move(promise), SelfId = actor_id(this)](td::Result> R) mutable { - if (R.is_error()) { - promise.set_error(R.move_as_error()); - return; - } - td::actor::send_lambda(SelfId, [=, promise = std::move(promise), message = R.move_as_ok()]() mutable { + td::actor::send_lambda(SelfId, [=, promise = std::move(promise), R = std::move(R)]() mutable { + ++(R.is_ok() ? self->total_check_ext_messages_ok_ : self->total_check_ext_messages_error_); + TRY_RESULT_PROMISE(promise, message, std::move(R)); if (self->checked_ext_msg_counter_.inc_msg_count(wc, addr) > max_ext_msg_per_addr()) { promise.set_error( td::Status::Error(PSTRING() << "too many external messages to address " << wc << ":" << addr.to_hex())); @@ -2131,7 +2129,7 @@ void ValidatorManagerImpl::update_shards() { } } - bool validating_masterchain = false; + active_validator_groups_master_ = active_validator_groups_shard_ = 0; if (allow_validate_) { for (auto &desc : new_shards) { auto shard = desc.first; @@ -2148,9 +2146,7 @@ void ValidatorManagerImpl::update_shards() { auto validator_id = get_validator(shard, val_set); if (!validator_id.is_zero()) { - if (shard.is_masterchain()) { - validating_masterchain = true; - } + ++(shard.is_masterchain() ? 
active_validator_groups_master_ : active_validator_groups_shard_); auto val_group_id = get_validator_set_id(shard, val_set, opts_hash, key_seqno, opts); if (force_recover) { @@ -2845,8 +2841,8 @@ void ValidatorManagerImpl::prepare_stats(td::Promiseid().to_str()); vec.emplace_back("rotatemasterchainblock", last_rotate_block_id_.to_str()); //vec.emplace_back("shardclientmasterchainseqno", td::to_string(min_confirmed_masterchain_seqno_)); - vec.emplace_back("stateserializermasterchainseqno", td::to_string(state_serializer_masterchain_seqno_)); } + td::NamedThreadSafeCounter::get_default().for_each([&](auto key, auto value) { vec.emplace_back("counter." + key, PSTRING() << value); }); @@ -2864,9 +2860,48 @@ void ValidatorManagerImpl::prepare_stats(td::Promiseget_state_serializer_enabled(); + if (is_validator() && last_masterchain_state_->get_global_id() == -239) { + serializer_enabled = false; + } + vec.emplace_back("stateserializerenabled", serializer_enabled ? "true" : "false"); + merger.make_promise("").set_value(std::move(vec)); + if (!serializer_.empty()) { + td::actor::send_closure(serializer_, &AsyncStateSerializer::prepare_stats, merger.make_promise("")); + } + td::actor::send_closure(db_, &Db::prepare_stats, merger.make_promise("db.")); + for (auto &[_, p] : stats_providers_) { + p.second(merger.make_promise(p.first)); + } } void ValidatorManagerImpl::prepare_perf_timer_stats(td::Promise> promise) { @@ -3353,17 +3388,28 @@ td::actor::ActorOwn ValidatorManagerFactory::create( } void ValidatorManagerImpl::record_collate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time, - CollationStats stats) { + td::optional stats) { + if (!stats) { + ++(block_id.is_masterchain() ? 
total_collated_blocks_master_error_ : total_collated_blocks_shard_error_); + return; + } auto &record = new_block_stats_record(block_id); record.collator_work_time_ = work_time; record.collator_cpu_work_time_ = cpu_work_time; - record.collator_stats_ = std::move(stats); + record.collator_stats_ = std::move(stats.value()); + ++(block_id.is_masterchain() ? total_collated_blocks_master_ok_ : total_collated_blocks_shard_ok_); } -void ValidatorManagerImpl::record_validate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time) { +void ValidatorManagerImpl::record_validate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time, + bool success) { auto &record = new_block_stats_record(block_id); record.validator_work_time_ = work_time; record.validator_cpu_work_time_ = cpu_work_time; + if (success) { + ++(block_id.is_masterchain() ? total_validated_blocks_master_ok_ : total_validated_blocks_shard_ok_); + } else { + ++(block_id.is_masterchain() ? total_validated_blocks_master_error_ : total_validated_blocks_shard_error_); + } } ValidatorManagerImpl::RecordedBlockStats &ValidatorManagerImpl::new_block_stats_record(BlockIdExt block_id) { @@ -3377,6 +3423,16 @@ ValidatorManagerImpl::RecordedBlockStats &ValidatorManagerImpl::new_block_stats_ return recorded_block_stats_[block_id]; } +void ValidatorManagerImpl::register_stats_provider( + td::uint64 idx, std::string prefix, + std::function>>)> callback) { + stats_providers_[idx] = {std::move(prefix), std::move(callback)}; +} + +void ValidatorManagerImpl::unregister_stats_provider(td::uint64 idx) { + stats_providers_.erase(idx); +} + size_t ValidatorManagerImpl::CheckedExtMsgCounter::get_msg_count(WorkchainId wc, StdSmcAddress addr) { before_query(); auto it1 = counter_cur_.find({wc, addr}); diff --git a/validator/manager.hpp b/validator/manager.hpp index 519cab12..9e54c3f3 100644 --- a/validator/manager.hpp +++ b/validator/manager.hpp @@ -655,8 +655,9 @@ class ValidatorManagerImpl : public 
ValidatorManager { td::optional shard, td::Promise> promise) override; - void add_lite_query_stats(int lite_query_id) override { + void add_lite_query_stats(int lite_query_id, bool success) override { ++ls_stats_[lite_query_id]; + ++(success ? total_ls_queries_ok_ : total_ls_queries_error_)[lite_query_id]; } private: @@ -747,6 +748,16 @@ class ValidatorManagerImpl : public ValidatorManager { std::map ls_stats_; // lite_api ID -> count, 0 for unknown td::uint32 ls_stats_check_ext_messages_{0}; + UnixTime started_at_ = (UnixTime)td::Clocks::system(); + std::map total_ls_queries_ok_, total_ls_queries_error_; // lite_api ID -> count, 0 for unknown + td::uint64 total_check_ext_messages_ok_{0}, total_check_ext_messages_error_{0}; + td::uint64 total_collated_blocks_master_ok_{0}, total_collated_blocks_master_error_{0}; + td::uint64 total_validated_blocks_master_ok_{0}, total_validated_blocks_master_error_{0}; + td::uint64 total_collated_blocks_shard_ok_{0}, total_collated_blocks_shard_error_{0}; + td::uint64 total_validated_blocks_shard_ok_{0}, total_validated_blocks_shard_error_{0}; + + size_t active_validator_groups_master_{0}, active_validator_groups_shard_{0}; + td::actor::ActorOwn candidates_buffer_; struct RecordedBlockStats { @@ -760,16 +771,25 @@ class ValidatorManagerImpl : public ValidatorManager { std::queue recorded_block_stats_lru_; void record_collate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time, - CollationStats stats) override; - void record_validate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time) override; + td::optional stats) override; + void record_validate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time, bool success) override; RecordedBlockStats &new_block_stats_record(BlockIdExt block_id); + void register_stats_provider( + td::uint64 idx, std::string prefix, + std::function>>)> callback) override; + void unregister_stats_provider(td::uint64 idx) override; + std::map> 
validator_telemetry_; void init_validator_telemetry(); std::map> persistent_state_descriptions_; std::map> persistent_state_blocks_; + + std::map>>)>>> + stats_providers_; }; } // namespace validator diff --git a/validator/net/download-state.cpp b/validator/net/download-state.cpp index 2b373ef3..6735a2b5 100644 --- a/validator/net/download-state.cpp +++ b/validator/net/download-state.cpp @@ -70,6 +70,7 @@ void DownloadState::finish_query() { } void DownloadState::start_up() { + status_ = ProcessStatus(validator_manager_, "process.download_state_net"); alarm_timestamp() = timeout_; td::actor::send_closure(validator_manager_, &ValidatorManagerInterface::get_persistent_state, block_id_, @@ -190,6 +191,7 @@ void DownloadState::got_block_state_description(td::BufferSlice data) { td::Timestamp::in(3.0), std::move(P)); } })); + status_.set_status(PSTRING() << block_id_.id.to_str() << " : 0 bytes, 0B/s"); } void DownloadState::got_block_state_part(td::BufferSlice data, td::uint32 requested_size) { @@ -198,14 +200,18 @@ void DownloadState::got_block_state_part(td::BufferSlice data, td::uint32 reques parts_.push_back(std::move(data)); double elapsed = prev_logged_timer_.elapsed(); - if (elapsed > 10.0) { + if (elapsed > 5.0) { prev_logged_timer_ = td::Timer(); + auto speed = (td::uint64)((double)(sum_ - prev_logged_sum_) / elapsed); LOG(WARNING) << "downloading state " << block_id_.to_str() << ": " << td::format::as_size(sum_) << " (" - << td::format::as_size((td::uint64)(double(sum_ - prev_logged_sum_) / elapsed)) << "/s)"; + << td::format::as_size(speed) << "/s)"; + status_.set_status(PSTRING() << block_id_.id.to_str() << " : " << sum_ << " bytes, " << td::format::as_size(speed) + << "/s"); prev_logged_sum_ = sum_; } if (last_part) { + status_.set_status(PSTRING() << block_id_.id.to_str() << " : " << sum_ << " bytes, finishing"); td::BufferSlice res{td::narrow_cast(sum_)}; auto S = res.as_slice(); for (auto &p : parts_) { diff --git a/validator/net/download-state.hpp 
b/validator/net/download-state.hpp index 19c44beb..470c5431 100644 --- a/validator/net/download-state.hpp +++ b/validator/net/download-state.hpp @@ -23,6 +23,8 @@ #include "validator/validator.h" #include "adnl/adnl-ext-client.h" +#include + namespace ton { namespace validator { @@ -75,6 +77,8 @@ class DownloadState : public td::actor::Actor { td::uint64 prev_logged_sum_ = 0; td::Timer prev_logged_timer_; + + ProcessStatus status_; }; } // namespace fullnode diff --git a/validator/state-serializer.cpp b/validator/state-serializer.cpp index b693232b..bc3d7b5e 100644 --- a/validator/state-serializer.cpp +++ b/validator/state-serializer.cpp @@ -58,6 +58,12 @@ void AsyncStateSerializer::got_self_state(AsyncSerializerState state) { }); td::actor::send_closure(manager_, &ValidatorManager::get_block_handle, last_block_id_, true, std::move(P)); } + + inited_block_id_ = true; + for (auto& promise : wait_init_block_id_) { + promise.set_value(td::Unit()); + } + wait_init_block_id_.clear(); } void AsyncStateSerializer::got_init_handle(BlockHandle handle) { @@ -186,6 +192,9 @@ void AsyncStateSerializer::next_iteration() { td::actor::send_closure(SelfId, &AsyncStateSerializer::request_previous_state_files); }, td::Timestamp::in(delay)); + current_status_ = PSTRING() << "delay before serializing seqno=" << masterchain_handle_->id().seqno() << " " + << (int)delay << "s"; + current_status_ts_ = td::Timestamp::now(); return; } if (next_idx_ < shards_.size()) { @@ -379,9 +388,14 @@ void AsyncStateSerializer::got_masterchain_state(td::Ref state td::actor::send_closure(manager_, &ValidatorManager::store_persistent_state_file_gen, masterchain_handle_->id(), masterchain_handle_->id(), write_data, std::move(P)); + + current_status_ = PSTRING() << "serializing masterchain state " << state->get_block_id().id.to_str(); + current_status_ts_ = td::Timestamp::now(); } void AsyncStateSerializer::stored_masterchain_state() { + current_status_ = "pending"; + current_status_ts_ = {}; LOG(ERROR) << 
"finished serializing masterchain state " << masterchain_handle_->id().id.to_str(); running_ = false; next_iteration(); @@ -444,9 +458,14 @@ void AsyncStateSerializer::got_shard_state(BlockHandle handle, td::Refid(), masterchain_handle_->id(), write_data, std::move(P)); + current_status_ = PSTRING() << "serializing shard state " << next_idx_ << "/" << shards_.size() << " " + << state->get_block_id().id.to_str(); + current_status_ts_ = td::Timestamp::now(); } void AsyncStateSerializer::fail_handler(td::Status reason) { + current_status_ = PSTRING() << "pending, " << reason; + current_status_ts_ = {}; VLOG(VALIDATOR_NOTICE) << "failure: " << reason; attempt_++; delay_action( @@ -460,6 +479,8 @@ void AsyncStateSerializer::fail_handler_cont() { } void AsyncStateSerializer::success_handler() { + current_status_ = "pending"; + current_status_ts_ = {}; running_ = false; next_iteration(); } @@ -478,6 +499,29 @@ void AsyncStateSerializer::auto_disable_serializer(bool disabled) { } } +void AsyncStateSerializer::prepare_stats(td::Promise>> promise) { + if (!inited_block_id_) { + wait_init_block_id_.push_back( + [SelfId = actor_id(this), promise = std::move(promise)](td::Result R) mutable { + TRY_STATUS_PROMISE(promise, R.move_as_status()); + td::actor::send_closure(SelfId, &AsyncStateSerializer::prepare_stats, std::move(promise)); + }); + return; + } + std::vector> vec; + vec.emplace_back("stateserializermasterchainseqno", td::to_string(last_block_id_.seqno())); + td::StringBuilder sb; + sb << current_status_; + if (current_status_ts_) { + sb << " (started " << (int)(td::Timestamp::now() - current_status_ts_) << "s ago)"; + } + if (!opts_->get_state_serializer_enabled() || auto_disabled_) { + sb << " (disabled)"; + } + vec.emplace_back("stateserializerstatus", sb.as_cslice().str()); + promise.set_result(std::move(vec)); +} + bool AsyncStateSerializer::need_serialize(BlockHandle handle) { if (handle->id().id.seqno == 0 || !handle->is_key_block()) { return false; diff --git 
a/validator/state-serializer.hpp b/validator/state-serializer.hpp index 1e7f5c9c..406ac350 100644 --- a/validator/state-serializer.hpp +++ b/validator/state-serializer.hpp @@ -36,6 +36,9 @@ class AsyncStateSerializer : public td::actor::Actor { UnixTime last_key_block_ts_ = 0; bool saved_to_db_ = true; + bool inited_block_id_ = false; + std::vector> wait_init_block_id_; + td::Ref opts_; bool auto_disabled_ = false; td::CancellationTokenSource cancellation_token_source_; @@ -95,6 +98,8 @@ class AsyncStateSerializer : public td::actor::Actor { promise.set_result(last_block_id_.id.seqno); } + void prepare_stats(td::Promise>> promise); + void update_last_known_key_block_ts(UnixTime ts) { last_known_key_block_ts_ = std::max(last_known_key_block_ts_, ts); } @@ -111,6 +116,9 @@ class AsyncStateSerializer : public td::actor::Actor { void update_options(td::Ref opts); void auto_disable_serializer(bool disabled); + + std::string current_status_ = "pending"; + td::Timestamp current_status_ts_ = td::Timestamp::never(); }; } // namespace validator diff --git a/validator/stats-provider.h b/validator/stats-provider.h new file mode 100644 index 00000000..e0a7f565 --- /dev/null +++ b/validator/stats-provider.h @@ -0,0 +1,105 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#pragma once + +#include "validator.h" +#include "common/AtomicRef.h" + +#include + +namespace ton { + +namespace validator { + +class StatsProvider { + public: + StatsProvider() = default; + StatsProvider(td::actor::ActorId manager, std::string prefix, + std::function>>)> callback) + : inited_(true), manager_(std::move(manager)) { + static std::atomic cur_idx{0}; + idx_ = cur_idx.fetch_add(1); + td::actor::send_closure(manager_, &ValidatorManagerInterface::register_stats_provider, idx_, std::move(prefix), + std::move(callback)); + } + StatsProvider(const StatsProvider&) = delete; + StatsProvider(StatsProvider&& other) noexcept + : inited_(other.inited_), idx_(other.idx_), manager_(std::move(other.manager_)) { + other.inited_ = false; + } + ~StatsProvider() { + if (inited_) { + td::actor::send_closure(manager_, &ValidatorManagerInterface::unregister_stats_provider, idx_); + } + } + + StatsProvider& operator=(const StatsProvider&) = delete; + StatsProvider& operator=(StatsProvider&& other) noexcept { + if (this != &other) { + inited_ = other.inited_; + idx_ = other.idx_; + manager_ = std::move(other.manager_); + other.inited_ = false; + } + return *this; + } + + bool inited() const { + return inited_; + } + + private: + bool inited_ = false; + td::uint64 idx_ = 0; + td::actor::ActorId manager_; +}; + +class ProcessStatus { + public: + ProcessStatus() = default; + ProcessStatus(td::actor::ActorId manager, std::string name) + : stats_provider_(std::move(manager), std::move(name), [value = value_](auto promise) { + auto status = value->load(); + if (status.is_null()) { + promise.set_error(td::Status::Error("empty")); + return; + } + std::vector> vec; + vec.emplace_back("", *status); + promise.set_value(std::move(vec)); + }) { + } + ProcessStatus(const ProcessStatus&) = delete; + ProcessStatus(ProcessStatus&& other) noexcept = default; + ProcessStatus& operator=(const ProcessStatus&) = delete; + ProcessStatus& operator=(ProcessStatus&& other) noexcept = default; + 
+ void set_status(std::string s) { + if (!value_) { + return; + } + value_->store(td::Ref>(true, std::move(s))); + } + + private: + std::shared_ptr>> value_ = std::make_shared>>(); + StatsProvider stats_provider_; +}; + +} // namespace validator + +} // namespace ton diff --git a/validator/validator.h b/validator/validator.h index 73065aa9..42b3f69a 100644 --- a/validator/validator.h +++ b/validator/validator.h @@ -20,6 +20,7 @@ #include #include +#include #include "td/actor/actor.h" @@ -292,6 +293,13 @@ class ValidatorManagerInterface : public td::actor::Actor { virtual void get_out_msg_queue_size(BlockIdExt block_id, td::Promise promise) = 0; virtual void update_options(td::Ref opts) = 0; + + virtual void register_stats_provider( + td::uint64 idx, std::string prefix, + std::function>>)> callback) { + } + virtual void unregister_stats_provider(td::uint64 idx) { + } }; } // namespace validator From aca51a8dae297c6b853860af9f0565be3d25527d Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Mon, 17 Feb 2025 10:14:12 +0300 Subject: [PATCH 47/61] Don't check external messages if out of sync --- validator/manager.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/validator/manager.cpp b/validator/manager.cpp index 8dce764d..2cc38211 100644 --- a/validator/manager.cpp +++ b/validator/manager.cpp @@ -430,6 +430,10 @@ void ValidatorManagerImpl::add_external_message(td::Ref msg, int pri ext_messages_hashes_[id.hash] = {priority, id}; } void ValidatorManagerImpl::check_external_message(td::BufferSlice data, td::Promise> promise) { + if (!started_) { + promise.set_error(td::Status::Error(ErrorCode::notready, "node not synced")); + return; + } auto state = do_get_last_liteserver_state(); if (state.is_null()) { promise.set_error(td::Status::Error(ErrorCode::notready, "not ready")); From 04f2bc13605e0189346f041baa515fbccab729f4 Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Wed, 19 Feb 2025 12:44:50 +0300 Subject: [PATCH 48/61] Fix downloading persistent states in 
WaitBlockState --- validator/manager.cpp | 25 ++++++++++++++++--------- validator/manager.hpp | 2 +- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/validator/manager.cpp b/validator/manager.cpp index 2cc38211..b0ac5409 100644 --- a/validator/manager.cpp +++ b/validator/manager.cpp @@ -700,11 +700,10 @@ void ValidatorManagerImpl::wait_block_state(BlockHandle handle, td::uint32 prior auto P = td::PromiseCreator::lambda([SelfId = actor_id(this), handle](td::Result> R) { td::actor::send_closure(SelfId, &ValidatorManagerImpl::finished_wait_state, handle, std::move(R)); }); - auto id = - td::actor::create_actor("waitstate", handle, priority, actor_id(this), - td::Timestamp::at(timeout.at() + 10.0), std::move(P), - get_block_persistent_state(handle->id())) - .release(); + auto id = td::actor::create_actor("waitstate", handle, priority, actor_id(this), + td::Timestamp::at(timeout.at() + 10.0), std::move(P), + get_block_persistent_state_to_download(handle->id())) + .release(); wait_state_[handle->id()].actor_ = id; it = wait_state_.find(handle->id()); } @@ -1150,9 +1149,10 @@ void ValidatorManagerImpl::finished_wait_state(BlockHandle handle, td::Result> R) { td::actor::send_closure(SelfId, &ValidatorManagerImpl::finished_wait_state, handle, std::move(R)); }); - auto id = td::actor::create_actor("waitstate", handle, X.second, actor_id(this), X.first, - std::move(P), get_block_persistent_state(handle->id())) - .release(); + auto id = + td::actor::create_actor("waitstate", handle, X.second, actor_id(this), X.first, + std::move(P), get_block_persistent_state_to_download(handle->id())) + .release(); it->second.actor_ = id; return; } @@ -3375,11 +3375,18 @@ void ValidatorManagerImpl::got_persistent_state_descriptions(std::vector ValidatorManagerImpl::get_block_persistent_state(BlockIdExt block_id) { +td::Ref ValidatorManagerImpl::get_block_persistent_state_to_download(BlockIdExt block_id) { + if (block_id.is_masterchain()) { + return {}; + } auto it = 
persistent_state_blocks_.find(block_id); if (it == persistent_state_blocks_.end()) { return {}; } + if (it->second->masterchain_id.seqno() + 16 >= min_confirmed_masterchain_seqno_) { + // Do not download persistent states during ordinary shard client sync + return {}; + } return it->second; } diff --git a/validator/manager.hpp b/validator/manager.hpp index 9e54c3f3..418deb35 100644 --- a/validator/manager.hpp +++ b/validator/manager.hpp @@ -734,7 +734,7 @@ class ValidatorManagerImpl : public ValidatorManager { void got_persistent_state_descriptions(std::vector> descs); void add_persistent_state_description_impl(td::Ref desc); - td::Ref get_block_persistent_state(BlockIdExt block_id); + td::Ref get_block_persistent_state_to_download(BlockIdExt block_id); private: bool need_monitor(ShardIdFull shard) const { From 8a08bf67a24519fcaef293d21266612c673761fa Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Thu, 20 Feb 2025 17:32:24 +0300 Subject: [PATCH 49/61] Experimental flags for speeding up broadcasts --- catchain/catchain-receiver.cpp | 6 ++-- overlay/overlay-fec.cpp | 12 ++++--- overlay/overlay-fec.hpp | 5 +-- overlay/overlay.cpp | 6 ++-- overlay/overlays.h | 1 + ton/ton-types.h | 1 + validator-engine/validator-engine.cpp | 43 ++++++++++++++++++++++++- validator-engine/validator-engine.hpp | 12 +++++++ validator/full-node-private-overlay.cpp | 16 ++++++--- validator/full-node-private-overlay.hpp | 16 ++++----- validator/full-node-shard.cpp | 13 ++++---- validator/full-node-shard.h | 2 +- validator/full-node-shard.hpp | 6 ++-- validator/full-node.cpp | 16 ++++----- validator/full-node.h | 8 ++++- validator/full-node.hpp | 4 +-- validator/validator-group.cpp | 1 + validator/validator-options.hpp | 7 ++++ validator/validator.h | 2 ++ 19 files changed, 131 insertions(+), 46 deletions(-) diff --git a/catchain/catchain-receiver.cpp b/catchain/catchain-receiver.cpp index edef9065..a6160383 100644 --- a/catchain/catchain-receiver.cpp +++ b/catchain/catchain-receiver.cpp @@ 
-526,10 +526,12 @@ void CatChainReceiverImpl::start_up() { for (td::uint32 i = 0; i < get_sources_cnt(); i++) { root_keys.emplace(get_source(i)->get_hash(), OVERLAY_MAX_ALLOWED_PACKET_SIZE); } - td::actor::send_closure(overlay_manager_, &overlay::Overlays::create_private_overlay, + overlay::OverlayOptions overlay_options; + overlay_options.broadcast_speed_multiplier_ = opts_.broadcast_speed_multiplier; + td::actor::send_closure(overlay_manager_, &overlay::Overlays::create_private_overlay_ex, get_source(local_idx_)->get_adnl_id(), overlay_full_id_.clone(), std::move(ids), make_callback(), overlay::OverlayPrivacyRules{0, 0, std::move(root_keys)}, - R"({ "type": "catchain" })"); + R"({ "type": "catchain" })", std::move(overlay_options)); CHECK(root_block_); diff --git a/overlay/overlay-fec.cpp b/overlay/overlay-fec.cpp index b29fce22..817d3b7c 100644 --- a/overlay/overlay-fec.cpp +++ b/overlay/overlay-fec.cpp @@ -32,7 +32,7 @@ void OverlayOutboundFecBroadcast::alarm() { fec_type_.size(), flags_, std::move(X.data), X.id, fec_type_, date_); } - alarm_timestamp() = td::Timestamp::in(0.010); + alarm_timestamp() = td::Timestamp::in(delay_); if (seqno_ >= to_send_) { stop(); @@ -46,8 +46,9 @@ void OverlayOutboundFecBroadcast::start_up() { OverlayOutboundFecBroadcast::OverlayOutboundFecBroadcast(td::BufferSlice data, td::uint32 flags, td::actor::ActorId overlay, - PublicKeyHash local_id) + PublicKeyHash local_id, double speed_multiplier) : flags_(flags) { + delay_ /= speed_multiplier; CHECK(data.size() <= (1 << 27)); local_id_ = local_id; overlay_ = std::move(overlay); @@ -63,9 +64,10 @@ OverlayOutboundFecBroadcast::OverlayOutboundFecBroadcast(td::BufferSlice data, t } td::actor::ActorId OverlayOutboundFecBroadcast::create( - td::BufferSlice data, td::uint32 flags, td::actor::ActorId overlay, PublicKeyHash local_id) { - return td::actor::create_actor(td::actor::ActorOptions().with_name("bcast"), - std::move(data), flags, overlay, local_id) + td::BufferSlice data, td::uint32 
flags, td::actor::ActorId overlay, PublicKeyHash local_id, + double speed_multiplier) { + return td::actor::create_actor( + td::actor::ActorOptions().with_name("bcast"), std::move(data), flags, overlay, local_id, speed_multiplier) .release(); } diff --git a/overlay/overlay-fec.hpp b/overlay/overlay-fec.hpp index a9cc3634..b72e830e 100644 --- a/overlay/overlay-fec.hpp +++ b/overlay/overlay-fec.hpp @@ -37,6 +37,7 @@ class OverlayOutboundFecBroadcast : public td::actor::Actor { PublicKeyHash local_id_; Overlay::BroadcastDataHash data_hash_; td::uint32 flags_ = 0; + double delay_ = 0.010; td::int32 date_; std::unique_ptr encoder_; td::actor::ActorId overlay_; @@ -45,9 +46,9 @@ class OverlayOutboundFecBroadcast : public td::actor::Actor { public: static td::actor::ActorId create(td::BufferSlice data, td::uint32 flags, td::actor::ActorId overlay, - PublicKeyHash local_id); + PublicKeyHash local_id, double speed_multiplier = 1.0); OverlayOutboundFecBroadcast(td::BufferSlice data, td::uint32 flags, td::actor::ActorId overlay, - PublicKeyHash local_id); + PublicKeyHash local_id, double speed_multiplier = 1.0); void alarm() override; void start_up() override; diff --git a/overlay/overlay.cpp b/overlay/overlay.cpp index 429c6a9c..30a40b1c 100644 --- a/overlay/overlay.cpp +++ b/overlay/overlay.cpp @@ -63,7 +63,7 @@ td::actor::ActorOwn Overlay::create_private( return td::actor::create_actor( overlay_actor_name(overlay_id), keyring, adnl, manager, dht_node, local_id, std::move(overlay_id), OverlayType::FixedMemberList, std::move(nodes), std::vector(), OverlayMemberCertificate{}, - std::move(callback), std::move(rules), std::move(scope)); + std::move(callback), std::move(rules), std::move(scope), std::move(opts)); } td::actor::ActorOwn Overlay::create_semiprivate( @@ -99,6 +99,7 @@ OverlayImpl::OverlayImpl(td::actor::ActorId keyring, td::actor overlay_id_ = id_full_.compute_short_id(); frequent_dht_lookup_ = opts_.frequent_dht_lookup_; peer_list_.local_member_flags_ = 
opts_.local_overlay_member_flags_; + opts_.broadcast_speed_multiplier_ = std::max(opts_.broadcast_speed_multiplier_, 1e-9); VLOG(OVERLAY_INFO) << this << ": creating"; @@ -490,7 +491,8 @@ void OverlayImpl::send_broadcast_fec(PublicKeyHash send_as, td::uint32 flags, td VLOG(OVERLAY_WARNING) << "broadcast source certificate is invalid"; return; } - OverlayOutboundFecBroadcast::create(std::move(data), flags, actor_id(this), send_as); + OverlayOutboundFecBroadcast::create(std::move(data), flags, actor_id(this), send_as, + opts_.broadcast_speed_multiplier_); } void OverlayImpl::print(td::StringBuilder &sb) { diff --git a/overlay/overlays.h b/overlay/overlays.h index c0385fc7..5eb63b13 100644 --- a/overlay/overlays.h +++ b/overlay/overlays.h @@ -269,6 +269,7 @@ struct OverlayOptions { td::uint32 nodes_to_send_ = 4; td::uint32 propagate_broadcast_to_ = 5; td::uint32 default_permanent_members_flags_ = 0; + double broadcast_speed_multiplier_ = 1.0; }; class Overlays : public td::actor::Actor { diff --git a/ton/ton-types.h b/ton/ton-types.h index 2447a8c5..c7aff644 100644 --- a/ton/ton-types.h +++ b/ton/ton-types.h @@ -493,6 +493,7 @@ struct CatChainOptions { td::uint64 max_block_height_coeff = 0; bool debug_disable_db = false; + double broadcast_speed_multiplier = 1.0; }; struct ValidatorSessionConfig { diff --git a/validator-engine/validator-engine.cpp b/validator-engine/validator-engine.cpp index 81b8278f..2ea04e18 100644 --- a/validator-engine/validator-engine.cpp +++ b/validator-engine/validator-engine.cpp @@ -1504,6 +1504,7 @@ td::Status ValidatorEngine::load_global_config() { } validator_options_.write().set_hardforks(std::move(h)); validator_options_.write().set_fast_state_serializer_enabled(fast_state_serializer_enabled_); + validator_options_.write().set_catchain_broadcast_speed_multiplier(broadcast_speed_multiplier_catchain_); return td::Status::OK(); } @@ -2004,9 +2005,13 @@ void ValidatorEngine::start_full_node() { R.ensure(); td::actor::send_closure(SelfId, 
&ValidatorEngine::started_full_node); }); + ton::validator::fullnode::FullNodeOptions full_node_options{ + .config_ = config_.full_node_config, + .public_broadcast_speed_multiplier_ = broadcast_speed_multiplier_public_, + .private_broadcast_speed_multiplier_ = broadcast_speed_multiplier_private_}; full_node_ = ton::validator::fullnode::FullNode::create( short_id, ton::adnl::AdnlNodeIdShort{config_.full_node}, validator_options_->zero_block_id().file_hash, - config_.full_node_config, keyring_.get(), adnl_.get(), rldp_.get(), rldp2_.get(), + full_node_options, keyring_.get(), adnl_.get(), rldp_.get(), rldp2_.get(), default_dht_node_.is_zero() ? td::actor::ActorId{} : dht_nodes_[default_dht_node_].get(), overlay_manager_.get(), validator_manager_.get(), full_node_client_.get(), db_root_, std::move(P)); for (auto &v : config_.validators) { @@ -4562,6 +4567,42 @@ int main(int argc, char *argv[]) { "disable persistent state serializer (similar to set-state-serializer-enabled 0 in validator console)", [&]() { acts.push_back([&x]() { td::actor::send_closure(x, &ValidatorEngine::set_state_serializer_disabled_flag); }); }); + p.add_checked_option( + '\0', "broadcast-speed-catchain", + "multiplier for broadcast speed in catchain overlays (experimental, default is 1.0, which is ~300 KB/s)", + [&](td::Slice s) -> td::Status { + auto v = td::to_double(s); + if (v <= 0.0) { + return td::Status::Error("broadcast-speed-catchain should be positive"); + } + acts.push_back( + [&x, v]() { td::actor::send_closure(x, &ValidatorEngine::set_broadcast_speed_multiplier_catchain, v); }); + return td::Status::OK(); + }); + p.add_checked_option( + '\0', "broadcast-speed-public", + "multiplier for broadcast speed in public shard overlays (experimental, default is 1.0, which is ~300 KB/s)", + [&](td::Slice s) -> td::Status { + auto v = td::to_double(s); + if (v <= 0.0) { + return td::Status::Error("broadcast-speed-public should be positive"); + } + acts.push_back( + [&x, v]() { 
td::actor::send_closure(x, &ValidatorEngine::set_broadcast_speed_multiplier_public, v); }); + return td::Status::OK(); + }); + p.add_checked_option( + '\0', "broadcast-speed-private", + "multiplier for broadcast speed in private block overlays (experimental, default is 1.0, which is ~300 KB/s)", + [&](td::Slice s) -> td::Status { + auto v = td::to_double(s); + if (v <= 0.0) { + return td::Status::Error("broadcast-speed-private should be positive"); + } + acts.push_back( + [&x, v]() { td::actor::send_closure(x, &ValidatorEngine::set_broadcast_speed_multiplier_private, v); }); + return td::Status::OK(); + }); auto S = p.run(argc, argv); if (S.is_error()) { LOG(ERROR) << "failed to parse options: " << S.move_as_error(); diff --git a/validator-engine/validator-engine.hpp b/validator-engine/validator-engine.hpp index 6c2f5c4b..e0dc91f1 100644 --- a/validator-engine/validator-engine.hpp +++ b/validator-engine/validator-engine.hpp @@ -229,6 +229,9 @@ class ValidatorEngine : public td::actor::Actor { bool not_all_shards_ = false; std::vector add_shard_cmds_; bool state_serializer_disabled_flag_ = false; + double broadcast_speed_multiplier_catchain_ = 1.0; + double broadcast_speed_multiplier_public_ = 1.0; + double broadcast_speed_multiplier_private_ = 1.0; std::set unsafe_catchains_; std::map> unsafe_catchain_rotations_; @@ -329,6 +332,15 @@ class ValidatorEngine : public td::actor::Actor { void set_state_serializer_disabled_flag() { state_serializer_disabled_flag_ = true; } + void set_broadcast_speed_multiplier_catchain(double value) { + broadcast_speed_multiplier_catchain_ = value; + } + void set_broadcast_speed_multiplier_public(double value) { + broadcast_speed_multiplier_public_ = value; + } + void set_broadcast_speed_multiplier_private(double value) { + broadcast_speed_multiplier_private_ = value; + } void start_up() override; ValidatorEngine() { diff --git a/validator/full-node-private-overlay.cpp b/validator/full-node-private-overlay.cpp index 1acfbd4e..f86323fc 
100644 --- a/validator/full-node-private-overlay.cpp +++ b/validator/full-node-private-overlay.cpp @@ -264,8 +264,11 @@ void FullNodePrivateBlockOverlay::init() { overlay::OverlayPrivacyRules rules{overlay::Overlays::max_fec_broadcast_size(), overlay::CertificateFlags::AllowFec | overlay::CertificateFlags::Trusted, {}}; - td::actor::send_closure(overlays_, &overlay::Overlays::create_private_overlay, local_id_, overlay_id_full_.clone(), - nodes_, std::make_unique(actor_id(this)), rules, R"({ "type": "private-blocks" })"); + overlay::OverlayOptions overlay_options; + overlay_options.broadcast_speed_multiplier_ = opts_.private_broadcast_speed_multiplier_; + td::actor::send_closure(overlays_, &overlay::Overlays::create_private_overlay_ex, local_id_, overlay_id_full_.clone(), + nodes_, std::make_unique(actor_id(this)), rules, R"({ "type": "private-blocks" })", + overlay_options); td::actor::send_closure(rldp_, &rldp::Rldp::add_id, local_id_); td::actor::send_closure(rldp2_, &rldp2::Rldp::add_id, local_id_); @@ -366,7 +369,7 @@ void FullNodeCustomOverlay::receive_broadcast(PublicKeyHash src, td::BufferSlice } void FullNodeCustomOverlay::send_external_message(td::BufferSlice data) { - if (!inited_ || config_.ext_messages_broadcast_disabled_) { + if (!inited_ || opts_.config_.ext_messages_broadcast_disabled_) { return; } VLOG(FULL_NODE_DEBUG) << "Sending external message to custom overlay \"" << name_ << "\""; @@ -472,10 +475,13 @@ void FullNodeCustomOverlay::init() { authorized_keys[sender.pubkey_hash()] = overlay::Overlays::max_fec_broadcast_size(); } overlay::OverlayPrivacyRules rules{overlay::Overlays::max_fec_broadcast_size(), 0, std::move(authorized_keys)}; + overlay::OverlayOptions overlay_options; + overlay_options.broadcast_speed_multiplier_ = opts_.private_broadcast_speed_multiplier_; td::actor::send_closure( - overlays_, &overlay::Overlays::create_private_overlay, local_id_, overlay_id_full_.clone(), nodes_, + overlays_, 
&overlay::Overlays::create_private_overlay_ex, local_id_, overlay_id_full_.clone(), nodes_, std::make_unique(actor_id(this)), rules, - PSTRING() << R"({ "type": "custom-overlay", "name": ")" << td::format::Escaped{name_} << R"(" })"); + PSTRING() << R"({ "type": "custom-overlay", "name": ")" << td::format::Escaped{name_} << R"(" })", + overlay_options); td::actor::send_closure(rldp_, &rldp::Rldp::add_id, local_id_); td::actor::send_closure(rldp2_, &rldp2::Rldp::add_id, local_id_); diff --git a/validator/full-node-private-overlay.hpp b/validator/full-node-private-overlay.hpp index 1e29f8c9..70e196ea 100644 --- a/validator/full-node-private-overlay.hpp +++ b/validator/full-node-private-overlay.hpp @@ -50,14 +50,14 @@ class FullNodePrivateBlockOverlay : public td::actor::Actor { void collect_validator_telemetry(std::string filename); void set_config(FullNodeConfig config) { - config_ = std::move(config); + opts_.config_ = std::move(config); } void start_up() override; void tear_down() override; FullNodePrivateBlockOverlay(adnl::AdnlNodeIdShort local_id, std::vector nodes, - FileHash zero_state_file_hash, FullNodeConfig config, + FileHash zero_state_file_hash, FullNodeOptions opts, td::actor::ActorId keyring, td::actor::ActorId adnl, td::actor::ActorId rldp, td::actor::ActorId rldp2, td::actor::ActorId overlays, @@ -66,7 +66,7 @@ class FullNodePrivateBlockOverlay : public td::actor::Actor { : local_id_(local_id) , nodes_(std::move(nodes)) , zero_state_file_hash_(zero_state_file_hash) - , config_(config) + , opts_(opts) , keyring_(keyring) , adnl_(adnl) , rldp_(rldp) @@ -80,7 +80,7 @@ class FullNodePrivateBlockOverlay : public td::actor::Actor { adnl::AdnlNodeIdShort local_id_; std::vector nodes_; FileHash zero_state_file_hash_; - FullNodeConfig config_; + FullNodeOptions opts_; bool enable_compression_ = true; td::actor::ActorId keyring_; @@ -126,14 +126,14 @@ class FullNodeCustomOverlay : public td::actor::Actor { td::BufferSlice data); void set_config(FullNodeConfig 
config) { - config_ = std::move(config); + opts_.config_ = std::move(config); } void start_up() override; void tear_down() override; FullNodeCustomOverlay(adnl::AdnlNodeIdShort local_id, CustomOverlayParams params, FileHash zero_state_file_hash, - FullNodeConfig config, td::actor::ActorId keyring, + FullNodeOptions opts, td::actor::ActorId keyring, td::actor::ActorId adnl, td::actor::ActorId rldp, td::actor::ActorId rldp2, td::actor::ActorId overlays, td::actor::ActorId validator_manager, @@ -144,7 +144,7 @@ class FullNodeCustomOverlay : public td::actor::Actor { , msg_senders_(std::move(params.msg_senders_)) , block_senders_(std::move(params.block_senders_)) , zero_state_file_hash_(zero_state_file_hash) - , config_(config) + , opts_(opts) , keyring_(keyring) , adnl_(adnl) , rldp_(rldp) @@ -161,7 +161,7 @@ class FullNodeCustomOverlay : public td::actor::Actor { std::map msg_senders_; std::set block_senders_; FileHash zero_state_file_hash_; - FullNodeConfig config_; + FullNodeOptions opts_; td::actor::ActorId keyring_; td::actor::ActorId adnl_; diff --git a/validator/full-node-shard.cpp b/validator/full-node-shard.cpp index 7d33a195..ac0eb768 100644 --- a/validator/full-node-shard.cpp +++ b/validator/full-node-shard.cpp @@ -105,6 +105,7 @@ void FullNodeShardImpl::create_overlay() { }; overlay::OverlayOptions opts; opts.announce_self_ = active_; + opts.broadcast_speed_multiplier_ = opts_.public_broadcast_speed_multiplier_; td::actor::send_closure(overlays_, &overlay::Overlays::create_public_overlay_ex, adnl_id_, overlay_id_full_.clone(), std::make_unique(actor_id(this)), rules_, PSTRING() << "{ \"type\": \"shard\", \"shard_id\": " << get_shard() @@ -132,7 +133,7 @@ void FullNodeShardImpl::check_broadcast(PublicKeyHash src, td::BufferSlice broad if (!processed_ext_msg_broadcasts_.insert(hash).second) { return promise.set_error(td::Status::Error("duplicate external message broadcast")); } - if (config_.ext_messages_broadcast_disabled_) { + if 
(opts_.config_.ext_messages_broadcast_disabled_) { promise.set_error(td::Status::Error("rebroadcasting external messages is disabled")); promise = [manager = validator_manager_, message = q->message_->data_.clone()](td::Result R) mutable { if (R.is_ok()) { @@ -850,7 +851,7 @@ void FullNodeShardImpl::send_ihr_message(td::BufferSlice data) { } void FullNodeShardImpl::send_external_message(td::BufferSlice data) { - if (config_.ext_messages_broadcast_disabled_) { + if (opts_.config_.ext_messages_broadcast_disabled_) { return; } if (!client_.empty()) { @@ -1367,7 +1368,7 @@ void FullNodeShardImpl::get_stats_extra(td::Promise promise) { } FullNodeShardImpl::FullNodeShardImpl(ShardIdFull shard, PublicKeyHash local_id, adnl::AdnlNodeIdShort adnl_id, - FileHash zero_state_file_hash, FullNodeConfig config, + FileHash zero_state_file_hash, FullNodeOptions opts, td::actor::ActorId keyring, td::actor::ActorId adnl, td::actor::ActorId rldp, td::actor::ActorId rldp2, td::actor::ActorId overlays, @@ -1387,17 +1388,17 @@ FullNodeShardImpl::FullNodeShardImpl(ShardIdFull shard, PublicKeyHash local_id, , client_(client) , full_node_(full_node) , active_(active) - , config_(config) { + , opts_(opts) { } td::actor::ActorOwn FullNodeShard::create( ShardIdFull shard, PublicKeyHash local_id, adnl::AdnlNodeIdShort adnl_id, FileHash zero_state_file_hash, - FullNodeConfig config, td::actor::ActorId keyring, td::actor::ActorId adnl, + FullNodeOptions opts, td::actor::ActorId keyring, td::actor::ActorId adnl, td::actor::ActorId rldp, td::actor::ActorId rldp2, td::actor::ActorId overlays, td::actor::ActorId validator_manager, td::actor::ActorId client, td::actor::ActorId full_node, bool active) { return td::actor::create_actor(PSTRING() << "tonnode" << shard.to_str(), shard, local_id, adnl_id, - zero_state_file_hash, config, keyring, adnl, rldp, rldp2, overlays, + zero_state_file_hash, opts, keyring, adnl, rldp, rldp2, overlays, validator_manager, client, full_node, active); } diff --git 
a/validator/full-node-shard.h b/validator/full-node-shard.h index 16945325..5898db80 100644 --- a/validator/full-node-shard.h +++ b/validator/full-node-shard.h @@ -76,7 +76,7 @@ class FullNodeShard : public td::actor::Actor { static td::actor::ActorOwn create( ShardIdFull shard, PublicKeyHash local_id, adnl::AdnlNodeIdShort adnl_id, FileHash zero_state_file_hash, - FullNodeConfig config, td::actor::ActorId keyring, td::actor::ActorId adnl, + FullNodeOptions opts, td::actor::ActorId keyring, td::actor::ActorId adnl, td::actor::ActorId rldp, td::actor::ActorId rldp2, td::actor::ActorId overlays, td::actor::ActorId validator_manager, td::actor::ActorId client, td::actor::ActorId full_node, bool active); diff --git a/validator/full-node-shard.hpp b/validator/full-node-shard.hpp index 86748134..fb3eef76 100644 --- a/validator/full-node-shard.hpp +++ b/validator/full-node-shard.hpp @@ -86,7 +86,7 @@ class FullNodeShardImpl : public FullNodeShard { void set_active(bool active) override; void set_config(FullNodeConfig config) override { - config_ = config; + opts_.config_ = config; } void try_get_next_block(td::Timestamp timestamp, td::Promise promise); @@ -222,7 +222,7 @@ class FullNodeShardImpl : public FullNodeShard { } FullNodeShardImpl(ShardIdFull shard, PublicKeyHash local_id, adnl::AdnlNodeIdShort adnl_id, - FileHash zero_state_file_hash, FullNodeConfig config, td::actor::ActorId keyring, + FileHash zero_state_file_hash, FullNodeOptions opts, td::actor::ActorId keyring, td::actor::ActorId adnl, td::actor::ActorId rldp, td::actor::ActorId rldp2, td::actor::ActorId overlays, td::actor::ActorId validator_manager, @@ -269,7 +269,7 @@ class FullNodeShardImpl : public FullNodeShard { bool active_; - FullNodeConfig config_; + FullNodeOptions opts_; std::set my_ext_msg_broadcasts_; std::set processed_ext_msg_broadcasts_; diff --git a/validator/full-node.cpp b/validator/full-node.cpp index 658cb34e..e1951c36 100644 --- a/validator/full-node.cpp +++ b/validator/full-node.cpp 
@@ -139,7 +139,7 @@ void FullNodeImpl::update_adnl_id(adnl::AdnlNodeIdShort adnl_id, td::Promise state, std void FullNodeImpl::update_shard_actor(ShardIdFull shard, bool active) { ShardInfo &info = shards_[shard]; if (info.actor.empty()) { - info.actor = FullNodeShard::create(shard, local_id_, adnl_id_, zero_state_file_hash_, config_, keyring_, adnl_, rldp_, + info.actor = FullNodeShard::create(shard, local_id_, adnl_id_, zero_state_file_hash_, opts_, keyring_, adnl_, rldp_, rldp2_, overlays_, validator_manager_, client_, actor_id(this), active); if (!all_validators_.empty()) { td::actor::send_closure(info.actor, &FullNodeShard::update_validators, all_validators_, sign_cert_by_); @@ -717,7 +717,7 @@ void FullNodeImpl::create_private_block_overlay(PublicKeyHash key) { nodes.push_back(p.second); } private_block_overlays_[key] = td::actor::create_actor( - "BlocksPrivateOverlay", current_validators_[key], std::move(nodes), zero_state_file_hash_, config_, keyring_, + "BlocksPrivateOverlay", current_validators_[key], std::move(nodes), zero_state_file_hash_, opts_, keyring_, adnl_, rldp_, rldp2_, overlays_, validator_manager_, actor_id(this)); update_validator_telemetry_collector(); } @@ -735,7 +735,7 @@ void FullNodeImpl::update_custom_overlay(CustomOverlayInfo &overlay) { old_actors.erase(it); } else { overlay.actors_[local_id] = td::actor::create_actor( - "CustomOverlay", local_id, params, zero_state_file_hash_, config_, keyring_, adnl_, rldp_, rldp2_, + "CustomOverlay", local_id, params, zero_state_file_hash_, opts_, keyring_, adnl_, rldp_, rldp2_, overlays_, validator_manager_, actor_id(this)); } } @@ -794,7 +794,7 @@ void FullNodeImpl::send_block_candidate_broadcast_to_custom_overlays(const Block } FullNodeImpl::FullNodeImpl(PublicKeyHash local_id, adnl::AdnlNodeIdShort adnl_id, FileHash zero_state_file_hash, - FullNodeConfig config, td::actor::ActorId keyring, + FullNodeOptions opts, td::actor::ActorId keyring, td::actor::ActorId adnl, td::actor::ActorId rldp, 
td::actor::ActorId rldp2, td::actor::ActorId dht, td::actor::ActorId overlays, @@ -814,16 +814,16 @@ FullNodeImpl::FullNodeImpl(PublicKeyHash local_id, adnl::AdnlNodeIdShort adnl_id , client_(client) , db_root_(db_root) , started_promise_(std::move(started_promise)) - , config_(config) { + , opts_(opts) { } td::actor::ActorOwn FullNode::create( - ton::PublicKeyHash local_id, adnl::AdnlNodeIdShort adnl_id, FileHash zero_state_file_hash, FullNodeConfig config, + ton::PublicKeyHash local_id, adnl::AdnlNodeIdShort adnl_id, FileHash zero_state_file_hash, FullNodeOptions opts, td::actor::ActorId keyring, td::actor::ActorId adnl, td::actor::ActorId rldp, td::actor::ActorId rldp2, td::actor::ActorId dht, td::actor::ActorId overlays, td::actor::ActorId validator_manager, td::actor::ActorId client, std::string db_root, td::Promise started_promise) { - return td::actor::create_actor("fullnode", local_id, adnl_id, zero_state_file_hash, config, keyring, + return td::actor::create_actor("fullnode", local_id, adnl_id, zero_state_file_hash, opts, keyring, adnl, rldp, rldp2, dht, overlays, validator_manager, client, db_root, std::move(started_promise)); } diff --git a/validator/full-node.h b/validator/full-node.h index fdb1bf3b..555082dc 100644 --- a/validator/full-node.h +++ b/validator/full-node.h @@ -55,6 +55,12 @@ struct FullNodeConfig { bool ext_messages_broadcast_disabled_ = false; }; +struct FullNodeOptions { + FullNodeConfig config_; + double public_broadcast_speed_multiplier_ = 1.0; + double private_broadcast_speed_multiplier_ = 1.0; +}; + struct CustomOverlayParams { std::string name_; std::vector nodes_; @@ -107,7 +113,7 @@ class FullNode : public td::actor::Actor { enum { broadcast_mode_public = 1, broadcast_mode_private_block = 2, broadcast_mode_custom = 4 }; static td::actor::ActorOwn create( - ton::PublicKeyHash local_id, adnl::AdnlNodeIdShort adnl_id, FileHash zero_state_file_hash, FullNodeConfig config, + ton::PublicKeyHash local_id, adnl::AdnlNodeIdShort adnl_id, 
FileHash zero_state_file_hash, FullNodeOptions opts, td::actor::ActorId keyring, td::actor::ActorId adnl, td::actor::ActorId rldp, td::actor::ActorId rldp2, td::actor::ActorId dht, td::actor::ActorId overlays, td::actor::ActorId validator_manager, diff --git a/validator/full-node.hpp b/validator/full-node.hpp index 0ea6fa0b..b4c79363 100644 --- a/validator/full-node.hpp +++ b/validator/full-node.hpp @@ -98,7 +98,7 @@ class FullNodeImpl : public FullNode { void start_up() override; FullNodeImpl(PublicKeyHash local_id, adnl::AdnlNodeIdShort adnl_id, FileHash zero_state_file_hash, - FullNodeConfig config, td::actor::ActorId keyring, td::actor::ActorId adnl, + FullNodeOptions opts, td::actor::ActorId keyring, td::actor::ActorId adnl, td::actor::ActorId rldp, td::actor::ActorId rldp2, td::actor::ActorId dht, td::actor::ActorId overlays, td::actor::ActorId validator_manager, @@ -141,7 +141,7 @@ class FullNodeImpl : public FullNode { std::set local_keys_; td::Promise started_promise_; - FullNodeConfig config_; + FullNodeOptions opts_; std::map> private_block_overlays_; bool broadcast_block_candidates_in_public_overlay_ = false; diff --git a/validator/validator-group.cpp b/validator/validator-group.cpp index 1817180d..110ccd81 100644 --- a/validator/validator-group.cpp +++ b/validator/validator-group.cpp @@ -373,6 +373,7 @@ void ValidatorGroup::create_session() { } CHECK(found); + config_.catchain_opts.broadcast_speed_multiplier = opts_->get_catchain_broadcast_speed_multiplier(); if (!config_.new_catchain_ids) { session_ = validatorsession::ValidatorSession::create(session_id_, config_, local_id_, std::move(vec), make_validator_session_callback(), keyring_, adnl_, rldp_, diff --git a/validator/validator-options.hpp b/validator/validator-options.hpp index e958d886..ace6b106 100644 --- a/validator/validator-options.hpp +++ b/validator/validator-options.hpp @@ -154,6 +154,9 @@ struct ValidatorManagerOptionsImpl : public ValidatorManagerOptions { bool 
get_fast_state_serializer_enabled() const override { return fast_state_serializer_enabled_; } + double get_catchain_broadcast_speed_multiplier() const override { + return catchain_broadcast_speed_multipliers_; + } void set_zero_block_id(BlockIdExt block_id) override { zero_block_id_ = block_id; @@ -249,6 +252,9 @@ struct ValidatorManagerOptionsImpl : public ValidatorManagerOptions { void set_fast_state_serializer_enabled(bool value) override { fast_state_serializer_enabled_ = value; } + void set_catchain_broadcast_speed_multiplier(double value) override { + catchain_broadcast_speed_multipliers_ = value; + } ValidatorManagerOptionsImpl *make_copy() const override { return new ValidatorManagerOptionsImpl(*this); @@ -302,6 +308,7 @@ struct ValidatorManagerOptionsImpl : public ValidatorManagerOptions { bool state_serializer_enabled_ = true; td::Ref collator_options_{true}; bool fast_state_serializer_enabled_ = false; + double catchain_broadcast_speed_multipliers_; }; } // namespace validator diff --git a/validator/validator.h b/validator/validator.h index 42b3f69a..5d6c0173 100644 --- a/validator/validator.h +++ b/validator/validator.h @@ -116,6 +116,7 @@ struct ValidatorManagerOptions : public td::CntObject { virtual bool get_state_serializer_enabled() const = 0; virtual td::Ref get_collator_options() const = 0; virtual bool get_fast_state_serializer_enabled() const = 0; + virtual double get_catchain_broadcast_speed_multiplier() const = 0; virtual void set_zero_block_id(BlockIdExt block_id) = 0; virtual void set_init_block_id(BlockIdExt block_id) = 0; @@ -148,6 +149,7 @@ struct ValidatorManagerOptions : public td::CntObject { virtual void set_state_serializer_enabled(bool value) = 0; virtual void set_collator_options(td::Ref value) = 0; virtual void set_fast_state_serializer_enabled(bool value) = 0; + virtual void set_catchain_broadcast_speed_multiplier(double value) = 0; static td::Ref create( BlockIdExt zero_block_id, BlockIdExt init_block_id, From 
61b9155d15d5a115f65fbb33bf095a7d53f96fba Mon Sep 17 00:00:00 2001 From: Sild Date: Fri, 21 Feb 2025 08:46:33 +0100 Subject: [PATCH 50/61] dont use instance after std::move (#1528) Co-authored-by: Dmitrii Korchagin --- tonlib/tonlib/TonlibClient.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tonlib/tonlib/TonlibClient.cpp b/tonlib/tonlib/TonlibClient.cpp index d917a57a..d73e715c 100644 --- a/tonlib/tonlib/TonlibClient.cpp +++ b/tonlib/tonlib/TonlibClient.cpp @@ -4619,6 +4619,8 @@ void TonlibClient::get_libraries(ton::BlockIdExt blkid, std::vector std::vector> result_entries; result_entries.reserve(library_list.size()); std::vector not_cached_hashes; + not_cached_hashes.reserve(library_list.size()); + for (auto& library_hash : library_list) { if (libraries.key_exists(library_hash)) { auto library_content = vm::std_boc_serialize(libraries.lookup_ref(library_hash)).move_as_ok().as_slice().str(); @@ -4633,7 +4635,8 @@ void TonlibClient::get_libraries(ton::BlockIdExt blkid, std::vector return; } - client_.send_query(ton::lite_api::liteServer_getLibrariesWithProof(ton::create_tl_lite_block_id(blkid), 1, std::move(not_cached_hashes)), + auto missed_lib_ids = not_cached_hashes; + client_.send_query(ton::lite_api::liteServer_getLibrariesWithProof(ton::create_tl_lite_block_id(blkid), 1, std::move(missed_lib_ids)), promise.wrap([self=this, blkid, result_entries = std::move(result_entries), not_cached_hashes] (td::Result> r_libraries) mutable -> td::Result> { From 1e8fdc05618bad116db34399ae691401e5cae1fb Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Fri, 21 Feb 2025 10:52:23 +0300 Subject: [PATCH 51/61] Fix updateInit offset in storage (#1525) --- storage/PeerActor.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/storage/PeerActor.cpp b/storage/PeerActor.cpp index 48d45626..e140b4ce 100644 --- a/storage/PeerActor.cpp +++ b/storage/PeerActor.cpp @@ -251,7 +251,7 @@ void PeerActor::loop_update_init() { } s = 
s.substr(peer_init_offset_, UPDATE_INIT_BLOCK_SIZE); auto query = create_update_query(ton::create_tl_object( - td::BufferSlice(s), (int)peer_init_offset_, to_ton_api(node_state))); + td::BufferSlice(s), (int)peer_init_offset_ * 8, to_ton_api(node_state))); // take care about update_state_query initial state update_state_query_.state = node_state; @@ -502,11 +502,11 @@ void PeerActor::process_update_peer_parts(const tl_object_ptr(offset + i)); } From 1b70e483272c193926633e41eb357a700b0bc293 Mon Sep 17 00:00:00 2001 From: Marat <98183742+dungeon-master-666@users.noreply.github.com> Date: Sun, 23 Feb 2025 13:01:33 +0100 Subject: [PATCH 52/61] Add option to build static tonlibjson and emulator (#1527) * add option to build static tonlibjson and emulator * do not export cmake project in case of static tonlibjson --- emulator/CMakeLists.txt | 8 +++----- tonlib/CMakeLists.txt | 8 +++++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/emulator/CMakeLists.txt b/emulator/CMakeLists.txt index a0799541..663c8fd2 100644 --- a/emulator/CMakeLists.txt +++ b/emulator/CMakeLists.txt @@ -1,8 +1,6 @@ cmake_minimum_required(VERSION 3.5 FATAL_ERROR) -if (NOT OPENSSL_FOUND) - find_package(OpenSSL REQUIRED) -endif() +option(EMULATOR_STATIC "Build emulator as static library" OFF) set(EMULATOR_STATIC_SOURCE transaction-emulator.cpp @@ -22,7 +20,7 @@ include(GenerateExportHeader) add_library(emulator_static STATIC ${EMULATOR_STATIC_SOURCE}) target_link_libraries(emulator_static PUBLIC ton_crypto smc-envelope) -if (USE_EMSCRIPTEN) +if (EMULATOR_STATIC OR USE_EMSCRIPTEN) add_library(emulator STATIC ${EMULATOR_SOURCE}) else() add_library(emulator SHARED ${EMULATOR_SOURCE}) @@ -35,7 +33,7 @@ else() endif() generate_export_header(emulator EXPORT_FILE_NAME ${CMAKE_CURRENT_BINARY_DIR}/emulator_export.h) -if (USE_EMSCRIPTEN) +if (EMULATOR_STATIC OR USE_EMSCRIPTEN) target_compile_definitions(emulator PUBLIC EMULATOR_STATIC_DEFINE) endif() target_include_directories(emulator PUBLIC 
diff --git a/tonlib/CMakeLists.txt b/tonlib/CMakeLists.txt index eb538361..3dbd628d 100644 --- a/tonlib/CMakeLists.txt +++ b/tonlib/CMakeLists.txt @@ -1,5 +1,7 @@ cmake_minimum_required(VERSION 3.5 FATAL_ERROR) +option(TONLIBJSON_STATIC "Build tonlibjson as static library" OFF) + if (NOT OPENSSL_FOUND) find_package(OpenSSL REQUIRED) endif() @@ -90,7 +92,7 @@ set(TONLIB_JSON_HEADERS tonlib/tonlib_client_json.h) set(TONLIB_JSON_SOURCE tonlib/tonlib_client_json.cpp) include(GenerateExportHeader) -if (USE_EMSCRIPTEN) +if (TONLIBJSON_STATIC OR USE_EMSCRIPTEN) add_library(tonlibjson STATIC ${TONLIB_JSON_SOURCE}) else() add_library(tonlibjson SHARED ${TONLIB_JSON_SOURCE}) @@ -103,7 +105,7 @@ else() endif() generate_export_header(tonlibjson EXPORT_FILE_NAME ${CMAKE_CURRENT_BINARY_DIR}/tonlib/tonlibjson_export.h) -if (USE_EMSCRIPTEN) +if (TONLIBJSON_STATIC OR USE_EMSCRIPTEN) target_compile_definitions(tonlibjson PUBLIC TONLIBJSON_STATIC_DEFINE) endif() target_include_directories(tonlibjson PUBLIC @@ -157,7 +159,7 @@ endif() install(FILES ${TONLIB_JSON_HEADERS} ${CMAKE_CURRENT_BINARY_DIR}/tonlib/tonlibjson_export.h DESTINATION include/tonlib/) -if (NOT USE_EMSCRIPTEN) +if (NOT USE_EMSCRIPTEN AND NOT TONLIBJSON_STATIC) install(EXPORT Tonlib FILE TonlibTargets.cmake NAMESPACE Tonlib:: From 1389ff678910f737e0fcd1ed55152d90620de66a Mon Sep 17 00:00:00 2001 From: tolk-vm Date: Mon, 10 Feb 2025 12:57:25 +0400 Subject: [PATCH 53/61] [Tolk] Change order of assignment evaluation, lhs first In FunC (and in Tolk before), the assignment > lhs = rhs evaluation order (at IR level) was "rhs first, lhs second". In practice, this did not matter, because lhs could only be a primitive: > (v1, v2) = getValue() Left side of assignment actually has no "evaluation". Since Tolk implemented indexed access, there could be > getTensor().0 = getValue() or (in the future) > getObject().field = getValue() where evaluation order becomes significant. 
Now evaluation order will be to "lhs first, rhs second" (more expected from user's point of view), which will become significant when building control flow graph. --- tolk-tester/tests/a10.tolk | 17 +- .../tests/allow_post_modification.tolk | 2 +- tolk-tester/tests/assignment-tests.tolk | 210 ++++++++ tolk-tester/tests/codegen_check_demo.tolk | 2 +- tolk-tester/tests/indexed-access.tolk | 84 +++- tolk-tester/tests/invalid-assign-1.tolk | 6 +- tolk-tester/tests/invalid-assign-2.tolk | 9 +- tolk-tester/tests/invalid-assign-3.tolk | 7 +- tolk-tester/tests/invalid-assign-4.tolk | 10 - tolk-tester/tests/invalid-assign-5.tolk | 9 - tolk-tester/tests/invalid-assign-6.tolk | 9 - tolk-tester/tests/invalid-assign-7.tolk | 8 - tolk-tester/tests/invalid-call-10.tolk | 2 +- tolk-tester/tests/mutate-methods.tolk | 2 +- tolk-tester/tests/null-keyword.tolk | 2 +- tolk/ast-stringifier.h | 2 +- tolk/pipe-ast-to-legacy.cpp | 461 +++++++++--------- tolk/pipe-constant-folding.cpp | 10 + tolk/pipe-infer-types-and-calls.cpp | 168 ++----- tolk/pipe-optimize-boolean-expr.cpp | 38 -- 20 files changed, 602 insertions(+), 456 deletions(-) delete mode 100644 tolk-tester/tests/invalid-assign-4.tolk delete mode 100644 tolk-tester/tests/invalid-assign-5.tolk delete mode 100644 tolk-tester/tests/invalid-assign-6.tolk delete mode 100644 tolk-tester/tests/invalid-assign-7.tolk diff --git a/tolk-tester/tests/a10.tolk b/tolk-tester/tests/a10.tolk index 755a3bfb..031e29c9 100644 --- a/tolk-tester/tests/a10.tolk +++ b/tolk-tester/tests/a10.tolk @@ -144,15 +144,16 @@ fun test95() { """ test95 PROC:<{ ... 
- next GETGLOB // '10 - 3 PUSHINT // '10 '12=3 - 4 PUSHINT // '10 '12=3 '13=4 - 5 PUSHINT // '10 '12=3 '13=4 '14=5 - TRIPLE // '15 '16 - next SETGLOB + next GETGLOB // g_next + 3 PUSHINT // g_next '14=3 + 4 PUSHINT // g_next '14=3 '15=4 + 5 PUSHINT // g_next '14=3 '15=4 '16=5 + TRIPLE // '10 '11 + SWAP cur SETGLOB - cur GETGLOB // '17 - next GETGLOB // '17 '18 + next SETGLOB + cur GETGLOB // g_cur + next GETGLOB // g_cur g_next }> """ */ diff --git a/tolk-tester/tests/allow_post_modification.tolk b/tolk-tester/tests/allow_post_modification.tolk index df758a1e..e20e8218 100644 --- a/tolk-tester/tests/allow_post_modification.tolk +++ b/tolk-tester/tests/allow_post_modification.tolk @@ -147,5 +147,5 @@ fun main() { // x.0 x.1 """ -@code_hash 7627024945492125068389905298530400936797031708759561372406088054030801992712 +@code_hash 61280273714870328160131559159866470128402169974050439159015534193532598351244 */ diff --git a/tolk-tester/tests/assignment-tests.tolk b/tolk-tester/tests/assignment-tests.tolk index 40761939..34dd3e84 100644 --- a/tolk-tester/tests/assignment-tests.tolk +++ b/tolk-tester/tests/assignment-tests.tolk @@ -26,6 +26,185 @@ fun typesAsIdentifiers(builder: builder) { return int; } +global callOrder: tuple; + +fun getTensor_12() { + callOrder.tuplePush(100); + return (1, 2); +} +fun getTensor_1X(x: int) { + callOrder.tuplePush(101); + return (1, x); +} +fun getTuple_12() { + callOrder.tuplePush(110); + return [1, 2]; +} +fun getTuple_1X(x: int) { + callOrder.tuplePush(111); + return [1, x]; +} +fun getUntypedTuple_12() { + callOrder.tuplePush(120); + var t = createEmptyTuple(); t.tuplePush(1); t.tuplePush(2); + return t; +} +fun getUntypedTuple_1X(x: int) { + callOrder.tuplePush(121); + var t = createEmptyTuple(); t.tuplePush(1); t.tuplePush(x); + return t; +} +fun getIntValue5() { + callOrder.tuplePush(10); + return 5; +} +fun getIntValueX(x: int) { + callOrder.tuplePush(11); + return x; +} + +@method_id(102) +fun test102() { + callOrder = 
createEmptyTuple(); + var x = 0; + getTensor_12().0 = getIntValue5(); + getTensor_1X(5).1 = getIntValue5(); + getTensor_1X(x = 10).0 = getIntValueX(x); + return (callOrder, x); +} + +@method_id(103) +fun test103() { + callOrder = createEmptyTuple(); + var x = 0; + getTuple_12().0 = getIntValue5(); + getTuple_1X(5).1 = getIntValue5(); + getTuple_1X(x = 10).0 = getIntValueX(x); + return (callOrder, x); +} + +@method_id(104) +fun test104() { + callOrder = createEmptyTuple(); + var x = 0; + getUntypedTuple_12().0 = getIntValue5(); + getUntypedTuple_1X(5).1 = getIntValue5(); + getUntypedTuple_1X(x = 10).0 = getIntValueX(x); + return (callOrder, x); +} + +@method_id(105) +fun test105() { + callOrder = createEmptyTuple(); + getTensor_12().0 = getTensor_1X(getIntValue5()).1 = getIntValueX(getTensor_12().1); + return callOrder; +} + +@method_id(106) +fun test106() { + callOrder = createEmptyTuple(); + getTuple_12().0 = getTuple_1X(getIntValue5()).1 = getIntValueX(getTuple_12().1); + return callOrder; +} + +global t107: (int, int); + +@method_id(107) +fun test107() { + ((t107 = (1, 2)).0, (t107 = (3, 4)).1) = (5, 6); + return t107; +} + +global g108: int; +fun assertEq(a: int, b: int) { + assert(a == b, 10); + return b; +} + +@method_id(108) +fun test108() { + callOrder = createEmptyTuple(); + g108 = 0; + getTensor_1X(g108 = 8).1 = assertEq(g108, 8); + return (callOrder, g108); +} + +@method_id(109) +fun test109() { + callOrder = createEmptyTuple(); + var x = 0; + [getTuple_12().0, getTuple_1X(x = getIntValue5()).1, getTuple_1X(x += 10).0] = [getIntValue5(), getIntValue5(), getIntValueX(x)]; + return (callOrder, x); +} + +global g110: int; +global t110: (int, int); + +@method_id(110) +fun test110() { + callOrder = createEmptyTuple(); + var xy = [0, 0]; + [xy.0, getTuple_1X(g110 = 8).0] = [g110 += 5, getIntValueX(g110 += 10)]; + [xy.1, getTuple_1X((t110 = (8, 9)).0).1] = [t110.0 += 5, getIntValueX(t110.1 += 10)]; + return (xy, callOrder, g110, t110); +} + +@method_id(111) 
+fun test111() { + callOrder = createEmptyTuple(); + var z = -1; + var xy = [0, z = 0]; + var rhs = [getIntValueX(xy.1 += 10), xy.1, xy.0, z += 50]; + [xy.0, getTuple_1X(g110 = 8 + getIntValueX(xy.1)).0, xy.1, z] = rhs; + return (xy, g110, callOrder, z); +} + +@method_id(112) +fun test112() { + var xy = [1, 2]; + ((((xy))).0, ((xy.1))) = ((xy).1, ((xy.0))); + return xy; +} + +@method_id(113) +fun test113() { + var (a, t, z) = (1, [2,3], (-1,-1)); + (a, t, a, z, t.1, z.1) = (10, [a,12], 13, (a, t.1), 14, t.1); + return (a, t, z); +} + +global g114: int; +global t114: [int, int]; +global z114: (int, int); + +@method_id(114) +fun test114() { + g114 = 1; + t114 = [2, 3]; + (g114, t114, g114, z114, t114.1, z114.1) = (10, [g114,12], 13, (g114, t114.1), 14, t114.1); + return (g114, t114, z114); +} + +@method_id(115) +fun test115() { + callOrder = createEmptyTuple(); + var x = 0; + var y = 0; + [getTensor_1X(x = 5).0, y] = getTuple_1X(x = 9); + return (callOrder, x, y); +} + +@method_id(116) +fun test116() { + var (a,b,c,d) = (0,0,0,0); + var rhs = [1, 2, 3, 4]; + var rhs2 = ([a,b,c,d] = rhs); + __expect_type(rhs2, "[int, int, int, int]"); + return (a, b, c, d, rhs2); +} + + + fun main(value: int) { var (x: int, y) = (autoInferIntNull(value), autoInferIntNull(value * 2)); if (x == null && y == null) { return null; } @@ -37,4 +216,35 @@ fun main(value: int) { @testcase | 0 | 6 | -1 @testcase | 0 | 11 | (null) @testcase | 101 | 78 | 88 +@testcase | 102 | | [ 100 10 101 10 101 11 ] 10 +@testcase | 103 | | [ 110 10 111 10 111 11 ] 10 +@testcase | 104 | | [ 120 10 121 10 121 11 ] 10 +@testcase | 105 | | [ 100 10 101 100 11 ] +@testcase | 106 | | [ 110 10 111 110 11 ] +@testcase | 107 | | 3 4 +@testcase | 108 | | [ 101 ] 8 +@testcase | 109 | | [ 110 10 111 111 10 10 11 ] 15 +@testcase | 110 | | [ 13 13 ] [ 111 11 111 11 ] 23 13 19 +@testcase | 111 | | [ 10 0 ] 18 [ 11 11 111 ] 50 +@testcase | 112 | | [ 2 1 ] +@testcase | 113 | | 13 [ 1 14 ] 1 3 +@testcase | 114 | | 13 [ 1 14 ] 1 
3 +@testcase | 115 | | [ 101 111 ] 9 9 +@testcase | 116 | | 1 2 3 4 [ 1 2 3 4 ] + + +@fif_codegen +""" + test116 PROC:<{ + // + 1 PUSHINT // '10=1 + 2 PUSHINT // '10=1 '11=2 + 3 PUSHINT // '10=1 '11=2 '12=3 + 4 PUSHINT // '10=1 '11=2 '12=3 '13=4 + 4 TUPLE // rhs + DUP // rhs rhs + 4 UNTUPLE // rhs2 a b c d + 4 ROLL // a b c d rhs2 + }> +""" */ diff --git a/tolk-tester/tests/codegen_check_demo.tolk b/tolk-tester/tests/codegen_check_demo.tolk index b355a9b7..5b46c093 100644 --- a/tolk-tester/tests/codegen_check_demo.tolk +++ b/tolk-tester/tests/codegen_check_demo.tolk @@ -35,7 +35,7 @@ Below, I just give examples of @fif_codegen tag: """ main PROC:<{ // s - 17 PUSHINT // s '1=17 + 17 PUSHINT // s '3=17 OVER // s z=17 t WHILE:<{ ... diff --git a/tolk-tester/tests/indexed-access.tolk b/tolk-tester/tests/indexed-access.tolk index 38094fa5..ab7995cf 100644 --- a/tolk-tester/tests/indexed-access.tolk +++ b/tolk-tester/tests/indexed-access.tolk @@ -21,6 +21,26 @@ fun plus(mutate self: int, y: int): int { fun eq(v: X): X { return v; } +global gTup: [int]; +global gTens: (int, int); + +@method_id(100) +fun testCodegenSimple() { + var t1 = [1]; + t1.0 = 2; + debugPrintString(""); + var t2 = [[1]]; + t2.0.0 = 2; + debugPrintString(""); + gTup = [1]; + gTup.0 = 2; + debugPrintString(""); + gTens = (1,2); + gTens.1 = 4; + debugPrintString(""); + return (t1, t2, gTup, gTens); +} + @method_id(101) fun test101() { var t = (1, (2, 3), [4, 5, [6, 7]], 8); @@ -241,30 +261,60 @@ fun main(){} @fif_codegen """ - testCodegenNoPureIndexedAccess PROC:<{ + testCodegenSimple PROC:<{ // - 0 PUSHINT // '8=0 + 1 PUSHINT // '2=1 + SINGLE // t1 + 2 PUSHINT // t1 '3=2 + 0 SETINDEX // t1 + x{} PUSHSLICE // t1 '6 + STRDUMP DROP + 1 PUSHINT // t1 '10=1 + SINGLE // t1 '9 + SINGLE // t1 t2 + 2 PUSHINT // t1 t2 '11=2 + OVER // t1 t2 '11=2 t2 + 0 INDEX // t1 t2 '11=2 '14 + SWAP // t1 t2 '14 '11=2 + 0 SETINDEX // t1 t2 '14 + 0 SETINDEX // t1 t2 + x{} PUSHSLICE // t1 t2 '17 + STRDUMP DROP + 1 PUSHINT // t1 
t2 '20=1 + SINGLE // t1 t2 '18 + gTup SETGLOB + 2 PUSHINT // t1 t2 '21=2 + gTup GETGLOB // t1 t2 '21=2 g_gTup + SWAP // t1 t2 g_gTup '21=2 + 0 SETINDEX // t1 t2 g_gTup + gTup SETGLOB + x{} PUSHSLICE // t1 t2 '25 + STRDUMP DROP + 1 PUSHINT // t1 t2 '28=1 + 2 PUSHINT // t1 t2 '26=1 '27=2 + PAIR + gTens SETGLOB + 4 PUSHINT // t1 t2 g_gTens.1=4 + gTens GETGLOB + UNPAIR // t1 t2 g_gTens.1=4 g_gTens.0 g_gTens.1 + DROP // t1 t2 g_gTens.1=4 g_gTens.0 + SWAP // t1 t2 g_gTens.0 g_gTens.1=4 + PAIR + gTens SETGLOB + x{} PUSHSLICE // t1 t2 '36 + STRDUMP DROP + gTup GETGLOB // t1 t2 g_gTup + gTens GETGLOB + UNPAIR // t1 t2 g_gTup g_gTens.0 g_gTens.1 }> """ @fif_codegen """ - test104 PROC:<{ + testCodegenNoPureIndexedAccess PROC:<{ // - 5 PUSHINT // '2=5 - DUP // '2=5 '3=5 - PAIR // '1 - SINGLE // m - 10 PUSHINT // m '5=10 - 20 PUSHINT // m '5=10 '6=20 - s2 PUSH // m '5=10 '6=20 m - 0 INDEX // m '10=10 '12=20 '8 - SWAP // m '10=10 '8 '12=20 - 1 SETINDEX // m '10=10 '8 - SWAP // m '8 '10=10 - 0 SETINDEX // m '8 - 0 SETINDEX // m - ... 
+ 0 PUSHINT // '8=0 + }> """ @fif_codegen diff --git a/tolk-tester/tests/invalid-assign-1.tolk b/tolk-tester/tests/invalid-assign-1.tolk index f605056e..799176df 100644 --- a/tolk-tester/tests/invalid-assign-1.tolk +++ b/tolk-tester/tests/invalid-assign-1.tolk @@ -1,9 +1,9 @@ fun main() { - var c = 1; - (c, c) = (2, 3); + var t = createEmptyTuple(); + t.0 = (1, 2); } /** @compilation_should_fail -@stderr one variable modified twice inside the same expression +@stderr a tuple can not have `(int, int)` inside, because it occupies 2 stack slots in TVM, not 1 */ diff --git a/tolk-tester/tests/invalid-assign-2.tolk b/tolk-tester/tests/invalid-assign-2.tolk index 2838ed9a..6a33e696 100644 --- a/tolk-tester/tests/invalid-assign-2.tolk +++ b/tolk-tester/tests/invalid-assign-2.tolk @@ -1,11 +1,8 @@ -fun incThree(mutate a: int, mutate b: int, mutate c: int) {} - -fun main() { - var c = [[[1, 2]]]; - incThree(mutate c.0.0.0, mutate c.0.0.1, mutate c.0.0.0); +fun main(cs: slice) { + var cb = cs.tupleSize; } /** @compilation_should_fail -@stderr one variable modified twice inside the same expression +@stderr referencing a method for `tuple` with object of type `slice` */ diff --git a/tolk-tester/tests/invalid-assign-3.tolk b/tolk-tester/tests/invalid-assign-3.tolk index d3f5d1f1..567ace33 100644 --- a/tolk-tester/tests/invalid-assign-3.tolk +++ b/tolk-tester/tests/invalid-assign-3.tolk @@ -1,10 +1,9 @@ -global gg: (int, int); - fun main() { - [gg.0, gg.1, gg.0] = [0, 1, 0]; + var t = createEmptyTuple(); + var xy = t.0 as (int, int); } /** @compilation_should_fail -@stderr one variable modified twice inside the same expression +@stderr a tuple can not have `(int, int)` inside, because it occupies 2 stack slots in TVM, not 1 */ diff --git a/tolk-tester/tests/invalid-assign-4.tolk b/tolk-tester/tests/invalid-assign-4.tolk deleted file mode 100644 index 67340b20..00000000 --- a/tolk-tester/tests/invalid-assign-4.tolk +++ /dev/null @@ -1,10 +0,0 @@ -global gg: (int, [int, int]); - 
-fun main() { - (gg.1.0, gg.1, gg.1.1) = (0, [1, 2], 3); -} - -/** -@compilation_should_fail -@stderr one variable both modified and read inside the same expression -*/ diff --git a/tolk-tester/tests/invalid-assign-5.tolk b/tolk-tester/tests/invalid-assign-5.tolk deleted file mode 100644 index f3fe59f7..00000000 --- a/tolk-tester/tests/invalid-assign-5.tolk +++ /dev/null @@ -1,9 +0,0 @@ -fun main() { - var ab = (1, 2); - (ab, ab.1) = ((2, 3), 4); -} - -/** -@compilation_should_fail -@stderr one variable both modified and read inside the same expression -*/ diff --git a/tolk-tester/tests/invalid-assign-6.tolk b/tolk-tester/tests/invalid-assign-6.tolk deleted file mode 100644 index 59d769e9..00000000 --- a/tolk-tester/tests/invalid-assign-6.tolk +++ /dev/null @@ -1,9 +0,0 @@ -fun main() { - var t = createEmptyTuple(); - t.0 = (1, 2); -} - -/** -@compilation_should_fail -@stderr can not put `(int, int)` into a tuple, because it occupies 2 stack slots in TVM, not 1 -*/ diff --git a/tolk-tester/tests/invalid-assign-7.tolk b/tolk-tester/tests/invalid-assign-7.tolk deleted file mode 100644 index 6a33e696..00000000 --- a/tolk-tester/tests/invalid-assign-7.tolk +++ /dev/null @@ -1,8 +0,0 @@ -fun main(cs: slice) { - var cb = cs.tupleSize; -} - -/** -@compilation_should_fail -@stderr referencing a method for `tuple` with object of type `slice` -*/ diff --git a/tolk-tester/tests/invalid-call-10.tolk b/tolk-tester/tests/invalid-call-10.tolk index 9a28c004..4da85f4f 100644 --- a/tolk-tester/tests/invalid-call-10.tolk +++ b/tolk-tester/tests/invalid-call-10.tolk @@ -7,5 +7,5 @@ fun main() { /** @compilation_should_fail -@stderr can not put `(int, builder)` into a tuple, because it occupies 2 stack slots in TVM, not 1 +@stderr a tuple can not have `(int, builder)` inside, because it occupies 2 stack slots in TVM, not 1 */ diff --git a/tolk-tester/tests/mutate-methods.tolk b/tolk-tester/tests/mutate-methods.tolk index ebd07aca..9ebf8b1d 100644 --- 
a/tolk-tester/tests/mutate-methods.tolk +++ b/tolk-tester/tests/mutate-methods.tolk @@ -307,7 +307,7 @@ fun main(){} ... incrementTwoInPlace CALLDICT // x y sum1 -ROT - 10 PUSHINT // sum1 x y '10=10 + 10 PUSHINT // sum1 x y '11=10 incrementTwoInPlace CALLDICT // sum1 x y sum2 s1 s3 s0 XCHG3 // x y sum1 sum2 }> diff --git a/tolk-tester/tests/null-keyword.tolk b/tolk-tester/tests/null-keyword.tolk index 69678434..eb02b624 100644 --- a/tolk-tester/tests/null-keyword.tolk +++ b/tolk-tester/tests/null-keyword.tolk @@ -133,7 +133,7 @@ fun main() { """ test7 PROC:<{ ... - LDOPTREF // b '8 '7 + LDOPTREF // b '9 '8 DROP // b c ISNULL // b '11 10 MULCONST // b '13 diff --git a/tolk/ast-stringifier.h b/tolk/ast-stringifier.h index 4ec72cdd..1211d63f 100644 --- a/tolk/ast-stringifier.h +++ b/tolk/ast-stringifier.h @@ -193,7 +193,7 @@ class ASTStringifier final : public ASTVisitor { } case ast_local_var_lhs: { std::ostringstream os; - os << (v->as()->inferred_type ? v->as()->inferred_type : v->as()->declared_type); + os << (v->as()->inferred_type ? v->as()->inferred_type->as_human_readable() : v->as()->declared_type->as_human_readable()); if (v->as()->get_name().empty()) { return "_: " + os.str(); } diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index f5eca22c..867c05ec 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -30,158 +30,17 @@ * Up to this point, all types have been inferred, all validity checks have been passed, etc. * All properties in AST nodes are assigned and can be safely used (fun_ref, etc.). * So, if execution reaches this pass, the input is (almost) correct, and code generation should succeed. - * The only thing additionally checked during this pass is tricky lvalue, like one and the same variable - * assigned/mutated multiple times in same expression, e.g. `(t.0, t.0) = rhs` / `f(mutate x.1.2, mutate x)`. 
+ * (previously, there was a check for one variable modified twice like `(t.0, t.0) = rhs`, but after changing + * execution order of assignment to "first lhs, then rhs", it was removed for several reasons) */ namespace tolk { -// fire error on cases like `(a, a) = rhs` / `f(mutate t.1.0, mutate t.1.0)` -GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_variable_modified_twice_inside_same_expression(SrcLocation loc) { - throw ParseError(loc, "one variable modified twice inside the same expression"); -} +class LValContext; +std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, LValContext* lval_ctx = nullptr); +std::vector pre_compile_symbol(SrcLocation loc, const Symbol* sym, CodeBlob& code, LValContext* lval_ctx); +void process_any_statement(AnyV v, CodeBlob& code); -// fire error on cases like `(m.1.0, m.1) = rhs` (m.1 inside m.1.0 is "rval inside lval") -GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_variable_modified_and_read_inside_same_expression(SrcLocation loc) { - throw ParseError(loc, "one variable both modified and read inside the same expression"); -} - -// Main goal of LValContext is to handle non-primitive lvalues. At IR level, a usual local variable -// exists, but on its change, something non-trivial should happen. -// Example: `globalVar = 9` actually does `Const $5 = 9` + `Let $6 = $5` + `SetGlob "globVar" = $6` -// Example: `tupleVar.0 = 9` actually does `Const $5 = 9` + `Let $6 = $5` + `Const $7 = 0` + `Call tupleSetAt($4, $6, $7)` -// Of course, mixing globals with tuples should also be supported. -// To achieve this, treat tupleObj inside "tupleObj.i" like "rvalue inside lvalue". -// For instance, `globalTuple.0 = 9` reads global (like rvalue), assigns 9 to tmp var, modifies tuple, writes global. -// A challenging thing is handling "unique" parts, to be read/updated only once. -// Example: `f(mutate globalTensor.0, mutate globalTensor.1)`, then globalTensor should be read/written once.
-// Example: `(t.0.0, t.0.1) = rhs` (m is [[int, int]]), then t.0 should be read/updated once. -// Solving this by calculating hashes of every lvalue or rvalue inside lvalue automatically gives an ability -// to detect and fire "multiple writes inside expression", like `(a, a) = rhs` / `[t.0, (t.0.1, c)] = rhs`. -// Note, that tensors (not tuples) `tensorVar.0 = 9` do not emit anything special (unless global). -class LValContext { - // every global variable used as lvalue is registered here - // example: `globalInt = 9`, implicit var is created `$tmp = 9`, and `SetGlob "globalInt" $tmp` is done after - // global tensors are stored as tuples (unpacked on reading, packed on writing), then multiple tmp vars are created - struct ModifiedGlob { - const GlobalVarData* glob_ref; - std::vector local_ir_idx; // typically 1, generally calc_width_on_stack() of global var (tensors) - - void apply(CodeBlob& code, SrcLocation loc) const { - Op& op = code.emplace_back(loc, Op::_SetGlob, std::vector{}, local_ir_idx, glob_ref); - op.set_impure_flag(); - } - }; - - // every tuple index used as lvalue is registered here - // example: `t.0 = 9`, implicit var is created `$tmp = 9`, as well as `$tmp_idx = 0` and `tupleSetAt()` is done after - // for `t.0.0` if t is `[[int, ...]]`, `tupleAt()` for it is done since it's rvalue, and `tupleSetAt()` is done 2 times - struct ModifiedTupleIndex { - uint64_t hash; - var_idx_t tuple_ir_idx; - var_idx_t index_ir_idx; - var_idx_t field_ir_idx; - - void apply(CodeBlob& code, SrcLocation loc) const { - const FunctionData* builtin_sym = lookup_global_symbol("tupleSetAt")->as(); - code.emplace_back(loc, Op::_Call, std::vector{tuple_ir_idx}, std::vector{tuple_ir_idx, field_ir_idx, index_ir_idx}, builtin_sym); - } - }; - - int level_rval_inside_lval = 0; - std::vector> modifications; - std::unordered_set all_modified_hashes; - - void fire_if_one_variable_modified_twice(SrcLocation loc, uint64_t modified_hash) { - if (!is_rval_inside_lval()) { - if 
(!all_modified_hashes.insert(modified_hash).second) { - fire_error_variable_modified_twice_inside_same_expression(loc); - } - if (all_modified_hashes.contains(~modified_hash)) { - fire_error_variable_modified_and_read_inside_same_expression(loc); - } - } else { - all_modified_hashes.insert(~modified_hash); - if (all_modified_hashes.contains(modified_hash)) { - fire_error_variable_modified_and_read_inside_same_expression(loc); - } - } - } - -public: - void enter_rval_inside_lval() { level_rval_inside_lval++; } - void exit_rval_inside_lval() { level_rval_inside_lval--; } - bool is_rval_inside_lval() const { return level_rval_inside_lval > 0; } - - uint64_t register_lval(SrcLocation loc, const LocalVarData* var_ref) { - uint64_t hash = reinterpret_cast(var_ref); - fire_if_one_variable_modified_twice(loc, hash); - return hash; - } - - uint64_t register_lval(SrcLocation loc, const GlobalVarData* glob_ref) { - uint64_t hash = reinterpret_cast(glob_ref); - fire_if_one_variable_modified_twice(loc, hash); - return hash; - } - - uint64_t register_lval(SrcLocation loc, V v) { - uint64_t hash = 7; - AnyExprV leftmost_obj = v; - while (auto v_dot = leftmost_obj->try_as()) { - if (!v_dot->is_target_indexed_access()) { - break; - } - hash = hash * 1915239017 + std::get(v_dot->target); - leftmost_obj = v_dot->get_obj(); - } - if (auto v_ref = leftmost_obj->try_as()) { - hash *= reinterpret_cast(v_ref->sym); // `v.0` and `v.0` in 2 places is the same - } else { - hash *= reinterpret_cast(leftmost_obj); // unlike `f().0` and `f().0` (pointers to AST nodes differ) - } - fire_if_one_variable_modified_twice(loc, hash); - return hash; - } - - const std::vector* exists_already_known_global(const GlobalVarData* glob_ref) const { - for (const auto& m : modifications) { - if (const auto* m_glob = std::get_if(&m); m_glob && m_glob->glob_ref == glob_ref) { - return &m_glob->local_ir_idx; - } - } - return nullptr; - } - - const var_idx_t* exists_already_known_tuple_index(uint64_t hash) const { 
- for (const auto& m : modifications) { - if (const auto* m_tup = std::get_if(&m); m_tup && m_tup->hash == hash) { - return &m_tup->field_ir_idx; - } - } - return nullptr; - } - - void register_modified_global(const GlobalVarData* glob_ref, std::vector local_ir_idx) { - modifications.emplace_back(ModifiedGlob{glob_ref, std::move(local_ir_idx)}); - } - - void register_modified_tuple_index(uint64_t hash, var_idx_t tuple_ir_idx, var_idx_t index_ir_idx, var_idx_t field_ir_idx) { - modifications.emplace_back(ModifiedTupleIndex{hash, tuple_ir_idx, index_ir_idx, field_ir_idx}); - } - - void gen_ops_if_nonempty(CodeBlob& code, SrcLocation loc) const { - for (auto it = modifications.rbegin(); it != modifications.rend(); ++it) { // reverse, it's important - if (const auto* m_glob = std::get_if(&*it)) { - m_glob->apply(code, loc); - } else if (const auto* m_tup = std::get_if(&*it)) { - m_tup->apply(code, loc); - } - } - } -}; // The goal of VarsModificationWatcher is to detect such cases: `return (x, x += y, x)`. // Without any changes, ops will be { _Call $2 = +($0_x, $1_y); _Return $0_x, $2, $0_x } - incorrect @@ -229,8 +88,176 @@ public: static VarsModificationWatcher vars_modification_watcher; -std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, LValContext* lval_ctx = nullptr); -void process_any_statement(AnyV v, CodeBlob& code); + +// Main goal of LValContext is to handle non-primitive lvalues. At IR level, a usual local variable +// exists, but on its change, something non-trivial should happen. +// Example: `globalVar = 9` actually does `Const $5 = 9` + `Let $6 = $5` + `SetGlob "globVar" = $6` +// Example: `tupleVar.0 = 9` actually does `Const $5 = 9` + `Let $6 = $5` + `Const $7 = 0` + `Call tupleSetAt($4, $6, $7)` +// Of course, mixing globals with tuples should also be supported. +// To achieve this, treat tupleObj inside "tupleObj.i" like "rvalue inside lvalue". 
+// For instance, `globalTuple.0 = 9` reads global (like rvalue), assigns 9 to tmp var, modifies tuple, writes global. +// Note, that tensors (not tuples) `tensorVar.0 = 9` do not emit anything special (unless global). +class LValContext { + // every global variable used as lvalue is registered here + // example: `globalInt = 9`, implicit var is created `$tmp = 9`, and `SetGlob "globalInt" $tmp` is done after + struct ModifiedGlobal { + const GlobalVarData* glob_ref; + std::vector lval_ir_idx; // typically 1, generally calc_width_on_stack() of global var (tensors) + + // for 1-slot globals int/cell/slice, assigning to them is just SETGLOB + // same for tensors, if they are fully rewritten in an expression: `gTensor = (5,6)` + void apply_fully_rewrite(CodeBlob& code, SrcLocation loc) const { + Op& op = code.emplace_back(loc, Op::_SetGlob, std::vector{}, lval_ir_idx, glob_ref); + op.set_impure_flag(); + } + + // for N-slot globals tensor/struct/union, assigning to their parts, like `gTensor.1 = 6` + // we need to read gTensor as a whole (0-th and 1-th component), rewrite 1-th component, and SETGLOB a whole back + void apply_partially_rewrite(CodeBlob& code, SrcLocation loc, std::vector&& was_modified_by_let) const { + LValContext local_lval; + local_lval.enter_rval_inside_lval(); + std::vector local_ir_idx = pre_compile_symbol(loc, glob_ref, code, &local_lval); + for (size_t i = 0; i < local_ir_idx.size(); ++i) { + if (was_modified_by_let[i]) { + code.emplace_back(loc, Op::_Let, std::vector{local_ir_idx[i]}, std::vector{lval_ir_idx[i]}); + } + } + + Op& op = code.emplace_back(loc, Op::_SetGlob, std::vector{}, local_ir_idx, glob_ref); + op.set_impure_flag(); + } + }; + + // every tensor index, when a tensor is a global, is registered here (same for structs and fields) + // example: `global v: (int, int); v.1 = 5`, implicit var is created `$tmp = 5`, and when it's modified, + // we need to partially update v; essentially, apply_partially_rewrite() above will be called
+ struct ModifiedFieldOfGlobal { + AnyExprV tensor_obj; + int index_at; + std::vector lval_ir_idx; + + void apply(CodeBlob& code, SrcLocation loc) const { + LValContext local_lval; + local_lval.enter_rval_inside_lval(); + std::vector obj_ir_idx = pre_compile_expr(tensor_obj, code, &local_lval); + const TypeDataTensor* t_tensor = tensor_obj->inferred_type->try_as(); + tolk_assert(t_tensor); + int stack_width = t_tensor->items[index_at]->calc_width_on_stack(); + int stack_offset = 0; + for (int i = 0; i < index_at; ++i) { + stack_offset += t_tensor->items[i]->calc_width_on_stack(); + } + std::vector field_ir_idx = {obj_ir_idx.begin() + stack_offset, obj_ir_idx.begin() + stack_offset + stack_width}; + tolk_assert(field_ir_idx.size() == lval_ir_idx.size()); + + vars_modification_watcher.trigger_callbacks(field_ir_idx, loc); + code.emplace_back(loc, Op::_Let, field_ir_idx, lval_ir_idx); + local_lval.after_let(std::move(field_ir_idx), code, loc); + } + }; + + // every tuple index used as lvalue is registered here + // example: `t.0 = 9`, implicit var is created `$tmp = 9`, as well as `$tmp_idx = 0` and `tupleSetAt()` is done after + // for `t.0.0` if t is `[[int, ...]]`, `tupleAt()` for it is done since it's rvalue, and `tupleSetAt()` is done 2 times + struct ModifiedTupleIndex { + AnyExprV tuple_obj; + int index_at; + std::vector lval_ir_idx; + + void apply(CodeBlob& code, SrcLocation loc) const { + LValContext local_lval; + local_lval.enter_rval_inside_lval(); + std::vector tuple_ir_idx = pre_compile_expr(tuple_obj, code, &local_lval); + std::vector index_ir_idx = code.create_tmp_var(TypeDataInt::create(), loc, "(tuple-idx)"); + code.emplace_back(loc, Op::_IntConst, index_ir_idx, td::make_refint(index_at)); + + vars_modification_watcher.trigger_callbacks(tuple_ir_idx, loc); + const FunctionData* builtin_sym = lookup_global_symbol("tupleSetAt")->as(); + code.emplace_back(loc, Op::_Call, std::vector{tuple_ir_idx}, std::vector{tuple_ir_idx[0], lval_ir_idx[0], 
index_ir_idx[0]}, builtin_sym); + local_lval.after_let(std::move(tuple_ir_idx), code, loc); + } + }; + + int level_rval_inside_lval = 0; + std::vector> modifications; + + static bool vector_contains(const std::vector& ir_vars, var_idx_t ir_idx) { + for (var_idx_t var_in_vector : ir_vars) { + if (var_in_vector == ir_idx) { + return true; + } + } + return false; + } + +public: + void enter_rval_inside_lval() { level_rval_inside_lval++; } + void exit_rval_inside_lval() { level_rval_inside_lval--; } + bool is_rval_inside_lval() const { return level_rval_inside_lval > 0; } + + void capture_global_modification(const GlobalVarData* glob_ref, std::vector lval_ir_idx) { + modifications.emplace_back(ModifiedGlobal{glob_ref, std::move(lval_ir_idx)}); + } + + void capture_field_of_global_modification(AnyExprV tensor_obj, int index_at, std::vector lval_ir_idx) { + modifications.emplace_back(ModifiedFieldOfGlobal{tensor_obj, index_at, std::move(lval_ir_idx)}); + } + + void capture_tuple_index_modification(AnyExprV tuple_obj, int index_at, std::vector lval_ir_idx) { + modifications.emplace_back(ModifiedTupleIndex{tuple_obj, index_at, std::move(lval_ir_idx)}); + } + + void after_let(std::vector&& let_left_vars, CodeBlob& code, SrcLocation loc) const { + for (const auto& modification : modifications) { + if (const auto* m_glob = std::get_if(&modification)) { + int n_modified_by_let = 0; + std::vector was_modified_by_let; + was_modified_by_let.resize(m_glob->lval_ir_idx.size()); + for (size_t i = 0; i < m_glob->lval_ir_idx.size(); ++i) { + if (vector_contains(let_left_vars, m_glob->lval_ir_idx[i])) { + was_modified_by_let[i] = true; + n_modified_by_let++; + } + } + if (n_modified_by_let == static_cast(m_glob->lval_ir_idx.size())) { + m_glob->apply_fully_rewrite(code, loc); + } else if (n_modified_by_let > 0) { + m_glob->apply_partially_rewrite(code, loc, std::move(was_modified_by_let)); + } + } else if (const auto* m_tup = std::get_if(&modification)) { + bool 
was_tuple_index_modified = false; + for (var_idx_t field_ir_idx : m_tup->lval_ir_idx) { + was_tuple_index_modified |= vector_contains(let_left_vars, field_ir_idx); + } + if (was_tuple_index_modified) { + m_tup->apply(code, loc); + } + } else if (const auto* m_tens = std::get_if(&modification)) { + bool was_tensor_index_modified = false; + for (var_idx_t field_ir_idx : m_tens->lval_ir_idx) { + was_tensor_index_modified |= vector_contains(let_left_vars, field_ir_idx); + } + if (was_tensor_index_modified) { + m_tens->apply(code, loc); + } + } + } + } +}; + +// given `{some_expr}.{i}`, check it for pattern `some_var.0` / `some_var.0.1` / etc. +// return some_var if satisfies (it may be a local or a global var, a tensor or a tuple) +// return nullptr otherwise: `f().0` / `(v = rhs).0` / `some_var.method().0` / etc. +static V calc_sink_leftmost_obj(V v) { + AnyExprV leftmost_obj = v->get_obj(); + while (auto v_dot = leftmost_obj->try_as()) { + if (!v_dot->is_target_indexed_access()) { + break; + } + leftmost_obj = v_dot->get_obj(); + } + return leftmost_obj->type == ast_reference ? 
leftmost_obj->as() : nullptr; +} static std::vector> pre_compile_tensor_inner(CodeBlob& code, const std::vector& args, @@ -313,43 +340,45 @@ static std::vector pre_compile_tensor(CodeBlob& code, const std::vect static std::vector pre_compile_let(CodeBlob& code, AnyExprV lhs, AnyExprV rhs, SrcLocation loc) { // [lhs] = [rhs]; since type checking is ok, it's the same as "lhs = rhs" if (lhs->type == ast_typed_tuple && rhs->type == ast_typed_tuple) { - std::vector right = pre_compile_tensor(code, rhs->as()->get_items()); LValContext local_lval; std::vector left = pre_compile_tensor(code, lhs->as()->get_items(), &local_lval); vars_modification_watcher.trigger_callbacks(left, loc); - code.emplace_back(loc, Op::_Let, std::move(left), right); - local_lval.gen_ops_if_nonempty(code, loc); + std::vector rvect = pre_compile_tensor(code, rhs->as()->get_items()); + code.emplace_back(loc, Op::_Let, left, rvect); + local_lval.after_let(std::move(left), code, loc); + std::vector right = code.create_tmp_var(TypeDataTuple::create(), loc, "(tuple)"); + code.emplace_back(lhs->loc, Op::_Tuple, right, std::move(rvect)); return right; } // [lhs] = rhs; it's un-tuple to N left vars if (lhs->type == ast_typed_tuple) { + LValContext local_lval; + std::vector left = pre_compile_tensor(code, lhs->as()->get_items(), &local_lval); + vars_modification_watcher.trigger_callbacks(left, loc); std::vector right = pre_compile_expr(rhs, code); const TypeDataTypedTuple* inferred_tuple = rhs->inferred_type->try_as(); std::vector types_list = inferred_tuple->items; std::vector rvect = code.create_tmp_var(TypeDataTensor::create(std::move(types_list)), rhs->loc, "(unpack-tuple)"); code.emplace_back(lhs->loc, Op::_UnTuple, rvect, std::move(right)); - LValContext local_lval; - std::vector left = pre_compile_tensor(code, lhs->as()->get_items(), &local_lval); - vars_modification_watcher.trigger_callbacks(left, loc); - code.emplace_back(loc, Op::_Let, std::move(left), rvect); - 
local_lval.gen_ops_if_nonempty(code, loc); - return rvect; + code.emplace_back(loc, Op::_Let, left, rvect); + local_lval.after_let(std::move(left), code, loc); + return right; } // small optimization: `var x = rhs` or `local_var = rhs` (90% cases), LValContext not needed actually if (lhs->type == ast_local_var_lhs || (lhs->type == ast_reference && lhs->as()->sym->try_as())) { - std::vector right = pre_compile_expr(rhs, code); std::vector left = pre_compile_expr(lhs, code); // effectively, local_var->ir_idx vars_modification_watcher.trigger_callbacks(left, loc); + std::vector right = pre_compile_expr(rhs, code); code.emplace_back(loc, Op::_Let, std::move(left), right); return right; } // lhs = rhs - std::vector right = pre_compile_expr(rhs, code); LValContext local_lval; std::vector left = pre_compile_expr(lhs, code, &local_lval); vars_modification_watcher.trigger_callbacks(left, loc); - code.emplace_back(loc, Op::_Let, std::move(left), right); - local_lval.gen_ops_if_nonempty(code, loc); + std::vector right = pre_compile_expr(rhs, code); + code.emplace_back(loc, Op::_Let, left, right); + local_lval.after_let(std::move(left), code, loc); return right; } @@ -364,28 +393,22 @@ static std::vector gen_op_call(CodeBlob& code, TypePtr ret_type, SrcL } -static std::vector pre_compile_symbol(SrcLocation loc, const Symbol* sym, CodeBlob& code, LValContext* lval_ctx) { +std::vector pre_compile_symbol(SrcLocation loc, const Symbol* sym, CodeBlob& code, LValContext* lval_ctx) { if (const auto* glob_ref = sym->try_as()) { - if (!lval_ctx) { - // `globalVar` is used for reading, just create local IR var to represent its value, Op GlobVar will fill it - // note, that global tensors are stored as a tuple an unpacked to N vars on read, N determined by declared_type - std::vector local_ir_idx = code.create_tmp_var(glob_ref->declared_type, loc, "(glob-var)"); - code.emplace_back(loc, Op::_GlobVar, local_ir_idx, std::vector{}, glob_ref); - return local_ir_idx; - } else { - // 
`globalVar = rhs` / `mutate globalVar` / `globalTuple.0 = rhs` - lval_ctx->register_lval(loc, glob_ref); - if (const std::vector* local_ir_idx = lval_ctx->exists_already_known_global(glob_ref)) { - return *local_ir_idx; // `f(mutate g.0, mutate g.1)`, then g will be read only once - } - std::vector local_ir_idx = code.create_tmp_var(glob_ref->declared_type, loc, "(glob-var)"); - if (lval_ctx->is_rval_inside_lval()) { // for `globalVar.0` "globalVar" is rvalue inside lvalue - // for `globalVar = rhs` don't read a global actually, but for `globalVar.0 = rhs` do - code.emplace_back(loc, Op::_GlobVar, local_ir_idx, std::vector{}, glob_ref); - } - lval_ctx->register_modified_global(glob_ref, local_ir_idx); - return local_ir_idx; + // handle `globalVar = rhs` / `mutate globalVar` + if (lval_ctx && !lval_ctx->is_rval_inside_lval()) { + std::vector lval_ir_idx = code.create_tmp_var(glob_ref->declared_type, loc, "(lval-glob)"); + lval_ctx->capture_global_modification(glob_ref, lval_ir_idx); + return lval_ir_idx; } + // `globalVar` is used for reading, just create local IR var to represent its value, Op GlobVar will fill it + // note, that global tensors are stored as a tuple an unpacked to N vars on read, N determined by declared_type + std::vector local_ir_idx = code.create_var(glob_ref->declared_type, loc, "g_" + glob_ref->name); + code.emplace_back(loc, Op::_GlobVar, local_ir_idx, std::vector{}, glob_ref); + if (lval_ctx) { // `globalVar.0 = rhs`, globalVar is rval inside lval + lval_ctx->capture_global_modification(glob_ref, local_ir_idx); + } + return local_ir_idx; } if (const auto* const_ref = sym->try_as()) { if (const_ref->is_int_const()) { @@ -407,15 +430,12 @@ static std::vector pre_compile_symbol(SrcLocation loc, const Symbol* #ifdef TOLK_DEBUG tolk_assert(static_cast(var_ref->ir_idx.size()) == var_ref->declared_type->calc_width_on_stack()); #endif - if (lval_ctx) { - lval_ctx->register_lval(loc, var_ref); - } return var_ref->ir_idx; } throw 
Fatal("pre_compile_symbol"); } -static std::vector process_assign(V v, CodeBlob& code) { +static std::vector process_assignment(V v, CodeBlob& code) { if (auto lhs_decl = v->get_lhs()->try_as()) { return pre_compile_let(code, lhs_decl->get_expr(), v->get_rhs(), v->loc); } else { @@ -492,12 +512,18 @@ static std::vector process_dot_access(V v, CodeBlob& if (!v->is_target_fun_ref()) { TypePtr obj_type = v->get_obj()->inferred_type; int index_at = std::get(v->target); - // `tensorVar.0`; since a tensor of N elems are N vars on a stack actually, calculate offset + // `tensorVar.0` if (const auto* t_tensor = obj_type->try_as()) { - if (lval_ctx) lval_ctx->register_lval(v->loc, v); - if (lval_ctx) lval_ctx->enter_rval_inside_lval(); + // handle `tensorVar.0 = rhs` if tensors is a global, special case, then the global will be read on demand + if (lval_ctx && !lval_ctx->is_rval_inside_lval()) { + if (auto sink = calc_sink_leftmost_obj(v); sink && sink->sym->try_as()) { + std::vector lval_ir_idx = code.create_tmp_var(v->inferred_type, v->loc, "(lval-global-tensor)"); + lval_ctx->capture_field_of_global_modification(v->get_obj(), index_at, lval_ir_idx); + return lval_ir_idx; + } + } + // since a tensor of N elems are N vars on a stack actually, calculate offset std::vector lhs_vars = pre_compile_expr(v->get_obj(), code, lval_ctx); - if (lval_ctx) lval_ctx->exit_rval_inside_lval(); int stack_width = t_tensor->items[index_at]->calc_width_on_stack(); int stack_offset = 0; for (int i = 0; i < index_at; ++i) { @@ -505,39 +531,26 @@ static std::vector process_dot_access(V v, CodeBlob& } return {lhs_vars.begin() + stack_offset, lhs_vars.begin() + stack_offset + stack_width}; } - // `tupleVar.0`; not to mess up, separate rvalue and lvalue cases + // `tupleVar.0` if (obj_type->try_as() || obj_type->try_as()) { - if (!lval_ctx) { - // `tupleVar.0` as rvalue: the same as "tupleAt(tupleVar, 0)" written in terms of IR vars - std::vector tuple_ir_idx = pre_compile_expr(v->get_obj(), 
code); - std::vector index_ir_idx = code.create_tmp_var(TypeDataInt::create(), v->get_identifier()->loc, "(tuple-idx)"); - code.emplace_back(v->loc, Op::_IntConst, index_ir_idx, td::make_refint(index_at)); - std::vector field_ir_idx = code.create_tmp_var(v->inferred_type, v->loc, "(tuple-field)"); - tolk_assert(tuple_ir_idx.size() == 1 && field_ir_idx.size() == 1); // tuples contain only 1-slot values - const FunctionData* builtin_sym = lookup_global_symbol("tupleAt")->as(); - code.emplace_back(v->loc, Op::_Call, field_ir_idx, std::vector{tuple_ir_idx[0], index_ir_idx[0]}, builtin_sym); - return field_ir_idx; - } else { - // `tupleVar.0 = rhs`: finally "tupleSetAt(tupleVar, rhs, 0)" will be done - uint64_t hash = lval_ctx->register_lval(v->loc, v); - if (const var_idx_t* field_ir_idx = lval_ctx->exists_already_known_tuple_index(hash)) { - return {*field_ir_idx}; // `(t.0.0, t.0.1) = rhs`, then "t.0" will be read (tupleAt) once - } - lval_ctx->enter_rval_inside_lval(); - std::vector tuple_ir_idx = pre_compile_expr(v->get_obj(), code, lval_ctx); - lval_ctx->exit_rval_inside_lval(); - std::vector index_ir_idx = code.create_tmp_var(TypeDataInt::create(), v->get_identifier()->loc, "(tuple-idx)"); - code.emplace_back(v->loc, Op::_IntConst, index_ir_idx, td::make_refint(index_at)); - std::vector field_ir_idx = code.create_tmp_var(v->inferred_type, v->loc, "(tuple-field)"); - if (lval_ctx->is_rval_inside_lval()) { // for `t.0.1 = rhs` "t.0" is rvalue inside lvalue - // for `t.0 = rhs` don't call tupleAt, but for `t.0.1 = rhs` do for t.0 (still don't for t.0.1) - const FunctionData* builtin_sym = lookup_global_symbol("tupleAt")->as(); - code.emplace_back(v->loc, Op::_Call, field_ir_idx, std::vector{tuple_ir_idx[0], index_ir_idx[0]}, builtin_sym); - } - lval_ctx->register_modified_tuple_index(hash, tuple_ir_idx[0], index_ir_idx[0], field_ir_idx[0]); - vars_modification_watcher.trigger_callbacks(tuple_ir_idx, v->loc); - return field_ir_idx; + // handle `tupleVar.0 = rhs`, "0 
SETINDEX" will be called when this was is modified + if (lval_ctx && !lval_ctx->is_rval_inside_lval() && calc_sink_leftmost_obj(v)) { + std::vector lval_ir_idx = code.create_tmp_var(v->inferred_type, v->loc, "(lval-tuple-field)"); + lval_ctx->capture_tuple_index_modification(v->get_obj(), index_at, lval_ir_idx); + return lval_ir_idx; } + // `tupleVar.0` as rvalue: the same as "tupleAt(tupleVar, 0)" written in terms of IR vars + std::vector tuple_ir_idx = pre_compile_expr(v->get_obj(), code); + std::vector index_ir_idx = code.create_tmp_var(TypeDataInt::create(), v->get_identifier()->loc, "(tuple-idx)"); + code.emplace_back(v->loc, Op::_IntConst, index_ir_idx, td::make_refint(index_at)); + std::vector field_ir_idx = code.create_tmp_var(v->inferred_type, v->loc, "(tuple-field)"); + tolk_assert(tuple_ir_idx.size() == 1 && field_ir_idx.size() == 1); // tuples contain only 1-slot values + const FunctionData* builtin_sym = lookup_global_symbol("tupleAt")->as(); + code.emplace_back(v->loc, Op::_Call, field_ir_idx, std::vector{tuple_ir_idx[0], index_ir_idx[0]}, builtin_sym); + if (lval_ctx && calc_sink_leftmost_obj(v)) { // `tupleVar.0.1 = rhs`, then `tupleVar.0` is rval inside lval + lval_ctx->capture_tuple_index_modification(v->get_obj(), index_at, field_ir_idx); + } + return field_ir_idx; } tolk_assert(false); } @@ -627,8 +640,8 @@ static std::vector process_function_call(V v, Code std::vector rvect = code.create_tmp_var(real_ret_type, v->loc, "(fun-call)"); left.insert(left.end(), rvect.begin(), rvect.end()); vars_modification_watcher.trigger_callbacks(left, v->loc); - code.emplace_back(v->loc, Op::_Let, std::move(left), rvect_apply); - local_lval.gen_ops_if_nonempty(code, v->loc); + code.emplace_back(v->loc, Op::_Let, left, rvect_apply); + local_lval.after_let(std::move(left), code, v->loc); rvect_apply = rvect; } @@ -710,7 +723,7 @@ std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, LValContext* case ast_reference: return pre_compile_symbol(v->loc, 
v->as()->sym, code, lval_ctx); case ast_assign: - return process_assign(v->as(), code); + return process_assignment(v->as(), code); case ast_set_assign: return process_set_assign(v->as(), code); case ast_binary_operator: diff --git a/tolk/pipe-constant-folding.cpp b/tolk/pipe-constant-folding.cpp index 98996c28..05d543b3 100644 --- a/tolk/pipe-constant-folding.cpp +++ b/tolk/pipe-constant-folding.cpp @@ -25,6 +25,8 @@ * * Currently, it just replaces `-1` (ast_unary_operator ast_int_const) with a number -1 * and `!true` with false. + * Also, all parenthesized `((expr))` are replaced with `expr`, it's a constant transformation. + * (not to handle parenthesized in optimization passes, like `((x)) == true`) * More rich constant folding should be done some day, but even without this, IR optimizations * (operating low-level stack variables) pretty manage to do all related optimizations. * Constant folding in the future, done at AST level, just would slightly reduce amount of work for optimizer. 
@@ -47,6 +49,14 @@ class ConstantFoldingReplacer final : public ASTReplacerInFunctionBody { return v_bool; } + AnyExprV replace(V v) override { + AnyExprV inner = parent::replace(v->get_expr()); + if (v->is_lvalue) { + inner->mutate()->assign_lvalue_true(); + } + return inner; + } + AnyExprV replace(V v) override { parent::replace(v); diff --git a/tolk/pipe-infer-types-and-calls.cpp b/tolk/pipe-infer-types-and-calls.cpp index ba5f77a7..abb060a2 100644 --- a/tolk/pipe-infer-types-and-calls.cpp +++ b/tolk/pipe-infer-types-and-calls.cpp @@ -133,8 +133,8 @@ static void fire_error_cannot_deduce_untyped_tuple_access(SrcLocation loc, int i // fire an error on `untypedTupleVar.0` when inferred as (int,int), or `[int, (int,int)]`, or other non-1 width in a tuple GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_cannot_put_non1_stack_width_arg_to_tuple(SrcLocation loc, TypePtr inferred_type) { - throw ParseError(loc, "can not put " + to_string(inferred_type) + " into a tuple, because it occupies " + std::to_string(inferred_type->calc_width_on_stack()) + " stack slots in TVM, not 1"); +static void fire_error_tuple_cannot_have_non1_stack_width_elem(SrcLocation loc, TypePtr inferred_type) { + throw ParseError(loc, "a tuple can not have " + to_string(inferred_type) + " inside, because it occupies " + std::to_string(inferred_type->calc_width_on_stack()) + " stack slots in TVM, not 1"); } // check correctness of called arguments counts and their type matching @@ -351,6 +351,8 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { return infer_bool_const(v->as()); case ast_local_vars_declaration: return infer_local_vars_declaration(v->as()); + case ast_local_var_lhs: + return infer_local_var_lhs(v->as()); case ast_assign: return infer_assignment(v->as()); case ast_set_assign: @@ -410,133 +412,71 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { assign_inferred_type(v, TypeDataBool::create()); } - static void infer_local_vars_declaration(V) { - // it can not 
appear as a standalone expression - // `var ... = rhs` is handled by ast_assign - tolk_assert(false); + void infer_local_vars_declaration(V v) { + infer_any_expr(v->get_expr()); + assign_inferred_type(v, v->get_expr()); + } + + static void infer_local_var_lhs(V v) { + // `var v = rhs`, inferring is called for `v` + // at the moment of inferring left side of assignment, we don't know type of rhs (since lhs is executed first) + // so, mark `v` as unknown + // later, v's inferred_type will be reassigned; see process_assignment_lhs_after_infer_rhs() + if (v->marked_as_redef) { + assign_inferred_type(v, v->var_ref->declared_type); + } else { + assign_inferred_type(v, v->declared_type ? v->declared_type : TypeDataUnknown::create()); + } } void infer_assignment(V v) { // v is assignment: `x = 5` / `var x = 5` / `var x: slice = 5` / `(cs,_) = f()` / `val (a,[b],_) = (a,t,0)` - // it's a tricky node to handle, because to infer rhs, at first we need to create hint from lhs - // and then to apply/check inferred rhs onto lhs - // about a hint: `var i: int = t.tupleAt(0)` is ok, but `var i = t.tupleAt(0)` not, since `tupleAt(t,i): T` + // execution flow is: lhs first, rhs second (at IR generation, also lhs is evaluated first, unlike FunC) + // after inferring lhs, use it for hint when inferring rhs + // example: `var i: int = t.tupleAt(0)` is ok (hint=int, T=int), but `var i = t.tupleAt(0)` not, since `tupleAt(t,i): T` AnyExprV lhs = v->get_lhs(); AnyExprV rhs = v->get_rhs(); - infer_any_expr(rhs, calc_hint_from_assignment_lhs(lhs)); + infer_any_expr(lhs); + infer_any_expr(rhs, lhs->inferred_type); process_assignment_lhs_after_infer_rhs(lhs, rhs->inferred_type, rhs); - assign_inferred_type(v, lhs); - } - - // having assignment like `var (i: int, s) = rhs` (its lhs is local vars declaration), - // create a contextual infer hint for rhs, `(int, unknown)` in this case - // this hint helps to deduce generics and to resolve unknown types while inferring rhs - static TypePtr 
calc_hint_from_assignment_lhs(AnyExprV lhs) { - // `var ... = rhs` - dig into left part - if (auto lhs_decl = lhs->try_as()) { - return calc_hint_from_assignment_lhs(lhs_decl->get_expr()); - } - - // inside `var v: int = rhs` / `var _ = rhs` / `var v redef = rhs` (lhs is "v" / "_" / "v") - if (auto lhs_var = lhs->try_as()) { - if (lhs_var->marked_as_redef) { - return lhs_var->var_ref->declared_type; - } - if (lhs_var->declared_type) { - return lhs_var->declared_type; - } - return TypeDataUnknown::create(); - } - - // `v = rhs` / `(c1, c2) = rhs` (lhs is "v" / "_" / "c1" / "c2" after recursion) - if (auto lhs_ref = lhs->try_as()) { - if (const auto* var_ref = lhs_ref->sym->try_as()) { - return var_ref->declared_type; - } - if (const auto* glob_ref = lhs_ref->sym->try_as()) { - return glob_ref->declared_type; - } - return TypeDataUnknown::create(); - } - - // `(v1, v2) = rhs` / `var (v1, v2) = rhs` - if (auto lhs_tensor = lhs->try_as()) { - std::vector sub_hints; - sub_hints.reserve(lhs_tensor->size()); - for (AnyExprV item : lhs_tensor->get_items()) { - sub_hints.push_back(calc_hint_from_assignment_lhs(item)); - } - return TypeDataTensor::create(std::move(sub_hints)); - } - - // `[v1, v2] = rhs` / `var [v1, v2] = rhs` - if (auto lhs_tuple = lhs->try_as()) { - std::vector sub_hints; - sub_hints.reserve(lhs_tuple->size()); - for (AnyExprV item : lhs_tuple->get_items()) { - sub_hints.push_back(calc_hint_from_assignment_lhs(item)); - } - return TypeDataTypedTuple::create(std::move(sub_hints)); - } - - // `a.0 = rhs` / `b.1.0 = rhs` (remember, its target is not assigned yet) - if (auto lhs_dot = lhs->try_as()) { - TypePtr obj_hint = calc_hint_from_assignment_lhs(lhs_dot->get_obj()); - std::string_view field_name = lhs_dot->get_field_name(); - if (field_name[0] >= '0' && field_name[0] <= '9') { - int index_at = std::stoi(std::string(field_name)); - if (const auto* t_tensor = obj_hint->try_as(); t_tensor && index_at < t_tensor->size()) { - return t_tensor->items[index_at]; 
- } - if (const auto* t_tuple = obj_hint->try_as(); t_tuple && index_at < t_tuple->size()) { - return t_tuple->items[index_at]; - } - } - return TypeDataUnknown::create(); - } - - return TypeDataUnknown::create(); + assign_inferred_type(v, rhs); // note, that the resulting type is rhs, not lhs } // handle (and dig recursively) into `var lhs = rhs` + // at this point, both lhs and rhs are already inferred, but lhs newly-declared vars are unknown (unless have declared_type) // examples: `var z = 5`, `var (x, [y]) = (2, [3])`, `var (x, [y]) = xy` + // the purpose is to update inferred_type of lhs vars (z, x, y) // while recursing, keep track of rhs if lhs and rhs have common shape (5 for z, 2 for x, [3] for [y], 3 for y) // (so that on type mismatch, point to corresponding rhs, example: `var (x, y:slice) = (1, 2)` point to 2 - void process_assignment_lhs_after_infer_rhs(AnyExprV lhs, TypePtr rhs_type, AnyExprV corresponding_maybe_rhs) { + static void process_assignment_lhs_after_infer_rhs(AnyExprV lhs, TypePtr rhs_type, AnyExprV corresponding_maybe_rhs) { + tolk_assert(lhs->inferred_type != nullptr); AnyExprV err_loc = corresponding_maybe_rhs ? corresponding_maybe_rhs : lhs; // `var ... 
= rhs` - dig into left part if (auto lhs_decl = lhs->try_as()) { process_assignment_lhs_after_infer_rhs(lhs_decl->get_expr(), rhs_type, corresponding_maybe_rhs); - assign_inferred_type(lhs, lhs_decl->get_expr()->inferred_type); return; } // inside `var v: int = rhs` / `var _ = rhs` / `var v redef = rhs` (lhs is "v" / "_" / "v") if (auto lhs_var = lhs->try_as()) { - TypePtr declared_type = lhs_var->declared_type; // `var v: int = rhs` (otherwise, nullptr) - if (lhs_var->marked_as_redef) { - tolk_assert(lhs_var->var_ref && lhs_var->var_ref->declared_type); - declared_type = lhs_var->var_ref->declared_type; - } - if (declared_type) { + if (lhs_var->inferred_type != TypeDataUnknown::create()) { // it's `var v: int` or redef + TypePtr declared_type = lhs_var->inferred_type; if (!declared_type->can_rhs_be_assigned(rhs_type)) { err_loc->error("can not assign " + to_string(rhs_type) + " to variable of type " + to_string(declared_type)); } - assign_inferred_type(lhs, declared_type); } else { if (rhs_type == TypeDataNullLiteral::create()) { fire_error_assign_always_null_to_variable(err_loc->loc, lhs_var->var_ref->try_as(), corresponding_maybe_rhs && corresponding_maybe_rhs->type == ast_null_keyword); } - assign_inferred_type(lhs, rhs_type); - assign_inferred_type(lhs_var->var_ref, lhs_var->inferred_type); + assign_inferred_type(lhs_var, rhs_type); + assign_inferred_type(lhs_var->var_ref, rhs_type); } return; } // `v = rhs` / `(c1, c2) = rhs` (lhs is "v" / "_" / "c1" / "c2" after recursion) if (lhs->try_as()) { - infer_any_expr(lhs); if (!lhs->inferred_type->can_rhs_be_assigned(rhs_type)) { err_loc->error("can not assign " + to_string(rhs_type) + " to variable of type " + to_string(lhs)); } @@ -554,13 +494,9 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { err_loc->error("can not assign " + to_string(rhs_type) + ", sizes mismatch"); } V rhs_tensor_maybe = corresponding_maybe_rhs ? 
corresponding_maybe_rhs->try_as() : nullptr; - std::vector types_list; - types_list.reserve(lhs_tensor->size()); for (int i = 0; i < lhs_tensor->size(); ++i) { process_assignment_lhs_after_infer_rhs(lhs_tensor->get_item(i), rhs_type_tensor->items[i], rhs_tensor_maybe ? rhs_tensor_maybe->get_item(i) : nullptr); - types_list.push_back(lhs_tensor->get_item(i)->inferred_type); } - assign_inferred_type(lhs, TypeDataTensor::create(std::move(types_list))); return; } @@ -575,25 +511,23 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { err_loc->error("can not assign " + to_string(rhs_type) + ", sizes mismatch"); } V rhs_tuple_maybe = corresponding_maybe_rhs ? corresponding_maybe_rhs->try_as() : nullptr; - std::vector types_list; - types_list.reserve(lhs_tuple->size()); for (int i = 0; i < lhs_tuple->size(); ++i) { process_assignment_lhs_after_infer_rhs(lhs_tuple->get_item(i), rhs_type_tuple->items[i], rhs_tuple_maybe ? rhs_tuple_maybe->get_item(i) : nullptr); - types_list.push_back(lhs_tuple->get_item(i)->inferred_type); } - assign_inferred_type(lhs, TypeDataTypedTuple::create(std::move(types_list))); return; } - // `_ = rhs` - if (lhs->type == ast_underscore) { - assign_inferred_type(lhs, TypeDataUnknown::create()); - return; + // check `untypedTuple.0 = rhs_tensor` and other non-1 width elements + if (auto lhs_dot = lhs->try_as()) { + if (lhs_dot->is_target_indexed_access() && lhs_dot->get_obj()->inferred_type == TypeDataTuple::create()) { + if (rhs_type->calc_width_on_stack() != 1) { + fire_error_tuple_cannot_have_non1_stack_width_elem(err_loc->loc, rhs_type); + } + } } - // here is something unhandled like `a.0 = rhs`, run regular inferring on rhs + // here is something unhandled like `a.0 = rhs`, just check type matching // for something strange like `f() = rhs` type inferring will pass, but will fail later - infer_any_expr(lhs, rhs_type); if (!lhs->inferred_type->can_rhs_be_assigned(rhs_type)) { err_loc->error("can not assign " + to_string(rhs_type) + " to " + 
to_string(lhs)); } @@ -895,14 +829,20 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { return; } if (obj_type->try_as()) { - if (hint == nullptr) { - fire_error_cannot_deduce_untyped_tuple_access(v->loc, index_at); - } - if (hint->calc_width_on_stack() != 1) { - fire_error_cannot_put_non1_stack_width_arg_to_tuple(v->loc, hint); + TypePtr item_type = nullptr; + if (v->is_lvalue && !hint) { // left side of assignment + item_type = TypeDataUnknown::create(); + } else { + if (hint == nullptr) { + fire_error_cannot_deduce_untyped_tuple_access(v->loc, index_at); + } + if (hint->calc_width_on_stack() != 1) { + fire_error_tuple_cannot_have_non1_stack_width_elem(v->loc, hint); + } + item_type = hint; } v->mutate()->assign_target(index_at); - assign_inferred_type(v, hint); + assign_inferred_type(v, item_type); return; } v_ident->error("type " + to_string(obj_type) + " is not indexable"); @@ -1081,7 +1021,7 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { AnyExprV item = v->get_item(i); infer_any_expr(item, tuple_hint && i < tuple_hint->size() ? tuple_hint->items[i] : nullptr); if (item->inferred_type->calc_width_on_stack() != 1) { - fire_error_cannot_put_non1_stack_width_arg_to_tuple(v->get_item(i)->loc, item->inferred_type); + fire_error_tuple_cannot_have_non1_stack_width_elem(v->get_item(i)->loc, item->inferred_type); } types_list.emplace_back(item->inferred_type); } diff --git a/tolk/pipe-optimize-boolean-expr.cpp b/tolk/pipe-optimize-boolean-expr.cpp index 03750256..a2e67047 100644 --- a/tolk/pipe-optimize-boolean-expr.cpp +++ b/tolk/pipe-optimize-boolean-expr.cpp @@ -25,7 +25,6 @@ * * Example: `boolVar == true` -> `boolVar`. * Example: `!!boolVar` -> `boolVar`. - * Also in unwraps parenthesis inside if condition and similar: `assert(((x)), 404)` -> `assert(x, 404)` * * todo some day, replace && || with & | when it's safe (currently, && always produces IFs in Fift) * It's tricky to implement whether replacing is safe. 
@@ -35,13 +34,6 @@ namespace tolk { -static AnyExprV unwrap_parenthesis(AnyExprV v) { - while (v->type == ast_parenthesized_expression) { - v = v->as()->get_expr(); - } - return v; -} - struct OptimizerBooleanExpressionsReplacer final : ASTReplacerInFunctionBody { static V create_int_const(SrcLocation loc, td::RefInt256&& intval) { auto v_int = createV(loc, std::move(intval), {}); @@ -117,9 +109,6 @@ protected: AnyV replace(V v) override { parent::replace(v); - if (v->get_cond()->type == ast_parenthesized_expression) { - v = createV(v->loc, v->is_ifnot, unwrap_parenthesis(v->get_cond()), v->get_if_body(), v->get_else_body()); - } // `if (!x)` -> ifnot(x) while (auto v_cond_unary = v->get_cond()->try_as()) { @@ -132,33 +121,6 @@ protected: return v; } - AnyV replace(V v) override { - parent::replace(v); - - if (v->get_cond()->type == ast_parenthesized_expression) { - v = createV(v->loc, unwrap_parenthesis(v->get_cond()), v->get_body()); - } - return v; - } - - AnyV replace(V v) override { - parent::replace(v); - - if (v->get_cond()->type == ast_parenthesized_expression) { - v = createV(v->loc, v->get_body(), unwrap_parenthesis(v->get_cond())); - } - return v; - } - - AnyV replace(V v) override { - parent::replace(v); - - if (v->get_cond()->type == ast_parenthesized_expression) { - v = createV(v->loc, unwrap_parenthesis(v->get_cond()), v->get_thrown_code()); - } - return v; - } - public: bool should_visit_function(const FunctionData* fun_ref) override { return fun_ref->is_code_function() && !fun_ref->is_generic_function(); From b3b2bd1c3c645a931a9aa6cbfc915c258c9012cc Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Thu, 27 Feb 2025 15:18:59 +0300 Subject: [PATCH 54/61] New extra currency behavior (#1539) --- common/global-version.h | 2 +- crypto/block/block.cpp | 29 ++++++ crypto/block/block.h | 2 + crypto/block/block.tlb | 2 +- crypto/block/mc-config.cpp | 1 + crypto/block/mc-config.h | 1 + crypto/block/transaction.cpp | 131 ++++++++++++++++++++++++---- 
crypto/block/transaction.h | 17 ++-- crypto/vm/boc.cpp | 8 +- crypto/vm/tonops.cpp | 12 ++- doc/GlobalVersions.md | 23 ++++- emulator/transaction-emulator.cpp | 11 ++- validator/impl/collator-impl.h | 14 ++- validator/impl/collator.cpp | 17 ++-- validator/impl/external-message.cpp | 16 ++-- validator/impl/validate-query.cpp | 6 +- validator/impl/validate-query.hpp | 1 + 17 files changed, 228 insertions(+), 65 deletions(-) diff --git a/common/global-version.h b/common/global-version.h index 533e5e8d..2308ce3e 100644 --- a/common/global-version.h +++ b/common/global-version.h @@ -19,6 +19,6 @@ namespace ton { // See doc/GlobalVersions.md -const int SUPPORTED_VERSION = 9; +constexpr int SUPPORTED_VERSION = 10; } diff --git a/crypto/block/block.cpp b/crypto/block/block.cpp index 452d78a2..e0782240 100644 --- a/crypto/block/block.cpp +++ b/crypto/block/block.cpp @@ -1350,6 +1350,35 @@ bool CurrencyCollection::clamp(const CurrencyCollection& other) { return ok || invalidate(); } +bool CurrencyCollection::check_extra_currency_limit(td::uint32 max_currencies) const { + td::uint32 count = 0; + return vm::Dictionary{extra, 32}.check_for_each([&](td::Ref, td::ConstBitPtr, int) { + ++count; + return count <= max_currencies; + }); +} + +bool CurrencyCollection::remove_zero_extra_currencies(Ref& root, td::uint32 max_currencies) { + td::uint32 count = 0; + vm::Dictionary dict{root, 32}; + int res = dict.filter([&](const vm::CellSlice& cs, td::ConstBitPtr, int) -> int { + ++count; + if (count > max_currencies) { + return -1; + } + td::RefInt256 val = tlb::t_VarUInteger_32.as_integer(cs); + if (val.is_null()) { + return -1; + } + return val->sgn() > 0; + }); + if (res < 0) { + return false; + } + root = dict.get_root_cell(); + return true; +} + bool CurrencyCollection::operator==(const CurrencyCollection& other) const { return is_valid() && other.is_valid() && !td::cmp(grams, other.grams) && (extra.not_null() == other.extra.not_null()) && diff --git a/crypto/block/block.h 
b/crypto/block/block.h index f64f00a8..685005b4 100644 --- a/crypto/block/block.h +++ b/crypto/block/block.h @@ -391,6 +391,8 @@ struct CurrencyCollection { CurrencyCollection operator-(CurrencyCollection&& other) const; CurrencyCollection operator-(td::RefInt256 other_grams) const; bool clamp(const CurrencyCollection& other); + bool check_extra_currency_limit(td::uint32 max_currencies) const; + static bool remove_zero_extra_currencies(Ref& root, td::uint32 max_currencies); bool store(vm::CellBuilder& cb) const; bool store_or_zero(vm::CellBuilder& cb) const; bool fetch(vm::CellSlice& cs); diff --git a/crypto/block/block.tlb b/crypto/block/block.tlb index b8b40827..4a8bbc06 100644 --- a/crypto/block/block.tlb +++ b/crypto/block/block.tlb @@ -801,7 +801,7 @@ size_limits_config#01 max_msg_bits:uint32 max_msg_cells:uint32 max_library_cells max_ext_msg_size:uint32 max_ext_msg_depth:uint16 = SizeLimitsConfig; size_limits_config_v2#02 max_msg_bits:uint32 max_msg_cells:uint32 max_library_cells:uint32 max_vm_data_depth:uint16 max_ext_msg_size:uint32 max_ext_msg_depth:uint16 max_acc_state_cells:uint32 max_acc_state_bits:uint32 - max_acc_public_libraries:uint32 defer_out_queue_size_limit:uint32 = SizeLimitsConfig; + max_acc_public_libraries:uint32 defer_out_queue_size_limit:uint32 max_msg_extra_currencies:uint32 = SizeLimitsConfig; _ SizeLimitsConfig = ConfigParam 43; // key is [ wc:int32 addr:uint256 ] diff --git a/crypto/block/mc-config.cpp b/crypto/block/mc-config.cpp index 48a2d613..0f019b06 100644 --- a/crypto/block/mc-config.cpp +++ b/crypto/block/mc-config.cpp @@ -1960,6 +1960,7 @@ td::Result Config::do_get_size_limits_config(td::Refsgn() == 0 : ap.remaining_balance.is_zero()); ap.acc_status_change = ActionPhase::acst_deleted; - acc_status = Account::acc_deleted; + acc_status = (ap.remaining_balance.is_zero() ? 
Account::acc_deleted : Account::acc_uninit); was_deleted = true; } ap.success = true; @@ -2472,6 +2472,20 @@ int Transaction::try_action_send_msg(const vm::CellSlice& cs0, ActionPhase& ap, LOG(DEBUG) << "invalid destination address in a proposed outbound message"; return check_skip_invalid(36); // invalid destination address } + if (cfg.extra_currency_v2) { + CurrencyCollection value; + if (!value.unpack(info.value)) { + LOG(DEBUG) << "invalid value:ExtraCurrencies in a proposed outbound message"; + return check_skip_invalid(37); // invalid value:CurrencyCollection + } + if (!CurrencyCollection::remove_zero_extra_currencies(value.extra, cfg.size_limits.max_msg_extra_currencies)) { + LOG(DEBUG) << "invalid value:ExtraCurrencies in a proposed outbound message: too many currencies (max " + << cfg.size_limits.max_msg_extra_currencies << ")"; + // Dict should be valid, since it was checked in t_OutListNode.validate_ref, so error here means limit exceeded + return check_skip_invalid(41); // invalid value:CurrencyCollection : too many extra currencies + } + info.value = value.pack(); + } // fetch message pricing info const MsgPrices& msg_prices = cfg.fetch_msg_prices(to_mc || account.is_masterchain()); @@ -2524,7 +2538,7 @@ int Transaction::try_action_send_msg(const vm::CellSlice& cs0, ActionPhase& ap, }; add_used_storage(msg.init, 3); // message init add_used_storage(msg.body, 3); // message body (the root cell itself is not counted) - if (!ext_msg) { + if (!ext_msg && !cfg.extra_currency_v2) { add_used_storage(info.value->prefetch_ref(), 0); } auto collect_fine = [&] { @@ -2595,11 +2609,19 @@ int Transaction::try_action_send_msg(const vm::CellSlice& cs0, ActionPhase& ap, if (act_rec.mode & 0x80) { // attach all remaining balance to this message - req = ap.remaining_balance; + if (cfg.extra_currency_v2) { + req.grams = ap.remaining_balance.grams; + } else { + req = ap.remaining_balance; + } act_rec.mode &= ~1; // pay fees from attached value } else if (act_rec.mode & 
0x40) { // attach all remaining balance of the inbound message (in addition to the original value) - req += msg_balance_remaining; + if (cfg.extra_currency_v2) { + req.grams += msg_balance_remaining.grams; + } else { + req += msg_balance_remaining; + } if (!(act_rec.mode & 1)) { req -= ap.action_fine; if (compute_phase) { @@ -2639,6 +2661,11 @@ int Transaction::try_action_send_msg(const vm::CellSlice& cs0, ActionPhase& ap, return check_skip_invalid(37); // not enough grams } + if (cfg.extra_currency_v2 && !req.check_extra_currency_limit(cfg.size_limits.max_msg_extra_currencies)) { + LOG(DEBUG) << "too many extra currencies in the message : max " << cfg.size_limits.max_msg_extra_currencies; + return check_skip_invalid(41); // to many extra currencies + } + Ref new_extra; if (!block::sub_extra_currency(ap.remaining_balance.extra, req.extra, new_extra)) { @@ -2680,7 +2707,11 @@ int Transaction::try_action_send_msg(const vm::CellSlice& cs0, ActionPhase& ap, // clear msg_balance_remaining if it has been used if (act_rec.mode & 0xc0) { - msg_balance_remaining.set_zero(); + if (cfg.extra_currency_v2) { + msg_balance_remaining.grams = td::zero_refint(); + } else { + msg_balance_remaining.set_zero(); + } } // update balance @@ -2754,8 +2785,13 @@ int Transaction::try_action_send_msg(const vm::CellSlice& cs0, ActionPhase& ap, ap.total_fwd_fees += fees_total; if ((act_rec.mode & 0xa0) == 0xa0) { - CHECK(ap.remaining_balance.is_zero()); - ap.acc_delete_req = ap.reserved_balance.is_zero(); + if (cfg.extra_currency_v2) { + CHECK(ap.remaining_balance.grams->sgn() == 0); + ap.acc_delete_req = ap.reserved_balance.grams->sgn() == 0; + } else { + CHECK(ap.remaining_balance.is_zero()); + ap.acc_delete_req = ap.reserved_balance.is_zero(); + } } ap.tot_msg_bits += sstat.bits + new_msg_bits; @@ -3026,7 +3062,8 @@ bool Transaction::prepare_bounce_phase(const ActionPhaseConfig& cfg) { bp.fwd_fees -= bp.fwd_fees_collected; total_fees += td::make_refint(bp.fwd_fees_collected); // serialize 
outbound message - info.created_lt = end_lt++; + info.created_lt = start_lt + 1 + out_msgs.size(); + end_lt++; info.created_at = now; vm::CellBuilder cb; CHECK(cb.store_long_bool(5, 4) // int_msg_info$0 ihr_disabled:Bool bounce:Bool bounced:Bool @@ -3107,6 +3144,7 @@ bool Account::store_acc_status(vm::CellBuilder& cb, int acc_status) const { * Tries to update the storage statistics based on the old storage statistics and old account state without fully recomputing it. * * It succeeds if only root cell of AccountStorage is changed. + * old_cs and new_cell are AccountStorage without extra currencies (if global_version >= 10). * * @param old_stat The old storage statistics. * @param old_cs The old AccountStorage. @@ -3140,13 +3178,48 @@ static td::optional try_update_storage_stat(const vm::CellS return new_stat; } +/** + * Removes extra currencies dict from AccountStorage. + * + * This is used for computing account storage stats. + * + * @param storage_cs AccountStorage as CellSlice. + * + * @returns AccountStorage without extra currencies as Cell. + */ +static td::Ref storage_without_extra_currencies(td::Ref storage_cs) { + block::gen::AccountStorage::Record rec; + if (!block::gen::csr_unpack(storage_cs, rec)) { + LOG(ERROR) << "failed to unpack AccountStorage"; + return {}; + } + if (rec.balance->size_refs() > 0) { + block::gen::CurrencyCollection::Record balance; + if (!block::gen::csr_unpack(rec.balance, balance)) { + LOG(ERROR) << "failed to unpack AccountStorage"; + return {}; + } + balance.other = vm::CellBuilder{}.store_zeroes(1).as_cellslice_ref(); + if (!block::gen::csr_pack(rec.balance, balance)) { + LOG(ERROR) << "failed to pack AccountStorage"; + return {}; + } + } + td::Ref cell; + if (!block::gen::pack_cell(cell, rec)) { + LOG(ERROR) << "failed to pack AccountStorage"; + return {}; + } + return cell; +} + namespace transaction { /** * Computes the new state of the account. * * @returns True if the state computation is successful, false otherwise. 
*/ -bool Transaction::compute_state() { +bool Transaction::compute_state(const SerializeConfig& cfg) { if (new_total_state.not_null()) { return true; } @@ -3218,13 +3291,27 @@ bool Transaction::compute_state() { new_inner_state.clear(); } vm::CellStorageStat& stats = new_storage_stat; - auto new_stats = try_update_storage_stat(account.storage_stat, account.storage, storage); + td::Ref old_storage_for_stat = account.storage; + td::Ref new_storage_for_stat = storage; + if (cfg.extra_currency_v2) { + new_storage_for_stat = storage_without_extra_currencies(new_storage); + if (new_storage_for_stat.is_null()) { + return false; + } + if (old_storage_for_stat.not_null()) { + old_storage_for_stat = vm::load_cell_slice_ref(storage_without_extra_currencies(old_storage_for_stat)); + if (old_storage_for_stat.is_null()) { + return false; + } + } + } + auto new_stats = try_update_storage_stat(account.storage_stat, old_storage_for_stat, storage); if (new_stats) { stats = new_stats.unwrap(); } else { TD_PERF_COUNTER(transaction_storage_stat_b); td::Timer timer; - stats.add_used_storage(Ref(storage)).ensure(); + stats.add_used_storage(new_storage_for_stat).ensure(); if (timer.elapsed() > 0.1) { LOG(INFO) << "Compute used storage took " << timer.elapsed() << "s"; } @@ -3260,11 +3347,11 @@ bool Transaction::compute_state() { * * @returns True if the serialization is successful, False otherwise. */ -bool Transaction::serialize() { +bool Transaction::serialize(const SerializeConfig& cfg) { if (root.not_null()) { return true; } - if (!compute_state()) { + if (!compute_state(cfg)) { return false; } vm::Dictionary dict{15}; @@ -3730,6 +3817,7 @@ bool Account::libraries_changed() const { * @param rand_seed Pointer to the random seed. Generates a new seed if the value is `td::Bits256::zero()`. * @param compute_phase_cfg Pointer to store the compute phase configuration. * @param action_phase_cfg Pointer to store the action phase configuration. 
+ * @param serialize_cfg Pointer to store the serialize phase configuration. * @param masterchain_create_fee Pointer to store the masterchain create fee. * @param basechain_create_fee Pointer to store the basechain create fee. * @param wc The workchain ID. @@ -3738,15 +3826,15 @@ bool Account::libraries_changed() const { td::Status FetchConfigParams::fetch_config_params( const block::ConfigInfo& config, Ref* old_mparams, std::vector* storage_prices, StoragePhaseConfig* storage_phase_cfg, td::BitArray<256>* rand_seed, ComputePhaseConfig* compute_phase_cfg, - ActionPhaseConfig* action_phase_cfg, td::RefInt256* masterchain_create_fee, td::RefInt256* basechain_create_fee, - ton::WorkchainId wc, ton::UnixTime now) { + ActionPhaseConfig* action_phase_cfg, SerializeConfig* serialize_cfg, td::RefInt256* masterchain_create_fee, + td::RefInt256* basechain_create_fee, ton::WorkchainId wc, ton::UnixTime now) { auto prev_blocks_info = config.get_prev_blocks_info(); if (prev_blocks_info.is_error()) { return prev_blocks_info.move_as_error_prefix( td::Status::Error(-668, "cannot fetch prev blocks info from masterchain configuration: ")); } return fetch_config_params(config, prev_blocks_info.move_as_ok(), old_mparams, storage_prices, storage_phase_cfg, - rand_seed, compute_phase_cfg, action_phase_cfg, masterchain_create_fee, + rand_seed, compute_phase_cfg, action_phase_cfg, serialize_cfg, masterchain_create_fee, basechain_create_fee, wc, now); } @@ -3761,6 +3849,7 @@ td::Status FetchConfigParams::fetch_config_params( * @param rand_seed Pointer to the random seed. Generates a new seed if the value is `td::Bits256::zero()`. * @param compute_phase_cfg Pointer to store the compute phase configuration. * @param action_phase_cfg Pointer to store the action phase configuration. + * @param serialize_cfg Pointer to store the serialize phase configuration. * @param masterchain_create_fee Pointer to store the masterchain create fee. 
* @param basechain_create_fee Pointer to store the basechain create fee. * @param wc The workchain ID. @@ -3770,8 +3859,8 @@ td::Status FetchConfigParams::fetch_config_params( const block::Config& config, td::Ref prev_blocks_info, Ref* old_mparams, std::vector* storage_prices, StoragePhaseConfig* storage_phase_cfg, td::BitArray<256>* rand_seed, ComputePhaseConfig* compute_phase_cfg, ActionPhaseConfig* action_phase_cfg, - td::RefInt256* masterchain_create_fee, td::RefInt256* basechain_create_fee, ton::WorkchainId wc, - ton::UnixTime now) { + SerializeConfig* serialize_cfg, td::RefInt256* masterchain_create_fee, td::RefInt256* basechain_create_fee, + ton::WorkchainId wc, ton::UnixTime now) { *old_mparams = config.get_config_param(9); { auto res = config.get_storage_prices(); @@ -3843,6 +3932,10 @@ td::Status FetchConfigParams::fetch_config_params( action_phase_cfg->disable_custom_fess = config.get_global_version() >= 8; action_phase_cfg->reserve_extra_enabled = config.get_global_version() >= 9; action_phase_cfg->mc_blackhole_addr = config.get_burning_config().blackhole_addr; + action_phase_cfg->extra_currency_v2 = config.get_global_version() >= 10; + } + { + serialize_cfg->extra_currency_v2 = config.get_global_version() >= 10; } { // fetch block_grams_created diff --git a/crypto/block/transaction.h b/crypto/block/transaction.h index 0f6952dc..8e612e6a 100644 --- a/crypto/block/transaction.h +++ b/crypto/block/transaction.h @@ -170,12 +170,17 @@ struct ActionPhaseConfig { bool message_skip_enabled{false}; bool disable_custom_fess{false}; bool reserve_extra_enabled{false}; + bool extra_currency_v2{false}; td::optional mc_blackhole_addr; const MsgPrices& fetch_msg_prices(bool is_masterchain) const { return is_masterchain ? 
fwd_mc : fwd_std; } }; +struct SerializeConfig { + bool extra_currency_v2{false}; +}; + struct CreditPhase { td::RefInt256 due_fees_collected; block::CurrencyCollection credit; @@ -389,8 +394,8 @@ struct Transaction { bool prepare_action_phase(const ActionPhaseConfig& cfg); td::Status check_state_limits(const SizeLimitsConfig& size_limits, bool update_storage_stat = true); bool prepare_bounce_phase(const ActionPhaseConfig& cfg); - bool compute_state(); - bool serialize(); + bool compute_state(const SerializeConfig& cfg); + bool serialize(const SerializeConfig& cfg); td::uint64 gas_used() const { return compute_phase ? compute_phase->gas_used : 0; } @@ -428,14 +433,14 @@ struct FetchConfigParams { std::vector* storage_prices, StoragePhaseConfig* storage_phase_cfg, td::BitArray<256>* rand_seed, ComputePhaseConfig* compute_phase_cfg, ActionPhaseConfig* action_phase_cfg, - td::RefInt256* masterchain_create_fee, td::RefInt256* basechain_create_fee, - ton::WorkchainId wc, ton::UnixTime now); + SerializeConfig* serialize_cfg, td::RefInt256* masterchain_create_fee, + td::RefInt256* basechain_create_fee, ton::WorkchainId wc, ton::UnixTime now); static td::Status fetch_config_params(const block::Config& config, Ref prev_blocks_info, Ref* old_mparams, std::vector* storage_prices, StoragePhaseConfig* storage_phase_cfg, td::BitArray<256>* rand_seed, ComputePhaseConfig* compute_phase_cfg, ActionPhaseConfig* action_phase_cfg, - td::RefInt256* masterchain_create_fee, td::RefInt256* basechain_create_fee, - ton::WorkchainId wc, ton::UnixTime now); + SerializeConfig* serialize_cfg, td::RefInt256* masterchain_create_fee, + td::RefInt256* basechain_create_fee, ton::WorkchainId wc, ton::UnixTime now); }; } // namespace block diff --git a/crypto/vm/boc.cpp b/crypto/vm/boc.cpp index 7ec8bdd1..72afb998 100644 --- a/crypto/vm/boc.cpp +++ b/crypto/vm/boc.cpp @@ -1153,8 +1153,12 @@ td::Result CellStorageStat::add_used_storage(Refsecond; } } - vm::CellSlice cs{vm::NoVm{}, std::move(cell)}; - 
return add_used_storage(std::move(cs), kill_dup, skip_count_root); + vm::CellSlice cs{vm::NoVm{}, cell}; + TRY_RESULT(res, add_used_storage(std::move(cs), kill_dup, skip_count_root)); + if (kill_dup) { + seen[cell->get_hash()] = res; + } + return res; } void NewCellStorageStat::add_cell(Ref cell) { diff --git a/crypto/vm/tonops.cpp b/crypto/vm/tonops.cpp index 5d90b8fd..aab1711f 100644 --- a/crypto/vm/tonops.cpp +++ b/crypto/vm/tonops.cpp @@ -1761,6 +1761,10 @@ int exec_send_message(VmState* st) { vm::VmStorageStat stat(max_cells); CellSlice cs = load_cell_slice(msg_cell); cs.skip_first(cs.size()); + if (st->get_global_version() >= 10 && have_extra_currencies) { + // Skip extra currency dict + cs.advance_refs(1); + } stat.add_storage(cs); if (!ext_msg) { @@ -1773,7 +1777,9 @@ int exec_send_message(VmState* st) { if (value.is_null()) { throw VmError{Excno::type_chk, "invalid param BALANCE"}; } - have_extra_currencies |= !tuple_index(balance, 1).as_cell().is_null(); + if (st->get_global_version() < 10) { + have_extra_currencies |= !tuple_index(balance, 1).as_cell().is_null(); + } } else if (mode & 64) { // value += value of incoming message Ref balance = get_param(st, 11).as_tuple(); if (balance.is_null()) { @@ -1784,7 +1790,9 @@ int exec_send_message(VmState* st) { throw VmError{Excno::type_chk, "invalid param INCOMINGVALUE"}; } value += balance_grams; - have_extra_currencies |= !tuple_index(balance, 1).as_cell().is_null(); + if (st->get_global_version() < 10) { + have_extra_currencies |= !tuple_index(balance, 1).as_cell().is_null(); + } } } diff --git a/doc/GlobalVersions.md b/doc/GlobalVersions.md index f4156ca0..77963e95 100644 --- a/doc/GlobalVersions.md +++ b/doc/GlobalVersions.md @@ -134,4 +134,25 @@ Example: if the last masterchain block seqno is `19071` then the list contains b - `PFXDICTADD`, `PFXDICTSET`, `PFXDICTREPLACE`, `PFXDICTDEL`, `GETGASFEE`, `GETSTORAGEFEE`, `GETFORWARDFEE`, `GETORIGINALFWDFEE`, `GETGASFEESIMPLE`, `GETFORWARDFEESIMPLE`, `HASHEXT` - 
Now setting the contract code to a library cell does not consume additional gas on execution of the code. - Temporary increase gas limit for some accounts (see [this post](https://t.me/tondev_news/129) for details, `override_gas_limit` in `transaction.cpp` for the list of accounts). -- Fix recursive jump to continuations with non-null control data. \ No newline at end of file +- Fix recursive jump to continuations with non-null control data. + +## Version 10 + +### Extra currencies +- Internal messages cannot carry more than 2 different extra currencies. The limit can be changed in size limits config (`ConfigParam 43`). +- Amount of an extra currency in an output action "send message" can be zero. + - In action phase zero values are automatically deleted from the dictionary before sending. + - However, the size of the extra currency dictionary in the "send message" action should not be greater than 2 (or the value in size limits config). +- Extra currency dictionary is not counted in message size and does not affect message fees. +- Message mode `+64` (carry all remaining message balance) is now considered as "carry all remaining TONs from message balance". +- Message mode `+128` (carry all remaining account balance) is now considered as "carry all remaining TONs from account balance". +- Message mode `+32` (delete account if balance is zero) deletes account if it has zero TONs, regardless of extra currencies. + - Deleted accounts with extra currencies become `account_uninit`, extra currencies remain on the account. +- `SENDMSG` in TVM calculates message size and fees without extra currencies, uses new `+64` and `+128` mode behavior. + - `SENDMSG` does not check the number of extra currencies. +- Extra currency dictionary is not counted in the account size and does not affect storage fees. + - Accounts with already existing extra currencies will get their sizes recomputed without EC only after modifying `AccountState`. 
+ +### TVM changes +- `SENDMSG` calculates messages size and fees without extra currencies, uses new +64 and +128 mode behavior. + - `SENDMSG` does not check the number of extra currencies. diff --git a/emulator/transaction-emulator.cpp b/emulator/transaction-emulator.cpp index e87b2dfb..6267f9bd 100644 --- a/emulator/transaction-emulator.cpp +++ b/emulator/transaction-emulator.cpp @@ -16,6 +16,7 @@ td::Result> TransactionEmu block::StoragePhaseConfig storage_phase_cfg{&storage_prices}; block::ComputePhaseConfig compute_phase_cfg; block::ActionPhaseConfig action_phase_cfg; + block::SerializeConfig serialize_config; td::RefInt256 masterchain_create_fee, basechain_create_fee; if (!utime) { @@ -25,11 +26,9 @@ td::Result> TransactionEmu utime = (unsigned)std::time(nullptr); } - auto fetch_res = block::FetchConfigParams::fetch_config_params(*config_, prev_blocks_info_, &old_mparams, - &storage_prices, &storage_phase_cfg, - &rand_seed_, &compute_phase_cfg, - &action_phase_cfg, &masterchain_create_fee, - &basechain_create_fee, account.workchain, utime); + auto fetch_res = block::FetchConfigParams::fetch_config_params( + *config_, prev_blocks_info_, &old_mparams, &storage_prices, &storage_phase_cfg, &rand_seed_, &compute_phase_cfg, + &action_phase_cfg, &serialize_config, &masterchain_create_fee, &basechain_create_fee, account.workchain, utime); if(fetch_res.is_error()) { return fetch_res.move_as_error_prefix("cannot fetch config params "); } @@ -66,7 +65,7 @@ td::Result> TransactionEmu return std::make_unique(std::move(vm_log), vm_exit_code, elapsed); } - if (!trans->serialize()) { + if (!trans->serialize(serialize_config)) { return td::Status::Error(-669,"cannot serialize new transaction for smart contract "s + trans->account.addr.to_hex()); } diff --git a/validator/impl/collator-impl.h b/validator/impl/collator-impl.h index ce21bc5e..340e3a40 100644 --- a/validator/impl/collator-impl.h +++ b/validator/impl/collator-impl.h @@ -109,14 +109,11 @@ class Collator final : 
public td::actor::Actor { return 2; } - static td::Result> - impl_create_ordinary_transaction(Ref msg_root, - block::Account* acc, - UnixTime utime, LogicalTime lt, - block::StoragePhaseConfig* storage_phase_cfg, - block::ComputePhaseConfig* compute_phase_cfg, - block::ActionPhaseConfig* action_phase_cfg, - bool external, LogicalTime after_lt); + static td::Result> impl_create_ordinary_transaction( + Ref msg_root, block::Account* acc, UnixTime utime, LogicalTime lt, + block::StoragePhaseConfig* storage_phase_cfg, block::ComputePhaseConfig* compute_phase_cfg, + block::ActionPhaseConfig* action_phase_cfg, block::SerializeConfig* serialize_cfg, bool external, + LogicalTime after_lt); private: void start_up() override; @@ -177,6 +174,7 @@ class Collator final : public td::actor::Actor { block::StoragePhaseConfig storage_phase_cfg_{&storage_prices_}; block::ComputePhaseConfig compute_phase_cfg_; block::ActionPhaseConfig action_phase_cfg_; + block::SerializeConfig serialize_cfg_; td::RefInt256 masterchain_create_fee_, basechain_create_fee_; std::unique_ptr block_limits_; std::unique_ptr block_limit_status_; diff --git a/validator/impl/collator.cpp b/validator/impl/collator.cpp index d5c41853..2a6d7a2b 100644 --- a/validator/impl/collator.cpp +++ b/validator/impl/collator.cpp @@ -1995,12 +1995,9 @@ bool Collator::init_lt() { * @returns True if the configuration parameters were successfully fetched and initialized, false otherwise. 
*/ bool Collator::fetch_config_params() { - auto res = block::FetchConfigParams::fetch_config_params(*config_, - &old_mparams_, &storage_prices_, &storage_phase_cfg_, - &rand_seed_, &compute_phase_cfg_, &action_phase_cfg_, - &masterchain_create_fee_, &basechain_create_fee_, - workchain(), now_ - ); + auto res = block::FetchConfigParams::fetch_config_params( + *config_, &old_mparams_, &storage_prices_, &storage_phase_cfg_, &rand_seed_, &compute_phase_cfg_, + &action_phase_cfg_, &serialize_cfg_, &masterchain_create_fee_, &basechain_create_fee_, workchain(), now_); if (res.is_error()) { return fatal_error(res.move_as_error()); } @@ -2750,7 +2747,7 @@ bool Collator::create_ticktock_transaction(const ton::StdSmcAddress& smc_addr, t return fatal_error(td::Status::Error( -666, std::string{"cannot create action phase of a new transaction for smart contract "} + smc_addr.to_hex())); } - if (!trans->serialize()) { + if (!trans->serialize(serialize_cfg_)) { return fatal_error(td::Status::Error( -666, std::string{"cannot serialize new transaction for smart contract "} + smc_addr.to_hex())); } @@ -2834,7 +2831,7 @@ Ref Collator::create_ordinary_transaction(Ref msg_root, after_lt = std::max(after_lt, it->second); } auto res = impl_create_ordinary_transaction(msg_root, acc, now_, start_lt, &storage_phase_cfg_, &compute_phase_cfg_, - &action_phase_cfg_, external, after_lt); + &action_phase_cfg_, &serialize_cfg_, external, after_lt); if (res.is_error()) { auto error = res.move_as_error(); if (error.code() == -701) { @@ -2885,6 +2882,7 @@ Ref Collator::create_ordinary_transaction(Ref msg_root, * @param storage_phase_cfg The configuration for the storage phase of the transaction. * @param compute_phase_cfg The configuration for the compute phase of the transaction. * @param action_phase_cfg The configuration for the action phase of the transaction. + * @param serialize_cfg The configuration for the serialization of the transaction. 
* @param external Flag indicating if the message is external. * @param after_lt The logical time after which the transaction should occur. Used only for external messages. * @@ -2898,6 +2896,7 @@ td::Result> Collator::impl_crea block::StoragePhaseConfig* storage_phase_cfg, block::ComputePhaseConfig* compute_phase_cfg, block::ActionPhaseConfig* action_phase_cfg, + block::SerializeConfig* serialize_cfg, bool external, LogicalTime after_lt) { if (acc->last_trans_end_lt_ >= lt && acc->transactions.empty()) { return td::Status::Error(-669, PSTRING() << "last transaction time in the state of account " << acc->workchain @@ -2965,7 +2964,7 @@ td::Result> Collator::impl_crea return td::Status::Error( -669, "cannot create bounce phase of a new transaction for smart contract "s + acc->addr.to_hex()); } - if (!trans->serialize()) { + if (!trans->serialize(*serialize_cfg)) { return td::Status::Error(-669, "cannot serialize new transaction for smart contract "s + acc->addr.to_hex()); } return std::move(trans); diff --git a/validator/impl/external-message.cpp b/validator/impl/external-message.cpp index 2fdb491b..8b1f5eb7 100644 --- a/validator/impl/external-message.cpp +++ b/validator/impl/external-message.cpp @@ -136,13 +136,12 @@ td::Status ExtMessageQ::run_message_on_account(ton::WorkchainId wc, td::BitArray<256> rand_seed_; block::ComputePhaseConfig compute_phase_cfg_; block::ActionPhaseConfig action_phase_cfg_; + block::SerializeConfig serialize_config_; td::RefInt256 masterchain_create_fee, basechain_create_fee; - auto fetch_res = block::FetchConfigParams::fetch_config_params(*config, &old_mparams, - &storage_prices_, &storage_phase_cfg_, - &rand_seed_, &compute_phase_cfg_, - &action_phase_cfg_, &masterchain_create_fee, - &basechain_create_fee, wc, utime); + auto fetch_res = block::FetchConfigParams::fetch_config_params( + *config, &old_mparams, &storage_prices_, &storage_phase_cfg_, &rand_seed_, &compute_phase_cfg_, + &action_phase_cfg_, &serialize_config_, 
&masterchain_create_fee, &basechain_create_fee, wc, utime); if(fetch_res.is_error()) { auto error = fetch_res.move_as_error(); LOG(DEBUG) << "Cannot fetch config params: " << error.message(); @@ -152,10 +151,9 @@ td::Status ExtMessageQ::run_message_on_account(ton::WorkchainId wc, compute_phase_cfg_.with_vm_log = true; compute_phase_cfg_.stop_on_accept_message = true; - auto res = Collator::impl_create_ordinary_transaction(msg_root, acc, utime, lt, - &storage_phase_cfg_, &compute_phase_cfg_, - &action_phase_cfg_, - true, lt); + auto res = + Collator::impl_create_ordinary_transaction(msg_root, acc, utime, lt, &storage_phase_cfg_, &compute_phase_cfg_, + &action_phase_cfg_, &serialize_config_, true, lt); if(res.is_error()) { auto error = res.move_as_error(); LOG(DEBUG) << "Cannot run message on account: " << error.message(); diff --git a/validator/impl/validate-query.cpp b/validator/impl/validate-query.cpp index 31c30e90..90966d82 100644 --- a/validator/impl/validate-query.cpp +++ b/validator/impl/validate-query.cpp @@ -1004,6 +1004,10 @@ bool ValidateQuery::fetch_config_params() { action_phase_cfg_.disable_custom_fess = config_->get_global_version() >= 8; action_phase_cfg_.reserve_extra_enabled = config_->get_global_version() >= 9; action_phase_cfg_.mc_blackhole_addr = config_->get_burning_config().blackhole_addr; + action_phase_cfg_.extra_currency_v2 = config_->get_global_version() >= 10; + } + { + serialize_cfg_.extra_currency_v2 = config_->get_global_version() >= 10; } { // fetch block_grams_created @@ -5608,7 +5612,7 @@ bool ValidateQuery::check_one_transaction(block::Account& account, ton::LogicalT return reject_query(PSTRING() << "cannot re-create bounce phase of transaction " << lt << " for smart contract " << addr.to_hex()); } - if (!trs->serialize()) { + if (!trs->serialize(serialize_cfg_)) { return reject_query(PSTRING() << "cannot re-create the serialization of transaction " << lt << " for smart contract " << addr.to_hex()); } diff --git 
a/validator/impl/validate-query.hpp b/validator/impl/validate-query.hpp index 90c368ff..60f0cc8a 100644 --- a/validator/impl/validate-query.hpp +++ b/validator/impl/validate-query.hpp @@ -205,6 +205,7 @@ class ValidateQuery : public td::actor::Actor { block::StoragePhaseConfig storage_phase_cfg_{&storage_prices_}; block::ComputePhaseConfig compute_phase_cfg_; block::ActionPhaseConfig action_phase_cfg_; + block::SerializeConfig serialize_cfg_; td::RefInt256 masterchain_create_fee_, basechain_create_fee_; std::vector neighbors_; From 44e7e091b28a15b362def30ff62602158e28b7f0 Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Thu, 27 Feb 2025 15:41:21 +0300 Subject: [PATCH 55/61] Use HashSet/HashMap in storage stat (#1540) --- crypto/vm/boc.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crypto/vm/boc.h b/crypto/vm/boc.h index 8adf240f..17e7eb69 100644 --- a/crypto/vm/boc.h +++ b/crypto/vm/boc.h @@ -101,9 +101,9 @@ class NewCellStorageStat { private: const CellUsageTree* usage_tree_; - std::set seen_; + td::HashSet seen_; Stat stat_; - std::set proof_seen_; + td::HashSet proof_seen_; Stat proof_stat_; const NewCellStorageStat* parent_{nullptr}; @@ -117,7 +117,7 @@ struct CellStorageStat { struct CellInfo { td::uint32 max_merkle_depth = 0; }; - std::map seen; + td::HashMap seen; CellStorageStat() : cells(0), bits(0), public_cells(0) { } explicit CellStorageStat(unsigned long long limit_cells) @@ -173,7 +173,7 @@ class ProofStorageStat { enum CellStatus { c_none = 0, c_prunned = 1, c_loaded = 2 }; - std::map cells_; + td::HashMap cells_; td::uint64 proof_size_ = 0; }; From f3e620f48ce56f766791870a81463bc46da3ffa7 Mon Sep 17 00:00:00 2001 From: tolk-vm Date: Mon, 24 Feb 2025 20:13:36 +0300 Subject: [PATCH 56/61] [Tolk] Nullable types `T?` and null safety This commit introduces nullable types `T?` that are distinct from non-nullable `T`. Example: `int?` (int or null) and `int` are different now. 
Previously, `null` could be assigned to any primitive type. Now, it can be assigned only to `T?`. A non-null assertion operator `!` was also introduced, similar to `!` in TypeScript and `!!` in Kotlin. If `int?` still occupies 1 stack slot, `(int,int)?` and other nullable tensors occupy N+1 slots, the last for "null presence". `v == null` actually compares that slot. Assigning `(int,int)` to `(int,int)?` implicitly creates a null presence slot. Assigning `null` to `(int,int)?` widens this null value to 3 slots. This is called "type transitioning". All stdlib function prototypes have been updated to reflect whether they return/accept a nullable or a strict value. This commit also contains refactoring from `const FunctionData*` to `FunctionPtr` and similar. --- crypto/smartcont/tolk-stdlib/common.tolk | 20 +- crypto/smartcont/tolk-stdlib/lisp-lists.tolk | 9 +- crypto/smartcont/tolk-stdlib/tvm-dicts.tolk | 139 ++--- tolk-tester/tests/a10.tolk | 2 +- tolk-tester/tests/assignment-tests.tolk | 4 +- tolk-tester/tests/c2.tolk | 2 +- tolk-tester/tests/cells-slices.tolk | 11 +- tolk-tester/tests/dicts-demo.tolk | 34 +- tolk-tester/tests/generics-1.tolk | 8 +- tolk-tester/tests/imports/use-dicts.tolk | 14 +- tolk-tester/tests/indexed-access.tolk | 27 +- tolk-tester/tests/inference-tests.tolk | 8 +- tolk-tester/tests/invalid-generics-1.tolk | 2 +- tolk-tester/tests/invalid-generics-13.tolk | 11 + tolk-tester/tests/invalid-mutate-18.tolk | 10 + tolk-tester/tests/invalid-mutate-19.tolk | 10 + tolk-tester/tests/invalid-mutate-20.tolk | 13 + tolk-tester/tests/invalid-typing-14.tolk | 14 + tolk-tester/tests/invalid-typing-15.tolk | 13 + tolk-tester/tests/invalid-typing-16.tolk | 13 + tolk-tester/tests/invalid-typing-17.tolk | 10 + tolk-tester/tests/invalid-typing-18.tolk | 16 + tolk-tester/tests/logical-operators.tolk | 9 +- tolk-tester/tests/null-keyword.tolk | 51 +- tolk-tester/tests/nullable-tensors.tolk | 474 +++++++++++++++++ tolk-tester/tests/nullable-types.tolk | 109 ++++
tolk-tester/tests/use-before-declare.tolk | 4 +- tolk-tester/tests/var-apply.tolk | 41 ++ tolk/abscode.cpp | 6 +- tolk/ast-from-tokens.cpp | 44 +- tolk/ast-replacer.h | 12 +- tolk/ast-replicator.h | 8 + tolk/ast-stringifier.h | 4 + tolk/ast-visitor.h | 12 +- tolk/ast.cpp | 22 +- tolk/ast.h | 68 ++- tolk/codegen.cpp | 4 +- tolk/compiler-state.cpp | 2 +- tolk/compiler-state.h | 8 +- tolk/constant-evaluator.cpp | 2 +- tolk/fwd-declarations.h | 5 + tolk/generics-helpers.cpp | 162 +++--- tolk/generics-helpers.h | 42 +- tolk/pipe-ast-to-legacy.cpp | 527 ++++++++++++++----- tolk/pipe-calc-rvalue-lvalue.cpp | 16 +- tolk/pipe-check-pure-impure.cpp | 4 +- tolk/pipe-check-rvalue-lvalue.cpp | 28 +- tolk/pipe-constant-folding.cpp | 13 +- tolk/pipe-detect-unreachable.cpp | 6 +- tolk/pipe-find-unused-symbols.cpp | 6 +- tolk/pipe-generate-fif-output.cpp | 8 +- tolk/pipe-infer-types-and-calls.cpp | 277 ++++++---- tolk/pipe-optimize-boolean-expr.cpp | 11 +- tolk/pipe-refine-lvalue-for-mutate.cpp | 8 +- tolk/pipe-register-symbols.cpp | 6 +- tolk/pipe-resolve-identifiers.cpp | 22 +- tolk/pipeline.h | 8 +- tolk/symtable.cpp | 6 +- tolk/symtable.h | 19 +- tolk/tolk.h | 18 +- tolk/type-system.cpp | 191 ++++--- tolk/type-system.h | 80 ++- 62 files changed, 2031 insertions(+), 702 deletions(-) create mode 100644 tolk-tester/tests/invalid-generics-13.tolk create mode 100644 tolk-tester/tests/invalid-mutate-18.tolk create mode 100644 tolk-tester/tests/invalid-mutate-19.tolk create mode 100644 tolk-tester/tests/invalid-mutate-20.tolk create mode 100644 tolk-tester/tests/invalid-typing-14.tolk create mode 100644 tolk-tester/tests/invalid-typing-15.tolk create mode 100644 tolk-tester/tests/invalid-typing-16.tolk create mode 100644 tolk-tester/tests/invalid-typing-17.tolk create mode 100644 tolk-tester/tests/invalid-typing-18.tolk create mode 100644 tolk-tester/tests/nullable-tensors.tolk create mode 100644 tolk-tester/tests/nullable-types.tolk diff --git 
a/crypto/smartcont/tolk-stdlib/common.tolk b/crypto/smartcont/tolk-stdlib/common.tolk index 5311ec2f..ba1e6c14 100644 --- a/crypto/smartcont/tolk-stdlib/common.tolk +++ b/crypto/smartcont/tolk-stdlib/common.tolk @@ -139,7 +139,7 @@ fun getMyOriginalBalance(): int /// `int` — balance in nanotoncoins; /// `cell` — a dictionary with 32-bit keys representing the balance of "extra currencies". @pure -fun getMyOriginalBalanceWithExtraCurrencies(): [int, cell] +fun getMyOriginalBalanceWithExtraCurrencies(): [int, cell?] asm "BALANCE"; /// Returns the logical time of the current transaction. @@ -154,7 +154,7 @@ fun getCurrentBlockLogicalTime(): int /// Returns the value of the global configuration parameter with integer index `i` as a `cell` or `null` value. @pure -fun getBlockchainConfigParam(x: int): cell +fun getBlockchainConfigParam(x: int): cell? asm "CONFIGOPTPARAM"; /// Returns the persistent contract storage cell. It can be parsed or modified with slice and builder primitives later. @@ -291,7 +291,7 @@ fun calculateSliceSizeStrict(s: slice, maxCells: int): (int, int, int) /// otherwise the returned value is one plus the maximum of depths of cells referred to from [c]. /// If [c] is a `null` instead of a cell, returns zero. @pure -fun getCellDepth(c: cell): int +fun getCellDepth(c: cell?): int asm "CDEPTH"; /// Returns the depth of `slice` [s]. @@ -417,12 +417,12 @@ fun getLastBits(self: slice, len: int): slice /// Loads a dictionary (TL HashMapE structure, represented as TVM cell) from a slice. /// Returns `null` if `nothing` constructor is used. @pure -fun loadDict(mutate self: slice): cell +fun loadDict(mutate self: slice): cell? asm( -> 1 0) "LDDICT"; /// Preloads a dictionary (cell) from a slice. @pure -fun preloadDict(self: slice): cell +fun preloadDict(self: slice): cell? asm "PLDDICT"; /// Loads a dictionary as [loadDict], but returns only the remainder of the slice. 
@@ -433,12 +433,12 @@ fun skipDict(mutate self: slice): self /// Loads (Maybe ^Cell) from a slice. /// In other words, loads 1 bit: if it's true, loads the first ref, otherwise returns `null`. @pure -fun loadMaybeRef(mutate self: slice): cell +fun loadMaybeRef(mutate self: slice): cell? asm( -> 1 0) "LDOPTREF"; /// Preloads (Maybe ^Cell) from a slice. @pure -fun preloadMaybeRef(self: slice): cell +fun preloadMaybeRef(self: slice): cell? asm "PLDOPTREF"; /// Loads (Maybe ^Cell), but returns only the remainder of the slice. @@ -497,13 +497,13 @@ fun storeBool(mutate self: builder, x: bool): self /// Stores dictionary (represented by TVM `cell` or `null`) into a builder. /// In other words, stores a `1`-bit and a reference to [c] if [c] is not `null` and `0`-bit otherwise. @pure -fun storeDict(mutate self: builder, c: cell): self +fun storeDict(mutate self: builder, c: cell?): self asm(c self) "STDICT"; /// Stores (Maybe ^Cell) into a builder. /// In other words, if cell is `null`, store '0' bit; otherwise, store '1' and a ref to [c]. @pure -fun storeMaybeRef(mutate self: builder, c: cell): self +fun storeMaybeRef(mutate self: builder, c: cell?): self asm(c self) "STOPTREF"; /// Concatenates two builders. @@ -661,7 +661,7 @@ fun reserveToncoinsOnBalance(nanoTonCoins: int, reserveMode: int): void /// Similar to [reserveToncoinsOnBalance], but also accepts a dictionary extraAmount (represented by a cell or null) /// with extra currencies. In this way currencies other than Toncoin can be reserved. 
-fun reserveExtraCurrenciesOnBalance(nanoTonCoins: int, extraAmount: cell, reserveMode: int): void +fun reserveExtraCurrenciesOnBalance(nanoTonCoins: int, extraAmount: cell?, reserveMode: int): void asm "RAWRESERVEX"; diff --git a/crypto/smartcont/tolk-stdlib/lisp-lists.tolk b/crypto/smartcont/tolk-stdlib/lisp-lists.tolk index 0cb17841..af8b6bd7 100644 --- a/crypto/smartcont/tolk-stdlib/lisp-lists.tolk +++ b/crypto/smartcont/tolk-stdlib/lisp-lists.tolk @@ -14,17 +14,18 @@ fun createEmptyList(): tuple /// Adds an element to the beginning of lisp-style list. /// Note, that it does not mutate the list: instead, it returns a new one (it's a lisp pattern). @pure -fun listPrepend(head: X, tail: tuple): tuple +fun listPrepend(head: X, tail: tuple?): tuple asm "CONS"; /// Extracts the head and the tail of lisp-style list. @pure -fun listSplit(list: tuple): (X, tuple) +fun listSplit(list: tuple): (X, tuple?) asm "UNCONS"; /// Extracts the tail and the head of lisp-style list. +/// After extracting the last element, tuple is assigned to null. @pure -fun listNext(mutate self: tuple): X +fun listNext(mutate self: tuple?): X asm( -> 1 0) "UNCONS"; /// Returns the head of lisp-style list. @@ -34,5 +35,5 @@ fun listGetHead(list: tuple): X /// Returns the tail of lisp-style list. @pure -fun listGetTail(list: tuple): tuple +fun listGetTail(list: tuple): tuple? asm "CDR"; diff --git a/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk b/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk index 5c436239..4b9d5c81 100644 --- a/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk +++ b/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk @@ -9,288 +9,289 @@ tolk 0.8 - uDict* - dicts with unsigned integer keys - sDict* - dicts with arbitrary slice keys When accessing a dict element, you should not only provide a key, but provide keyLen, - since for optimization, for optimization, key length is not stored in the dictionary itself. + since for optimization, key length is not stored in the dictionary itself. 
+ Every dictionary object (`self` parameter) can be null. TVM NULL is essentially "empty dictionary". */ /// Creates an empty dictionary, which is actually a null value. Equivalent to PUSHNULL @pure -fun createEmptyDict(): cell +fun createEmptyDict(): cell? asm "NEWDICT"; /// Checks whether a dictionary is empty. @pure -fun dictIsEmpty(self: cell): bool +fun dictIsEmpty(self: cell?): bool asm "DICTEMPTY"; @pure -fun iDictGet(self: cell, keyLen: int, key: int): (slice, bool) +fun iDictGet(self: cell?, keyLen: int, key: int): (slice?, bool) asm(key self keyLen) "DICTIGET" "NULLSWAPIFNOT"; @pure -fun uDictGet(self: cell, keyLen: int, key: int): (slice, bool) +fun uDictGet(self: cell?, keyLen: int, key: int): (slice?, bool) asm(key self keyLen) "DICTUGET" "NULLSWAPIFNOT"; @pure -fun sDictGet(self: cell, keyLen: int, key: slice): (slice, bool) +fun sDictGet(self: cell?, keyLen: int, key: slice): (slice?, bool) asm(key self keyLen) "DICTGET" "NULLSWAPIFNOT"; @pure -fun iDictSet(mutate self: cell, keyLen: int, key: int, value: slice): void +fun iDictSet(mutate self: cell?, keyLen: int, key: int, value: slice): void asm(value key self keyLen) "DICTISET"; @pure -fun uDictSet(mutate self: cell, keyLen: int, key: int, value: slice): void +fun uDictSet(mutate self: cell?, keyLen: int, key: int, value: slice): void asm(value key self keyLen) "DICTUSET"; @pure -fun sDictSet(mutate self: cell, keyLen: int, key: slice, value: slice): void +fun sDictSet(mutate self: cell?, keyLen: int, key: slice, value: slice): void asm(value key self keyLen) "DICTSET"; @pure -fun iDictSetRef(mutate self: cell, keyLen: int, key: int, value: cell): void +fun iDictSetRef(mutate self: cell?, keyLen: int, key: int, value: cell): void asm(value key self keyLen) "DICTISETREF"; @pure -fun uDictSetRef(mutate self: cell, keyLen: int, key: int, value: cell): void +fun uDictSetRef(mutate self: cell?, keyLen: int, key: int, value: cell): void asm(value key self keyLen) "DICTUSETREF"; @pure -fun 
sDictSetRef(mutate self: cell, keyLen: int, key: slice, value: cell): void +fun sDictSetRef(mutate self: cell?, keyLen: int, key: slice, value: cell): void asm(value key self keyLen) "DICTSETREF"; @pure -fun iDictSetIfNotExists(mutate self: cell, keyLen: int, key: int, value: slice): bool +fun iDictSetIfNotExists(mutate self: cell?, keyLen: int, key: int, value: slice): bool asm(value key self keyLen) "DICTIADD"; @pure -fun uDictSetIfNotExists(mutate self: cell, keyLen: int, key: int, value: slice): bool +fun uDictSetIfNotExists(mutate self: cell?, keyLen: int, key: int, value: slice): bool asm(value key self keyLen) "DICTUADD"; @pure -fun iDictSetIfExists(mutate self: cell, keyLen: int, key: int, value: slice): bool +fun iDictSetIfExists(mutate self: cell?, keyLen: int, key: int, value: slice): bool asm(value key self keyLen) "DICTIREPLACE"; @pure -fun uDictSetIfExists(mutate self: cell, keyLen: int, key: int, value: slice): bool +fun uDictSetIfExists(mutate self: cell?, keyLen: int, key: int, value: slice): bool asm(value key self keyLen) "DICTUREPLACE"; @pure -fun iDictGetRef(self: cell, keyLen: int, key: int): (cell, bool) +fun iDictGetRef(self: cell?, keyLen: int, key: int): (cell?, bool) asm(key self keyLen) "DICTIGETREF" "NULLSWAPIFNOT"; @pure -fun uDictGetRef(self: cell, keyLen: int, key: int): (cell, bool) +fun uDictGetRef(self: cell?, keyLen: int, key: int): (cell?, bool) asm(key self keyLen) "DICTUGETREF" "NULLSWAPIFNOT"; @pure -fun sDictGetRef(self: cell, keyLen: int, key: slice): (cell, bool) +fun sDictGetRef(self: cell?, keyLen: int, key: slice): (cell?, bool) asm(key self keyLen) "DICTGETREF" "NULLSWAPIFNOT"; @pure -fun iDictGetRefOrNull(self: cell, keyLen: int, key: int): cell +fun iDictGetRefOrNull(self: cell?, keyLen: int, key: int): cell? asm(key self keyLen) "DICTIGETOPTREF"; @pure -fun uDictGetRefOrNull(self: cell, keyLen: int, key: int): cell +fun uDictGetRefOrNull(self: cell?, keyLen: int, key: int): cell? 
asm(key self keyLen) "DICTUGETOPTREF"; @pure -fun sDictGetRefOrNull(self: cell, keyLen: int, key: slice): cell +fun sDictGetRefOrNull(self: cell?, keyLen: int, key: slice): cell? asm(key self keyLen) "DICTGETOPTREF"; @pure -fun iDictDelete(mutate self: cell, keyLen: int, key: int): bool +fun iDictDelete(mutate self: cell?, keyLen: int, key: int): bool asm(key self keyLen) "DICTIDEL"; @pure -fun uDictDelete(mutate self: cell, keyLen: int, key: int): bool +fun uDictDelete(mutate self: cell?, keyLen: int, key: int): bool asm(key self keyLen) "DICTUDEL"; @pure -fun sDictDelete(mutate self: cell, keyLen: int, key: slice): bool +fun sDictDelete(mutate self: cell?, keyLen: int, key: slice): bool asm(key self keyLen) "DICTDEL"; @pure -fun iDictSetAndGet(mutate self: cell, keyLen: int, key: int, value: slice): (slice, bool) +fun iDictSetAndGet(mutate self: cell?, keyLen: int, key: int, value: slice): (slice?, bool) asm(value key self keyLen) "DICTISETGET" "NULLSWAPIFNOT"; @pure -fun uDictSetAndGet(mutate self: cell, keyLen: int, key: int, value: slice): (slice, bool) +fun uDictSetAndGet(mutate self: cell?, keyLen: int, key: int, value: slice): (slice?, bool) asm(value key self keyLen) "DICTUSETGET" "NULLSWAPIFNOT"; @pure -fun sDictSetAndGet(mutate self: cell, keyLen: int, key: slice, value: slice): (slice, bool) +fun sDictSetAndGet(mutate self: cell?, keyLen: int, key: slice, value: slice): (slice?, bool) asm(value key self keyLen) "DICTSETGET" "NULLSWAPIFNOT"; @pure -fun iDictSetAndGetRefOrNull(mutate self: cell, keyLen: int, key: int, value: cell): cell +fun iDictSetAndGetRefOrNull(mutate self: cell?, keyLen: int, key: int, value: cell): cell? asm(value key self keyLen) "DICTISETGETOPTREF"; @pure -fun uDictSetAndGetRefOrNull(mutate self: cell, keyLen: int, key: int, value: cell): cell +fun uDictSetAndGetRefOrNull(mutate self: cell?, keyLen: int, key: int, value: cell): cell? 
asm(value key self keyLen) "DICTUSETGETOPTREF"; @pure -fun iDictDeleteAndGet(mutate self: cell, keyLen: int, key: int): (slice, bool) +fun iDictDeleteAndGet(mutate self: cell?, keyLen: int, key: int): (slice?, bool) asm(key self keyLen) "DICTIDELGET" "NULLSWAPIFNOT"; @pure -fun uDictDeleteAndGet(mutate self: cell, keyLen: int, key: int): (slice, bool) +fun uDictDeleteAndGet(mutate self: cell?, keyLen: int, key: int): (slice?, bool) asm(key self keyLen) "DICTUDELGET" "NULLSWAPIFNOT"; @pure -fun sDictDeleteAndGet(mutate self: cell, keyLen: int, key: slice): (slice, bool) +fun sDictDeleteAndGet(mutate self: cell?, keyLen: int, key: slice): (slice?, bool) asm(key self keyLen) "DICTDELGET" "NULLSWAPIFNOT"; @pure -fun iDictSetBuilder(mutate self: cell, keyLen: int, key: int, value: builder): void +fun iDictSetBuilder(mutate self: cell?, keyLen: int, key: int, value: builder): void asm(value key self keyLen) "DICTISETB"; @pure -fun uDictSetBuilder(mutate self: cell, keyLen: int, key: int, value: builder): void +fun uDictSetBuilder(mutate self: cell?, keyLen: int, key: int, value: builder): void asm(value key self keyLen) "DICTUSETB"; @pure -fun sDictSetBuilder(mutate self: cell, keyLen: int, key: slice, value: builder): void +fun sDictSetBuilder(mutate self: cell?, keyLen: int, key: slice, value: builder): void asm(value key self keyLen) "DICTSETB"; @pure -fun iDictSetBuilderIfNotExists(mutate self: cell, keyLen: int, key: int, value: builder): bool +fun iDictSetBuilderIfNotExists(mutate self: cell?, keyLen: int, key: int, value: builder): bool asm(value key self keyLen) "DICTIADDB"; @pure -fun uDictSetBuilderIfNotExists(mutate self: cell, keyLen: int, key: int, value: builder): bool +fun uDictSetBuilderIfNotExists(mutate self: cell?, keyLen: int, key: int, value: builder): bool asm(value key self keyLen) "DICTUADDB"; @pure -fun iDictSetBuilderIfExists(mutate self: cell, keyLen: int, key: int, value: builder): bool +fun iDictSetBuilderIfExists(mutate self: cell?, keyLen: 
int, key: int, value: builder): bool asm(value key self keyLen) "DICTIREPLACEB"; @pure -fun uDictSetBuilderIfExists(mutate self: cell, keyLen: int, key: int, value: builder): bool +fun uDictSetBuilderIfExists(mutate self: cell?, keyLen: int, key: int, value: builder): bool asm(value key self keyLen) "DICTUREPLACEB"; @pure -fun iDictDeleteFirstAndGet(mutate self: cell, keyLen: int): (int, slice, bool) +fun iDictDeleteFirstAndGet(mutate self: cell?, keyLen: int): (int?, slice?, bool) asm(-> 0 2 1 3) "DICTIREMMIN" "NULLSWAPIFNOT2"; @pure -fun uDictDeleteFirstAndGet(mutate self: cell, keyLen: int): (int, slice, bool) +fun uDictDeleteFirstAndGet(mutate self: cell?, keyLen: int): (int?, slice?, bool) asm(-> 0 2 1 3) "DICTUREMMIN" "NULLSWAPIFNOT2"; @pure -fun sDictDeleteFirstAndGet(mutate self: cell, keyLen: int): (slice, slice, bool) +fun sDictDeleteFirstAndGet(mutate self: cell?, keyLen: int): (slice?, slice?, bool) asm(-> 0 2 1 3) "DICTREMMIN" "NULLSWAPIFNOT2"; @pure -fun iDictDeleteLastAndGet(mutate self: cell, keyLen: int): (int, slice, bool) +fun iDictDeleteLastAndGet(mutate self: cell?, keyLen: int): (int?, slice?, bool) asm(-> 0 2 1 3) "DICTIREMMAX" "NULLSWAPIFNOT2"; @pure -fun uDictDeleteLastAndGet(mutate self: cell, keyLen: int): (int, slice, bool) +fun uDictDeleteLastAndGet(mutate self: cell?, keyLen: int): (int?, slice?, bool) asm(-> 0 2 1 3) "DICTUREMMAX" "NULLSWAPIFNOT2"; @pure -fun sDictDeleteLastAndGet(mutate self: cell, keyLen: int): (slice, slice, bool) +fun sDictDeleteLastAndGet(mutate self: cell?, keyLen: int): (slice?, slice?, bool) asm(-> 0 2 1 3) "DICTREMMAX" "NULLSWAPIFNOT2"; @pure -fun iDictGetFirst(self: cell, keyLen: int): (int, slice, bool) +fun iDictGetFirst(self: cell?, keyLen: int): (int?, slice?, bool) asm (-> 1 0 2) "DICTIMIN" "NULLSWAPIFNOT2"; @pure -fun uDictGetFirst(self: cell, keyLen: int): (int, slice, bool) +fun uDictGetFirst(self: cell?, keyLen: int): (int?, slice?, bool) asm (-> 1 0 2) "DICTUMIN" "NULLSWAPIFNOT2"; @pure -fun 
sDictGetFirst(self: cell, keyLen: int): (slice, slice, bool) +fun sDictGetFirst(self: cell?, keyLen: int): (slice?, slice?, bool) asm (-> 1 0 2) "DICTMIN" "NULLSWAPIFNOT2"; @pure -fun iDictGetFirstAsRef(self: cell, keyLen: int): (int, cell, bool) +fun iDictGetFirstAsRef(self: cell?, keyLen: int): (int?, cell?, bool) asm (-> 1 0 2) "DICTIMINREF" "NULLSWAPIFNOT2"; @pure -fun uDictGetFirstAsRef(self: cell, keyLen: int): (int, cell, bool) +fun uDictGetFirstAsRef(self: cell?, keyLen: int): (int?, cell?, bool) asm (-> 1 0 2) "DICTUMINREF" "NULLSWAPIFNOT2"; @pure -fun sDictGetFirstAsRef(self: cell, keyLen: int): (slice, cell, bool) +fun sDictGetFirstAsRef(self: cell?, keyLen: int): (slice?, cell?, bool) asm (-> 1 0 2) "DICTMINREF" "NULLSWAPIFNOT2"; @pure -fun iDictGetLast(self: cell, keyLen: int): (int, slice, bool) +fun iDictGetLast(self: cell?, keyLen: int): (int?, slice?, bool) asm (-> 1 0 2) "DICTIMAX" "NULLSWAPIFNOT2"; @pure -fun uDictGetLast(self: cell, keyLen: int): (int, slice, bool) +fun uDictGetLast(self: cell?, keyLen: int): (int?, slice?, bool) asm (-> 1 0 2) "DICTUMAX" "NULLSWAPIFNOT2"; @pure -fun sDictGetLast(self: cell, keyLen: int): (slice, slice, bool) +fun sDictGetLast(self: cell?, keyLen: int): (slice?, slice?, bool) asm (-> 1 0 2) "DICTMAX" "NULLSWAPIFNOT2"; @pure -fun iDictGetLastAsRef(self: cell, keyLen: int): (int, cell, bool) +fun iDictGetLastAsRef(self: cell?, keyLen: int): (int?, cell?, bool) asm (-> 1 0 2) "DICTIMAXREF" "NULLSWAPIFNOT2"; @pure -fun uDictGetLastAsRef(self: cell, keyLen: int): (int, cell, bool) +fun uDictGetLastAsRef(self: cell?, keyLen: int): (int?, cell?, bool) asm (-> 1 0 2) "DICTUMAXREF" "NULLSWAPIFNOT2"; @pure -fun sDictGetLastAsRef(self: cell, keyLen: int): (slice, cell, bool) +fun sDictGetLastAsRef(self: cell?, keyLen: int): (slice?, cell?, bool) asm (-> 1 0 2) "DICTMAXREF" "NULLSWAPIFNOT2"; @pure -fun iDictGetNext(self: cell, keyLen: int, pivot: int): (int, slice, bool) +fun iDictGetNext(self: cell?, keyLen: int, pivot: 
int): (int?, slice?, bool) asm(pivot self keyLen -> 1 0 2) "DICTIGETNEXT" "NULLSWAPIFNOT2"; @pure -fun uDictGetNext(self: cell, keyLen: int, pivot: int): (int, slice, bool) +fun uDictGetNext(self: cell?, keyLen: int, pivot: int): (int?, slice?, bool) asm(pivot self keyLen -> 1 0 2) "DICTUGETNEXT" "NULLSWAPIFNOT2"; @pure -fun iDictGetNextOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, bool) +fun iDictGetNextOrEqual(self: cell?, keyLen: int, pivot: int): (int?, slice?, bool) asm(pivot self keyLen -> 1 0 2) "DICTIGETNEXTEQ" "NULLSWAPIFNOT2"; @pure -fun uDictGetNextOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, bool) +fun uDictGetNextOrEqual(self: cell?, keyLen: int, pivot: int): (int?, slice?, bool) asm(pivot self keyLen -> 1 0 2) "DICTUGETNEXTEQ" "NULLSWAPIFNOT2"; @pure -fun iDictGetPrev(self: cell, keyLen: int, pivot: int): (int, slice, bool) +fun iDictGetPrev(self: cell?, keyLen: int, pivot: int): (int?, slice?, bool) asm(pivot self keyLen -> 1 0 2) "DICTIGETPREV" "NULLSWAPIFNOT2"; @pure -fun uDictGetPrev(self: cell, keyLen: int, pivot: int): (int, slice, bool) +fun uDictGetPrev(self: cell?, keyLen: int, pivot: int): (int?, slice?, bool) asm(pivot self keyLen -> 1 0 2) "DICTUGETPREV" "NULLSWAPIFNOT2"; @pure -fun iDictGetPrevOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, bool) +fun iDictGetPrevOrEqual(self: cell?, keyLen: int, pivot: int): (int?, slice?, bool) asm(pivot self keyLen -> 1 0 2) "DICTIGETPREVEQ" "NULLSWAPIFNOT2"; @pure -fun uDictGetPrevOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, bool) +fun uDictGetPrevOrEqual(self: cell?, keyLen: int, pivot: int): (int?, slice?, bool) asm(pivot self keyLen -> 1 0 2) "DICTUGETPREVEQ" "NULLSWAPIFNOT2"; @@ -299,13 +300,13 @@ fun uDictGetPrevOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, bool) */ @pure -fun prefixDictGet(self: cell, keyLen: int, key: slice): (slice, slice, slice, bool) +fun prefixDictGet(self: cell?, keyLen: int, key: slice): (slice, slice?, 
slice?, bool) asm(key self keyLen) "PFXDICTGETQ" "NULLSWAPIFNOT2"; @pure -fun prefixDictSet(mutate self: cell, keyLen: int, key: slice, value: slice): bool +fun prefixDictSet(mutate self: cell?, keyLen: int, key: slice, value: slice): bool asm(value key self keyLen) "PFXDICTSET"; @pure -fun prefixDictDelete(mutate self: cell, keyLen: int, key: slice): bool +fun prefixDictDelete(mutate self: cell?, keyLen: int, key: slice): bool asm(key self keyLen) "PFXDICTDEL"; diff --git a/tolk-tester/tests/a10.tolk b/tolk-tester/tests/a10.tolk index 031e29c9..9d24f38d 100644 --- a/tolk-tester/tests/a10.tolk +++ b/tolk-tester/tests/a10.tolk @@ -2,7 +2,7 @@ import "@stdlib/tvm-lowlevel" fun pair_first(p: [X, Y]): X asm "FIRST"; -fun one(dummy: tuple) { +fun one(dummy: tuple?) { return 1; } diff --git a/tolk-tester/tests/assignment-tests.tolk b/tolk-tester/tests/assignment-tests.tolk index 34dd3e84..bb647652 100644 --- a/tolk-tester/tests/assignment-tests.tolk +++ b/tolk-tester/tests/assignment-tests.tolk @@ -206,9 +206,9 @@ fun test116() { fun main(value: int) { - var (x: int, y) = (autoInferIntNull(value), autoInferIntNull(value * 2)); + var (x: int?, y) = (autoInferIntNull(value), autoInferIntNull(value * 2)); if (x == null && y == null) { return null; } - return x == null || y == null ? -1 : x + y; + return x == null || y == null ? -1 : x! + y!; } /** diff --git a/tolk-tester/tests/c2.tolk b/tolk-tester/tests/c2.tolk index 257aba5b..bcbc6c93 100644 --- a/tolk-tester/tests/c2.tolk +++ b/tolk-tester/tests/c2.tolk @@ -8,7 +8,7 @@ fun unnamed_args(_: int, _: slice, _: int) { return true; } -fun main(x: int, y: int, z: int): bool { +fun main(x: int, y: int, z: int): bool? 
{ op = `_+_`; if (0) { return null; } return check_assoc(x, y, z); diff --git a/tolk-tester/tests/cells-slices.tolk b/tolk-tester/tests/cells-slices.tolk index 19e2e215..772812eb 100644 --- a/tolk-tester/tests/cells-slices.tolk +++ b/tolk-tester/tests/cells-slices.tolk @@ -32,7 +32,8 @@ fun test1(): [int,int,int,int,int] { fun test2(): [int,int,int] { var b: builder = beginCell().myStoreInt(1, 32); b = b.myStoreInt(2, 32); - b.myStoreInt(3, 32); + // operator ! here and below is used just for testing purposes, it doesn't affect the result + b!.myStoreInt(3, 32); var cs: slice = b.endCell().beginParse(); var one: int = cs.myLoadInt(32); @@ -43,14 +44,14 @@ fun test2(): [int,int,int] { @method_id(103) fun test3(ret: int): int { - val same: int = beginCell().storeUint(ret,32).endCell().beginParse().loadUint(32); + val same: int = beginCell()!.storeUint(ret,32).endCell().beginParse().loadUint(32); return same; } @method_id(104) fun test4(): [int,int] { - var b: builder = beginCell().myStoreInt(1, 32); - b = b.storeInt(2, 32).storeInt(3, 32); + var b: builder = (beginCell() as builder).myStoreInt(1, 32); + b = b!.storeInt(2, 32)!.storeInt(3, 32); var cs: slice = b.endCell().beginParse(); var (one, _, three) = (cs.getFirstBits(32).loadUint(32), cs.skipBits(64), cs.load_u32()); @@ -116,7 +117,7 @@ fun test10() { fun test11() { var s: slice = beginCell().storeInt(1, 32).storeInt(2, 32).storeInt(3, 32).storeInt(4, 32).storeInt(5, 32).storeInt(6, 32).storeInt(7, 32).endCell().beginParse(); var size1 = getRemainingBitsCount(s); - s.skipBits(32); + s!.skipBits(32); var s1: slice = s.getFirstBits(64); var n1 = s1.loadInt(32); var size2 = getRemainingBitsCount(s); diff --git a/tolk-tester/tests/dicts-demo.tolk b/tolk-tester/tests/dicts-demo.tolk index 291bd2ea..606318cb 100644 --- a/tolk-tester/tests/dicts-demo.tolk +++ b/tolk-tester/tests/dicts-demo.tolk @@ -1,15 +1,15 @@ import "@stdlib/tvm-dicts" -fun addIntToIDict(mutate self: cell, key: int, number: int): void { +fun 
addIntToIDict(mutate self: cell?, key: int, number: int): void { return self.iDictSetBuilder(32, key, beginCell().storeInt(number, 32)); } -fun calculateDictLen(d: cell) { +fun calculateDictLen(d: cell?) { var len = 0; var (k, v, f) = d.uDictGetFirst(32); while (f) { len += 1; - (k, v, f) = d.uDictGetNext(32, k); + (k, v, f) = d.uDictGetNext(32, k!); } return len; } @@ -25,13 +25,13 @@ fun loadTwoDigitNumberFromSlice(mutate self: slice): int { fun test101(getK1: int, getK2: int, getK3: int) { var dict = createEmptyDict(); dict.uDictSetBuilder(32, 1, beginCell().storeUint(1, 32)); - var (old1: slice, found1) = dict.uDictSetAndGet(32, getK1, beginCell().storeUint(2, 32).endCell().beginParse()); - var (old2: slice, found2) = dict.uDictSetAndGet(32, getK2, beginCell().storeUint(3, 32).endCell().beginParse()); - var (cur3: slice, found3) = dict.uDictGet(32, getK3); + var (old1: slice?, found1) = dict.uDictSetAndGet(32, getK1, beginCell().storeUint(2, 32).endCell().beginParse()); + var (old2: slice?, found2) = dict.uDictSetAndGet(32, getK2, beginCell().storeUint(3, 32).endCell().beginParse()); + var (cur3: slice?, found3) = dict.uDictGet(32, getK3); return ( - found1 ? old1.loadUint(32) : -1, - found2 ? old2.loadUint(32) : -1, - found3 ? cur3.loadUint(32) : -1 + found1 ? old1!.loadUint(32) : -1, + found2 ? old2!.loadUint(32) : -1, + found3 ? 
cur3!.loadUint(32) : -1 ); } @@ -47,7 +47,7 @@ fun test102() { while (!shouldBreak) { var (kDel, kVal, wasDel) = dict.iDictDeleteLastAndGet(32); if (wasDel) { - deleted.tuplePush([kDel, kVal.loadInt(32)]); + deleted.tuplePush([kDel, kVal!.loadInt(32)]); } else { shouldBreak = true; } @@ -82,14 +82,14 @@ fun test104() { var (old2, _) = dict.sDictDeleteAndGet(32, "key1"); var (restK, restV, _) = dict.sDictGetFirst(32); var (restK1, restV1, _) = dict.sDictDeleteLastAndGet(32); - assert (restK.isSliceBitsEqual(restK1)) throw 123; - assert (restV.isSliceBitsEqual(restV1)) throw 123; + assert (restK!.isSliceBitsEqual(restK1!)) throw 123; + assert (restV!.isSliceBitsEqual(restV1!)) throw 123; return ( - old1.loadTwoDigitNumberFromSlice(), - old2.loadTwoDigitNumberFromSlice(), - restV.loadTwoDigitNumberFromSlice(), - restK.loadTwoDigitNumberFromSlice(), - restK.loadTwoDigitNumberFromSlice() + old1!.loadTwoDigitNumberFromSlice(), + old2!.loadTwoDigitNumberFromSlice(), + restV!.loadTwoDigitNumberFromSlice(), + restK!.loadTwoDigitNumberFromSlice(), + restK!.loadTwoDigitNumberFromSlice() ); } diff --git a/tolk-tester/tests/generics-1.tolk b/tolk-tester/tests/generics-1.tolk index 453ec282..ca310927 100644 --- a/tolk-tester/tests/generics-1.tolk +++ b/tolk-tester/tests/generics-1.tolk @@ -49,17 +49,17 @@ fun manyEq(a: T1, b: T2, c: T3): [T1, T2, T3] { fun test104(f: int) { var result = ( manyEq(1 ? 1 : 1, f ? 0 : null, !f ? getTwo() as int : null), - manyEq(f ? null as int : eq2(2), beginCell().storeBool(true).endCell().beginParse().loadBool(), eq4(f)) + manyEq(f ? null as int? : eq2(2), beginCell().storeBool(true).endCell().beginParse().loadBool(), eq4(f)) ); - __expect_type(result, "([int, int, int], [int, bool, int])"); + __expect_type(result, "([int, int?, int?], [int?, bool, int])"); return result; } -fun calcSum(x: X, y: X) { return x + y; } +fun calcSum(x: X, y: X) { return x! 
+ y!; } @method_id(105) fun test105() { - if (0) { calcSum(((0)), null); } + if (0) { calcSum(((0 as int?)), null); } return (calcSum(1, 2)); } diff --git a/tolk-tester/tests/imports/use-dicts.tolk b/tolk-tester/tests/imports/use-dicts.tolk index c9d5dcfe..2daaf2b1 100644 --- a/tolk-tester/tests/imports/use-dicts.tolk +++ b/tolk-tester/tests/imports/use-dicts.tolk @@ -1,21 +1,21 @@ import "@stdlib/tvm-dicts" -fun prepareDict_3_30_4_40_5_x(valueAt5: int): cell { - var dict: cell = createEmptyDict(); +fun prepareDict_3_30_4_40_5_x(valueAt5: int): cell? { + var dict: cell? = createEmptyDict(); dict.iDictSetBuilder(32, 3, beginCell().storeInt(30, 32)); dict.iDictSetBuilder(32, 4, beginCell().storeInt(40, 32)); dict.iDictSetBuilder(32, 5, beginCell().storeInt(valueAt5, 32)); return dict; } -fun lookupIdxByValue(idict32: cell, value: int): int { - var cur_key = -1; +fun lookupIdxByValue(idict32: cell?, value: int): int { + var cur_key: int? = -1; do { - var (cur_key redef, cs: slice, found: bool) = idict32.iDictGetNext(32, cur_key); + var (cur_key redef, cs: slice?, found: bool) = idict32.iDictGetNext(32, cur_key!); // one-line condition (via &) doesn't work, since right side is calculated immediately if (found) { - if (cs.loadInt(32) == value) { - return cur_key; + if (cs!.loadInt(32) == value) { + return cur_key!; } } } while (found); diff --git a/tolk-tester/tests/indexed-access.tolk b/tolk-tester/tests/indexed-access.tolk index ab7995cf..7915536e 100644 --- a/tolk-tester/tests/indexed-access.tolk +++ b/tolk-tester/tests/indexed-access.tolk @@ -86,8 +86,8 @@ fun test104() { } @method_id(105) -fun test105(x: int, y: int): (tuple, int, (int, int), int, int) { - var ab = (createEmptyTuple(), (x, y), tupleSize); +fun test105(x: int, y: int): (tuple, int, (int?, int), int, int) { + var ab = (createEmptyTuple(), (x as int?, y), tupleSize); ab.0.tuplePush(1); tuplePush(mutate ab.0, 2); ab.1.0 = null; @@ -98,7 +98,7 @@ fun test105(x: int, y: int): (tuple, int, (int, int), 
int, int) { @method_id(106) fun test106(x: int, y: int) { - var ab = [createEmptyTuple(), [x, y], tupleSize]; + var ab = [createEmptyTuple(), [x as int?, y], tupleSize]; ab.0.tuplePush(1); tuplePush(mutate ab.0, 2); ab.1.0 = null; @@ -233,6 +233,25 @@ fun test121(zero: int) { return t; } +fun isFirstComponentGt0(t: (T1, T2)): bool { + return t.0 > 0; +} + +@method_id(122) +fun test122(x: (int, int)) { + return ( + isFirstComponentGt0(x), isFirstComponentGt0((2, beginCell())), isFirstComponentGt0((0, null)), + x.isFirstComponentGt0(), (2, beginCell()).isFirstComponentGt0(), (0, null).isFirstComponentGt0() + ); +} + +@method_id(123) +fun test123() { + var t = [[10, 20]] as [[int,int]]?; + t!.0.0 = t!.0.1 = 100; + return t; +} + fun main(){} @@ -258,6 +277,8 @@ fun main(){} @testcase | 119 | 1 2 3 4 | 4 1 3 @testcase | 120 | | 3 4 [ 5 6 ] @testcase | 121 | 0 | [ 3 ] +@testcase | 122 | 1 2 | -1 -1 0 -1 -1 0 +@testcase | 123 | | [ [ 100 100 ] ] @fif_codegen """ diff --git a/tolk-tester/tests/inference-tests.tolk b/tolk-tester/tests/inference-tests.tolk index 3d451581..96bf8b1a 100644 --- a/tolk-tester/tests/inference-tests.tolk +++ b/tolk-tester/tests/inference-tests.tolk @@ -18,10 +18,12 @@ fun test1(x: int, y: int) { __expect_type(random() ? 
x : y, "int"); __expect_type(eq(x), "int"); __expect_type(eq(x), "int"); - __expect_type(eq(null), "int"); + __expect_type(eq(null), "int?"); __expect_type(x as int, "int"); __expect_type(+x, "int"); __expect_type(~x, "int"); + __expect_type(x!, "int"); + __expect_type(x!!!, "int"); { var x: slice = beginCell().endCell().beginParse(); __expect_type(x, "slice"); @@ -62,9 +64,9 @@ fun test5(x: int) { __expect_type([], "[]"); __expect_type([x], "[int]"); __expect_type([x, x >= 1], "[int, bool]"); - __expect_type([x, x >= 1, null as slice], "[int, bool, slice]"); + __expect_type([x, x >= 1, null as slice?], "[int, bool, slice?]"); __expect_type((x, [x], [[x], x]), "(int, [int], [[int], int])"); - __expect_type(getMyOriginalBalanceWithExtraCurrencies(), "[int, cell]"); + __expect_type(getMyOriginalBalanceWithExtraCurrencies(), "[int, cell?]"); } fun test6() { diff --git a/tolk-tester/tests/invalid-generics-1.tolk b/tolk-tester/tests/invalid-generics-1.tolk index c8ff7fec..0bbdeee6 100644 --- a/tolk-tester/tests/invalid-generics-1.tolk +++ b/tolk-tester/tests/invalid-generics-1.tolk @@ -6,5 +6,5 @@ fun failCantDeduceWithoutArgument() { /** @compilation_should_fail -@stderr can not deduce X for generic function `f` +@stderr too few arguments in call to `f`, expected 2, have 1 */ diff --git a/tolk-tester/tests/invalid-generics-13.tolk b/tolk-tester/tests/invalid-generics-13.tolk new file mode 100644 index 00000000..7574bde7 --- /dev/null +++ b/tolk-tester/tests/invalid-generics-13.tolk @@ -0,0 +1,11 @@ +fun calcSum(x: X, y: X) { return x + y; } + +fun cantApplyPlusOnNullable() { + return calcSum(((0 as int?)), null); +} + +/** +@compilation_should_fail +@stderr while instantiating generic function `calcSum` +@stderr can not apply operator `+` to `int?` and `int?` + */ diff --git a/tolk-tester/tests/invalid-mutate-18.tolk b/tolk-tester/tests/invalid-mutate-18.tolk new file mode 100644 index 00000000..bb8cde05 --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-18.tolk @@ 
-0,0 +1,10 @@ +fun getNullableTuple(): tuple? { return createEmptyTuple(); } + +fun cantUseLValueUnwrappedNotNull() { + tuplePush(mutate getNullableTuple()!, 1); +} + +/** +@compilation_should_fail +@stderr function call can not be used as lvalue + */ diff --git a/tolk-tester/tests/invalid-mutate-19.tolk b/tolk-tester/tests/invalid-mutate-19.tolk new file mode 100644 index 00000000..bb8cde05 --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-19.tolk @@ -0,0 +1,10 @@ +fun getNullableTuple(): tuple? { return createEmptyTuple(); } + +fun cantUseLValueUnwrappedNotNull() { + tuplePush(mutate getNullableTuple()!, 1); +} + +/** +@compilation_should_fail +@stderr function call can not be used as lvalue + */ diff --git a/tolk-tester/tests/invalid-mutate-20.tolk b/tolk-tester/tests/invalid-mutate-20.tolk new file mode 100644 index 00000000..f6eb2f9f --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-20.tolk @@ -0,0 +1,13 @@ +fun acceptMutateNullableTensor(mutate self: (int, int)?) { +} + +fun cantModifyTupleIndexWithTypeTransition() { + var t = [1, null]; + t.1.acceptMutateNullableTensor(); +} + +/** +@compilation_should_fail +@stderr can not call method for mutate `(int, int)?` with object of type `null` +@stderr because mutation is not type compatible + */ diff --git a/tolk-tester/tests/invalid-typing-14.tolk b/tolk-tester/tests/invalid-typing-14.tolk new file mode 100644 index 00000000..657ab5f4 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-14.tolk @@ -0,0 +1,14 @@ + +fun autoGetIntOrNull() { + if (random()) { return 1; } + return null; +} + +fun testAutoInferredIntOrNull() { + var b: builder = autoGetIntOrNull() as builder; +} + +/** +@compilation_should_fail +@stderr type `int?` can not be cast to `builder` + */ diff --git a/tolk-tester/tests/invalid-typing-15.tolk b/tolk-tester/tests/invalid-typing-15.tolk new file mode 100644 index 00000000..fbcff8a2 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-15.tolk @@ -0,0 +1,13 @@ + +fun getNullable4(): int? 
{ + return 4; +} + +fun testCantSumNullable() { + return 1 + getNullable4(); +} + +/** +@compilation_should_fail +@stderr can not apply operator `+` to `int` and `int?` + */ diff --git a/tolk-tester/tests/invalid-typing-16.tolk b/tolk-tester/tests/invalid-typing-16.tolk new file mode 100644 index 00000000..1dca7822 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-16.tolk @@ -0,0 +1,13 @@ +@pure +fun myDictDeleteStrict(mutate self: cell, keyLen: int, key: int): bool + asm(key self keyLen) "DICTIDEL"; + + +fun testCantCallDictMethodsOnNullable(c: cell) { + c.beginParse().loadDict().myDictDeleteStrict(16, 1); +} + +/** +@compilation_should_fail +@stderr can not call method for `cell` with object of type `cell?` + */ diff --git a/tolk-tester/tests/invalid-typing-17.tolk b/tolk-tester/tests/invalid-typing-17.tolk new file mode 100644 index 00000000..b7302684 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-17.tolk @@ -0,0 +1,10 @@ + +fun testCantUseNullableAsCondition(x: int?) { + if (x) { return 1; } + return 0; +} + +/** +@compilation_should_fail +@stderr can not use `int?` as a boolean condition + */ diff --git a/tolk-tester/tests/invalid-typing-18.tolk b/tolk-tester/tests/invalid-typing-18.tolk new file mode 100644 index 00000000..cf985add --- /dev/null +++ b/tolk-tester/tests/invalid-typing-18.tolk @@ -0,0 +1,16 @@ +fun incrementOrSetNull(mutate x: int?) { + if (random()) { x! 
+= 1; } + else { x = null; } +} + +fun cantCallMutateMethodNotNullable() { + var x = 1; + incrementOrSetNull(mutate x); + return x; +} + +/** +@compilation_should_fail +@stderr can not pass `int` to mutate `int?` +@stderr because mutation is not type compatible + */ diff --git a/tolk-tester/tests/logical-operators.tolk b/tolk-tester/tests/logical-operators.tolk index 29cd1d10..700f2a3c 100644 --- a/tolk-tester/tests/logical-operators.tolk +++ b/tolk-tester/tests/logical-operators.tolk @@ -53,9 +53,8 @@ fun testDict(last: int) { } @method_id(105) -fun testNotNull(x: int) { - // return [x == null, null == x, !(x == null), null == null, +(null != null)]; - return [x == null, null == x, !(x == null)]; +fun testNotNull(x: int?) { + return [x == null, null == x, !(x == null), null == null, (null != null) as int]; } @method_id(106) @@ -170,8 +169,8 @@ fun main() { @testcase | 104 | 50 | 3 5 -1 @testcase | 104 | 100 | 3 5 5 @testcase | 104 | 0 | 3 -1 5 -@testcase | 105 | 0 | [ 0 0 -1 ] -@testcase | 105 | null | [ -1 -1 0 ] +@testcase | 105 | 0 | [ 0 0 -1 -1 0 ] +@testcase | 105 | null | [ -1 -1 0 -1 0 ] @testcase | 106 | | [ 0 0 0 -1 ] [ 0 0 0 ] [ -1 -1 -1 ] [ 0 -1 ] @testcase | 107 | | [ -1 -1 0 -1 ] [ 0 0 0 ] [ -1 -1 -1 ] [ -1 0 ] @testcase | 108 | 1 2 | -1 diff --git a/tolk-tester/tests/null-keyword.tolk b/tolk-tester/tests/null-keyword.tolk index eb02b624..3ea0aaa2 100644 --- a/tolk-tester/tests/null-keyword.tolk +++ b/tolk-tester/tests/null-keyword.tolk @@ -2,13 +2,13 @@ import "@stdlib/lisp-lists" @method_id(101) fun test1() { - var numbers: tuple = createEmptyList(); + var numbers: tuple? 
= createEmptyList(); numbers = listPrepend(1, numbers); numbers = listPrepend(2, numbers); numbers = listPrepend(3, numbers); numbers = listPrepend(4, numbers); - var (h: int, numbers redef) = listSplit(numbers); - h += listGetHead(numbers); + var (h: int, numbers redef) = listSplit(numbers!); + h += listGetHead(numbers!); _ = null; (_, _) = (null, null); @@ -22,22 +22,22 @@ fun test1() { } @method_id(102) -fun test2(x: int) { +fun test2(x: int?) { if (null != x) { - var y: int = null; + var y: int? = null; if (y != null) { return 10; } return y; } try { - return x + 10; // will throw, since not a number + return x! + 10; // will throw, since not a number } catch { return -1; } return 100; } -fun myIsNull(x: int): int { - return x == null ? -1 : x; +fun myIsNull(x: int?): int { + return x == null ? -1 : x!; } @method_id(103) @@ -64,21 +64,28 @@ fun test4(): null { @method_id(105) fun test5() { - var n: slice = getUntypedNull(); - return !(null == n) ? n.loadInt(32) : 100; + var n: slice? = getUntypedNull(); + return !(null == n) ? n!.loadInt(32) : 100; } @method_id(107) fun test7() { - var b = beginCell().storeMaybeRef(null); - var s = b.endCell().beginParse(); + var b = beginCell().storeMaybeRef(null) as builder?; + var s = b!.endCell().beginParse(); var c = s.loadMaybeRef(); return (null == c) as int * 10 + (b != null) as int; } +fun test8() { + __expect_type(null, "null"); + __expect_type([[null]], "[[null]]"); + __expect_type(null as tuple?, "tuple?"); + __expect_type(null as [int]?, "[int]?"); + __expect_type(((null)) as (int, int)?, "(int, int)?"); +} + fun main() { - // now, the compiler doesn't optimize this at compile-time, fif codegen contains ifs - var i: int = null; + var i: int? 
= null; if (i == null) { return 1; } @@ -120,12 +127,12 @@ fun main() { """ main PROC:<{ // - PUSHNULL // i - ISNULL // '2 - IFJMP:<{ // - 1 PUSHINT // '3=1 - }> // - 10 PUSHINT // '4=10 + PUSHNULL // i + ISNULL // '2 + IFJMP:<{ // + 1 PUSHINT // '3=1 + }> // + 10 PUSHINT // '4=10 }> """ @@ -139,8 +146,8 @@ fun main() { 10 MULCONST // b '13 SWAP // '13 b ISNULL // '13 '14 - NOT // '13 '15 - ADD // '16 + NOT // '13 '14 + ADD // '15 }> """ */ diff --git a/tolk-tester/tests/nullable-tensors.tolk b/tolk-tester/tests/nullable-tensors.tolk new file mode 100644 index 00000000..4008482f --- /dev/null +++ b/tolk-tester/tests/nullable-tensors.tolk @@ -0,0 +1,474 @@ +fun getNullableInt(): int? { return 5; } + +fun sumOfNullableTensorComponents(t: (int, int)?): int { + if (t == null) { return 0; } + return t!.0 + t!.1; +} + +fun isTensorNull(t: (int, int)?) { + return t == null; +} + +fun incrementNullableTensorComponents(mutate self: (int, int)?): self { + if (self != null) { + self!.0 += 1; + self!.1 += 1; + } + return self; +} + +fun incrementTensorComponents(mutate self: (int, int)): self { + self.0 += 1; + self.1 += 1; + return self; +} + +fun assignFirstComponent(mutate t: (int, int), first: int) { + t!.0 = first; +} + +fun assignFirstComponentNullable(mutate t: (int, int)?, first: int) { + if (t == null) { + t = (first, 0); + } else { + t!.0 = first; + } +} + +fun getNullableTensor(firstComponent: int?): (int, int)? { + return firstComponent == null ? null : (firstComponent!, 2); +} + +fun sumOfTensor(x: (int, int)) { + return x.0 + x.1; +} + +fun assignNullTo(mutate x: T?) { + x = null; +} + +fun getTensor12() { + return (1,2); +} + +@method_id(101) +fun test101(): (int, int)? { + return (1, 2); +} + +@method_id(102) +fun test102(): ((int, int)?, (int, int)?) { + var t = (1, 2); + return (t, null); +} + +@method_id(103) +fun test103(t: (int, int)) { + var t2: (int, int)? 
= t; + return (sumOfNullableTensorComponents(t), sumOfNullableTensorComponents(t2), sumOfNullableTensorComponents(null), t2); +} + +@method_id(104) +fun test104() { + var t1_1: (int, int)? = (1, 2); + var t1_2: (int, int)? = t1_1; + var t1_3: (int, int)? = t1_1!; + var t2_1: (int, int)? = null; + var t2_2 = t2_1; + return (t1_3, t2_2); +} + +@method_id(105) +fun test105() { + return (null as (int, slice, cell)?, (1, 2, 3) as (int, int, int)?); +} + +@method_id(106) +fun test106() { + var t: (int?, int?)? = (((((1, 2))) as (int, int))); + return t; +} + +@method_id(107) +fun test107() { + var ab = (1, 2); + var ab2: (int, int)? = ab; + return (isTensorNull(ab), isTensorNull(ab2), isTensorNull(null), ab.isTensorNull(), ab2.isTensorNull(), null.isTensorNull()); +} + +@method_id(108) +fun test108(x1: (int, int)) { + incrementTensorComponents(mutate x1); + x1.incrementTensorComponents(); + var x2: (int, int)? = x1; + x2.incrementNullableTensorComponents().incrementNullableTensorComponents(); + incrementNullableTensorComponents(mutate x2); + var x3: (int, int)? = null; + x3.incrementNullableTensorComponents().incrementNullableTensorComponents(); + incrementNullableTensorComponents(mutate x3); + return (x1, x2, x3); +} + +fun isTensorNullGen(t: (T1, T2)?) { + return t == null; +} + +@method_id(109) +fun test109() { + var x1 = (1, 2); + var x2: (int, int)? = x1; + var x3: (int, int)? = x1.1 > 10 ? 
(1, 2) : null; + return ( + isTensorNullGen(x1), isTensorNullGen(x2), isTensorNullGen(null), + isTensorNullGen(x1), isTensorNullGen(x3), + x1.isTensorNullGen(), x2.isTensorNullGen(), x3.isTensorNullGen(), null.isTensorNullGen() + ); +} + +global g110_1: (int, int); +global g110_2: (int, int)?; + +@method_id(110) +fun test110() { + g110_1 = getNullableTensor(1)!; + incrementTensorComponents(mutate g110_1); + g110_1.incrementTensorComponents(); + g110_2 = g110_1; + g110_2.incrementNullableTensorComponents().incrementNullableTensorComponents(); + incrementNullableTensorComponents(mutate g110_2); + var tmp = g110_2; + g110_2 = null; + g110_2.incrementNullableTensorComponents(); + incrementNullableTensorComponents(mutate g110_2); + return (g110_1, g110_2, tmp); +} + +@method_id(111) +fun test111() { + var x = (1, 2); + assignFirstComponent(mutate x, 50); + var x2: (int, int)? = null; + var x3 = x2; + assignFirstComponentNullable(mutate x2, 30); + assignFirstComponentNullable(mutate x3, 70); + g110_1 = (1, 2); + g110_2 = null; + assignFirstComponent(mutate g110_1, 90); + assignFirstComponentNullable(mutate g110_2, 100); + return (x.0, x2!.0, x3!.0, g110_1.0, g110_2!.0); +} + +@method_id(112) +fun test112() { + var x: (int, int)? = (10, 20); + incrementTensorComponents(mutate x!); + x!.incrementTensorComponents(); + return x; +} + +@method_id(113) +fun test113() { + var t = [1, null]; // t.1 is always null + return isTensorNull(t.1); +} + +@method_id(114) +fun test114(): ((slice, (cell, [int, slice, tuple]))?, slice?, (int?, bool?)?) 
{ + var t = [[null]]; + return (t.0.0, t.0.0, t.0.0); +} + +@method_id(115) +fun test115() { + var tt = getNullableTensor(null); + assignFirstComponentNullable(mutate tt, 5); + return ( + getNullableTensor(1)!.incrementTensorComponents(), + sumOfNullableTensorComponents(getNullableTensor(1).incrementNullableTensorComponents().incrementNullableTensorComponents()), + getNullableTensor(null).incrementNullableTensorComponents(), + tt, + sumOfNullableTensorComponents(getNullableTensor(null)) + ); +} + +@method_id(116) +fun test116(returnNull: bool) { + var t1: (int, int)? = returnNull ? null : getTensor12(); + var t2 = returnNull ? null as (int, int)? : getTensor12() as (int, int)?; + returnNull ? null : (1, 2); + return (t1, t2); +} + +@method_id(117) +fun test117() { + var (a, b: (int, int)?, c) = (1, null, 3); + return (b, a, c); +} + +fun autoInferNullableTensor(a: int?, b: int) { + if (a != null) { + return (a!, b); + } + return null; +} + +@method_id(118) +fun test118(a: int?) { + return autoInferNullableTensor(a, 10); +} + +@method_id(119) +fun test119() { + var x: (int, int)? = (1, 2); + x = null; + var tt: (int, (int, int)?) = (0, (1, 2)); + tt.1 = null; + var third: (int, (int, int)?, int) = (0, (1, 2), 3); + third.2 = 100; + return (x, tt.1, third.1, third.2); +} + +@method_id(120) +fun test120(setNull: bool) { + var x: (int, int)? = (1, 2); + if (setNull) { + assignNullTo(mutate x); + } + return x; +} + +@method_id(121) +fun test121() { + var t: [int?, [int?, int?]?] = [1, [2, 3]]; + t.1 = [3, 4]; + return t; +} + +@method_id(122) +fun test122(setNull: bool) { + var t: [int?, [int?, int?]?, int?, [int?, int?]?]? = [1, [2, 3], 4, null]; + if (setNull) { + assignNullTo(mutate t!.1); + } else { + var rhs = [3, 4]; + t!!.1 = rhs; + } + return t; +} + +@method_id(123) +fun test123() { + var t: (int?, (int?, int?)?) = (1, (2, 3)); + t.1 = (3, 4); + return t; +} + +@method_id(124) +fun test124(setNull: bool) { + var t: (int?, (int?, int?)?, int?, (int?, int?)?)? 
= (1, (2, 3), 4, null); + if (setNull) { + assignNullTo(mutate t!.1); + } else { + var rhs = (3, 4); + t!!.1 = rhs; + } + return t; +} + +global g125: int; +fun getT125(): (int, (int, int)?, (int?, int)?) { return (g125 += 1, null, null); } + +@method_id(125) +fun test125() { + g125 = 0; + getT125().1 = null; + getT125().2 = (1, 2); + (getT125()!! as (int, (int, int)?, (int?, int)?)).2 = null; + // test that nothing left on a stack + return g125; +} + +@method_id(126) +fun test126() { + var tt1: (int, null, int) = (1, null, 2); + var (a: int, b: (int, int)?, c: int) = tt1; + return (a, b, c); +} + +@method_id(127) +fun test127(choice: int) { + var tt1: (int, null, int) = (1, null, 2); + var tt2: (int, (int, int), int) = (1, (2, 3), 4); + var tt3: (int, (int, int)?, int) = (1, null, 5); + var abc: (int, (int, int)?, int) = choice == 1 ? tt1 : choice == 2 ? tt2 : tt3; + return abc; +} + +fun get128_1() { return (1, null, 2); } +fun get128_2() { return null; } +fun get128_3() { return (1, (2, 3), 4); } +fun takeT128(abc: (int, (int, int)?, int)?) { return abc; } + +@method_id(128) +fun test128(choice: int) { + if (choice == 1) { + return takeT128(get128_1())!; + } + if (choice == 2) { + return takeT128(get128_2()); + } + return takeT128(get128_3()); +} + +@method_id(129) +fun test129(setNull: bool) { + var t: (int?, int?) = (getNullableInt(), getNullableInt()); + var r1 = (t, t == null, t != null); + t = (setNull ? null : 1, setNull ? null : 2); + var r2 = (t, t == null, t != null); + return (r1, r2); +} + +@method_id(130) +fun test130(setNull: bool) { + var os: (int, (int, int)?) = (1, setNull ? null : (2, 3)); + return os; +} + +fun getEmptyNullableTensor(getNull: bool): ()? { + return getNull ? 
null : (); +} + +@method_id(131) +fun test131() { + var nonNullEmptyT = getEmptyNullableTensor(false); + var nullEmptyT = getEmptyNullableTensor(true); + var emptyT = nonNullEmptyT!; + __expect_type(emptyT, "()"); + var doubleNulls1 = (null, null) as (()?, ()?); + var doubleNulls2 = ((), ()) as (()?, ()?); + var doubleNulls3 = ((), ()) as (()?, ()?)?; + var stillEmpty = ((), ()); + return (nonNullEmptyT, 777, nullEmptyT, 777, emptyT, 777, nullEmptyT!, 777, doubleNulls1, doubleNulls2, 777, doubleNulls3, 777, stillEmpty); +} + +@method_id(132) +fun test132() { + var doubleNulls: (()?, ()?) = (getEmptyNullableTensor(true), getEmptyNullableTensor(false)); + var result = ((null as ()?) == null, (() as ()?) == null, doubleNulls.0 == null, doubleNulls.1 == null); + var aln1: int? = (doubleNulls.1 = null); + var aln2: null = (doubleNulls.1 = null); + return (result, 777, aln1, aln2, doubleNulls.1 == null, doubleNulls); +} + + +fun getNormalNullableTensorWidth1(vLess100: int?): ([int?], ())? { + if (vLess100 != null && vLess100! >= 100) { + return null; + } + return ([vLess100], ()); // such a nullable tensor can store NULL in the same slot +} + +fun getTrickyNullableTensorWidth1(vLess100: int?): (int?, ())? { + if (vLess100 != null && vLess100! >= 100) { + return null; + } + return (vLess100, ()); // such a nullable tensor requires an extra stack slot for null presence +} + +fun getEvenTrickierNullableWidth1(vLess100: int?): ((), (int?, ()), ())? { + if (vLess100 != null && vLess100! 
>= 100) { + return null; + } + return ((), (vLess100, ()), ()); +} + +@method_id(135) +fun test135() { + var n1 = getNormalNullableTensorWidth1(10); // ([10], ()) + var n2 = getNormalNullableTensorWidth1(null); // ([null], ()) + var n3 = getNormalNullableTensorWidth1(100); // null + var t1 = getTrickyNullableTensorWidth1(10); // (10, ()) + var t2 = getTrickyNullableTensorWidth1(null); // (null, ()) + var t3 = getTrickyNullableTensorWidth1(100); // null + var e1 = getEvenTrickierNullableWidth1(10); // ((), (10, ()), ()) + var e2 = getEvenTrickierNullableWidth1(null); // ((), (null, (), ()) + var e3 = getEvenTrickierNullableWidth1(100); // null + return (n1, n2, n3, 777, t1, t2, t3, 777, e1, e2, e3, 777, + n1 == null, n2 == null, n3 == null, t1 == null, t2 == null, t3 == null, e1 == null, e2 == null, e3 == null, 777, + t1!.0 == null, t2!.0 == null, e1!.1.0 == null, e1!.1.1 == null, e2!.1.0 == null, e2!.1.1 == null); +} + + + +fun main(){} + +/** +@testcase | 101 | | 1 2 -1 +@testcase | 102 | | 1 2 -1 (null) (null) 0 +@testcase | 103 | 1 2 | 3 3 0 1 2 -1 +@testcase | 104 | | 1 2 -1 (null) (null) 0 +@testcase | 105 | | (null) (null) (null) 0 1 2 3 -1 +@testcase | 106 | | 1 2 -1 +@testcase | 107 | | 0 0 -1 0 0 -1 +@testcase | 108 | 5 6 | 7 8 10 11 -1 (null) (null) 0 +@testcase | 109 | | 0 0 -1 0 -1 0 0 -1 -1 +@testcase | 110 | | 3 4 (null) (null) 0 6 7 -1 +@testcase | 111 | | 50 30 70 90 100 +@testcase | 112 | | 12 22 -1 +@testcase | 113 | | -1 +@testcase | 114 | | (null) (null) (null) 0 (null) (null) (null) 0 +@testcase | 115 | | 2 3 7 (null) (null) 0 5 0 -1 0 +@testcase | 116 | -1 | (null) (null) 0 (null) (null) 0 +@testcase | 116 | 0 | 1 2 -1 1 2 -1 +@testcase | 117 | | (null) (null) 0 1 3 +@testcase | 118 | 5 | 5 10 -1 +@testcase | 118 | null | (null) (null) 0 +@testcase | 119 | | (null) (null) 0 (null) (null) 0 1 2 -1 100 +@testcase | 120 | -1 | (null) (null) 0 +@testcase | 120 | 0 | 1 2 -1 +@testcase | 121 | | [ 1 [ 3 4 ] ] +@testcase | 122 | 0 | [ 1 [ 3 4 ] 4 
(null) ] +@testcase | 122 | -1 | [ 1 (null) 4 (null) ] +@testcase | 123 | | 1 3 4 -1 +@testcase | 124 | 0 | 1 3 4 -1 4 (null) (null) 0 -1 +@testcase | 124 | -1 | 1 (null) (null) 0 4 (null) (null) 0 -1 +@testcase | 125 | | 3 +@testcase | 126 | | 1 (null) (null) 0 2 +@testcase | 127 | 1 | 1 (null) (null) 0 2 +@testcase | 127 | 2 | 1 2 3 -1 4 +@testcase | 127 | 3 | 1 (null) (null) 0 5 +@testcase | 128 | 1 | 1 (null) (null) 0 2 -1 +@testcase | 128 | 2 | (null) (null) (null) (null) (null) 0 +@testcase | 128 | 3 | 1 2 3 -1 4 -1 +@testcase | 129 | 0 | 5 5 0 -1 1 2 0 -1 +@testcase | 129 | -1 | 5 5 0 -1 (null) (null) 0 -1 +@testcase | 130 | 0 | 1 2 3 -1 +@testcase | 130 | -1 | 1 (null) (null) 0 +@testcase | 131 | | -1 777 0 777 777 777 0 0 -1 -1 777 -1 -1 -1 777 +@testcase | 132 | | -1 0 -1 0 777 (null) (null) -1 0 0 +@testcase | 135 | | [ 10 ] [ (null) ] (null) 777 10 -1 (null) -1 (null) 0 777 10 -1 (null) -1 (null) 0 777 0 0 -1 0 0 -1 0 0 -1 777 0 -1 0 0 -1 0 + +@fif_codegen +""" + isTensorNull PROC:<{ + // t.0 t.1 t.NNFlag + 2 1 BLKDROP2 // t.NNFlag + 0 EQINT // '3 + }> +""" + +@fif_codegen +""" + test113 PROC:<{ + // + 1 PUSHINT // '2=1 + PUSHNULL // '2=1 '3 + PAIR // t + 1 INDEX // '5 + PUSHNULL // '5 '6 + 0 PUSHINT // '5 '6 '7=0 + isTensorNull CALLDICT // '8 + }> +""" +*/ diff --git a/tolk-tester/tests/nullable-types.tolk b/tolk-tester/tests/nullable-types.tolk new file mode 100644 index 00000000..ebabb80d --- /dev/null +++ b/tolk-tester/tests/nullable-types.tolk @@ -0,0 +1,109 @@ + +fun getNullable4(): int? { return 4; } +fun getNullableIntNull(): int? asm "PUSHNULL"; + +fun eqInt(x: int) { return x; } +fun eq(x: T) { return x; } + +fun unwrap(x: T?): T { return x!; } +fun intOr0(x: int?): int { return null == x ? 0 : x!; } + +@method_id(101) +fun test101(x: int) { + var re = x == 0 ? null : 100; + return re == null ? re : 200 + getNullable4()!; +} + +@method_id(102) +fun test102(a: int) { + try { + throw (123, a > 10 ? 
null : a); + return 0; + } catch (excno, arg) { + var i = arg as int?; + return excno + (i != null ? i!!!!! : -100); + } +} + +@method_id(103) +fun test103(x: int?): (bool, bool, int) { + var x_gt_0 = x != null && eqInt(x!) > 0; + var x_lt_0 = x != null && eq(x)! < 0; + if (x == null) { + return (x_gt_0, x_lt_0, 0); + } + return (x_gt_0, x_lt_0, x!); +} + +@method_id(104) +fun test104(x: int?) { + var x2 = eq(x = 10); + var ab = (x2, getNullableIntNull()); + return (unwrap(ab.0) + (ab.1 == null ? -100 : ab.1!), ab.1); +} + +@method_id(105) +fun test105() { + var xy: (int?, int?) = (5, null); + var ab = [1 ? [xy.0, xy.1] : null]; + ab.0!.0 = intOr0(ab.0!.0); + ab.0!.1 = intOr0(ab.0!.1); + return ab.0!.0! + ab.0!.1!; +} + +global gTup106: tuple?; +global gInt106: int?; + +@method_id(106) +fun test106() { + gInt106 = 0; + gInt106! += 5; + var int106: int? = 0; + var gTup106 = createEmptyTuple(); + gTup106!.tuplePush(createEmptyTuple()); + (gTup106!.0 as tuple?)!.tuplePush(0 as int?); + tuplePush(mutate gTup106!, gInt106); + tuplePush(mutate gTup106!.0, int106! += 1); + return (gTup106 == null, null != gTup106, gTup106, gTup106!.0 as tuple?); +} + +@method_id(107) +fun test107() { + var b: builder? = beginCell(); + b!.storeInt(1, 32).storeInt(2, 32); + b = b!.storeInt(3, 32); + storeInt(mutate b!, 4, 32); + (b! as builder).storeInt(5, 32); + return b!.getBuilderBitsCount(); +} + +@method_id(108) +fun test108() { + var (a, b: cell?, c) = (1, beginCell().endCell(), 3); + b = null; + return a + (b == null ? 0 : b!.beginParse().loadInt(32)) + c; +} + +@method_id(109) +fun test109() { + var a = getNullable4(); + var b = getNullable4(); + return ([a, b] = [3, 4], a, b); +} + +fun main(x: int?, y: int?) 
{ +} + +/** +@testcase | 101 | 0 | (null) +@testcase | 101 | -1 | 204 +@testcase | 102 | 5 | 128 +@testcase | 102 | 15 | 23 +@testcase | 103 | 10 | -1 0 10 +@testcase | 104 | 8 | -90 (null) +@testcase | 105 | | 5 +@testcase | 106 | | 0 -1 [ [ 0 1 ] 5 ] [ 0 1 ] +@testcase | 107 | | 160 +@testcase | 108 | | 4 +@testcase | 109 | | [ 3 4 ] 3 4 + */ diff --git a/tolk-tester/tests/use-before-declare.tolk b/tolk-tester/tests/use-before-declare.tolk index d3e6b165..2a0e0e7f 100644 --- a/tolk-tester/tests/use-before-declare.tolk +++ b/tolk-tester/tests/use-before-declare.tolk @@ -27,8 +27,8 @@ fun test1(): int { var demo_var: int = demo_10; var demo_slice: int = demo_20; if (demo_var > 0) { - var demo_var: tuple = null; - var demo_slice: tuple = null; + var demo_var: tuple? = null; + var demo_slice: tuple? = null; } return demo_var + demo_slice; } diff --git a/tolk-tester/tests/var-apply.tolk b/tolk-tester/tests/var-apply.tolk index 16863560..d189430f 100644 --- a/tolk-tester/tests/var-apply.tolk +++ b/tolk-tester/tests/var-apply.tolk @@ -138,6 +138,43 @@ fun testIndexedAccessApply() { return functions2.0(functions1.1(b)).loadInt(32); } +fun getNullable4(): int? { return 4; } +fun myBeginCell(): builder? asm "NEWC"; + +@method_id(108) +fun testCallingNotNull() { + var n4: () -> int? = getNullable4; + var creator: (() -> builder?)? = myBeginCell; + var end2: [int, (builder -> cell)?] = [0, endCell]; + var c: cell = end2.1!((creator!()!)!.storeInt(getNullable4()!, 32)); + return c.beginParse().loadInt(32); +} + +fun sumOfTensorIfNotNull(t: (int, int)?) 
{ + if (t == null) { return 0; } + return t!.0 + t!.1; +} + +@method_id(109) +fun testTypeTransitionOfVarCall() { + var summer = sumOfTensorIfNotNull; + var hh1 = [1, null]; + var tt1 = (3, 4); + return (summer(null), summer((1,2)), summer(hh1.1), summer(tt1)); +} + +fun makeTensor(x1: int, x2: int, x3: int, x4: int, x5: int) { + return (x1, x2, x3, x4, x5); +} + +fun eq(x: T): T { return x; } + +@method_id(110) +fun testVarsModificationInsideVarCall(x: int) { + var cb = makeTensor; + return x > 3 ? cb(x, x += 5, eq(x *= x), x, eq(x)) : null; +} + fun main() {} /** @@ -148,4 +185,8 @@ fun main() {} @testcase | 105 | | 1 @testcase | 106 | | 1 1 [ 2 ] [ 2 ] @testcase | 107 | | 65537 +@testcase | 108 | | 4 +@testcase | 109 | | 0 3 0 7 +@testcase | 110 | 5 | 5 10 100 100 100 -1 +@testcase | 110 | 0 | (null) (null) (null) (null) (null) 0 */ diff --git a/tolk/abscode.cpp b/tolk/abscode.cpp index b465b72b..72da0ac8 100644 --- a/tolk/abscode.cpp +++ b/tolk/abscode.cpp @@ -402,7 +402,7 @@ void CodeBlob::print(std::ostream& os, int flags) const { std::vector CodeBlob::create_var(TypePtr var_type, SrcLocation loc, std::string name) { std::vector ir_idx; - int stack_w = var_type->calc_width_on_stack(); + int stack_w = var_type->get_width_on_stack(); ir_idx.reserve(stack_w); if (const TypeDataTensor* t_tensor = var_type->try_as()) { for (int i = 0; i < t_tensor->size(); ++i) { @@ -410,6 +410,10 @@ std::vector CodeBlob::create_var(TypePtr var_type, SrcLocation loc, s std::vector nested = create_var(t_tensor->items[i], loc, std::move(sub_name)); ir_idx.insert(ir_idx.end(), nested.begin(), nested.end()); } + } else if (const TypeDataNullable* t_nullable = var_type->try_as(); t_nullable && stack_w != 1) { + std::string null_flag_name = name.empty() ? 
name : name + ".NNFlag"; + ir_idx = create_var(t_nullable->inner, loc, std::move(name)); + ir_idx.emplace_back(create_var(TypeDataBool::create(), loc, std::move(null_flag_name))[0]); } else if (var_type != TypeDataVoid::create()) { #ifdef TOLK_DEBUG tolk_assert(stack_w == 1); diff --git a/tolk/ast-from-tokens.cpp b/tolk/ast-from-tokens.cpp index f5855bc1..fcaa1157 100644 --- a/tolk/ast-from-tokens.cpp +++ b/tolk/ast-from-tokens.cpp @@ -111,23 +111,16 @@ static void diagnose_addition_in_bitshift(SrcLocation loc, std::string_view bits } } -// replace (a == null) and similar to isNull(a) (call of a built-in function) -static AnyExprV maybe_replace_eq_null_with_isNull_call(V v) { +// replace (a == null) and similar to ast_is_null_check(a) (special AST vertex) +static AnyExprV maybe_replace_eq_null_with_isNull_check(V v) { bool has_null = v->get_lhs()->type == ast_null_keyword || v->get_rhs()->type == ast_null_keyword; bool replace = has_null && (v->tok == tok_eq || v->tok == tok_neq); if (!replace) { return v; } - auto v_ident = createV(v->loc, "__isNull"); // built-in function - auto v_ref = createV(v->loc, v_ident, nullptr); - AnyExprV v_null = v->get_lhs()->type == ast_null_keyword ? v->get_rhs() : v->get_lhs(); - AnyExprV v_arg = createV(v->loc, v_null, false); - AnyExprV v_isNull = createV(v->loc, v_ref, createV(v->loc, {v_arg})); - if (v->tok == tok_neq) { - v_isNull = createV(v->loc, "!", tok_logical_not, v_isNull); - } - return v_isNull; + AnyExprV v_nullable = v->get_lhs()->type == ast_null_keyword ? v->get_rhs() : v->get_lhs(); + return createV(v->loc, v_nullable, v->tok == tok_neq); } @@ -372,16 +365,31 @@ static AnyExprV parse_expr100(Lexer& lex) { } } -// parse E(...) (left-to-right) +// parse E(...) and E! 
having parsed E already (left-to-right) +static AnyExprV parse_fun_call_postfix(Lexer& lex, AnyExprV lhs) { + while (true) { + if (lex.tok() == tok_oppar) { + lhs = createV(lhs->loc, lhs, parse_argument_list(lex)); + } else if (lex.tok() == tok_logical_not) { + lex.next(); + lhs = createV(lhs->loc, lhs); + } else { + break; + } + } + return lhs; +} + +// parse E(...) and E! (left-to-right) static AnyExprV parse_expr90(Lexer& lex) { AnyExprV res = parse_expr100(lex); - while (lex.tok() == tok_oppar) { - res = createV(res->loc, res, parse_argument_list(lex)); + if (lex.tok() == tok_oppar || lex.tok() == tok_logical_not) { + res = parse_fun_call_postfix(lex, res); } return res; } -// parse E.field and E.method(...) (left-to-right) +// parse E.field and E.method(...) and E.field! (left-to-right) static AnyExprV parse_expr80(Lexer& lex) { AnyExprV lhs = parse_expr90(lex); while (lex.tok() == tok_dot) { @@ -402,8 +410,8 @@ static AnyExprV parse_expr80(Lexer& lex) { lex.unexpected("method name"); } lhs = createV(loc, lhs, v_ident, v_instantiationTs); - while (lex.tok() == tok_oppar) { - lhs = createV(lex.cur_location(), lhs, parse_argument_list(lex)); + if (lex.tok() == tok_oppar || lex.tok() == tok_logical_not) { + lhs = parse_fun_call_postfix(lex, lhs); } } return lhs; @@ -491,7 +499,7 @@ static AnyExprV parse_expr15(Lexer& lex) { AnyExprV rhs = parse_expr17(lex); lhs = createV(loc, operator_name, t, lhs, rhs); if (t == tok_eq || t == tok_neq) { - lhs = maybe_replace_eq_null_with_isNull_call(lhs->as()); + lhs = maybe_replace_eq_null_with_isNull_check(lhs->as()); } } return lhs; diff --git a/tolk/ast-replacer.h b/tolk/ast-replacer.h index c8350747..5103cc92 100644 --- a/tolk/ast-replacer.h +++ b/tolk/ast-replacer.h @@ -108,6 +108,8 @@ protected: virtual AnyExprV replace(V v) { return replace_children(v); } virtual AnyExprV replace(V v) { return replace_children(v); } virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return 
replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } // statements virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } @@ -144,6 +146,8 @@ protected: case ast_binary_operator: return replace(v->as()); case ast_ternary_operator: return replace(v->as()); case ast_cast_as_operator: return replace(v->as()); + case ast_not_null_operator: return replace(v->as()); + case ast_is_null_check: return replace(v->as()); default: throw UnexpectedASTNodeType(v, "ASTReplacerInFunctionBody::replace"); } @@ -174,20 +178,20 @@ protected: } public: - virtual bool should_visit_function(const FunctionData* fun_ref) = 0; + virtual bool should_visit_function(FunctionPtr fun_ref) = 0; - void start_replacing_in_function(const FunctionData* fun_ref, V v_function) { + void start_replacing_in_function(FunctionPtr fun_ref, V v_function) { replace(v_function->get_body()); } }; -const std::vector& get_all_not_builtin_functions(); +const std::vector& get_all_not_builtin_functions(); template void replace_ast_of_all_functions() { BodyReplacerT visitor; - for (const FunctionData* fun_ref : get_all_not_builtin_functions()) { + for (FunctionPtr fun_ref : get_all_not_builtin_functions()) { if (visitor.should_visit_function(fun_ref)) { visitor.start_replacing_in_function(fun_ref, fun_ref->ast_root->as()); } diff --git a/tolk/ast-replicator.h b/tolk/ast-replicator.h index 02198adb..16bbbeb8 100644 --- a/tolk/ast-replicator.h +++ b/tolk/ast-replicator.h @@ -121,6 +121,12 @@ protected: virtual V clone(V v) { return createV(v->loc, clone(v->get_expr()), clone(v->cast_to_type)); } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_expr())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_expr()), v->is_negated); + } // statements @@ -200,6 +206,8 @@ protected: case ast_binary_operator: return clone(v->as()); case ast_ternary_operator: return clone(v->as()); case 
ast_cast_as_operator: return clone(v->as()); + case ast_not_null_operator: return clone(v->as()); + case ast_is_null_check: return clone(v->as()); default: throw UnexpectedASTNodeType(v, "ASTReplicatorFunction::clone"); } diff --git a/tolk/ast-stringifier.h b/tolk/ast-stringifier.h index 1211d63f..a7f260de 100644 --- a/tolk/ast-stringifier.h +++ b/tolk/ast-stringifier.h @@ -56,6 +56,8 @@ class ASTStringifier final : public ASTVisitor { {ast_binary_operator, "ast_binary_operator"}, {ast_ternary_operator, "ast_ternary_operator"}, {ast_cast_as_operator, "ast_cast_as_operator"}, + {ast_not_null_operator, "ast_not_null_operator"}, + {ast_is_null_check, "ast_is_null_check"}, // statements {ast_empty_statement, "ast_empty_statement"}, {ast_sequence, "ast_sequence"}, @@ -268,6 +270,8 @@ public: case ast_binary_operator: return handle_vertex(v->as()); case ast_ternary_operator: return handle_vertex(v->as()); case ast_cast_as_operator: return handle_vertex(v->as()); + case ast_not_null_operator: return handle_vertex(v->as()); + case ast_is_null_check: return handle_vertex(v->as()); // statements case ast_empty_statement: return handle_vertex(v->as()); case ast_sequence: return handle_vertex(v->as()); diff --git a/tolk/ast-visitor.h b/tolk/ast-visitor.h index a54cb13b..d697aa82 100644 --- a/tolk/ast-visitor.h +++ b/tolk/ast-visitor.h @@ -109,6 +109,8 @@ protected: virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } // statements virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } @@ -146,6 +148,8 @@ protected: case ast_binary_operator: return visit(v->as()); case ast_ternary_operator: return visit(v->as()); case ast_cast_as_operator: return visit(v->as()); + case ast_not_null_operator: return 
visit(v->as()); + case ast_is_null_check: return visit(v->as()); // statements case ast_empty_statement: return visit(v->as()); case ast_sequence: return visit(v->as()); @@ -167,20 +171,20 @@ protected: } public: - virtual bool should_visit_function(const FunctionData* fun_ref) = 0; + virtual bool should_visit_function(FunctionPtr fun_ref) = 0; - virtual void start_visiting_function(const FunctionData* fun_ref, V v_function) { + virtual void start_visiting_function(FunctionPtr fun_ref, V v_function) { visit(v_function->get_body()); } }; -const std::vector& get_all_not_builtin_functions(); +const std::vector& get_all_not_builtin_functions(); template void visit_ast_of_all_functions() { BodyVisitorT visitor; - for (const FunctionData* fun_ref : get_all_not_builtin_functions()) { + for (FunctionPtr fun_ref : get_all_not_builtin_functions()) { if (visitor.should_visit_function(fun_ref)) { visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); } diff --git a/tolk/ast.cpp b/tolk/ast.cpp index 092260ff..8f1aa98f 100644 --- a/tolk/ast.cpp +++ b/tolk/ast.cpp @@ -121,7 +121,7 @@ void Vertex::assign_sym(const Symbol* sym) { this->sym = sym; } -void Vertex::assign_fun_ref(const FunctionData* fun_ref) { +void Vertex::assign_fun_ref(FunctionPtr fun_ref) { this->fun_maybe = fun_ref; } @@ -129,7 +129,7 @@ void Vertex::assign_resolved_type(TypePtr cast_to_type) { this->cast_to_type = cast_to_type; } -void Vertex::assign_var_ref(const GlobalVarData* var_ref) { +void Vertex::assign_var_ref(GlobalVarPtr var_ref) { this->var_ref = var_ref; } @@ -137,7 +137,7 @@ void Vertex::assign_resolved_type(TypePtr declared_t this->declared_type = declared_type; } -void Vertex::assign_const_ref(const GlobalConstData* const_ref) { +void Vertex::assign_const_ref(GlobalConstPtr const_ref) { this->const_ref = const_ref; } @@ -149,7 +149,7 @@ void Vertex::assign_resolved_type(TypePtr substituted_t this->substituted_type = substituted_type; } -void Vertex::assign_param_ref(const LocalVarData* 
param_ref) { +void Vertex::assign_param_ref(LocalVarPtr param_ref) { this->param_ref = param_ref; } @@ -157,23 +157,27 @@ void Vertex::assign_resolved_type(TypePtr declared_type) { this->declared_type = declared_type; } -void Vertex::assign_fun_ref(const FunctionData* fun_ref) { +void Vertex::assign_fun_ref(FunctionPtr fun_ref) { this->fun_ref = fun_ref; } -void Vertex::assign_fun_ref(const FunctionData* fun_ref) { +void Vertex::assign_fun_ref(FunctionPtr fun_ref) { this->fun_ref = fun_ref; } -void Vertex::assign_fun_ref(const FunctionData* fun_ref) { +void Vertex::assign_fun_ref(FunctionPtr fun_ref) { this->fun_ref = fun_ref; } +void Vertex::assign_is_negated(bool is_negated) { + this->is_negated = is_negated; +} + void Vertex::assign_target(const DotTarget& target) { this->target = target; } -void Vertex::assign_fun_ref(const FunctionData* fun_ref) { +void Vertex::assign_fun_ref(FunctionPtr fun_ref) { this->fun_ref = fun_ref; } @@ -181,7 +185,7 @@ void Vertex::assign_resolved_type(TypePtr declared_ret this->declared_return_type = declared_return_type; } -void Vertex::assign_var_ref(const LocalVarData* var_ref) { +void Vertex::assign_var_ref(LocalVarPtr var_ref) { this->var_ref = var_ref; } diff --git a/tolk/ast.h b/tolk/ast.h index d2db49f8..cd410187 100644 --- a/tolk/ast.h +++ b/tolk/ast.h @@ -88,6 +88,8 @@ enum ASTNodeType { ast_binary_operator, ast_ternary_operator, ast_cast_as_operator, + ast_not_null_operator, + ast_is_null_check, // statements ast_empty_statement, ast_sequence, @@ -408,7 +410,7 @@ private: V identifier; public: - const LocalVarData* var_ref = nullptr; // filled on resolve identifiers; for `redef` points to declared above; for underscore, name is empty + LocalVarPtr var_ref = nullptr; // filled on resolve identifiers; for `redef` points to declared above; for underscore, name is empty TypePtr declared_type; // not null for `var x: int = rhs`, otherwise nullptr bool is_immutable; // declared via 'val', not 'var' bool marked_as_redef; // var 
(existing_var redef, new_var: int) = ... @@ -417,7 +419,7 @@ public: std::string_view get_name() const { return identifier->name; } // empty for underscore Vertex* mutate() const { return const_cast(this); } - void assign_var_ref(const LocalVarData* var_ref); + void assign_var_ref(LocalVarPtr var_ref); void assign_resolved_type(TypePtr declared_type); Vertex(SrcLocation loc, V identifier, TypePtr declared_type, bool is_immutable, bool marked_as_redef) @@ -530,12 +532,12 @@ private: public: typedef std::variant< - const FunctionData*, // for `t.tupleAt` target is `tupleAt` global function + FunctionPtr, // for `t.tupleAt` target is `tupleAt` global function int // for `t.0` target is "indexed access" 0 > DotTarget; DotTarget target = static_cast(nullptr); // filled at type inferring - bool is_target_fun_ref() const { return std::holds_alternative(target); } + bool is_target_fun_ref() const { return std::holds_alternative(target); } bool is_target_indexed_access() const { return std::holds_alternative(target); } AnyExprV get_obj() const { return child; } @@ -560,7 +562,7 @@ template<> // example: `getF()()` then callee is another func call (which type is TypeDataFunCallable) // example: `obj.method()` then callee is dot access (resolved while type inferring) struct Vertex final : ASTExprBinary { - const FunctionData* fun_maybe = nullptr; // filled while type inferring for `globalF()` / `obj.f()`; remains nullptr for `local_var()` / `getF()()` + FunctionPtr fun_maybe = nullptr; // filled while type inferring for `globalF()` / `obj.f()`; remains nullptr for `local_var()` / `getF()()` AnyExprV get_callee() const { return lhs; } bool is_dot_call() const { return lhs->type == ast_dot_access; } @@ -570,7 +572,7 @@ struct Vertex final : ASTExprBinary { auto get_arg(int i) const { return rhs->as()->get_arg(i); } Vertex* mutate() const { return const_cast(this); } - void assign_fun_ref(const FunctionData* fun_ref); + void assign_fun_ref(FunctionPtr fun_ref); 
Vertex(SrcLocation loc, AnyExprV lhs_f, V arguments) : ASTExprBinary(ast_function_call, loc, lhs_f, arguments) {} @@ -603,7 +605,7 @@ template<> // ast_set_assign represents assignment-and-set operation "lhs = rhs" // examples: `a += 4` / `b <<= c` struct Vertex final : ASTExprBinary { - const FunctionData* fun_ref = nullptr; // filled at type inferring, points to `_+_` built-in for += + FunctionPtr fun_ref = nullptr; // filled at type inferring, points to `_+_` built-in for += std::string_view operator_name; // without equal sign, "+" for operator += TokenType tok; // tok_set_* @@ -611,7 +613,7 @@ struct Vertex final : ASTExprBinary { AnyExprV get_rhs() const { return rhs; } Vertex* mutate() const { return const_cast(this); } - void assign_fun_ref(const FunctionData* fun_ref); + void assign_fun_ref(FunctionPtr fun_ref); Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyExprV lhs, AnyExprV rhs) : ASTExprBinary(ast_set_assign, loc, lhs, rhs) @@ -622,14 +624,14 @@ template<> // ast_unary_operator is "some operator over one expression" // examples: `-1` / `~found` struct Vertex final : ASTExprUnary { - const FunctionData* fun_ref = nullptr; // filled at type inferring, points to some built-in function + FunctionPtr fun_ref = nullptr; // filled at type inferring, points to some built-in function std::string_view operator_name; TokenType tok; AnyExprV get_rhs() const { return child; } Vertex* mutate() const { return const_cast(this); } - void assign_fun_ref(const FunctionData* fun_ref); + void assign_fun_ref(FunctionPtr fun_ref); Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyExprV rhs) : ASTExprUnary(ast_unary_operator, loc, rhs) @@ -641,7 +643,7 @@ template<> // examples: `a + b` / `x & true` / `(a, b) << g()` // note, that `a = b` is NOT a binary operator, it's ast_assign, also `a += b`, it's ast_set_assign struct Vertex final : ASTExprBinary { - const FunctionData* fun_ref = nullptr; // filled at type inferring, 
points to some built-in function + FunctionPtr fun_ref = nullptr; // filled at type inferring, points to some built-in function std::string_view operator_name; TokenType tok; @@ -649,7 +651,7 @@ struct Vertex final : ASTExprBinary { AnyExprV get_rhs() const { return rhs; } Vertex* mutate() const { return const_cast(this); } - void assign_fun_ref(const FunctionData* fun_ref); + void assign_fun_ref(FunctionPtr fun_ref); Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyExprV lhs, AnyExprV rhs) : ASTExprBinary(ast_binary_operator, loc, lhs, rhs) @@ -684,6 +686,32 @@ struct Vertex final : ASTExprUnary { , cast_to_type(cast_to_type) {} }; +template<> +// ast_not_null_operator is non-null assertion: like TypeScript ! or Kotlin !! +// examples: `nullableInt!` / `getNullableBuilder()!` +struct Vertex final : ASTExprUnary { + AnyExprV get_expr() const { return child; } + + Vertex(SrcLocation loc, AnyExprV expr) + : ASTExprUnary(ast_not_null_operator, loc, expr) {} +}; + +template<> +// ast_is_null_check is an artificial vertex for "expr == null" / "expr != null" / same but null on the left +// it's created instead of a general binary expression to emphasize its purpose +struct Vertex final : ASTExprUnary { + bool is_negated; + + AnyExprV get_expr() const { return child; } + + Vertex* mutate() const { return const_cast(this); } + void assign_is_negated(bool is_negated); + + Vertex(SrcLocation loc, AnyExprV expr, bool is_negated) + : ASTExprUnary(ast_is_null_check, loc, expr) + , is_negated(is_negated) {} +}; + // // --------------------------------------------------------- @@ -892,7 +920,7 @@ template<> // ast_parameter is a parameter of a function in its declaration // example: `fun f(a: int, mutate b: slice)` has 2 parameters struct Vertex final : ASTOtherLeaf { - const LocalVarData* param_ref = nullptr; // filled on resolve identifiers + LocalVarPtr param_ref = nullptr; // filled on resolve identifiers std::string_view param_name; TypePtr 
declared_type; bool declared_as_mutate; // declared as `mutate param_name` @@ -900,7 +928,7 @@ struct Vertex final : ASTOtherLeaf { bool is_underscore() const { return param_name.empty(); } Vertex* mutate() const { return const_cast(this); } - void assign_param_ref(const LocalVarData* param_ref); + void assign_param_ref(LocalVarPtr param_ref); void assign_resolved_type(TypePtr declared_type); Vertex(SrcLocation loc, std::string_view param_name, TypePtr declared_type, bool declared_as_mutate) @@ -951,7 +979,7 @@ struct Vertex final : ASTOtherVararg { auto get_param(int i) const { return children.at(1)->as()->get_param(i); } AnyV get_body() const { return children.at(2); } // ast_sequence / ast_asm_body - const FunctionData* fun_ref = nullptr; // filled after register + FunctionPtr fun_ref = nullptr; // filled after register TypePtr declared_return_type; // filled at ast parsing; if unspecified (nullptr), means "auto infer" V genericsT_list; // for non-generics it's nullptr td::RefInt256 method_id; // specified via @method_id annotation @@ -962,7 +990,7 @@ struct Vertex final : ASTOtherVararg { bool is_builtin_function() const { return children.at(2)->type == ast_empty_statement; } Vertex* mutate() const { return const_cast(this); } - void assign_fun_ref(const FunctionData* fun_ref); + void assign_fun_ref(FunctionPtr fun_ref); void assign_resolved_type(TypePtr declared_return_type); Vertex(SrcLocation loc, V name_identifier, V parameters, AnyV body, TypePtr declared_return_type, V genericsT_list, td::RefInt256 method_id, int flags) @@ -975,13 +1003,13 @@ template<> // example: `global g: int;` // note, that globals don't have default values, since there is no single "entrypoint" for a contract struct Vertex final : ASTOtherVararg { - const GlobalVarData* var_ref = nullptr; // filled after register + GlobalVarPtr var_ref = nullptr; // filled after register TypePtr declared_type; // filled always, typing globals is mandatory auto get_identifier() const { return 
children.at(0)->as(); } Vertex* mutate() const { return const_cast(this); } - void assign_var_ref(const GlobalVarData* var_ref); + void assign_var_ref(GlobalVarPtr var_ref); void assign_resolved_type(TypePtr declared_type); Vertex(SrcLocation loc, V name_identifier, TypePtr declared_type) @@ -993,14 +1021,14 @@ template<> // ast_constant_declaration is declaring a global constant, outside a function // example: `const op = 0x123;` struct Vertex final : ASTOtherVararg { - const GlobalConstData* const_ref = nullptr; // filled after register + GlobalConstPtr const_ref = nullptr; // filled after register TypePtr declared_type; // not null for `const op: int = ...` auto get_identifier() const { return children.at(0)->as(); } AnyExprV get_init_value() const { return child_as_expr(1); } Vertex* mutate() const { return const_cast(this); } - void assign_const_ref(const GlobalConstData* const_ref); + void assign_const_ref(GlobalConstPtr const_ref); void assign_resolved_type(TypePtr declared_type); Vertex(SrcLocation loc, V name_identifier, TypePtr declared_type, AnyExprV init_value) diff --git a/tolk/codegen.cpp b/tolk/codegen.cpp index ad61b8a5..5b2c50cc 100644 --- a/tolk/codegen.cpp +++ b/tolk/codegen.cpp @@ -348,9 +348,9 @@ bool Op::generate_code_step(Stack& stack) { std::vector args0, res; int w_arg = 0; for (const LocalVarData& param : f_sym->parameters) { - w_arg += param.declared_type->calc_width_on_stack(); + w_arg += param.declared_type->get_width_on_stack(); } - int w_ret = f_sym->inferred_return_type->calc_width_on_stack(); + int w_ret = f_sym->inferred_return_type->get_width_on_stack(); tolk_assert(w_ret >= 0 && w_arg >= 0); for (int i = 0; i < w_ret; i++) { res.emplace_back(0); diff --git a/tolk/compiler-state.cpp b/tolk/compiler-state.cpp index 66fad844..95a7e6a5 100644 --- a/tolk/compiler-state.cpp +++ b/tolk/compiler-state.cpp @@ -66,7 +66,7 @@ void CompilerSettings::parse_experimental_options_cmd_arg(const std::string& cmd } } -const std::vector& 
get_all_not_builtin_functions() { +const std::vector& get_all_not_builtin_functions() { return G.all_functions; } diff --git a/tolk/compiler-state.h b/tolk/compiler-state.h index d33eec81..1d166a3a 100644 --- a/tolk/compiler-state.h +++ b/tolk/compiler-state.h @@ -95,10 +95,10 @@ struct CompilerState { GlobalSymbolTable symtable; PersistentHeapAllocator persistent_mem; - std::vector all_functions; // all user-defined (not built-in) functions, with generic instantiations - std::vector all_get_methods; - std::vector all_global_vars; - std::vector all_constants; + std::vector all_functions; // all user-defined (not built-in) functions, with generic instantiations + std::vector all_get_methods; + std::vector all_global_vars; + std::vector all_constants; AllRegisteredSrcFiles all_src_files; bool is_verbosity(int gt_eq) const { return settings.verbosity >= gt_eq; } diff --git a/tolk/constant-evaluator.cpp b/tolk/constant-evaluator.cpp index 9ad27381..4d11b922 100644 --- a/tolk/constant-evaluator.cpp +++ b/tolk/constant-evaluator.cpp @@ -255,7 +255,7 @@ struct ConstantEvaluator { if (!sym) { v->error("undefined symbol `" + static_cast(name) + "`"); } - const GlobalConstData* const_ref = sym->try_as(); + GlobalConstPtr const_ref = sym->try_as(); if (!const_ref) { v->error("symbol `" + static_cast(name) + "` is not a constant"); } diff --git a/tolk/fwd-declarations.h b/tolk/fwd-declarations.h index e3599f36..8d3b24a8 100644 --- a/tolk/fwd-declarations.h +++ b/tolk/fwd-declarations.h @@ -32,6 +32,11 @@ struct FunctionData; struct GlobalVarData; struct GlobalConstData; +using LocalVarPtr = const LocalVarData*; +using FunctionPtr = const FunctionData*; +using GlobalVarPtr = const GlobalVarData*; +using GlobalConstPtr = const GlobalConstData*; + class TypeData; using TypePtr = const TypeData*; diff --git a/tolk/generics-helpers.cpp b/tolk/generics-helpers.cpp index 7a2dd83f..86cdf82b 100644 --- a/tolk/generics-helpers.cpp +++ b/tolk/generics-helpers.cpp @@ -37,12 +37,38 @@ 
static TypePtr replace_genericT_with_deduced(TypePtr orig, const GenericsDeclara if (idx == -1) { throw Fatal("can not replace generic " + asT->nameT); } + if (substitutionTs[idx] == nullptr) { + throw GenericDeduceError("can not deduce " + asT->nameT); + } return substitutionTs[idx]; } return child; }); } +GenericSubstitutionsDeduceForCall::GenericSubstitutionsDeduceForCall(FunctionPtr fun_ref) + : fun_ref(fun_ref) { + substitutionTs.resize(fun_ref->genericTs->size()); // filled with nullptr (nothing deduced) +} + +void GenericSubstitutionsDeduceForCall::provide_deducedT(const std::string& nameT, TypePtr deduced) { + if (deduced == TypeDataNullLiteral::create() || deduced->has_unknown_inside()) { + return; // just 'null' doesn't give sensible info + } + + int idx = fun_ref->genericTs->find_nameT(nameT); + if (substitutionTs[idx] == nullptr) { + substitutionTs[idx] = deduced; + } else if (substitutionTs[idx] != deduced) { + throw GenericDeduceError(nameT + " is both " + substitutionTs[idx]->as_human_readable() + " and " + deduced->as_human_readable()); + } +} + +void GenericSubstitutionsDeduceForCall::provide_manually_specified(std::vector&& substitutionTs) { + this->substitutionTs = std::move(substitutionTs); + this->manually_specified = true; +} + // purpose: having `f(value: T)` and call `f(5)`, deduce T = int // generally, there may be many generic Ts for declaration, and many arguments // for every argument, `consider_next_condition()` is called @@ -51,71 +77,67 @@ static TypePtr replace_genericT_with_deduced(TypePtr orig, const GenericsDeclara // - next condition: param_type = `T1`, arg_type = `int`, deduce T1 = int // - next condition: param_type = `(T1, T2)`, arg_type = `(int, slice)`, deduce T1 = int, T2 = slice // for call `f(6, cs, (8, cs))` T1 will be both `slice` and `int`, fired an error -class GenericSubstitutionsDeduceForFunctionCall final { - const FunctionData* fun_ref; - std::vector substitutions; - - void provideDeducedT(const std::string& 
nameT, TypePtr deduced) { - if (deduced == TypeDataNullLiteral::create() || deduced->has_unknown_inside()) { - return; // just 'null' doesn't give sensible info +void GenericSubstitutionsDeduceForCall::consider_next_condition(TypePtr param_type, TypePtr arg_type) { + if (const auto* asT = param_type->try_as()) { + // `(arg: T)` called as `f([1, 2])` => T is [int, int] + provide_deducedT(asT->nameT, arg_type); + } else if (const auto* p_nullable = param_type->try_as()) { + // `arg: T?` called as `f(nullableInt)` => T is int + if (const auto* a_nullable = arg_type->try_as()) { + consider_next_condition(p_nullable->inner, a_nullable->inner); } - - int idx = fun_ref->genericTs->find_nameT(nameT); - if (substitutions[idx] == nullptr) { - substitutions[idx] = deduced; - } else if (substitutions[idx] != deduced) { - throw std::runtime_error(nameT + " is both " + substitutions[idx]->as_human_readable() + " and " + deduced->as_human_readable()); + // `arg: T?` called as `f(int)` => T is int + else { + consider_next_condition(p_nullable->inner, arg_type); } - } - -public: - explicit GenericSubstitutionsDeduceForFunctionCall(const FunctionData* fun_ref) - : fun_ref(fun_ref) { - substitutions.resize(fun_ref->genericTs->size()); // filled with nullptr (nothing deduced) - } - - void consider_next_condition(TypePtr param_type, TypePtr arg_type) { - if (const auto* asT = param_type->try_as()) { - // `(arg: T)` called as `f([1, 2])` => T is [int, int] - provideDeducedT(asT->nameT, arg_type); - } else if (const auto* p_tensor = param_type->try_as()) { - // `arg: (int, T)` called as `f((5, cs))` => T is slice - if (const auto* a_tensor = arg_type->try_as(); a_tensor && a_tensor->size() == p_tensor->size()) { - for (int i = 0; i < a_tensor->size(); ++i) { - consider_next_condition(p_tensor->items[i], a_tensor->items[i]); - } - } - } else if (const auto* p_tuple = param_type->try_as()) { - // `arg: [int, T]` called as `f([5, cs])` => T is slice - if (const auto* a_tuple = 
arg_type->try_as(); a_tuple && a_tuple->size() == p_tuple->size()) { - for (int i = 0; i < a_tuple->size(); ++i) { - consider_next_condition(p_tuple->items[i], a_tuple->items[i]); - } - } - } else if (const auto* p_callable = param_type->try_as()) { - // `arg: fun(TArg) -> TResult` called as `f(calcTupleLen)` => TArg is tuple, TResult is int - if (const auto* a_callable = arg_type->try_as(); a_callable && a_callable->params_size() == p_callable->params_size()) { - for (int i = 0; i < a_callable->params_size(); ++i) { - consider_next_condition(p_callable->params_types[i], a_callable->params_types[i]); - } - consider_next_condition(p_callable->return_type, a_callable->return_type); + } else if (const auto* p_tensor = param_type->try_as()) { + // `arg: (int, T)` called as `f((5, cs))` => T is slice + if (const auto* a_tensor = arg_type->try_as(); a_tensor && a_tensor->size() == p_tensor->size()) { + for (int i = 0; i < a_tensor->size(); ++i) { + consider_next_condition(p_tensor->items[i], a_tensor->items[i]); } } - } - - int get_first_not_deduced_idx() const { - for (int i = 0; i < static_cast(substitutions.size()); ++i) { - if (substitutions[i] == nullptr) { - return i; + } else if (const auto* p_tuple = param_type->try_as()) { + // `arg: [int, T]` called as `f([5, cs])` => T is slice + if (const auto* a_tuple = arg_type->try_as(); a_tuple && a_tuple->size() == p_tuple->size()) { + for (int i = 0; i < a_tuple->size(); ++i) { + consider_next_condition(p_tuple->items[i], a_tuple->items[i]); } } - return -1; + } else if (const auto* p_callable = param_type->try_as()) { + // `arg: fun(TArg) -> TResult` called as `f(calcTupleLen)` => TArg is tuple, TResult is int + if (const auto* a_callable = arg_type->try_as(); a_callable && a_callable->params_size() == p_callable->params_size()) { + for (int i = 0; i < a_callable->params_size(); ++i) { + consider_next_condition(p_callable->params_types[i], a_callable->params_types[i]); + } + 
consider_next_condition(p_callable->return_type, a_callable->return_type); + } } +} - std::vector flush() { - return {std::move(substitutions)}; +TypePtr GenericSubstitutionsDeduceForCall::replace_by_manually_specified(TypePtr param_type) const { + return replace_genericT_with_deduced(param_type, fun_ref->genericTs, substitutionTs); +} + +TypePtr GenericSubstitutionsDeduceForCall::auto_deduce_from_argument(SrcLocation loc, TypePtr param_type, TypePtr arg_type) { + try { + if (!manually_specified) { + consider_next_condition(param_type, arg_type); + } + return replace_genericT_with_deduced(param_type, fun_ref->genericTs, substitutionTs); + } catch (const GenericDeduceError& ex) { + throw ParseError(loc, ex.message + " for generic function `" + fun_ref->as_human_readable() + "`; instantiate it manually with `" + fun_ref->name + "<...>()`"); } -}; +} + +int GenericSubstitutionsDeduceForCall::get_first_not_deduced_idx() const { + for (int i = 0; i < static_cast(substitutionTs.size()); ++i) { + if (substitutionTs[i] == nullptr) { + return i; + } + } + return -1; +} // clone the body of `f` replacing T everywhere with a substitution // before: `fun f(v: T) { var cp: [T] = [v]; }` @@ -175,7 +197,7 @@ int GenericsDeclaration::find_nameT(std::string_view nameT) const { // after creating a deep copy of `f` like `f`, its new and fresh body needs the previous pipeline to run // for example, all local vars need to be registered as symbols, etc. 
-static void run_pipeline_for_instantiated_function(const FunctionData* inst_fun_ref) { +static void run_pipeline_for_instantiated_function(FunctionPtr inst_fun_ref) { // these pipes are exactly the same as in tolk.cpp — all preceding (and including) type inferring pipeline_resolve_identifiers_and_assign_symbols(inst_fun_ref); pipeline_calculate_rvalue_lvalue(inst_fun_ref); @@ -198,34 +220,12 @@ std::string generate_instantiated_name(const std::string& orig_name, const std:: return name; } -td::Result> deduce_substitutionTs_on_generic_func_call(const FunctionData* called_fun, std::vector&& arg_types, TypePtr return_hint) { - try { - GenericSubstitutionsDeduceForFunctionCall deducing(called_fun); - for (const LocalVarData& param : called_fun->parameters) { - if (param.declared_type->has_genericT_inside() && param.param_idx < static_cast(arg_types.size())) { - deducing.consider_next_condition(param.declared_type, arg_types[param.param_idx]); - } - } - int idx = deducing.get_first_not_deduced_idx(); - if (idx != -1 && return_hint && called_fun->declared_return_type->has_genericT_inside()) { - deducing.consider_next_condition(called_fun->declared_return_type, return_hint); - idx = deducing.get_first_not_deduced_idx(); - } - if (idx != -1) { - return td::Status::Error(td::Slice{"can not deduce " + called_fun->genericTs->get_nameT(idx)}); - } - return deducing.flush(); - } catch (const std::runtime_error& ex) { - return td::Status::Error(td::Slice{ex.what()}); - } -} - -const FunctionData* instantiate_generic_function(SrcLocation loc, const FunctionData* fun_ref, const std::string& inst_name, std::vector&& substitutionTs) { +FunctionPtr instantiate_generic_function(SrcLocation loc, FunctionPtr fun_ref, const std::string& inst_name, std::vector&& substitutionTs) { tolk_assert(fun_ref->genericTs); // if `f` was earlier instantiated, return it if (const auto* existing = lookup_global_symbol(inst_name)) { - const FunctionData* inst_ref = existing->try_as(); + FunctionPtr 
inst_ref = existing->try_as(); tolk_assert(inst_ref); return inst_ref; } diff --git a/tolk/generics-helpers.h b/tolk/generics-helpers.h index 2a304f55..893bd98c 100644 --- a/tolk/generics-helpers.h +++ b/tolk/generics-helpers.h @@ -57,8 +57,46 @@ struct GenericsInstantiation { } }; +// this class helps to deduce Ts on the fly +// purpose: having `f(value: T)` and call `f(5)`, deduce T = int +// while analyzing a call, arguments are handled one by one, by `auto_deduce_from_argument()` +// this class also handles manually specified substitutions like `f(5)` +class GenericSubstitutionsDeduceForCall { + FunctionPtr fun_ref; + std::vector substitutionTs; + bool manually_specified = false; + + void provide_deducedT(const std::string& nameT, TypePtr deduced); + void consider_next_condition(TypePtr param_type, TypePtr arg_type); + +public: + explicit GenericSubstitutionsDeduceForCall(FunctionPtr fun_ref); + + bool is_manually_specified() const { + return manually_specified; + } + + void provide_manually_specified(std::vector&& substitutionTs); + TypePtr replace_by_manually_specified(TypePtr param_type) const; + TypePtr auto_deduce_from_argument(SrcLocation loc, TypePtr param_type, TypePtr arg_type); + int get_first_not_deduced_idx() const; + + std::vector&& flush() { + return std::move(substitutionTs); + } +}; + +struct GenericDeduceError final : std::exception { + std::string message; + explicit GenericDeduceError(std::string message) + : message(std::move(message)) { } + + const char* what() const noexcept override { + return message.c_str(); + } +}; + std::string generate_instantiated_name(const std::string& orig_name, const std::vector& substitutions); -td::Result> deduce_substitutionTs_on_generic_func_call(const FunctionData* called_fun, std::vector&& arg_types, TypePtr return_hint); -const FunctionData* instantiate_generic_function(SrcLocation loc, const FunctionData* fun_ref, const std::string& inst_name, std::vector&& substitutionTs); +FunctionPtr 
instantiate_generic_function(SrcLocation loc, FunctionPtr fun_ref, const std::string& inst_name, std::vector&& substitutionTs); } // namespace tolk diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index 867c05ec..269c5fe1 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -32,12 +32,22 @@ * So, if execution reaches this pass, the input is (almost) correct, and code generation should succeed. * (previously, there was a check for one variable modified twice like `(t.0, t.0) = rhs`, but after changing * execution order of assignment to "first lhs, then lhs", it was removed for several reasons) +* + * A noticeable property for IR generation is "target_type" used to extend/shrink stack. + * Example: `var a: (int,int)? = null`. This `null` has inferred_type "null literal", but target_type "nullable tensor", + * and when it's assigned, it's "expanded" from 1 stack slot to 3 (int + int + null flag). + * Example: `fun analyze(t: (int,int)?)` and a call `analyze((1,2))`. `(1,2)` is `(int,int)` (2 stack slots), + * and when passed to target (3 slots, one for null flag), this null flag is implicitly added (zero value). + * Example: `nullableInt!`; for `nullableInt` inferred_type is `int?`, and target_type is `int` + * (this doesn't lead to stack reorganization, but in case `nullableTensor!` does) + * (inferred_type of `nullableInt!` is `int`, and its target_type depends on its usage). + * The same mechanism will work for union types in the future. 
*/ namespace tolk { class LValContext; -std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, LValContext* lval_ctx = nullptr); +std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, TypePtr target_type = nullptr, LValContext* lval_ctx = nullptr); std::vector pre_compile_symbol(SrcLocation loc, const Symbol* sym, CodeBlob& code, LValContext* lval_ctx); void process_any_statement(AnyV v, CodeBlob& code); @@ -101,8 +111,8 @@ class LValContext { // every global variable used as lvalue is registered here // example: `globalInt = 9`, implicit var is created `$tmp = 9`, and `SetGlob "globalInt" $tmp` is done after struct ModifiedGlobal { - const GlobalVarData* glob_ref; - std::vector lval_ir_idx; // typically 1, generally calc_width_on_stack() of global var (tensors) + GlobalVarPtr glob_ref; + std::vector lval_ir_idx; // typically 1, generally get_width_on_stack() of global var (tensors) // for 1-slot globals int/cell/slice, assigning to them is just SETGLOB // same for tensors, if they are fully rewritten in an expression: `gTensor = (5,6)` @@ -139,13 +149,13 @@ class LValContext { void apply(CodeBlob& code, SrcLocation loc) const { LValContext local_lval; local_lval.enter_rval_inside_lval(); - std::vector obj_ir_idx = pre_compile_expr(tensor_obj, code, &local_lval); + std::vector obj_ir_idx = pre_compile_expr(tensor_obj, code, nullptr, &local_lval); const TypeDataTensor* t_tensor = tensor_obj->inferred_type->try_as(); tolk_assert(t_tensor); - int stack_width = t_tensor->items[index_at]->calc_width_on_stack(); + int stack_width = t_tensor->items[index_at]->get_width_on_stack(); int stack_offset = 0; for (int i = 0; i < index_at; ++i) { - stack_offset += t_tensor->items[i]->calc_width_on_stack(); + stack_offset += t_tensor->items[i]->get_width_on_stack(); } std::vector field_ir_idx = {obj_ir_idx.begin() + stack_offset, obj_ir_idx.begin() + stack_offset + stack_width}; tolk_assert(field_ir_idx.size() == lval_ir_idx.size()); @@ -167,12 +177,12 @@ class LValContext { 
void apply(CodeBlob& code, SrcLocation loc) const { LValContext local_lval; local_lval.enter_rval_inside_lval(); - std::vector tuple_ir_idx = pre_compile_expr(tuple_obj, code, &local_lval); + std::vector tuple_ir_idx = pre_compile_expr(tuple_obj, code, nullptr, &local_lval); std::vector index_ir_idx = code.create_tmp_var(TypeDataInt::create(), loc, "(tuple-idx)"); code.emplace_back(loc, Op::_IntConst, index_ir_idx, td::make_refint(index_at)); vars_modification_watcher.trigger_callbacks(tuple_ir_idx, loc); - const FunctionData* builtin_sym = lookup_global_symbol("tupleSetAt")->as(); + FunctionPtr builtin_sym = lookup_global_symbol("tupleSetAt")->try_as(); code.emplace_back(loc, Op::_Call, std::vector{tuple_ir_idx}, std::vector{tuple_ir_idx[0], lval_ir_idx[0], index_ir_idx[0]}, builtin_sym); local_lval.after_let(std::move(tuple_ir_idx), code, loc); } @@ -195,7 +205,7 @@ public: void exit_rval_inside_lval() { level_rval_inside_lval--; } bool is_rval_inside_lval() const { return level_rval_inside_lval > 0; } - void capture_global_modification(const GlobalVarData* glob_ref, std::vector lval_ir_idx) { + void capture_global_modification(GlobalVarPtr glob_ref, std::vector lval_ir_idx) { modifications.emplace_back(ModifiedGlobal{glob_ref, std::move(lval_ir_idx)}); } @@ -245,29 +255,39 @@ public: } }; +// given `{some_expr}!`, return some_expr +static AnyExprV unwrap_not_null_operator(AnyExprV v) { + while (auto v_notnull = v->try_as()) { + v = v_notnull->get_expr(); + } + return v; +} + // given `{some_expr}.{i}`, check it for pattern `some_var.0` / `some_var.0.1` / etc. // return some_var if satisfies (it may be a local or a global var, a tensor or a tuple) // return nullptr otherwise: `f().0` / `(v = rhs).0` / `some_var.method().0` / etc. 
static V calc_sink_leftmost_obj(V v) { - AnyExprV leftmost_obj = v->get_obj(); + AnyExprV leftmost_obj = unwrap_not_null_operator(v->get_obj()); while (auto v_dot = leftmost_obj->try_as()) { if (!v_dot->is_target_indexed_access()) { break; } - leftmost_obj = v_dot->get_obj(); + leftmost_obj = unwrap_not_null_operator(v_dot->get_obj()); } return leftmost_obj->type == ast_reference ? leftmost_obj->as() : nullptr; } static std::vector> pre_compile_tensor_inner(CodeBlob& code, const std::vector& args, - LValContext* lval_ctx) { + const TypeDataTensor* tensor_target_type, LValContext* lval_ctx) { const int n = static_cast(args.size()); if (n == 0) { // just `()` return {}; } + tolk_assert(!tensor_target_type || tensor_target_type->size() == n); if (n == 1) { // just `(x)`: even if x is modified (e.g. `f(x=x+2)`), there are no next arguments - return {pre_compile_expr(args[0], code, lval_ctx)}; + TypePtr child_target_type = tensor_target_type ? tensor_target_type->items[0] : nullptr; + return {pre_compile_expr(args[0], code, child_target_type, lval_ctx)}; } // the purpose is to handle such cases: `return (x, x += y, x)` @@ -321,7 +341,8 @@ static std::vector> pre_compile_tensor_inner(CodeBlob& co WatchingVarList watched_vars(n); for (int arg_idx = 0; arg_idx < n; ++arg_idx) { - std::vector vars_of_ith_arg = pre_compile_expr(args[arg_idx], code, lval_ctx); + TypePtr child_target_type = tensor_target_type ? 
tensor_target_type->items[arg_idx] : nullptr; + std::vector vars_of_ith_arg = pre_compile_expr(args[arg_idx], code, child_target_type, lval_ctx); watched_vars.add_and_watch_modifications(std::move(vars_of_ith_arg), code); } return watched_vars.clear_and_stop_watching(); @@ -329,7 +350,13 @@ static std::vector> pre_compile_tensor_inner(CodeBlob& co static std::vector pre_compile_tensor(CodeBlob& code, const std::vector& args, LValContext* lval_ctx = nullptr) { - std::vector> res_lists = pre_compile_tensor_inner(code, args, lval_ctx); + std::vector types_list; + types_list.reserve(args.size()); + for (AnyExprV item : args) { + types_list.push_back(item->inferred_type); + } + const TypeDataTensor* tensor_target_type = TypeDataTensor::create(std::move(types_list))->try_as(); + std::vector> res_lists = pre_compile_tensor_inner(code, args, tensor_target_type, lval_ctx); std::vector res; for (const std::vector& list : res_lists) { res.insert(res.end(), list.cbegin(), list.cend()); @@ -340,6 +367,7 @@ static std::vector pre_compile_tensor(CodeBlob& code, const std::vect static std::vector pre_compile_let(CodeBlob& code, AnyExprV lhs, AnyExprV rhs, SrcLocation loc) { // [lhs] = [rhs]; since type checking is ok, it's the same as "lhs = rhs" if (lhs->type == ast_typed_tuple && rhs->type == ast_typed_tuple) { + // note: there are no type transitions (adding nullability flag, etc.), since only 1-slot elements allowed in tuples LValContext local_lval; std::vector left = pre_compile_tensor(code, lhs->as()->get_items(), &local_lval); vars_modification_watcher.trigger_callbacks(left, loc); @@ -355,7 +383,7 @@ static std::vector pre_compile_let(CodeBlob& code, AnyExprV lhs, AnyE LValContext local_lval; std::vector left = pre_compile_tensor(code, lhs->as()->get_items(), &local_lval); vars_modification_watcher.trigger_callbacks(left, loc); - std::vector right = pre_compile_expr(rhs, code); + std::vector right = pre_compile_expr(rhs, code, nullptr); const TypeDataTypedTuple* 
inferred_tuple = rhs->inferred_type->try_as(); std::vector types_list = inferred_tuple->items; std::vector rvect = code.create_tmp_var(TypeDataTensor::create(std::move(types_list)), rhs->loc, "(unpack-tuple)"); @@ -365,25 +393,25 @@ static std::vector pre_compile_let(CodeBlob& code, AnyExprV lhs, AnyE return right; } // small optimization: `var x = rhs` or `local_var = rhs` (90% cases), LValContext not needed actually - if (lhs->type == ast_local_var_lhs || (lhs->type == ast_reference && lhs->as()->sym->try_as())) { - std::vector left = pre_compile_expr(lhs, code); // effectively, local_var->ir_idx + if (lhs->type == ast_local_var_lhs || (lhs->type == ast_reference && lhs->as()->sym->try_as())) { + std::vector left = pre_compile_expr(lhs, code, nullptr); // effectively, local_var->ir_idx vars_modification_watcher.trigger_callbacks(left, loc); - std::vector right = pre_compile_expr(rhs, code); + std::vector right = pre_compile_expr(rhs, code, lhs->inferred_type); code.emplace_back(loc, Op::_Let, std::move(left), right); return right; } // lhs = rhs LValContext local_lval; - std::vector left = pre_compile_expr(lhs, code, &local_lval); + std::vector left = pre_compile_expr(lhs, code, nullptr, &local_lval); vars_modification_watcher.trigger_callbacks(left, loc); - std::vector right = pre_compile_expr(rhs, code); + std::vector right = pre_compile_expr(rhs, code, lhs->inferred_type); code.emplace_back(loc, Op::_Let, left, right); local_lval.after_let(std::move(left), code, loc); return right; } static std::vector gen_op_call(CodeBlob& code, TypePtr ret_type, SrcLocation loc, - std::vector&& args_vars, const FunctionData* fun_ref, const char* debug_desc) { + std::vector&& args_vars, FunctionPtr fun_ref, const char* debug_desc) { std::vector rvect = code.create_tmp_var(ret_type, loc, debug_desc); Op& op = code.emplace_back(loc, Op::_Call, rvect, std::move(args_vars), fun_ref); if (!fun_ref->is_marked_as_pure()) { @@ -392,9 +420,161 @@ static std::vector 
gen_op_call(CodeBlob& code, TypePtr ret_type, SrcL return rvect; } +// "Transition to target (runtime) type" is the following process. +// Imagine `fun analyze(t: (int,int)?)` and a call `analyze((1,2))`. +// `(1,2)` (inferred_type) is 2 stack slots, but `t` (target_type) is 3 (one for null-flag). +// So, this null flag should be implicitly added (non-zero, since a variable is not null). +// Another example: `var t: (int, int)? = null`. +// `null` (inferred_type) is 1 stack slots, but target_type is 3, we should add 2 nulls. +// Another example: `var t1 = (1, null); var t2: (int, (int,int)?) = t1;`. +// Then t1's rvect is 2 vars (1 and null), but t1's `null` should be converted to 3 stack slots (resulting in 4 total). +// The same mechanism will work for union types in the future. +// Here rvect is a list of IR vars for inferred_type, probably patched due to target_type. +GNU_ATTRIBUTE_NOINLINE +static std::vector transition_expr_to_runtime_type_impl(std::vector&& rvect, CodeBlob& code, TypePtr original_type, TypePtr target_type, SrcLocation loc) { + // pass `T` to `T` + // could occur for passing tensor `(..., T, ...)` to `(..., T, ...)` while traversing tensor's components + if (target_type == original_type) { + return rvect; + } + + int target_w = target_type->get_width_on_stack(); + const TypeDataNullable* t_nullable = target_type->try_as(); + const TypeDataNullable* o_nullable = original_type->try_as(); + + // pass `null` to `T?` + // for primitives like `int?`, no changes in rvect, null occupies the same TVM slot + // for tensors like `(int,int)?`, `null` is represented as N nulls + 1 null flag, insert N nulls + if (t_nullable && original_type == TypeDataNullLiteral::create()) { + tolk_assert(rvect.size() == 1); + if (target_w == 1 && !t_nullable->is_primitive_nullable()) { // `null` to `()?` + rvect = code.create_tmp_var(TypeDataInt::create(), loc, "(NNFlag)"); + code.emplace_back(loc, Op::_IntConst, rvect, td::make_refint(0)); + } + if (target_w > 1) { + 
FunctionPtr builtin_sym = lookup_global_symbol("__null")->try_as(); + rvect.reserve(target_w + 1); + for (int i = 1; i < target_w - 1; ++i) { + std::vector ith_null = code.create_tmp_var(TypeDataNullLiteral::create(), loc, "(null-literal)"); + code.emplace_back(loc, Op::_Call, ith_null, std::vector{}, builtin_sym); + rvect.push_back(ith_null[0]); + } + std::vector null_flag_ir = code.create_tmp_var(TypeDataInt::create(), loc, "(NNFlag)"); + var_idx_t null_flag_ir_idx = null_flag_ir[0]; + code.emplace_back(loc, Op::_IntConst, std::move(null_flag_ir), td::make_refint(0)); + rvect.push_back(null_flag_ir_idx); + } + return rvect; + } + // pass `T` to `T?` + // for primitives like `int?`, no changes in rvect: `int` and `int?` occupy the same TVM slot (null is represented as NULL TVM value) + // for passing `(int, int)` to `(int, int)?` / `(int, null)` to `(int, (int,int)?)?`, add a null flag equals to 0 + if (t_nullable && !o_nullable) { + if (!t_nullable->is_primitive_nullable()) { + rvect = transition_expr_to_runtime_type_impl(std::move(rvect), code, original_type, t_nullable->inner, loc); + tolk_assert(target_w == static_cast(rvect.size() + 1)); + std::vector null_flag_ir = code.create_tmp_var(TypeDataInt::create(), loc, "(NNFlag)"); + var_idx_t null_flag_ir_idx = null_flag_ir[0]; + code.emplace_back(loc, Op::_IntConst, std::move(null_flag_ir), td::make_refint(-1)); + rvect.push_back(null_flag_ir_idx); + } + return rvect; + } + // pass `T1?` to `T2?` + // for example, `int8?` to `int16?` + // transition inner types, leaving nullable flag unchanged for tensors + if (t_nullable && o_nullable) { + if (target_w > 1) { + var_idx_t null_flag_ir_idx = rvect.back(); + rvect.pop_back(); + rvect = transition_expr_to_runtime_type_impl(std::move(rvect), code, o_nullable->inner, t_nullable->inner, loc); + rvect.push_back(null_flag_ir_idx); + } + return rvect; + } + // pass `T?` to `null` + if (target_type == TypeDataNullLiteral::create() && 
original_type->can_rhs_be_assigned(target_type)) { + tolk_assert(o_nullable || original_type == TypeDataUnknown::create()); + if (o_nullable && !o_nullable->is_primitive_nullable()) { + FunctionPtr builtin_sym = lookup_global_symbol("__null")->try_as(); + rvect = code.create_tmp_var(TypeDataNullLiteral::create(), loc, "(null-literal)"); + code.emplace_back(loc, Op::_Call, rvect, std::vector{}, builtin_sym); + } + return rvect; + } + // pass `T?` to `T` + // it may occur due to operator `!` or smart cast + // for primitives like `int?`, no changes in rvect + // for passing `(int, int)?` to `(int, int)`, drop the null flag from the tail + if (!t_nullable && o_nullable) { + if (!o_nullable->is_primitive_nullable()) { + rvect.pop_back(); + rvect = transition_expr_to_runtime_type_impl(std::move(rvect), code, original_type->try_as()->inner, target_type, loc); + } + return rvect; + } + // pass `bool` to `int` + // in code, it's done via `as` operator, like `boolVar as int` + // no changes in rvect, boolVar is guaranteed to be -1 or 0 at TVM level + if (target_type == TypeDataInt::create() && original_type == TypeDataBool::create()) { + return rvect; + } + // pass something to `unknown` + // probably, it comes from `_ = rhs`, type of `_` is unknown, it's target_type of rhs + // no changes in rvect + if (target_type == TypeDataUnknown::create()) { + return rvect; + } + // pass `unknown` to something + // probably, it comes from `arg` in exception, it's inferred as `unknown` and could be cast to any value + if (original_type == TypeDataUnknown::create()) { + tolk_assert(rvect.size() == 1); + return rvect; + } + // pass tensor to tensor, e.g. 
`(1, null)` to `(int, slice?)` / `(1, null)` to `(int, (int,int)?)` + // every element of rhs tensor should be transitioned + if (target_type->try_as() && original_type->try_as()) { + const TypeDataTensor* target_tensor = target_type->try_as(); + const TypeDataTensor* inferred_tensor = original_type->try_as(); + tolk_assert(target_tensor->size() == inferred_tensor->size()); + tolk_assert(inferred_tensor->get_width_on_stack() == static_cast(rvect.size())); + std::vector result_rvect; + result_rvect.reserve(target_w); + int stack_offset = 0; + for (int i = 0; i < inferred_tensor->size(); ++i) { + int ith_w = inferred_tensor->items[i]->get_width_on_stack(); + std::vector rvect_i{rvect.begin() + stack_offset, rvect.begin() + stack_offset + ith_w}; + std::vector result_i = transition_expr_to_runtime_type_impl(std::move(rvect_i), code, inferred_tensor->items[i], target_tensor->items[i], loc); + result_rvect.insert(result_rvect.end(), result_i.begin(), result_i.end()); + stack_offset += ith_w; + } + return result_rvect; + } + // pass tuple to tuple, e.g. 
`[1, null]` to `[int, int?]` / `[1, null]` to `[int, [int?,int?]?]` + // to changes to rvect, since tuples contain only 1-slot elements + if (target_type->try_as() && original_type->try_as()) { + tolk_assert(target_type->get_width_on_stack() == original_type->get_width_on_stack()); + return rvect; + } + + throw Fatal("unhandled transition_expr_to_runtime_type_impl() combination"); +} + +// invoke the function above only if potentially needed to +// (if an expression is targeted to another type) +#ifndef TOLK_DEBUG +GNU_ATTRIBUTE_ALWAYS_INLINE +#endif +static std::vector transition_to_target_type(std::vector&& rvect, CodeBlob& code, TypePtr target_type, AnyExprV v) { + if (target_type != nullptr && target_type != v->inferred_type) { + rvect = transition_expr_to_runtime_type_impl(std::move(rvect), code, v->inferred_type, target_type, v->loc); + } + return rvect; +} + std::vector pre_compile_symbol(SrcLocation loc, const Symbol* sym, CodeBlob& code, LValContext* lval_ctx) { - if (const auto* glob_ref = sym->try_as()) { + if (GlobalVarPtr glob_ref = sym->try_as()) { // handle `globalVar = rhs` / `mutate globalVar` if (lval_ctx && !lval_ctx->is_rval_inside_lval()) { std::vector lval_ir_idx = code.create_tmp_var(glob_ref->declared_type, loc, "(lval-glob)"); @@ -410,7 +590,7 @@ std::vector pre_compile_symbol(SrcLocation loc, const Symbol* sym, Co } return local_ir_idx; } - if (const auto* const_ref = sym->try_as()) { + if (GlobalConstPtr const_ref = sym->try_as()) { if (const_ref->is_int_const()) { std::vector rvect = code.create_tmp_var(TypeDataInt::create(), loc, "(glob-const)"); code.emplace_back(loc, Op::_IntConst, rvect, const_ref->as_int_const()); @@ -421,44 +601,59 @@ std::vector pre_compile_symbol(SrcLocation loc, const Symbol* sym, Co return rvect; } } - if (const auto* fun_ref = sym->try_as()) { + if (FunctionPtr fun_ref = sym->try_as()) { std::vector rvect = code.create_tmp_var(fun_ref->inferred_full_type, loc, "(glob-var-fun)"); code.emplace_back(loc, 
Op::_GlobVar, rvect, std::vector{}, fun_ref); return rvect; } - if (const auto* var_ref = sym->try_as()) { + if (LocalVarPtr var_ref = sym->try_as()) { #ifdef TOLK_DEBUG - tolk_assert(static_cast(var_ref->ir_idx.size()) == var_ref->declared_type->calc_width_on_stack()); + tolk_assert(static_cast(var_ref->ir_idx.size()) == var_ref->declared_type->get_width_on_stack()); #endif return var_ref->ir_idx; } throw Fatal("pre_compile_symbol"); } -static std::vector process_assignment(V v, CodeBlob& code) { +static std::vector process_reference(V v, CodeBlob& code, TypePtr target_type, LValContext* lval_ctx) { + std::vector rvect = pre_compile_symbol(v->loc, v->sym, code, lval_ctx); + return transition_to_target_type(std::move(rvect), code, target_type, v); +} + +static std::vector process_assignment(V v, CodeBlob& code, TypePtr target_type) { if (auto lhs_decl = v->get_lhs()->try_as()) { - return pre_compile_let(code, lhs_decl->get_expr(), v->get_rhs(), v->loc); + std::vector rvect = pre_compile_let(code, lhs_decl->get_expr(), v->get_rhs(), v->loc); + return transition_to_target_type(std::move(rvect), code, target_type, v); } else { - return pre_compile_let(code, v->get_lhs(), v->get_rhs(), v->loc); + std::vector rvect = pre_compile_let(code, v->get_lhs(), v->get_rhs(), v->loc); + // now rvect contains rhs IR vars constructed to fit lhs (for correct assignment, lhs type was target_type for rhs) + // but the type of `lhs = rhs` is RHS (see type inferring), so rvect now should fit rhs->inferred_type (= v->inferred_type) + // example: `t1 = t2 = null`, we're at `t2 = null`, earlier declared t1: `int?`, t2: `(int,int)?` + // currently "null" matches t2 (3 null slots), but type of this assignment is "plain null" (1 slot) assigned later to t1 + rvect = transition_expr_to_runtime_type_impl(std::move(rvect), code, v->get_lhs()->inferred_type, v->inferred_type, v->loc); + return transition_to_target_type(std::move(rvect), code, target_type, v); } } -static std::vector 
process_set_assign(V v, CodeBlob& code) { +static std::vector process_set_assign(V v, CodeBlob& code, TypePtr target_type) { // for "a += b", emulate "a = a + b" // seems not beautiful, but it works; probably, this transformation should be done at AST level in advance std::string_view calc_operator = v->operator_name; // "+" for operator += auto v_apply = createV(v->loc, calc_operator, static_cast(v->tok - 1), v->get_lhs(), v->get_rhs()); v_apply->assign_inferred_type(v->inferred_type); v_apply->assign_fun_ref(v->fun_ref); - return pre_compile_let(code, v->get_lhs(), v_apply, v->loc); + + std::vector rvect = pre_compile_let(code, v->get_lhs(), v_apply, v->loc); + return transition_to_target_type(std::move(rvect), code, target_type, v); } -static std::vector process_binary_operator(V v, CodeBlob& code) { +static std::vector process_binary_operator(V v, CodeBlob& code, TypePtr target_type) { TokenType t = v->tok; if (v->fun_ref) { // almost all operators, fun_ref was assigned at type inferring std::vector args_vars = pre_compile_tensor(code, {v->get_lhs(), v->get_rhs()}); - return gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), v->fun_ref, "(binary-op)"); + std::vector rvect = gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), v->fun_ref, "(binary-op)"); + return transition_to_target_type(std::move(rvect), code, target_type, v); } if (t == tok_logical_and || t == tok_logical_or) { // do the following transformations: @@ -470,43 +665,86 @@ static std::vector process_binary_operator(V v, v_1->mutate()->assign_inferred_type(TypeDataInt::create()); auto v_b_ne_0 = createV(v->loc, "!=", tok_neq, v->get_rhs(), v_0); v_b_ne_0->mutate()->assign_inferred_type(TypeDataInt::create()); - v_b_ne_0->mutate()->assign_fun_ref(lookup_global_symbol("_!=_")->as()); - std::vector cond = pre_compile_expr(v->get_lhs(), code); + v_b_ne_0->mutate()->assign_fun_ref(lookup_global_symbol("_!=_")->try_as()); + std::vector cond = pre_compile_expr(v->get_lhs(), 
code, nullptr); tolk_assert(cond.size() == 1); - std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc, "(cond)"); + std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc, "(ternary)"); Op& if_op = code.emplace_back(v->loc, Op::_If, cond); code.push_set_cur(if_op.block0); - code.emplace_back(v->loc, Op::_Let, rvect, pre_compile_expr(t == tok_logical_and ? v_b_ne_0 : v_1, code)); + code.emplace_back(v->loc, Op::_Let, rvect, pre_compile_expr(t == tok_logical_and ? v_b_ne_0 : v_1, code, nullptr)); code.close_pop_cur(v->loc); code.push_set_cur(if_op.block1); - code.emplace_back(v->loc, Op::_Let, rvect, pre_compile_expr(t == tok_logical_and ? v_0 : v_b_ne_0, code)); + code.emplace_back(v->loc, Op::_Let, rvect, pre_compile_expr(t == tok_logical_and ? v_0 : v_b_ne_0, code, nullptr)); code.close_pop_cur(v->loc); - return rvect; + return transition_to_target_type(std::move(rvect), code, target_type, v); } throw UnexpectedASTNodeType(v, "process_binary_operator"); } -static std::vector process_unary_operator(V v, CodeBlob& code) { - std::vector args_vars = pre_compile_tensor(code, {v->get_rhs()}); - return gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), v->fun_ref, "(unary-op)"); +static std::vector process_unary_operator(V v, CodeBlob& code, TypePtr target_type) { + std::vector rhs_vars = pre_compile_expr(v->get_rhs(), code, nullptr); + std::vector rvect = gen_op_call(code, v->inferred_type, v->loc, std::move(rhs_vars), v->fun_ref, "(unary-op)"); + return transition_to_target_type(std::move(rvect), code, target_type, v); } -static std::vector process_ternary_operator(V v, CodeBlob& code) { - std::vector cond = pre_compile_expr(v->get_cond(), code); +static std::vector process_ternary_operator(V v, CodeBlob& code, TypePtr target_type) { + std::vector cond = pre_compile_expr(v->get_cond(), code, nullptr); tolk_assert(cond.size() == 1); std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc, "(cond)"); Op& if_op = 
code.emplace_back(v->loc, Op::_If, cond); code.push_set_cur(if_op.block0); - code.emplace_back(v->get_when_true()->loc, Op::_Let, rvect, pre_compile_expr(v->get_when_true(), code)); + code.emplace_back(v->get_when_true()->loc, Op::_Let, rvect, pre_compile_expr(v->get_when_true(), code, v->inferred_type)); code.close_pop_cur(v->get_when_true()->loc); code.push_set_cur(if_op.block1); - code.emplace_back(v->get_when_false()->loc, Op::_Let, rvect, pre_compile_expr(v->get_when_false(), code)); + code.emplace_back(v->get_when_false()->loc, Op::_Let, rvect, pre_compile_expr(v->get_when_false(), code, v->inferred_type)); code.close_pop_cur(v->get_when_false()->loc); - return rvect; + return transition_to_target_type(std::move(rvect), code, target_type, v); } -static std::vector process_dot_access(V v, CodeBlob& code, LValContext* lval_ctx) { +static std::vector process_cast_as_operator(V v, CodeBlob& code, TypePtr target_type, LValContext* lval_ctx) { + TypePtr child_target_type = v->cast_to_type; + std::vector rvect = pre_compile_expr(v->get_expr(), code, child_target_type, lval_ctx); + return transition_to_target_type(std::move(rvect), code, target_type, v); +} + +static std::vector process_not_null_operator(V v, CodeBlob& code, TypePtr target_type, LValContext* lval_ctx) { + TypePtr child_target_type = v->get_expr()->inferred_type; + if (const auto* as_nullable = child_target_type->try_as()) { + child_target_type = as_nullable->inner; + } + std::vector rvect = pre_compile_expr(v->get_expr(), code, child_target_type, lval_ctx); + return transition_to_target_type(std::move(rvect), code, target_type, v); +} + +static std::vector process_is_null_check(V v, CodeBlob& code, TypePtr target_type) { + std::vector expr_ir_idx = pre_compile_expr(v->get_expr(), code, nullptr); + std::vector isnull_ir_idx = code.create_tmp_var(TypeDataBool::create(), v->loc, "(is-null)"); + TypePtr expr_type = v->get_expr()->inferred_type; + + if (const TypeDataNullable* t_nullable = 
expr_type->try_as()) { + if (!t_nullable->is_primitive_nullable()) { + std::vector zero_ir_idx = code.create_tmp_var(TypeDataInt::create(), v->loc, "(zero)"); + code.emplace_back(v->loc, Op::_IntConst, zero_ir_idx, td::make_refint(0)); + FunctionPtr eq_sym = lookup_global_symbol("_==_")->try_as(); + code.emplace_back(v->loc, Op::_Call, isnull_ir_idx, std::vector{expr_ir_idx.back(), zero_ir_idx[0]}, eq_sym); + } else { + FunctionPtr builtin_sym = lookup_global_symbol("__isNull")->try_as(); + code.emplace_back(v->loc, Op::_Call, isnull_ir_idx, expr_ir_idx, builtin_sym); + } + } else { + bool always_null = expr_type == TypeDataNullLiteral::create(); + code.emplace_back(v->loc, Op::_IntConst, isnull_ir_idx, td::make_refint(always_null ? -1 : 0)); + } + + if (v->is_negated) { + FunctionPtr not_sym = lookup_global_symbol("!b_")->try_as(); + code.emplace_back(v->loc, Op::_Call, isnull_ir_idx, std::vector{isnull_ir_idx}, not_sym); + } + return transition_to_target_type(std::move(isnull_ir_idx), code, target_type, v); +} + +static std::vector process_dot_access(V v, CodeBlob& code, TypePtr target_type, LValContext* lval_ctx) { // it's NOT a method call `t.tupleSize()` (since such cases are handled by process_function_call) // it's `t.0`, `getUser().id`, and `t.tupleSize` (as a reference, not as a call) if (!v->is_target_fun_ref()) { @@ -516,20 +754,21 @@ static std::vector process_dot_access(V v, CodeBlob& if (const auto* t_tensor = obj_type->try_as()) { // handle `tensorVar.0 = rhs` if tensors is a global, special case, then the global will be read on demand if (lval_ctx && !lval_ctx->is_rval_inside_lval()) { - if (auto sink = calc_sink_leftmost_obj(v); sink && sink->sym->try_as()) { + if (auto sink = calc_sink_leftmost_obj(v); sink && sink->sym->try_as()) { std::vector lval_ir_idx = code.create_tmp_var(v->inferred_type, v->loc, "(lval-global-tensor)"); lval_ctx->capture_field_of_global_modification(v->get_obj(), index_at, lval_ir_idx); return lval_ir_idx; } } // since a 
tensor of N elems are N vars on a stack actually, calculate offset - std::vector lhs_vars = pre_compile_expr(v->get_obj(), code, lval_ctx); - int stack_width = t_tensor->items[index_at]->calc_width_on_stack(); + std::vector lhs_vars = pre_compile_expr(v->get_obj(), code, nullptr, lval_ctx); + int stack_width = t_tensor->items[index_at]->get_width_on_stack(); int stack_offset = 0; for (int i = 0; i < index_at; ++i) { - stack_offset += t_tensor->items[i]->calc_width_on_stack(); + stack_offset += t_tensor->items[i]->get_width_on_stack(); } - return {lhs_vars.begin() + stack_offset, lhs_vars.begin() + stack_offset + stack_width}; + std::vector rvect{lhs_vars.begin() + stack_offset, lhs_vars.begin() + stack_offset + stack_width}; + return transition_to_target_type(std::move(rvect), code, target_type, v); } // `tupleVar.0` if (obj_type->try_as() || obj_type->try_as()) { @@ -545,40 +784,52 @@ static std::vector process_dot_access(V v, CodeBlob& code.emplace_back(v->loc, Op::_IntConst, index_ir_idx, td::make_refint(index_at)); std::vector field_ir_idx = code.create_tmp_var(v->inferred_type, v->loc, "(tuple-field)"); tolk_assert(tuple_ir_idx.size() == 1 && field_ir_idx.size() == 1); // tuples contain only 1-slot values - const FunctionData* builtin_sym = lookup_global_symbol("tupleAt")->as(); + FunctionPtr builtin_sym = lookup_global_symbol("tupleAt")->try_as(); code.emplace_back(v->loc, Op::_Call, field_ir_idx, std::vector{tuple_ir_idx[0], index_ir_idx[0]}, builtin_sym); if (lval_ctx && calc_sink_leftmost_obj(v)) { // `tupleVar.0.1 = rhs`, then `tupleVar.0` is rval inside lval lval_ctx->capture_tuple_index_modification(v->get_obj(), index_at, field_ir_idx); } - return field_ir_idx; + // like tensor index, `tupleVar.1` also might be smart cast, for example we're in `if (tupleVar.1 != null)` + // but since tuple's elements are only 1-slot width (no tensors and unions), no stack transformations required + return transition_to_target_type(std::move(field_ir_idx), code, 
target_type, v); } tolk_assert(false); } // okay, v->target refs a function, like `obj.method`, filled at type inferring // (currently, nothing except a global function can be referenced, no object-scope methods exist) - const FunctionData* fun_ref = std::get(v->target); + FunctionPtr fun_ref = std::get(v->target); tolk_assert(fun_ref); - return pre_compile_symbol(v->loc, fun_ref, code, lval_ctx); + std::vector rvect = pre_compile_symbol(v->loc, fun_ref, code, lval_ctx); + return transition_to_target_type(std::move(rvect), code, target_type, v); } -static std::vector process_function_call(V v, CodeBlob& code) { +static std::vector process_function_call(V v, CodeBlob& code, TypePtr target_type) { // v is `globalF(args)` / `globalF(args)` / `obj.method(args)` / `local_var(args)` / `getF()(args)` - const FunctionData* fun_ref = v->fun_maybe; + FunctionPtr fun_ref = v->fun_maybe; if (!fun_ref) { + // it's `local_var(args)`, treat args like a tensor: + // 1) when variables are modified like `local_var(x, x += 2, x)`, regular mechanism of watching automatically works + // 2) when `null` is passed to `(int, int)?`, or any other type transitions, it automatically works std::vector args; args.reserve(v->get_num_args()); for (int i = 0; i < v->get_num_args(); ++i) { args.push_back(v->get_arg(i)->get_expr()); } - std::vector args_vars = pre_compile_tensor(code, args); - std::vector tfunc = pre_compile_expr(v->get_callee(), code); + std::vector params_types = v->get_callee()->inferred_type->try_as()->params_types; + const TypeDataTensor* tensor_tt = TypeDataTensor::create(std::move(params_types))->try_as(); + std::vector> vars_per_arg = pre_compile_tensor_inner(code, args, tensor_tt, nullptr); + std::vector args_vars; + for (const std::vector& list : vars_per_arg) { + args_vars.insert(args_vars.end(), list.cbegin(), list.cend()); + } + std::vector tfunc = pre_compile_expr(v->get_callee(), code, nullptr); tolk_assert(tfunc.size() == 1); args_vars.push_back(tfunc[0]); 
std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc, "(call-ind)"); Op& op = code.emplace_back(v->loc, Op::_CallInd, rvect, std::move(args_vars)); op.set_impure_flag(); - return rvect; + return transition_to_target_type(std::move(rvect), code, target_type, v); } int delta_self = v->is_dot_call(); @@ -595,7 +846,11 @@ static std::vector process_function_call(V v, Code for (int i = 0; i < v->get_num_args(); ++i) { args.push_back(v->get_arg(i)->get_expr()); } - std::vector> vars_per_arg = pre_compile_tensor_inner(code, args, nullptr); + // the purpose of tensor_tt ("tensor target type") is to transition `null` to `(int, int)?` and so on + // the purpose of calling `pre_compile_tensor_inner` is to have 0-th IR vars to handle return self + std::vector params_types = fun_ref->inferred_full_type->try_as()->params_types; + const TypeDataTensor* tensor_tt = TypeDataTensor::create(std::move(params_types))->try_as(); + std::vector> vars_per_arg = pre_compile_tensor_inner(code, args, tensor_tt, nullptr); TypePtr op_call_type = v->inferred_type; TypePtr real_ret_type = v->inferred_type; @@ -609,7 +864,7 @@ static std::vector process_function_call(V v, Code std::vector types_list; for (int i = 0; i < delta_self + v->get_num_args(); ++i) { if (fun_ref->parameters[i].is_mutate_parameter()) { - types_list.push_back(args[i]->inferred_type); + types_list.push_back(fun_ref->parameters[i].declared_type); } } types_list.push_back(real_ret_type); @@ -630,7 +885,7 @@ static std::vector process_function_call(V v, Code AnyExprV arg_i = obj_leftmost && i == 0 ? 
obj_leftmost : args[i]; tolk_assert(arg_i->is_lvalue || i == 0); if (arg_i->is_lvalue) { - std::vector ith_var_idx = pre_compile_expr(arg_i, code, &local_lval); + std::vector ith_var_idx = pre_compile_expr(arg_i, code, nullptr, &local_lval); left.insert(left.end(), ith_var_idx.begin(), ith_var_idx.end()); } else { left.insert(left.end(), vars_per_arg[0].begin(), vars_per_arg[0].end()); @@ -647,36 +902,39 @@ static std::vector process_function_call(V v, Code if (obj_leftmost && fun_ref->does_return_self()) { if (obj_leftmost->is_lvalue) { // to handle if obj is global var, potentially re-assigned inside a chain - rvect_apply = pre_compile_expr(obj_leftmost, code); + rvect_apply = pre_compile_expr(obj_leftmost, code, nullptr); } else { // temporary object, not lvalue, pre_compile_expr rvect_apply = vars_per_arg[0]; } } - return rvect_apply; + return transition_to_target_type(std::move(rvect_apply), code, target_type, v); } -static std::vector process_tensor(V v, CodeBlob& code, LValContext* lval_ctx) { - return pre_compile_tensor(code, v->get_items(), lval_ctx); +static std::vector process_tensor(V v, CodeBlob& code, TypePtr target_type, LValContext* lval_ctx) { + // tensor is compiled "as is", for example `(1, null)` occupies 2 slots + // and if assigned/passed to something other, like `(int, (int,int)?)`, a whole tensor is transitioned, it works + std::vector rvect = pre_compile_tensor(code, v->get_items(), lval_ctx); + return transition_to_target_type(std::move(rvect), code, target_type, v); } -static std::vector process_typed_tuple(V v, CodeBlob& code, LValContext* lval_ctx) { +static std::vector process_typed_tuple(V v, CodeBlob& code, TypePtr target_type, LValContext* lval_ctx) { if (lval_ctx) { // todo some time, make "var (a, [b,c]) = (1, [2,3])" work v->error("[...] 
can not be used as lvalue here"); } std::vector left = code.create_tmp_var(v->inferred_type, v->loc, "(pack-tuple)"); std::vector right = pre_compile_tensor(code, v->get_items(), lval_ctx); code.emplace_back(v->loc, Op::_Tuple, left, std::move(right)); - return left; + return transition_to_target_type(std::move(left), code, target_type, v); } -static std::vector process_int_const(V v, CodeBlob& code) { +static std::vector process_int_const(V v, CodeBlob& code, TypePtr target_type) { std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc, "(int-const)"); code.emplace_back(v->loc, Op::_IntConst, rvect, v->intval); - return rvect; + return transition_to_target_type(std::move(rvect), code, target_type, v); } -static std::vector process_string_const(V v, CodeBlob& code) { +static std::vector process_string_const(V v, CodeBlob& code, TypePtr target_type) { ConstantValue value = eval_const_init_value(v); std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc, "(str-const)"); if (value.is_int()) { @@ -684,27 +942,31 @@ static std::vector process_string_const(V v, CodeBl } else { code.emplace_back(v->loc, Op::_SliceConst, rvect, value.as_slice()); } - return rvect; + return transition_to_target_type(std::move(rvect), code, target_type, v); } -static std::vector process_bool_const(V v, CodeBlob& code) { - const FunctionData* builtin_sym = lookup_global_symbol(v->bool_val ? "__true" : "__false")->as(); - return gen_op_call(code, v->inferred_type, v->loc, {}, builtin_sym, "(bool-const)"); +static std::vector process_bool_const(V v, CodeBlob& code, TypePtr target_type) { + FunctionPtr builtin_sym = lookup_global_symbol(v->bool_val ? 
"__true" : "__false")->try_as(); + std::vector rvect = gen_op_call(code, v->inferred_type, v->loc, {}, builtin_sym, "(bool-const)"); + return transition_to_target_type(std::move(rvect), code, target_type, v); } -static std::vector process_null_keyword(V v, CodeBlob& code) { - const FunctionData* builtin_sym = lookup_global_symbol("__null")->as(); - return gen_op_call(code, v->inferred_type, v->loc, {}, builtin_sym, "(null-literal)"); +static std::vector process_null_keyword(V v, CodeBlob& code, TypePtr target_type) { + FunctionPtr builtin_sym = lookup_global_symbol("__null")->try_as(); + std::vector rvect = gen_op_call(code, v->inferred_type, v->loc, {}, builtin_sym, "(null-literal)"); + return transition_to_target_type(std::move(rvect), code, target_type, v); } -static std::vector process_local_var(V v, CodeBlob& code) { +static std::vector process_local_var(V v, CodeBlob& code, TypePtr target_type) { if (v->marked_as_redef) { - return pre_compile_symbol(v->loc, v->var_ref, code, nullptr); + std::vector rvect = pre_compile_symbol(v->loc, v->var_ref, code, nullptr); + return transition_to_target_type(std::move(rvect), code, target_type, v); } tolk_assert(v->var_ref->ir_idx.empty()); v->var_ref->mutate()->assign_ir_idx(code.create_var(v->inferred_type, v->loc, v->var_ref->name)); - return v->var_ref->ir_idx; + std::vector rvect = v->var_ref->ir_idx; + return transition_to_target_type(std::move(rvect), code, target_type, v); } static std::vector process_local_vars_declaration(V, CodeBlob&) { @@ -718,42 +980,46 @@ static std::vector process_underscore(V v, CodeBlob& return code.create_tmp_var(v->inferred_type, v->loc, "(underscore)"); } -std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, LValContext* lval_ctx) { +std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, TypePtr target_type, LValContext* lval_ctx) { switch (v->type) { case ast_reference: - return pre_compile_symbol(v->loc, v->as()->sym, code, lval_ctx); + return process_reference(v->as(), code, 
target_type, lval_ctx); case ast_assign: - return process_assignment(v->as(), code); + return process_assignment(v->as(), code, target_type); case ast_set_assign: - return process_set_assign(v->as(), code); + return process_set_assign(v->as(), code, target_type); case ast_binary_operator: - return process_binary_operator(v->as(), code); + return process_binary_operator(v->as(), code, target_type); case ast_unary_operator: - return process_unary_operator(v->as(), code); + return process_unary_operator(v->as(), code, target_type); case ast_ternary_operator: - return process_ternary_operator(v->as(), code); + return process_ternary_operator(v->as(), code, target_type); case ast_cast_as_operator: - return pre_compile_expr(v->as()->get_expr(), code, lval_ctx); + return process_cast_as_operator(v->as(), code, target_type, lval_ctx); + case ast_not_null_operator: + return process_not_null_operator(v->as(), code, target_type, lval_ctx); + case ast_is_null_check: + return process_is_null_check(v->as(), code, target_type); case ast_dot_access: - return process_dot_access(v->as(), code, lval_ctx); + return process_dot_access(v->as(), code, target_type, lval_ctx); case ast_function_call: - return process_function_call(v->as(), code); + return process_function_call(v->as(), code, target_type); case ast_parenthesized_expression: - return pre_compile_expr(v->as()->get_expr(), code, lval_ctx); + return pre_compile_expr(v->as()->get_expr(), code, target_type, lval_ctx); case ast_tensor: - return process_tensor(v->as(), code, lval_ctx); + return process_tensor(v->as(), code, target_type, lval_ctx); case ast_typed_tuple: - return process_typed_tuple(v->as(), code, lval_ctx); + return process_typed_tuple(v->as(), code, target_type, lval_ctx); case ast_int_const: - return process_int_const(v->as(), code); + return process_int_const(v->as(), code, target_type); case ast_string_const: - return process_string_const(v->as(), code); + return process_string_const(v->as(), code, target_type); 
case ast_bool_const: - return process_bool_const(v->as(), code); + return process_bool_const(v->as(), code, target_type); case ast_null_keyword: - return process_null_keyword(v->as(), code); + return process_null_keyword(v->as(), code, target_type); case ast_local_var_lhs: - return process_local_var(v->as(), code); + return process_local_var(v->as(), code, target_type); case ast_local_vars_declaration: return process_local_vars_declaration(v->as(), code); case ast_underscore: @@ -784,14 +1050,14 @@ static void process_assert_statement(V v, CodeBlob& code) args[2]->mutate()->assign_inferred_type(TypeDataInt::create()); } - const FunctionData* builtin_sym = lookup_global_symbol("__throw_if_unless")->as(); + FunctionPtr builtin_sym = lookup_global_symbol("__throw_if_unless")->try_as(); std::vector args_vars = pre_compile_tensor(code, args); gen_op_call(code, TypeDataVoid::create(), v->loc, std::move(args_vars), builtin_sym, "(throw-call)"); } static void process_catch_variable(AnyExprV v_catch_var, CodeBlob& code) { if (auto v_ref = v_catch_var->try_as(); v_ref && v_ref->sym) { // not underscore - const LocalVarData* var_ref = v_ref->sym->as(); + LocalVarPtr var_ref = v_ref->sym->try_as(); tolk_assert(var_ref->ir_idx.empty()); var_ref->mutate()->assign_ir_idx(code.create_var(v_catch_var->inferred_type, v_catch_var->loc, var_ref->name)); } @@ -816,7 +1082,7 @@ static void process_try_catch_statement(V v, CodeBlob& } static void process_repeat_statement(V v, CodeBlob& code) { - std::vector tmp_vars = pre_compile_expr(v->get_cond(), code); + std::vector tmp_vars = pre_compile_expr(v->get_cond(), code, nullptr); Op& repeat_op = code.emplace_back(v->loc, Op::_Repeat, tmp_vars); code.push_set_cur(repeat_op.block0); process_any_statement(v->get_body(), code); @@ -824,7 +1090,7 @@ static void process_repeat_statement(V v, CodeBlob& code) } static void process_if_statement(V v, CodeBlob& code) { - std::vector tmp_vars = pre_compile_expr(v->get_cond(), code); + std::vector 
tmp_vars = pre_compile_expr(v->get_cond(), code, nullptr); Op& if_op = code.emplace_back(v->loc, Op::_If, std::move(tmp_vars)); code.push_set_cur(if_op.block0); process_any_statement(v->get_if_body(), code); @@ -869,19 +1135,21 @@ static void process_do_while_statement(V v, CodeBlob& co } until_cond->mutate()->assign_inferred_type(TypeDataInt::create()); if (auto v_bin = until_cond->try_as(); v_bin && !v_bin->fun_ref) { - v_bin->mutate()->assign_fun_ref(lookup_global_symbol("_" + static_cast(v_bin->operator_name) + "_")->as()); + v_bin->mutate()->assign_fun_ref(lookup_global_symbol("_" + static_cast(v_bin->operator_name) + "_")->try_as()); } else if (auto v_un = until_cond->try_as(); v_un && !v_un->fun_ref) { - v_un->mutate()->assign_fun_ref(lookup_global_symbol(static_cast(v_un->operator_name) + "_")->as()); + v_un->mutate()->assign_fun_ref(lookup_global_symbol(static_cast(v_un->operator_name) + "_")->try_as()); } - until_op.left = pre_compile_expr(until_cond, code); + until_op.left = pre_compile_expr(until_cond, code, nullptr); + tolk_assert(until_op.left.size() == 1); code.close_pop_cur(v->get_body()->loc_end); } static void process_while_statement(V v, CodeBlob& code) { Op& while_op = code.emplace_back(v->loc, Op::_While); code.push_set_cur(while_op.block0); - while_op.left = pre_compile_expr(v->get_cond(), code); + while_op.left = pre_compile_expr(v->get_cond(), code, nullptr); + tolk_assert(while_op.left.size() == 1); code.close_pop_cur(v->get_body()->loc); code.push_set_cur(while_op.block1); process_any_statement(v->get_body(), code); @@ -890,18 +1158,25 @@ static void process_while_statement(V v, CodeBlob& code) { static void process_throw_statement(V v, CodeBlob& code) { if (v->has_thrown_arg()) { - const FunctionData* builtin_sym = lookup_global_symbol("__throw_arg")->as(); + FunctionPtr builtin_sym = lookup_global_symbol("__throw_arg")->try_as(); std::vector args_vars = pre_compile_tensor(code, {v->get_thrown_arg(), v->get_thrown_code()}); 
gen_op_call(code, TypeDataVoid::create(), v->loc, std::move(args_vars), builtin_sym, "(throw-call)"); } else { - const FunctionData* builtin_sym = lookup_global_symbol("__throw")->as(); + FunctionPtr builtin_sym = lookup_global_symbol("__throw")->try_as(); std::vector args_vars = pre_compile_tensor(code, {v->get_thrown_code()}); gen_op_call(code, TypeDataVoid::create(), v->loc, std::move(args_vars), builtin_sym, "(throw-call)"); } } static void process_return_statement(V v, CodeBlob& code) { - std::vector return_vars = v->has_return_value() ? pre_compile_expr(v->get_return_value(), code) : std::vector{}; + std::vector return_vars; + if (v->has_return_value()) { + TypePtr child_target_type = code.fun_ref->inferred_return_type; + if (code.fun_ref->does_return_self()) { + child_target_type = code.fun_ref->parameters[0].declared_type; + } + return_vars = pre_compile_expr(v->get_return_value(), code, child_target_type); + } if (code.fun_ref->does_return_self()) { return_vars = {}; } @@ -953,18 +1228,18 @@ void process_any_statement(AnyV v, CodeBlob& code) { case ast_empty_statement: return; default: - pre_compile_expr(reinterpret_cast(v), code); + pre_compile_expr(reinterpret_cast(v), code, nullptr); } } -static void convert_function_body_to_CodeBlob(const FunctionData* fun_ref, FunctionBodyCode* code_body) { +static void convert_function_body_to_CodeBlob(FunctionPtr fun_ref, FunctionBodyCode* code_body) { auto v_body = fun_ref->ast_root->as()->get_body()->as(); CodeBlob* blob = new CodeBlob{fun_ref->name, fun_ref->loc, fun_ref}; std::vector rvect_import; int total_arg_width = 0; for (int i = 0; i < fun_ref->get_num_params(); ++i) { - total_arg_width += fun_ref->parameters[i].declared_type->calc_width_on_stack(); + total_arg_width += fun_ref->parameters[i].declared_type->get_width_on_stack(); } rvect_import.reserve(total_arg_width); @@ -990,9 +1265,9 @@ static void convert_function_body_to_CodeBlob(const FunctionData* fun_ref, Funct 
tolk_assert(vars_modification_watcher.empty()); } -static void convert_asm_body_to_AsmOp(const FunctionData* fun_ref, FunctionBodyAsm* asm_body) { +static void convert_asm_body_to_AsmOp(FunctionPtr fun_ref, FunctionBodyAsm* asm_body) { int cnt = fun_ref->get_num_params(); - int width = fun_ref->inferred_return_type->calc_width_on_stack(); + int width = fun_ref->inferred_return_type->get_width_on_stack(); std::vector asm_ops; for (AnyV v_child : fun_ref->ast_root->as()->get_body()->as()->get_asm_commands()) { std::string_view ops = v_child->as()->str_val; // \n\n... @@ -1023,15 +1298,15 @@ static void convert_asm_body_to_AsmOp(const FunctionData* fun_ref, FunctionBodyA class UpdateArgRetOrderConsideringStackWidth final { public: - static bool should_visit_function(const FunctionData* fun_ref) { + static bool should_visit_function(FunctionPtr fun_ref) { return !fun_ref->is_generic_function() && (!fun_ref->ret_order.empty() || !fun_ref->arg_order.empty()); } - static void start_visiting_function(const FunctionData* fun_ref, V v_function) { + static void start_visiting_function(FunctionPtr fun_ref, V v_function) { int total_arg_mutate_width = 0; bool has_arg_width_not_1 = false; for (const LocalVarData& param : fun_ref->parameters) { - int arg_width = param.declared_type->calc_width_on_stack(); + int arg_width = param.declared_type->get_width_on_stack(); has_arg_width_not_1 |= arg_width != 1; total_arg_mutate_width += param.is_mutate_parameter() * arg_width; } @@ -1045,7 +1320,7 @@ public: cum_arg_width.reserve(1 + fun_ref->get_num_params()); cum_arg_width.push_back(0); for (const LocalVarData& param : fun_ref->parameters) { - cum_arg_width.push_back(total_arg_width += param.declared_type->calc_width_on_stack()); + cum_arg_width.push_back(total_arg_width += param.declared_type->get_width_on_stack()); } std::vector arg_order; for (int i = 0; i < fun_ref->get_num_params(); ++i) { @@ -1062,7 +1337,7 @@ public: // ret_order is a shuffled range 0...N // validate N: a 
function should return value and mutated arguments onto a stack if (!fun_ref->ret_order.empty()) { - size_t expected_width = fun_ref->inferred_return_type->calc_width_on_stack() + total_arg_mutate_width; + size_t expected_width = fun_ref->inferred_return_type->get_width_on_stack() + total_arg_mutate_width; if (expected_width != fun_ref->ret_order.size()) { v_function->get_body()->error("ret_order (after ->) expected to contain " + std::to_string(expected_width) + " numbers"); } @@ -1072,11 +1347,11 @@ public: class ConvertASTToLegacyOpVisitor final { public: - static bool should_visit_function(const FunctionData* fun_ref) { + static bool should_visit_function(FunctionPtr fun_ref) { return !fun_ref->is_generic_function(); } - static void start_visiting_function(const FunctionData* fun_ref, V) { + static void start_visiting_function(FunctionPtr fun_ref, V) { tolk_assert(fun_ref->is_type_inferring_done()); if (fun_ref->is_code_function()) { convert_function_body_to_CodeBlob(fun_ref, std::get(fun_ref->body)); diff --git a/tolk/pipe-calc-rvalue-lvalue.cpp b/tolk/pipe-calc-rvalue-lvalue.cpp index 041aec89..1f374bc8 100644 --- a/tolk/pipe-calc-rvalue-lvalue.cpp +++ b/tolk/pipe-calc-rvalue-lvalue.cpp @@ -177,6 +177,18 @@ class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody { parent::visit(v->get_expr()); // leave lvalue state unchanged, for `mutate (t.0 as int)` both `t.0 as int` and `t.0` are lvalue } + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + parent::visit(v->get_expr()); // leave lvalue state unchanged, for `mutate x!` both `x!` and `x` are lvalue + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + MarkingState saved = enter_state(MarkingState::RValue); + parent::visit(v->get_expr()); + restore_state(saved); + } + void visit(V v) override { tolk_assert(cur_state == MarkingState::LValue); mark_vertex_cur_or_rvalue(v); @@ -198,7 +210,7 @@ class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody { } 
public: - bool should_visit_function(const FunctionData* fun_ref) override { + bool should_visit_function(FunctionPtr fun_ref) override { return fun_ref->is_code_function() && !fun_ref->is_generic_function(); } }; @@ -207,7 +219,7 @@ void pipeline_calculate_rvalue_lvalue() { visit_ast_of_all_functions(); } -void pipeline_calculate_rvalue_lvalue(const FunctionData* fun_ref) { +void pipeline_calculate_rvalue_lvalue(FunctionPtr fun_ref) { CalculateRvalueLvalueVisitor visitor; if (visitor.should_visit_function(fun_ref)) { visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); diff --git a/tolk/pipe-check-pure-impure.cpp b/tolk/pipe-check-pure-impure.cpp index 2b2e1e67..366ff160 100644 --- a/tolk/pipe-check-pure-impure.cpp +++ b/tolk/pipe-check-pure-impure.cpp @@ -34,7 +34,7 @@ static void fire_error_impure_operation_inside_pure_function(AnyV v) { class CheckImpureOperationsInPureFunctionVisitor final : public ASTVisitorFunctionBody { static void fire_if_global_var(AnyExprV v) { if (auto v_ident = v->try_as()) { - if (v_ident->sym->try_as()) { + if (v_ident->sym->try_as()) { fire_error_impure_operation_inside_pure_function(v); } } @@ -81,7 +81,7 @@ class CheckImpureOperationsInPureFunctionVisitor final : public ASTVisitorFuncti } public: - bool should_visit_function(const FunctionData* fun_ref) override { + bool should_visit_function(FunctionPtr fun_ref) override { return fun_ref->is_code_function() && !fun_ref->is_generic_function() && fun_ref->is_marked_as_pure(); } }; diff --git a/tolk/pipe-check-rvalue-lvalue.cpp b/tolk/pipe-check-rvalue-lvalue.cpp index a824cc5d..3ec47a16 100644 --- a/tolk/pipe-check-rvalue-lvalue.cpp +++ b/tolk/pipe-check-rvalue-lvalue.cpp @@ -37,7 +37,7 @@ static void fire_error_cannot_be_used_as_lvalue(AnyV v, const std::string& detai } GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_modifying_immutable_variable(AnyExprV v, const LocalVarData* var_ref) { +static void fire_error_modifying_immutable_variable(AnyExprV 
v, LocalVarPtr var_ref) { if (var_ref->param_idx == 0 && var_ref->name == "self") { v->error("modifying `self`, which is immutable by default; probably, you want to declare `mutate self`"); } else { @@ -47,7 +47,7 @@ static void fire_error_modifying_immutable_variable(AnyExprV v, const LocalVarDa // validate a function used as rvalue, like `var cb = f` // it's not a generic function (ensured earlier at type inferring) and has some more restrictions -static void validate_function_used_as_noncall(AnyExprV v, const FunctionData* fun_ref) { +static void validate_function_used_as_noncall(AnyExprV v, FunctionPtr fun_ref) { if (!fun_ref->arg_order.empty() || !fun_ref->ret_order.empty()) { v->error("saving `" + fun_ref->name + "` into a variable will most likely lead to invalid usage, since it changes the order of variables on the stack"); } @@ -97,6 +97,18 @@ class CheckRValueLvalueVisitor final : public ASTVisitorFunctionBody { parent::visit(v->get_expr()); } + void visit(V v) override { + // if `x!` is lvalue, then `x` is also lvalue, so check that `x` is ok + parent::visit(v->get_expr()); + } + + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, v->is_negated ? 
"operator !=" : "operator =="); + } + parent::visit(v->get_expr()); + } + void visit(V v) override { if (v->is_lvalue) { fire_error_cannot_be_used_as_lvalue(v, "literal"); @@ -124,7 +136,7 @@ class CheckRValueLvalueVisitor final : public ASTVisitorFunctionBody { void visit(V v) override { // a reference to a method used as rvalue, like `var v = t.tupleAt` if (v->is_rvalue && v->is_target_fun_ref()) { - validate_function_used_as_noncall(v, std::get(v->target)); + validate_function_used_as_noncall(v, std::get(v->target)); } } @@ -158,17 +170,17 @@ class CheckRValueLvalueVisitor final : public ASTVisitorFunctionBody { void visit(V v) override { if (v->is_lvalue) { tolk_assert(v->sym); - if (const auto* var_ref = v->sym->try_as(); var_ref && var_ref->is_immutable()) { + if (LocalVarPtr var_ref = v->sym->try_as(); var_ref && var_ref->is_immutable()) { fire_error_modifying_immutable_variable(v, var_ref); - } else if (v->sym->try_as()) { + } else if (v->sym->try_as()) { v->error("modifying immutable constant"); - } else if (v->sym->try_as()) { + } else if (v->sym->try_as()) { v->error("function can't be used as lvalue"); } } // a reference to a function used as rvalue, like `var v = someFunction` - if (const FunctionData* fun_ref = v->sym->try_as(); fun_ref && v->is_rvalue) { + if (FunctionPtr fun_ref = v->sym->try_as(); fun_ref && v->is_rvalue) { validate_function_used_as_noncall(v, fun_ref); } } @@ -186,7 +198,7 @@ class CheckRValueLvalueVisitor final : public ASTVisitorFunctionBody { } public: - bool should_visit_function(const FunctionData* fun_ref) override { + bool should_visit_function(FunctionPtr fun_ref) override { return fun_ref->is_code_function() && !fun_ref->is_generic_function(); } }; diff --git a/tolk/pipe-constant-folding.cpp b/tolk/pipe-constant-folding.cpp index 05d543b3..9c27029b 100644 --- a/tolk/pipe-constant-folding.cpp +++ b/tolk/pipe-constant-folding.cpp @@ -88,8 +88,19 @@ class ConstantFoldingReplacer final : public ASTReplacerInFunctionBody { 
return v; } + AnyExprV replace(V v) override { + parent::replace(v); + + // `null == null` / `null != null` + if (v->get_expr()->type == ast_null_keyword) { + return create_bool_const(v->loc, !v->is_negated); + } + + return v; + } + public: - bool should_visit_function(const FunctionData* fun_ref) override { + bool should_visit_function(FunctionPtr fun_ref) override { return fun_ref->is_code_function() && !fun_ref->is_generic_function(); } }; diff --git a/tolk/pipe-detect-unreachable.cpp b/tolk/pipe-detect-unreachable.cpp index 15824cf3..041e5581 100644 --- a/tolk/pipe-detect-unreachable.cpp +++ b/tolk/pipe-detect-unreachable.cpp @@ -111,11 +111,11 @@ class UnreachableStatementsDetectVisitor final { } public: - static bool should_visit_function(const FunctionData* fun_ref) { + static bool should_visit_function(FunctionPtr fun_ref) { return fun_ref->is_code_function() && !fun_ref->is_generic_function(); } - void start_visiting_function(const FunctionData* fun_ref, V v_function) { + void start_visiting_function(FunctionPtr fun_ref, V v_function) { bool control_flow_reaches_end = !always_returns(v_function->get_body()->as()); if (control_flow_reaches_end) { fun_ref->mutate()->assign_is_implicit_return(); @@ -128,7 +128,7 @@ void pipeline_detect_unreachable_statements() { visit_ast_of_all_functions(); } -void pipeline_detect_unreachable_statements(const FunctionData* fun_ref) { +void pipeline_detect_unreachable_statements(FunctionPtr fun_ref) { UnreachableStatementsDetectVisitor visitor; if (UnreachableStatementsDetectVisitor::should_visit_function(fun_ref)) { visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); diff --git a/tolk/pipe-find-unused-symbols.cpp b/tolk/pipe-find-unused-symbols.cpp index 29584cbf..2b7e5557 100644 --- a/tolk/pipe-find-unused-symbols.cpp +++ b/tolk/pipe-find-unused-symbols.cpp @@ -36,7 +36,7 @@ namespace tolk { static void mark_function_used_dfs(const std::unique_ptr& op); -static void mark_function_used(const FunctionData* 
fun_ref) { +static void mark_function_used(FunctionPtr fun_ref) { if (!fun_ref->is_code_function() || fun_ref->is_really_used()) { // already handled return; } @@ -45,7 +45,7 @@ static void mark_function_used(const FunctionData* fun_ref) { mark_function_used_dfs(std::get(fun_ref->body)->code->ops); } -static void mark_global_var_used(const GlobalVarData* glob_ref) { +static void mark_global_var_used(GlobalVarPtr glob_ref) { glob_ref->mutate()->assign_is_really_used(); } @@ -66,7 +66,7 @@ static void mark_function_used_dfs(const std::unique_ptr& op) { } void pipeline_find_unused_symbols() { - for (const FunctionData* fun_ref : G.all_functions) { + for (FunctionPtr fun_ref : G.all_functions) { if (fun_ref->is_method_id_not_empty()) { // get methods, main and other entrypoints, regular functions with @method_id mark_function_used(fun_ref); } diff --git a/tolk/pipe-generate-fif-output.cpp b/tolk/pipe-generate-fif-output.cpp index 7ef6ba7b..57f481f0 100644 --- a/tolk/pipe-generate-fif-output.cpp +++ b/tolk/pipe-generate-fif-output.cpp @@ -39,7 +39,7 @@ void FunctionBodyAsm::set_code(std::vector&& code) { } -static void generate_output_func(const FunctionData* fun_ref) { +static void generate_output_func(FunctionPtr fun_ref) { tolk_assert(fun_ref->is_code_function()); if (G.is_verbosity(2)) { std::cerr << "\n\n=========================\nfunction " << fun_ref->name << " : " << fun_ref->inferred_return_type << std::endl; @@ -119,7 +119,7 @@ void pipeline_generate_fif_output_to_std_cout() { std::cout << "PROGRAM{\n"; bool has_main_procedure = false; - for (const FunctionData* fun_ref : G.all_functions) { + for (FunctionPtr fun_ref : G.all_functions) { if (!fun_ref->does_need_codegen()) { if (G.is_verbosity(2) && fun_ref->is_code_function()) { std::cerr << fun_ref->name << ": code not generated, function does not need codegen\n"; @@ -143,7 +143,7 @@ void pipeline_generate_fif_output_to_std_cout() { throw Fatal("the contract has no entrypoint; forgot `fun 
onInternalMessage(...)`?"); } - for (const GlobalVarData* var_ref : G.all_global_vars) { + for (GlobalVarPtr var_ref : G.all_global_vars) { if (!var_ref->is_really_used() && G.settings.remove_unused_functions) { if (G.is_verbosity(2)) { std::cerr << var_ref->name << ": variable not generated, it's unused\n"; @@ -154,7 +154,7 @@ void pipeline_generate_fif_output_to_std_cout() { std::cout << std::string(2, ' ') << "DECLGLOBVAR " << var_ref->name << "\n"; } - for (const FunctionData* fun_ref : G.all_functions) { + for (FunctionPtr fun_ref : G.all_functions) { if (!fun_ref->does_need_codegen()) { continue; } diff --git a/tolk/pipe-infer-types-and-calls.cpp b/tolk/pipe-infer-types-and-calls.cpp index abb060a2..2f4290d6 100644 --- a/tolk/pipe-infer-types-and-calls.cpp +++ b/tolk/pipe-infer-types-and-calls.cpp @@ -63,9 +63,9 @@ namespace tolk { -static void infer_and_save_return_type_of_function(const FunctionData* fun_ref); +static void infer_and_save_return_type_of_function(FunctionPtr fun_ref); -static TypePtr get_or_infer_return_type(const FunctionData* fun_ref) { +static TypePtr get_or_infer_return_type(FunctionPtr fun_ref) { if (!fun_ref->inferred_return_type) { infer_and_save_return_type_of_function(fun_ref); } @@ -83,12 +83,7 @@ static std::string to_string(AnyExprV v_with_type) { } GNU_ATTRIBUTE_NOINLINE -static std::string to_string(const LocalVarData& var_ref) { - return "`" + var_ref.declared_type->as_human_readable() + "`"; -} - -GNU_ATTRIBUTE_NOINLINE -static std::string to_string(const FunctionData* fun_ref) { +static std::string to_string(FunctionPtr fun_ref) { return "`" + fun_ref->as_human_readable() + "`"; } @@ -96,8 +91,8 @@ static std::string to_string(const FunctionData* fun_ref) { // asm functions generally can't handle it, they expect T to be a TVM primitive // (in FunC, `forall` type just couldn't be unified with non-primitives; in Tolk, generic T is expectedly inferred) GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void 
fire_error_calling_asm_function_with_non1_stack_width_arg(SrcLocation loc, const FunctionData* fun_ref, const std::vector& substitutions, int arg_idx) { - throw ParseError(loc, "can not call `" + fun_ref->as_human_readable() + "` with " + fun_ref->genericTs->get_nameT(arg_idx) + "=" + substitutions[arg_idx]->as_human_readable() + ", because it occupies " + std::to_string(substitutions[arg_idx]->calc_width_on_stack()) + " stack slots in TVM, not 1"); +static void fire_error_calling_asm_function_with_non1_stack_width_arg(SrcLocation loc, FunctionPtr fun_ref, const std::vector& substitutions, int arg_idx) { + throw ParseError(loc, "can not call `" + fun_ref->as_human_readable() + "` with " + fun_ref->genericTs->get_nameT(arg_idx) + "=" + substitutions[arg_idx]->as_human_readable() + ", because it occupies " + std::to_string(substitutions[arg_idx]->get_width_on_stack()) + " stack slots in TVM, not 1"); } // fire an error on `var n = null` @@ -105,7 +100,7 @@ static void fire_error_calling_asm_function_with_non1_stack_width_arg(SrcLocatio // so, it's better to see an error on assignment, that later, on `n` usage and types mismatch // (most common is situation above, but generally, `var (x,n) = xn` where xn is a tensor with 2-nd always-null, can be) GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_assign_always_null_to_variable(SrcLocation loc, const LocalVarData* assigned_var, bool is_assigned_null_literal) { +static void fire_error_assign_always_null_to_variable(SrcLocation loc, LocalVarPtr assigned_var, bool is_assigned_null_literal) { std::string var_name = assigned_var->name; throw ParseError(loc, "can not infer type of `" + var_name + "`, it's always null; specify its type with `" + var_name + ": `" + (is_assigned_null_literal ? 
" or use `null as `" : "")); } @@ -134,34 +129,26 @@ static void fire_error_cannot_deduce_untyped_tuple_access(SrcLocation loc, int i // fire an error on `untypedTupleVar.0` when inferred as (int,int), or `[int, (int,int)]`, or other non-1 width in a tuple GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD static void fire_error_tuple_cannot_have_non1_stack_width_elem(SrcLocation loc, TypePtr inferred_type) { - throw ParseError(loc, "a tuple can not have " + to_string(inferred_type) + " inside, because it occupies " + std::to_string(inferred_type->calc_width_on_stack()) + " stack slots in TVM, not 1"); + throw ParseError(loc, "a tuple can not have " + to_string(inferred_type) + " inside, because it occupies " + std::to_string(inferred_type->get_width_on_stack()) + " stack slots in TVM, not 1"); } -// check correctness of called arguments counts and their type matching -static void check_function_arguments(const FunctionData* fun_ref, V v, AnyExprV lhs_of_dot_call) { - int delta_self = lhs_of_dot_call ? 
1 : 0; - int n_arguments = v->size() + delta_self; - int n_parameters = fun_ref->get_num_params(); - - // Tolk doesn't have optional parameters currently, so just compare counts - if (!n_parameters && lhs_of_dot_call) { - v->error("`" + fun_ref->name + "` has no parameters and can not be called as method"); - } - if (n_parameters < n_arguments) { - v->error("too many arguments in call to `" + fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); - } - if (n_arguments < n_parameters) { - v->error("too few arguments in call to `" + fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); - } - - if (lhs_of_dot_call) { - if (!fun_ref->parameters[0].declared_type->can_rhs_be_assigned(lhs_of_dot_call->inferred_type)) { - lhs_of_dot_call->error("can not call method for " + to_string(fun_ref->parameters[0]) + " with object of type " + to_string(lhs_of_dot_call)); +// check type correctness of a passed argument when calling a function/method +static void check_function_argument(TypePtr param_type, bool is_mutate_param, AnyExprV ith_arg, bool is_obj_of_dot_call) { + // given `f(x: int)` and a call `f(expr)`, check that expr_type is assignable to `int` + if (!param_type->can_rhs_be_assigned(ith_arg->inferred_type)) { + if (is_obj_of_dot_call) { + ith_arg->error("can not call method for " + to_string(param_type) + " with object of type " + to_string(ith_arg)); + } else { + ith_arg->error("can not pass " + to_string(ith_arg) + " to " + to_string(param_type)); } } - for (int i = 0; i < v->size(); ++i) { - if (!fun_ref->parameters[i + delta_self].declared_type->can_rhs_be_assigned(v->get_arg(i)->inferred_type)) { - v->get_arg(i)->error("can not pass " + to_string(v->get_arg(i)) + " to " + to_string(fun_ref->parameters[i + delta_self])); + // given `f(x: mutate int?)` and a call `f(expr)`, check that `int?` is assignable to 
expr_type + // (for instance, can't call such a function with `f(mutate intVal)`, since f can potentially assign null to it) + if (is_mutate_param && !ith_arg->inferred_type->can_rhs_be_assigned(param_type)) { + if (is_obj_of_dot_call) { + ith_arg->error("can not call method for mutate " + to_string(param_type) + " with object of type " + to_string(ith_arg) + ", because mutation is not type compatible"); + } else { + ith_arg->error("can not pass " + to_string(ith_arg) + " to mutate " + to_string(param_type) + ", because mutation is not type compatible"); } } } @@ -189,6 +176,13 @@ class TypeInferringUnifyStrategy { return t2; } + if (t1 == TypeDataNullLiteral::create()) { + return TypeDataNullable::create(t2); + } + if (t2 == TypeDataNullLiteral::create()) { + return TypeDataNullable::create(t1); + } + const auto* tensor1 = t1->try_as(); const auto* tensor2 = t2->try_as(); if (tensor1 && tensor2 && tensor1->size() == tensor2->size()) { @@ -256,8 +250,8 @@ public: // handle __expect_type(expr, "type") call // this is used in compiler tests GNU_ATTRIBUTE_NOINLINE GNU_ATTRIBUTE_COLD -static void handle_possible_compiler_internal_call(const FunctionData* current_function, V v) { - const FunctionData* fun_ref = v->fun_maybe; +static void handle_possible_compiler_internal_call(FunctionPtr current_function, V v) { + FunctionPtr fun_ref = v->fun_maybe; tolk_assert(fun_ref && fun_ref->is_builtin_function()); static_cast(current_function); @@ -279,7 +273,7 @@ static void handle_possible_compiler_internal_call(const FunctionData* current_f * 2) easy to maintain a hint (see comments at the top of the file) */ class InferCheckTypesAndCallsAndFieldsVisitor final { - const FunctionData* current_function = nullptr; + FunctionPtr current_function = nullptr; TypeInferringUnifyStrategy return_unifier; GNU_ATTRIBUTE_ALWAYS_INLINE @@ -298,14 +292,14 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { dst->mutate()->assign_inferred_type(inferred_type); } - static void 
assign_inferred_type(const LocalVarData* local_var_or_param, TypePtr inferred_type) { + static void assign_inferred_type(LocalVarPtr local_var_or_param, TypePtr inferred_type) { #ifdef TOLK_DEBUG tolk_assert(inferred_type != nullptr && !inferred_type->has_unresolved_inside() && !inferred_type->has_genericT_inside()); #endif local_var_or_param->mutate()->assign_inferred_type(inferred_type); } - static void assign_inferred_type(const FunctionData* fun_ref, TypePtr inferred_return_type, TypePtr inferred_full_type) { + static void assign_inferred_type(FunctionPtr fun_ref, TypePtr inferred_return_type, TypePtr inferred_full_type) { #ifdef TOLK_DEBUG tolk_assert(inferred_return_type != nullptr && !inferred_return_type->has_unresolved_inside() && !inferred_return_type->has_genericT_inside()); #endif @@ -365,6 +359,10 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { return infer_ternary_operator(v->as(), hint); case ast_cast_as_operator: return infer_cast_as_operator(v->as()); + case ast_not_null_operator: + return infer_not_null_operator(v->as()); + case ast_is_null_check: + return infer_is_null_check(v->as()); case ast_parenthesized_expression: return infer_parenthesized(v->as(), hint); case ast_reference: @@ -388,14 +386,29 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { } } + static TypePtr unwrap_nullable(TypePtr type) { + while (const TypeDataNullable* as_nullable = type->try_as()) { + type = as_nullable->inner; + } + return type; + } + static bool expect_integer(AnyExprV v_inferred) { return v_inferred->inferred_type == TypeDataInt::create(); } + static bool expect_integer(TypePtr inferred_type) { + return inferred_type == TypeDataInt::create(); + } + static bool expect_boolean(AnyExprV v_inferred) { return v_inferred->inferred_type == TypeDataBool::create(); } + static bool expect_boolean(TypePtr inferred_type) { + return inferred_type == TypeDataBool::create(); + } + static void infer_int_const(V v) { assign_inferred_type(v, 
TypeDataInt::create()); } @@ -467,7 +480,7 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { } } else { if (rhs_type == TypeDataNullLiteral::create()) { - fire_error_assign_always_null_to_variable(err_loc->loc, lhs_var->var_ref->try_as(), corresponding_maybe_rhs && corresponding_maybe_rhs->type == ast_null_keyword); + fire_error_assign_always_null_to_variable(err_loc->loc, lhs_var->var_ref->try_as(), corresponding_maybe_rhs && corresponding_maybe_rhs->type == ast_null_keyword); } assign_inferred_type(lhs_var, rhs_type); assign_inferred_type(lhs_var->var_ref, rhs_type); @@ -520,7 +533,7 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { // check `untypedTuple.0 = rhs_tensor` and other non-1 width elements if (auto lhs_dot = lhs->try_as()) { if (lhs_dot->is_target_indexed_access() && lhs_dot->get_obj()->inferred_type == TypeDataTuple::create()) { - if (rhs_type->calc_width_on_stack() != 1) { + if (rhs_type->get_width_on_stack() != 1) { fire_error_tuple_cannot_have_non1_stack_width_elem(err_loc->loc, rhs_type); } } @@ -563,8 +576,7 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { assign_inferred_type(v, lhs); if (!builtin_func.empty()) { - const FunctionData* builtin_sym = lookup_global_symbol("_" + static_cast(builtin_func) + "_")->as(); - tolk_assert(builtin_sym); + FunctionPtr builtin_sym = lookup_global_symbol("_" + static_cast(builtin_func) + "_")->try_as(); v->mutate()->assign_fun_ref(builtin_sym); } } @@ -598,8 +610,7 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { } if (!builtin_func.empty()) { - const FunctionData* builtin_sym = lookup_global_symbol(static_cast(builtin_func) + "_")->as(); - tolk_assert(builtin_sym); + FunctionPtr builtin_sym = lookup_global_symbol(static_cast(builtin_func) + "_")->try_as(); v->mutate()->assign_fun_ref(builtin_sym); } } @@ -617,8 +628,8 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { // == != can compare both integers and booleans, (int == bool) is NOT allowed case tok_eq: case tok_neq: { 
- bool both_int = expect_integer(lhs) && expect_integer(rhs); - bool both_bool = expect_boolean(lhs) && expect_boolean(rhs); + bool both_int = expect_integer(unwrap_nullable(lhs->inferred_type)) && expect_integer(unwrap_nullable(rhs->inferred_type)); + bool both_bool = expect_boolean(unwrap_nullable(lhs->inferred_type)) && expect_boolean(unwrap_nullable(rhs->inferred_type)); if (!both_int && !both_bool) { if (lhs->inferred_type == rhs->inferred_type) { // compare slice with slice v->error("type " + to_string(lhs) + " can not be compared with `== !=`"); @@ -674,8 +685,7 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { } if (!builtin_func.empty()) { - const FunctionData* builtin_sym = lookup_global_symbol("_" + static_cast(builtin_func) + "_")->as(); - tolk_assert(builtin_sym); + FunctionPtr builtin_sym = lookup_global_symbol("_" + static_cast(builtin_func) + "_")->try_as(); v->mutate()->assign_fun_ref(builtin_sym); } } @@ -706,22 +716,38 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { assign_inferred_type(v, v->cast_to_type); } + void infer_is_null_check(V v) { + infer_any_expr(v->get_expr()); + assign_inferred_type(v, TypeDataBool::create()); + } + + void infer_not_null_operator(V v) { + infer_any_expr(v->get_expr()); + if (const auto* as_nullable = v->get_expr()->inferred_type->try_as()) { + // operator `!` used for `T?`, leave `T` + assign_inferred_type(v, as_nullable->inner); + } else { + // operator `!` used for non-nullable, probably a warning should be printed + assign_inferred_type(v, v->get_expr()); + } + } + void infer_parenthesized(V v, TypePtr hint) { infer_any_expr(v->get_expr(), hint); assign_inferred_type(v, v->get_expr()); } static void infer_reference(V v) { - if (const auto* var_ref = v->sym->try_as()) { + if (LocalVarPtr var_ref = v->sym->try_as()) { assign_inferred_type(v, var_ref->declared_type); - } else if (const auto* const_ref = v->sym->try_as()) { + } else if (GlobalConstPtr const_ref = v->sym->try_as()) { 
assign_inferred_type(v, const_ref->is_int_const() ? TypeDataInt::create() : TypeDataSlice::create()); - } else if (const auto* glob_ref = v->sym->try_as()) { + } else if (GlobalVarPtr glob_ref = v->sym->try_as()) { assign_inferred_type(v, glob_ref->declared_type); - } else if (const auto* fun_ref = v->sym->try_as()) { + } else if (FunctionPtr fun_ref = v->sym->try_as()) { // it's `globalF` / `globalF` - references to functions used as non-call V v_instantiationTs = v->get_instantiationTs(); @@ -758,7 +784,7 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { // given `genericF` / `t.tupleFirst` (the user manually specified instantiation Ts), // validate and collect them // returns: [int, slice] / [cell] - static std::vector collect_fun_generic_substitutions_from_manually_specified(SrcLocation loc, const FunctionData* fun_ref, V instantiationT_list) { + static std::vector collect_fun_generic_substitutions_from_manually_specified(SrcLocation loc, FunctionPtr fun_ref, V instantiationT_list) { if (fun_ref->genericTs->size() != instantiationT_list->get_items().size()) { throw ParseError(loc, "wrong count of generic T: expected " + std::to_string(fun_ref->genericTs->size()) + ", got " + std::to_string(instantiationT_list->size())); } @@ -778,11 +804,11 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { // example: was `t.tuplePush(2)`, read , instantiate `tuplePush` (will later fail type check) // example: was `var cb = t.tupleFirst;` (used as reference, as non-call), instantiate `tupleFirst` // returns fun_ref to instantiated function - static const FunctionData* check_and_instantiate_generic_function(SrcLocation loc, const FunctionData* fun_ref, std::vector&& substitutionTs) { + static FunctionPtr check_and_instantiate_generic_function(SrcLocation loc, FunctionPtr fun_ref, std::vector&& substitutionTs) { // T for asm function must be a TVM primitive (width 1), otherwise, asm would act incorrectly if (fun_ref->is_asm_function() || 
fun_ref->is_builtin_function()) { for (int i = 0; i < static_cast(substitutionTs.size()); ++i) { - if (substitutionTs[i]->calc_width_on_stack() != 1) { + if (substitutionTs[i]->get_width_on_stack() != 1) { fire_error_calling_asm_function_with_non1_stack_width_arg(loc, fun_ref, substitutionTs, i); } } @@ -836,7 +862,7 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { if (hint == nullptr) { fire_error_cannot_deduce_untyped_tuple_access(v->loc, index_at); } - if (hint->calc_width_on_stack() != 1) { + if (hint->get_width_on_stack() != 1) { fire_error_tuple_cannot_have_non1_stack_width_elem(v->loc, hint); } item_type = hint; @@ -850,14 +876,14 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { // for now, Tolk doesn't have fields and object-scoped methods; `t.tupleSize` is a global function `tupleSize` const Symbol* sym = lookup_global_symbol(field_name); - const FunctionData* fun_ref = sym ? sym->try_as() : nullptr; + FunctionPtr fun_ref = sym ? sym->try_as() : nullptr; if (!fun_ref) { v_ident->error("non-existing field `" + static_cast(field_name) + "` of type " + to_string(obj_type)); } // `t.tupleSize` is ok, `cs.tupleSize` not if (!fun_ref->parameters[0].declared_type->can_rhs_be_assigned(obj_type)) { - v_ident->error("referencing a method for " + to_string(fun_ref->parameters[0]) + " with object of type " + to_string(obj_type)); + v_ident->error("referencing a method for " + to_string(fun_ref->parameters[0].declared_type) + " with object of type " + to_string(obj_type)); } if (fun_ref->is_generic_function() && !v_instantiationTs) { @@ -886,12 +912,12 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { // v is `globalF(args)` / `globalF(args)` / `obj.method(args)` / `local_var(args)` / `getF()(args)` int delta_self = 0; AnyExprV dot_obj = nullptr; - const FunctionData* fun_ref = nullptr; + FunctionPtr fun_ref = nullptr; V v_instantiationTs = nullptr; if (auto v_ref = callee->try_as()) { // `globalF()` / `globalF()` / `local_var()` / `SOME_CONST()` 
- fun_ref = v_ref->sym->try_as(); // not null for `globalF` + fun_ref = v_ref->sym->try_as(); // not null for `globalF` v_instantiationTs = v_ref->get_instantiationTs(); // present for `globalF()` } else if (auto v_dot = callee->try_as()) { @@ -910,7 +936,7 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { } else { // for now, Tolk doesn't have fields and object-scoped methods; `t.tupleSize` is a global function `tupleSize` const Symbol* sym = lookup_global_symbol(field_name); - fun_ref = sym ? sym->try_as() : nullptr; + fun_ref = sym ? sym->try_as() : nullptr; if (!fun_ref) { v_dot->get_identifier()->error("non-existing method `" + static_cast(field_name) + "` of type " + to_string(dot_obj)); } @@ -921,30 +947,26 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { // fun_ref remains nullptr } - // infer argument types, looking at fun_ref's parameters as hints - for (int i = 0; i < v->get_num_args(); ++i) { - TypePtr param_type = fun_ref && i < fun_ref->get_num_params() - delta_self ? fun_ref->parameters[delta_self + i].declared_type : nullptr; - auto arg_i = v->get_arg(i); - infer_any_expr(arg_i->get_expr(), param_type && !param_type->has_genericT_inside() ? 
param_type : nullptr); - assign_inferred_type(arg_i, arg_i->get_expr()); - } - // handle `local_var()` / `getF()()` / `5()` / `SOME_CONST()` / `obj.method()()()` / `tensorVar.0()` if (!fun_ref) { // treat callee like a usual expression, which must have "callable" inferred type infer_any_expr(callee); const TypeDataFunCallable* f_callable = callee->inferred_type->try_as(); if (!f_callable) { // `5()` / `SOME_CONST()` / `null()` - v->error("calling a non-function"); + v->error("calling a non-function " + to_string(callee->inferred_type)); } // check arguments count and their types if (v->get_num_args() != static_cast(f_callable->params_types.size())) { v->error("expected " + std::to_string(f_callable->params_types.size()) + " arguments, got " + std::to_string(v->get_arg_list()->size())); } for (int i = 0; i < v->get_num_args(); ++i) { - if (!f_callable->params_types[i]->can_rhs_be_assigned(v->get_arg(i)->inferred_type)) { - v->get_arg(i)->error("can not pass " + to_string(v->get_arg(i)) + " to " + to_string(f_callable->params_types[i])); + auto arg_i = v->get_arg(i)->get_expr(); + TypePtr param_type = f_callable->params_types[i]; + infer_any_expr(arg_i, param_type); + if (!param_type->can_rhs_be_assigned(arg_i->inferred_type)) { + arg_i->error("can not pass " + to_string(arg_i) + " to " + to_string(param_type)); } + assign_inferred_type(v->get_arg(i), arg_i); } v->mutate()->assign_fun_ref(nullptr); // no fun_ref to a global function assign_inferred_type(v, f_callable->return_type); @@ -952,30 +974,75 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { } // so, we have a call `f(args)` or `obj.f(args)`, f is a global function (fun_ref) (code / asm / builtin) + // we're going to iterate over passed arguments, check type compatibility, and (if generic) infer substitutionTs + // at first, check arguments count (Tolk doesn't have optional parameters, so just compare counts) + int n_arguments = v->get_num_args() + delta_self; + int n_parameters = 
fun_ref->get_num_params(); + if (!n_parameters && dot_obj) { + v->error("`" + fun_ref->name + "` has no parameters and can not be called as method"); + } + if (n_parameters < n_arguments) { + v->error("too many arguments in call to `" + fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); + } + if (n_arguments < n_parameters) { + v->error("too few arguments in call to `" + fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); + } + + // now, for every passed argument, we need to infer its type, and check it against parameter type + // for regular functions, it's obvious + // but for generic functions, we need to infer type arguments (substitutionTs) on the fly + // (unless Ts are specified by a user like `f(args)` / `t.tupleAt()`, take them) + GenericSubstitutionsDeduceForCall* deducingTs = fun_ref->is_generic_function() ? new GenericSubstitutionsDeduceForCall(fun_ref) : nullptr; + if (deducingTs && v_instantiationTs) { + deducingTs->provide_manually_specified(collect_fun_generic_substitutions_from_manually_specified(v->loc, fun_ref, v_instantiationTs)); + } + + // loop over every argument, for `obj.method()` obj is the first one + // if genericT deducing has a conflict, ParseError is thrown + // note, that deducing Ts one by one is important to manage control flow (mutate params work like assignments) + // a corner case, e.g. 
`f(v1:T?, v2:T?)` and `f(null,2)` will fail on first argument, won't try the second one + if (dot_obj) { + const LocalVarData& param_0 = fun_ref->parameters[0]; + TypePtr param_type = param_0.declared_type; + if (param_type->has_genericT_inside()) { + param_type = deducingTs->auto_deduce_from_argument(dot_obj->loc, param_type, dot_obj->inferred_type); + } + check_function_argument(param_type, param_0.is_mutate_parameter(), dot_obj, true); + } + for (int i = 0; i < v->get_num_args(); ++i) { + const LocalVarData& param_i = fun_ref->parameters[delta_self + i]; + AnyExprV arg_i = v->get_arg(i)->get_expr(); + TypePtr param_type = param_i.declared_type; + if (param_type->has_genericT_inside() && deducingTs->is_manually_specified()) { // `f(a)` + param_type = deducingTs->replace_by_manually_specified(param_type); + } + if (param_type->has_genericT_inside()) { // `f(a)` where f is generic: use `a` to infer param type + infer_any_expr(arg_i); // then arg_i is inferred without any hint + param_type = deducingTs->auto_deduce_from_argument(arg_i->loc, param_type, arg_i->inferred_type); + } else { + infer_any_expr(arg_i, param_type); // param_type is hint, helps infer arg_i + } + assign_inferred_type(v->get_arg(i), arg_i); // arg itself is an expression + check_function_argument(param_type, param_i.is_mutate_parameter(), arg_i, false); + } + // if it's a generic function `f`, we need to instantiate it, like `f` // same for generic methods `t.tupleAt`, need to achieve `t.tupleAt` - if (fun_ref->is_generic_function() && v_instantiationTs) { - // if Ts are specified by a user like `f(args)` / `t.tupleAt()`, take them - std::vector substitutions = collect_fun_generic_substitutions_from_manually_specified(v->loc, fun_ref, v_instantiationTs); - fun_ref = check_and_instantiate_generic_function(v->loc, fun_ref, std::move(substitutions)); - - } else if (fun_ref->is_generic_function()) { - // if `f` called like `f(args)`, deduce T from arg types - std::vector arg_types; - 
arg_types.reserve(delta_self + v->get_num_args()); - if (dot_obj) { - arg_types.push_back(dot_obj->inferred_type); + if (fun_ref->is_generic_function()) { + // if `f(args)` was called, Ts were inferred; check that all of them are known + int idx = deducingTs->get_first_not_deduced_idx(); + if (idx != -1 && hint && fun_ref->declared_return_type->has_genericT_inside()) { + // example: `t.tupleFirst()`, T doesn't depend on arguments, but is determined by return type + // if used like `var x: int = t.tupleFirst()` / `t.tupleFirst() as int` / etc., use hint + deducingTs->auto_deduce_from_argument(v->loc, fun_ref->declared_return_type, hint); + idx = deducingTs->get_first_not_deduced_idx(); } - for (int i = 0; i < v->get_num_args(); ++i) { - arg_types.push_back(v->get_arg(i)->inferred_type); + if (idx != -1) { + v->error("can not deduce " + fun_ref->genericTs->get_nameT(idx)); } - - td::Result> deduced = deduce_substitutionTs_on_generic_func_call(fun_ref, std::move(arg_types), hint); - if (deduced.is_error()) { - v->error(deduced.error().message().str() + " for generic function " + to_string(fun_ref)); - } - fun_ref = check_and_instantiate_generic_function(v->loc, fun_ref, deduced.move_as_ok()); + fun_ref = check_and_instantiate_generic_function(v->loc, fun_ref, deducingTs->flush()); + delete deducingTs; } else if (UNLIKELY(v_instantiationTs != nullptr)) { // non-generic function/method called with type arguments, like `c.cellHash()` / `beginCell()` @@ -988,8 +1055,6 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { v->get_callee()->as()->mutate()->assign_target(fun_ref); v->get_callee()->as()->mutate()->assign_inferred_type(fun_ref->inferred_full_type); } - // check arguments count and their types - check_function_arguments(fun_ref, v->get_arg_list(), dot_obj); // get return type either from user-specified declaration or infer here on demand traversing its body get_or_infer_return_type(fun_ref); TypePtr inferred_type = dot_obj && fun_ref->does_return_self() ? 
dot_obj->inferred_type : fun_ref->inferred_return_type; @@ -1020,7 +1085,7 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { for (int i = 0; i < v->size(); ++i) { AnyExprV item = v->get_item(i); infer_any_expr(item, tuple_hint && i < tuple_hint->size() ? tuple_hint->items[i] : nullptr); - if (item->inferred_type->calc_width_on_stack() != 1) { + if (item->inferred_type->get_width_on_stack() != 1) { fire_error_tuple_cannot_have_non1_stack_width_elem(v->get_item(i)->loc, item->inferred_type); } types_list.emplace_back(item->inferred_type); @@ -1134,7 +1199,7 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { v->get_thrown_code()->error("excNo of `throw` must be an integer, got " + to_string(v->get_thrown_code())); } infer_any_expr(v->get_thrown_arg()); - if (v->has_thrown_arg() && v->get_thrown_arg()->inferred_type->calc_width_on_stack() != 1) { + if (v->has_thrown_arg() && v->get_thrown_arg()->inferred_type->get_width_on_stack() != 1) { v->get_thrown_arg()->error("can not throw " + to_string(v->get_thrown_arg()) + ", exception arg must occupy exactly 1 stack slot"); } } @@ -1153,7 +1218,7 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { static void process_catch_variable(AnyExprV catch_var, TypePtr catch_var_type) { if (auto v_ref = catch_var->try_as(); v_ref && v_ref->sym) { // not underscore - assign_inferred_type(v_ref->sym->as(), catch_var_type); + assign_inferred_type(v_ref->sym->try_as(), catch_var_type); } assign_inferred_type(catch_var, catch_var_type); } @@ -1163,7 +1228,7 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { // `catch` has exactly 2 variables: excNo and arg (when missing, they are implicit underscores) // `arg` is a curious thing, it can be any TVM primitive, so assign unknown to it - // hence, using `fInt(arg)` (int from parameter is a hint) or `arg as slice` works well + // hence, using `fInt(arg)` (int from parameter is a target type) or `arg as slice` works well // it's not truly correct, because `arg as 
(int,int)` also compiles, but can never happen, but let it be user responsibility tolk_assert(v->get_catch_expr()->size() == 2); std::vector types_list = {TypeDataInt::create(), TypeDataUnknown::create()}; @@ -1175,7 +1240,7 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { } public: - static void assign_fun_full_type(const FunctionData* fun_ref, TypePtr inferred_return_type) { + static void assign_fun_full_type(FunctionPtr fun_ref, TypePtr inferred_return_type) { // calculate function full type `fun(params) -> ret_type` std::vector params_types; params_types.reserve(fun_ref->get_num_params()); @@ -1185,7 +1250,7 @@ public: assign_inferred_type(fun_ref, inferred_return_type, TypeDataFunCallable::create(std::move(params_types), inferred_return_type)); } - void start_visiting_function(const FunctionData* fun_ref, V v_function) { + void start_visiting_function(FunctionPtr fun_ref, V v_function) { if (fun_ref->is_code_function()) { current_function = fun_ref; process_any_statement(v_function->get_body()); @@ -1212,12 +1277,12 @@ public: class LaunchInferTypesAndMethodsOnce final { public: - static bool should_visit_function(const FunctionData* fun_ref) { + static bool should_visit_function(FunctionPtr fun_ref) { // since inferring can be requested on demand, prevent second execution from a regular pipeline launcher return !fun_ref->is_type_inferring_done() && !fun_ref->is_generic_function(); } - static void start_visiting_function(const FunctionData* fun_ref, V v_function) { + static void start_visiting_function(FunctionPtr fun_ref, V v_function) { InferCheckTypesAndCallsAndFieldsVisitor visitor; visitor.start_visiting_function(fun_ref, v_function); } @@ -1227,8 +1292,8 @@ public: // example: `fun f() { return g(); } fun g() { ... 
}` // when analyzing `f()`, we need to infer what fun_ref=g returns // (if `g` is generic, it was already instantiated, so fun_ref=g is here) -static void infer_and_save_return_type_of_function(const FunctionData* fun_ref) { - static std::vector called_stack; +static void infer_and_save_return_type_of_function(FunctionPtr fun_ref) { + static std::vector called_stack; tolk_assert(!fun_ref->is_generic_function() && !fun_ref->is_type_inferring_done()); // if `g` has return type declared, like `fun g(): int { ... }`, don't traverse its body @@ -1255,7 +1320,7 @@ void pipeline_infer_types_and_calls_and_fields() { visit_ast_of_all_functions(); } -void pipeline_infer_types_and_calls_and_fields(const FunctionData* fun_ref) { +void pipeline_infer_types_and_calls_and_fields(FunctionPtr fun_ref) { InferCheckTypesAndCallsAndFieldsVisitor visitor; visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); } diff --git a/tolk/pipe-optimize-boolean-expr.cpp b/tolk/pipe-optimize-boolean-expr.cpp index a2e67047..c4c5d1dc 100644 --- a/tolk/pipe-optimize-boolean-expr.cpp +++ b/tolk/pipe-optimize-boolean-expr.cpp @@ -53,7 +53,7 @@ struct OptimizerBooleanExpressionsReplacer final : ASTReplacerInFunctionBody { auto v_not = createV(loc, "!", tok_logical_not, rhs); v_not->assign_inferred_type(TypeDataBool::create()); v_not->assign_rvalue_true(); - v_not->assign_fun_ref(lookup_global_symbol("!b_")->as()); + v_not->assign_fun_ref(lookup_global_symbol("!b_")->try_as()); return v_not; } @@ -75,7 +75,7 @@ protected: auto v_neq = createV(v->loc, "!=", tok_neq, cond_not_not, v_zero); v_neq->mutate()->assign_rvalue_true(); v_neq->mutate()->assign_inferred_type(TypeDataBool::create()); - v_neq->mutate()->assign_fun_ref(lookup_global_symbol("_!=_")->as()); + v_neq->mutate()->assign_fun_ref(lookup_global_symbol("_!=_")->try_as()); return v_neq; } } @@ -117,12 +117,17 @@ protected: } v = createV(v->loc, !v->is_ifnot, v_cond_unary->get_rhs(), v->get_if_body(), v->get_else_body()); } + // `if 
(x != null)` -> ifnot(x == null) + if (auto v_cond_isnull = v->get_cond()->try_as(); v_cond_isnull && v_cond_isnull->is_negated) { + v_cond_isnull->mutate()->assign_is_negated(!v_cond_isnull->is_negated); + v = createV(v->loc, !v->is_ifnot, v_cond_isnull, v->get_if_body(), v->get_else_body()); + } return v; } public: - bool should_visit_function(const FunctionData* fun_ref) override { + bool should_visit_function(FunctionPtr fun_ref) override { return fun_ref->is_code_function() && !fun_ref->is_generic_function(); } }; diff --git a/tolk/pipe-refine-lvalue-for-mutate.cpp b/tolk/pipe-refine-lvalue-for-mutate.cpp index 540d7413..a8b4f1ae 100644 --- a/tolk/pipe-refine-lvalue-for-mutate.cpp +++ b/tolk/pipe-refine-lvalue-for-mutate.cpp @@ -34,7 +34,7 @@ namespace tolk { GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_invalid_mutate_arg_passed(AnyExprV v, const FunctionData* fun_ref, const LocalVarData& p_sym, bool called_as_method, bool arg_passed_as_mutate, AnyV arg_expr) { +static void fire_error_invalid_mutate_arg_passed(AnyExprV v, FunctionPtr fun_ref, const LocalVarData& p_sym, bool called_as_method, bool arg_passed_as_mutate, AnyV arg_expr) { std::string arg_str(arg_expr->type == ast_reference ? 
arg_expr->as()->get_name() : "obj"); // case: `loadInt(cs, 32)`; suggest: `cs.loadInt(32)` @@ -60,7 +60,7 @@ static void fire_error_invalid_mutate_arg_passed(AnyExprV v, const FunctionData* class RefineLvalueForMutateArgumentsVisitor final : public ASTVisitorFunctionBody { void visit(V v) override { // v is `globalF(args)` / `globalF(args)` / `obj.method(args)` / `local_var(args)` / `getF()(args)` - const FunctionData* fun_ref = v->fun_maybe; + FunctionPtr fun_ref = v->fun_maybe; if (!fun_ref) { parent::visit(v); for (int i = 0; i < v->get_num_args(); ++i) { @@ -86,6 +86,8 @@ class RefineLvalueForMutateArgumentsVisitor final : public ASTVisitorFunctionBod leftmost_obj = as_par->get_expr(); } else if (auto as_cast = leftmost_obj->try_as()) { leftmost_obj = as_cast->get_expr(); + } else if (auto as_nn = leftmost_obj->try_as()) { + leftmost_obj = as_nn->get_expr(); } else { break; } @@ -114,7 +116,7 @@ class RefineLvalueForMutateArgumentsVisitor final : public ASTVisitorFunctionBod } public: - bool should_visit_function(const FunctionData* fun_ref) override { + bool should_visit_function(FunctionPtr fun_ref) override { return fun_ref->is_code_function() && !fun_ref->is_generic_function(); } }; diff --git a/tolk/pipe-register-symbols.cpp b/tolk/pipe-register-symbols.cpp index 2dae0d23..45246d6b 100644 --- a/tolk/pipe-register-symbols.cpp +++ b/tolk/pipe-register-symbols.cpp @@ -176,8 +176,8 @@ static void register_function(V v) { genericTs = construct_genericTs(v->genericsT_list); } if (v->is_builtin_function()) { - const Symbol* builtin_func = lookup_global_symbol(func_name); - const FunctionData* fun_ref = builtin_func ? builtin_func->as() : nullptr; + const Symbol* sym = lookup_global_symbol(func_name); + FunctionPtr fun_ref = sym ? 
sym->try_as() : nullptr; if (!fun_ref || !fun_ref->is_builtin_function()) { v->error("`builtin` used for non-builtin function"); } @@ -202,7 +202,7 @@ static void register_function(V v) { f_sym->method_id = static_cast(v->method_id->to_long()); } else if (v->flags & FunctionData::flagGetMethod) { f_sym->method_id = calculate_method_id_by_func_name(func_name); - for (const FunctionData* other : G.all_get_methods) { + for (FunctionPtr other : G.all_get_methods) { if (other->method_id == f_sym->method_id) { v->error(PSTRING() << "GET methods hash collision: `" << other->name << "` and `" << f_sym->name << "` produce the same hash. Consider renaming one of these functions."); } diff --git a/tolk/pipe-resolve-identifiers.cpp b/tolk/pipe-resolve-identifiers.cpp index 03b23c3c..95229d20 100644 --- a/tolk/pipe-resolve-identifiers.cpp +++ b/tolk/pipe-resolve-identifiers.cpp @@ -119,7 +119,7 @@ struct NameAndScopeResolver { return G.symtable.lookup(name); } - void add_local_var(const LocalVarData* v_sym) { + void add_local_var(LocalVarPtr v_sym) { if (UNLIKELY(scopes.empty())) { throw Fatal("unexpected scope_level = 0"); } @@ -168,9 +168,9 @@ static TypePtr finalize_type_data(TypePtr type_data, const GenericsDeclaration* class AssignSymInsideFunctionVisitor final : public ASTVisitorFunctionBody { // more correctly this field shouldn't be static, but currently there is no need to make it a part of state static NameAndScopeResolver current_scope; - static const FunctionData* current_function; + static FunctionPtr current_function; - static const LocalVarData* create_local_var_sym(std::string_view name, SrcLocation loc, TypePtr declared_type, bool immutable) { + static LocalVarPtr create_local_var_sym(std::string_view name, SrcLocation loc, TypePtr declared_type, bool immutable) { LocalVarData* v_sym = new LocalVarData(static_cast(name), loc, declared_type, immutable * LocalVarData::flagImmutable, -1); current_scope.add_local_var(v_sym); return v_sym; @@ -178,7 +178,7 @@ class 
AssignSymInsideFunctionVisitor final : public ASTVisitorFunctionBody { static void process_catch_variable(AnyExprV catch_var) { if (auto v_ref = catch_var->try_as()) { - const LocalVarData* var_ref = create_local_var_sym(v_ref->get_name(), catch_var->loc, nullptr, true); + LocalVarPtr var_ref = create_local_var_sym(v_ref->get_name(), catch_var->loc, nullptr, true); v_ref->mutate()->assign_sym(var_ref); } } @@ -190,14 +190,14 @@ protected: if (sym == nullptr) { v->error("`redef` for unknown variable"); } - const LocalVarData* var_ref = sym->try_as(); + LocalVarPtr var_ref = sym->try_as(); if (!var_ref) { v->error("`redef` for unknown variable"); } v->mutate()->assign_var_ref(var_ref); } else { TypePtr declared_type = finalize_type_data(v->declared_type, current_function->genericTs); - const LocalVarData* var_ref = create_local_var_sym(v->get_name(), v->loc, declared_type, v->is_immutable); + LocalVarPtr var_ref = create_local_var_sym(v->get_name(), v->loc, declared_type, v->is_immutable); v->mutate()->assign_resolved_type(declared_type); v->mutate()->assign_var_ref(var_ref); } @@ -216,7 +216,7 @@ protected: v->mutate()->assign_sym(sym); // for global functions, global vars and constants, `import` must exist - if (!sym->try_as()) { + if (!sym->try_as()) { check_import_exists_when_using_sym(v, sym); } @@ -276,14 +276,14 @@ protected: } public: - bool should_visit_function(const FunctionData* fun_ref) override { + bool should_visit_function(FunctionPtr fun_ref) override { // this pipe is done just after parsing // visit both asm and code functions, resolve identifiers in parameter/return types everywhere // for generic functions, unresolved "T" will be replaced by TypeDataGenericT return true; } - void start_visiting_function(const FunctionData* fun_ref, V v) override { + void start_visiting_function(FunctionPtr fun_ref, V v) override { current_function = fun_ref; for (int i = 0; i < v->get_num_params(); ++i) { @@ -313,7 +313,7 @@ public: }; NameAndScopeResolver 
AssignSymInsideFunctionVisitor::current_scope; -const FunctionData* AssignSymInsideFunctionVisitor::current_function = nullptr; +FunctionPtr AssignSymInsideFunctionVisitor::current_function = nullptr; void pipeline_resolve_identifiers_and_assign_symbols() { AssignSymInsideFunctionVisitor visitor; @@ -337,7 +337,7 @@ void pipeline_resolve_identifiers_and_assign_symbols() { } } -void pipeline_resolve_identifiers_and_assign_symbols(const FunctionData* fun_ref) { +void pipeline_resolve_identifiers_and_assign_symbols(FunctionPtr fun_ref) { AssignSymInsideFunctionVisitor visitor; if (visitor.should_visit_function(fun_ref)) { visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); diff --git a/tolk/pipeline.h b/tolk/pipeline.h index 6aec2b5e..ab65ef80 100644 --- a/tolk/pipeline.h +++ b/tolk/pipeline.h @@ -49,10 +49,10 @@ void pipeline_generate_fif_output_to_std_cout(); // these pipes also can be called per-function individually // they are called for instantiated generics functions, when `f` is deeply cloned as `f` -void pipeline_resolve_identifiers_and_assign_symbols(const FunctionData*); -void pipeline_calculate_rvalue_lvalue(const FunctionData*); -void pipeline_detect_unreachable_statements(const FunctionData*); -void pipeline_infer_types_and_calls_and_fields(const FunctionData*); +void pipeline_resolve_identifiers_and_assign_symbols(FunctionPtr); +void pipeline_calculate_rvalue_lvalue(FunctionPtr); +void pipeline_detect_unreachable_statements(FunctionPtr); +void pipeline_infer_types_and_calls_and_fields(FunctionPtr); } // namespace tolk diff --git a/tolk/symtable.cpp b/tolk/symtable.cpp index c56dc6ed..48b0b89d 100644 --- a/tolk/symtable.cpp +++ b/tolk/symtable.cpp @@ -120,7 +120,7 @@ static void fire_error_redefinition_of_symbol(SrcLocation loc, const Symbol* pre throw ParseError(loc, "redefinition of built-in symbol"); } -void GlobalSymbolTable::add_function(const FunctionData* f_sym) { +void GlobalSymbolTable::add_function(FunctionPtr f_sym) { auto key = 
key_hash(f_sym->name); auto [it, inserted] = entries.emplace(key, f_sym); if (!inserted) { @@ -128,7 +128,7 @@ void GlobalSymbolTable::add_function(const FunctionData* f_sym) { } } -void GlobalSymbolTable::add_global_var(const GlobalVarData* g_sym) { +void GlobalSymbolTable::add_global_var(GlobalVarPtr g_sym) { auto key = key_hash(g_sym->name); auto [it, inserted] = entries.emplace(key, g_sym); if (!inserted) { @@ -136,7 +136,7 @@ void GlobalSymbolTable::add_global_var(const GlobalVarData* g_sym) { } } -void GlobalSymbolTable::add_global_const(const GlobalConstData* c_sym) { +void GlobalSymbolTable::add_global_const(GlobalConstPtr c_sym) { auto key = key_hash(c_sym->name); auto [it, inserted] = entries.emplace(key, c_sym); if (!inserted) { diff --git a/tolk/symtable.h b/tolk/symtable.h index 27753ceb..9419afce 100644 --- a/tolk/symtable.h +++ b/tolk/symtable.h @@ -37,17 +37,12 @@ struct Symbol { virtual ~Symbol() = default; - template - const T* as() const { + template + ConstTPtr try_as() const { #ifdef TOLK_DEBUG - assert(dynamic_cast(this) != nullptr); + assert(this != nullptr); #endif - return dynamic_cast(this); - } - - template - const T* try_as() const { - return dynamic_cast(this); + return dynamic_cast(this); } }; @@ -229,9 +224,9 @@ class GlobalSymbolTable { } public: - void add_function(const FunctionData* f_sym); - void add_global_var(const GlobalVarData* g_sym); - void add_global_const(const GlobalConstData* c_sym); + void add_function(FunctionPtr f_sym); + void add_global_var(GlobalVarPtr g_sym); + void add_global_const(GlobalConstPtr c_sym); const Symbol* lookup(std::string_view name) const { const auto it = entries.find(key_hash(name)); diff --git a/tolk/tolk.h b/tolk/tolk.h index 4086d7f7..d218d510 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h @@ -45,7 +45,7 @@ typedef int const_idx_t; struct TmpVar { var_idx_t ir_idx; // every var in IR represents 1 stack slot - TypePtr v_type; // calc_width_on_stack() is 1 + TypePtr v_type; // get_width_on_stack() 
is 1 std::string name; // "x" for vars originated from user sources; "x.0" for tensor components; empty for implicitly created tmp vars SrcLocation loc; // location of var declaration in sources or where a tmp var was originated #ifdef TOLK_DEBUG @@ -283,8 +283,8 @@ struct Op { enum { _Disabled = 1, _NoReturn = 4, _Impure = 24 }; int flags; std::unique_ptr next; - const FunctionData* f_sym = nullptr; - const GlobalVarData* g_sym = nullptr; + FunctionPtr f_sym = nullptr; + GlobalVarPtr g_sym = nullptr; SrcLocation where; VarDescrList var_info; std::vector args; @@ -313,19 +313,19 @@ struct Op { : cl(_cl), flags(0), f_sym(nullptr), where(_where), left(std::move(_left)), right(std::move(_right)) { } Op(SrcLocation _where, OpKind _cl, const std::vector& _left, const std::vector& _right, - const FunctionData* _fun) + FunctionPtr _fun) : cl(_cl), flags(0), f_sym(_fun), where(_where), left(_left), right(_right) { } Op(SrcLocation _where, OpKind _cl, std::vector&& _left, std::vector&& _right, - const FunctionData* _fun) + FunctionPtr _fun) : cl(_cl), flags(0), f_sym(_fun), where(_where), left(std::move(_left)), right(std::move(_right)) { } Op(SrcLocation _where, OpKind _cl, const std::vector& _left, const std::vector& _right, - const GlobalVarData* _gvar) + GlobalVarPtr _gvar) : cl(_cl), flags(0), g_sym(_gvar), where(_where), left(_left), right(_right) { } Op(SrcLocation _where, OpKind _cl, std::vector&& _left, std::vector&& _right, - const GlobalVarData* _gvar) + GlobalVarPtr _gvar) : cl(_cl), flags(0), g_sym(_gvar), where(_where), left(std::move(_left)), right(std::move(_right)) { } @@ -1083,7 +1083,7 @@ struct FunctionBodyAsm { struct CodeBlob { int var_cnt, in_var_cnt; - const FunctionData* fun_ref; + FunctionPtr fun_ref; std::string name; SrcLocation loc; std::vector vars; @@ -1094,7 +1094,7 @@ struct CodeBlob { #endif std::stack*> cur_ops_stack; bool require_callxargs = false; - CodeBlob(std::string name, SrcLocation loc, const FunctionData* fun_ref) + 
CodeBlob(std::string name, SrcLocation loc, FunctionPtr fun_ref) : var_cnt(0), in_var_cnt(0), fun_ref(fun_ref), name(std::move(name)), loc(loc), cur_ops(&ops) { } template diff --git a/tolk/type-system.cpp b/tolk/type-system.cpp index c7122e10..6cd353d5 100644 --- a/tolk/type-system.cpp +++ b/tolk/type-system.cpp @@ -108,6 +108,19 @@ void type_system_init() { // and creates an object only if it isn't found in a global hashtable // +TypePtr TypeDataNullable::create(TypePtr inner) { + TypeDataTypeIdCalculation hash(1774084920039440885ULL); + hash.feed_child(inner); + + if (TypePtr existing = hash.get_existing()) { + return existing; + } + // most types (int?, slice?, etc.), when nullable, still occupy 1 stack slot (holding TVM NULL at runtime) + // but for example for `(int, int)` we need an extra stack slot "null flag" + int width_on_stack = inner->can_hold_tvm_null_instead() ? 1 : inner->get_width_on_stack() + 1; + return hash.register_unique(new TypeDataNullable(hash.type_id(), hash.children_flags(), width_on_stack, inner)); +} + TypePtr TypeDataFunCallable::create(std::vector&& params_types, TypePtr return_type) { TypeDataTypeIdCalculation hash(3184039965511020991ULL); for (TypePtr param : params_types) { @@ -143,7 +156,11 @@ TypePtr TypeDataTensor::create(std::vector&& items) { if (TypePtr existing = hash.get_existing()) { return existing; } - return hash.register_unique(new TypeDataTensor(hash.type_id(), hash.children_flags(), std::move(items))); + int width_on_stack = 0; + for (TypePtr item : items) { + width_on_stack += item->get_width_on_stack(); + } + return hash.register_unique(new TypeDataTensor(hash.type_id(), hash.children_flags(), width_on_stack, std::move(items))); } TypePtr TypeDataTypedTuple::create(std::vector&& items) { @@ -178,6 +195,12 @@ TypePtr TypeDataUnresolved::create(std::string&& text, SrcLocation loc) { // only non-trivial implementations are here; trivial are defined in .h file // +std::string TypeDataNullable::as_human_readable() const 
{ + std::string nested = inner->as_human_readable(); + bool embrace = inner->try_as(); + return embrace ? "(" + nested + ")?" : nested + "?"; +} + std::string TypeDataFunCallable::as_human_readable() const { std::string result = "("; for (TypePtr param : params_types) { @@ -223,6 +246,11 @@ std::string TypeDataTypedTuple::as_human_readable() const { // only non-trivial implementations are here; by default (no children), `callback(this)` is executed // +void TypeDataNullable::traverse(const TraverserCallbackT& callback) const { + callback(this); + inner->traverse(callback); +} + void TypeDataFunCallable::traverse(const TraverserCallbackT& callback) const { callback(this); for (TypePtr param : params_types) { @@ -254,6 +282,10 @@ void TypeDataTypedTuple::traverse(const TraverserCallbackT& callback) const { // only non-trivial implementations are here; by default (no children), `return callback(this)` is executed // +TypePtr TypeDataNullable::replace_children_custom(const ReplacerCallbackT& callback) const { + return callback(create(inner->replace_children_custom(callback))); +} + TypePtr TypeDataFunCallable::replace_children_custom(const ReplacerCallbackT& callback) const { std::vector mapped; mapped.reserve(params_types.size()); @@ -282,53 +314,17 @@ TypePtr TypeDataTypedTuple::replace_children_custom(const ReplacerCallbackT& cal } -// -------------------------------------------- -// calc_width_on_stack() -// -// returns the number of stack slots occupied by a variable of this type -// only non-trivial implementations are here; by default (most types) occupy 1 stack slot -// - -int TypeDataGenericT::calc_width_on_stack() const { - // this function is invoked only in functions with generics already instantiated - assert(false); - return -999999; -} - -int TypeDataTensor::calc_width_on_stack() const { - int sum = 0; - for (TypePtr item : items) { - sum += item->calc_width_on_stack(); - } - return sum; -} - -int TypeDataUnresolved::calc_width_on_stack() const { - // 
since early pipeline stages, no unresolved types left - assert(false); - return -999999; -} - -int TypeDataVoid::calc_width_on_stack() const { - return 0; -} - - // -------------------------------------------- // can_rhs_be_assigned() // // on `var lhs: = rhs`, having inferred rhs_type, check that it can be assigned without any casts // the same goes for passing arguments, returning values, etc. — where the "receiver" (lhs) checks "applier" (rhs) -// for now, `null` can be assigned to any TVM primitive, be later we'll have T? types and null safety // bool TypeDataInt::can_rhs_be_assigned(TypePtr rhs) const { if (rhs == this) { return true; } - if (rhs == TypeDataNullLiteral::create()) { - return true; - } return false; } @@ -336,9 +332,6 @@ bool TypeDataBool::can_rhs_be_assigned(TypePtr rhs) const { if (rhs == this) { return true; } - if (rhs == TypeDataNullLiteral::create()) { - return true; - } return false; } @@ -346,9 +339,6 @@ bool TypeDataCell::can_rhs_be_assigned(TypePtr rhs) const { if (rhs == this) { return true; } - if (rhs == TypeDataNullLiteral::create()) { - return true; - } return false; } @@ -356,9 +346,6 @@ bool TypeDataSlice::can_rhs_be_assigned(TypePtr rhs) const { if (rhs == this) { return true; } - if (rhs == TypeDataNullLiteral::create()) { - return true; - } return false; } @@ -366,9 +353,6 @@ bool TypeDataBuilder::can_rhs_be_assigned(TypePtr rhs) const { if (rhs == this) { return true; } - if (rhs == TypeDataNullLiteral::create()) { - return true; - } return false; } @@ -376,9 +360,6 @@ bool TypeDataTuple::can_rhs_be_assigned(TypePtr rhs) const { if (rhs == this) { return true; } - if (rhs == TypeDataNullLiteral::create()) { - return true; - } return false; } @@ -386,9 +367,6 @@ bool TypeDataContinuation::can_rhs_be_assigned(TypePtr rhs) const { if (rhs == this) { return true; } - if (rhs == TypeDataNullLiteral::create()) { - return true; - } return false; } @@ -396,6 +374,19 @@ bool TypeDataNullLiteral::can_rhs_be_assigned(TypePtr rhs) const 
{ return rhs == this; } +bool TypeDataNullable::can_rhs_be_assigned(TypePtr rhs) const { + if (rhs == this) { + return true; + } + if (rhs == TypeDataNullLiteral::create()) { + return true; + } + if (const TypeDataNullable* rhs_nullable = rhs->try_as()) { + return inner->can_rhs_be_assigned(rhs_nullable->inner); + } + return inner->can_rhs_be_assigned(rhs); +} + bool TypeDataFunCallable::can_rhs_be_assigned(TypePtr rhs) const { return rhs == this; } @@ -414,7 +405,6 @@ bool TypeDataTensor::can_rhs_be_assigned(TypePtr rhs) const { } return true; } - // note, that tensors can not accept null return false; } @@ -427,9 +417,6 @@ bool TypeDataTypedTuple::can_rhs_be_assigned(TypePtr rhs) const { } return true; } - if (rhs == TypeDataNullLiteral::create()) { - return true; - } return false; } @@ -455,41 +442,69 @@ bool TypeDataVoid::can_rhs_be_assigned(TypePtr rhs) const { // bool TypeDataInt::can_be_casted_with_as_operator(TypePtr cast_to) const { + if (const auto* to_nullable = cast_to->try_as()) { // `int` as `int?` + return can_be_casted_with_as_operator(to_nullable->inner); + } return cast_to == this; } bool TypeDataBool::can_be_casted_with_as_operator(TypePtr cast_to) const { + if (const auto* to_nullable = cast_to->try_as()) { + return can_be_casted_with_as_operator(to_nullable->inner); + } return cast_to == this || cast_to == TypeDataInt::create(); } bool TypeDataCell::can_be_casted_with_as_operator(TypePtr cast_to) const { + if (const auto* to_nullable = cast_to->try_as()) { + return can_be_casted_with_as_operator(to_nullable->inner); + } return cast_to == this; } bool TypeDataSlice::can_be_casted_with_as_operator(TypePtr cast_to) const { + if (const auto* to_nullable = cast_to->try_as()) { + return can_be_casted_with_as_operator(to_nullable->inner); + } return cast_to == this; } bool TypeDataBuilder::can_be_casted_with_as_operator(TypePtr cast_to) const { + if (const auto* to_nullable = cast_to->try_as()) { + return 
can_be_casted_with_as_operator(to_nullable->inner); + } return cast_to == this; } bool TypeDataTuple::can_be_casted_with_as_operator(TypePtr cast_to) const { + if (const auto* to_nullable = cast_to->try_as()) { + return can_be_casted_with_as_operator(to_nullable->inner); + } return cast_to == this; } bool TypeDataContinuation::can_be_casted_with_as_operator(TypePtr cast_to) const { + if (const auto* to_nullable = cast_to->try_as()) { + return can_be_casted_with_as_operator(to_nullable->inner); + } return cast_to == this; } bool TypeDataNullLiteral::can_be_casted_with_as_operator(TypePtr cast_to) const { - return cast_to == this - || cast_to == TypeDataInt::create() || cast_to == TypeDataBool::create() || cast_to == TypeDataCell::create() || cast_to == TypeDataSlice::create() - || cast_to == TypeDataBuilder::create() || cast_to == TypeDataContinuation::create() || cast_to == TypeDataTuple::create() - || cast_to->try_as(); + return cast_to == this || cast_to->try_as(); +} + +bool TypeDataNullable::can_be_casted_with_as_operator(TypePtr cast_to) const { + if (const auto* to_nullable = cast_to->try_as()) { + return inner->can_be_casted_with_as_operator(to_nullable->inner); + } + return false; } bool TypeDataFunCallable::can_be_casted_with_as_operator(TypePtr cast_to) const { + if (const auto* to_nullable = cast_to->try_as()) { + return can_be_casted_with_as_operator(to_nullable->inner); + } return this == cast_to; } @@ -506,6 +521,9 @@ bool TypeDataTensor::can_be_casted_with_as_operator(TypePtr cast_to) const { } return true; } + if (const auto* to_nullable = cast_to->try_as()) { + return can_be_casted_with_as_operator(to_nullable->inner); + } return false; } @@ -518,14 +536,15 @@ bool TypeDataTypedTuple::can_be_casted_with_as_operator(TypePtr cast_to) const { } return true; } + if (const auto* to_nullable = cast_to->try_as()) { + return can_be_casted_with_as_operator(to_nullable->inner); + } return false; } bool TypeDataUnknown::can_be_casted_with_as_operator(TypePtr 
cast_to) const { - // 'unknown' can be cast to any type - // (though it's not valid for exception arguments when casting them to non-1 stack width, - // but to ensure it, we need a special type "unknown TVM primitive", which is overwhelming I think) - return true; + // 'unknown' can be cast to any TVM value + return cast_to->get_width_on_stack() == 1; } bool TypeDataUnresolved::can_be_casted_with_as_operator(TypePtr cast_to) const { @@ -537,12 +556,45 @@ bool TypeDataVoid::can_be_casted_with_as_operator(TypePtr cast_to) const { } +// -------------------------------------------- +// can_hold_tvm_null_instead() +// +// assigning `null` to a primitive variable like `int?` / `cell?` can store TVM NULL inside the same slot +// (that's why the default implementation is just "return true", and most of types occupy 1 slot) +// but for complex variables, like `(int, int)?`, "null presence" is kept in a separate slot (UTag for union types) +// though still, tricky situations like `(int, ())?` can still "embed" TVM NULL in parallel with original value +// + +bool TypeDataNullable::can_hold_tvm_null_instead() const { + if (get_width_on_stack() != 1) { // `(int, int)?` / `()?` can not hold null instead + return false; // only `int?` / `cell?` / `StructWith1IntField?` can + } // and some tricky situations like `(int, ())?`, but not `(int?, ())?` + return !inner->can_hold_tvm_null_instead(); +} + +bool TypeDataTensor::can_hold_tvm_null_instead() const { + if (get_width_on_stack() != 1) { // `(int, int)` / `()` can not hold null instead, since null is 1 slot + return false; // only `((), int)` and similar can: + } // one item is width 1 (and not nullable), others are 0 + for (TypePtr item : items) { + if (item->get_width_on_stack() == 1 && !item->can_hold_tvm_null_instead()) { + return false; + } + } + return true; +} + +bool TypeDataVoid::can_hold_tvm_null_instead() const { + return false; +} + + // -------------------------------------------- // parsing type from tokens // // 
here we implement parsing types (mostly after colon) to TypeData // example: `var v: int` is TypeDataInt -// example: `var v: (builder, [cell])` is TypeDataTensor(TypeDataBuilder, TypeDataTypedTuple(TypeDataCell)) +// example: `var v: (builder?, [cell])` is TypeDataTensor(TypeDataNullable(TypeDataBuilder), TypeDataTypedTuple(TypeDataCell)) // example: `fun f(): ()` is TypeDataTensor() (an empty one) // // note, that unrecognized type names (MyEnum, MyStruct, T) are parsed as TypeDataUnresolved, @@ -633,7 +685,8 @@ static TypePtr parse_type_nullable(Lexer& lex) { TypePtr result = parse_simple_type(lex); if (lex.tok() == tok_question) { - lex.error("nullable types are not supported yet"); + lex.next(); + result = TypeDataNullable::create(result); } return result; diff --git a/tolk/type-system.h b/tolk/type-system.h index 482039e6..02e50fc2 100644 --- a/tolk/type-system.h +++ b/tolk/type-system.h @@ -50,6 +50,8 @@ class TypeData { const uint64_t type_id; // bits of flag_mask, to store often-used properties and return them without tree traversing const int flags; + // how many slots on a stack this type occupies (calculated on creation), e.g. 
`int`=1, `(int,int)`=2, `(int,int)?`=3 + const int width_on_stack; friend class TypeDataTypeIdCalculation; @@ -60,9 +62,10 @@ protected: flag_contains_unresolved_inside = 1 << 3, }; - explicit TypeData(uint64_t type_id, int flags_with_children) + explicit TypeData(uint64_t type_id, int flags_with_children, int width_on_stack) : type_id(type_id) - , flags(flags_with_children) { + , flags(flags_with_children) + , width_on_stack(width_on_stack) { } public: @@ -74,6 +77,7 @@ public: } uint64_t get_type_id() const { return type_id; } + int get_width_on_stack() const { return width_on_stack; } bool has_unknown_inside() const { return flags & flag_contains_unknown_inside; } bool has_genericT_inside() const { return flags & flag_contains_genericT_inside; } @@ -86,6 +90,10 @@ public: virtual bool can_rhs_be_assigned(TypePtr rhs) const = 0; virtual bool can_be_casted_with_as_operator(TypePtr cast_to) const = 0; + virtual bool can_hold_tvm_null_instead() const { + return true; + } + virtual void traverse(const TraverserCallbackT& callback) const { callback(this); } @@ -93,17 +101,13 @@ public: virtual TypePtr replace_children_custom(const ReplacerCallbackT& callback) const { return callback(this); } - - virtual int calc_width_on_stack() const { - return 1; - } }; /* * `int` is TypeDataInt, representation of TVM int. */ class TypeDataInt final : public TypeData { - TypeDataInt() : TypeData(1ULL, 0) {} + TypeDataInt() : TypeData(1ULL, 0, 1) {} static TypePtr singleton; friend void type_system_init(); @@ -121,7 +125,7 @@ public: * From the type system point of view, int and bool are different, not-autocastable types. */ class TypeDataBool final : public TypeData { - TypeDataBool() : TypeData(2ULL, 0) {} + TypeDataBool() : TypeData(2ULL, 0, 1) {} static TypePtr singleton; friend void type_system_init(); @@ -138,7 +142,7 @@ public: * `cell` is TypeDataCell, representation of TVM cell. 
*/ class TypeDataCell final : public TypeData { - TypeDataCell() : TypeData(3ULL, 0) {} + TypeDataCell() : TypeData(3ULL, 0, 1) {} static TypePtr singleton; friend void type_system_init(); @@ -155,7 +159,7 @@ public: * `slice` is TypeDataSlice, representation of TVM slice. */ class TypeDataSlice final : public TypeData { - TypeDataSlice() : TypeData(4ULL, 0) {} + TypeDataSlice() : TypeData(4ULL, 0, 1) {} static TypePtr singleton; friend void type_system_init(); @@ -172,7 +176,7 @@ public: * `builder` is TypeDataBuilder, representation of TVM builder. */ class TypeDataBuilder final : public TypeData { - TypeDataBuilder() : TypeData(5ULL, 0) {} + TypeDataBuilder() : TypeData(5ULL, 0, 1) {} static TypePtr singleton; friend void type_system_init(); @@ -191,7 +195,7 @@ public: * so getting its element results in TypeDataUnknown (which must be assigned/cast explicitly). */ class TypeDataTuple final : public TypeData { - TypeDataTuple() : TypeData(6ULL, 0) {} + TypeDataTuple() : TypeData(6ULL, 0, 1) {} static TypePtr singleton; friend void type_system_init(); @@ -209,7 +213,7 @@ public: * It's like "untyped callable", not compatible with other types. */ class TypeDataContinuation final : public TypeData { - TypeDataContinuation() : TypeData(7ULL, 0) {} + TypeDataContinuation() : TypeData(7ULL, 0, 1) {} static TypePtr singleton; friend void type_system_init(); @@ -224,12 +228,12 @@ public: /* * `null` has TypeDataNullLiteral type. - * Currently, it can be assigned to int/slice/etc., but later Tolk will have T? types and null safety. + * It can be assigned only to nullable types (`int?`, etc.), to ensure null safety. * Note, that `var i = null`, though valid (i would be constant null), fires an "always-null" compilation error * (it's much better for user to see an error here than when he passes this variable somewhere). 
*/ class TypeDataNullLiteral final : public TypeData { - TypeDataNullLiteral() : TypeData(8ULL, 0) {} + TypeDataNullLiteral() : TypeData(8ULL, 0, 1) {} static TypePtr singleton; friend void type_system_init(); @@ -242,6 +246,30 @@ public: bool can_be_casted_with_as_operator(TypePtr cast_to) const override; }; +/* + * `T?` is "nullable T". + * It can be converted to T either with ! (non-null assertion operator) or with smart casts. + */ +class TypeDataNullable final : public TypeData { + TypeDataNullable(uint64_t type_id, int children_flags, int width_on_stack, TypePtr inner) + : TypeData(type_id, children_flags, width_on_stack) + , inner(inner) {} + +public: + const TypePtr inner; + + static TypePtr create(TypePtr inner); + + bool is_primitive_nullable() const { return get_width_on_stack() == 1 && inner->get_width_on_stack() == 1; } + + std::string as_human_readable() const override; + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; + void traverse(const TraverserCallbackT& callback) const override; + TypePtr replace_children_custom(const ReplacerCallbackT& callback) const override; + bool can_hold_tvm_null_instead() const override; +}; + /* * `fun(int, int) -> void` is TypeDataFunCallable, think of is as a typed continuation. * A type of function `fun f(x: int) { return x; }` is actually `fun(int) -> int`. 
@@ -249,7 +277,7 @@ public: */ class TypeDataFunCallable final : public TypeData { TypeDataFunCallable(uint64_t type_id, int children_flags, std::vector&& params_types, TypePtr return_type) - : TypeData(type_id, children_flags) + : TypeData(type_id, children_flags, 1) , params_types(std::move(params_types)) , return_type(return_type) {} @@ -275,7 +303,7 @@ public: */ class TypeDataGenericT final : public TypeData { TypeDataGenericT(uint64_t type_id, std::string&& nameT) - : TypeData(type_id, flag_contains_genericT_inside) + : TypeData(type_id, flag_contains_genericT_inside, -999999) // width undefined until instantiated , nameT(std::move(nameT)) {} public: @@ -286,7 +314,6 @@ public: std::string as_human_readable() const override { return nameT; } bool can_rhs_be_assigned(TypePtr rhs) const override; bool can_be_casted_with_as_operator(TypePtr cast_to) const override; - int calc_width_on_stack() const override; }; /* @@ -296,8 +323,8 @@ public: * A tensor can be empty. */ class TypeDataTensor final : public TypeData { - TypeDataTensor(uint64_t type_id, int children_flags, std::vector&& items) - : TypeData(type_id, children_flags) + TypeDataTensor(uint64_t type_id, int children_flags, int width_on_stack, std::vector&& items) + : TypeData(type_id, children_flags, width_on_stack) , items(std::move(items)) {} public: @@ -312,7 +339,7 @@ public: bool can_be_casted_with_as_operator(TypePtr cast_to) const override; void traverse(const TraverserCallbackT& callback) const override; TypePtr replace_children_custom(const ReplacerCallbackT& callback) const override; - int calc_width_on_stack() const override; + bool can_hold_tvm_null_instead() const override; }; /* @@ -322,7 +349,7 @@ public: */ class TypeDataTypedTuple final : public TypeData { TypeDataTypedTuple(uint64_t type_id, int children_flags, std::vector&& items) - : TypeData(type_id, children_flags) + : TypeData(type_id, children_flags, 1) , items(std::move(items)) {} public: @@ -346,7 +373,7 @@ public: * The only 
thing available to do with unknown is to cast it: `catch (excNo, arg) { var i = arg as int; }` */ class TypeDataUnknown final : public TypeData { - TypeDataUnknown() : TypeData(20ULL, flag_contains_unknown_inside) {} + TypeDataUnknown() : TypeData(20ULL, flag_contains_unknown_inside, 1) {} static TypePtr singleton; friend void type_system_init(); @@ -367,7 +394,7 @@ public: */ class TypeDataUnresolved final : public TypeData { TypeDataUnresolved(uint64_t type_id, std::string&& text, SrcLocation loc) - : TypeData(type_id, flag_contains_unresolved_inside) + : TypeData(type_id, flag_contains_unresolved_inside, -999999) , text(std::move(text)) , loc(loc) {} @@ -380,7 +407,6 @@ public: std::string as_human_readable() const override { return text + "*"; } bool can_rhs_be_assigned(TypePtr rhs) const override; bool can_be_casted_with_as_operator(TypePtr cast_to) const override; - int calc_width_on_stack() const override; }; /* @@ -389,7 +415,7 @@ public: * Empty tensor is not compatible with void, although at IR level they are similar, 0 stack slots. 
*/ class TypeDataVoid final : public TypeData { - TypeDataVoid() : TypeData(10ULL, 0) {} + TypeDataVoid() : TypeData(10ULL, 0, 0) {} static TypePtr singleton; friend void type_system_init(); @@ -400,7 +426,7 @@ public: std::string as_human_readable() const override { return "void"; } bool can_rhs_be_assigned(TypePtr rhs) const override; bool can_be_casted_with_as_operator(TypePtr cast_to) const override; - int calc_width_on_stack() const override; + bool can_hold_tvm_null_instead() const override; }; From 7bcb8b895f4d7c18b3af08bb91ebf4b5e9d7a750 Mon Sep 17 00:00:00 2001 From: tolk-vm Date: Mon, 24 Feb 2025 20:14:16 +0300 Subject: [PATCH 57/61] [Tolk] Smart casts and control flow graph With the introduction of nullable types, we want the compiler to be smart in cases like > if (x == null) return; > // x is int now or > if (x == null) x = 0; > // x is int now These are called smart casts: when the type of variable at particular usage might differ from its declaration. Implementing smart casts is very challenging. They are based on building control-flow graph and handling every AST vertex with care. Actually, I represent cfg not as a "graph with edges". Instead, it's a "structured DFS" for the AST: 1) at every point of inferring, we have "current flow facts" 2) when we see an `if (...)`, we create two derived contexts 3) after `if`, finalize them at the end and unify 4) if we detect unreachable code, we mark that context In other words, we get the effect of a CFG but in a more direct approach. That's enough for AST-level data-flow. Smart casts work for local variables and tensor/tuple indices. Compilation errors have been reworked and now are more friendly. There are also compilation warnings for always true/false conditions inside if, assert, etc.
--- tolk-tester/tests/indexed-access.tolk | 4 +- tolk-tester/tests/invalid-generics-13.tolk | 2 +- tolk-tester/tests/invalid-generics-14.tolk | 17 + tolk-tester/tests/invalid-generics-7.tolk | 3 +- tolk-tester/tests/invalid-typing-11.tolk | 1 + tolk-tester/tests/invalid-typing-19.tolk | 12 + tolk-tester/tests/invalid-typing-20.tolk | 15 + tolk-tester/tests/invalid-typing-21.tolk | 14 + tolk-tester/tests/invalid-typing-22.tolk | 9 + tolk-tester/tests/invalid-typing-23.tolk | 15 + tolk-tester/tests/invalid-typing-24.tolk | 16 + tolk-tester/tests/invalid-typing-25.tolk | 14 + tolk-tester/tests/invalid-typing-26.tolk | 12 + tolk-tester/tests/invalid-typing-27.tolk | 18 + tolk-tester/tests/invalid-typing-28.tolk | 15 + tolk-tester/tests/invalid-typing-29.tolk | 14 + tolk-tester/tests/invalid-typing-30.tolk | 15 + tolk-tester/tests/invalid-typing-44.tolk | 9 + tolk-tester/tests/invalid-typing-45.tolk | 9 + tolk-tester/tests/never-type-tests.tolk | 28 + tolk-tester/tests/null-keyword.tolk | 27 +- tolk-tester/tests/nullable-tensors.tolk | 46 +- tolk-tester/tests/nullable-types.tolk | 2 +- tolk-tester/tests/smart-cast-tests.tolk | 678 ++++++++++++ tolk-tester/tests/unreachable-3.tolk | 22 + tolk-tester/tests/warnings-1.tolk | 28 + tolk-tester/tests/warnings-2.tolk | 26 + tolk/CMakeLists.txt | 3 +- tolk/abscode.cpp | 2 +- tolk/ast.cpp | 9 + tolk/ast.h | 7 + tolk/generics-helpers.cpp | 5 +- tolk/generics-helpers.h | 2 +- tolk/pipe-ast-to-legacy.cpp | 102 +- tolk/pipe-check-inferred-types.cpp | 586 +++++++++++ tolk/pipe-detect-unreachable.cpp | 138 --- tolk/pipe-infer-types-and-calls.cpp | 1110 ++++++++++---------- tolk/pipe-resolve-identifiers.cpp | 72 +- tolk/pipeline.h | 2 +- tolk/smart-casts-cfg.cpp | 472 +++++++++ tolk/smart-casts-cfg.h | 207 ++++ tolk/src-file.cpp | 13 +- tolk/src-file.h | 10 +- tolk/symtable.cpp | 3 - tolk/tolk.cpp | 2 +- tolk/type-system.cpp | 53 +- tolk/type-system.h | 21 + 47 files changed, 3057 insertions(+), 833 deletions(-) create mode 100644 
tolk-tester/tests/invalid-generics-14.tolk create mode 100644 tolk-tester/tests/invalid-typing-19.tolk create mode 100644 tolk-tester/tests/invalid-typing-20.tolk create mode 100644 tolk-tester/tests/invalid-typing-21.tolk create mode 100644 tolk-tester/tests/invalid-typing-22.tolk create mode 100644 tolk-tester/tests/invalid-typing-23.tolk create mode 100644 tolk-tester/tests/invalid-typing-24.tolk create mode 100644 tolk-tester/tests/invalid-typing-25.tolk create mode 100644 tolk-tester/tests/invalid-typing-26.tolk create mode 100644 tolk-tester/tests/invalid-typing-27.tolk create mode 100644 tolk-tester/tests/invalid-typing-28.tolk create mode 100644 tolk-tester/tests/invalid-typing-29.tolk create mode 100644 tolk-tester/tests/invalid-typing-30.tolk create mode 100644 tolk-tester/tests/invalid-typing-44.tolk create mode 100644 tolk-tester/tests/invalid-typing-45.tolk create mode 100644 tolk-tester/tests/never-type-tests.tolk create mode 100644 tolk-tester/tests/smart-cast-tests.tolk create mode 100644 tolk-tester/tests/unreachable-3.tolk create mode 100644 tolk-tester/tests/warnings-1.tolk create mode 100644 tolk-tester/tests/warnings-2.tolk create mode 100644 tolk/pipe-check-inferred-types.cpp delete mode 100644 tolk/pipe-detect-unreachable.cpp create mode 100644 tolk/smart-casts-cfg.cpp create mode 100644 tolk/smart-casts-cfg.h diff --git a/tolk-tester/tests/indexed-access.tolk b/tolk-tester/tests/indexed-access.tolk index 7915536e..e2bd3dd9 100644 --- a/tolk-tester/tests/indexed-access.tolk +++ b/tolk-tester/tests/indexed-access.tolk @@ -178,7 +178,7 @@ fun test114(f: int, s: int) { @method_id(115) fun test115() { var y = [[[[true]]]]; - return (y, y.0.0.0.0 = !y.0.0.0.0, y.0); + return (y, ((((y).0).0).0).0 = !y.0.0.0.0, y.0); } @method_id(116) @@ -248,7 +248,7 @@ fun test122(x: (int, int)) { @method_id(123) fun test123() { var t = [[10, 20]] as [[int,int]]?; - t!.0.0 = t!.0.1 = 100; + ((t!).0).0 = ((t!).0).1 = 100; return t; } diff --git 
a/tolk-tester/tests/invalid-generics-13.tolk b/tolk-tester/tests/invalid-generics-13.tolk index 7574bde7..d10e2174 100644 --- a/tolk-tester/tests/invalid-generics-13.tolk +++ b/tolk-tester/tests/invalid-generics-13.tolk @@ -6,6 +6,6 @@ fun cantApplyPlusOnNullable() { /** @compilation_should_fail -@stderr while instantiating generic function `calcSum` +@stderr in function `calcSum` @stderr can not apply operator `+` to `int?` and `int?` */ diff --git a/tolk-tester/tests/invalid-generics-14.tolk b/tolk-tester/tests/invalid-generics-14.tolk new file mode 100644 index 00000000..eb3adc92 --- /dev/null +++ b/tolk-tester/tests/invalid-generics-14.tolk @@ -0,0 +1,17 @@ +fun eq(v: X) {} + +fun cantDeduceWhenNotInferred() { + // at type inferring (before type checking) they are unknown + var (x, y) = 2; + + eq(x as int); // ok (since execution doesn't reach type checking) + eq(x); // ok (since execution doesn't reach type checking) + eq(x); +} + +/** +@compilation_should_fail +@stderr in function `cantDeduceWhenNotInferred` +@stderr can not deduce X for generic function `eq` +@stderr eq(x); + */ diff --git a/tolk-tester/tests/invalid-generics-7.tolk b/tolk-tester/tests/invalid-generics-7.tolk index b51bb82c..076b7804 100644 --- a/tolk-tester/tests/invalid-generics-7.tolk +++ b/tolk-tester/tests/invalid-generics-7.tolk @@ -11,8 +11,7 @@ fun foo(value: X) : X { /** @compilation_should_fail -@stderr while instantiating generic function `foo` -@stderr while instantiating generic function `bar` +@stderr in function `bar` @stderr can not convert type `int` to return type `slice` @stderr return 1 */ diff --git a/tolk-tester/tests/invalid-typing-11.tolk b/tolk-tester/tests/invalid-typing-11.tolk index d6aa09c3..f6e89d08 100644 --- a/tolk-tester/tests/invalid-typing-11.tolk +++ b/tolk-tester/tests/invalid-typing-11.tolk @@ -3,6 +3,7 @@ fun failBitwiseNotOnBool() { if (~eq) { return 0; } + return -1; } /** diff --git a/tolk-tester/tests/invalid-typing-19.tolk 
b/tolk-tester/tests/invalid-typing-19.tolk new file mode 100644 index 00000000..58b6c1fc --- /dev/null +++ b/tolk-tester/tests/invalid-typing-19.tolk @@ -0,0 +1,12 @@ +fun getNullableInt(): int? { return 5; } + +fun testCantApplyNotNullForAlwaysNull() { + var x: int? = getNullableInt(); + if (x != null) { return 0; } + return x! + 1; +} + +/** +@compilation_should_fail +@stderr operator `!` used for always null expression + */ diff --git a/tolk-tester/tests/invalid-typing-20.tolk b/tolk-tester/tests/invalid-typing-20.tolk new file mode 100644 index 00000000..457bc97a --- /dev/null +++ b/tolk-tester/tests/invalid-typing-20.tolk @@ -0,0 +1,15 @@ +fun getNullableInt(): int? { return 5; } + +fun testFlowContextAppliedInBinaryOperator() { + var x: int? = getNullableInt(); + var y: int? = getNullableInt(); + if ((y = null) < y) { + return -100; + } + return 0; +} + +/** +@compilation_should_fail +@stderr can not apply operator `<` to `null` and `null` + */ diff --git a/tolk-tester/tests/invalid-typing-21.tolk b/tolk-tester/tests/invalid-typing-21.tolk new file mode 100644 index 00000000..d2a815ee --- /dev/null +++ b/tolk-tester/tests/invalid-typing-21.tolk @@ -0,0 +1,14 @@ +fun getNullableInt(): int? { return 5; } + +fun testNeverTypeOccurs() { + var x: int? = getNullableInt(); + if (x == null && x != null) { + return x + 0; + } + return 0; +} + +/** +@compilation_should_fail +@stderr can not apply operator `+` to `never` and `int` + */ diff --git a/tolk-tester/tests/invalid-typing-22.tolk b/tolk-tester/tests/invalid-typing-22.tolk new file mode 100644 index 00000000..f962f364 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-22.tolk @@ -0,0 +1,9 @@ +fun testLogicalAndNotConditionDoesntAffect(x: int?) 
{ + var gt1 = x != null && x > 1; + return x + 0; +} + +/** +@compilation_should_fail +@stderr can not apply operator `+` to `int?` and `int` + */ diff --git a/tolk-tester/tests/invalid-typing-23.tolk b/tolk-tester/tests/invalid-typing-23.tolk new file mode 100644 index 00000000..74feed52 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-23.tolk @@ -0,0 +1,15 @@ +fun getTensor(): (int?, int?) { return (5, null); } + +fun testSmartCastsForFieldsDropAfterAssign() { + var t = getTensor(); + if (t.0 != null && t.1 != null) { + t = getTensor(); + return t.0 + t.1; + } + return -1; +} + +/** +@compilation_should_fail +@stderr can not apply operator `+` to `int?` and `int?` + */ diff --git a/tolk-tester/tests/invalid-typing-24.tolk b/tolk-tester/tests/invalid-typing-24.tolk new file mode 100644 index 00000000..75f61be9 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-24.tolk @@ -0,0 +1,16 @@ +fun getNullableInt(): int? { return 5; } + +fun getTensor(x: int?): (int?, int) { return (x, 0); } + +fun testSmartCastsDropAfterAssign() { + var x: int? = 0; + var y: int? = 0; + (getTensor(x = getNullableInt()).0, getTensor(y = getNullableInt()).0) = (x + y, x - y); + return x+y; +} + +/** +@compilation_should_fail +@stderr can not apply operator `+` to `int?` and `int?` +@stderr x + y, x - y + */ diff --git a/tolk-tester/tests/invalid-typing-25.tolk b/tolk-tester/tests/invalid-typing-25.tolk new file mode 100644 index 00000000..1621bab1 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-25.tolk @@ -0,0 +1,14 @@ +fun takeNullableTensor(mutate ij: (int, int)?) { } + +fun testSmartCastsDropAfterMutate() { + var x: (int, int)? 
= (1, 2); + return x.0; // ok + takeNullableTensor(mutate x); + return x.1; // error +} + +/** +@compilation_should_fail +@stderr type `(int, int)?` is not indexable +@stderr return x.1 + */ diff --git a/tolk-tester/tests/invalid-typing-26.tolk b/tolk-tester/tests/invalid-typing-26.tolk new file mode 100644 index 00000000..bf5a1165 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-26.tolk @@ -0,0 +1,12 @@ +fun getNullableInt(): int? { return 5; } + +fun testAssertThrowIsConditional() { + var (x, y) = (getNullableInt(), getNullableInt()); + assert(x != null) throw(y = 10); + return x + y; +} + +/** +@compilation_should_fail +@stderr can not apply operator `+` to `int` and `int?` + */ diff --git a/tolk-tester/tests/invalid-typing-27.tolk b/tolk-tester/tests/invalid-typing-27.tolk new file mode 100644 index 00000000..3861403b --- /dev/null +++ b/tolk-tester/tests/invalid-typing-27.tolk @@ -0,0 +1,18 @@ +fun assignNull2(mutate x: T1?, mutate y: T2?) { + if (false) { + x = null; + y = null; + } +} + +fun testSmartCastsDropAfterNullableGeneric() { + var (x: int?, y: int?) = (1, 2); + x * y; // ok + assignNull2(x, y); // treated like assignments to nullable + x << y; // error +} + +/** +@compilation_should_fail +@stderr can not apply operator `<<` to `int?` and `int?` + */ diff --git a/tolk-tester/tests/invalid-typing-28.tolk b/tolk-tester/tests/invalid-typing-28.tolk new file mode 100644 index 00000000..5d60ff22 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-28.tolk @@ -0,0 +1,15 @@ +fun getNullableInt(): int? { return 5; } + +fun testReassignInRedef() { + var t1: int? 
= getNullableInt(); + if (t1 != null) { + var (t1 redef, t2) = (getNullableInt(), 5); + return t1 + t2; + } + return -1; +} + +/** +@compilation_should_fail +@stderr can not apply operator `+` to `int?` and `int` + */ diff --git a/tolk-tester/tests/invalid-typing-29.tolk b/tolk-tester/tests/invalid-typing-29.tolk new file mode 100644 index 00000000..e8a4e5e2 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-29.tolk @@ -0,0 +1,14 @@ +fun getNullableInt(): int? { return 5; } + +fun testTryBodyDontSmartCast() { + var x = getNullableInt(); + try { + x = 5; + } catch {} + return x * 10; // x is not int here; for now, we have no exception edges, assuming it can be anywhere inside try +} + +/** +@compilation_should_fail +@stderr can not apply operator `*` to `int?` and `int` + */ diff --git a/tolk-tester/tests/invalid-typing-30.tolk b/tolk-tester/tests/invalid-typing-30.tolk new file mode 100644 index 00000000..53dfc5ca --- /dev/null +++ b/tolk-tester/tests/invalid-typing-30.tolk @@ -0,0 +1,15 @@ +fun getNullableInt(): int? { return 5; } + +fun testDoWhileCondition() { + var (x: int?, y: int?) 
= (10, 20); + do { + x = getNullableInt(); + y = getNullableInt(); + } while(x == null); + return x * y; // x is 100% int, but y is not +} + +/** +@compilation_should_fail +@stderr can not apply operator `*` to `int` and `int?` + */ diff --git a/tolk-tester/tests/invalid-typing-44.tolk b/tolk-tester/tests/invalid-typing-44.tolk new file mode 100644 index 00000000..2ec5d0e8 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-44.tolk @@ -0,0 +1,9 @@ +fun cantAssignIntToTensor() { + var (x, y) = 2; + x + y; +} + +/** +@compilation_should_fail +@stderr can not assign `int` to a tensor + */ diff --git a/tolk-tester/tests/invalid-typing-45.tolk b/tolk-tester/tests/invalid-typing-45.tolk new file mode 100644 index 00000000..b357b637 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-45.tolk @@ -0,0 +1,9 @@ +fun cantAssignSizesMismatch() { + var [x, y] = [2, 3, 4]; + x + y; +} + +/** +@compilation_should_fail +@stderr can not assign `[int, int, int]`, sizes mismatch + */ diff --git a/tolk-tester/tests/never-type-tests.tolk b/tolk-tester/tests/never-type-tests.tolk new file mode 100644 index 00000000..89447389 --- /dev/null +++ b/tolk-tester/tests/never-type-tests.tolk @@ -0,0 +1,28 @@ +fun takeInt(a: int) {} + +@method_id(101) +fun test1(x: int?) { + if (x == null && x != null) { + var y = x; + __expect_type(y, "never"); + __expect_type(y!, "never"); + // `never` type is assignable to anything, flow won't reach this point + var t: (int, int) = x; + t = y; + takeInt(x); + var cb: (int) -> int = x; + x as int?; + x as (int, int)?; + x as never; + return x; + } + return 123; +} + +fun main() { + __expect_type(test1, "(int?) -> int"); +} + +/** +@testcase | 101 | null | 123 + */ diff --git a/tolk-tester/tests/null-keyword.tolk b/tolk-tester/tests/null-keyword.tolk index 3ea0aaa2..65890a92 100644 --- a/tolk-tester/tests/null-keyword.tolk +++ b/tolk-tester/tests/null-keyword.tolk @@ -26,7 +26,9 @@ fun test2(x: int?) { if (null != x) { var y: int? 
= null; if (y != null) { return 10; } - return y; + if (10 < 20) { // always true at runtime (not at compile-time) + return y; + } } try { return x! + 10; // will throw, since not a number @@ -45,14 +47,6 @@ fun test3(x: int) { return myIsNull(x > 10 ? null : x); } -fun getUntypedNull() { - var untyped: null = null; - if (true) { - return untyped; - } - return untyped; -} - @method_id(104) fun test4(): null { var (_, (_, untyped: null)) = (3, (createEmptyTuple, null)); @@ -62,12 +56,6 @@ fun test4(): null { return untyped; } -@method_id(105) -fun test5() { - var n: slice? = getUntypedNull(); - return !(null == n) ? n!.loadInt(32) : 100; -} - @method_id(107) fun test7() { var b = beginCell().storeMaybeRef(null) as builder?; @@ -85,6 +73,7 @@ fun test8() { } fun main() { + // the compiler optimizes this at compile-time var i: int? = null; if (i == null) { return 1; @@ -99,7 +88,6 @@ fun main() { @testcase | 103 | 5 | 5 @testcase | 103 | 15 | -1 @testcase | 104 | | (null) -@testcase | 105 | | 100 @testcase | 107 | | -11 @fif_codegen """ @@ -127,12 +115,7 @@ fun main() { """ main PROC:<{ // - PUSHNULL // i - ISNULL // '2 - IFJMP:<{ // - 1 PUSHINT // '3=1 - }> // - 10 PUSHINT // '4=10 + 1 PUSHINT // '3=1 }> """ diff --git a/tolk-tester/tests/nullable-tensors.tolk b/tolk-tester/tests/nullable-tensors.tolk index 4008482f..d0720273 100644 --- a/tolk-tester/tests/nullable-tensors.tolk +++ b/tolk-tester/tests/nullable-tensors.tolk @@ -73,7 +73,7 @@ fun test104() { var t1_1: (int, int)? = (1, 2); var t1_2: (int, int)? = t1_1; var t1_3: (int, int)? = t1_1!; - var t2_1: (int, int)? = null; + var t2_1: (int, int)? = getNullableTensor(null); var t2_2 = t2_1; return (t1_3, t2_2); } @@ -101,9 +101,12 @@ fun test108(x1: (int, int)) { incrementTensorComponents(mutate x1); x1.incrementTensorComponents(); var x2: (int, int)? 
= x1; + __expect_type(x2, "(int, int)"); x2.incrementNullableTensorComponents().incrementNullableTensorComponents(); incrementNullableTensorComponents(mutate x2); + __expect_type(x2, "(int, int)?"); var x3: (int, int)? = null; + __expect_type(x3, "null"); x3.incrementNullableTensorComponents().incrementNullableTensorComponents(); incrementNullableTensorComponents(mutate x3); return (x1, x2, x3); @@ -148,7 +151,7 @@ fun test111() { var x = (1, 2); assignFirstComponent(mutate x, 50); var x2: (int, int)? = null; - var x3 = x2; + var x3 = x2 as (int, int)?; assignFirstComponentNullable(mutate x2, 30); assignFirstComponentNullable(mutate x3, 70); g110_1 = (1, 2); @@ -361,23 +364,36 @@ fun test132() { return (result, 777, aln1, aln2, doubleNulls.1 == null, doubleNulls); } +@method_id(133) +fun test133() { + var x: (int, int)? = (10, 20); + return sumOfTensor(x) + x.0 + x.1; // smart casted +} + +@method_id(134) +fun test134(): (int, int)? { + var x: (int, int)? = (10, 20); + incrementTensorComponents(mutate x); // smart casted + return x; +} + fun getNormalNullableTensorWidth1(vLess100: int?): ([int?], ())? { - if (vLess100 != null && vLess100! >= 100) { + if (vLess100 != null && vLess100 >= 100) { return null; } return ([vLess100], ()); // such a nullable tensor can store NULL in the same slot } fun getTrickyNullableTensorWidth1(vLess100: int?): (int?, ())? { - if (vLess100 != null && vLess100! >= 100) { + if (vLess100 != null && vLess100 >= 100) { return null; } return (vLess100, ()); // such a nullable tensor requires an extra stack slot for null presence } fun getEvenTrickierNullableWidth1(vLess100: int?): ((), (int?, ()), ())? { - if (vLess100 != null && vLess100! 
>= 100) { + if (vLess100 != null && vLess100 >= 100) { return null; } return ((), (vLess100, ()), ()); @@ -406,35 +422,35 @@ fun main(){} /** @testcase | 101 | | 1 2 -1 @testcase | 102 | | 1 2 -1 (null) (null) 0 -@testcase | 103 | 1 2 | 3 3 0 1 2 -1 -@testcase | 104 | | 1 2 -1 (null) (null) 0 +@testcase | 103 | 1 2 | 3 3 0 1 2 +@testcase | 104 | | 1 2 (null) (null) 0 @testcase | 105 | | (null) (null) (null) 0 1 2 3 -1 -@testcase | 106 | | 1 2 -1 +@testcase | 106 | | 1 2 @testcase | 107 | | 0 0 -1 0 0 -1 @testcase | 108 | 5 6 | 7 8 10 11 -1 (null) (null) 0 @testcase | 109 | | 0 0 -1 0 -1 0 0 -1 -1 @testcase | 110 | | 3 4 (null) (null) 0 6 7 -1 @testcase | 111 | | 50 30 70 90 100 -@testcase | 112 | | 12 22 -1 +@testcase | 112 | | 12 22 @testcase | 113 | | -1 @testcase | 114 | | (null) (null) (null) 0 (null) (null) (null) 0 @testcase | 115 | | 2 3 7 (null) (null) 0 5 0 -1 0 @testcase | 116 | -1 | (null) (null) 0 (null) (null) 0 @testcase | 116 | 0 | 1 2 -1 1 2 -1 -@testcase | 117 | | (null) (null) 0 1 3 +@testcase | 117 | | (null) 1 3 @testcase | 118 | 5 | 5 10 -1 @testcase | 118 | null | (null) (null) 0 -@testcase | 119 | | (null) (null) 0 (null) (null) 0 1 2 -1 100 +@testcase | 119 | | (null) (null) 1 2 -1 100 @testcase | 120 | -1 | (null) (null) 0 @testcase | 120 | 0 | 1 2 -1 @testcase | 121 | | [ 1 [ 3 4 ] ] @testcase | 122 | 0 | [ 1 [ 3 4 ] 4 (null) ] @testcase | 122 | -1 | [ 1 (null) 4 (null) ] @testcase | 123 | | 1 3 4 -1 -@testcase | 124 | 0 | 1 3 4 -1 4 (null) (null) 0 -1 -@testcase | 124 | -1 | 1 (null) (null) 0 4 (null) (null) 0 -1 +@testcase | 124 | 0 | 1 3 4 -1 4 (null) (null) 0 +@testcase | 124 | -1 | 1 (null) (null) 0 4 (null) (null) 0 @testcase | 125 | | 3 -@testcase | 126 | | 1 (null) (null) 0 2 +@testcase | 126 | | 1 (null) 2 @testcase | 127 | 1 | 1 (null) (null) 0 2 @testcase | 127 | 2 | 1 2 3 -1 4 @testcase | 127 | 3 | 1 (null) (null) 0 5 @@ -447,6 +463,8 @@ fun main(){} @testcase | 130 | -1 | 1 (null) (null) 0 @testcase | 131 | | -1 777 0 777 777 
777 0 0 -1 -1 777 -1 -1 -1 777 @testcase | 132 | | -1 0 -1 0 777 (null) (null) -1 0 0 +@testcase | 133 | | 60 +@testcase | 134 | | 11 21 -1 @testcase | 135 | | [ 10 ] [ (null) ] (null) 777 10 -1 (null) -1 (null) 0 777 10 -1 (null) -1 (null) 0 777 0 0 -1 0 0 -1 0 0 -1 777 0 -1 0 0 -1 0 @fif_codegen diff --git a/tolk-tester/tests/nullable-types.tolk b/tolk-tester/tests/nullable-types.tolk index ebabb80d..24aa7f8a 100644 --- a/tolk-tester/tests/nullable-types.tolk +++ b/tolk-tester/tests/nullable-types.tolk @@ -80,7 +80,7 @@ fun test107() { @method_id(108) fun test108() { var (a, b: cell?, c) = (1, beginCell().endCell(), 3); - b = null; + if (10>3) { b = null; } return a + (b == null ? 0 : b!.beginParse().loadInt(32)) + c; } diff --git a/tolk-tester/tests/smart-cast-tests.tolk b/tolk-tester/tests/smart-cast-tests.tolk new file mode 100644 index 00000000..4d71bb63 --- /dev/null +++ b/tolk-tester/tests/smart-cast-tests.tolk @@ -0,0 +1,678 @@ +// the goal of this file is not only to @testcase results — +// but to check that this file compiles + +fun getNullableInt(): int? { return 5; } +fun getNullableSlice(): slice? { return null; } +fun takeNullableInt(a: int?) {} +fun takeNullableSlice(a: slice?) 
{} +fun increment(mutate self: int) { self += 1; } +fun assignToInt(mutate self: int, value: int) { self = value; } +fun assignToNullableInt(mutate self: int?, value: int) { self = value; } +fun sameTensor(t: (int, int)) { return t; } +fun sameTensor2(t: (int?, (slice, slice, slice, builder)?)) { return t; } +fun eq(v: T) { return v; } +fun getTwo(): X { return 2 as X; } + +fun test1(): int { + var x = getNullableInt(); + var y = getNullableInt(); + if (x != null && y != null) { + __expect_type(x, "int"); + __expect_type(y, "int"); + return x + y; + } + return -1; +} + +fun test2() { + var (x, y) = (getNullableInt(), getNullableInt()); + if (x == null || y == null) { + return null; + } + __expect_type(x, "int"); + __expect_type(y, "int"); + return x + y; +} + +fun test3(): int { + var ([x, y]) = [getNullableInt(), getNullableInt()]; + if (x != null) { + if (((y)) != null) { + __expect_type(x, "int"); + __expect_type(y, "int"); + return x + y; + } + return x; + } + if (random() > -1) { + if (y == null) { return -1; } + else { return y; } + } + return 0; +} + +fun test4() { + var x = getNullableInt(); + if (x != null && x > 0) { + var x = getNullableInt(); + if ((x) != null && x + 10 < 0) { + var x = getNullableInt(); + return 10 > 3 && 10 < 10 && x != null && x + 8 > 10; + } + } + if (x != null && x < 1) { + return false; + } + if (x == null && x == null) { + __expect_type(x, "null"); + return true; + } + return x < x + 3; +} + +fun test5() { + var (a, (b, c)) = (getNullableInt(), (getNullableInt(), getNullableInt())); + if (a == null) { return -1; } + if (!(b != null)) { return -2; } + if (random() ? c == null && c == null : c == null) { return -3; } + return a + b + c; +} + +fun test6() { + var a: int? = 5; + __expect_type(a, "int"); + __expect_type(a != null ? a : null, "int"); + __expect_type(a == null ? 
"" : a, "int"); + takeNullableInt(a); + __expect_type(a, "int"); + if (random()) { + a = null; + } else { + if (random()) { a = null; } + else { a = null; } + } + __expect_type(a, "null"); + takeNullableSlice(a); // ok, `slice?` is `slice | null`, here a definitely null + var b: int? = true ? null : "sl"; + __expect_type(b, "null"); + takeNullableInt(b); + takeNullableSlice(b); // same reason + var c: int? = 10; + __expect_type(c, "int"); + takeNullableSlice(c = null); +} + +fun test7() { + var (a, b, c, d) = (getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt()); + if (a == null && true) { return -1; } + if (true && true && 1 && !0 && b == null) { return -2; } + if (true ? c == null && (((c))) == null && true : false) { return -3; } + if (!true ? random() > 0 : a != null && (d == null && b != null)) { return -4; } + return a + b + c + d; +} + +fun test8(x: int?, y: int?) { + var allGt1 = x != null && x > 1 && y != null && y > 1; + var xGtY = x != null && y != null && x > y; + var xLtEq0 = x == null || x < 0; + (x = 0) < random() || x > 10; + return x + 0; +} + +fun test9() { + var x = getNullableInt(); + var y = getNullableInt(); + if (x == null || y == null) { + return -1; + } + __expect_type(x, "int"); + __expect_type(y, "int"); + return x + y; +} + +fun test10(): int { + var (x, y) = (getNullableInt(), getNullableInt()); + if (x == null) { + if (y == null) { return -1; } + __expect_type(x, "null"); + __expect_type(y, "int"); + return y; + } + if (y == null) { + return x; + } + __expect_type(x, "int"); + __expect_type(y, "int"); + return x + y; +} + +fun test11() { + var [x, y] = [getNullableInt(), getNullableInt()]; + if (random()) { return x == null || y == null ? -1 : x + y; } + if (true && (x == null || y == null) && !!true) { return 0; } + return x + y; +} + +fun test12() { + var (x, y) = (getNullableInt(), getNullableInt()); + if (random() ? 
x == null || y == null : x == null || y == null) { return -1; } + __expect_type(x, "int"); + __expect_type(y, "int"); + return x + y; +} + +fun test13() { + var x: int? = getNullableInt(); + var y: int? = 10; + var z = getNullableInt(); + var w = getNullableInt(); + beginCell().storeInt(x!, 32).storeInt(x = getNullableInt()!, 32).storeInt(x, 32) + .storeInt(y, 32).storeInt(z = 10, 32).storeInt(x + y + z, 32) + .storeInt(w == null ? -1 : w, 32).storeInt(!(null == w) ? w : -1, 32); +} + +fun test14() { + var (x, y) = (getNullableInt(), getNullableInt()); + if (x == null) { + x = 0; + } + if (y == null) { + if (random()) { return 0; } + else { y = 0; } + } + return x + y; +} + +fun test20() { + var t = (getNullableInt(), getNullableInt()); + if (t.0 != null && t.1 != null) { + __expect_type(t.0, "int"); + __expect_type(t.1, "int"); + return t.0 + t.1; + } + t.0 = 10; + if (t.1 == null) { + t.1 = 20; + } + __expect_type(t.0, "int"); + __expect_type(t.1, "int"); + return t.0 + t.1; +} + +fun test21() { + var t = (getNullableInt(), (getNullableInt(), getNullableInt())); + if (t.0 != null && t.1.0 != null) { + if (t.1.1 != null) { return t.0 + t.1.0 + t.1.1; } + return t.0 + t.1.0; + } + if (t.0 != null) { + return t.0 + 0; + } + __expect_type(t.0, "null"); + __expect_type(t.1.0, "int?"); + return t.1.0 == null ? -1 : t.1.0 + 0; +} + +fun test22() { + var t = (getNullableInt(), (getNullableInt(), getNullableInt())); + if (t.0 == null || t.1.0 == null || t.1.1 == null) { + return -1; + } + return t.0 + t.1.0 + t.1.1; +} + +@method_id(123) +fun test23() { + var (x: int?, y: int?, z: int?) = (getNullableInt(), getNullableInt(), getNullableInt()); + ((x = 1, 0).0, (y = 2, 1).0) = (3, z = 4); + return x + y + z; +} + +@method_id(124) +fun test24(x: int?) { + if (x == null) { + __expect_type(x, "null"); + assignToNullableInt(mutate x, 10); + __expect_type(x, "int?"); + x.assignToNullableInt(x! 
+ 5); + } else { + __expect_type(x, "int"); + increment(mutate x); + x.increment(); + __expect_type(x, "int"); + } + __expect_type(x, "int?"); + return x; +} + +fun test25() { + var x = (getNullableInt(), getNullableInt(), getNullableInt()); + x.0 = x.2 = random(); + return (x.0) + ((x.2)); +} + +fun test26() { + var x = [getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt(), + getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt()]; + if (~(x.0 = random())) { return; } + if ((x.1 = random()) < (x.2 = random())) { return; } + else if (!(x.2 <=> (x.3 = random()))) { return; } + x.5 = (x.4 = random()) ? (x.6 = random()) : (x.6 = random()); + if ((x.7 = random()) as int) { return; } + if (((((x.8 = random()) != null)))) { return; } + if ([x.1, (x.9 = random())!].1) { return; } + val result = x.0+x.1+x.2+x.3+x.4+x.5+x.6+x.7+x.8+x.9; +} + +fun test27() { + var (x, _) = ([getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt(), + getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt()], []); + +(x.0 = random()); + x.0 += [((x.1 = random()) < (x.2 = random() + x.1)) as int].0; + !(x.2 <=> (x.3 = random() + x.2)); + x.5 = (x.4 = random()) ? (x.6 = random()) : (x.6 = random()); + (x.7 = random()) as int; + (((((x.8 = random()) != null)))); + [x.1, (x.9 = random())!].1; + return x.0+x.1+x.2+x.3+x.4+x.5+x.6+x.7+x.8+x.9; +} + +fun test28() { + var x = (getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt()); + __expect_type((x.0 = random(), x.0 += (x.1 = random()) as int, !(x.1 <=> (x.2 = random() + x.0)) == null, (x.3 = random()) ? x.3 : (!x.3) as int), + "(int, int, bool, int)"); +} + +fun test29() { + var x = (getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt()); + __expect_type([x.0 = random(), ((x.0 += (x.1 = random()) as int)), !(x.1 <=> (x.2 = random() + x.0)) == null, (x.3 = random()) ? 
x.3 : (!x.3) as int], + "[int, int, bool, int]"); +} + +@method_id(130) +fun test30(initial5: bool) { + var t: (int?, (int?, (int?, int?))) = initial5 + ? (getNullableInt(), (getNullableInt(), (getNullableInt(), getNullableInt()))) + : (null, (null, (null, null))); + if (t.0 == null || t.1.0 == null || t.1.1.0 == null || t.1.1.1 == null) { + if (t.1.0 == null || t.1.1.0 == null) { + if (t.1.1.0 == null) { + t.1.1.0 = 4; + } + __expect_type(t.1.1.0, "int"); + __expect_type(t.1.1.1, "int?"); + __expect_type(t.1.0, "int?"); + t.1.1.1 = 3; + t.1.0 = 2; + __expect_type(t.1.1.1, "int"); + __expect_type(t.1.0, "int"); + } + if (((((t.1.1.1)))) != null) {} + else { t.1.1.1 = 3; } + t.0 = 1; + } + return t.0 + t.1.0 + t.1.1.0 + t.1.1.1; +} + +fun test31() { + var t = (getNullableInt(), getNullableInt()); + t.0 == null ? (t.0, t.1) = (1, 2) : (t.1, t.0) = (4, 3); + return t.0 + t.1; +} + +@method_id(132) +fun test32() { + var t: (int?, (int?, int?)?, (int?, int?)) = (getNullableInt(), (getNullableInt(), getNullableInt()), (getNullableInt(), getNullableInt())); + if (t.0 == null) { return -1; } + t.1 != null && t.1.0 == null ? t.1 = (1, 2) : t.1 = (3, 4); + if (t.2.1 != null) { t.2.0 = 1; t.2.1 = 2; } + else { [t.2.0, t.2.1] = [3, 4]; } + return t.0 + t.1.0! + t.1.1! 
+ t.2.0 + t.2.1; +} + +@method_id(133) +fun test33(): int { + var x = getNullableInt(); + repeat (eq(x = 5)) { + __expect_type(x, "int"); + increment(mutate x); + } + return x; +} + +fun test34() { + var (x, y) = (getNullableInt(), getNullableInt()); + if (random()) { throw (x = 1, y = 2); } + else { throw (x = 3, y = (1, getNullableInt()!).1); } + return x + y; +} + +fun test35() { + var (x, y, z, t) = (getNullableInt(), getNullableInt(), getNullableInt(), (getNullableInt(), getNullableInt())); + assert (x != null, 404); + assert (t.0 != null && true && !(t.1 == null) && !(z = 4)) throw (y = 404); + __expect_type(y, "int?"); + return x + t.0 + t.1 + z; +} + +fun test36() { + var x = getNullableInt(); + assert (x == null, x + 0); // check that x is int there + __expect_type(x, "null"); +} + +fun test37() { + var (x, code) = (getNullableInt()!, getNullableInt()); + try { + } catch(code) { + x = 20; + return x + code; // code is scoped + } + return code == null ? x : x + code; +} + +fun assignNull2(mutate x: T1?, mutate y: T2?) { + x = null; + y = null; +} + +fun test38() { + var (x: int?, y: int?) = (1, 2); + __expect_type(x, "int"); + __expect_type(y, "int"); + assignNull2(mutate x, mutate y); + __expect_type(x, "int?"); + __expect_type(y, "int?"); + if (x != null) { + if (y == null) { return -1; } + return x + y; + } + var t: (int?, slice?) = (null, null); + if (!false) { t.0 = 1; } + if (true) { t.1 = beginCell().endCell().beginParse(); } + __expect_type(t.0, "int"); + __expect_type(t.1, "slice"); + t.0 + t.1.loadInt(32); + assignNull2(mutate t.0, mutate t.1); + __expect_type(t.0, "int?"); + __expect_type(t.1, "slice?"); + t.0 != null && t.1 != null ? t.0 + loadInt(mutate t.1, 32) : -1; + return t.0 != null && t.1 != null ? t.0 + loadInt(mutate t.1, 32) : -1; +} + +@method_id(139) +fun test39() { + var x: (int?, int?)? = (4, null); + x.1 = 10; + x.1 += 1; + x!.1 += 1; + return (x!.0! + x.1); +} + +@method_id(140) +fun test40(second: int?) { + var x: (int?, int?)? 
= (4, second); + if (x.1 != null) { + val result = x.1 + x!.1 + x!!.1 + x.1! + x!!.1!!; + } + if (x!.1 != null) { + val result = x.1 + x!.1 + x!!.1 + x.1! + x!!.1!!; + } + if (!(x!!.1 != null)) { + return -1; + } + return x.1 + x!.1 + x!!.1 + x.1! + x!!.1!!; +} + +@method_id(141) +fun test41() { + var t: (int, int)? = null; + return sameTensor(t = (1, 2)); +} + +@method_id(142) +fun test42() { + var t: (int?, (int?, (int, int)?)?) = (getNullableInt(), (1, (2, 3))); + t.1 = (3,null); + __expect_type(t.1, "(int?, (int, int)?)"); + __expect_type(t, "(int?, (int?, (int, int)?)?)"); + return (t, t.1); +} + +@method_id(143) +fun test43() { + var t1: ((int, int), int?) = ((1, 2), 3); + var t2: ((int?, int?), (int?,int?)?) = ((null, null), (null, 5)); + t2.0 = t1.0 = (10, 11); + t2.1 = t1.1 = null; + return (t1, t2); +} + +@method_id(144) +fun test44() { + var t1: ((int, int), int?) = ((1, 2), 3); + var t2: ((int?, int?), (int?,int?)?) = ((null, null), (null, 5)); + t1.0 = t2.0 = (10, 11); + t1.1 = t2.1 = null; + __expect_type(t1, "((int, int), int?)"); + __expect_type(t2, "((int?, int?), (int?, int?)?)"); + return (t1, t2); +} + +@method_id(145) +fun test45() { + var t: (int?, (int?, (int, int)?)?) = (getNullableInt(), (1, (2, 3))); + var t2 = sameTensor2(t.1 = (3,null)); + return (t, t2, t.1); +} + +fun autoInfer46() { + var t1: int? = 3; + var t2: (int, int)? = (4, 5); + __expect_type(t1, "int"); + __expect_type(t2, "(int, int)"); + return (t1, t2); // proven to be not null, inferred (int, (int,int)) +} + +@method_id(146) +fun test46() { + var r46_1: (int, (int,int)) = autoInfer46(); + var r46_2: (int, (int,int)?) = autoInfer46(); + return (r46_1, r46_2); +} + +@method_id(147) +fun test47() { + var t1: int? = 3; + var t2: (int, int)? 
= (4, 5); + t1 = t2 = null; + __expect_type(t1, "null"); + __expect_type(t2, "null"); + var result = (t1, t2); // proven to be always null, inferred (null, null), 2 slots on a stack + return (result, 100, result.1, 100, t2 as (int, int)?); +} + +fun test48() { + var t1: int? = getNullableInt(); + if (t1 != null) { + var (t1 redef, t2) = (10, 5); + return t1 + t2; + var t2 redef = getNullableInt()!; + return t1 + t2; + } + return -1; +} + +fun test49(x: int?) { + while (x == null) { + x = getNullableInt(); + } + __expect_type(x, "int"); + return x + 1; +} + +fun test50() { + var (x: int?, y: int?) = (1, 2); + do { + x = getNullableInt(); + y = getNullableInt(); + } while (x == null || y == null); + return x + y; +} + +fun test51() { + while (true) { return; } + // test that no error "control reaches end of function" +} + +fun test52() { + do { } while (true); +} + +fun test53() { + var x1: int? = getNullableInt(); + var x2: int? = 5; + var x3: int? = 5; + var x10: int? = null; + var x11: int? = 5; + var x12: int? = 5; + while (x1 != null) { + __expect_type(x1, "int"); // because condition + __expect_type(x2, "int?"); // because re-assigned + __expect_type(x3, "int?"); // because re-assigned + __expect_type(x10, "null"); + __expect_type(x11, "int"); + x1 = getNullableInt(); + __expect_type(x1, "int?"); + assignToNullableInt(mutate x2, 5); + x3.assignToNullableInt(5); + x11 = 10; + assignToInt(mutate x12, 5); + } + __expect_type(x1, "null"); + __expect_type(x2, "int?"); + __expect_type(x3, "int?"); +} + +fun test54() { + var x1: int? = null; + var x2: int? = 5; + var x3: int? = 5; + var x10: int? = null; + var x11: int? = 5; + var x12: int? 
= 5; + do { + __expect_type(x1, "int?"); // because re-assigned + __expect_type(x2, "int?"); // because re-assigned + __expect_type(x3, "int?"); // because re-assigned + __expect_type(x10, "null"); + __expect_type(x11, "int"); + x1 = getNullableInt(); + __expect_type(x1, "int?"); + assignToNullableInt(mutate x2, 5); + if (random()) { x3.assignToNullableInt(5); } + x11 = 10; + assignToInt(mutate x12, 5); + } while (x1 != null); + __expect_type(x1, "null"); + __expect_type(x2, "int?"); + __expect_type(x3, "int?"); +} + +fun eq55(v: T) { return v; } + +fun test55() { + var x: int? = 4; + while (true) { + // currently, generic functions are instantiated at the type inferring step + // in case of loops, type inferring is re-enterable + // first iteration: x is int, eq instantiated + // second (final) iteration: x is int?, eq instantiated + // (checked via codegen) + eq55(x); + __expect_type(x, "int?"); // types are checked (unlike generics instantiated) after inferring + x = random() ? 1 : null; + } + __expect_type(x, "int?"); +} + +fun test56() { + var i: int? = null; + var (j: int?, k: int?) = (null, null); + __expect_type(i, "null"); + __expect_type(k, "null"); + i = getTwo(); + [j, ((k))] = [getTwo(), ((getTwo()))]; + __expect_type(i, "int?"); + __expect_type(j, "int?"); + __expect_type(k, "int?"); +} + +fun test57(mutate x: int?): int { + if (x == null) { x = 5; } + else { + if (x < 10) { x = 10; } + else { x = 20; } + } + if (x != null) { + return 123; + } + __expect_type(x, "int"); + // no "return" needed, because end of function is unreachable +} + +@method_id(158) +fun test58() { + var (x1, x2: int?) 
= (getNullableInt(), null); + return (test57(mutate x1), x1, test57(mutate x2), x2); +} + +fun test59() { + var (x1: int?, x2, x3) = (getNullableInt()!, getNullableInt(), 5); + if ((x2 = x3) != null) { + __expect_type(x2, "int"); + } + __expect_type(x2, "int"); + if ((x2 = getNullableInt()) != null) { + __expect_type(x2, "int"); + } + __expect_type(x2, "int?"); + if (((x1) = x2) == null) { + return; + } + __expect_type(x1, "int"); +} + + + +fun main(x: int?): int { + return x == null ? -1 : x; +} + +/** +@testcase | 0 | 1 | 1 +@testcase | 123 | | 7 +@testcase | 124 | 4 | 6 +@testcase | 124 | null | 15 +@testcase | 130 | -1 | 20 +@testcase | 130 | 0 | 10 +@testcase | 132 | | 15 +@testcase | 133 | | 10 +@testcase | 139 | | 16 +@testcase | 140 | 5 | 25 +@testcase | 141 | | 1 2 +@testcase | 142 | | 5 3 (null) (null) 0 -1 3 (null) (null) 0 +@testcase | 143 | | 10 11 (null) 10 11 (null) (null) 0 +@testcase | 144 | | 10 11 (null) 10 11 (null) (null) 0 +@testcase | 145 | | 5 3 (null) (null) 0 -1 3 (null) (null) (null) (null) 0 3 (null) (null) 0 +@testcase | 146 | | 3 4 5 3 4 5 -1 +@testcase | 147 | | (null) (null) 100 (null) 100 (null) (null) 0 +@testcase | 158 | | 123 10 123 5 + +@stderr warning: expression of type `int` is always not null, this condition is always true +@stderr warning: unreachable code +@stderr var t2 redef = getNullableInt()!; + +@fif_codegen eq55 PROC:<{ +@fif_codegen eq55 PROC:<{ +*/ diff --git a/tolk-tester/tests/unreachable-3.tolk b/tolk-tester/tests/unreachable-3.tolk new file mode 100644 index 00000000..fab21fd2 --- /dev/null +++ b/tolk-tester/tests/unreachable-3.tolk @@ -0,0 +1,22 @@ +fun main(x: int?) 
{ + if (x != null && x == null) { + return 1 + 2; + } + if (x == null) { + return -1; + } + if (x != null) { + return -2; + } + return 3 + 4; +} + +/** +@testcase | 0 | 5 | -2 +@testcase | 0 | null | -1 + +@stderr warning: variable `x` of type `int` is always not null +@stderr if (x != null) +@stderr warning: unreachable code +@stderr return 3 + 4 + */ diff --git a/tolk-tester/tests/warnings-1.tolk b/tolk-tester/tests/warnings-1.tolk new file mode 100644 index 00000000..040057d1 --- /dev/null +++ b/tolk-tester/tests/warnings-1.tolk @@ -0,0 +1,28 @@ +fun getNullableInt(): int? { return null; } + +fun main() { + var c: int? = 6; + __expect_type(c, "int"); + if (c == null) {} + + var d: int? = c; + if (((d)) != null && tupleSize(createEmptyTuple())) {} + + var e: int? = getNullableInt(); + if (e != null) { + return true; + } + __expect_type(e, "null"); + null == e; + + return null != null; +} + +/** +@testcase | 0 | | 0 + +@stderr warning: variable `c` of type `int` is always not null, this condition is always false +@stderr warning: variable `d` of type `int` is always not null, this condition is always true +@stderr warning: variable `e` is always null, this condition is always true +@stderr warning: expression is always null, this condition is always false + */ diff --git a/tolk-tester/tests/warnings-2.tolk b/tolk-tester/tests/warnings-2.tolk new file mode 100644 index 00000000..57ecb21a --- /dev/null +++ b/tolk-tester/tests/warnings-2.tolk @@ -0,0 +1,26 @@ +fun main() { + var (a, b, c, d, e) = (1, beginCell(), beginCell().endCell().beginParse(), [1], true as bool?); + + var alwaysInt = a != null ? 1 : null; + __expect_type(alwaysInt, "int"); + + if (!(c == null)) { + if (10 < 3) { assert(b == null, 100); } + } + while (d == null || false) {} + + return e! 
!= null; +} + +/** +@testcase | 0 | | -1 + +@stderr warning: variable `a` of type `int` is always not null, this condition is always true +@stderr warning: condition of ternary operator is always true +@stderr warning: variable `c` of type `slice` is always not null, this condition is always false +@stderr warning: condition of `if` is always true +@stderr warning: variable `b` of type `builder` is always not null, this condition is always false +@stderr warning: condition of `assert` is always false +@stderr warning: condition of `while` is always false +@stderr warning: expression of type `bool` is always not null, this condition is always true + */ diff --git a/tolk/CMakeLists.txt b/tolk/CMakeLists.txt index 9d720024..de408115 100644 --- a/tolk/CMakeLists.txt +++ b/tolk/CMakeLists.txt @@ -12,8 +12,8 @@ set(TOLK_SOURCE pipe-register-symbols.cpp pipe-resolve-identifiers.cpp pipe-calc-rvalue-lvalue.cpp - pipe-detect-unreachable.cpp pipe-infer-types-and-calls.cpp + pipe-check-inferred-types.cpp pipe-refine-lvalue-for-mutate.cpp pipe-check-rvalue-lvalue.cpp pipe-check-pure-impure.cpp @@ -23,6 +23,7 @@ set(TOLK_SOURCE pipe-find-unused-symbols.cpp pipe-generate-fif-output.cpp type-system.cpp + smart-casts-cfg.cpp generics-helpers.cpp abscode.cpp analyzer.cpp diff --git a/tolk/abscode.cpp b/tolk/abscode.cpp index 72da0ac8..fc160984 100644 --- a/tolk/abscode.cpp +++ b/tolk/abscode.cpp @@ -414,7 +414,7 @@ std::vector CodeBlob::create_var(TypePtr var_type, SrcLocation loc, s std::string null_flag_name = name.empty() ? 
name : name + ".NNFlag"; ir_idx = create_var(t_nullable->inner, loc, std::move(name)); ir_idx.emplace_back(create_var(TypeDataBool::create(), loc, std::move(null_flag_name))[0]); - } else if (var_type != TypeDataVoid::create()) { + } else if (var_type != TypeDataVoid::create() && var_type != TypeDataNever::create()) { #ifdef TOLK_DEBUG tolk_assert(stack_w == 1); #endif diff --git a/tolk/ast.cpp b/tolk/ast.cpp index 8f1aa98f..26eaacd5 100644 --- a/tolk/ast.cpp +++ b/tolk/ast.cpp @@ -117,6 +117,11 @@ void ASTNodeExpressionBase::assign_lvalue_true() { this->is_lvalue = true; } +void ASTNodeExpressionBase::assign_always_true_or_false(int flow_true_false_state) { + this->is_always_true = flow_true_false_state == 1; // see smart-casts-cfg.h + this->is_always_false = flow_true_false_state == 2; +} + void Vertex::assign_sym(const Symbol* sym) { this->sym = sym; } @@ -173,6 +178,10 @@ void Vertex::assign_is_negated(bool is_negated) { this->is_negated = is_negated; } +void Vertex::assign_first_unreachable(AnyV first_unreachable) { + this->first_unreachable = first_unreachable; +} + void Vertex::assign_target(const DotTarget& target) { this->target = target; } diff --git a/tolk/ast.h b/tolk/ast.h index cd410187..9b7c5d1a 100644 --- a/tolk/ast.h +++ b/tolk/ast.h @@ -186,11 +186,14 @@ struct ASTNodeExpressionBase : ASTNodeBase { TypePtr inferred_type = nullptr; bool is_rvalue: 1 = false; bool is_lvalue: 1 = false; + bool is_always_true: 1 = false; // inside `if`, `while`, ternary condition, `== null`, etc. 
+ bool is_always_false: 1 = false; // (when expression is guaranteed to be always true or always false) ASTNodeExpressionBase* mutate() const { return const_cast(this); } void assign_inferred_type(TypePtr type); void assign_rvalue_true(); void assign_lvalue_true(); + void assign_always_true_or_false(int flow_true_false_state); ASTNodeExpressionBase(ASTNodeType type, SrcLocation loc) : ASTNodeBase(type, loc) {} }; @@ -734,10 +737,14 @@ template<> // example: do while body is a sequence struct Vertex final : ASTStatementVararg { SrcLocation loc_end; + AnyV first_unreachable = nullptr; const std::vector& get_items() const { return children; } AnyV get_item(int i) const { return children.at(i); } + Vertex* mutate() const { return const_cast(this); } + void assign_first_unreachable(AnyV first_unreachable); + Vertex(SrcLocation loc, SrcLocation loc_end, std::vector items) : ASTStatementVararg(ast_sequence, loc, std::move(items)) , loc_end(loc_end) {} diff --git a/tolk/generics-helpers.cpp b/tolk/generics-helpers.cpp index 86cdf82b..9dae3f00 100644 --- a/tolk/generics-helpers.cpp +++ b/tolk/generics-helpers.cpp @@ -119,14 +119,14 @@ TypePtr GenericSubstitutionsDeduceForCall::replace_by_manually_specified(TypePtr return replace_genericT_with_deduced(param_type, fun_ref->genericTs, substitutionTs); } -TypePtr GenericSubstitutionsDeduceForCall::auto_deduce_from_argument(SrcLocation loc, TypePtr param_type, TypePtr arg_type) { +TypePtr GenericSubstitutionsDeduceForCall::auto_deduce_from_argument(FunctionPtr cur_f, SrcLocation loc, TypePtr param_type, TypePtr arg_type) { try { if (!manually_specified) { consider_next_condition(param_type, arg_type); } return replace_genericT_with_deduced(param_type, fun_ref->genericTs, substitutionTs); } catch (const GenericDeduceError& ex) { - throw ParseError(loc, ex.message + " for generic function `" + fun_ref->as_human_readable() + "`; instantiate it manually with `" + fun_ref->name + "<...>()`"); + throw ParseError(cur_f, loc, ex.message 
+ " for generic function `" + fun_ref->as_human_readable() + "`; instantiate it manually with `" + fun_ref->name + "<...>()`"); } } @@ -201,7 +201,6 @@ static void run_pipeline_for_instantiated_function(FunctionPtr inst_fun_ref) { // these pipes are exactly the same as in tolk.cpp — all preceding (and including) type inferring pipeline_resolve_identifiers_and_assign_symbols(inst_fun_ref); pipeline_calculate_rvalue_lvalue(inst_fun_ref); - pipeline_detect_unreachable_statements(inst_fun_ref); pipeline_infer_types_and_calls_and_fields(inst_fun_ref); } diff --git a/tolk/generics-helpers.h b/tolk/generics-helpers.h index 893bd98c..5ed245af 100644 --- a/tolk/generics-helpers.h +++ b/tolk/generics-helpers.h @@ -78,7 +78,7 @@ public: void provide_manually_specified(std::vector&& substitutionTs); TypePtr replace_by_manually_specified(TypePtr param_type) const; - TypePtr auto_deduce_from_argument(SrcLocation loc, TypePtr param_type, TypePtr arg_type); + TypePtr auto_deduce_from_argument(FunctionPtr cur_f, SrcLocation loc, TypePtr param_type, TypePtr arg_type); int get_first_not_deduced_idx() const; std::vector&& flush() { diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index 269c5fe1..1561aa40 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -442,6 +442,21 @@ static std::vector transition_expr_to_runtime_type_impl(std::vectortry_as(); const TypeDataNullable* o_nullable = original_type->try_as(); + // handle `never` + // it may occur due to smart cast and in unreachable branches + // we can't do anything reasonable here, but (hopefully) execution will never reach this point, and stack won't be polluted + if (original_type == TypeDataNever::create()) { + std::vector dummy_rvect; + dummy_rvect.reserve(target_w); + for (int i = 0; i < target_w; ++i) { + dummy_rvect.push_back(code.create_tmp_var(TypeDataUnknown::create(), loc, "(never)")[0]); + } + return dummy_rvect; + } + if (target_type == TypeDataNever::create()) { + return 
{}; + } + // pass `null` to `T?` // for primitives like `int?`, no changes in rvect, null occupies the same TVM slot // for tensors like `(int,int)?`, `null` is represented as N nulls + 1 null flag, insert N nulls @@ -493,6 +508,8 @@ static std::vector transition_expr_to_runtime_type_impl(std::vectorcan_rhs_be_assigned(target_type)) { tolk_assert(o_nullable || original_type == TypeDataUnknown::create()); if (o_nullable && !o_nullable->is_primitive_nullable()) { @@ -502,10 +519,12 @@ static std::vector transition_expr_to_runtime_type_impl(std::vectoris_primitive_nullable()) { rvect.pop_back(); @@ -572,6 +591,17 @@ static std::vector transition_to_target_type(std::vector&& return rvect; } +// the second overload of the same function, invoke impl only when original and target differ +#ifndef TOLK_DEBUG +GNU_ATTRIBUTE_ALWAYS_INLINE +#endif +static std::vector transition_to_target_type(std::vector&& rvect, CodeBlob& code, TypePtr original_type, TypePtr target_type, SrcLocation loc) { + if (target_type != original_type) { + rvect = transition_expr_to_runtime_type_impl(std::move(rvect), code, original_type, target_type, loc); + } + return rvect; +} + std::vector pre_compile_symbol(SrcLocation loc, const Symbol* sym, CodeBlob& code, LValContext* lval_ctx) { if (GlobalVarPtr glob_ref = sym->try_as()) { @@ -617,20 +647,33 @@ std::vector pre_compile_symbol(SrcLocation loc, const Symbol* sym, Co static std::vector process_reference(V v, CodeBlob& code, TypePtr target_type, LValContext* lval_ctx) { std::vector rvect = pre_compile_symbol(v->loc, v->sym, code, lval_ctx); + + // a local variable might be smart cast at this point, for example we're in `if (v != null)` + // it means that we must drop the null flag (if it's a tensor), or maybe perform other stack transformations + // (from original var_ref->ir_idx to fit smart cast) + if (LocalVarPtr var_ref = v->sym->try_as()) { + // note, inside `if (v != null)` when `v` is used for writing, v->inferred_type is an original 
(declared_type) + // (smart casts apply only for rvalue, not for lvalue, we don't check it here, it's a property of inferring) + rvect = transition_to_target_type(std::move(rvect), code, var_ref->declared_type, v->inferred_type, v->loc); + } + return transition_to_target_type(std::move(rvect), code, target_type, v); } static std::vector process_assignment(V v, CodeBlob& code, TypePtr target_type) { - if (auto lhs_decl = v->get_lhs()->try_as()) { - std::vector rvect = pre_compile_let(code, lhs_decl->get_expr(), v->get_rhs(), v->loc); + AnyExprV lhs = v->get_lhs(); + AnyExprV rhs = v->get_rhs(); + + if (auto lhs_decl = lhs->try_as()) { + std::vector rvect = pre_compile_let(code, lhs_decl->get_expr(), rhs, v->loc); return transition_to_target_type(std::move(rvect), code, target_type, v); } else { - std::vector rvect = pre_compile_let(code, v->get_lhs(), v->get_rhs(), v->loc); + std::vector rvect = pre_compile_let(code, lhs, rhs, v->loc); // now rvect contains rhs IR vars constructed to fit lhs (for correct assignment, lhs type was target_type for rhs) // but the type of `lhs = rhs` is RHS (see type inferring), so rvect now should fit rhs->inferred_type (= v->inferred_type) // example: `t1 = t2 = null`, we're at `t2 = null`, earlier declared t1: `int?`, t2: `(int,int)?` // currently "null" matches t2 (3 null slots), but type of this assignment is "plain null" (1 slot) assigned later to t1 - rvect = transition_expr_to_runtime_type_impl(std::move(rvect), code, v->get_lhs()->inferred_type, v->inferred_type, v->loc); + rvect = transition_to_target_type(std::move(rvect), code, lhs->inferred_type, v->inferred_type, v->loc); return transition_to_target_type(std::move(rvect), code, target_type, v); } } @@ -692,13 +735,21 @@ static std::vector process_ternary_operator(V v std::vector cond = pre_compile_expr(v->get_cond(), code, nullptr); tolk_assert(cond.size() == 1); std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc, "(cond)"); - Op& if_op = 
code.emplace_back(v->loc, Op::_If, cond); - code.push_set_cur(if_op.block0); - code.emplace_back(v->get_when_true()->loc, Op::_Let, rvect, pre_compile_expr(v->get_when_true(), code, v->inferred_type)); - code.close_pop_cur(v->get_when_true()->loc); - code.push_set_cur(if_op.block1); - code.emplace_back(v->get_when_false()->loc, Op::_Let, rvect, pre_compile_expr(v->get_when_false(), code, v->inferred_type)); - code.close_pop_cur(v->get_when_false()->loc); + + if (v->get_cond()->is_always_true) { + code.emplace_back(v->get_when_true()->loc, Op::_Let, rvect, pre_compile_expr(v->get_when_true(), code, v->inferred_type)); + } else if (v->get_cond()->is_always_false) { + code.emplace_back(v->get_when_false()->loc, Op::_Let, rvect, pre_compile_expr(v->get_when_false(), code, v->inferred_type)); + } else { + Op& if_op = code.emplace_back(v->loc, Op::_If, cond); + code.push_set_cur(if_op.block0); + code.emplace_back(v->get_when_true()->loc, Op::_Let, rvect, pre_compile_expr(v->get_when_true(), code, v->inferred_type)); + code.close_pop_cur(v->get_when_true()->loc); + code.push_set_cur(if_op.block1); + code.emplace_back(v->get_when_false()->loc, Op::_Let, rvect, pre_compile_expr(v->get_when_false(), code, v->inferred_type)); + code.close_pop_cur(v->get_when_false()->loc); + } + return transition_to_target_type(std::move(rvect), code, target_type, v); } @@ -768,6 +819,10 @@ static std::vector process_dot_access(V v, CodeBlob& stack_offset += t_tensor->items[i]->get_width_on_stack(); } std::vector rvect{lhs_vars.begin() + stack_offset, lhs_vars.begin() + stack_offset + stack_width}; + // a tensor index might be smart cast at this point, for example we're in `if (t.1 != null)` + // it means that we must drop the null flag (if `t.1` is a tensor), or maybe perform other stack transformations + // (from original rvect = (vars of t.1) to fit smart cast) + rvect = transition_to_target_type(std::move(rvect), code, t_tensor->items[index_at], v->inferred_type, v->loc); return 
transition_to_target_type(std::move(rvect), code, target_type, v); } // `tupleVar.0` @@ -1090,8 +1145,19 @@ static void process_repeat_statement(V v, CodeBlob& code) } static void process_if_statement(V v, CodeBlob& code) { - std::vector tmp_vars = pre_compile_expr(v->get_cond(), code, nullptr); - Op& if_op = code.emplace_back(v->loc, Op::_If, std::move(tmp_vars)); + std::vector cond = pre_compile_expr(v->get_cond(), code, nullptr); + tolk_assert(cond.size() == 1); + + if (v->get_cond()->is_always_true) { + process_any_statement(v->get_if_body(), code); // v->is_ifnot does not matter here + return; + } + if (v->get_cond()->is_always_false) { + process_any_statement(v->get_else_body(), code); + return; + } + + Op& if_op = code.emplace_back(v->loc, Op::_If, std::move(cond)); code.push_set_cur(if_op.block0); process_any_statement(v->get_if_body(), code); code.close_pop_cur(v->get_if_body()->loc_end); @@ -1192,6 +1258,10 @@ static void process_return_statement(V v, CodeBlob& code) code.emplace_back(v->loc, Op::_Return, std::move(return_vars)); } +// append "return" (void) to the end of the function +// if it's not reachable, it will be dropped +// (IR cfg reachability may differ from FlowContext in case of "never" types, so there may be situations, +// when IR will consider this "return" reachable and leave it, but actually execution will never reach it) static void append_implicit_return_statement(SrcLocation loc_end, CodeBlob& code) { std::vector mutated_vars; if (code.fun_ref->has_mutate_params()) { @@ -1256,9 +1326,7 @@ static void convert_function_body_to_CodeBlob(FunctionPtr fun_ref, FunctionBodyC for (AnyV item : v_body->get_items()) { process_any_statement(item, *blob); } - if (fun_ref->is_implicit_return()) { - append_implicit_return_statement(v_body->loc_end, *blob); - } + append_implicit_return_statement(v_body->loc_end, *blob); blob->close_blk(v_body->loc_end); code_body->set_code(blob); diff --git a/tolk/pipe-check-inferred-types.cpp 
b/tolk/pipe-check-inferred-types.cpp new file mode 100644 index 00000000..bae67c5f --- /dev/null +++ b/tolk/pipe-check-inferred-types.cpp @@ -0,0 +1,586 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "tolk.h" +#include "ast.h" +#include "ast-visitor.h" +#include "type-system.h" + +namespace tolk { + +GNU_ATTRIBUTE_NOINLINE +static std::string to_string(TypePtr type) { + return "`" + type->as_human_readable() + "`"; +} + +GNU_ATTRIBUTE_NOINLINE +static std::string to_string(AnyExprV v_with_type) { + return "`" + v_with_type->inferred_type->as_human_readable() + "`"; +} + +GNU_ATTRIBUTE_NOINLINE +static std::string expression_as_string(AnyExprV v) { + if (auto v_ref = v->try_as()) { + if (v_ref->sym->try_as() || v_ref->sym->try_as()) { + return "variable `" + static_cast(v_ref->get_identifier()->name) + "`"; + } + } + if (auto v_par = v->try_as()) { + return expression_as_string(v_par->get_expr()); + } + return "expression"; +} + +// fire a general "type mismatch" error, just a wrapper over `throw` +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire(FunctionPtr cur_f, SrcLocation loc, const std::string& message) { + throw ParseError(cur_f, loc, message); +} + +// fire an error on `!cell` / `+slice` +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void 
fire_error_cannot_apply_operator(FunctionPtr cur_f, SrcLocation loc, std::string_view operator_name, AnyExprV unary_expr) { + std::string op = static_cast(operator_name); + fire(cur_f, loc, "can not apply operator `" + op + "` to " + to_string(unary_expr->inferred_type)); +} + +// fire an error on `int + cell` / `slice & int` +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_cannot_apply_operator(FunctionPtr cur_f, SrcLocation loc, std::string_view operator_name, AnyExprV lhs, AnyExprV rhs) { + std::string op = static_cast(operator_name); + fire(cur_f, loc, "can not apply operator `" + op + "` to " + to_string(lhs->inferred_type) + " and " + to_string(rhs->inferred_type)); +} + +GNU_ATTRIBUTE_NOINLINE +static void warning_condition_always_true_or_false(FunctionPtr cur_f, SrcLocation loc, AnyExprV cond, const char* operator_name) { + loc.show_warning("condition of " + static_cast(operator_name) + " is always " + (cond->is_always_true ? "true" : "false")); +} + +// given `f(x: int)` and a call `f(expr)`, check that expr_type is assignable to `int` +static void check_function_argument_passed(FunctionPtr cur_f, TypePtr param_type, AnyExprV ith_arg, bool is_obj_of_dot_call) { + if (!param_type->can_rhs_be_assigned(ith_arg->inferred_type)) { + if (is_obj_of_dot_call) { + fire(cur_f, ith_arg->loc, "can not call method for " + to_string(param_type) + " with object of type " + to_string(ith_arg)); + } else { + fire(cur_f, ith_arg->loc, "can not pass " + to_string(ith_arg) + " to " + to_string(param_type)); + } + } +} + +// given `f(x: mutate int?)` and a call `f(expr)`, check that `int?` is assignable to expr_type +// (for instance, can't call `f(mutate intVal)`, since f can potentially assign null to it) +static void check_function_argument_mutate_back(FunctionPtr cur_f, TypePtr param_type, AnyExprV ith_arg, bool is_obj_of_dot_call) { + if (!ith_arg->inferred_type->can_rhs_be_assigned(param_type)) { + if (is_obj_of_dot_call) { + fire(cur_f, 
ith_arg->loc,"can not call method for mutate " + to_string(param_type) + " with object of type " + to_string(ith_arg) + ", because mutation is not type compatible"); + } else { + fire(cur_f, ith_arg->loc,"can not pass " + to_string(ith_arg) + " to mutate " + to_string(param_type) + ", because mutation is not type compatible"); + } + } +} + +// fire an error on `var n = null` +// technically it's correct, type of `n` is TypeDataNullLiteral, but it's not what the user wanted +// so, it's better to see an error on assignment, that later, on `n` usage and types mismatch +// (most common is situation above, but generally, `var (x,n) = xn` where xn is a tensor with 2-nd always-null, can be) +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_assign_always_null_to_variable(FunctionPtr cur_f, SrcLocation loc, LocalVarPtr assigned_var, bool is_assigned_null_literal) { + std::string var_name = assigned_var->name; + fire(cur_f, loc, "can not infer type of `" + var_name + "`, it's always null; specify its type with `" + var_name + ": `" + (is_assigned_null_literal ? 
" or use `null as `" : "")); +} + +// fire an error on `untypedTupleVar.0` when inferred as (int,int), or `[int, (int,int)]`, or other non-1 width in a tuple +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_cannot_put_non1_stack_width_arg_to_tuple(FunctionPtr cur_f, SrcLocation loc, TypePtr inferred_type) { + fire(cur_f, loc, "a tuple can not have " + to_string(inferred_type) + " inside, because it occupies " + std::to_string(inferred_type->get_width_on_stack()) + " stack slots in TVM, not 1"); +} + +// handle __expect_type(expr, "type") call +// this is used in compiler tests +GNU_ATTRIBUTE_NOINLINE GNU_ATTRIBUTE_COLD +static void handle_possible_compiler_internal_call(FunctionPtr cur_f, V v) { + FunctionPtr fun_ref = v->fun_maybe; + tolk_assert(fun_ref && fun_ref->is_builtin_function()); + + if (fun_ref->name == "__expect_type") { + tolk_assert(v->get_num_args() == 2); + TypePtr expected_type = parse_type_from_string(v->get_arg(1)->get_expr()->as()->str_val); + TypePtr expr_type = v->get_arg(0)->inferred_type; + if (expected_type != expr_type) { + fire(cur_f, v->loc, "__expect_type failed: expected " + to_string(expected_type) + ", got " + to_string(expr_type)); + } + } +} + +static bool expect_integer(AnyExprV v_inferred) { + return v_inferred->inferred_type == TypeDataInt::create(); +} + +static bool expect_boolean(AnyExprV v_inferred) { + return v_inferred->inferred_type == TypeDataBool::create(); +} + + +class CheckInferredTypesVisitor final : public ASTVisitorFunctionBody { + FunctionPtr cur_f = nullptr; // may be nullptr if checking `const a = ...` init_value + +protected: + void visit(V v) override { + AnyExprV lhs = v->get_lhs(); + AnyExprV rhs = v->get_rhs(); + parent::visit(lhs); + parent::visit(rhs); + + // all operators (+=, etc.) 
can work for integers (if both sides are integers) + bool types_ok = expect_integer(lhs) && expect_integer(rhs); + // bitwise operators &= |= ^= are "overloaded" for booleans also (if both sides are booleans) + if (!types_ok && (v->tok == tok_set_bitwise_and || v->tok == tok_set_bitwise_or || v->tok == tok_set_bitwise_xor)) { + types_ok = expect_boolean(lhs) && expect_boolean(rhs); + } + // using += for other types (e.g. `tensorVar += tensorVar`) is not allowed + if (!types_ok) { + fire_error_cannot_apply_operator(cur_f, v->loc, v->operator_name, lhs, rhs); + } + } + + void visit(V v) override { + AnyExprV rhs = v->get_rhs(); + parent::visit(rhs); + + switch (v->tok) { + case tok_logical_not: + if (!expect_integer(rhs) && !expect_boolean(rhs)) { + fire_error_cannot_apply_operator(cur_f, v->loc, v->operator_name, rhs); + } + break; + default: + if (!expect_integer(rhs)) { + fire_error_cannot_apply_operator(cur_f, v->loc, v->operator_name, rhs); + } + } + } + + void visit(V v) override { + AnyExprV lhs = v->get_lhs(); + AnyExprV rhs = v->get_rhs(); + parent::visit(lhs); + parent::visit(rhs); + + switch (v->tok) { + // == != can compare both integers and booleans, (int == bool) is NOT allowed + // note, that `int?` and `int?` can't be compared, since Fift `EQUAL` works with integers only + // (if to allow `int?` in the future, `==` must be expressed in a complicated Fift code considering TVM NULL) + case tok_eq: + case tok_neq: { + bool both_int = expect_integer(lhs) && expect_integer(rhs); + bool both_bool = expect_boolean(lhs) && expect_boolean(rhs); + if (!both_int && !both_bool) { + if (lhs->inferred_type == rhs->inferred_type) { // compare slice with slice, int? with int? 
+ fire(cur_f, v->loc, "type " + to_string(lhs) + " can not be compared with `== !=`"); + } else { + fire_error_cannot_apply_operator(cur_f, v->loc, v->operator_name, lhs, rhs); + } + } + break; + } + // < > can compare only strict integers + case tok_lt: + case tok_gt: + case tok_leq: + case tok_geq: + case tok_spaceship: + if (!expect_integer(lhs) || !expect_integer(rhs)) { + fire_error_cannot_apply_operator(cur_f, v->loc, v->operator_name, lhs, rhs); + } + break; + // & | ^ are "overloaded" both for integers and booleans, (int & bool) is NOT allowed + case tok_bitwise_and: + case tok_bitwise_or: + case tok_bitwise_xor: { + bool both_int = expect_integer(lhs) && expect_integer(rhs); + bool both_bool = expect_boolean(lhs) && expect_boolean(rhs); + if (!both_int && !both_bool) { + fire_error_cannot_apply_operator(cur_f, v->loc, v->operator_name, lhs, rhs); + } + break; + } + // && || can work with integers and booleans, (int && bool) is allowed, (int16 && int32) also + case tok_logical_and: + case tok_logical_or: { + bool lhs_ok = expect_integer(lhs) || expect_boolean(lhs); + bool rhs_ok = expect_integer(rhs) || expect_boolean(rhs); + if (!lhs_ok || !rhs_ok) { + fire_error_cannot_apply_operator(cur_f, v->loc, v->operator_name, lhs, rhs); + } + break; + } + // others are mathematical: + * ... 
+ default: + if (!expect_integer(lhs) || !expect_integer(rhs)) { + fire_error_cannot_apply_operator(cur_f, v->loc, v->operator_name, lhs, rhs); + } + } + } + + void visit(V v) override { + parent::visit(v->get_expr()); + + if (!v->get_expr()->inferred_type->can_be_casted_with_as_operator(v->cast_to_type)) { + fire(cur_f, v->loc, "type " + to_string(v->get_expr()) + " can not be cast to " + to_string(v->cast_to_type)); + } + } + + void visit(V v) override { + parent::visit(v->get_expr()); + + if (v->get_expr()->inferred_type == TypeDataNullLiteral::create()) { + // operator `!` used for always-null (proven by smart casts, for example), it's an error + fire(cur_f, v->loc, "operator `!` used for always null expression"); + } + // if operator `!` used for non-nullable, probably a warning should be printed + } + + void visit(V v) override { + parent::visit(v->get_expr()); + + if ((v->is_always_true && !v->is_negated) || (v->is_always_false && v->is_negated)) { + v->loc.show_warning(expression_as_string(v->get_expr()) + " is always null, this condition is always " + (v->is_always_true ? "true" : "false")); + } + if ((v->is_always_false && !v->is_negated) || (v->is_always_true && v->is_negated)) { + v->loc.show_warning(expression_as_string(v->get_expr()) + " of type " + to_string(v->get_expr()) + " is always not null, this condition is always " + (v->is_always_true ? 
"true" : "false")); + } + } + + void visit(V v) override { + parent::visit(v); + + for (int i = 0; i < v->size(); ++i) { + AnyExprV item = v->get_item(i); + if (item->inferred_type->get_width_on_stack() != 1) { + fire_error_cannot_put_non1_stack_width_arg_to_tuple(cur_f, v->get_item(i)->loc, item->inferred_type); + } + } + } + + void visit(V v) override { + parent::visit(v); + + TypePtr obj_type = v->get_obj()->inferred_type; + if (v->is_target_indexed_access()) { + if (obj_type->try_as() && v->inferred_type->get_width_on_stack() != 1) { + fire_error_cannot_put_non1_stack_width_arg_to_tuple(cur_f, v->loc, v->inferred_type); + } + } + } + + void visit(V v) override { + parent::visit(v); // check against type mismatch inside nested arguments + + FunctionPtr fun_ref = v->fun_maybe; + if (!fun_ref) { + // `local_var(args)` and similar + const TypeDataFunCallable* f_callable = v->get_callee()->inferred_type->try_as(); + tolk_assert(f_callable && f_callable->params_size() == v->get_num_args()); + for (int i = 0; i < v->get_num_args(); ++i) { + auto arg_i = v->get_arg(i)->get_expr(); + TypePtr param_type = f_callable->params_types[i]; + if (!param_type->can_rhs_be_assigned(arg_i->inferred_type)) { + fire(cur_f, arg_i->loc, "can not pass " + to_string(arg_i) + " to " + to_string(param_type)); + } + } + return; + } + + // so, we have a call `f(args)` or `obj.f(args)`, f is a global function (fun_ref) (code / asm / builtin) + int delta_self = 0; + AnyExprV dot_obj = nullptr; + if (auto v_dot = v->get_callee()->try_as()) { + delta_self = 1; + dot_obj = v_dot->get_obj(); + } + + if (dot_obj) { + const LocalVarData& param_0 = fun_ref->parameters[0]; + TypePtr param_type = param_0.declared_type; + check_function_argument_passed(cur_f, param_type, dot_obj, true); + if (param_0.is_mutate_parameter()) { + check_function_argument_mutate_back(cur_f, param_type, dot_obj, true); + } + } + for (int i = 0; i < v->get_num_args(); ++i) { + const LocalVarData& param_i = 
fun_ref->parameters[delta_self + i]; + AnyExprV arg_i = v->get_arg(i)->get_expr(); + TypePtr param_type = param_i.declared_type; + check_function_argument_passed(cur_f, param_type, arg_i, false); + if (param_i.is_mutate_parameter()) { + check_function_argument_mutate_back(cur_f, param_type, arg_i, false); + } + } + + if (fun_ref->is_builtin_function() && fun_ref->name[0] == '_') { + handle_possible_compiler_internal_call(cur_f, v); + } + } + + void visit(V v) override { + parent::visit(v->get_lhs()); + parent::visit(v->get_rhs()); + + process_assignment_lhs(v->get_lhs(), v->get_rhs()->inferred_type, v->get_rhs()); + } + + // handle (and dig recursively) into `var lhs = rhs` + // examples: `var z = 5`, `var (x, [y]) = (2, [3])`, `var (x, [y]) = xy` + // while recursing, keep track of rhs if lhs and rhs have common shape (5 for z, 2 for x, [3] for [y], 3 for y) + // (so that on type mismatch, point to corresponding rhs, example: `var (x, y:slice) = (1, 2)` point to 2 + void process_assignment_lhs(AnyExprV lhs, TypePtr rhs_type, AnyExprV corresponding_maybe_rhs) { + AnyExprV err_loc = corresponding_maybe_rhs ? corresponding_maybe_rhs : lhs; + + // `var ... 
= rhs` - dig into left part + if (auto lhs_decl = lhs->try_as()) { + process_assignment_lhs(lhs_decl->get_expr(), rhs_type, corresponding_maybe_rhs); + return; + } + + // inside `var v: int = rhs` / `var _ = rhs` / `var v redef = rhs` (lhs is "v" / "_" / "v") + if (auto lhs_var = lhs->try_as()) { + TypePtr declared_type = lhs_var->declared_type; // `var v: int = rhs` (otherwise, nullptr) + if (lhs_var->marked_as_redef) { + tolk_assert(lhs_var->var_ref && lhs_var->var_ref->declared_type); + declared_type = lhs_var->var_ref->declared_type; + } + if (declared_type) { + if (!declared_type->can_rhs_be_assigned(rhs_type)) { + fire(cur_f, err_loc->loc, "can not assign " + to_string(rhs_type) + " to variable of type " + to_string(declared_type)); + } + } else { + if (rhs_type == TypeDataNullLiteral::create()) { + fire_error_assign_always_null_to_variable(cur_f, err_loc->loc, lhs_var->var_ref->try_as(), corresponding_maybe_rhs && corresponding_maybe_rhs->type == ast_null_keyword); + } + } + return; + } + + // `(v1, v2) = rhs` / `var (v1, v2) = rhs` (rhs may be `(1,2)` or `tensorVar` or `someF()`, doesn't matter) + // dig recursively into v1 and v2 with corresponding rhs i-th item of a tensor + if (auto lhs_tensor = lhs->try_as()) { + const TypeDataTensor* rhs_type_tensor = rhs_type->try_as(); + if (!rhs_type_tensor) { + fire(cur_f, err_loc->loc, "can not assign " + to_string(rhs_type) + " to a tensor"); + } + if (lhs_tensor->size() != rhs_type_tensor->size()) { + fire(cur_f, err_loc->loc, "can not assign " + to_string(rhs_type) + ", sizes mismatch"); + } + V rhs_tensor_maybe = corresponding_maybe_rhs ? corresponding_maybe_rhs->try_as() : nullptr; + for (int i = 0; i < lhs_tensor->size(); ++i) { + process_assignment_lhs(lhs_tensor->get_item(i), rhs_type_tensor->items[i], rhs_tensor_maybe ? 
rhs_tensor_maybe->get_item(i) : nullptr); + } + return; + } + + // `[v1, v2] = rhs` / `var [v1, v2] = rhs` (rhs may be `[1,2]` or `tupleVar` or `someF()`, doesn't matter) + // dig recursively into v1 and v2 with corresponding rhs i-th item of a tuple + if (auto lhs_tuple = lhs->try_as()) { + const TypeDataTypedTuple* rhs_type_tuple = rhs_type->try_as(); + if (!rhs_type_tuple) { + fire(cur_f, err_loc->loc, "can not assign " + to_string(rhs_type) + " to a tuple"); + } + if (lhs_tuple->size() != rhs_type_tuple->size()) { + fire(cur_f, err_loc->loc, "can not assign " + to_string(rhs_type) + ", sizes mismatch"); + } + V rhs_tuple_maybe = corresponding_maybe_rhs ? corresponding_maybe_rhs->try_as() : nullptr; + for (int i = 0; i < lhs_tuple->size(); ++i) { + process_assignment_lhs(lhs_tuple->get_item(i), rhs_type_tuple->items[i], rhs_tuple_maybe ? rhs_tuple_maybe->get_item(i) : nullptr); + } + return; + } + + // check `untypedTuple.0 = rhs_tensor` and other non-1 width elements + if (auto lhs_dot = lhs->try_as()) { + if (lhs_dot->is_target_indexed_access() && lhs_dot->get_obj()->inferred_type == TypeDataTuple::create()) { + if (rhs_type->get_width_on_stack() != 1) { + fire_error_cannot_put_non1_stack_width_arg_to_tuple(cur_f, err_loc->loc, rhs_type); + } + } + } + + // here is `v = rhs` (just assignment, not `var v = rhs`) / `a.0 = rhs` / `getObj(z=f()).0 = rhs` etc. 
+ // types were already inferred, so just check their compatibility + // for strange lhs like `f() = rhs` type checking will pass, but will fail lvalue check later + if (!lhs->inferred_type->can_rhs_be_assigned(rhs_type)) { + if (lhs->try_as()) { + fire(cur_f, err_loc->loc, "can not assign " + to_string(rhs_type) + " to variable of type " + to_string(lhs)); + } else { + fire(cur_f, err_loc->loc, "can not assign " + to_string(rhs_type) + " to " + to_string(lhs)); + } + } + } + + void visit(V v) override { + parent::visit(v->get_return_value()); + + if (cur_f->does_return_self()) { + if (!is_expr_valid_as_return_self(v->get_return_value())) { + fire(cur_f, v->loc, "invalid return from `self` function"); + } + return; + } + + TypePtr expr_type = v->get_return_value()->inferred_type; + if (!cur_f->inferred_return_type->can_rhs_be_assigned(expr_type)) { + fire(cur_f, v->get_return_value()->loc, "can not convert type " + to_string(expr_type) + " to return type " + to_string(cur_f->inferred_return_type)); + } + } + + static bool is_expr_valid_as_return_self(AnyExprV return_expr) { + // `return self` + if (return_expr->type == ast_reference && return_expr->as()->get_name() == "self") { + return true; + } + // `return self.someMethod()` + if (auto v_call = return_expr->try_as(); v_call && v_call->is_dot_call()) { + return v_call->fun_maybe && v_call->fun_maybe->does_return_self() && is_expr_valid_as_return_self(v_call->get_dot_obj()); + } + // `return cond ? ... 
: ...` + if (auto v_ternary = return_expr->try_as()) { + return is_expr_valid_as_return_self(v_ternary->get_when_true()) && is_expr_valid_as_return_self(v_ternary->get_when_false()); + } + return false; + } + + void visit(V v) override { + parent::visit(v); + + AnyExprV cond = v->get_cond(); + if (!expect_integer(cond) && !expect_boolean(cond)) { + fire(cur_f, cond->loc, "can not use " + to_string(cond) + " as a boolean condition"); + } + + if (cond->is_always_true || cond->is_always_false) { + warning_condition_always_true_or_false(cur_f, v->loc, cond, "ternary operator"); + } + } + + void visit(V v) override { + parent::visit(v); + + AnyExprV cond = v->get_cond(); + if (!expect_integer(cond) && !expect_boolean(cond)) { + fire(cur_f, cond->loc, "can not use " + to_string(cond) + " as a boolean condition"); + } + + if (cond->is_always_true || cond->is_always_false) { + warning_condition_always_true_or_false(cur_f, v->loc, cond, "`if`"); + } + } + + void visit(V v) override { + parent::visit(v); + + AnyExprV cond = v->get_cond(); + if (!expect_integer(cond)) { + fire(cur_f, cond->loc, "condition of `repeat` must be an integer, got " + to_string(cond)); + } + } + + void visit(V v) override { + parent::visit(v); + + AnyExprV cond = v->get_cond(); + if (!expect_integer(cond) && !expect_boolean(cond)) { + fire(cur_f, cond->loc, "can not use " + to_string(cond) + " as a boolean condition"); + } + + if (cond->is_always_true || cond->is_always_false) { + warning_condition_always_true_or_false(cur_f, v->loc, cond, "`while`"); + } + } + + void visit(V v) override { + parent::visit(v); + + AnyExprV cond = v->get_cond(); + if (!expect_integer(cond) && !expect_boolean(cond)) { + fire(cur_f, cond->loc, "can not use " + to_string(cond) + " as a boolean condition"); + } + + if (cond->is_always_true || cond->is_always_false) { + warning_condition_always_true_or_false(cur_f, v->loc, cond, "`do while`"); + } + } + + void visit(V v) override { + parent::visit(v); + + if 
(!expect_integer(v->get_thrown_code())) { + fire(cur_f, v->get_thrown_code()->loc, "excNo of `throw` must be an integer, got " + to_string(v->get_thrown_code())); + } + if (v->has_thrown_arg() && v->get_thrown_arg()->inferred_type->get_width_on_stack() != 1) { + fire(cur_f, v->get_thrown_arg()->loc, "can not throw " + to_string(v->get_thrown_arg()) + ", exception arg must occupy exactly 1 stack slot"); + } + } + + void visit(V v) override { + parent::visit(v); + + AnyExprV cond = v->get_cond(); + if (!expect_integer(cond) && !expect_boolean(cond)) { + fire(cur_f, cond->loc, "can not use " + to_string(cond) + " as a boolean condition"); + } + if (!expect_integer(v->get_thrown_code())) { + fire(cur_f, v->get_thrown_code()->loc, "thrown excNo of `assert` must be an integer, got " + to_string(v->get_thrown_code())); + } + + if (cond->is_always_true || cond->is_always_false) { + warning_condition_always_true_or_false(cur_f, v->loc, cond, "`assert`"); + } + } + + void visit(V v) override { + parent::visit(v); + + if (v->first_unreachable) { + // it's essential to print "unreachable code" warning AFTER type checking + // (printing it while inferring might be a false positive if types are incorrect, due to smart casts for example) + // a more correct approach would be to access cfg here somehow, but since cfg is now available only while inferring, + // a special v->first_unreachable was set specifically for this warning (again, which is correct if types match) + v->first_unreachable->loc.show_warning("unreachable code"); + } + } + + public: + bool should_visit_function(FunctionPtr fun_ref) override { + return fun_ref->is_code_function() && !fun_ref->is_generic_function(); + } + + void start_visiting_function(FunctionPtr fun_ref, V v_function) override { + cur_f = fun_ref; + parent::visit(v_function->get_body()); + cur_f = nullptr; + + if (fun_ref->is_implicit_return() && fun_ref->declared_return_type) { + if 
(!fun_ref->declared_return_type->can_rhs_be_assigned(TypeDataVoid::create()) || fun_ref->does_return_self()) { + fire(fun_ref, v_function->get_body()->as()->loc_end, "missing return"); + } + } + } +}; + +void pipeline_check_inferred_types() { + visit_ast_of_all_functions(); +} + +} // namespace tolk diff --git a/tolk/pipe-detect-unreachable.cpp b/tolk/pipe-detect-unreachable.cpp deleted file mode 100644 index 041e5581..00000000 --- a/tolk/pipe-detect-unreachable.cpp +++ /dev/null @@ -1,138 +0,0 @@ -/* - This file is part of TON Blockchain source code. - - TON Blockchain is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - TON Blockchain is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with TON Blockchain. If not, see . -*/ -#include "tolk.h" -#include "ast.h" -#include "ast-visitor.h" - -/* - * This pipe does two things: - * 1) detects unreachable code and prints warnings about it - * example: `fun main() { if(1){return;}else{return;} var x = 0; }` — var is unreachable - * 2) if control flow reaches end of function, store a flag to insert an implicit return - * example: `fun main() { assert(...); }` — has an implicit `return ()` statement before a brace - * - * Note, that it does not delete unreachable code, only prints warnings. - * Actual deleting is done much later (in "legacy" part), after AST is converted to Op. - * - * Note, that it's not CFG, it's just a shallow reachability detection. - * In the future, a true CFG should be introduced. For instance, in order to have nullable types, - * I'll need to implement smart casts. 
Then I'll think of a complicated granular control flow graph, - * considering data flow and exceptions (built before type inferring, of course), - * and detecting unreachable code will be a part of it. - */ - -namespace tolk { - -class UnreachableStatementsDetectVisitor final { - bool always_returns(AnyV v) { - switch (v->type) { - case ast_sequence: return always_returns(v->as()); - case ast_return_statement: return always_returns(v->as()); - case ast_throw_statement: return always_returns(v->as()); - case ast_function_call: return always_returns(v->as()); - case ast_repeat_statement: return always_returns(v->as()); - case ast_while_statement: return always_returns(v->as()); - case ast_do_while_statement: return always_returns(v->as()); - case ast_try_catch_statement: return always_returns(v->as()); - case ast_if_statement: return always_returns(v->as()); - default: - // unhandled statements (like assert) and statement expressions - return false; - } - } - - bool always_returns(V v) { - bool always = false; - for (AnyV item : v->get_items()) { - if (always && item->type != ast_empty_statement) { - item->loc.show_warning("unreachable code"); - break; - } - always |= always_returns(item); - } - return always; - } - - static bool always_returns([[maybe_unused]] V v) { - // quite obvious: `return expr` interrupts control flow - return true; - } - - static bool always_returns([[maybe_unused]] V v) { - // todo `throw excNo` currently does not interrupt control flow - // (in other words, `throw 1; something` - something is reachable) - // the reason is that internally it's transformed to a call of built-in function __throw(), - // which is a regular function, like __throw_if() or loadInt() - // to fix this later on, it should be deeper, introducing Op::_Throw for example, - // to make intermediate representations and stack optimizer also be aware that after it there is unreachable - return false; - } - - static bool always_returns([[maybe_unused]] V v) { - // neither 
annotations like @noreturn nor auto-detection of always-throwing functions also doesn't exist - // in order to do this in the future, it should be handled not only at AST/CFG level, - // but inside Op and low-level optimizer (at least if reachability detection is not moved out of there) - // see comments for `throw` above, similar to this case - return false; - } - - bool always_returns(V v) { - return always_returns(v->get_body()); - } - - bool always_returns(V v) { - return always_returns(v->get_body()); - } - - bool always_returns(V v) { - return always_returns(v->get_body()); - } - - bool always_returns(V v) { - return always_returns(v->get_try_body()) && always_returns(v->get_catch_body()); - } - - bool always_returns(V v) { - return always_returns(v->get_if_body()) && always_returns(v->get_else_body()); - } - -public: - static bool should_visit_function(FunctionPtr fun_ref) { - return fun_ref->is_code_function() && !fun_ref->is_generic_function(); - } - - void start_visiting_function(FunctionPtr fun_ref, V v_function) { - bool control_flow_reaches_end = !always_returns(v_function->get_body()->as()); - if (control_flow_reaches_end) { - fun_ref->mutate()->assign_is_implicit_return(); - } - } -}; - - -void pipeline_detect_unreachable_statements() { - visit_ast_of_all_functions(); -} - -void pipeline_detect_unreachable_statements(FunctionPtr fun_ref) { - UnreachableStatementsDetectVisitor visitor; - if (UnreachableStatementsDetectVisitor::should_visit_function(fun_ref)) { - visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); - } -} - -} // namespace tolk diff --git a/tolk/pipe-infer-types-and-calls.cpp b/tolk/pipe-infer-types-and-calls.cpp index 2f4290d6..7ab0aa1c 100644 --- a/tolk/pipe-infer-types-and-calls.cpp +++ b/tolk/pipe-infer-types-and-calls.cpp @@ -20,20 +20,22 @@ #include "ast-visitor.h" #include "generics-helpers.h" #include "type-system.h" +#include "smart-casts-cfg.h" /* * This is a complicated and crucial part of the pipeline. 
It simultaneously does the following: * * infers types of all expressions; example: `2 + 3` both are TypeDataInt, result is also - * * AND checks types for assignment, arguments passing, etc.; example: `fInt(cs)` is error passing slice to int * * AND binds function/method calls (assigns fun_ref); example: `globalF()`, fun_ref is assigned to `globalF` (unless generic) * * AND instantiates generic functions; example: `t.tuplePush(2)` creates `tuplePush` and assigns fun_ref to dot field * * AND infers return type of functions if it's omitted (`fun f() { ... }` means "auto infer", not "void") + * * AND builds data flow graph, mostly used for smart casts (right at the time of inferring) + * Note, that type checking (errors about types mismatch) is a later compilation step, due to loops. * * It's important to do all these parts simultaneously, they can't be split or separated. * For example, we can't bind `f(2)` earlier, because if `f` is a generic `f`, we should instantiate it, * and in order to do it, we need to know argument types. - * For example, we can't bind `c.cellHash()` earlier, because in the future we'll have overloads (`cell.hash()` and `slice.hash()`), - * and in order to bind it, we need to know object type. + * For example, we can't bind `c.cellHash()` earlier, because in order to bind it, we need to know object type. + * For example, we can't infer `var y = x` without smart casts, because if x's type is refined, it affects y. * And vice versa, to infer type of expression in the middle, we need to have inferred all expressions preceding it, * which may also include generics, etc. * @@ -52,6 +54,36 @@ * Example: `fun tupleAt(t: tuple, idx: int):T`, just `t.tupleGet(2)` can't be deduced (T left unspecified), * but for assignment with left-defined type, or a call to `fInt(t.tupleGet(2))` hint "int" helps deduce T. * + * Control flow is represented NOT as a "graph with edges". 
Instead, it's a "structured DFS" for the AST: + * 1) at every point of inferring, we have "current flow facts" (FlowContext) + * 2) when we see an `if (...)`, we create two derived contexts (by cloning current) + * 3) after `if`, finalize them at the end and unify + * 4) if we detect unreachable code, we mark that path's context as "unreachable" + * In other words, we get the effect of a CFG but in a more direct approach. That's enough for AST-level data-flow. + * FlowContext contains "data-flow facts that are definitely known". + * // current facts: x is int?, t is (int, int) + * if (x != null && t.0 > 0) + * // current facts: x is int, t is (int, int), t.0 is positive + * else + * // current facts: x is null, t is (int, int), t.0 is not positive + * When branches rejoin, facts are merged back (int+null = int? and so on, here they would be equal to before if). + * See smart-casts-cfg.cpp for detailed comments. + * + * About loops and partial re-entering. Consider the following: + * var x: int? = 5; + * // <- here x is `int` (smart cast) + * while (true) { + * // <- but here x is `int?` (not `int`) due to assignment in a loop + * if (...) { x = getNullableInt(); } + * } + * When building control flow, loops are inferred twice. In the above, at first iteration, x will be `int`, + * but at the second, x will be `int?` (after being merged with loop end). + * That's why type checking is done later, not to make false errors on the first iteration. + * Note, that it would also be better to postpone generics "materialization": here only to infer type arguments, + * but to instantiate and re-assign fun_ref later. But it complicates the architecture significantly. + * For now, generics may encounter problems within loops on first iteration, though it's unlikely to face this + * in practice. (example: in the loop above, `genericFn(x)` will at first instantiate <int> and then <int?>) + * * Unlike other pipes, inferring can dig recursively on demand.
* Example: * fun getInt() { return 1; } @@ -87,183 +119,27 @@ static std::string to_string(FunctionPtr fun_ref) { return "`" + fun_ref->as_human_readable() + "`"; } +// fire a general error, just a wrapper over `throw` +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire(FunctionPtr cur_f, SrcLocation loc, const std::string& message) { + throw ParseError(cur_f, loc, message); +} + // fire an error when `fun f(...) asm ...` is called with T=(int,int) or other non-1 width on stack // asm functions generally can't handle it, they expect T to be a TVM primitive // (in FunC, `forall` type just couldn't be unified with non-primitives; in Tolk, generic T is expectedly inferred) GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_calling_asm_function_with_non1_stack_width_arg(SrcLocation loc, FunctionPtr fun_ref, const std::vector& substitutions, int arg_idx) { - throw ParseError(loc, "can not call `" + fun_ref->as_human_readable() + "` with " + fun_ref->genericTs->get_nameT(arg_idx) + "=" + substitutions[arg_idx]->as_human_readable() + ", because it occupies " + std::to_string(substitutions[arg_idx]->get_width_on_stack()) + " stack slots in TVM, not 1"); -} - -// fire an error on `var n = null` -// technically it's correct, type of `n` is TypeDataNullLiteral, but it's not what the user wanted -// so, it's better to see an error on assignment, that later, on `n` usage and types mismatch -// (most common is situation above, but generally, `var (x,n) = xn` where xn is a tensor with 2-nd always-null, can be) -GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_assign_always_null_to_variable(SrcLocation loc, LocalVarPtr assigned_var, bool is_assigned_null_literal) { - std::string var_name = assigned_var->name; - throw ParseError(loc, "can not infer type of `" + var_name + "`, it's always null; specify its type with `" + var_name + ": `" + (is_assigned_null_literal ? 
" or use `null as `" : "")); -} - -// fire an error on `!cell` / `+slice` -GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_cannot_apply_operator(SrcLocation loc, std::string_view operator_name, AnyExprV unary_expr) { - std::string op = static_cast(operator_name); - throw ParseError(loc, "can not apply operator `" + op + "` to " + to_string(unary_expr->inferred_type)); -} - -// fire an error on `int + cell` / `slice & int` -GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_cannot_apply_operator(SrcLocation loc, std::string_view operator_name, AnyExprV lhs, AnyExprV rhs) { - std::string op = static_cast(operator_name); - throw ParseError(loc, "can not apply operator `" + op + "` to " + to_string(lhs->inferred_type) + " and " + to_string(rhs->inferred_type)); +static void fire_error_calling_asm_function_with_non1_stack_width_arg(FunctionPtr cur_f, SrcLocation loc, FunctionPtr fun_ref, const std::vector& substitutions, int arg_idx) { + fire(cur_f, loc, "can not call `" + fun_ref->as_human_readable() + "` with " + fun_ref->genericTs->get_nameT(arg_idx) + "=" + substitutions[arg_idx]->as_human_readable() + ", because it occupies " + std::to_string(substitutions[arg_idx]->get_width_on_stack()) + " stack slots in TVM, not 1"); } // fire an error on `untypedTupleVar.0` when used without a hint GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_cannot_deduce_untyped_tuple_access(SrcLocation loc, int index) { +static void fire_error_cannot_deduce_untyped_tuple_access(FunctionPtr cur_f, SrcLocation loc, int index) { std::string idx_access = "." 
+ std::to_string(index); - throw ParseError(loc, "can not deduce type of `" + idx_access + "`; either assign it to variable like `var c: int = " + idx_access + "` or cast the result like `" + idx_access + " as int`"); + fire(cur_f, loc, "can not deduce type of `" + idx_access + "`; either assign it to variable like `var c: int = " + idx_access + "` or cast the result like `" + idx_access + " as int`"); } -// fire an error on `untypedTupleVar.0` when inferred as (int,int), or `[int, (int,int)]`, or other non-1 width in a tuple -GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_tuple_cannot_have_non1_stack_width_elem(SrcLocation loc, TypePtr inferred_type) { - throw ParseError(loc, "a tuple can not have " + to_string(inferred_type) + " inside, because it occupies " + std::to_string(inferred_type->get_width_on_stack()) + " stack slots in TVM, not 1"); -} - -// check type correctness of a passed argument when calling a function/method -static void check_function_argument(TypePtr param_type, bool is_mutate_param, AnyExprV ith_arg, bool is_obj_of_dot_call) { - // given `f(x: int)` and a call `f(expr)`, check that expr_type is assignable to `int` - if (!param_type->can_rhs_be_assigned(ith_arg->inferred_type)) { - if (is_obj_of_dot_call) { - ith_arg->error("can not call method for " + to_string(param_type) + " with object of type " + to_string(ith_arg)); - } else { - ith_arg->error("can not pass " + to_string(ith_arg) + " to " + to_string(param_type)); - } - } - // given `f(x: mutate int?)` and a call `f(expr)`, check that `int?` is assignable to expr_type - // (for instance, can't call such a function with `f(mutate intVal)`, since f can potentially assign null to it) - if (is_mutate_param && !ith_arg->inferred_type->can_rhs_be_assigned(param_type)) { - if (is_obj_of_dot_call) { - ith_arg->error("can not call method for mutate " + to_string(param_type) + " with object of type " + to_string(ith_arg) + ", because mutation is not type compatible"); - } else { 
- ith_arg->error("can not pass " + to_string(ith_arg) + " to mutate " + to_string(param_type) + ", because mutation is not type compatible"); - } - } -} - -/* - * TypeInferringUnifyStrategy unifies types from various branches to a common result (lca). - * It's used to auto infer function return type based on return statements, like in TypeScript. - * Example: `fun f() { ... return 1; ... return null; }` inferred as `int`. - * - * Besides function returns, it's also useful for ternary `return cond ? 1 : null` and `match` expression. - * If types can't be unified (a function returns int and cell, for example), `unify()` returns false, handled outside. - * BTW, don't confuse this way of inferring with Hindley-Milner, they have nothing in common. - */ -class TypeInferringUnifyStrategy { - TypePtr unified_result = nullptr; - - static TypePtr calculate_type_lca(TypePtr t1, TypePtr t2) { - if (t1 == t2) { - return t1; - } - if (t1->can_rhs_be_assigned(t2)) { - return t1; - } - if (t2->can_rhs_be_assigned(t1)) { - return t2; - } - - if (t1 == TypeDataNullLiteral::create()) { - return TypeDataNullable::create(t2); - } - if (t2 == TypeDataNullLiteral::create()) { - return TypeDataNullable::create(t1); - } - - const auto* tensor1 = t1->try_as(); - const auto* tensor2 = t2->try_as(); - if (tensor1 && tensor2 && tensor1->size() == tensor2->size()) { - std::vector types_lca; - types_lca.reserve(tensor1->size()); - for (int i = 0; i < tensor1->size(); ++i) { - TypePtr next = calculate_type_lca(tensor1->items[i], tensor2->items[i]); - if (next == nullptr) { - return nullptr; - } - types_lca.push_back(next); - } - return TypeDataTensor::create(std::move(types_lca)); - } - - const auto* tuple1 = t1->try_as(); - const auto* tuple2 = t2->try_as(); - if (tuple1 && tuple2 && tuple1->size() == tuple2->size()) { - std::vector types_lca; - types_lca.reserve(tuple1->size()); - for (int i = 0; i < tuple1->size(); ++i) { - TypePtr next = calculate_type_lca(tuple1->items[i], tuple2->items[i]); 
- if (next == nullptr) { - return nullptr; - } - types_lca.push_back(next); - } - return TypeDataTypedTuple::create(std::move(types_lca)); - } - - return nullptr; - } - -public: - bool unify_with(TypePtr next) { - if (unified_result == nullptr) { - unified_result = next; - return true; - } - if (unified_result == next) { - return true; - } - - TypePtr combined = calculate_type_lca(unified_result, next); - if (!combined) { - return false; - } - - unified_result = combined; - return true; - } - - bool unify_with_implicit_return_void() { - if (unified_result == nullptr) { - unified_result = TypeDataVoid::create(); - return true; - } - - return unified_result == TypeDataVoid::create(); - } - - TypePtr get_result() const { return unified_result; } -}; - -// handle __expect_type(expr, "type") call -// this is used in compiler tests -GNU_ATTRIBUTE_NOINLINE GNU_ATTRIBUTE_COLD -static void handle_possible_compiler_internal_call(FunctionPtr current_function, V v) { - FunctionPtr fun_ref = v->fun_maybe; - tolk_assert(fun_ref && fun_ref->is_builtin_function()); - static_cast(current_function); - - if (fun_ref->name == "__expect_type") { - tolk_assert(v->get_num_args() == 2); - TypePtr expected_type = parse_type_from_string(v->get_arg(1)->get_expr()->as()->str_val); - TypePtr expr_type = v->get_arg(0)->inferred_type; - if (expected_type != expr_type) { - v->error("__expect_type failed: expected " + to_string(expected_type) + ", got " + to_string(expr_type)); - } - } -} /* * This class handles all types of AST vertices and traverses them, filling all AnyExprV::inferred_type. 
@@ -272,9 +148,9 @@ static void handle_possible_compiler_internal_call(FunctionPtr current_function, * 1) when a new AST node type is introduced, I want it to fail here, not to be left un-inferred with UB at next steps * 2) easy to maintain a hint (see comments at the top of the file) */ -class InferCheckTypesAndCallsAndFieldsVisitor final { - FunctionPtr current_function = nullptr; - TypeInferringUnifyStrategy return_unifier; +class InferTypesAndCallsAndFieldsVisitor final { + FunctionPtr cur_f = nullptr; + std::vector return_statements; GNU_ATTRIBUTE_ALWAYS_INLINE static void assign_inferred_type(AnyExprV dst, AnyExprV src) { @@ -307,130 +183,132 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { } // traverse children in any statement - void process_any_statement(AnyV v) { + FlowContext process_any_statement(AnyV v, FlowContext&& flow) { switch (v->type) { case ast_sequence: - return process_sequence(v->as()); + return process_sequence(v->as(), std::move(flow)); case ast_return_statement: - return process_return_statement(v->as()); + return process_return_statement(v->as(), std::move(flow)); case ast_if_statement: - return process_if_statement(v->as()); + return process_if_statement(v->as(), std::move(flow)); case ast_repeat_statement: - return process_repeat_statement(v->as()); + return process_repeat_statement(v->as(), std::move(flow)); case ast_while_statement: - return process_while_statement(v->as()); + return process_while_statement(v->as(), std::move(flow)); case ast_do_while_statement: - return process_do_while_statement(v->as()); + return process_do_while_statement(v->as(), std::move(flow)); case ast_throw_statement: - return process_throw_statement(v->as()); + return process_throw_statement(v->as(), std::move(flow)); case ast_assert_statement: - return process_assert_statement(v->as()); + return process_assert_statement(v->as(), std::move(flow)); case ast_try_catch_statement: - return process_try_catch_statement(v->as()); + return 
process_try_catch_statement(v->as(), std::move(flow)); case ast_empty_statement: - return; + return flow; default: - infer_any_expr(reinterpret_cast(v)); + return process_expression_statement(reinterpret_cast(v), std::move(flow)); } } // assigns inferred_type for any expression (by calling assign_inferred_type) - void infer_any_expr(AnyExprV v, TypePtr hint = nullptr) { + // returns ExprFlow: out_facts that are "definitely known" after evaluating the whole expression + // if used_as_condition, true_facts/false_facts are also calculated (don't calculate them always for optimization) + ExprFlow infer_any_expr(AnyExprV v, FlowContext&& flow, bool used_as_condition, TypePtr hint = nullptr) { switch (v->type) { case ast_int_const: - return infer_int_const(v->as()); + return infer_int_const(v->as(), std::move(flow), used_as_condition); case ast_string_const: - return infer_string_const(v->as()); + return infer_string_const(v->as(), std::move(flow), used_as_condition); case ast_bool_const: - return infer_bool_const(v->as()); + return infer_bool_const(v->as(), std::move(flow), used_as_condition); case ast_local_vars_declaration: - return infer_local_vars_declaration(v->as()); + return infer_local_vars_declaration(v->as(), std::move(flow), used_as_condition); case ast_local_var_lhs: - return infer_local_var_lhs(v->as()); + return infer_local_var_lhs(v->as(), std::move(flow), used_as_condition); case ast_assign: - return infer_assignment(v->as()); + return infer_assignment(v->as(), std::move(flow), used_as_condition); case ast_set_assign: - return infer_set_assign(v->as()); + return infer_set_assign(v->as(), std::move(flow), used_as_condition); case ast_unary_operator: - return infer_unary_operator(v->as()); + return infer_unary_operator(v->as(), std::move(flow), used_as_condition); case ast_binary_operator: - return infer_binary_operator(v->as()); + return infer_binary_operator(v->as(), std::move(flow), used_as_condition); case ast_ternary_operator: - return 
infer_ternary_operator(v->as(), hint); + return infer_ternary_operator(v->as(), std::move(flow), used_as_condition, hint); case ast_cast_as_operator: - return infer_cast_as_operator(v->as()); + return infer_cast_as_operator(v->as(), std::move(flow), used_as_condition); case ast_not_null_operator: - return infer_not_null_operator(v->as()); + return infer_not_null_operator(v->as(), std::move(flow), used_as_condition); case ast_is_null_check: - return infer_is_null_check(v->as()); + return infer_is_null_check(v->as(), std::move(flow), used_as_condition); case ast_parenthesized_expression: - return infer_parenthesized(v->as(), hint); + return infer_parenthesized(v->as(), std::move(flow), used_as_condition, hint); case ast_reference: - return infer_reference(v->as()); + return infer_reference(v->as(), std::move(flow), used_as_condition); case ast_dot_access: - return infer_dot_access(v->as(), hint); + return infer_dot_access(v->as(), std::move(flow), used_as_condition, hint); case ast_function_call: - return infer_function_call(v->as(), hint); + return infer_function_call(v->as(), std::move(flow), used_as_condition, hint); case ast_tensor: - return infer_tensor(v->as(), hint); + return infer_tensor(v->as(), std::move(flow), used_as_condition, hint); case ast_typed_tuple: - return infer_typed_tuple(v->as(), hint); + return infer_typed_tuple(v->as(), std::move(flow), used_as_condition, hint); case ast_null_keyword: - return infer_null_keyword(v->as()); + return infer_null_keyword(v->as(), std::move(flow), used_as_condition); case ast_underscore: - return infer_underscore(v->as(), hint); + return infer_underscore(v->as(), std::move(flow), used_as_condition, hint); case ast_empty_expression: - return infer_empty_expression(v->as()); + return infer_empty_expression(v->as(), std::move(flow), used_as_condition); default: throw UnexpectedASTNodeType(v, "infer_any_expr"); } } - static TypePtr unwrap_nullable(TypePtr type) { - while (const TypeDataNullable* as_nullable = 
type->try_as()) { - type = as_nullable->inner; - } - return type; - } - - static bool expect_integer(AnyExprV v_inferred) { - return v_inferred->inferred_type == TypeDataInt::create(); - } - - static bool expect_integer(TypePtr inferred_type) { - return inferred_type == TypeDataInt::create(); - } - - static bool expect_boolean(AnyExprV v_inferred) { - return v_inferred->inferred_type == TypeDataBool::create(); - } - - static bool expect_boolean(TypePtr inferred_type) { - return inferred_type == TypeDataBool::create(); - } - - static void infer_int_const(V v) { + static ExprFlow infer_int_const(V v, FlowContext&& flow, bool used_as_condition) { assign_inferred_type(v, TypeDataInt::create()); + + ExprFlow after_v(std::move(flow), used_as_condition); + if (used_as_condition) { // `if (0)` always false + if (v->intval == 0) { + after_v.true_flow.mark_unreachable(UnreachableKind::CantHappen); + } else { + after_v.false_flow.mark_unreachable(UnreachableKind::CantHappen); + } + } + return after_v; } - static void infer_string_const(V v) { + static ExprFlow infer_string_const(V v, FlowContext&& flow, bool used_as_condition) { if (v->is_bitslice()) { assign_inferred_type(v, TypeDataSlice::create()); } else { assign_inferred_type(v, TypeDataInt::create()); } + + return ExprFlow(std::move(flow), used_as_condition); } - static void infer_bool_const(V v) { + static ExprFlow infer_bool_const(V v, FlowContext&& flow, bool used_as_condition) { assign_inferred_type(v, TypeDataBool::create()); + + ExprFlow after_v(std::move(flow), used_as_condition); + if (used_as_condition) { // `if (false)` always false + if (v->bool_val == false) { + after_v.true_flow.mark_unreachable(UnreachableKind::CantHappen); + } else { + after_v.false_flow.mark_unreachable(UnreachableKind::CantHappen); + } + } + return after_v; } - void infer_local_vars_declaration(V v) { - infer_any_expr(v->get_expr()); + ExprFlow infer_local_vars_declaration(V v, FlowContext&& flow, bool used_as_condition) { + flow = 
infer_any_expr(v->get_expr(), std::move(flow), used_as_condition).out_flow; assign_inferred_type(v, v->get_expr()); + return ExprFlow(std::move(flow), used_as_condition); } - static void infer_local_var_lhs(V v) { + static ExprFlow infer_local_var_lhs(V v, FlowContext&& flow, bool used_as_condition) { // `var v = rhs`, inferring is called for `v` // at the moment of inferring left side of assignment, we don't know type of rhs (since lhs is executed first) // so, mark `v` as unknown @@ -440,59 +318,87 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { } else { assign_inferred_type(v, v->declared_type ? v->declared_type : TypeDataUnknown::create()); } + return ExprFlow(std::move(flow), used_as_condition); } - void infer_assignment(V v) { + ExprFlow infer_assignment(V v, FlowContext&& flow, bool used_as_condition) { // v is assignment: `x = 5` / `var x = 5` / `var x: slice = 5` / `(cs,_) = f()` / `val (a,[b],_) = (a,t,0)` // execution flow is: lhs first, rhs second (at IR generation, also lhs is evaluated first, unlike FunC) // after inferring lhs, use it for hint when inferring rhs // example: `var i: int = t.tupleAt(0)` is ok (hint=int, T=int), but `var i = t.tupleAt(0)` not, since `tupleAt(t,i): T` AnyExprV lhs = v->get_lhs(); AnyExprV rhs = v->get_rhs(); - infer_any_expr(lhs); - infer_any_expr(rhs, lhs->inferred_type); - process_assignment_lhs_after_infer_rhs(lhs, rhs->inferred_type, rhs); + flow = infer_left_side_of_assignment(lhs, std::move(flow)); + flow = infer_any_expr(rhs, std::move(flow), false, lhs->inferred_type).out_flow; + process_assignment_lhs_after_infer_rhs(lhs, rhs->inferred_type, flow); assign_inferred_type(v, rhs); // note, that the resulting type is rhs, not lhs + + return ExprFlow(std::move(flow), used_as_condition); + } + + // for `v = rhs` (NOT `var v = lhs`), variable `v` may be smart cast at this point + // the purpose of this function is to drop smart casts from expressions used as left side of assignments + // another example: `x.0 
= rhs`, smart cast is dropped for `x.0` (not for `x`) + // the goal of dropping smart casts is to have lhs->inferred_type as actually declared, used as hint to infer rhs + FlowContext infer_left_side_of_assignment(AnyExprV lhs, FlowContext&& flow) { + if (auto lhs_tensor = lhs->try_as()) { + std::vector types_list; + types_list.reserve(lhs_tensor->size()); + for (int i = 0; i < lhs_tensor->size(); ++i) { + flow = infer_left_side_of_assignment(lhs_tensor->get_item(i), std::move(flow)); + types_list.push_back(lhs_tensor->get_item(i)->inferred_type); + } + assign_inferred_type(lhs, TypeDataTensor::create(std::move(types_list))); + + } else if (auto lhs_tuple = lhs->try_as()) { + std::vector types_list; + types_list.reserve(lhs_tuple->size()); + for (int i = 0; i < lhs_tuple->size(); ++i) { + flow = infer_left_side_of_assignment(lhs_tuple->get_item(i), std::move(flow)); + types_list.push_back(lhs_tuple->get_item(i)->inferred_type); + } + assign_inferred_type(lhs, TypeDataTypedTuple::create(std::move(types_list))); + + } else if (auto lhs_par = lhs->try_as()) { + flow = infer_left_side_of_assignment(lhs_par->get_expr(), std::move(flow)); + assign_inferred_type(lhs, lhs_par->get_expr()->inferred_type); + + } else { + flow = infer_any_expr(lhs, std::move(flow), false).out_flow; + if (extract_sink_expression_from_vertex(lhs)) { + TypePtr lhs_declared_type = calc_declared_type_before_smart_cast(lhs); + assign_inferred_type(lhs, lhs_declared_type); + } + } + + return flow; } // handle (and dig recursively) into `var lhs = rhs` // at this point, both lhs and rhs are already inferred, but lhs newly-declared vars are unknown (unless have declared_type) // examples: `var z = 5`, `var (x, [y]) = (2, [3])`, `var (x, [y]) = xy` // the purpose is to update inferred_type of lhs vars (z, x, y) + // and to re-assign types of tensors/tuples inside: `var (x,[y]) = ...` was `(unknown,[unknown])`, becomes `(int,[int])` // while recursing, keep track of rhs if lhs and rhs have common shape 
(5 for z, 2 for x, [3] for [y], 3 for y) // (so that on type mismatch, point to corresponding rhs, example: `var (x, y:slice) = (1, 2)` point to 2 - static void process_assignment_lhs_after_infer_rhs(AnyExprV lhs, TypePtr rhs_type, AnyExprV corresponding_maybe_rhs) { + static void process_assignment_lhs_after_infer_rhs(AnyExprV lhs, TypePtr rhs_type, FlowContext& out_flow) { tolk_assert(lhs->inferred_type != nullptr); - AnyExprV err_loc = corresponding_maybe_rhs ? corresponding_maybe_rhs : lhs; // `var ... = rhs` - dig into left part if (auto lhs_decl = lhs->try_as()) { - process_assignment_lhs_after_infer_rhs(lhs_decl->get_expr(), rhs_type, corresponding_maybe_rhs); + process_assignment_lhs_after_infer_rhs(lhs_decl->get_expr(), rhs_type, out_flow); return; } // inside `var v: int = rhs` / `var _ = rhs` / `var v redef = rhs` (lhs is "v" / "_" / "v") if (auto lhs_var = lhs->try_as()) { - if (lhs_var->inferred_type != TypeDataUnknown::create()) { // it's `var v: int` or redef - TypePtr declared_type = lhs_var->inferred_type; - if (!declared_type->can_rhs_be_assigned(rhs_type)) { - err_loc->error("can not assign " + to_string(rhs_type) + " to variable of type " + to_string(declared_type)); - } - } else { - if (rhs_type == TypeDataNullLiteral::create()) { - fire_error_assign_always_null_to_variable(err_loc->loc, lhs_var->var_ref->try_as(), corresponding_maybe_rhs && corresponding_maybe_rhs->type == ast_null_keyword); - } + TypePtr declared_type = lhs_var->marked_as_redef ? 
lhs_var->var_ref->declared_type : lhs_var->declared_type; + if (lhs_var->inferred_type == TypeDataUnknown::create()) { assign_inferred_type(lhs_var, rhs_type); assign_inferred_type(lhs_var->var_ref, rhs_type); } - return; - } - - // `v = rhs` / `(c1, c2) = rhs` (lhs is "v" / "_" / "c1" / "c2" after recursion) - if (lhs->try_as()) { - if (!lhs->inferred_type->can_rhs_be_assigned(rhs_type)) { - err_loc->error("can not assign " + to_string(rhs_type) + " to variable of type " + to_string(lhs)); - } + TypePtr smart_casted_type = declared_type ? calc_smart_cast_type_on_assignment(declared_type, rhs_type) : rhs_type; + out_flow.register_known_type(SinkExpression(lhs_var->var_ref), smart_casted_type); return; } @@ -500,16 +406,14 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { // dig recursively into v1 and v2 with corresponding rhs i-th item of a tensor if (auto lhs_tensor = lhs->try_as()) { const TypeDataTensor* rhs_type_tensor = rhs_type->try_as(); - if (!rhs_type_tensor) { - err_loc->error("can not assign " + to_string(rhs_type) + " to a tensor"); - } - if (lhs_tensor->size() != rhs_type_tensor->size()) { - err_loc->error("can not assign " + to_string(rhs_type) + ", sizes mismatch"); - } - V rhs_tensor_maybe = corresponding_maybe_rhs ? corresponding_maybe_rhs->try_as() : nullptr; + std::vector types_list; + types_list.reserve(lhs_tensor->size()); for (int i = 0; i < lhs_tensor->size(); ++i) { - process_assignment_lhs_after_infer_rhs(lhs_tensor->get_item(i), rhs_type_tensor->items[i], rhs_tensor_maybe ? rhs_tensor_maybe->get_item(i) : nullptr); + TypePtr ith_rhs_type = rhs_type_tensor && i < rhs_type_tensor->size() ? 
rhs_type_tensor->items[i] : TypeDataUnknown::create(); + process_assignment_lhs_after_infer_rhs(lhs_tensor->get_item(i), ith_rhs_type, out_flow); + types_list.push_back(lhs_tensor->get_item(i)->inferred_type); } + assign_inferred_type(lhs, TypeDataTensor::create(std::move(types_list))); return; } @@ -517,73 +421,57 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { // dig recursively into v1 and v2 with corresponding rhs i-th item of a tuple if (auto lhs_tuple = lhs->try_as()) { const TypeDataTypedTuple* rhs_type_tuple = rhs_type->try_as(); - if (!rhs_type_tuple) { - err_loc->error("can not assign " + to_string(rhs_type) + " to a tuple"); - } - if (lhs_tuple->size() != rhs_type_tuple->size()) { - err_loc->error("can not assign " + to_string(rhs_type) + ", sizes mismatch"); - } - V rhs_tuple_maybe = corresponding_maybe_rhs ? corresponding_maybe_rhs->try_as() : nullptr; + std::vector types_list; + types_list.reserve(lhs_tuple->size()); for (int i = 0; i < lhs_tuple->size(); ++i) { - process_assignment_lhs_after_infer_rhs(lhs_tuple->get_item(i), rhs_type_tuple->items[i], rhs_tuple_maybe ? rhs_tuple_maybe->get_item(i) : nullptr); + TypePtr ith_rhs_type = rhs_type_tuple && i < rhs_type_tuple->size() ? 
rhs_type_tuple->items[i] : TypeDataUnknown::create(); + process_assignment_lhs_after_infer_rhs(lhs_tuple->get_item(i), ith_rhs_type, out_flow); + types_list.push_back(lhs_tuple->get_item(i)->inferred_type); } + assign_inferred_type(lhs, TypeDataTypedTuple::create(std::move(types_list))); return; } - // check `untypedTuple.0 = rhs_tensor` and other non-1 width elements - if (auto lhs_dot = lhs->try_as()) { - if (lhs_dot->is_target_indexed_access() && lhs_dot->get_obj()->inferred_type == TypeDataTuple::create()) { - if (rhs_type->get_width_on_stack() != 1) { - fire_error_tuple_cannot_have_non1_stack_width_elem(err_loc->loc, rhs_type); - } - } + // `(v) = (rhs)`, just surrounded by parenthesis + if (auto lhs_par = lhs->try_as()) { + process_assignment_lhs_after_infer_rhs(lhs_par->get_expr(), rhs_type, out_flow); + assign_inferred_type(lhs, lhs_par->get_expr()); + return; } - // here is something unhandled like `a.0 = rhs`, just check type matching - // for something strange like `f() = rhs` type inferring will pass, but will fail later - if (!lhs->inferred_type->can_rhs_be_assigned(rhs_type)) { - err_loc->error("can not assign " + to_string(rhs_type) + " to " + to_string(lhs)); + // here is `v = rhs` (just assignment, not `var v = rhs`) / `a.0 = rhs` / `getObj(z=f()).0 = rhs` etc. 
+ // for instance, `tensorVar.0 = rhs` / `obj.field = rhs` has already checked index correctness while inferring lhs + // for strange lhs like `f() = rhs` type inferring (and later checking) will pass, but will fail lvalue check later + if (SinkExpression s_expr = extract_sink_expression_from_vertex(lhs)) { + TypePtr lhs_declared_type = calc_declared_type_before_smart_cast(lhs); + TypePtr smart_casted_type = calc_smart_cast_type_on_assignment(lhs_declared_type, rhs_type); + out_flow.register_known_type(s_expr, smart_casted_type); + assign_inferred_type(lhs, lhs_declared_type); } } - void infer_set_assign(V v) { + ExprFlow infer_set_assign(V v, FlowContext&& flow, bool used_as_condition) { AnyExprV lhs = v->get_lhs(); AnyExprV rhs = v->get_rhs(); - infer_any_expr(lhs); - infer_any_expr(rhs, lhs->inferred_type); + ExprFlow after_lhs = infer_any_expr(lhs, std::move(flow), false); + FlowContext rhs_flow = std::move(after_lhs.out_flow); + ExprFlow after_rhs = infer_any_expr(rhs, std::move(rhs_flow), false, lhs->inferred_type); // almost all operators implementation is hardcoded by built-in functions `_+_` and similar std::string_view builtin_func = v->operator_name; // "+" for operator += - switch (v->tok) { - // &= |= ^= are "overloaded" both for integers and booleans, (int &= bool) is NOT allowed - case tok_set_bitwise_and: - case tok_set_bitwise_or: - case tok_set_bitwise_xor: { - bool both_int = expect_integer(lhs) && expect_integer(rhs); - bool both_bool = expect_boolean(lhs) && expect_boolean(rhs); - if (!both_int && !both_bool) { - fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); - } - break; - } - // others are mathematical: += *= ... 
- default: - if (!expect_integer(lhs) || !expect_integer(rhs)) { - fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); - } - } - assign_inferred_type(v, lhs); if (!builtin_func.empty()) { FunctionPtr builtin_sym = lookup_global_symbol("_" + static_cast(builtin_func) + "_")->try_as(); v->mutate()->assign_fun_ref(builtin_sym); } + + return ExprFlow(std::move(after_rhs.out_flow), used_as_condition); } - void infer_unary_operator(V v) { + ExprFlow infer_unary_operator(V v, FlowContext&& flow, bool used_as_condition) { AnyExprV rhs = v->get_rhs(); - infer_any_expr(rhs); + ExprFlow after_rhs = infer_any_expr(rhs, std::move(flow), used_as_condition); // all operators implementation is hardcoded by built-in functions `~_` and similar std::string_view builtin_func = v->operator_name; @@ -592,95 +480,89 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { case tok_minus: case tok_plus: case tok_bitwise_not: - if (!expect_integer(rhs)) { - fire_error_cannot_apply_operator(v->loc, v->operator_name, rhs); - } assign_inferred_type(v, TypeDataInt::create()); break; case tok_logical_not: - if (expect_boolean(rhs)) { + if (rhs->inferred_type == TypeDataBool::create()) { builtin_func = "!b"; // "overloaded" for bool - } else if (!expect_integer(rhs)) { - fire_error_cannot_apply_operator(v->loc, v->operator_name, rhs); } assign_inferred_type(v, TypeDataBool::create()); + std::swap(after_rhs.false_flow, after_rhs.true_flow); break; default: tolk_assert(false); } - if (!builtin_func.empty()) { - FunctionPtr builtin_sym = lookup_global_symbol(static_cast(builtin_func) + "_")->try_as(); - v->mutate()->assign_fun_ref(builtin_sym); - } + FunctionPtr builtin_sym = lookup_global_symbol(static_cast(builtin_func) + "_")->try_as(); + v->mutate()->assign_fun_ref(builtin_sym); + + return after_rhs; } - void infer_binary_operator(V v) { + ExprFlow infer_binary_operator(V v, FlowContext&& flow, bool used_as_condition) { AnyExprV lhs = v->get_lhs(); AnyExprV rhs = v->get_rhs(); 
- infer_any_expr(lhs); - infer_any_expr(rhs); // almost all operators implementation is hardcoded by built-in functions `_+_` and similar std::string_view builtin_func = v->operator_name; switch (v->tok) { - // == != can compare both integers and booleans, (int == bool) is NOT allowed + // comparison operators, returning bool case tok_eq: - case tok_neq: { - bool both_int = expect_integer(unwrap_nullable(lhs->inferred_type)) && expect_integer(unwrap_nullable(rhs->inferred_type)); - bool both_bool = expect_boolean(unwrap_nullable(lhs->inferred_type)) && expect_boolean(unwrap_nullable(rhs->inferred_type)); - if (!both_int && !both_bool) { - if (lhs->inferred_type == rhs->inferred_type) { // compare slice with slice - v->error("type " + to_string(lhs) + " can not be compared with `== !=`"); - } else { - fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); - } - } - assign_inferred_type(v, TypeDataBool::create()); - break; - } - // < > can compare only integers + case tok_neq: case tok_lt: case tok_gt: case tok_leq: case tok_geq: - case tok_spaceship: { - if (!expect_integer(lhs) || !expect_integer(rhs)) { - fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); - } + case tok_spaceship: + flow = infer_any_expr(lhs, std::move(flow), false).out_flow; + flow = infer_any_expr(rhs, std::move(flow), false).out_flow; assign_inferred_type(v, TypeDataBool::create()); break; - } - // & | ^ are "overloaded" both for integers and booleans, (int & bool) is NOT allowed + // & | ^ are "overloaded" both for integers and booleans case tok_bitwise_and: case tok_bitwise_or: - case tok_bitwise_xor: { - bool both_int = expect_integer(lhs) && expect_integer(rhs); - bool both_bool = expect_boolean(lhs) && expect_boolean(rhs); - if (!both_int && !both_bool) { - fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); + case tok_bitwise_xor: + flow = infer_any_expr(lhs, std::move(flow), false).out_flow; + flow = infer_any_expr(rhs, std::move(flow), 
false).out_flow; + if (lhs->inferred_type == TypeDataBool::create() && rhs->inferred_type == TypeDataBool::create()) { + assign_inferred_type(v, TypeDataBool::create()); + } else { + assign_inferred_type(v, TypeDataInt::create()); } assign_inferred_type(v, rhs); // (int & int) is int, (bool & bool) is bool break; - } - // && || can work with integers and booleans, (int && bool) is allowed - case tok_logical_and: - case tok_logical_or: { - bool lhs_ok = expect_integer(lhs) || expect_boolean(lhs); - bool rhs_ok = expect_integer(rhs) || expect_boolean(rhs); - if (!lhs_ok || !rhs_ok) { - fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); - } + // && || result in booleans, but building flow facts is tricky due to short-circuit + case tok_logical_and: { + ExprFlow after_lhs = infer_any_expr(lhs, std::move(flow), true); + ExprFlow after_rhs = infer_any_expr(rhs, std::move(after_lhs.true_flow), true); assign_inferred_type(v, TypeDataBool::create()); - builtin_func = {}; // no built-in functions, logical operators are expressed as IFs at IR level - break; + if (!used_as_condition) { + FlowContext out_flow = FlowContext::merge_flow(std::move(after_lhs.false_flow), std::move(after_rhs.out_flow)); + return ExprFlow(std::move(out_flow), false); + } + FlowContext out_flow = FlowContext::merge_flow(std::move(after_lhs.out_flow), std::move(after_rhs.out_flow)); + FlowContext true_flow = std::move(after_rhs.true_flow); + FlowContext false_flow = FlowContext::merge_flow(std::move(after_lhs.false_flow), std::move(after_rhs.false_flow)); + return ExprFlow(std::move(out_flow), std::move(true_flow), std::move(false_flow)); + } + case tok_logical_or: { + ExprFlow after_lhs = infer_any_expr(lhs, std::move(flow), true); + ExprFlow after_rhs = infer_any_expr(rhs, std::move(after_lhs.false_flow), true); + assign_inferred_type(v, TypeDataBool::create()); + if (!used_as_condition) { + FlowContext out_flow = FlowContext::merge_flow(std::move(after_lhs.true_flow), 
std::move(after_rhs.out_flow)); + return ExprFlow(std::move(after_rhs.out_flow), false); + } + FlowContext out_flow = FlowContext::merge_flow(std::move(after_lhs.out_flow), std::move(after_rhs.out_flow)); + FlowContext true_flow = FlowContext::merge_flow(std::move(after_lhs.true_flow), std::move(after_rhs.true_flow)); + FlowContext false_flow = std::move(after_rhs.false_flow); + return ExprFlow(std::move(out_flow), std::move(true_flow), std::move(false_flow)); } // others are mathematical: + * ... default: - if (!expect_integer(lhs) || !expect_integer(rhs)) { - fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); - } + flow = infer_any_expr(lhs, std::move(flow), false).out_flow; + flow = infer_any_expr(rhs, std::move(flow), false).out_flow; assign_inferred_type(v, TypeDataInt::create()); } @@ -688,63 +570,118 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { FunctionPtr builtin_sym = lookup_global_symbol("_" + static_cast(builtin_func) + "_")->try_as(); v->mutate()->assign_fun_ref(builtin_sym); } + + return ExprFlow(std::move(flow), used_as_condition); } - void infer_ternary_operator(V v, TypePtr hint) { - AnyExprV cond = v->get_cond(); - infer_any_expr(cond); - if (!expect_integer(cond) && !expect_boolean(cond)) { - cond->error("can not use " + to_string(cond) + " as a boolean condition"); + ExprFlow infer_ternary_operator(V v, FlowContext&& flow, bool used_as_condition, TypePtr hint) { + ExprFlow after_cond = infer_any_expr(v->get_cond(), std::move(flow), true); + v->get_cond()->mutate()->assign_always_true_or_false(after_cond.get_always_true_false_state()); + + ExprFlow after_true = infer_any_expr(v->get_when_true(), std::move(after_cond.true_flow), used_as_condition, hint); + ExprFlow after_false = infer_any_expr(v->get_when_false(), std::move(after_cond.false_flow), used_as_condition, hint); + + if (v->get_cond()->is_always_true) { + assign_inferred_type(v, v->get_when_true()); + return after_true; + } + if 
(v->get_cond()->is_always_false) { + assign_inferred_type(v, v->get_when_false()); + return after_false; } - infer_any_expr(v->get_when_true(), hint); - infer_any_expr(v->get_when_false(), hint); TypeInferringUnifyStrategy tern_type; tern_type.unify_with(v->get_when_true()->inferred_type); if (!tern_type.unify_with(v->get_when_false()->inferred_type)) { - v->error("types of ternary branches are incompatible"); + fire(cur_f, v->loc, "types of ternary branches are incompatible: " + to_string(v->get_when_true()) + " and " + to_string(v->get_when_false())); } assign_inferred_type(v, tern_type.get_result()); + + FlowContext out_flow = FlowContext::merge_flow(std::move(after_true.out_flow), std::move(after_false.out_flow)); + return ExprFlow(std::move(out_flow), std::move(after_true.true_flow), std::move(after_false.false_flow)); } - void infer_cast_as_operator(V v) { + ExprFlow infer_cast_as_operator(V v, FlowContext&& flow, bool used_as_condition) { // for `expr as `, use this type for hint, so that `t.tupleAt(0) as int` is ok - infer_any_expr(v->get_expr(), v->cast_to_type); - if (!v->get_expr()->inferred_type->can_be_casted_with_as_operator(v->cast_to_type)) { - v->error("type " + to_string(v->get_expr()) + " can not be cast to " + to_string(v->cast_to_type)); - } + ExprFlow after_expr = infer_any_expr(v->get_expr(), std::move(flow), false, v->cast_to_type); assign_inferred_type(v, v->cast_to_type); + + if (!used_as_condition) { + return after_expr; + } + return ExprFlow(std::move(after_expr.out_flow), true); } - void infer_is_null_check(V v) { - infer_any_expr(v->get_expr()); + ExprFlow infer_is_null_check(V v, FlowContext&& flow, bool used_as_condition) { + ExprFlow after_expr = infer_any_expr(v->get_expr(), std::move(flow), false); assign_inferred_type(v, TypeDataBool::create()); + + TypePtr expr_type = v->get_expr()->inferred_type; + TypePtr non_null_type = calculate_type_subtract_null(expr_type); + if (expr_type == TypeDataNullLiteral::create()) { // `expr == 
null` is always true + v->mutate()->assign_always_true_or_false(v->is_negated ? 2 : 1); + } else if (non_null_type == TypeDataNever::create()) { // `expr == null` is always false + v->mutate()->assign_always_true_or_false(v->is_negated ? 1 : 2); + } else { + v->mutate()->assign_always_true_or_false(0); + } + + if (!used_as_condition) { + return after_expr; + } + + FlowContext true_flow = after_expr.out_flow.clone(); + FlowContext false_flow = after_expr.out_flow.clone(); + if (SinkExpression s_expr = extract_sink_expression_from_vertex(v->get_expr())) { + if (v->is_always_true) { + false_flow.mark_unreachable(UnreachableKind::CantHappen); + false_flow.register_known_type(s_expr, TypeDataNever::create()); + } else if (v->is_always_false) { + true_flow.mark_unreachable(UnreachableKind::CantHappen); + true_flow.register_known_type(s_expr, TypeDataNever::create()); + } else if (!v->is_negated) { + true_flow.register_known_type(s_expr, TypeDataNullLiteral::create()); + false_flow.register_known_type(s_expr, non_null_type); + } else { + true_flow.register_known_type(s_expr, non_null_type); + false_flow.register_known_type(s_expr, TypeDataNullLiteral::create()); + } + } + return ExprFlow(std::move(after_expr.out_flow), std::move(true_flow), std::move(false_flow)); } - void infer_not_null_operator(V v) { - infer_any_expr(v->get_expr()); + ExprFlow infer_not_null_operator(V v, FlowContext&& flow, bool used_as_condition) { + ExprFlow after_expr = infer_any_expr(v->get_expr(), std::move(flow), false); + if (const auto* as_nullable = v->get_expr()->inferred_type->try_as()) { - // operator `!` used for `T?`, leave `T` assign_inferred_type(v, as_nullable->inner); } else { - // operator `!` used for non-nullable, probably a warning should be printed assign_inferred_type(v, v->get_expr()); } + + if (!used_as_condition) { + return after_expr; + } + return ExprFlow(std::move(after_expr.out_flow), true); } - void infer_parenthesized(V v, TypePtr hint) { - 
infer_any_expr(v->get_expr(), hint); + ExprFlow infer_parenthesized(V v, FlowContext&& flow, bool used_as_condition, TypePtr hint) { + ExprFlow after_expr = infer_any_expr(v->get_expr(), std::move(flow), used_as_condition, hint); assign_inferred_type(v, v->get_expr()); + return after_expr; } - static void infer_reference(V v) { + ExprFlow infer_reference(V v, FlowContext&& flow, bool used_as_condition) { if (LocalVarPtr var_ref = v->sym->try_as()) { - assign_inferred_type(v, var_ref->declared_type); + TypePtr declared_or_smart_casted = flow.smart_cast_if_exists(SinkExpression(var_ref)); + tolk_assert(declared_or_smart_casted != nullptr); // all local vars are presented in flow + assign_inferred_type(v, declared_or_smart_casted); } else if (GlobalConstPtr const_ref = v->sym->try_as()) { assign_inferred_type(v, const_ref->is_int_const() ? TypeDataInt::create() : TypeDataSlice::create()); } else if (GlobalVarPtr glob_ref = v->sym->try_as()) { + // there are no smart casts for globals, it's a way of preventing reading one global multiple times, it costs gas assign_inferred_type(v, glob_ref->declared_type); } else if (FunctionPtr fun_ref = v->sym->try_as()) { @@ -753,7 +690,7 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { if (fun_ref->is_generic_function() && !v_instantiationTs) { // `genericFn` is invalid as non-call, can't be used without - v->error("can not use a generic function " + to_string(fun_ref) + " as non-call"); + fire(cur_f, v->loc, "can not use a generic function " + to_string(fun_ref) + " as non-call"); } else if (fun_ref->is_generic_function()) { // `genericFn` is valid, it's a reference to instantiation @@ -761,15 +698,15 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { fun_ref = check_and_instantiate_generic_function(v->loc, fun_ref, std::move(substitutions)); v->mutate()->assign_sym(fun_ref); - } else if (UNLIKELY(v_instantiationTs != nullptr)) { + } else if (v_instantiationTs != nullptr && 
!fun_ref->is_instantiation_of_generic_function()) { // non-generic function referenced like `return beginCell;` - v_instantiationTs->error("not generic function used with generic T"); + fire(cur_f, v_instantiationTs->loc, "not generic function used with generic T"); } fun_ref->mutate()->assign_is_used_as_noncall(); get_or_infer_return_type(fun_ref); assign_inferred_type(v, fun_ref->inferred_full_type); - return; + return ExprFlow(std::move(flow), used_as_condition); } else { tolk_assert(false); @@ -777,16 +714,17 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { // for non-functions: `local_var` and similar not allowed if (UNLIKELY(v->has_instantiationTs())) { - v->get_instantiationTs()->error("generic T not expected here"); + fire(cur_f, v->get_instantiationTs()->loc, "generic T not expected here"); } + return ExprFlow(std::move(flow), used_as_condition); } // given `genericF` / `t.tupleFirst` (the user manually specified instantiation Ts), // validate and collect them // returns: [int, slice] / [cell] - static std::vector collect_fun_generic_substitutions_from_manually_specified(SrcLocation loc, FunctionPtr fun_ref, V instantiationT_list) { + std::vector collect_fun_generic_substitutions_from_manually_specified(SrcLocation loc, FunctionPtr fun_ref, V instantiationT_list) const { if (fun_ref->genericTs->size() != instantiationT_list->get_items().size()) { - throw ParseError(loc, "wrong count of generic T: expected " + std::to_string(fun_ref->genericTs->size()) + ", got " + std::to_string(instantiationT_list->size())); + fire(cur_f, loc, "wrong count of generic T: expected " + std::to_string(fun_ref->genericTs->size()) + ", got " + std::to_string(instantiationT_list->size())); } std::vector substitutions; @@ -804,30 +742,27 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { // example: was `t.tuplePush(2)`, read , instantiate `tuplePush` (will later fail type check) // example: was `var cb = t.tupleFirst;` (used as reference, as non-call), instantiate 
`tupleFirst` // returns fun_ref to instantiated function - static FunctionPtr check_and_instantiate_generic_function(SrcLocation loc, FunctionPtr fun_ref, std::vector&& substitutionTs) { + FunctionPtr check_and_instantiate_generic_function(SrcLocation loc, FunctionPtr fun_ref, std::vector&& substitutionTs) const { // T for asm function must be a TVM primitive (width 1), otherwise, asm would act incorrectly if (fun_ref->is_asm_function() || fun_ref->is_builtin_function()) { for (int i = 0; i < static_cast(substitutionTs.size()); ++i) { if (substitutionTs[i]->get_width_on_stack() != 1) { - fire_error_calling_asm_function_with_non1_stack_width_arg(loc, fun_ref, substitutionTs, i); + fire_error_calling_asm_function_with_non1_stack_width_arg(cur_f, loc, fun_ref, substitutionTs, i); } } } std::string inst_name = generate_instantiated_name(fun_ref->name, substitutionTs); - try { - // make deep clone of `f` with substitutionTs - // (if `f` was already instantiated, it will be immediately returned from a symbol table) - return instantiate_generic_function(loc, fun_ref, inst_name, std::move(substitutionTs)); - } catch (const ParseError& ex) { - throw ParseError(ex.where, "while instantiating generic function `" + inst_name + "` at " + loc.to_string() + ": " + ex.message); - } + // make deep clone of `f` with substitutionTs + // (if `f` was already instantiated, it will be immediately returned from a symbol table) + return instantiate_generic_function(loc, fun_ref, inst_name, std::move(substitutionTs)); } - void infer_dot_access(V v, TypePtr hint) { + ExprFlow infer_dot_access(V v, FlowContext&& flow, bool used_as_condition, TypePtr hint) { // it's NOT a method call `t.tupleSize()` (since such cases are handled by infer_function_call) // it's `t.0`, `getUser().id`, and `t.tupleSize` (as a reference, not as a call) - infer_any_expr(v->get_obj()); + flow = infer_any_expr(v->get_obj(), std::move(flow), false).out_flow; + TypePtr obj_type = v->get_obj()->inferred_type; // our 
goal is to fill v->target knowing type of obj V v_ident = v->get_identifier(); // field/method name vertex @@ -840,19 +775,31 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { int index_at = std::stoi(std::string(field_name)); if (const auto* t_tensor = obj_type->try_as()) { if (index_at >= t_tensor->size()) { - v_ident->error("invalid tensor index, expected 0.." + std::to_string(t_tensor->items.size() - 1)); + fire(cur_f, v_ident->loc, "invalid tensor index, expected 0.." + std::to_string(t_tensor->items.size() - 1)); } v->mutate()->assign_target(index_at); - assign_inferred_type(v, t_tensor->items[index_at]); - return; + TypePtr inferred_type = t_tensor->items[index_at]; + if (SinkExpression s_expr = extract_sink_expression_from_vertex(v)) { + if (TypePtr smart_casted = flow.smart_cast_if_exists(s_expr)) { + inferred_type = smart_casted; + } + } + assign_inferred_type(v, inferred_type); + return ExprFlow(std::move(flow), used_as_condition); } if (const auto* t_tuple = obj_type->try_as()) { if (index_at >= t_tuple->size()) { - v_ident->error("invalid tuple index, expected 0.." + std::to_string(t_tuple->items.size() - 1)); + fire(cur_f, v_ident->loc, "invalid tuple index, expected 0.." 
+ std::to_string(t_tuple->items.size() - 1)); } v->mutate()->assign_target(index_at); - assign_inferred_type(v, t_tuple->items[index_at]); - return; + TypePtr inferred_type = t_tuple->items[index_at]; + if (SinkExpression s_expr = extract_sink_expression_from_vertex(v)) { + if (TypePtr smart_casted = flow.smart_cast_if_exists(s_expr)) { + inferred_type = smart_casted; + } + } + assign_inferred_type(v, inferred_type); + return ExprFlow(std::move(flow), used_as_condition); } if (obj_type->try_as()) { TypePtr item_type = nullptr; @@ -860,35 +807,32 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { item_type = TypeDataUnknown::create(); } else { if (hint == nullptr) { - fire_error_cannot_deduce_untyped_tuple_access(v->loc, index_at); - } - if (hint->get_width_on_stack() != 1) { - fire_error_tuple_cannot_have_non1_stack_width_elem(v->loc, hint); + fire_error_cannot_deduce_untyped_tuple_access(cur_f, v->loc, index_at); } item_type = hint; } v->mutate()->assign_target(index_at); assign_inferred_type(v, item_type); - return; + return ExprFlow(std::move(flow), used_as_condition); } - v_ident->error("type " + to_string(obj_type) + " is not indexable"); + fire(cur_f, v_ident->loc, "type " + to_string(obj_type) + " is not indexable"); } // for now, Tolk doesn't have fields and object-scoped methods; `t.tupleSize` is a global function `tupleSize` const Symbol* sym = lookup_global_symbol(field_name); FunctionPtr fun_ref = sym ? 
sym->try_as() : nullptr; if (!fun_ref) { - v_ident->error("non-existing field `" + static_cast(field_name) + "` of type " + to_string(obj_type)); + fire(cur_f, v_ident->loc, "non-existing field `" + static_cast(field_name) + "` of type " + to_string(obj_type)); } // `t.tupleSize` is ok, `cs.tupleSize` not if (!fun_ref->parameters[0].declared_type->can_rhs_be_assigned(obj_type)) { - v_ident->error("referencing a method for " + to_string(fun_ref->parameters[0].declared_type) + " with object of type " + to_string(obj_type)); + fire(cur_f, v_ident->loc, "referencing a method for " + to_string(fun_ref->parameters[0].declared_type) + " with object of type " + to_string(obj_type)); } if (fun_ref->is_generic_function() && !v_instantiationTs) { // `genericFn` and `t.tupleAt` are invalid as non-call, they can't be used without - v->error("can not use a generic function " + to_string(fun_ref) + " as non-call"); + fire(cur_f, v->loc, "can not use a generic function " + to_string(fun_ref) + " as non-call"); } else if (fun_ref->is_generic_function()) { // `t.tupleAt` is valid, it's a reference to instantiation @@ -897,16 +841,17 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { } else if (UNLIKELY(v_instantiationTs != nullptr)) { // non-generic method referenced like `var cb = c.cellHash;` - v_instantiationTs->error("not generic function used with generic T"); + fire(cur_f, v_instantiationTs->loc, "not generic function used with generic T"); } fun_ref->mutate()->assign_is_used_as_noncall(); v->mutate()->assign_target(fun_ref); get_or_infer_return_type(fun_ref); assign_inferred_type(v, fun_ref->inferred_full_type); // type of `t.tupleSize` is TypeDataFunCallable + return ExprFlow(std::move(flow), used_as_condition); } - void infer_function_call(V v, TypePtr hint) { + ExprFlow infer_function_call(V v, FlowContext&& flow, bool used_as_condition, TypePtr hint) { AnyExprV callee = v->get_callee(); // v is `globalF(args)` / `globalF(args)` / `obj.method(args)` / 
`local_var(args)` / `getF()(args)` @@ -926,19 +871,19 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { delta_self = 1; dot_obj = v_dot->get_obj(); v_instantiationTs = v_dot->get_instantiationTs(); // present for `obj.someMethod()` - infer_any_expr(dot_obj); + flow = infer_any_expr(dot_obj, std::move(flow), false).out_flow; // it can be indexed access (`tensorVar.0()`, `tupleVar.1()`) or a method (`t.tupleSize()`) std::string_view field_name = v_dot->get_field_name(); if (field_name[0] >= '0' && field_name[0] <= '9') { // indexed access `ab.2()`, then treat `ab.2` just like an expression, fun_ref remains nullptr - // infer_dot_access() will be called for a callee, it will check type, index correctness, etc. + // infer_dot_access() will be called for a callee, it will check index correctness } else { // for now, Tolk doesn't have fields and object-scoped methods; `t.tupleSize` is a global function `tupleSize` const Symbol* sym = lookup_global_symbol(field_name); fun_ref = sym ? sym->try_as() : nullptr; if (!fun_ref) { - v_dot->get_identifier()->error("non-existing method `" + static_cast(field_name) + "` of type " + to_string(dot_obj)); + fire(cur_f, v_dot->get_identifier()->loc, "non-existing method `" + static_cast(field_name) + "` of type " + to_string(dot_obj)); } } @@ -949,46 +894,43 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { // handle `local_var()` / `getF()()` / `5()` / `SOME_CONST()` / `obj.method()()()` / `tensorVar.0()` if (!fun_ref) { - // treat callee like a usual expression, which must have "callable" inferred type - infer_any_expr(callee); + // treat callee like a usual expression + flow = infer_any_expr(callee, std::move(flow), false).out_flow; + // it must have "callable" inferred type const TypeDataFunCallable* f_callable = callee->inferred_type->try_as(); if (!f_callable) { // `5()` / `SOME_CONST()` / `null()` - v->error("calling a non-function " + to_string(callee->inferred_type)); + fire(cur_f, v->loc, "calling a non-function 
" + to_string(callee->inferred_type)); } - // check arguments count and their types + // check arguments count (their types will be checked in a later pipe) if (v->get_num_args() != static_cast(f_callable->params_types.size())) { - v->error("expected " + std::to_string(f_callable->params_types.size()) + " arguments, got " + std::to_string(v->get_arg_list()->size())); + fire(cur_f, v->loc, "expected " + std::to_string(f_callable->params_types.size()) + " arguments, got " + std::to_string(v->get_arg_list()->size())); } for (int i = 0; i < v->get_num_args(); ++i) { auto arg_i = v->get_arg(i)->get_expr(); - TypePtr param_type = f_callable->params_types[i]; - infer_any_expr(arg_i, param_type); - if (!param_type->can_rhs_be_assigned(arg_i->inferred_type)) { - arg_i->error("can not pass " + to_string(arg_i) + " to " + to_string(param_type)); - } + flow = infer_any_expr(arg_i, std::move(flow), false, f_callable->params_types[i]).out_flow; assign_inferred_type(v->get_arg(i), arg_i); } v->mutate()->assign_fun_ref(nullptr); // no fun_ref to a global function assign_inferred_type(v, f_callable->return_type); - return; + return ExprFlow(std::move(flow), used_as_condition); } // so, we have a call `f(args)` or `obj.f(args)`, f is a global function (fun_ref) (code / asm / builtin) - // we're going to iterate over passed arguments, check type compatibility, and (if generic) infer substitutionTs + // we're going to iterate over passed arguments, and (if generic) infer substitutionTs // at first, check arguments count (Tolk doesn't have optional parameters, so just compare counts) int n_arguments = v->get_num_args() + delta_self; int n_parameters = fun_ref->get_num_params(); if (!n_parameters && dot_obj) { - v->error("`" + fun_ref->name + "` has no parameters and can not be called as method"); + fire(cur_f, v->loc, "`" + fun_ref->name + "` has no parameters and can not be called as method"); } if (n_parameters < n_arguments) { - v->error("too many arguments in call to `" + 
fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); + fire(cur_f, v->loc, "too many arguments in call to `" + fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); } if (n_arguments < n_parameters) { - v->error("too few arguments in call to `" + fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); + fire(cur_f, v->loc, "too few arguments in call to `" + fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); } - // now, for every passed argument, we need to infer its type, and check it against parameter type + // now, for every passed argument, we need to infer its type // for regular functions, it's obvious // but for generic functions, we need to infer type arguments (substitutionTs) on the fly // (unless Ts are specified by a user like `f(args)` / `t.tupleAt()`, take them) @@ -1005,9 +947,14 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { const LocalVarData& param_0 = fun_ref->parameters[0]; TypePtr param_type = param_0.declared_type; if (param_type->has_genericT_inside()) { - param_type = deducingTs->auto_deduce_from_argument(dot_obj->loc, param_type, dot_obj->inferred_type); + param_type = deducingTs->auto_deduce_from_argument(cur_f, dot_obj->loc, param_type, dot_obj->inferred_type); + } + if (param_0.is_mutate_parameter() && dot_obj->inferred_type != param_type) { + if (SinkExpression s_expr = extract_sink_expression_from_vertex(dot_obj)) { + assign_inferred_type(dot_obj, calc_declared_type_before_smart_cast(dot_obj)); + flow.register_known_type(s_expr, param_type); + } } - check_function_argument(param_type, param_0.is_mutate_parameter(), dot_obj, true); } for (int i = 0; i < v->get_num_args(); ++i) { const LocalVarData& param_i = 
fun_ref->parameters[delta_self + i]; @@ -1017,13 +964,20 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { param_type = deducingTs->replace_by_manually_specified(param_type); } if (param_type->has_genericT_inside()) { // `f(a)` where f is generic: use `a` to infer param type - infer_any_expr(arg_i); // then arg_i is inferred without any hint - param_type = deducingTs->auto_deduce_from_argument(arg_i->loc, param_type, arg_i->inferred_type); + // then arg_i is inferred without any hint + flow = infer_any_expr(arg_i, std::move(flow), false).out_flow; + param_type = deducingTs->auto_deduce_from_argument(cur_f, arg_i->loc, param_type, arg_i->inferred_type); } else { - infer_any_expr(arg_i, param_type); // param_type is hint, helps infer arg_i + // param_type is hint, helps infer arg_i + flow = infer_any_expr(arg_i, std::move(flow), false, param_type).out_flow; } assign_inferred_type(v->get_arg(i), arg_i); // arg itself is an expression - check_function_argument(param_type, param_i.is_mutate_parameter(), arg_i, false); + if (param_i.is_mutate_parameter() && arg_i->inferred_type != param_type) { + if (SinkExpression s_expr = extract_sink_expression_from_vertex(arg_i)) { + assign_inferred_type(arg_i, calc_declared_type_before_smart_cast(arg_i)); + flow.register_known_type(s_expr, param_type); + } + } } // if it's a generic function `f`, we need to instantiate it, like `f` @@ -1035,213 +989,204 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { if (idx != -1 && hint && fun_ref->declared_return_type->has_genericT_inside()) { // example: `t.tupleFirst()`, T doesn't depend on arguments, but is determined by return type // if used like `var x: int = t.tupleFirst()` / `t.tupleFirst() as int` / etc., use hint - deducingTs->auto_deduce_from_argument(v->loc, fun_ref->declared_return_type, hint); + deducingTs->auto_deduce_from_argument(cur_f, v->loc, fun_ref->declared_return_type, hint); idx = deducingTs->get_first_not_deduced_idx(); } if (idx != -1) { - v->error("can 
not deduce " + fun_ref->genericTs->get_nameT(idx)); + fire(cur_f, v->loc, "can not deduce " + fun_ref->genericTs->get_nameT(idx)); } fun_ref = check_and_instantiate_generic_function(v->loc, fun_ref, deducingTs->flush()); delete deducingTs; } else if (UNLIKELY(v_instantiationTs != nullptr)) { // non-generic function/method called with type arguments, like `c.cellHash()` / `beginCell()` - v_instantiationTs->error("calling a not generic function with generic T"); + fire(cur_f, v_instantiationTs->loc, "calling a not generic function with generic T"); } v->mutate()->assign_fun_ref(fun_ref); // since for `t.tupleAt()`, infer_dot_access() not called for callee = "t.tupleAt", assign its target here if (v->is_dot_call()) { v->get_callee()->as()->mutate()->assign_target(fun_ref); - v->get_callee()->as()->mutate()->assign_inferred_type(fun_ref->inferred_full_type); } // get return type either from user-specified declaration or infer here on demand traversing its body get_or_infer_return_type(fun_ref); TypePtr inferred_type = dot_obj && fun_ref->does_return_self() ? dot_obj->inferred_type : fun_ref->inferred_return_type; assign_inferred_type(v, inferred_type); assign_inferred_type(callee, fun_ref->inferred_full_type); - if (fun_ref->is_builtin_function() && fun_ref->name[0] == '_') { - handle_possible_compiler_internal_call(current_function, v); - } // note, that mutate params don't affect typing, they are handled when converting to IR + return ExprFlow(std::move(flow), used_as_condition); } - void infer_tensor(V v, TypePtr hint) { + ExprFlow infer_tensor(V v, FlowContext&& flow, bool used_as_condition, TypePtr hint) { const TypeDataTensor* tensor_hint = hint ? hint->try_as() : nullptr; std::vector types_list; types_list.reserve(v->get_items().size()); for (int i = 0; i < v->size(); ++i) { AnyExprV item = v->get_item(i); - infer_any_expr(item, tensor_hint && i < tensor_hint->size() ? 
tensor_hint->items[i] : nullptr); + flow = infer_any_expr(item, std::move(flow), false, tensor_hint && i < tensor_hint->size() ? tensor_hint->items[i] : nullptr).out_flow; types_list.emplace_back(item->inferred_type); } assign_inferred_type(v, TypeDataTensor::create(std::move(types_list))); + return ExprFlow(std::move(flow), used_as_condition); } - void infer_typed_tuple(V v, TypePtr hint) { + ExprFlow infer_typed_tuple(V v, FlowContext&& flow, bool used_as_condition, TypePtr hint) { const TypeDataTypedTuple* tuple_hint = hint ? hint->try_as() : nullptr; std::vector types_list; types_list.reserve(v->get_items().size()); for (int i = 0; i < v->size(); ++i) { AnyExprV item = v->get_item(i); - infer_any_expr(item, tuple_hint && i < tuple_hint->size() ? tuple_hint->items[i] : nullptr); - if (item->inferred_type->get_width_on_stack() != 1) { - fire_error_tuple_cannot_have_non1_stack_width_elem(v->get_item(i)->loc, item->inferred_type); - } + flow = infer_any_expr(item, std::move(flow), false, tuple_hint && i < tuple_hint->size() ? tuple_hint->items[i] : nullptr).out_flow; types_list.emplace_back(item->inferred_type); } assign_inferred_type(v, TypeDataTypedTuple::create(std::move(types_list))); + return ExprFlow(std::move(flow), used_as_condition); } - static void infer_null_keyword(V v) { + static ExprFlow infer_null_keyword(V v, FlowContext&& flow, bool used_as_condition) { assign_inferred_type(v, TypeDataNullLiteral::create()); + + return ExprFlow(std::move(flow), used_as_condition); } - static void infer_underscore(V v, TypePtr hint) { + static ExprFlow infer_underscore(V v, FlowContext&& flow, bool used_as_condition, TypePtr hint) { // if execution is here, underscore is either used as lhs of assignment, or incorrectly, like `f(_)` // more precise is to always set unknown here, but for incorrect usages, instead of an error // "can not pass unknown to X" would better be an error it can't be used as a value, at later steps assign_inferred_type(v, hint ? 
hint : TypeDataUnknown::create()); + return ExprFlow(std::move(flow), used_as_condition); } - static void infer_empty_expression(V v) { + static ExprFlow infer_empty_expression(V v, FlowContext&& flow, bool used_as_condition) { assign_inferred_type(v, TypeDataUnknown::create()); + return ExprFlow(std::move(flow), used_as_condition); } - void process_sequence(V v) { + FlowContext process_sequence(V v, FlowContext&& flow) { + // we'll print a warning if after some statement, control flow became unreachable + // (but don't print a warning if it's already unreachable, for example we're inside always-false if) + bool initially_unreachable = flow.is_unreachable(); for (AnyV item : v->get_items()) { - process_any_statement(item); + if (flow.is_unreachable() && !initially_unreachable && !v->first_unreachable && item->type != ast_empty_statement) { + v->mutate()->assign_first_unreachable(item); // a warning will be printed later, after type checking + } + flow = process_any_statement(item, std::move(flow)); } + return flow; } - static bool is_expr_valid_as_return_self(AnyExprV return_expr) { - // `return self` - if (return_expr->type == ast_reference && return_expr->as()->get_name() == "self") { - return true; - } - // `return self.someMethod()` - if (auto v_call = return_expr->try_as(); v_call && v_call->is_dot_call()) { - return v_call->fun_maybe && v_call->fun_maybe->does_return_self() && is_expr_valid_as_return_self(v_call->get_dot_obj()); - } - // `return cond ? ... 
: ...` - if (auto v_ternary = return_expr->try_as()) { - return is_expr_valid_as_return_self(v_ternary->get_when_true()) && is_expr_valid_as_return_self(v_ternary->get_when_false()); - } - return false; - } - - void process_return_statement(V v) { + FlowContext process_return_statement(V v, FlowContext&& flow) { if (v->has_return_value()) { - infer_any_expr(v->get_return_value(), current_function->declared_return_type); + flow = infer_any_expr(v->get_return_value(), std::move(flow), false, cur_f->declared_return_type).out_flow; } else { assign_inferred_type(v->get_return_value(), TypeDataVoid::create()); } - if (current_function->does_return_self()) { - return_unifier.unify_with(current_function->parameters[0].declared_type); - if (!is_expr_valid_as_return_self(v->get_return_value())) { - v->error("invalid return from `self` function"); - } - return; - } + flow.mark_unreachable(UnreachableKind::ReturnStatement); - TypePtr expr_type = v->get_return_value()->inferred_type; - if (current_function->declared_return_type) { - if (!current_function->declared_return_type->can_rhs_be_assigned(expr_type)) { - v->get_return_value()->error("can not convert type " + to_string(expr_type) + " to return type " + to_string(current_function->declared_return_type)); - } - } else { - if (!return_unifier.unify_with(expr_type)) { - v->get_return_value()->error("can not unify type " + to_string(expr_type) + " with previous return type " + to_string(return_unifier.get_result())); - } + if (!cur_f->declared_return_type) { + return_statements.push_back(v->get_return_value()); // for future unification } + return flow; } - void process_if_statement(V v) { - AnyExprV cond = v->get_cond(); - infer_any_expr(cond); - if (!expect_integer(cond) && !expect_boolean(cond)) { - cond->error("can not use " + to_string(cond) + " as a boolean condition"); - } - process_any_statement(v->get_if_body()); - process_any_statement(v->get_else_body()); + FlowContext process_if_statement(V v, FlowContext&& flow) 
{ + ExprFlow after_cond = infer_any_expr(v->get_cond(), std::move(flow), true); + v->get_cond()->mutate()->assign_always_true_or_false(after_cond.get_always_true_false_state()); + + FlowContext true_flow = process_any_statement(v->get_if_body(), std::move(after_cond.true_flow)); + FlowContext false_flow = process_any_statement(v->get_else_body(), std::move(after_cond.false_flow)); + + return FlowContext::merge_flow(std::move(true_flow), std::move(false_flow)); } - void process_repeat_statement(V v) { - AnyExprV cond = v->get_cond(); - infer_any_expr(cond); - if (!expect_integer(cond)) { - cond->error("condition of `repeat` must be an integer, got " + to_string(cond)); - } - process_any_statement(v->get_body()); + FlowContext process_repeat_statement(V v, FlowContext&& flow) { + ExprFlow after_cond = infer_any_expr(v->get_cond(), std::move(flow), false); + + return process_any_statement(v->get_body(), std::move(after_cond.out_flow)); } - void process_while_statement(V v) { - AnyExprV cond = v->get_cond(); - infer_any_expr(cond); - if (!expect_integer(cond) && !expect_boolean(cond)) { - cond->error("can not use " + to_string(cond) + " as a boolean condition"); - } - process_any_statement(v->get_body()); + FlowContext process_while_statement(V v, FlowContext&& flow) { + // loops are inferred twice, to merge body outcome with the state before the loop + // (a more correct approach would be not "twice", but "find a fixed point when state stop changing") + // also remember, we don't have a `break` statement, that's why when loop exits, condition became false + FlowContext loop_entry_facts = flow.clone(); + ExprFlow after_cond = infer_any_expr(v->get_cond(), std::move(flow), true); + FlowContext body_out = process_any_statement(v->get_body(), std::move(after_cond.true_flow)); + // second time, to refine all types + flow = FlowContext::merge_flow(std::move(loop_entry_facts), std::move(body_out)); + ExprFlow after_cond2 = infer_any_expr(v->get_cond(), std::move(flow), 
true); + v->get_cond()->mutate()->assign_always_true_or_false(after_cond2.get_always_true_false_state()); + + process_any_statement(v->get_body(), std::move(after_cond2.true_flow)); + + return std::move(after_cond2.false_flow); } - void process_do_while_statement(V v) { - process_any_statement(v->get_body()); - AnyExprV cond = v->get_cond(); - infer_any_expr(cond); - if (!expect_integer(cond) && !expect_boolean(cond)) { - cond->error("can not use " + to_string(cond) + " as a boolean condition"); - } + FlowContext process_do_while_statement(V v, FlowContext&& flow) { + // do while is also handled twice; read comments above + FlowContext loop_entry_facts = flow.clone(); + flow = process_any_statement(v->get_body(), std::move(flow)); + ExprFlow after_cond = infer_any_expr(v->get_cond(), std::move(flow), true); + // second time + flow = FlowContext::merge_flow(std::move(loop_entry_facts), std::move(after_cond.true_flow)); + flow = process_any_statement(v->get_body(), std::move(flow)); + ExprFlow after_cond2 = infer_any_expr(v->get_cond(), std::move(flow), true); + v->get_cond()->mutate()->assign_always_true_or_false(after_cond2.get_always_true_false_state()); + + return std::move(after_cond2.false_flow); } - void process_throw_statement(V v) { - infer_any_expr(v->get_thrown_code()); - if (!expect_integer(v->get_thrown_code())) { - v->get_thrown_code()->error("excNo of `throw` must be an integer, got " + to_string(v->get_thrown_code())); - } - infer_any_expr(v->get_thrown_arg()); - if (v->has_thrown_arg() && v->get_thrown_arg()->inferred_type->get_width_on_stack() != 1) { - v->get_thrown_arg()->error("can not throw " + to_string(v->get_thrown_arg()) + ", exception arg must occupy exactly 1 stack slot"); - } + FlowContext process_throw_statement(V v, FlowContext&& flow) { + flow = infer_any_expr(v->get_thrown_code(), std::move(flow), false).out_flow; + flow = infer_any_expr(v->get_thrown_arg(), std::move(flow), false).out_flow; + return flow; } - void 
process_assert_statement(V v) { - AnyExprV cond = v->get_cond(); - infer_any_expr(cond); - if (!expect_integer(cond) && !expect_boolean(cond)) { - cond->error("can not use " + to_string(cond) + " as a boolean condition"); - } - infer_any_expr(v->get_thrown_code()); - if (!expect_integer(v->get_thrown_code())) { - v->get_cond()->error("thrown excNo of `assert` must be an integer, got " + to_string(v->get_cond())); - } + FlowContext process_assert_statement(V v, FlowContext&& flow) { + ExprFlow after_cond = infer_any_expr(v->get_cond(), std::move(flow), true); + v->get_cond()->mutate()->assign_always_true_or_false(after_cond.get_always_true_false_state()); + + ExprFlow after_throw = infer_any_expr(v->get_thrown_code(), std::move(after_cond.false_flow), false); + return std::move(after_cond.true_flow); } - static void process_catch_variable(AnyExprV catch_var, TypePtr catch_var_type) { + static FlowContext process_catch_variable(AnyExprV catch_var, TypePtr catch_var_type, FlowContext&& flow) { if (auto v_ref = catch_var->try_as(); v_ref && v_ref->sym) { // not underscore - assign_inferred_type(v_ref->sym->try_as(), catch_var_type); + LocalVarPtr var_ref = v_ref->sym->try_as(); + assign_inferred_type(var_ref, catch_var_type); + flow.register_known_type(SinkExpression(var_ref), catch_var_type); } assign_inferred_type(catch_var, catch_var_type); + return flow; } - void process_try_catch_statement(V v) { - process_any_statement(v->get_try_body()); + FlowContext process_try_catch_statement(V v, FlowContext&& flow) { + FlowContext before_try = flow.clone(); + FlowContext try_end = process_any_statement(v->get_try_body(), std::move(flow)); // `catch` has exactly 2 variables: excNo and arg (when missing, they are implicit underscores) // `arg` is a curious thing, it can be any TVM primitive, so assign unknown to it // hence, using `fInt(arg)` (int from parameter is a target type) or `arg as slice` works well // it's not truly correct, because `arg as (int,int)` also compiles, 
but can never happen, but let it be user responsibility + FlowContext catch_flow = std::move(before_try); tolk_assert(v->get_catch_expr()->size() == 2); std::vector types_list = {TypeDataInt::create(), TypeDataUnknown::create()}; - process_catch_variable(v->get_catch_expr()->get_item(0), types_list[0]); - process_catch_variable(v->get_catch_expr()->get_item(1), types_list[1]); + catch_flow = process_catch_variable(v->get_catch_expr()->get_item(0), types_list[0], std::move(catch_flow)); + catch_flow = process_catch_variable(v->get_catch_expr()->get_item(1), types_list[1], std::move(catch_flow)); assign_inferred_type(v->get_catch_expr(), TypeDataTensor::create(std::move(types_list))); - process_any_statement(v->get_catch_body()); + FlowContext catch_end = process_any_statement(v->get_catch_body(), std::move(catch_flow)); + return FlowContext::merge_flow(std::move(try_end), std::move(catch_end)); + } + + FlowContext process_expression_statement(AnyExprV v, FlowContext&& flow) { + ExprFlow after_v = infer_any_expr(v, std::move(flow), false); + return std::move(after_v.out_flow); } public: static void assign_fun_full_type(FunctionPtr fun_ref, TypePtr inferred_return_type) { - // calculate function full type `fun(params) -> ret_type` + // calculate function full type `(params) -> ret_type` std::vector params_types; params_types.reserve(fun_ref->get_num_params()); for (const LocalVarData& param : fun_ref->parameters) { @@ -1250,18 +1195,40 @@ public: assign_inferred_type(fun_ref, inferred_return_type, TypeDataFunCallable::create(std::move(params_types), inferred_return_type)); } - void start_visiting_function(FunctionPtr fun_ref, V v_function) { + void start_visiting_function(FunctionPtr fun_ref, V v_function) { + TypePtr inferred_return_type = fun_ref->declared_return_type; if (fun_ref->is_code_function()) { - current_function = fun_ref; - process_any_statement(v_function->get_body()); - current_function = nullptr; + FlowContext body_start; + for (const LocalVarData& 
param : fun_ref->parameters) { + body_start.register_known_type(SinkExpression(¶m), param.declared_type); + } - if (fun_ref->is_implicit_return()) { - bool is_ok_with_void = fun_ref->declared_return_type - ? fun_ref->declared_return_type->can_rhs_be_assigned(TypeDataVoid::create()) - : return_unifier.unify_with_implicit_return_void(); - if (!is_ok_with_void || fun_ref->does_return_self()) { - throw ParseError(v_function->get_body()->as()->loc_end, "missing return"); + cur_f = fun_ref; + FlowContext body_end = process_any_statement(v_function->get_body(), std::move(body_start)); + cur_f = nullptr; + + if (!body_end.is_unreachable()) { + fun_ref->mutate()->assign_is_implicit_return(); + } + + if (!fun_ref->declared_return_type) { + TypeInferringUnifyStrategy return_unifier; + if (fun_ref->does_return_self()) { + return_unifier.unify_with(fun_ref->parameters[0].declared_type); + } + for (AnyExprV return_value : return_statements) { + if (!return_unifier.unify_with(return_value->inferred_type)) { + fire(cur_f, return_value->loc, "can not unify type " + to_string(return_value) + " with previous return type " + to_string(return_unifier.get_result())); + } + } + if (!body_end.is_unreachable()) { + if (!return_unifier.unify_with_implicit_return_void()) { + fire(cur_f, v_function->get_body()->as()->loc_end, "missing return"); + } + } + inferred_return_type = return_unifier.get_result(); + if (inferred_return_type == nullptr && body_end.is_unreachable()) { + inferred_return_type = TypeDataVoid::create(); } } } else { @@ -1269,7 +1236,6 @@ public: tolk_assert(fun_ref->declared_return_type); } - TypePtr inferred_return_type = fun_ref->declared_return_type ? 
fun_ref->declared_return_type : return_unifier.get_result(); assign_fun_full_type(fun_ref, inferred_return_type); fun_ref->mutate()->assign_is_type_inferring_done(); } @@ -1283,7 +1249,7 @@ public: } static void start_visiting_function(FunctionPtr fun_ref, V v_function) { - InferCheckTypesAndCallsAndFieldsVisitor visitor; + InferTypesAndCallsAndFieldsVisitor visitor; visitor.start_visiting_function(fun_ref, v_function); } }; @@ -1298,20 +1264,20 @@ static void infer_and_save_return_type_of_function(FunctionPtr fun_ref) { tolk_assert(!fun_ref->is_generic_function() && !fun_ref->is_type_inferring_done()); // if `g` has return type declared, like `fun g(): int { ... }`, don't traverse its body if (fun_ref->declared_return_type) { - InferCheckTypesAndCallsAndFieldsVisitor::assign_fun_full_type(fun_ref, fun_ref->declared_return_type); + InferTypesAndCallsAndFieldsVisitor::assign_fun_full_type(fun_ref, fun_ref->declared_return_type); return; } // prevent recursion of untyped functions, like `fun f() { return g(); } fun g() { return f(); }` bool contains = std::find(called_stack.begin(), called_stack.end(), fun_ref) != called_stack.end(); if (contains) { - fun_ref->ast_root->error("could not infer return type of " + to_string(fun_ref) + ", because it appears in a recursive call chain; specify `: ` manually"); + fire(fun_ref, fun_ref->loc, "could not infer return type of " + to_string(fun_ref) + ", because it appears in a recursive call chain; specify `: ` manually"); } // dig into g's body; it's safe, since the compiler is single-threaded // on finish, fun_ref->inferred_return_type is filled, and won't be called anymore called_stack.push_back(fun_ref); - InferCheckTypesAndCallsAndFieldsVisitor visitor; + InferTypesAndCallsAndFieldsVisitor visitor; visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); called_stack.pop_back(); } @@ -1321,7 +1287,7 @@ void pipeline_infer_types_and_calls_and_fields() { } void pipeline_infer_types_and_calls_and_fields(FunctionPtr 
fun_ref) { - InferCheckTypesAndCallsAndFieldsVisitor visitor; + InferTypesAndCallsAndFieldsVisitor visitor; visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); } diff --git a/tolk/pipe-resolve-identifiers.cpp b/tolk/pipe-resolve-identifiers.cpp index 95229d20..5a735885 100644 --- a/tolk/pipe-resolve-identifiers.cpp +++ b/tolk/pipe-resolve-identifiers.cpp @@ -59,20 +59,20 @@ namespace tolk { GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_undefined_symbol(V v) { +static void fire_error_undefined_symbol(FunctionPtr cur_f, V v) { if (v->name == "self") { - v->error("using `self` in a non-member function (it does not accept the first `self` parameter)"); + throw ParseError(cur_f, v->loc, "using `self` in a non-member function (it does not accept the first `self` parameter)"); } else { - v->error("undefined symbol `" + static_cast(v->name) + "`"); + throw ParseError(cur_f, v->loc, "undefined symbol `" + static_cast(v->name) + "`"); } } GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_unknown_type_name(SrcLocation loc, const std::string &text) { - throw ParseError(loc, "unknown type name `" + text + "`"); +static void fire_error_unknown_type_name(FunctionPtr cur_f, SrcLocation loc, const std::string &text) { + throw ParseError(cur_f, loc, "unknown type name `" + text + "`"); } -static void check_import_exists_when_using_sym(AnyV v_usage, const Symbol* used_sym) { +static void check_import_exists_when_using_sym(FunctionPtr cur_f, AnyV v_usage, const Symbol* used_sym) { SrcLocation sym_loc = used_sym->loc; if (!v_usage->loc.is_symbol_from_same_or_builtin_file(sym_loc)) { const SrcFile* declared_in = sym_loc.get_src_file(); @@ -83,7 +83,7 @@ static void check_import_exists_when_using_sym(AnyV v_usage, const Symbol* used_ } } if (!has_import) { - v_usage->error("Using a non-imported symbol `" + used_sym->name + "`. 
Forgot to import \"" + declared_in->rel_filename + "\"?"); + throw ParseError(cur_f, v_usage->loc, "Using a non-imported symbol `" + used_sym->name + "`. Forgot to import \"" + declared_in->rel_filename + "\"?"); } } } @@ -137,38 +137,39 @@ struct NameAndScopeResolver { struct TypeDataResolver { GNU_ATTRIBUTE_NOINLINE - static TypePtr resolve_identifiers_in_type_data(TypePtr type_data, const GenericsDeclaration* genericTs) { - return type_data->replace_children_custom([genericTs](TypePtr child) { + static TypePtr resolve_identifiers_in_type_data(FunctionPtr cur_f, TypePtr type_data, const GenericsDeclaration* genericTs) { + return type_data->replace_children_custom([cur_f, genericTs](TypePtr child) { if (const TypeDataUnresolved* un = child->try_as()) { if (genericTs && genericTs->has_nameT(un->text)) { std::string nameT = un->text; return TypeDataGenericT::create(std::move(nameT)); } if (un->text == "auto") { - throw ParseError(un->loc, "`auto` type does not exist; just omit a type for local variable (will be inferred from assignment); parameters should always be typed"); + throw ParseError(cur_f, un->loc, "`auto` type does not exist; just omit a type for local variable (will be inferred from assignment); parameters should always be typed"); } if (un->text == "self") { - throw ParseError(un->loc, "`self` type can be used only as a return type of a function (enforcing it to be chainable)"); + throw ParseError(cur_f, un->loc, "`self` type can be used only as a return type of a function (enforcing it to be chainable)"); } - fire_error_unknown_type_name(un->loc, un->text); + fire_error_unknown_type_name(cur_f, un->loc, un->text); } return child; }); } }; -static TypePtr finalize_type_data(TypePtr type_data, const GenericsDeclaration* genericTs) { +static TypePtr finalize_type_data(FunctionPtr cur_f, TypePtr type_data, const GenericsDeclaration* genericTs) { if (!type_data || !type_data->has_unresolved_inside()) { return type_data; } - return 
TypeDataResolver::resolve_identifiers_in_type_data(type_data, genericTs); + return TypeDataResolver::resolve_identifiers_in_type_data(cur_f, type_data, genericTs); } class AssignSymInsideFunctionVisitor final : public ASTVisitorFunctionBody { // more correctly this field shouldn't be static, but currently there is no need to make it a part of state static NameAndScopeResolver current_scope; - static FunctionPtr current_function; + static FunctionPtr cur_f; + static const GenericsDeclaration* current_genericTs; static LocalVarPtr create_local_var_sym(std::string_view name, SrcLocation loc, TypePtr declared_type, bool immutable) { LocalVarData* v_sym = new LocalVarData(static_cast(name), loc, declared_type, immutable * LocalVarData::flagImmutable, -1); @@ -188,15 +189,15 @@ protected: if (v->marked_as_redef) { const Symbol* sym = current_scope.lookup_symbol(v->get_name()); if (sym == nullptr) { - v->error("`redef` for unknown variable"); + throw ParseError(cur_f, v->loc, "`redef` for unknown variable"); } LocalVarPtr var_ref = sym->try_as(); if (!var_ref) { - v->error("`redef` for unknown variable"); + throw ParseError(cur_f, v->loc, "`redef` for unknown variable"); } v->mutate()->assign_var_ref(var_ref); } else { - TypePtr declared_type = finalize_type_data(v->declared_type, current_function->genericTs); + TypePtr declared_type = finalize_type_data(cur_f, v->declared_type, current_genericTs); LocalVarPtr var_ref = create_local_var_sym(v->get_name(), v->loc, declared_type, v->is_immutable); v->mutate()->assign_resolved_type(declared_type); v->mutate()->assign_var_ref(var_ref); @@ -211,20 +212,20 @@ protected: void visit(V v) override { const Symbol* sym = current_scope.lookup_symbol(v->get_name()); if (!sym) { - fire_error_undefined_symbol(v->get_identifier()); + fire_error_undefined_symbol(cur_f, v->get_identifier()); } v->mutate()->assign_sym(sym); // for global functions, global vars and constants, `import` must exist if (!sym->try_as()) { - 
check_import_exists_when_using_sym(v, sym); + check_import_exists_when_using_sym(cur_f, v, sym); } // for `f` / `f`, resolve "MyAlias" and "T" // (for function call `f()`, this v (ast_reference `f`) is callee) if (auto v_instantiationTs = v->get_instantiationTs()) { for (int i = 0; i < v_instantiationTs->size(); ++i) { - TypePtr substituted_type = finalize_type_data(v_instantiationTs->get_item(i)->substituted_type, current_function->genericTs); + TypePtr substituted_type = finalize_type_data(cur_f, v_instantiationTs->get_item(i)->substituted_type, current_genericTs); v_instantiationTs->get_item(i)->mutate()->assign_resolved_type(substituted_type); } } @@ -235,7 +236,7 @@ protected: // (for function call `t.tupleAt()`, this v (ast_dot_access `t.tupleAt`) is callee) if (auto v_instantiationTs = v->get_instantiationTs()) { for (int i = 0; i < v_instantiationTs->size(); ++i) { - TypePtr substituted_type = finalize_type_data(v_instantiationTs->get_item(i)->substituted_type, current_function->genericTs); + TypePtr substituted_type = finalize_type_data(cur_f, v_instantiationTs->get_item(i)->substituted_type, current_genericTs); v_instantiationTs->get_item(i)->mutate()->assign_resolved_type(substituted_type); } } @@ -243,7 +244,7 @@ protected: } void visit(V v) override { - TypePtr cast_to_type = finalize_type_data(v->cast_to_type, current_function->genericTs); + TypePtr cast_to_type = finalize_type_data(cur_f, v->cast_to_type, current_genericTs); v->mutate()->assign_resolved_type(cast_to_type); parent::visit(v->get_expr()); } @@ -284,16 +285,17 @@ public: } void start_visiting_function(FunctionPtr fun_ref, V v) override { - current_function = fun_ref; + cur_f = fun_ref; + current_genericTs = fun_ref->genericTs; for (int i = 0; i < v->get_num_params(); ++i) { const LocalVarData& param_var = fun_ref->parameters[i]; - TypePtr declared_type = finalize_type_data(param_var.declared_type, fun_ref->genericTs); + TypePtr declared_type = finalize_type_data(cur_f, 
param_var.declared_type, fun_ref->genericTs); v->get_param(i)->mutate()->assign_param_ref(¶m_var); v->get_param(i)->mutate()->assign_resolved_type(declared_type); param_var.mutate()->assign_resolved_type(declared_type); } - TypePtr return_type = finalize_type_data(fun_ref->declared_return_type, fun_ref->genericTs); + TypePtr return_type = finalize_type_data(cur_f, fun_ref->declared_return_type, fun_ref->genericTs); v->mutate()->assign_resolved_type(return_type); fun_ref->mutate()->assign_resolved_type(return_type); @@ -308,12 +310,14 @@ public: tolk_assert(current_scope.scopes.empty()); } - current_function = nullptr; + current_genericTs = nullptr; + cur_f = nullptr; } }; NameAndScopeResolver AssignSymInsideFunctionVisitor::current_scope; -FunctionPtr AssignSymInsideFunctionVisitor::current_function = nullptr; +FunctionPtr AssignSymInsideFunctionVisitor::cur_f = nullptr; +const GenericsDeclaration* AssignSymInsideFunctionVisitor::current_genericTs = nullptr; void pipeline_resolve_identifiers_and_assign_symbols() { AssignSymInsideFunctionVisitor visitor; @@ -324,14 +328,16 @@ void pipeline_resolve_identifiers_and_assign_symbols() { visitor.start_visiting_function(v_func->fun_ref, v_func); } else if (auto v_global = v->try_as()) { - TypePtr declared_type = finalize_type_data(v_global->var_ref->declared_type, nullptr); + TypePtr declared_type = finalize_type_data(nullptr, v_global->var_ref->declared_type, nullptr); v_global->mutate()->assign_resolved_type(declared_type); v_global->var_ref->mutate()->assign_resolved_type(declared_type); - } else if (auto v_const = v->try_as(); v_const && v_const->declared_type) { - TypePtr declared_type = finalize_type_data(v_const->const_ref->declared_type, nullptr); - v_const->mutate()->assign_resolved_type(declared_type); - v_const->const_ref->mutate()->assign_resolved_type(declared_type); + } else if (auto v_const = v->try_as()) { + if (v_const->declared_type) { + TypePtr declared_type = finalize_type_data(nullptr, 
v_const->const_ref->declared_type, nullptr); + v_const->mutate()->assign_resolved_type(declared_type); + v_const->const_ref->mutate()->assign_resolved_type(declared_type); + } } } } diff --git a/tolk/pipeline.h b/tolk/pipeline.h index ab65ef80..0a71d751 100644 --- a/tolk/pipeline.h +++ b/tolk/pipeline.h @@ -35,8 +35,8 @@ void pipeline_discover_and_parse_sources(const std::string& stdlib_filename, con void pipeline_register_global_symbols(); void pipeline_resolve_identifiers_and_assign_symbols(); void pipeline_calculate_rvalue_lvalue(); -void pipeline_detect_unreachable_statements(); void pipeline_infer_types_and_calls_and_fields(); +void pipeline_check_inferred_types(); void pipeline_refine_lvalue_for_mutate_arguments(); void pipeline_check_rvalue_lvalue(); void pipeline_check_pure_impure_operations(); diff --git a/tolk/smart-casts-cfg.cpp b/tolk/smart-casts-cfg.cpp new file mode 100644 index 00000000..7b86f519 --- /dev/null +++ b/tolk/smart-casts-cfg.cpp @@ -0,0 +1,472 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "smart-casts-cfg.h" +#include "ast.h" +#include "tolk.h" + +/* + * This file represents internals of AST-level control flow and data flow analysis. + * Data flow is mostly used for smart casts and is calculated AT THE TIME of type inferring. 
+ * Not before, not after, but simultaneously with type inferring, because any local variable can be smart cast, + * which affects other expressions/variables types, generics instantiation, return auto-infer, etc. + * Though it's a part of type inferring, it's extracted as a separate file to keep inferring a bit clearer. + * + * Control flow is represented NOT as a "graph with edges". Instead, it's a "structured DFS" for the AST: + * 1) at every point of inferring, we have "current flow facts" (FlowContext) + * 2) when we see an `if (...)`, we create two derived contexts (by cloning current) + * 3) after `if`, finalize them at the end and unify + * 4) if we detect unreachable code, we mark that path's context as "unreachable" + * In other words, we get the effect of a CFG but in a more direct approach. That's enough for AST-level data-flow. + * + * FlowContext contains "data-flow facts that are definitely known": variables types (original or refined), + * sign state (definitely positive, definitely zero, etc.), boolean state (definitely true, definitely false). + * Each local variable is contained there, and possibly sub-fields of tensors/objects if definitely known: + * // current facts: x is int?, t is (int, int) + * if (x != null && t.0 > 0) + * // current facts: x is int, t is (int, int), t.0 is positive + * else + * // current facts: x is null, t is (int, int), t.0 is not positive + * When branches rejoin, facts are merged back (int+null = int? and so on, here they would be equal to before if). + * Another example: + * // current facts: x is int? 
+ * if (x == null) { + * // current facts: x is null + * x = 1; + * // current facts: x is int + * } // else branch is empty, its facts are: x is int + * // current facts (after rejoin): x is int + * + * Every expression analysis result (performed along with type inferring) returns ExprFlow: + * 1) out_flow: facts after evaluating the whole expression, no matter how it evaluates (true or false) + * 2) true_flow: the environment if expression is definitely true + * 3) false_flow: the environment if expression is definitely false + * + * Note, that globals are NOT analyzed (smart casts work for locals only). The explanation is simple: + * don't encourage to use a global twice, it costs gas, better assign it to a local. + * See SinkExpression. + * + * An important highlight about internal structure of tensors / tuples / objects and `t.1` sink expressions. + * When a tensor/object is assigned, its fields are NOT tracked individually. + * For better understanding, I'll give some examples in TypeScript (having the same behavior): + * interface User { id: number | string, ... } + * var u: User = { id: 123, ... } + * u.id // it's number|string, not number + * u = { id: 'asdf', ... } + * u.id // it's number|string, not string + * if (typeof u.id === 'string') { + * // here `u.id` is string (smart cast) + * } + * u.id = 123; + * u.id // now it's number (smart cast) (until `u.id` or `u` are reassigned) + * // but `u` still has type `{ id: number | string, ... }`, not `{ id: number, ... }`; only `u.id` is refined + * The same example, but with nullable tensor in Tolk: + * var t: (int?, ...) = (123, ...) + * t.0 // it's int?, not int + * t = (null, ...) 
+ * t.0 // it's int?, not null + * if (t.0 == null) { + * // here `t.0` is null (smart cast) + * } + * t.0 = 123; + * t.0 // now it's int (smart cast) (until `t.0` or `t` are reassigned) + * // but `t` still has type `(int?, ...)`, not `(int, ...)`; only `t.0` is refined + * + * In the future, not only smart casts, but other data-flow analysis can be implemented. + * 1) detect signs: `if (x > 0) { ... if (x < 0)` to warn always false + * 2) detect always true/false: `if (x) { return; } ... if (!x)` to warn always true + * These potential improvements are SignState and BoolState. Now they are NOT IMPLEMENTED, though declared. + * Their purpose is to show, that data flow is not only about smart casts, but eventually for other facts also. + * (though it's not obvious whether they should be analyzed at AST level or at IR level, like constants now) + */ + +namespace tolk { + +std::string SinkExpression::to_string() const { + std::string result = var_ref->name; + uint64_t cur_path = index_path; + while (cur_path != 0) { + result += "."; + result += std::to_string((cur_path & 0xFF) - 1); + cur_path >>= 8; + } + return result; +} + +static std::string to_string(SignState s) { + static const char* txt[6 + 1] = {"sign=unknown", ">0", "<0", "=0", ">=0", "<=0", "sign=never"}; + return txt[static_cast(s)]; +} + +static std::string to_string(BoolState s) { + static const char* txt[4 + 1] = {"unknown", "always_true", "always_false", "bool=never"}; + return txt[static_cast(s)]; +} + +// from `expr!` get `expr` +static AnyExprV unwrap_not_null_operator(AnyExprV expr) { + while (auto v_not_null = expr->try_as()) { + expr = v_not_null->get_expr(); + } + return expr; +} + +// "type lca" for a and b is T, so that both are assignable to T +// it's used +// 1) for auto-infer return type of the function if not specified +// example: `fun f(x: int?) { ... return 1; ... 
return x; }`; lca(`int`,`int?`) = `int?` +// 2) for auto-infer type of ternary and `match` expressions +// example: `cond ? beginCell() : null`; lca(`builder`,`null`) = `builder?` +// 3) when two data flows rejoin +// example: `if (tensorVar != null) ... else ...` rejoin `(int,int)` and `null` into `(int,int)?` +// when lca can't be calculated (example: `(int,int)` and `(int,int,int)`), nullptr is returned +static TypePtr calculate_type_lca(TypePtr a, TypePtr b) { + if (a == b) { + return a; + } + if (a == TypeDataNever::create()) { + return b; + } + if (b == TypeDataNever::create()) { + return a; + } + + if (a->can_rhs_be_assigned(b)) { + return a; + } + if (b->can_rhs_be_assigned(a)) { + return b; + } + + if (a == TypeDataUnknown::create() || b == TypeDataUnknown::create()) { + return TypeDataUnknown::create(); + } + + if (a == TypeDataNullLiteral::create()) { + return TypeDataNullable::create(b); + } + if (b == TypeDataNullLiteral::create()) { + return TypeDataNullable::create(a); + } + + const auto* tensor1 = a->try_as(); + const auto* tensor2 = b->try_as(); + if (tensor1 && tensor2 && tensor1->size() == tensor2->size()) { + std::vector types_lca; + types_lca.reserve(tensor1->size()); + for (int i = 0; i < tensor1->size(); ++i) { + TypePtr next = calculate_type_lca(tensor1->items[i], tensor2->items[i]); + if (next == nullptr) { + return nullptr; + } + types_lca.push_back(next); + } + return TypeDataTensor::create(std::move(types_lca)); + } + + const auto* tuple1 = a->try_as(); + const auto* tuple2 = b->try_as(); + if (tuple1 && tuple2 && tuple1->size() == tuple2->size()) { + std::vector types_lca; + types_lca.reserve(tuple1->size()); + for (int i = 0; i < tuple1->size(); ++i) { + TypePtr next = calculate_type_lca(tuple1->items[i], tuple2->items[i]); + if (next == nullptr) { + return nullptr; + } + types_lca.push_back(next); + } + return TypeDataTypedTuple::create(std::move(types_lca)); + } + + return nullptr; +} + +// merge (unify) of two sign states: what sign 
do we definitely have +// it's used on data flow rejoin +// example: `if (x > 0) ... else ...`; lca(Positive, NonPositive) = Unknown +SignState calculate_sign_lca(SignState a, SignState b) { + using s = SignState; + // a transformation lookup table, using the following rules: + // 1) if one is Unknown, the result is Unknown ("no definite constraints") + // 2) if one is Never (can't happen), the result is the other + // example: x is known > 0 already, given code `if (x > 0) {} else {}` merges Positive (always true) and Never + // 3) handle all other combinations carefully + static constexpr SignState transformations[7][7] = { + // b= Unknown | Positive | Negative | Zero | NonNegative | NonPositive | Never | + /* a=Unknown */ {s::Unknown, s::Unknown, s::Unknown, s::Unknown, s::Unknown, s::Unknown, s::Unknown }, + /* a=Positive */ {s::Unknown, s::Positive, s::Unknown, s::NonNegative, s::NonNegative, s::Unknown, s::Positive }, + /* a=Negative */ {s::Unknown, s::Unknown, s::Negative, s::NonPositive, s::Unknown, s::NonPositive, s::Negative }, + /* a=Zero */ {s::Unknown, s::NonNegative, s::NonPositive, s::Zero, s::NonNegative, s::NonPositive, s::Zero }, + /* a=NonNegative */ {s::Unknown, s::NonNegative, s::Unknown, s::NonNegative, s::NonNegative, s::Unknown, s::NonNegative}, + /* a=NonPositive */ {s::Unknown, s::Unknown, s::NonPositive, s::NonPositive, s::Unknown, s::NonPositive, s::NonPositive}, + /* a=Never */ {s::Unknown, s::Positive, s::Negative, s::Zero, s::NonNegative, s::NonPositive, s::Never } + }; + + return transformations[static_cast(a)][static_cast(b)]; +} + +// merge (unify) two bool state: what state do we definitely have +// it's used on data flow rejoin +// example: `if (x) ... 
else ...`; lca(AlwaysTrue, AlwaysFalse) = Unknown +BoolState calculate_bool_lca(BoolState a, BoolState b) { + using s = BoolState; + static constexpr BoolState transformations[4][4] = { + // b= Unknown | AlwaysTrue | AlwaysFalse | Never | + /* a=Unknown */ {s::Unknown, s::Unknown, s::Unknown, s::Unknown }, + /* a=AlwaysTrue */ {s::Unknown, s::AlwaysTrue, s::Unknown, s::AlwaysTrue }, + /* a=AlwaysFalse */ {s::Unknown, s::Unknown, s::AlwaysFalse, s::AlwaysFalse}, + /* a=Never */ {s::Unknown, s::AlwaysTrue, s::AlwaysFalse, s::Never } + }; + + return transformations[static_cast(a)][static_cast(b)]; +} + +// see comments above TypeInferringUnifyStrategy +// this function calculates lca or currently stored result and next +bool TypeInferringUnifyStrategy::unify_with(TypePtr next) { + if (unified_result == nullptr) { + unified_result = next; + return true; + } + if (unified_result == next) { + return true; + } + + TypePtr combined = calculate_type_lca(unified_result, next); + if (!combined) { + return false; + } + + unified_result = combined; + return true; +} + +bool TypeInferringUnifyStrategy::unify_with_implicit_return_void() { + if (unified_result == nullptr) { + unified_result = TypeDataVoid::create(); + return true; + } + + return unified_result == TypeDataVoid::create(); +} + +// invalidate knowledge about sub-fields of a variable or its field +// example: `tensorVar = 2`, invalidate facts about `tensorVar`, `tensorVar.0`, `tensorVar.1.2`, and all others +// example: `user.id = rhs`, invalidate facts about `user.id` (sign, etc.) 
and `user.id.*` if exist +void FlowContext::invalidate_all_subfields(LocalVarPtr var_ref, uint64_t parent_path, uint64_t parent_mask) { + for (auto it = known_facts.begin(); it != known_facts.end();) { + bool is_self_or_field = it->first.var_ref == var_ref && (it->first.index_path & parent_mask) == parent_path; + if (is_self_or_field) { + it = known_facts.erase(it); + } else { + ++it; + } + } +} + +// update current type of `local_var` / `tensorVar.0` / `obj.field` +// example: `local_var = rhs` +// example: `f(mutate obj.field)` +// example: `if (t.0 != null)`, in true_flow `t.0` assigned to "not-null of current", in false_flow to null +void FlowContext::register_known_type(SinkExpression s_expr, TypePtr assigned_type) { + // having index_path = (some bytes filled in the end), + // calc index_mask: replace every filled byte with 0xFF + // example: `t.0.1`, index_path = (1<<8) + 2, index_mask = 0xFFFF + uint64_t index_path = s_expr.index_path; + uint64_t index_mask = 0; + while (index_path > 0) { + index_mask = index_mask << 8 | 0xFF; + index_path >>= 8; + } + invalidate_all_subfields(s_expr.var_ref, s_expr.index_path, index_mask); + + // if just `int` assigned, we have no considerations about its sign + // so, even if something existed by the key s_expr, drop all knowledge + known_facts[s_expr] = FactsAboutExpr(assigned_type, SignState::Unknown, BoolState::Unknown); +} + +// mark control flow unreachable / interrupted +void FlowContext::mark_unreachable(UnreachableKind reason) { + unreachable = true; + // currently we don't save why control flow became unreachable (it's not obvious how, there may be consequent reasons), + // but it helps debugging and reading outer code + static_cast(reason); +} + + +// "merge" two data-flow contexts occurs on control flow rejoins (if/else branches merging, for example) +// it's generating a new context that describes "knowledge that definitely outcomes from these two" +// example: in one branch x is `int`, in x is `null`, result 
is `int?` unless any of them is unreachable +FlowContext FlowContext::merge_flow(FlowContext&& c1, FlowContext&& c2) { + if (!c1.unreachable && c2.unreachable) { + return merge_flow(std::move(c2), std::move(c1)); + } + + std::map unified; + + if (c1.unreachable && !c2.unreachable) { + // `if (...) return; else ...;` — copy facts about common variables only from else (c2) + for (const auto& [s_expr, i2] : c2.known_facts) { + auto it1 = c1.known_facts.find(s_expr); + bool need_add = it1 != c1.known_facts.end() || s_expr.index_path != 0; + if (need_add) { + unified.emplace(s_expr, i2); + } + } + + } else { + // either both reachable, or both not — merge types and restrictions of common variables and fields + for (const auto& [s_expr, i1] : c1.known_facts) { + if (auto it2 = c2.known_facts.find(s_expr); it2 != c2.known_facts.end()) { + const FactsAboutExpr& i2 = it2->second; + unified.emplace(s_expr, i1 == i2 ? i1 : FactsAboutExpr( + calculate_type_lca(i1.expr_type, i2.expr_type), + calculate_sign_lca(i1.sign_state, i2.sign_state), + calculate_bool_lca(i1.bool_state, i2.bool_state) + )); + } + } + } + + return FlowContext(std::move(unified), c1.unreachable && c2.unreachable); +} + +// return `T`, so that `T?` = type +// what for: `if (x != null)`, to smart cast x inside if +TypePtr calculate_type_subtract_null(TypePtr type) { + if (const auto* as_nullable = type->try_as()) { + return as_nullable->inner; + } + // union types will be handled here + return TypeDataNever::create(); +} + +// given any expression vertex, extract SinkExpression is possible +// example: `x.0` is { var_ref: x, index_path: 1 } +// example: `x.1` is { var_ref: x, index_path: 2 } +// example: `x!.1` is the same +// example: `x.1.2` is { var_ref: x, index_path: 2<<8 + 3 } +// example: `x!.1!.2` is the same +// not SinkExpressions: `globalVar` / `f()` / `obj.method().1` +SinkExpression extract_sink_expression_from_vertex(AnyExprV v) { + if (auto as_ref = v->try_as()) { + if (LocalVarPtr var_ref = 
as_ref->sym->try_as()) { + return SinkExpression(var_ref); + } + } + + if (auto as_dot = v->try_as(); as_dot && as_dot->is_target_indexed_access()) { + V cur_dot = as_dot; + uint64_t index_path = 0; + while (cur_dot->is_target_indexed_access()) { + int index_at = std::get(cur_dot->target); + index_path = (index_path << 8) + index_at + 1; + if (auto parent_dot = unwrap_not_null_operator(cur_dot->get_obj())->try_as()) { + cur_dot = parent_dot; + } else { + break; + } + } + if (auto as_ref = unwrap_not_null_operator(cur_dot->get_obj())->try_as()) { + if (LocalVarPtr var_ref = as_ref->sym->try_as()) { + return SinkExpression(var_ref, index_path); + } + } + } + + if (auto as_par = v->try_as()) { + return extract_sink_expression_from_vertex(as_par->get_expr()); + } + + if (auto as_assign = v->try_as()) { + return extract_sink_expression_from_vertex(as_assign->get_lhs()); + } + + return {}; +} + +// given `lhs = rhs`, calculate "original" type of `lhs` +// example: `var x: int? = ...; if (x != null) { x (here) = null; }` +// "(here)" x is `int` (smart cast), but originally declared as `int?` +// example: `if (x is (int,int)?) { x!.0 = rhs }`, here `x!.0` is `int` +TypePtr calc_declared_type_before_smart_cast(AnyExprV v) { + if (auto as_ref = v->try_as()) { + if (LocalVarPtr var_ref = as_ref->sym->try_as()) { + return var_ref->declared_type; + } + } + + if (auto as_dot = v->try_as(); as_dot && as_dot->is_target_indexed_access()) { + TypePtr obj_type = as_dot->get_obj()->inferred_type; // v already inferred; hence, index_at is correct + int index_at = std::get(as_dot->target); + if (const auto* t_tensor = obj_type->try_as()) { + return t_tensor->items[index_at]; + } + if (const auto* t_tuple = obj_type->try_as()) { + return t_tuple->items[index_at]; + } + } + + return v->inferred_type; +} + +// given `lhs = rhs` (and `var x = rhs`), calculate probable smart cast for lhs +// it's NOT directly type of rhs! 
see comment at the top of the file about internal structure of tensors/tuples. +// obvious example: `var x: int? = 5`, it's `int` (most cases are like this) +// obvious example: `var x: (int,int)? = null`, it's `null` (`x == null` is always true, `x` can be passed to any `T?`) +// not obvious example: `var x: (int?, int?)? = (3,null)`, result is `(int?,int?)`, whereas type of rhs is `(int,null)` +TypePtr calc_smart_cast_type_on_assignment(TypePtr lhs_declared_type, TypePtr rhs_inferred_type) { + // assign `T` to `T?` (or at least "assignable-to-T" to "T?") + // smart cast to `T` + if (const auto* lhs_nullable = lhs_declared_type->try_as()) { + if (lhs_nullable->inner->can_rhs_be_assigned(rhs_inferred_type)) { + return lhs_nullable->inner; + } + } + + // assign `null` to `T?` + // smart cast to `null` + if (lhs_declared_type->try_as() && rhs_inferred_type == TypeDataNullLiteral::create()) { + return TypeDataNullLiteral::create(); + } + + // no smart cast, type is the same as declared + // example: `var x: (int?,slice?) = (1, null)`, it's `(int?,slice?)`, not `(int,null)` + return lhs_declared_type; +} + + +std::ostream& operator<<(std::ostream& os, const FlowContext& flow) { + os << "(" << flow.known_facts.size() << " facts) " << (flow.unreachable ? "(unreachable) " : ""); + for (const auto& [s_expr, facts] : flow.known_facts) { + os << ", " << s_expr.to_string() << ": " << facts; + } + return os; +} + +std::ostream& operator<<(std::ostream& os, const FactsAboutExpr& facts) { + os << facts.expr_type; + if (facts.sign_state != SignState::Unknown) { + os << " " << to_string(facts.sign_state); + } + if (facts.bool_state != BoolState::Unknown) { + os << " " << to_string(facts.bool_state); + } + return os; +} + +} // namespace tolk diff --git a/tolk/smart-casts-cfg.h b/tolk/smart-casts-cfg.h new file mode 100644 index 00000000..7321f952 --- /dev/null +++ b/tolk/smart-casts-cfg.h @@ -0,0 +1,207 @@ +/* + This file is part of TON Blockchain Library. 
+ + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +#include "fwd-declarations.h" +#include "type-system.h" +#include +#include + +namespace tolk { + +/* + * TypeInferringUnifyStrategy unifies types from various branches to a common result (lca). + * It's used to auto infer function return type based on return statements, like in TypeScript. + * Example: `fun f() { ... return 1; ... return null; }` inferred as `int?`. + * + * Besides function returns, it's also used for ternary `return cond ? 1 : null` and `match` expression. + * If types can't be unified (a function returns int and cell, for example), `unify()` returns false, handled outside. + * BTW, don't confuse this way of inferring with Hindley-Milner, they have nothing in common. + */ +class TypeInferringUnifyStrategy { + TypePtr unified_result = nullptr; + +public: + bool unify_with(TypePtr next); + bool unify_with_implicit_return_void(); + + TypePtr get_result() const { return unified_result; } +}; + +/* + * SinkExpression is an expression that can be smart cast like `if (x != null)` (x is int inside) + * or analyzed by data flow is some other way like `if (x > 0) ... else ...` (x <= 0 inside else). + * In other words, it "absorbs" data flow facts. 
+ * Examples: `localVar`, `localTensor.1`, `localTuple.1.2.3`, `localObj.field` + * These are NOT sink expressions: `globalVar`, `f()`, `f().1` + * Note, that globals are NOT sink: don't encourage to use a global twice, it costs gas, better assign it to a local. + */ +struct SinkExpression { + LocalVarPtr const var_ref; // smart casts and data flow applies only to locals + const uint64_t index_path; // 0 for just `v`; for `v.N` it's (N+1), for `v.N.M` it's (N+1) + (M+1)<<8, etc. + + SinkExpression() + : var_ref(nullptr), index_path(0) {} + explicit SinkExpression(LocalVarPtr var_ref) + : var_ref(var_ref), index_path(0) {} + explicit SinkExpression(LocalVarPtr var_ref, uint64_t index_path) + : var_ref(var_ref), index_path(index_path) {} + + SinkExpression(const SinkExpression&) = default; + SinkExpression& operator=(const SinkExpression&) = delete; + + bool operator==(const SinkExpression& rhs) const { return var_ref == rhs.var_ref && index_path == rhs.index_path; } + bool operator<(const SinkExpression& rhs) const { return var_ref == rhs.var_ref ? index_path < rhs.index_path : var_ref < rhs.var_ref; } + explicit operator bool() const { return var_ref != nullptr; } + + std::string to_string() const; +}; + +// UnreachableKind is a reason of why control flow is unreachable or interrupted +// example: `return;` interrupts control flow +// example: `if (true) ... else ...` inside "else" flow is unreachable because it can't happen +enum class UnreachableKind { + Unknown, // no definite info or not unreachable + CantHappen, + ReturnStatement, + CallNeverReturnFunction, +}; + +// SignState is "definitely positive", etc. 
+// example: inside `if (x > 0)`, x is Positive, in `else` it's NonPositive (if x is local, until reassigned) +enum class SignState { + Unknown, // no definite info + Positive, + Negative, + Zero, + NonNegative, + NonPositive, + Never // can't happen, like "never" type +}; + +// BoolState is "definitely true" or "definitely false" +// example: inside `if (x)`, x is AlwaysTrue, in `else` it's AlwaysFalse +enum class BoolState { + Unknown, // no definite info + AlwaysTrue, + AlwaysFalse, + Never // can't happen, like "never" type +}; + +// FactsAboutExpr represents "everything known about SinkExpression at a given execution point" +// example: after `var x = getNullableInt()`, x is `int?`, sign/bool is Unknown +// example: after `x = 2;`, x is `int`, sign is Positive, bool is AlwaysTrue +// example: inside `if (x != null && x > 0)`, x is `int`, sign is Positive (in else, no definite knowledge) +// remember, that indices/fields are also expressions, `t.1 = 2` or `u.id = 2` also store such facts +// WARNING! Detecting data-flow facts about sign state and bool state is NOT IMPLEMENTED +// (e.g. `if (x > 0)` / `if (!t.1)` is NOT analysed, therefore not updated, always Unknown now) +// it's a potential improvement for the future, for example `if (x > 0) { ... if (x < 0)` to warn always false +// their purpose for now is to show, that data flow is not only about smart casts, but eventually for other facts also +struct FactsAboutExpr { + TypePtr expr_type; // originally declared type or smart cast (Unknown if no info) + SignState sign_state; // definitely positive, etc. 
(Unknown if no info) + BoolState bool_state; // definitely true/false (Unknown if no info) + + FactsAboutExpr() + : expr_type(nullptr), sign_state(SignState::Unknown), bool_state(BoolState::Unknown) {} + FactsAboutExpr(TypePtr smart_cast_type, SignState sign_state, BoolState bool_state) + : expr_type(smart_cast_type), sign_state(sign_state), bool_state(bool_state) {} + + bool operator==(const FactsAboutExpr& rhs) const = default; +}; + +// FlowContext represents "everything known about control flow at a given execution point" +// while traversing AST, each statement node gets "in" FlowContext (prior knowledge) +// and returns "output" FlowContext (representing a state AFTER execution of a statement) +// on branching, like if/else, input context is cloned, two contexts for each branch calculated, and merged to a result +class FlowContext { + // std::map, not std::unordered_map, because LLDB visualises it better, for debugging + std::map known_facts; // all local vars plus (optionally) indices/fields of tensors/tuples/objects + bool unreachable = false; // if execution can't reach this point (after `return`, for example) + + FlowContext(std::map&& known_facts, bool unreachable) + : known_facts(std::move(known_facts)), unreachable(unreachable) {} + + void invalidate_all_subfields(LocalVarPtr var_ref, uint64_t parent_path, uint64_t parent_mask); + + friend std::ostream& operator<<(std::ostream& os, const FlowContext& flow); + +public: + FlowContext() = default; + FlowContext(FlowContext&&) noexcept = default; + FlowContext(const FlowContext&) = delete; + FlowContext& operator=(FlowContext&&) = default; + FlowContext& operator=(const FlowContext&) = delete; + + FlowContext clone() const { + std::map copy = known_facts; + return FlowContext(std::move(copy), unreachable); + } + + bool is_unreachable() const { return unreachable; } + + TypePtr smart_cast_if_exists(SinkExpression s_expr) const { + auto it = known_facts.find(s_expr); + return it == known_facts.end() ? 
nullptr : it->second.expr_type; + } + + void register_known_type(SinkExpression s_expr, TypePtr assigned_type); + void mark_unreachable(UnreachableKind reason); + + static FlowContext merge_flow(FlowContext&& c1, FlowContext&& c2); +}; + +struct ExprFlow { + FlowContext out_flow; + + // only calculated inside `if`, left of `&&`, etc. — there this expression is immediate condition, empty otherwise + FlowContext true_flow; + FlowContext false_flow; + + ExprFlow(FlowContext&& out_flow, FlowContext&& true_flow, FlowContext&& false_flow) + : out_flow(std::move(out_flow)) + , true_flow(std::move(true_flow)) + , false_flow(std::move(false_flow)) {} + ExprFlow(FlowContext&& out_flow, const bool clone_flow_for_condition) + : out_flow(std::move(out_flow)) { + if (clone_flow_for_condition) { + true_flow = this->out_flow.clone(); + false_flow = this->out_flow.clone(); + } + } + + ExprFlow(ExprFlow&&) noexcept = default; + ExprFlow(const ExprFlow&) = delete; + ExprFlow& operator=(ExprFlow&&) = delete; + ExprFlow& operator=(const ExprFlow&) = delete; + + int get_always_true_false_state() const { + if (true_flow.is_unreachable() != false_flow.is_unreachable()) { + return false_flow.is_unreachable() ? 
1 : 2; // 1 is "always true" + } + return 0; + } +}; + +std::ostream& operator<<(std::ostream& os, const FactsAboutExpr& facts); +std::ostream& operator<<(std::ostream& os, const FlowContext& flow); +TypePtr calculate_type_subtract_null(TypePtr type); +SinkExpression extract_sink_expression_from_vertex(AnyExprV v); +TypePtr calc_declared_type_before_smart_cast(AnyExprV v); +TypePtr calc_smart_cast_type_on_assignment(TypePtr lhs_declared_type, TypePtr rhs_inferred_type); + +} // namespace tolk diff --git a/tolk/src-file.cpp b/tolk/src-file.cpp index 52ac3821..1286c1f9 100644 --- a/tolk/src-file.cpp +++ b/tolk/src-file.cpp @@ -18,6 +18,7 @@ #include "compiler-state.h" #include #include +#include namespace tolk { @@ -146,9 +147,10 @@ void SrcLocation::show_context(std::ostream& os) const { return; } SrcFile::SrcPosition pos = src_file->convert_offset(char_offset); - os << " " << pos.line_str << "\n"; + os << std::right << std::setw(4) << pos.line_no << " | "; + os << pos.line_str << "\n"; - os << " "; + os << " " << " | "; for (int i = 1; i < pos.char_no; ++i) { os << ' '; } @@ -193,8 +195,11 @@ std::ostream& operator<<(std::ostream& os, const ParseError& error) { } void ParseError::show(std::ostream& os) const { - os << where << ": error: " << message << std::endl; - where.show_context(os); + os << loc << ": error: " << message << std::endl; + if (current_function) { + os << " // in function `" << current_function->as_human_readable() << "`" << std::endl; + } + loc.show_context(os); } } // namespace tolk diff --git a/tolk/src-file.h b/tolk/src-file.h index 0c82bf18..b0f9cba3 100644 --- a/tolk/src-file.h +++ b/tolk/src-file.h @@ -124,10 +124,14 @@ struct Fatal final : std::exception { std::ostream& operator<<(std::ostream& os, const Fatal& fatal); struct ParseError : std::exception { - SrcLocation where; + FunctionPtr current_function; + SrcLocation loc; std::string message; - ParseError(SrcLocation _where, std::string _msg) : where(_where), message(std::move(_msg)) { 
- } + + ParseError(SrcLocation loc, std::string message) + : current_function(nullptr), loc(loc), message(std::move(message)) {} + ParseError(FunctionPtr current_function, SrcLocation loc, std::string message) + : current_function(current_function), loc(loc), message(std::move(message)) {} const char* what() const noexcept override { return message.c_str(); diff --git a/tolk/symtable.cpp b/tolk/symtable.cpp index 48b0b89d..51dc3440 100644 --- a/tolk/symtable.cpp +++ b/tolk/symtable.cpp @@ -102,9 +102,6 @@ void LocalVarData::assign_resolved_type(TypePtr declared_type) { } void LocalVarData::assign_inferred_type(TypePtr inferred_type) { -#ifdef TOLK_DEBUG - assert(this->declared_type == nullptr); // called when type declaration omitted, inferred from assigned value -#endif this->declared_type = inferred_type; } diff --git a/tolk/tolk.cpp b/tolk/tolk.cpp index cc867c52..71d1969d 100644 --- a/tolk/tolk.cpp +++ b/tolk/tolk.cpp @@ -58,8 +58,8 @@ int tolk_proceed(const std::string &entrypoint_filename) { pipeline_register_global_symbols(); pipeline_resolve_identifiers_and_assign_symbols(); pipeline_calculate_rvalue_lvalue(); - pipeline_detect_unreachable_statements(); pipeline_infer_types_and_calls_and_fields(); + pipeline_check_inferred_types(); pipeline_refine_lvalue_for_mutate_arguments(); pipeline_check_rvalue_lvalue(); pipeline_check_pure_impure_operations(); diff --git a/tolk/type-system.cpp b/tolk/type-system.cpp index 6cd353d5..d73625c2 100644 --- a/tolk/type-system.cpp +++ b/tolk/type-system.cpp @@ -84,6 +84,7 @@ TypePtr TypeDataTuple::singleton; TypePtr TypeDataContinuation::singleton; TypePtr TypeDataNullLiteral::singleton; TypePtr TypeDataUnknown::singleton; +TypePtr TypeDataNever::singleton; TypePtr TypeDataVoid::singleton; void type_system_init() { @@ -96,6 +97,7 @@ void type_system_init() { TypeDataContinuation::singleton = new TypeDataContinuation; TypeDataNullLiteral::singleton = new TypeDataNullLiteral; TypeDataUnknown::singleton = new TypeDataUnknown; + 
TypeDataNever::singleton = new TypeDataNever; TypeDataVoid::singleton = new TypeDataVoid; } @@ -325,53 +327,56 @@ bool TypeDataInt::can_rhs_be_assigned(TypePtr rhs) const { if (rhs == this) { return true; } - return false; + return rhs == TypeDataNever::create(); } bool TypeDataBool::can_rhs_be_assigned(TypePtr rhs) const { if (rhs == this) { return true; } - return false; + return rhs == TypeDataNever::create(); } bool TypeDataCell::can_rhs_be_assigned(TypePtr rhs) const { if (rhs == this) { return true; } - return false; + return rhs == TypeDataNever::create(); } bool TypeDataSlice::can_rhs_be_assigned(TypePtr rhs) const { if (rhs == this) { return true; } - return false; + return rhs == TypeDataNever::create(); } bool TypeDataBuilder::can_rhs_be_assigned(TypePtr rhs) const { if (rhs == this) { return true; } - return false; + return rhs == TypeDataNever::create(); } bool TypeDataTuple::can_rhs_be_assigned(TypePtr rhs) const { if (rhs == this) { return true; } - return false; + return rhs == TypeDataNever::create(); } bool TypeDataContinuation::can_rhs_be_assigned(TypePtr rhs) const { if (rhs == this) { return true; } - return false; + return rhs == TypeDataNever::create(); } bool TypeDataNullLiteral::can_rhs_be_assigned(TypePtr rhs) const { - return rhs == this; + if (rhs == this) { + return true; + } + return rhs == TypeDataNever::create(); } bool TypeDataNullable::can_rhs_be_assigned(TypePtr rhs) const { @@ -384,11 +389,17 @@ bool TypeDataNullable::can_rhs_be_assigned(TypePtr rhs) const { if (const TypeDataNullable* rhs_nullable = rhs->try_as()) { return inner->can_rhs_be_assigned(rhs_nullable->inner); } - return inner->can_rhs_be_assigned(rhs); + if (inner->can_rhs_be_assigned(rhs)) { + return true; + } + return rhs == TypeDataNever::create(); } bool TypeDataFunCallable::can_rhs_be_assigned(TypePtr rhs) const { - return rhs == this; + if (rhs == this) { + return true; + } + return rhs == TypeDataNever::create(); } bool 
TypeDataGenericT::can_rhs_be_assigned(TypePtr rhs) const { @@ -405,7 +416,7 @@ bool TypeDataTensor::can_rhs_be_assigned(TypePtr rhs) const { } return true; } - return false; + return rhs == TypeDataNever::create(); } bool TypeDataTypedTuple::can_rhs_be_assigned(TypePtr rhs) const { @@ -417,7 +428,7 @@ bool TypeDataTypedTuple::can_rhs_be_assigned(TypePtr rhs) const { } return true; } - return false; + return rhs == TypeDataNever::create(); } bool TypeDataUnknown::can_rhs_be_assigned(TypePtr rhs) const { @@ -429,8 +440,15 @@ bool TypeDataUnresolved::can_rhs_be_assigned(TypePtr rhs) const { return false; } +bool TypeDataNever::can_rhs_be_assigned(TypePtr rhs) const { + return true; +} + bool TypeDataVoid::can_rhs_be_assigned(TypePtr rhs) const { - return rhs == this; + if (rhs == this) { + return true; + } + return rhs == TypeDataNever::create(); } @@ -551,6 +569,10 @@ bool TypeDataUnresolved::can_be_casted_with_as_operator(TypePtr cast_to) const { return false; } +bool TypeDataNever::can_be_casted_with_as_operator(TypePtr cast_to) const { + return true; +} + bool TypeDataVoid::can_be_casted_with_as_operator(TypePtr cast_to) const { return cast_to == this; } @@ -584,6 +606,10 @@ bool TypeDataTensor::can_hold_tvm_null_instead() const { return true; } +bool TypeDataNever::can_hold_tvm_null_instead() const { + return false; +} + bool TypeDataVoid::can_hold_tvm_null_instead() const { return false; } @@ -650,6 +676,7 @@ static TypePtr parse_simple_type(Lexer& lex) { case 5: if (str == "slice") return TypeDataSlice::create(); if (str == "tuple") return TypeDataTuple::create(); + if (str == "never") return TypeDataNever::create(); break; case 7: if (str == "builder") return TypeDataBuilder::create(); diff --git a/tolk/type-system.h b/tolk/type-system.h index 02e50fc2..4b671e30 100644 --- a/tolk/type-system.h +++ b/tolk/type-system.h @@ -409,6 +409,27 @@ public: bool can_be_casted_with_as_operator(TypePtr cast_to) const override; }; +/* + * `never` is a special type meaning 
"no value can be hold". + * Is may appear due to smart casts, for example `if (x == null && x != null)` makes x "never". + * Functions returning "never" assume to never exit, calling them interrupts control flow. + * Such variables can not be cast to any other types, all their usage will trigger type mismatch errors. + */ +class TypeDataNever final : public TypeData { + TypeDataNever() : TypeData(19ULL, 0, 0) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "never"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; + bool can_hold_tvm_null_instead() const override; +}; + /* * `void` is TypeDataVoid. * From the type system point of view, `void` functions return nothing. From ef0328837f9acb5db2d1f439e993d8979ea97e07 Mon Sep 17 00:00:00 2001 From: tolk-vm Date: Mon, 24 Feb 2025 20:15:24 +0300 Subject: [PATCH 58/61] [Tolk] `throw` interrupts control flow; `never` type In FunC (and in Tolk before) throwing an exception is just calling a built-in function: > throw 123; // actually, __throw(123) Since it's a regular function, the compiler was not aware that execution will stop, and all following code is unreachable. For instance, `throw` in the end on function needed to be followed by `return` statement. Now, `throw` interrupts control flow, all statements after it are considered unreachable. At IR level, code Ops are also not produced. This works because a built-in __throw() now has `never` type. It can also be applied to custom functions: > fun alwaysThrow(): never { throw 123; } The code after alwaysThrow() call will also be unreachable. 
--- tolk-tester/tests/inference-tests.tolk | 11 ++ tolk-tester/tests/invalid-never-1.tolk | 8 ++ tolk-tester/tests/try-func.tolk | 133 ++++++++++++++++++++++++- tolk-tester/tests/unreachable-4.tolk | 24 +++++ tolk/analyzer.cpp | 44 ++++---- tolk/builtins.cpp | 5 +- tolk/codegen.cpp | 15 ++- tolk/pipe-infer-types-and-calls.cpp | 7 ++ tolk/smart-casts-cfg.h | 1 + tolk/tolk.h | 4 +- 10 files changed, 227 insertions(+), 25 deletions(-) create mode 100644 tolk-tester/tests/invalid-never-1.tolk create mode 100644 tolk-tester/tests/unreachable-4.tolk diff --git a/tolk-tester/tests/inference-tests.tolk b/tolk-tester/tests/inference-tests.tolk index 96bf8b1a..5020d0dd 100644 --- a/tolk-tester/tests/inference-tests.tolk +++ b/tolk-tester/tests/inference-tests.tolk @@ -86,6 +86,17 @@ fun test7() { // __expect_type(eq<(int, slice)>, "(int, slice) -> (int, slice)"); } +fun alwaysThrows(): never { throw 123; } +fun alwaysThrowsNotAnnotated() { throw 123; } +fun alwaysThrowsNotAnnotated2() { alwaysThrows(); } + +fun test9() { + __expect_type(alwaysThrows(), "never"); + __expect_type(alwaysThrows, "() -> never"); + __expect_type(alwaysThrowsNotAnnotated(), "void"); + __expect_type(alwaysThrowsNotAnnotated2(), "void"); +} + fun main() { return 0; diff --git a/tolk-tester/tests/invalid-never-1.tolk b/tolk-tester/tests/invalid-never-1.tolk new file mode 100644 index 00000000..68c6c804 --- /dev/null +++ b/tolk-tester/tests/invalid-never-1.tolk @@ -0,0 +1,8 @@ +fun invalidNever(): never { + if (random()) { throw 123; } +} + +/** +@compilation_should_fail +@stderr a function returning `never` can not have a reachable endpoint + */ diff --git a/tolk-tester/tests/try-func.tolk b/tolk-tester/tests/try-func.tolk index dfd72e9e..4ac86d96 100644 --- a/tolk-tester/tests/try-func.tolk +++ b/tolk-tester/tests/try-func.tolk @@ -164,6 +164,78 @@ fun test109(): (int, int) { return (g_reg, l_reg); } +fun alwaysThrow123(): never { + throw 123; +} + +fun alwaysThrowX(x: int): never { + if (x > 10) { 
throw (x, beginCell()); } + else { throw (x, null); } +} + +fun anotherNever(throw123: bool): never { + if (throw123) { alwaysThrow123(); } + alwaysThrowX(456); +} + +fun testCodegen1(x: int) { + if (x > 10) { + throw 123; + anotherNever(true); // unreachable, will be dropped + } + else if (x < 10) { + throw x; + return -123; // unreachable, will be dropped + } + return 0; +} + +fun testCodegen2(x: int) { + if (x > 10) { + alwaysThrow123(); + anotherNever(true); // unreachable, will be dropped + } + else if (x < 10) { + anotherNever(false); + return -123; // unreachable, will be dropped + } + return 0; +} + +@method_id(110) +fun test110(b: bool) { + try { + if (b == true) { testCodegen1(100); } + testCodegen1(5); + return -1; + } catch (ex) { + return ex; + } +} + +@method_id(111) +fun test111(b: bool) { + try { + if (b == true) { testCodegen2(100); } + testCodegen2(5); + return -1; + } catch (ex) { + return ex; + } +} + +fun mySetCode(newCode: slice): void + asm "SETCODE"; + +fun testCodegen3(numberId: int, paramVal: cell) { + if (numberId == -1000) { + var cs = paramVal.beginParse(); + mySetCode(cs); + throw 0; + } + paramVal.beginParse(); +} + fun main() { } @@ -187,6 +259,65 @@ fun main() { @testcase | 107 | 5 | 5 @testcase | 107 | 20 | 20 @testcase | 108 | | 0 +@testcase | 109 | | 10 10 +@testcase | 110 | -1 | 123 +@testcase | 110 | 0 | 5 +@testcase | 111 | -1 | 123 +@testcase | 111 | 0 | 456 -@code_hash 39307974281105539319288356721945232226028429128341177951717392648324358675585 +@code_hash 57361460846265694653029920796509802052573595128418810728101968091567195330515 + +@fif_codegen +""" + testCodegen1 PROC:<{ + // x + DUP // x x + 10 GTINT // x '2 + IFJMP:<{ // x + 123 THROW + }> // x + DUP // x x + 10 LESSINT // x '6 + IFJMP:<{ // x + THROWANY + }> // x + DROP // + 0 PUSHINT // '8=0 + }> +""" + +@fif_codegen +""" + testCodegen2 PROC:<{ + // x + DUP // x x + 10 GTINT // x '2 + IFJMP:<{ // x + DROP // + alwaysThrow123 CALLDICT + }> // x + 10 LESSINT // '5 + 
IFJMP:<{ // + FALSE // '6 + anotherNever CALLDICT + }> // + 0 PUSHINT // '8=0 + }> +""" + +@fif_codegen +""" + testCodegen3 PROC:<{ + // numberId paramVal + SWAP + -1000 PUSHINT // paramVal numberId '2=-1000 + EQUAL // paramVal '3 + IFJMP:<{ // paramVal + CTOS // cs + SETCODE + 0 THROW + }> // paramVal + DROP // + }> +""" */ diff --git a/tolk-tester/tests/unreachable-4.tolk b/tolk-tester/tests/unreachable-4.tolk new file mode 100644 index 00000000..6b25b3d9 --- /dev/null +++ b/tolk-tester/tests/unreachable-4.tolk @@ -0,0 +1,24 @@ +fun alwaysThrows(): never { + throw 456; +} + +fun testUnreachable(x: int) { + if (x) { throw 123; } + else { alwaysThrows(); } + return 1; +} + +fun main() { + try { + testUnreachable(100); + throw 80; + } catch (excNo) { + return excNo; + } +} + +/** +@testcase | 0 | | 123 +@stderr warning: unreachable code +@stderr return 1; + */ diff --git a/tolk/analyzer.cpp b/tolk/analyzer.cpp index 9303bc83..c38b0bfa 100644 --- a/tolk/analyzer.cpp +++ b/tolk/analyzer.cpp @@ -20,6 +20,13 @@ namespace tolk { +// functions returning "never" are assumed to interrupt flow +// for instance, variables after their call aren't considered used +// its main purpose is `throw` statement, it's a call to a built-in `__throw` function +static bool does_function_always_throw(FunctionPtr fun_ref) { + return fun_ref->declared_return_type == TypeDataNever::create(); +} + /* * * ANALYZE AND PREPROCESS ABSTRACT CODE @@ -262,17 +269,6 @@ VarDescrList& VarDescrList::operator|=(const VarDescrList& y) { } } -VarDescrList& VarDescrList::operator&=(const VarDescrList& values) { - for (const VarDescr& vd : values.list) { - VarDescr* item = operator[](vd.idx); - if (item) { - *item &= vd; - } - } - unreachable |= values.unreachable; - return *this; -} - VarDescrList& VarDescrList::import_values(const VarDescrList& values) { if (values.unreachable) { set_unreachable(); @@ -326,6 +322,17 @@ bool Op::compute_used_vars(const CodeBlob& code, bool edit) { } return 
std_compute_used_vars(true); } + if (cl == _Call && does_function_always_throw(f_sym)) { + VarDescrList new_var_info; // empty, not next->var_info + if (args.size() == right.size()) { + for (const VarDescr& arg : args) { + new_var_info.add_var(arg.idx, arg.is_unused()); + } + } else { + new_var_info.add_vars(right, false); + } + return set_var_info(std::move(new_var_info)); + } return std_compute_used_vars(); } case _SetGlob: { @@ -516,20 +523,19 @@ bool prune_unreachable(std::unique_ptr& ops) { case Op::_SliceConst: case Op::_GlobVar: case Op::_SetGlob: - case Op::_Call: case Op::_CallInd: case Op::_Tuple: case Op::_UnTuple: case Op::_Import: + case Op::_Let: reach = true; break; - case Op::_Let: { - reach = true; - break; - } case Op::_Return: reach = false; break; + case Op::_Call: + reach = !does_function_always_throw(op.f_sym); + break; case Op::_If: { // if left then block0 else block1; ... VarDescr* c_var = op.var_info[op.left[0]]; @@ -712,6 +718,9 @@ VarDescrList Op::fwd_analyze(VarDescrList values) { values.add_newval(i); } } + if (does_function_always_throw(f_sym)) { + values.set_unreachable(); + } break; } case _Tuple: @@ -860,10 +869,11 @@ bool Op::mark_noreturn() { case _SetGlob: case _GlobVar: case _CallInd: - case _Call: return set_noreturn(next->mark_noreturn()); case _Return: return set_noreturn(); + case _Call: + return set_noreturn(next->mark_noreturn() || does_function_always_throw(f_sym)); case _If: case _TryCatch: // note, that & | (not && ||) here and below is mandatory to invoke both left and right calls diff --git a/tolk/builtins.cpp b/tolk/builtins.cpp index 2b207c25..cb89c984 100644 --- a/tolk/builtins.cpp +++ b/tolk/builtins.cpp @@ -1088,6 +1088,7 @@ void define_builtins() { TypePtr Slice = TypeDataSlice::create(); TypePtr Builder = TypeDataBuilder::create(); TypePtr Tuple = TypeDataTuple::create(); + TypePtr Never = TypeDataNever::create(); std::vector itemsT; itemsT.emplace_back("T"); @@ -1201,10 +1202,10 @@ void define_builtins() { 
define_builtin_func("__isNull", {typeT}, Bool, declGenericT, compile_is_null, FunctionData::flagMarkedAsPure); - define_builtin_func("__throw", ParamsInt1, Unit, nullptr, + define_builtin_func("__throw", ParamsInt1, Never, nullptr, compile_throw, 0); - define_builtin_func("__throw_arg", {typeT, Int}, Unit, declGenericT, + define_builtin_func("__throw_arg", {typeT, Int}, Never, declGenericT, compile_throw_arg, 0); define_builtin_func("__throw_if_unless", ParamsInt3, Unit, nullptr, diff --git a/tolk/codegen.cpp b/tolk/codegen.cpp index 5b2c50cc..ac1cf639 100644 --- a/tolk/codegen.cpp +++ b/tolk/codegen.cpp @@ -274,8 +274,16 @@ void Stack::rearrange_top(var_idx_t top, bool last) { bool Op::generate_code_step(Stack& stack) { stack.opt_show(); - stack.drop_vars_except(var_info); - stack.opt_show(); + + // detect `throw 123` (actually _IntConst 123 + _Call __throw) + // don't clear the stack, since dropping unused elements make no sense, an exception is thrown anyway + bool will_now_immediate_throw = (cl == _Call && f_sym->is_builtin_function() && f_sym->name == "__throw") + || (cl == _IntConst && next->cl == _Call && next->f_sym->is_builtin_function() && next->f_sym->name == "__throw"); + if (!will_now_immediate_throw) { + stack.drop_vars_except(var_info); + stack.opt_show(); + } + bool inline_func = stack.mode & Stack::_InlineFunc; switch (cl) { case _Nop: @@ -285,6 +293,7 @@ bool Op::generate_code_step(Stack& stack) { stack.enforce_state(left); if (stack.o.retalt_ && (stack.mode & Stack::_NeedRetAlt)) { stack.o << "RETALT"; + stack.o.retalt_inserted_ = true; } stack.opt_show(); return false; @@ -514,7 +523,7 @@ bool Op::generate_code_step(Stack& stack) { int j = ret_order ? 
ret_order->at(i) : i; stack.push_new_var(left.at(j)); } - return true; + return !f_sym || f_sym->declared_return_type != TypeDataNever::create(); } case _SetGlob: { tolk_assert(g_sym); diff --git a/tolk/pipe-infer-types-and-calls.cpp b/tolk/pipe-infer-types-and-calls.cpp index 7ab0aa1c..5fb12059 100644 --- a/tolk/pipe-infer-types-and-calls.cpp +++ b/tolk/pipe-infer-types-and-calls.cpp @@ -1013,6 +1013,9 @@ class InferTypesAndCallsAndFieldsVisitor final { TypePtr inferred_type = dot_obj && fun_ref->does_return_self() ? dot_obj->inferred_type : fun_ref->inferred_return_type; assign_inferred_type(v, inferred_type); assign_inferred_type(callee, fun_ref->inferred_full_type); + if (inferred_type == TypeDataNever::create()) { + flow.mark_unreachable(UnreachableKind::CallNeverReturnFunction); + } // note, that mutate params don't affect typing, they are handled when converting to IR return ExprFlow(std::move(flow), used_as_condition); } @@ -1139,6 +1142,7 @@ class InferTypesAndCallsAndFieldsVisitor final { FlowContext process_throw_statement(V v, FlowContext&& flow) { flow = infer_any_expr(v->get_thrown_code(), std::move(flow), false).out_flow; flow = infer_any_expr(v->get_thrown_arg(), std::move(flow), false).out_flow; + flow.mark_unreachable(UnreachableKind::ThrowStatement); return flow; } @@ -1209,6 +1213,9 @@ public: if (!body_end.is_unreachable()) { fun_ref->mutate()->assign_is_implicit_return(); + if (fun_ref->declared_return_type == TypeDataNever::create()) { // `never` can only be declared, it can't be inferred + fire(fun_ref, v_function->get_body()->as()->loc_end, "a function returning `never` can not have a reachable endpoint"); + } } if (!fun_ref->declared_return_type) { diff --git a/tolk/smart-casts-cfg.h b/tolk/smart-casts-cfg.h index 7321f952..b97c8864 100644 --- a/tolk/smart-casts-cfg.h +++ b/tolk/smart-casts-cfg.h @@ -77,6 +77,7 @@ struct SinkExpression { enum class UnreachableKind { Unknown, // no definite info or not unreachable CantHappen, + 
ThrowStatement, ReturnStatement, CallNeverReturnFunction, }; diff --git a/tolk/tolk.h b/tolk/tolk.h index d218d510..3f00d0d4 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h @@ -205,7 +205,6 @@ struct VarDescrList { std::size_t count_used(const std::vector idx_list) const; VarDescr& add(var_idx_t idx); VarDescr& add_newval(var_idx_t idx); - VarDescrList& operator&=(const VarDescrList& values); VarDescrList& import_values(const VarDescrList& values); VarDescrList operator|(const VarDescrList& y) const; VarDescrList& operator|=(const VarDescrList& values); @@ -575,6 +574,7 @@ struct AsmOpList { const std::vector* var_names_{nullptr}; std::vector constants_; bool retalt_{false}; + bool retalt_inserted_{false}; void out(std::ostream& os, int mode = 0) const; AsmOpList(int indent = 0, const std::vector* var_names = nullptr) : indent_(indent), var_names_(var_names) { } @@ -1030,7 +1030,7 @@ struct Stack { } void apply_wrappers(int callxargs_count) { bool is_inline = mode & _InlineFunc; - if (o.retalt_) { + if (o.retalt_inserted_) { o.insert(0, "SAMEALTSAVE"); o.insert(0, "c2 SAVE"); } From f67e9f4b6f6c2e1d73d7c49e506b9614893f57f8 Mon Sep 17 00:00:00 2001 From: tolk-vm Date: Mon, 24 Feb 2025 20:15:43 +0300 Subject: [PATCH 59/61] [Tolk] Bump version to v0.9 --- crypto/smartcont/tolk-stdlib/common.tolk | 2 +- crypto/smartcont/tolk-stdlib/gas-payments.tolk | 2 +- crypto/smartcont/tolk-stdlib/lisp-lists.tolk | 2 +- crypto/smartcont/tolk-stdlib/tvm-dicts.tolk | 2 +- crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk | 2 +- tolk/tolk-version.h | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/crypto/smartcont/tolk-stdlib/common.tolk b/crypto/smartcont/tolk-stdlib/common.tolk index ba1e6c14..82757c22 100644 --- a/crypto/smartcont/tolk-stdlib/common.tolk +++ b/crypto/smartcont/tolk-stdlib/common.tolk @@ -1,7 +1,7 @@ // Standard library for Tolk (LGPL licence). // It contains common functions that are available out of the box, the user doesn't have to import anything. 
// More specific functions are required to be imported explicitly, like "@stdlib/tvm-dicts". -tolk 0.8 +tolk 0.9 /** Tuple manipulation primitives. diff --git a/crypto/smartcont/tolk-stdlib/gas-payments.tolk b/crypto/smartcont/tolk-stdlib/gas-payments.tolk index 2ac32f48..9873ca94 100644 --- a/crypto/smartcont/tolk-stdlib/gas-payments.tolk +++ b/crypto/smartcont/tolk-stdlib/gas-payments.tolk @@ -1,5 +1,5 @@ // A part of standard library for Tolk -tolk 0.8 +tolk 0.9 /** Gas and payment related primitives. diff --git a/crypto/smartcont/tolk-stdlib/lisp-lists.tolk b/crypto/smartcont/tolk-stdlib/lisp-lists.tolk index af8b6bd7..e63438b5 100644 --- a/crypto/smartcont/tolk-stdlib/lisp-lists.tolk +++ b/crypto/smartcont/tolk-stdlib/lisp-lists.tolk @@ -1,5 +1,5 @@ // A part of standard library for Tolk -tolk 0.8 +tolk 0.9 /** Lisp-style lists are nested 2-elements tuples: `(1, (2, (3, null)))` represents list `[1, 2, 3]`. diff --git a/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk b/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk index 4b9d5c81..ee205687 100644 --- a/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk +++ b/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk @@ -1,5 +1,5 @@ // A part of standard library for Tolk -tolk 0.8 +tolk 0.9 /** Dictionaries are represented as `cell` data type (cells can store anything, dicts in particular). diff --git a/crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk b/crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk index 72a54aac..136eaa4a 100644 --- a/crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk +++ b/crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk @@ -1,5 +1,5 @@ // A part of standard library for Tolk -tolk 0.8 +tolk 0.9 /// Usually `c3` has a continuation initialized by the whole code of the contract. It is used for function calls. /// The primitive returns the current value of `c3`. 
diff --git a/tolk/tolk-version.h b/tolk/tolk-version.h index 84326012..bbea63ff 100644 --- a/tolk/tolk-version.h +++ b/tolk/tolk-version.h @@ -18,6 +18,6 @@ namespace tolk { -constexpr const char* TOLK_VERSION = "0.8.0"; +constexpr const char* TOLK_VERSION = "0.9.0"; } // namespace tolk From faf58118a4244add6d0185103130eb3c00d458fa Mon Sep 17 00:00:00 2001 From: neodix42 Date: Wed, 5 Mar 2025 10:29:49 +0400 Subject: [PATCH 60/61] Fix failing docker build - arm64 (#1541) * fix docker github build (Segmentation fault (core dumped) dpkg: error processing package libc-bin (--configure)) * fix docker github build (Segmentation fault (core dumped) dpkg: error processing package libc-bin (--configure)) * update gh qemu actions * run on ubuntu 24.04 * try driver-opts: image=moby/buildkit:v0.11.0 * split docker images for amd64 and arm64 * Revert "split docker images for amd64 and arm64" This reverts commit 609617f0056ac1eff7b34dce146b864de19de527. * clean libc-bin --- .../workflows/docker-ubuntu-branch-image.yml | 17 +++++++++++++++-- .github/workflows/docker-ubuntu-image.yml | 6 +++--- Dockerfile | 10 +++++++++- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/.github/workflows/docker-ubuntu-branch-image.yml b/.github/workflows/docker-ubuntu-branch-image.yml index afde104a..00aa5015 100644 --- a/.github/workflows/docker-ubuntu-branch-image.yml +++ b/.github/workflows/docker-ubuntu-branch-image.yml @@ -20,10 +20,12 @@ jobs: submodules: 'recursive' - name: Set up QEMU - uses: docker/setup-qemu-action@v3 + uses: docker/setup-qemu-action@v3.5.0 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@v3.10.0 + with: + driver-opts: image=moby/buildkit:v0.11.0 - name: Login to GitHub Container Registry uses: docker/login-action@v3 @@ -32,6 +34,17 @@ jobs: username: ${{ github.repository_owner }} password: ${{ secrets.GITHUB_TOKEN }} + - name: Build and export to Docker + uses: docker/build-push-action@v6 + with: + load: 
true + context: ./ + tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:test + + - name: Test + run: | + docker run --rm -e "TEST=1" ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:test + - name: Get tag as branch name id: tag run: | diff --git a/.github/workflows/docker-ubuntu-image.yml b/.github/workflows/docker-ubuntu-image.yml index 48c553ef..aa4eaeef 100644 --- a/.github/workflows/docker-ubuntu-image.yml +++ b/.github/workflows/docker-ubuntu-image.yml @@ -20,10 +20,10 @@ jobs: submodules: 'recursive' - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - + uses: docker/setup-qemu-action@v3.5.0 + - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@v3.10.0 - name: Login to GitHub Container Registry uses: docker/login-action@v3 diff --git a/Dockerfile b/Dockerfile index 61e18c0b..f1b836bf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,13 @@ FROM ubuntu:22.04 AS builder +ARG DEBIAN_FRONTEND=noninteractive RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y build-essential cmake clang openssl libssl-dev zlib1g-dev gperf wget git ninja-build libsodium-dev libmicrohttpd-dev liblz4-dev pkg-config autoconf automake libtool libjemalloc-dev lsb-release software-properties-common gnupg + rm /var/lib/dpkg/info/libc-bin.* && \ + apt-get clean && \ + apt-get update && \ + apt install libc-bin && \ + apt-get install -y build-essential cmake clang openssl libssl-dev zlib1g-dev gperf wget git \ + ninja-build libsodium-dev libmicrohttpd-dev liblz4-dev pkg-config autoconf automake libtool \ + libjemalloc-dev lsb-release software-properties-common gnupg RUN wget https://apt.llvm.org/llvm.sh && \ chmod +x llvm.sh && \ @@ -25,6 +32,7 @@ RUN mkdir build && \ blockchain-explorer emulator tonlibjson http-proxy adnl-proxy FROM ubuntu:22.04 +ARG DEBIAN_FRONTEND=noninteractive RUN apt-get update && \ apt-get install -y wget curl libatomic1 openssl libsodium-dev libmicrohttpd-dev liblz4-dev libjemalloc-dev htop \ 
net-tools netcat iptraf-ng jq tcpdump pv plzip && \ From cf50b4b5daee74cef0a89478ace5d4b8865e9985 Mon Sep 17 00:00:00 2001 From: EmelyanenkoK Date: Wed, 5 Mar 2025 09:51:34 +0300 Subject: [PATCH 61/61] Update changelogs --- Changelog.md | 14 ++++++++++++++ recent_changelog.md | 23 ++++++++++++----------- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/Changelog.md b/Changelog.md index 34195f74..4dce39fc 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,3 +1,17 @@ +## 2025.03 Update +1. New extracurrency behavior introduced, check [GlobalVersions.md](./doc/GlobalVersions.md#version-10) +2. Optimization of validation process, in particular CellStorageStat. +3. Flag for speeding up broadcasts in various overlays. +4. Fixes for static builds for emulator and tonlibjson +5. Improving getstats output: add + * Liteserver queries count + * Collated/validated blocks count, number of active sessions + * Persistent state sizes + * Initial sync progress +6. Fixes in logging, TON Storage, external message checking, persistent state downloading, UB in tonlib + +Besides the work of the core team, this update is based on the efforts of @Sild from StonFi(UB in tonlib). + ## 2025.02 Update 1. Series of improvement/fixes for `Config8.version >= 9`, check [GlobalVersions.md](./doc/GlobalVersions.md) 2. Fix for better discovery of updated nodes' (validators') IPs: retry dht queries diff --git a/recent_changelog.md b/recent_changelog.md index dfa39aa6..820d2aa4 100644 --- a/recent_changelog.md +++ b/recent_changelog.md @@ -1,12 +1,13 @@ -## 2025.02 Update -1. Series of improvement/fixes for `Config8.version >= 9`, check [GlobalVersions.md](./doc/GlobalVersions.md) -2. Fix for better discovery of updated nodes' (validators') IPs: retry dht queries -3. Series of improvements for extra currency adoption: fixed c7 in rungetmethod, reserve modes -4. TVM: Fix processing continuation control data on deep jump -5.
A few fixes of tl-b schemes: crc computation, incorrect tag for merkle proofs, advance_ext, NatWidth print -6. Emulator improvements: fix setting libraries, extracurrency support -7. Increase of gas limit for unlocking highload-v2 wallets locked in the beginning of 2024 -8. Validator console improvement: dashed names, better shard formats +## 2025.03 Update +1. New extracurrency behavior introduced, check [GlobalVersions.md](./doc/GlobalVersions.md#version-10) +2. Optimization of validation process, in particular CellStorageStat. +3. Flag for speeding up broadcasts in various overlays. +4. Fixes for static builds for emulator and tonlibjson +5. Improving getstats output: add + * Liteserver queries count + * Collated/validated blocks count, number of active sessions + * Persistent state sizes + * Initial sync progress +6. Fixes in logging, TON Storage, external message checking, persistent state downloading, UB in tonlib - -Besides the work of the core team, this update is based on the efforts of @dbaranovstonfi from StonFi(libraries in emulator), @Rexagon (ret on deep jumps), @tvorogme from DTon (`advance_ext`), Nan from Zellic (`stk_und` and JNI) +Besides the work of the core team, this update is based on the efforts of @Sild from StonFi(UB in tonlib).