diff --git a/.github/workflows/docker-ubuntu-branch-image.yml b/.github/workflows/docker-ubuntu-branch-image.yml index afde104a..00aa5015 100644 --- a/.github/workflows/docker-ubuntu-branch-image.yml +++ b/.github/workflows/docker-ubuntu-branch-image.yml @@ -20,10 +20,12 @@ jobs: submodules: 'recursive' - name: Set up QEMU - uses: docker/setup-qemu-action@v3 + uses: docker/setup-qemu-action@v3.5.0 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@v3.10.0 + with: + driver-opts: image=moby/buildkit:v0.11.0 - name: Login to GitHub Container Registry uses: docker/login-action@v3 @@ -32,6 +34,17 @@ jobs: username: ${{ github.repository_owner }} password: ${{ secrets.GITHUB_TOKEN }} + - name: Build and export to Docker + uses: docker/build-push-action@v6 + with: + load: true + context: ./ + tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:test + + - name: Test + run: | + docker run --rm -e "TEST=1" ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:test + - name: Get tag as branch name id: tag run: | diff --git a/.github/workflows/docker-ubuntu-image.yml b/.github/workflows/docker-ubuntu-image.yml index 48c553ef..aa4eaeef 100644 --- a/.github/workflows/docker-ubuntu-image.yml +++ b/.github/workflows/docker-ubuntu-image.yml @@ -20,10 +20,10 @@ jobs: submodules: 'recursive' - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - + uses: docker/setup-qemu-action@v3.5.0 + - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@v3.10.0 - name: Login to GitHub Container Registry uses: docker/login-action@v3 diff --git a/Changelog.md b/Changelog.md index 34195f74..4dce39fc 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,3 +1,17 @@ +## 2025.03 Update +1. New extra currency behavior introduced, check [GlobalVersions.md](./doc/GlobalVersions.md#version-10) +2. Optimization of validation process, in particular CellStorageStat. +3. Flag for speeding up broadcasts in various overlays. +4. 
Fixes for static builds for emulator and tonlibjson +5. Improving getstats output: adds + * Liteserver queries count + * Collated/validated blocks count, number of active sessions + * Persistent state sizes + * Initial sync progress +6. Fixes in logging, TON Storage, external message checking, persistent state downloading, UB in tonlib + +Besides the work of the core team, this update is based on the efforts of @Sild from StonFi (UB in tonlib). + ## 2025.02 Update 1. Series of improvement/fixes for `Config8.version >= 9`, check [GlobalVersions.md](./doc/GlobalVersions.md) 2. Fix for better discovery of updated nodes' (validators') IPs: retry dht queries diff --git a/Dockerfile b/Dockerfile index 61e18c0b..f1b836bf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,13 @@ FROM ubuntu:22.04 AS builder +ARG DEBIAN_FRONTEND=noninteractive RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y build-essential cmake clang openssl libssl-dev zlib1g-dev gperf wget git ninja-build libsodium-dev libmicrohttpd-dev liblz4-dev pkg-config autoconf automake libtool libjemalloc-dev lsb-release software-properties-common gnupg + rm /var/lib/dpkg/info/libc-bin.* && \ + apt-get clean && \ + apt-get update && \ + apt install libc-bin && \ + apt-get install -y build-essential cmake clang openssl libssl-dev zlib1g-dev gperf wget git \ + ninja-build libsodium-dev libmicrohttpd-dev liblz4-dev pkg-config autoconf automake libtool \ + libjemalloc-dev lsb-release software-properties-common gnupg RUN wget https://apt.llvm.org/llvm.sh && \ chmod +x llvm.sh && \ @@ -25,6 +32,7 @@ RUN mkdir build && \ blockchain-explorer emulator tonlibjson http-proxy adnl-proxy FROM ubuntu:22.04 +ARG DEBIAN_FRONTEND=noninteractive RUN apt-get update && \ apt-get install -y wget curl libatomic1 openssl libsodium-dev libmicrohttpd-dev liblz4-dev libjemalloc-dev htop \ net-tools netcat iptraf-ng jq tcpdump pv plzip && \ diff --git a/catchain/catchain-receiver.cpp 
b/catchain/catchain-receiver.cpp index edef9065..a6160383 100644 --- a/catchain/catchain-receiver.cpp +++ b/catchain/catchain-receiver.cpp @@ -526,10 +526,12 @@ void CatChainReceiverImpl::start_up() { for (td::uint32 i = 0; i < get_sources_cnt(); i++) { root_keys.emplace(get_source(i)->get_hash(), OVERLAY_MAX_ALLOWED_PACKET_SIZE); } - td::actor::send_closure(overlay_manager_, &overlay::Overlays::create_private_overlay, + overlay::OverlayOptions overlay_options; + overlay_options.broadcast_speed_multiplier_ = opts_.broadcast_speed_multiplier; + td::actor::send_closure(overlay_manager_, &overlay::Overlays::create_private_overlay_ex, get_source(local_idx_)->get_adnl_id(), overlay_full_id_.clone(), std::move(ids), make_callback(), overlay::OverlayPrivacyRules{0, 0, std::move(root_keys)}, - R"({ "type": "catchain" })"); + R"({ "type": "catchain" })", std::move(overlay_options)); CHECK(root_block_); diff --git a/common/global-version.h b/common/global-version.h index 533e5e8d..2308ce3e 100644 --- a/common/global-version.h +++ b/common/global-version.h @@ -19,6 +19,6 @@ namespace ton { // See doc/GlobalVersions.md -const int SUPPORTED_VERSION = 9; +constexpr int SUPPORTED_VERSION = 10; } diff --git a/crypto/block/block.cpp b/crypto/block/block.cpp index 302a2aa4..e0782240 100644 --- a/crypto/block/block.cpp +++ b/crypto/block/block.cpp @@ -360,7 +360,6 @@ MsgProcessedUptoCollection::MsgProcessedUptoCollection(ton::ShardIdFull _owner, z.shard = key.get_uint(64); z.mc_seqno = (unsigned)((key + 64).get_uint(32)); z.last_inmsg_lt = value.write().fetch_ulong(64); - // std::cerr << "ProcessedUpto shard " << std::hex << z.shard << std::dec << std::endl; return value.write().fetch_bits_to(z.last_inmsg_hash) && z.shard && ton::shard_contains(owner.shard, z.shard); }); } @@ -862,8 +861,10 @@ td::Status ShardState::unpack_out_msg_queue_info(Ref out_msg_queue_inf out_msg_queue_ = std::make_unique(std::move(qinfo.out_queue), 352, block::tlb::aug_OutMsgQueue); if (verbosity >= 3 * 1) { 
- LOG(DEBUG) << "unpacking ProcessedUpto of our previous block " << id_.to_str(); - block::gen::t_ProcessedInfo.print(std::cerr, qinfo.proc_info); + FLOG(DEBUG) { + sb << "unpacking ProcessedUpto of our previous block " << id_.to_str(); + block::gen::t_ProcessedInfo.print(sb, qinfo.proc_info); + }; } if (!block::gen::t_ProcessedInfo.validate_csr(1024, qinfo.proc_info)) { return td::Status::Error( @@ -1349,6 +1350,35 @@ bool CurrencyCollection::clamp(const CurrencyCollection& other) { return ok || invalidate(); } +bool CurrencyCollection::check_extra_currency_limit(td::uint32 max_currencies) const { + td::uint32 count = 0; + return vm::Dictionary{extra, 32}.check_for_each([&](td::Ref, td::ConstBitPtr, int) { + ++count; + return count <= max_currencies; + }); +} + +bool CurrencyCollection::remove_zero_extra_currencies(Ref& root, td::uint32 max_currencies) { + td::uint32 count = 0; + vm::Dictionary dict{root, 32}; + int res = dict.filter([&](const vm::CellSlice& cs, td::ConstBitPtr, int) -> int { + ++count; + if (count > max_currencies) { + return -1; + } + td::RefInt256 val = tlb::t_VarUInteger_32.as_integer(cs); + if (val.is_null()) { + return -1; + } + return val->sgn() > 0; + }); + if (res < 0) { + return false; + } + root = dict.get_root_cell(); + return true; +} + bool CurrencyCollection::operator==(const CurrencyCollection& other) const { return is_valid() && other.is_valid() && !td::cmp(grams, other.grams) && (extra.not_null() == other.extra.not_null()) && diff --git a/crypto/block/block.h b/crypto/block/block.h index f64f00a8..685005b4 100644 --- a/crypto/block/block.h +++ b/crypto/block/block.h @@ -391,6 +391,8 @@ struct CurrencyCollection { CurrencyCollection operator-(CurrencyCollection&& other) const; CurrencyCollection operator-(td::RefInt256 other_grams) const; bool clamp(const CurrencyCollection& other); + bool check_extra_currency_limit(td::uint32 max_currencies) const; + static bool remove_zero_extra_currencies(Ref& root, td::uint32 max_currencies); 
bool store(vm::CellBuilder& cb) const; bool store_or_zero(vm::CellBuilder& cb) const; bool fetch(vm::CellSlice& cs); diff --git a/crypto/block/block.tlb b/crypto/block/block.tlb index b8b40827..4a8bbc06 100644 --- a/crypto/block/block.tlb +++ b/crypto/block/block.tlb @@ -801,7 +801,7 @@ size_limits_config#01 max_msg_bits:uint32 max_msg_cells:uint32 max_library_cells max_ext_msg_size:uint32 max_ext_msg_depth:uint16 = SizeLimitsConfig; size_limits_config_v2#02 max_msg_bits:uint32 max_msg_cells:uint32 max_library_cells:uint32 max_vm_data_depth:uint16 max_ext_msg_size:uint32 max_ext_msg_depth:uint16 max_acc_state_cells:uint32 max_acc_state_bits:uint32 - max_acc_public_libraries:uint32 defer_out_queue_size_limit:uint32 = SizeLimitsConfig; + max_acc_public_libraries:uint32 defer_out_queue_size_limit:uint32 max_msg_extra_currencies:uint32 = SizeLimitsConfig; _ SizeLimitsConfig = ConfigParam 43; // key is [ wc:int32 addr:uint256 ] diff --git a/crypto/block/mc-config.cpp b/crypto/block/mc-config.cpp index 14881913..0f019b06 100644 --- a/crypto/block/mc-config.cpp +++ b/crypto/block/mc-config.cpp @@ -163,8 +163,11 @@ td::Status ConfigInfo::unpack() { } gen::McStateExtra::Record extra_info; if (!tlb::unpack_cell(state_extra_root_, extra_info)) { - vm::load_cell_slice(state_extra_root_).print_rec(std::cerr); - block::gen::t_McStateExtra.print_ref(std::cerr, state_extra_root_); + FLOG(WARNING) { + sb << "state extra information is invalid: "; + vm::load_cell_slice(state_extra_root_).print_rec(sb); + block::gen::t_McStateExtra.print_ref(sb, state_extra_root_); + }; return td::Status::Error("state extra information is invalid"); } gen::ValidatorInfo::Record validator_info; @@ -1067,7 +1070,6 @@ Ref ShardConfig::get_shard_hash(ton::ShardIdFull id, bool exact) co ton::ShardIdFull true_id; vm::CellSlice cs; if (get_shard_hash_raw(cs, id, true_id, exact)) { - // block::gen::t_ShardDescr.print(std::cerr, vm::CellSlice{cs}); return McShardHash::unpack(cs, true_id); } else { return {}; 
@@ -1637,8 +1639,10 @@ bool ShardConfig::set_shard_info(ton::ShardIdFull shard, Ref value) { if (!gen::t_BinTree_ShardDescr.validate_ref(1024, value)) { LOG(ERROR) << "attempting to store an invalid (BinTree ShardDescr) at shard configuration position " << shard.to_str(); - gen::t_BinTree_ShardDescr.print_ref(std::cerr, value); - vm::load_cell_slice(value).print_rec(std::cerr); + FLOG(WARNING) { + gen::t_BinTree_ShardDescr.print_ref(sb, value); + vm::load_cell_slice(value).print_rec(sb); + }; return false; } auto root = shard_hashes_dict_->lookup_ref(td::BitArray<32>{shard.workchain}); @@ -1956,6 +1960,7 @@ td::Result Config::do_get_size_limits_config(td::Ref outmsg_root) { if (outmsg_root.is_null()) { return true; } - //block::gen::HashmapAug{352, block::gen::t_EnqueuedMsg, block::gen::t_uint64}.print_ref(std::cerr, outmsg_root); auto kv = std::make_unique(src, std::move(outmsg_root)); if (kv->replace_by_prefix(common_pfx.cbits(), common_pfx_len)) { heap.push_back(std::move(kv)); diff --git a/crypto/block/transaction.cpp b/crypto/block/transaction.cpp index ba50c581..34d23511 100644 --- a/crypto/block/transaction.cpp +++ b/crypto/block/transaction.cpp @@ -446,8 +446,10 @@ bool Account::unpack(Ref shard_account, ton::UnixTime now, bool s return false; } if (verbosity > 2) { - shard_account->print_rec(std::cerr, 2); - block::gen::t_ShardAccount.print(std::cerr, *shard_account); + FLOG(INFO) { + shard_account->print_rec(sb, 2); + block::gen::t_ShardAccount.print(sb, shard_account); + }; } block::gen::ShardAccount::Record acc_info; if (!(block::tlb::t_ShardAccount.validate_csr(shard_account) && tlb::unpack_exact(shard_account.write(), acc_info))) { @@ -737,9 +739,11 @@ bool Transaction::unpack_input_msg(bool ihr_delivered, const ActionPhaseConfig* return false; } if (verbosity > 2) { - fprintf(stderr, "unpacking inbound message for a new transaction: "); - block::gen::t_Message_Any.print_ref(std::cerr, in_msg); - load_cell_slice(in_msg).print_rec(std::cerr); + 
FLOG(INFO) { + sb << "unpacking inbound message for a new transaction: "; + block::gen::t_Message_Any.print_ref(sb, in_msg); + load_cell_slice(in_msg).print_rec(sb); + }; } auto cs = vm::load_cell_slice(in_msg); int tag = block::gen::t_CommonMsgInfo.get_tag(cs); @@ -1550,11 +1554,13 @@ bool Transaction::run_precompiled_contract(const ComputePhaseConfig& cfg, precom cp.actions = impl.get_c5(); int out_act_num = output_actions_count(cp.actions); if (verbosity > 2) { - std::cerr << "new smart contract data: "; - bool can_be_special = true; - load_cell_slice_special(cp.new_data, can_be_special).print_rec(std::cerr); - std::cerr << "output actions: "; - block::gen::OutList{out_act_num}.print_ref(std::cerr, cp.actions); + FLOG(INFO) { + sb << "new smart contract data: "; + bool can_be_special = true; + load_cell_slice_special(cp.new_data, can_be_special).print_rec(sb); + sb << "output actions: "; + block::gen::OutList{out_act_num}.print_ref(sb, cp.actions); + }; } } cp.mode = 0; @@ -1619,7 +1625,6 @@ bool Transaction::prepare_compute_phase(const ComputePhaseConfig& cfg) { if (in_msg_state.not_null()) { LOG(DEBUG) << "HASH(in_msg_state) = " << in_msg_state->get_hash().bits().to_hex(256) << ", account_state_hash = " << account.state_hash.to_hex(); - // vm::load_cell_slice(in_msg_state).print_rec(std::cerr); } else { LOG(DEBUG) << "in_msg_state is null"; } @@ -1775,11 +1780,13 @@ bool Transaction::prepare_compute_phase(const ComputePhaseConfig& cfg) { cp.actions = vm.get_committed_state().c5; // c5 -> action list int out_act_num = output_actions_count(cp.actions); if (verbosity > 2) { - std::cerr << "new smart contract data: "; - bool can_be_special = true; - load_cell_slice_special(cp.new_data, can_be_special).print_rec(std::cerr); - std::cerr << "output actions: "; - block::gen::OutList{out_act_num}.print_ref(std::cerr, cp.actions); + FLOG(INFO) { + sb << "new smart contract data: "; + bool can_be_special = true; + load_cell_slice_special(cp.new_data, 
can_be_special).print_rec(sb); + sb << "output actions: "; + block::gen::OutList{out_act_num}.print_ref(sb, cp.actions); + }; } } cp.mode = 0; @@ -1993,9 +2000,9 @@ bool Transaction::prepare_action_phase(const ActionPhaseConfig& cfg) { ap.remaining_balance += ap.reserved_balance; CHECK(ap.remaining_balance.is_valid()); if (ap.acc_delete_req) { - CHECK(ap.remaining_balance.is_zero()); + CHECK(cfg.extra_currency_v2 ? ap.remaining_balance.grams->sgn() == 0 : ap.remaining_balance.is_zero()); ap.acc_status_change = ActionPhase::acst_deleted; - acc_status = Account::acc_deleted; + acc_status = (ap.remaining_balance.is_zero() ? Account::acc_deleted : Account::acc_uninit); was_deleted = true; } ap.success = true; @@ -2465,6 +2472,20 @@ int Transaction::try_action_send_msg(const vm::CellSlice& cs0, ActionPhase& ap, LOG(DEBUG) << "invalid destination address in a proposed outbound message"; return check_skip_invalid(36); // invalid destination address } + if (cfg.extra_currency_v2) { + CurrencyCollection value; + if (!value.unpack(info.value)) { + LOG(DEBUG) << "invalid value:ExtraCurrencies in a proposed outbound message"; + return check_skip_invalid(37); // invalid value:CurrencyCollection + } + if (!CurrencyCollection::remove_zero_extra_currencies(value.extra, cfg.size_limits.max_msg_extra_currencies)) { + LOG(DEBUG) << "invalid value:ExtraCurrencies in a proposed outbound message: too many currencies (max " + << cfg.size_limits.max_msg_extra_currencies << ")"; + // Dict should be valid, since it was checked in t_OutListNode.validate_ref, so error here means limit exceeded + return check_skip_invalid(41); // invalid value:CurrencyCollection : too many extra currencies + } + info.value = value.pack(); + } // fetch message pricing info const MsgPrices& msg_prices = cfg.fetch_msg_prices(to_mc || account.is_masterchain()); @@ -2517,7 +2538,7 @@ int Transaction::try_action_send_msg(const vm::CellSlice& cs0, ActionPhase& ap, }; add_used_storage(msg.init, 3); // message init 
add_used_storage(msg.body, 3); // message body (the root cell itself is not counted) - if (!ext_msg) { + if (!ext_msg && !cfg.extra_currency_v2) { add_used_storage(info.value->prefetch_ref(), 0); } auto collect_fine = [&] { @@ -2588,11 +2609,19 @@ int Transaction::try_action_send_msg(const vm::CellSlice& cs0, ActionPhase& ap, if (act_rec.mode & 0x80) { // attach all remaining balance to this message - req = ap.remaining_balance; + if (cfg.extra_currency_v2) { + req.grams = ap.remaining_balance.grams; + } else { + req = ap.remaining_balance; + } act_rec.mode &= ~1; // pay fees from attached value } else if (act_rec.mode & 0x40) { // attach all remaining balance of the inbound message (in addition to the original value) - req += msg_balance_remaining; + if (cfg.extra_currency_v2) { + req.grams += msg_balance_remaining.grams; + } else { + req += msg_balance_remaining; + } if (!(act_rec.mode & 1)) { req -= ap.action_fine; if (compute_phase) { @@ -2632,6 +2661,11 @@ int Transaction::try_action_send_msg(const vm::CellSlice& cs0, ActionPhase& ap, return check_skip_invalid(37); // not enough grams } + if (cfg.extra_currency_v2 && !req.check_extra_currency_limit(cfg.size_limits.max_msg_extra_currencies)) { + LOG(DEBUG) << "too many extra currencies in the message : max " << cfg.size_limits.max_msg_extra_currencies; + return check_skip_invalid(41); // too many extra currencies + } + Ref new_extra; if (!block::sub_extra_currency(ap.remaining_balance.extra, req.extra, new_extra)) { @@ -2673,7 +2707,11 @@ int Transaction::try_action_send_msg(const vm::CellSlice& cs0, ActionPhase& ap, // clear msg_balance_remaining if it has been used if (act_rec.mode & 0xc0) { - msg_balance_remaining.set_zero(); + if (cfg.extra_currency_v2) { + msg_balance_remaining.grams = td::zero_refint(); + } else { + msg_balance_remaining.set_zero(); + } } // update balance @@ -2725,14 +2763,18 @@ int Transaction::try_action_send_msg(const vm::CellSlice& cs0, ActionPhase& ap, } if 
(!block::gen::t_Message_Any.validate_ref(new_msg)) { LOG(ERROR) << "generated outbound message is not a valid (Message Any) according to automated check"; - block::gen::t_Message_Any.print_ref(std::cerr, new_msg); - vm::load_cell_slice(new_msg).print_rec(std::cerr); + FLOG(INFO) { + block::gen::t_Message_Any.print_ref(sb, new_msg); + vm::load_cell_slice(new_msg).print_rec(sb); + }; collect_fine(); return -1; } if (verbosity > 2) { - std::cerr << "converted outbound message: "; - block::gen::t_Message_Any.print_ref(std::cerr, new_msg); + FLOG(INFO) { + sb << "converted outbound message: "; + block::gen::t_Message_Any.print_ref(sb, new_msg); + }; } ap.msgs_created++; @@ -2743,8 +2785,13 @@ int Transaction::try_action_send_msg(const vm::CellSlice& cs0, ActionPhase& ap, ap.total_fwd_fees += fees_total; if ((act_rec.mode & 0xa0) == 0xa0) { - CHECK(ap.remaining_balance.is_zero()); - ap.acc_delete_req = ap.reserved_balance.is_zero(); + if (cfg.extra_currency_v2) { + CHECK(ap.remaining_balance.grams->sgn() == 0); + ap.acc_delete_req = ap.reserved_balance.grams->sgn() == 0; + } else { + CHECK(ap.remaining_balance.is_zero()); + ap.acc_delete_req = ap.reserved_balance.is_zero(); + } } ap.tot_msg_bits += sstat.bits + new_msg_bits; @@ -3015,7 +3062,8 @@ bool Transaction::prepare_bounce_phase(const ActionPhaseConfig& cfg) { bp.fwd_fees -= bp.fwd_fees_collected; total_fees += td::make_refint(bp.fwd_fees_collected); // serialize outbound message - info.created_lt = end_lt++; + info.created_lt = start_lt + 1 + out_msgs.size(); + end_lt++; info.created_at = now; vm::CellBuilder cb; CHECK(cb.store_long_bool(5, 4) // int_msg_info$0 ihr_disabled:Bool bounce:Bool bounced:Bool @@ -3045,8 +3093,10 @@ bool Transaction::prepare_bounce_phase(const ActionPhaseConfig& cfg) { } CHECK(cb.finalize_to(bp.out_msg)); if (verbosity > 2) { - LOG(INFO) << "generated bounced message: "; - block::gen::t_Message_Any.print_ref(std::cerr, bp.out_msg); + FLOG(INFO) { + sb << "generated bounced message: "; + 
block::gen::t_Message_Any.print_ref(sb, bp.out_msg); + }; } out_msgs.push_back(bp.out_msg); bp.ok = true; @@ -3094,6 +3144,7 @@ bool Account::store_acc_status(vm::CellBuilder& cb, int acc_status) const { * Tries to update the storage statistics based on the old storage statistics and old account state without fully recomputing it. * * It succeeds if only root cell of AccountStorage is changed. + * old_cs and new_cell are AccountStorage without extra currencies (if global_version >= 10). * * @param old_stat The old storage statistics. * @param old_cs The old AccountStorage. @@ -3127,13 +3178,48 @@ static td::optional try_update_storage_stat(const vm::CellS return new_stat; } +/** + * Removes extra currencies dict from AccountStorage. + * + * This is used for computing account storage stats. + * + * @param storage_cs AccountStorage as CellSlice. + * + * @returns AccountStorage without extra currencies as Cell. + */ +static td::Ref storage_without_extra_currencies(td::Ref storage_cs) { + block::gen::AccountStorage::Record rec; + if (!block::gen::csr_unpack(storage_cs, rec)) { + LOG(ERROR) << "failed to unpack AccountStorage"; + return {}; + } + if (rec.balance->size_refs() > 0) { + block::gen::CurrencyCollection::Record balance; + if (!block::gen::csr_unpack(rec.balance, balance)) { + LOG(ERROR) << "failed to unpack AccountStorage"; + return {}; + } + balance.other = vm::CellBuilder{}.store_zeroes(1).as_cellslice_ref(); + if (!block::gen::csr_pack(rec.balance, balance)) { + LOG(ERROR) << "failed to pack AccountStorage"; + return {}; + } + } + td::Ref cell; + if (!block::gen::pack_cell(cell, rec)) { + LOG(ERROR) << "failed to pack AccountStorage"; + return {}; + } + return cell; +} + namespace transaction { /** * Computes the new state of the account. * * @returns True if the state computation is successful, false otherwise. 
*/ -bool Transaction::compute_state() { +bool Transaction::compute_state(const SerializeConfig& cfg) { if (new_total_state.not_null()) { return true; } @@ -3167,11 +3253,13 @@ bool Transaction::compute_state() { auto frozen_state = cb2.finalize(); frozen_hash = frozen_state->get_hash().bits(); if (verbosity >= 3 * 1) { // !!!DEBUG!!! - std::cerr << "freezing state of smart contract: "; - block::gen::t_StateInit.print_ref(std::cerr, frozen_state); - CHECK(block::gen::t_StateInit.validate_ref(frozen_state)); - CHECK(block::tlb::t_StateInit.validate_ref(frozen_state)); - std::cerr << "with hash " << frozen_hash.to_hex() << std::endl; + FLOG(INFO) { + sb << "freezing state of smart contract: "; + block::gen::t_StateInit.print_ref(sb, frozen_state); + CHECK(block::gen::t_StateInit.validate_ref(frozen_state)); + CHECK(block::tlb::t_StateInit.validate_ref(frozen_state)); + sb << "with hash " << frozen_hash.to_hex(); + }; } } new_code.clear(); @@ -3203,13 +3291,27 @@ bool Transaction::compute_state() { new_inner_state.clear(); } vm::CellStorageStat& stats = new_storage_stat; - auto new_stats = try_update_storage_stat(account.storage_stat, account.storage, storage); + td::Ref old_storage_for_stat = account.storage; + td::Ref new_storage_for_stat = storage; + if (cfg.extra_currency_v2) { + new_storage_for_stat = storage_without_extra_currencies(new_storage); + if (new_storage_for_stat.is_null()) { + return false; + } + if (old_storage_for_stat.not_null()) { + old_storage_for_stat = vm::load_cell_slice_ref(storage_without_extra_currencies(old_storage_for_stat)); + if (old_storage_for_stat.is_null()) { + return false; + } + } + } + auto new_stats = try_update_storage_stat(account.storage_stat, old_storage_for_stat, storage); if (new_stats) { stats = new_stats.unwrap(); } else { TD_PERF_COUNTER(transaction_storage_stat_b); td::Timer timer; - stats.add_used_storage(Ref(storage)).ensure(); + stats.add_used_storage(new_storage_for_stat).ensure(); if (timer.elapsed() > 0.1) { 
LOG(INFO) << "Compute used storage took " << timer.elapsed() << "s"; } @@ -3229,8 +3331,10 @@ bool Transaction::compute_state() { CHECK(cb.append_data_cell_bool(std::move(storage))); new_total_state = cb.finalize(); if (verbosity > 2) { - std::cerr << "new account state: "; - block::gen::t_Account.print_ref(std::cerr, new_total_state); + FLOG(INFO) { + sb << "new account state: "; + block::gen::t_Account.print_ref(sb, new_total_state); + }; } CHECK(block::tlb::t_Account.validate_ref(new_total_state)); return true; @@ -3243,11 +3347,11 @@ bool Transaction::compute_state() { * * @returns True if the serialization is successful, False otherwise. */ -bool Transaction::serialize() { +bool Transaction::serialize(const SerializeConfig& cfg) { if (root.not_null()) { return true; } - if (!compute_state()) { + if (!compute_state(cfg)) { return false; } vm::Dictionary dict{15}; @@ -3322,22 +3426,28 @@ bool Transaction::serialize() { return false; } if (verbosity >= 3 * 1) { - std::cerr << "new transaction: "; - block::gen::t_Transaction.print_ref(std::cerr, root); - vm::load_cell_slice(root).print_rec(std::cerr); + FLOG(INFO) { + sb << "new transaction: "; + block::gen::t_Transaction.print_ref(sb, root); + vm::load_cell_slice(root).print_rec(sb); + }; } if (!block::gen::t_Transaction.validate_ref(4096, root)) { LOG(ERROR) << "newly-generated transaction failed to pass automated validation:"; - vm::load_cell_slice(root).print_rec(std::cerr); - block::gen::t_Transaction.print_ref(std::cerr, root); + FLOG(INFO) { + vm::load_cell_slice(root).print_rec(sb); + block::gen::t_Transaction.print_ref(sb, root); + }; root.clear(); return false; } if (!block::tlb::t_Transaction.validate_ref(4096, root)) { LOG(ERROR) << "newly-generated transaction failed to pass hand-written validation:"; - vm::load_cell_slice(root).print_rec(std::cerr); - block::gen::t_Transaction.print_ref(std::cerr, root); + FLOG(INFO) { + vm::load_cell_slice(root).print_rec(sb); + 
block::gen::t_Transaction.print_ref(sb, root); + }; root.clear(); return false; } @@ -3707,6 +3817,7 @@ bool Account::libraries_changed() const { * @param rand_seed Pointer to the random seed. Generates a new seed if the value is `td::Bits256::zero()`. * @param compute_phase_cfg Pointer to store the compute phase configuration. * @param action_phase_cfg Pointer to store the action phase configuration. + * @param serialize_cfg Pointer to store the serialize phase configuration. * @param masterchain_create_fee Pointer to store the masterchain create fee. * @param basechain_create_fee Pointer to store the basechain create fee. * @param wc The workchain ID. @@ -3715,15 +3826,15 @@ bool Account::libraries_changed() const { td::Status FetchConfigParams::fetch_config_params( const block::ConfigInfo& config, Ref* old_mparams, std::vector* storage_prices, StoragePhaseConfig* storage_phase_cfg, td::BitArray<256>* rand_seed, ComputePhaseConfig* compute_phase_cfg, - ActionPhaseConfig* action_phase_cfg, td::RefInt256* masterchain_create_fee, td::RefInt256* basechain_create_fee, - ton::WorkchainId wc, ton::UnixTime now) { + ActionPhaseConfig* action_phase_cfg, SerializeConfig* serialize_cfg, td::RefInt256* masterchain_create_fee, + td::RefInt256* basechain_create_fee, ton::WorkchainId wc, ton::UnixTime now) { auto prev_blocks_info = config.get_prev_blocks_info(); if (prev_blocks_info.is_error()) { return prev_blocks_info.move_as_error_prefix( td::Status::Error(-668, "cannot fetch prev blocks info from masterchain configuration: ")); } return fetch_config_params(config, prev_blocks_info.move_as_ok(), old_mparams, storage_prices, storage_phase_cfg, - rand_seed, compute_phase_cfg, action_phase_cfg, masterchain_create_fee, + rand_seed, compute_phase_cfg, action_phase_cfg, serialize_cfg, masterchain_create_fee, basechain_create_fee, wc, now); } @@ -3738,6 +3849,7 @@ td::Status FetchConfigParams::fetch_config_params( * @param rand_seed Pointer to the random seed. 
Generates a new seed if the value is `td::Bits256::zero()`. * @param compute_phase_cfg Pointer to store the compute phase configuration. * @param action_phase_cfg Pointer to store the action phase configuration. + * @param serialize_cfg Pointer to store the serialize phase configuration. * @param masterchain_create_fee Pointer to store the masterchain create fee. * @param basechain_create_fee Pointer to store the basechain create fee. * @param wc The workchain ID. @@ -3747,8 +3859,8 @@ td::Status FetchConfigParams::fetch_config_params( const block::Config& config, td::Ref prev_blocks_info, Ref* old_mparams, std::vector* storage_prices, StoragePhaseConfig* storage_phase_cfg, td::BitArray<256>* rand_seed, ComputePhaseConfig* compute_phase_cfg, ActionPhaseConfig* action_phase_cfg, - td::RefInt256* masterchain_create_fee, td::RefInt256* basechain_create_fee, ton::WorkchainId wc, - ton::UnixTime now) { + SerializeConfig* serialize_cfg, td::RefInt256* masterchain_create_fee, td::RefInt256* basechain_create_fee, + ton::WorkchainId wc, ton::UnixTime now) { *old_mparams = config.get_config_param(9); { auto res = config.get_storage_prices(); @@ -3820,6 +3932,10 @@ td::Status FetchConfigParams::fetch_config_params( action_phase_cfg->disable_custom_fess = config.get_global_version() >= 8; action_phase_cfg->reserve_extra_enabled = config.get_global_version() >= 9; action_phase_cfg->mc_blackhole_addr = config.get_burning_config().blackhole_addr; + action_phase_cfg->extra_currency_v2 = config.get_global_version() >= 10; + } + { + serialize_cfg->extra_currency_v2 = config.get_global_version() >= 10; } { // fetch block_grams_created diff --git a/crypto/block/transaction.h b/crypto/block/transaction.h index 0f6952dc..8e612e6a 100644 --- a/crypto/block/transaction.h +++ b/crypto/block/transaction.h @@ -170,12 +170,17 @@ struct ActionPhaseConfig { bool message_skip_enabled{false}; bool disable_custom_fess{false}; bool reserve_extra_enabled{false}; + bool extra_currency_v2{false}; 
td::optional mc_blackhole_addr; const MsgPrices& fetch_msg_prices(bool is_masterchain) const { return is_masterchain ? fwd_mc : fwd_std; } }; +struct SerializeConfig { + bool extra_currency_v2{false}; +}; + struct CreditPhase { td::RefInt256 due_fees_collected; block::CurrencyCollection credit; @@ -389,8 +394,8 @@ struct Transaction { bool prepare_action_phase(const ActionPhaseConfig& cfg); td::Status check_state_limits(const SizeLimitsConfig& size_limits, bool update_storage_stat = true); bool prepare_bounce_phase(const ActionPhaseConfig& cfg); - bool compute_state(); - bool serialize(); + bool compute_state(const SerializeConfig& cfg); + bool serialize(const SerializeConfig& cfg); td::uint64 gas_used() const { return compute_phase ? compute_phase->gas_used : 0; } @@ -428,14 +433,14 @@ struct FetchConfigParams { std::vector* storage_prices, StoragePhaseConfig* storage_phase_cfg, td::BitArray<256>* rand_seed, ComputePhaseConfig* compute_phase_cfg, ActionPhaseConfig* action_phase_cfg, - td::RefInt256* masterchain_create_fee, td::RefInt256* basechain_create_fee, - ton::WorkchainId wc, ton::UnixTime now); + SerializeConfig* serialize_cfg, td::RefInt256* masterchain_create_fee, + td::RefInt256* basechain_create_fee, ton::WorkchainId wc, ton::UnixTime now); static td::Status fetch_config_params(const block::Config& config, Ref prev_blocks_info, Ref* old_mparams, std::vector* storage_prices, StoragePhaseConfig* storage_phase_cfg, td::BitArray<256>* rand_seed, ComputePhaseConfig* compute_phase_cfg, ActionPhaseConfig* action_phase_cfg, - td::RefInt256* masterchain_create_fee, td::RefInt256* basechain_create_fee, - ton::WorkchainId wc, ton::UnixTime now); + SerializeConfig* serialize_cfg, td::RefInt256* masterchain_create_fee, + td::RefInt256* basechain_create_fee, ton::WorkchainId wc, ton::UnixTime now); }; } // namespace block diff --git a/crypto/smartcont/tolk-stdlib/common.tolk b/crypto/smartcont/tolk-stdlib/common.tolk index 5311ec2f..82757c22 100644 --- 
a/crypto/smartcont/tolk-stdlib/common.tolk +++ b/crypto/smartcont/tolk-stdlib/common.tolk @@ -1,7 +1,7 @@ // Standard library for Tolk (LGPL licence). // It contains common functions that are available out of the box, the user doesn't have to import anything. // More specific functions are required to be imported explicitly, like "@stdlib/tvm-dicts". -tolk 0.8 +tolk 0.9 /** Tuple manipulation primitives. @@ -139,7 +139,7 @@ fun getMyOriginalBalance(): int /// `int` — balance in nanotoncoins; /// `cell` — a dictionary with 32-bit keys representing the balance of "extra currencies". @pure -fun getMyOriginalBalanceWithExtraCurrencies(): [int, cell] +fun getMyOriginalBalanceWithExtraCurrencies(): [int, cell?] asm "BALANCE"; /// Returns the logical time of the current transaction. @@ -154,7 +154,7 @@ fun getCurrentBlockLogicalTime(): int /// Returns the value of the global configuration parameter with integer index `i` as a `cell` or `null` value. @pure -fun getBlockchainConfigParam(x: int): cell +fun getBlockchainConfigParam(x: int): cell? asm "CONFIGOPTPARAM"; /// Returns the persistent contract storage cell. It can be parsed or modified with slice and builder primitives later. @@ -291,7 +291,7 @@ fun calculateSliceSizeStrict(s: slice, maxCells: int): (int, int, int) /// otherwise the returned value is one plus the maximum of depths of cells referred to from [c]. /// If [c] is a `null` instead of a cell, returns zero. @pure -fun getCellDepth(c: cell): int +fun getCellDepth(c: cell?): int asm "CDEPTH"; /// Returns the depth of `slice` [s]. @@ -417,12 +417,12 @@ fun getLastBits(self: slice, len: int): slice /// Loads a dictionary (TL HashMapE structure, represented as TVM cell) from a slice. /// Returns `null` if `nothing` constructor is used. @pure -fun loadDict(mutate self: slice): cell +fun loadDict(mutate self: slice): cell? asm( -> 1 0) "LDDICT"; /// Preloads a dictionary (cell) from a slice. 
@pure -fun preloadDict(self: slice): cell +fun preloadDict(self: slice): cell? asm "PLDDICT"; /// Loads a dictionary as [loadDict], but returns only the remainder of the slice. @@ -433,12 +433,12 @@ fun skipDict(mutate self: slice): self /// Loads (Maybe ^Cell) from a slice. /// In other words, loads 1 bit: if it's true, loads the first ref, otherwise returns `null`. @pure -fun loadMaybeRef(mutate self: slice): cell +fun loadMaybeRef(mutate self: slice): cell? asm( -> 1 0) "LDOPTREF"; /// Preloads (Maybe ^Cell) from a slice. @pure -fun preloadMaybeRef(self: slice): cell +fun preloadMaybeRef(self: slice): cell? asm "PLDOPTREF"; /// Loads (Maybe ^Cell), but returns only the remainder of the slice. @@ -497,13 +497,13 @@ fun storeBool(mutate self: builder, x: bool): self /// Stores dictionary (represented by TVM `cell` or `null`) into a builder. /// In other words, stores a `1`-bit and a reference to [c] if [c] is not `null` and `0`-bit otherwise. @pure -fun storeDict(mutate self: builder, c: cell): self +fun storeDict(mutate self: builder, c: cell?): self asm(c self) "STDICT"; /// Stores (Maybe ^Cell) into a builder. /// In other words, if cell is `null`, store '0' bit; otherwise, store '1' and a ref to [c]. @pure -fun storeMaybeRef(mutate self: builder, c: cell): self +fun storeMaybeRef(mutate self: builder, c: cell?): self asm(c self) "STOPTREF"; /// Concatenates two builders. @@ -661,7 +661,7 @@ fun reserveToncoinsOnBalance(nanoTonCoins: int, reserveMode: int): void /// Similar to [reserveToncoinsOnBalance], but also accepts a dictionary extraAmount (represented by a cell or null) /// with extra currencies. In this way currencies other than Toncoin can be reserved. 
-fun reserveExtraCurrenciesOnBalance(nanoTonCoins: int, extraAmount: cell, reserveMode: int): void +fun reserveExtraCurrenciesOnBalance(nanoTonCoins: int, extraAmount: cell?, reserveMode: int): void asm "RAWRESERVEX"; diff --git a/crypto/smartcont/tolk-stdlib/gas-payments.tolk b/crypto/smartcont/tolk-stdlib/gas-payments.tolk index 2ac32f48..9873ca94 100644 --- a/crypto/smartcont/tolk-stdlib/gas-payments.tolk +++ b/crypto/smartcont/tolk-stdlib/gas-payments.tolk @@ -1,5 +1,5 @@ // A part of standard library for Tolk -tolk 0.8 +tolk 0.9 /** Gas and payment related primitives. diff --git a/crypto/smartcont/tolk-stdlib/lisp-lists.tolk b/crypto/smartcont/tolk-stdlib/lisp-lists.tolk index 0cb17841..e63438b5 100644 --- a/crypto/smartcont/tolk-stdlib/lisp-lists.tolk +++ b/crypto/smartcont/tolk-stdlib/lisp-lists.tolk @@ -1,5 +1,5 @@ // A part of standard library for Tolk -tolk 0.8 +tolk 0.9 /** Lisp-style lists are nested 2-elements tuples: `(1, (2, (3, null)))` represents list `[1, 2, 3]`. @@ -14,17 +14,18 @@ fun createEmptyList(): tuple /// Adds an element to the beginning of lisp-style list. /// Note, that it does not mutate the list: instead, it returns a new one (it's a lisp pattern). @pure -fun listPrepend(head: X, tail: tuple): tuple +fun listPrepend(head: X, tail: tuple?): tuple asm "CONS"; /// Extracts the head and the tail of lisp-style list. @pure -fun listSplit(list: tuple): (X, tuple) +fun listSplit(list: tuple): (X, tuple?) asm "UNCONS"; /// Extracts the tail and the head of lisp-style list. +/// After extracting the last element, tuple is assigned to null. @pure -fun listNext(mutate self: tuple): X +fun listNext(mutate self: tuple?): X asm( -> 1 0) "UNCONS"; /// Returns the head of lisp-style list. @@ -34,5 +35,5 @@ fun listGetHead(list: tuple): X /// Returns the tail of lisp-style list. @pure -fun listGetTail(list: tuple): tuple +fun listGetTail(list: tuple): tuple? 
asm "CDR"; diff --git a/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk b/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk index 5c436239..ee205687 100644 --- a/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk +++ b/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk @@ -1,5 +1,5 @@ // A part of standard library for Tolk -tolk 0.8 +tolk 0.9 /** Dictionaries are represented as `cell` data type (cells can store anything, dicts in particular). @@ -9,288 +9,289 @@ tolk 0.8 - uDict* - dicts with unsigned integer keys - sDict* - dicts with arbitrary slice keys When accessing a dict element, you should not only provide a key, but provide keyLen, - since for optimization, for optimization, key length is not stored in the dictionary itself. + since for optimization, key length is not stored in the dictionary itself. + Every dictionary object (`self` parameter) can be null. TVM NULL is essentially "empty dictionary". */ /// Creates an empty dictionary, which is actually a null value. Equivalent to PUSHNULL @pure -fun createEmptyDict(): cell +fun createEmptyDict(): cell? asm "NEWDICT"; /// Checks whether a dictionary is empty. 
@pure -fun dictIsEmpty(self: cell): bool +fun dictIsEmpty(self: cell?): bool asm "DICTEMPTY"; @pure -fun iDictGet(self: cell, keyLen: int, key: int): (slice, bool) +fun iDictGet(self: cell?, keyLen: int, key: int): (slice?, bool) asm(key self keyLen) "DICTIGET" "NULLSWAPIFNOT"; @pure -fun uDictGet(self: cell, keyLen: int, key: int): (slice, bool) +fun uDictGet(self: cell?, keyLen: int, key: int): (slice?, bool) asm(key self keyLen) "DICTUGET" "NULLSWAPIFNOT"; @pure -fun sDictGet(self: cell, keyLen: int, key: slice): (slice, bool) +fun sDictGet(self: cell?, keyLen: int, key: slice): (slice?, bool) asm(key self keyLen) "DICTGET" "NULLSWAPIFNOT"; @pure -fun iDictSet(mutate self: cell, keyLen: int, key: int, value: slice): void +fun iDictSet(mutate self: cell?, keyLen: int, key: int, value: slice): void asm(value key self keyLen) "DICTISET"; @pure -fun uDictSet(mutate self: cell, keyLen: int, key: int, value: slice): void +fun uDictSet(mutate self: cell?, keyLen: int, key: int, value: slice): void asm(value key self keyLen) "DICTUSET"; @pure -fun sDictSet(mutate self: cell, keyLen: int, key: slice, value: slice): void +fun sDictSet(mutate self: cell?, keyLen: int, key: slice, value: slice): void asm(value key self keyLen) "DICTSET"; @pure -fun iDictSetRef(mutate self: cell, keyLen: int, key: int, value: cell): void +fun iDictSetRef(mutate self: cell?, keyLen: int, key: int, value: cell): void asm(value key self keyLen) "DICTISETREF"; @pure -fun uDictSetRef(mutate self: cell, keyLen: int, key: int, value: cell): void +fun uDictSetRef(mutate self: cell?, keyLen: int, key: int, value: cell): void asm(value key self keyLen) "DICTUSETREF"; @pure -fun sDictSetRef(mutate self: cell, keyLen: int, key: slice, value: cell): void +fun sDictSetRef(mutate self: cell?, keyLen: int, key: slice, value: cell): void asm(value key self keyLen) "DICTSETREF"; @pure -fun iDictSetIfNotExists(mutate self: cell, keyLen: int, key: int, value: slice): bool +fun iDictSetIfNotExists(mutate self: 
cell?, keyLen: int, key: int, value: slice): bool asm(value key self keyLen) "DICTIADD"; @pure -fun uDictSetIfNotExists(mutate self: cell, keyLen: int, key: int, value: slice): bool +fun uDictSetIfNotExists(mutate self: cell?, keyLen: int, key: int, value: slice): bool asm(value key self keyLen) "DICTUADD"; @pure -fun iDictSetIfExists(mutate self: cell, keyLen: int, key: int, value: slice): bool +fun iDictSetIfExists(mutate self: cell?, keyLen: int, key: int, value: slice): bool asm(value key self keyLen) "DICTIREPLACE"; @pure -fun uDictSetIfExists(mutate self: cell, keyLen: int, key: int, value: slice): bool +fun uDictSetIfExists(mutate self: cell?, keyLen: int, key: int, value: slice): bool asm(value key self keyLen) "DICTUREPLACE"; @pure -fun iDictGetRef(self: cell, keyLen: int, key: int): (cell, bool) +fun iDictGetRef(self: cell?, keyLen: int, key: int): (cell?, bool) asm(key self keyLen) "DICTIGETREF" "NULLSWAPIFNOT"; @pure -fun uDictGetRef(self: cell, keyLen: int, key: int): (cell, bool) +fun uDictGetRef(self: cell?, keyLen: int, key: int): (cell?, bool) asm(key self keyLen) "DICTUGETREF" "NULLSWAPIFNOT"; @pure -fun sDictGetRef(self: cell, keyLen: int, key: slice): (cell, bool) +fun sDictGetRef(self: cell?, keyLen: int, key: slice): (cell?, bool) asm(key self keyLen) "DICTGETREF" "NULLSWAPIFNOT"; @pure -fun iDictGetRefOrNull(self: cell, keyLen: int, key: int): cell +fun iDictGetRefOrNull(self: cell?, keyLen: int, key: int): cell? asm(key self keyLen) "DICTIGETOPTREF"; @pure -fun uDictGetRefOrNull(self: cell, keyLen: int, key: int): cell +fun uDictGetRefOrNull(self: cell?, keyLen: int, key: int): cell? asm(key self keyLen) "DICTUGETOPTREF"; @pure -fun sDictGetRefOrNull(self: cell, keyLen: int, key: slice): cell +fun sDictGetRefOrNull(self: cell?, keyLen: int, key: slice): cell? 
asm(key self keyLen) "DICTGETOPTREF"; @pure -fun iDictDelete(mutate self: cell, keyLen: int, key: int): bool +fun iDictDelete(mutate self: cell?, keyLen: int, key: int): bool asm(key self keyLen) "DICTIDEL"; @pure -fun uDictDelete(mutate self: cell, keyLen: int, key: int): bool +fun uDictDelete(mutate self: cell?, keyLen: int, key: int): bool asm(key self keyLen) "DICTUDEL"; @pure -fun sDictDelete(mutate self: cell, keyLen: int, key: slice): bool +fun sDictDelete(mutate self: cell?, keyLen: int, key: slice): bool asm(key self keyLen) "DICTDEL"; @pure -fun iDictSetAndGet(mutate self: cell, keyLen: int, key: int, value: slice): (slice, bool) +fun iDictSetAndGet(mutate self: cell?, keyLen: int, key: int, value: slice): (slice?, bool) asm(value key self keyLen) "DICTISETGET" "NULLSWAPIFNOT"; @pure -fun uDictSetAndGet(mutate self: cell, keyLen: int, key: int, value: slice): (slice, bool) +fun uDictSetAndGet(mutate self: cell?, keyLen: int, key: int, value: slice): (slice?, bool) asm(value key self keyLen) "DICTUSETGET" "NULLSWAPIFNOT"; @pure -fun sDictSetAndGet(mutate self: cell, keyLen: int, key: slice, value: slice): (slice, bool) +fun sDictSetAndGet(mutate self: cell?, keyLen: int, key: slice, value: slice): (slice?, bool) asm(value key self keyLen) "DICTSETGET" "NULLSWAPIFNOT"; @pure -fun iDictSetAndGetRefOrNull(mutate self: cell, keyLen: int, key: int, value: cell): cell +fun iDictSetAndGetRefOrNull(mutate self: cell?, keyLen: int, key: int, value: cell): cell? asm(value key self keyLen) "DICTISETGETOPTREF"; @pure -fun uDictSetAndGetRefOrNull(mutate self: cell, keyLen: int, key: int, value: cell): cell +fun uDictSetAndGetRefOrNull(mutate self: cell?, keyLen: int, key: int, value: cell): cell? 
asm(value key self keyLen) "DICTUSETGETOPTREF"; @pure -fun iDictDeleteAndGet(mutate self: cell, keyLen: int, key: int): (slice, bool) +fun iDictDeleteAndGet(mutate self: cell?, keyLen: int, key: int): (slice?, bool) asm(key self keyLen) "DICTIDELGET" "NULLSWAPIFNOT"; @pure -fun uDictDeleteAndGet(mutate self: cell, keyLen: int, key: int): (slice, bool) +fun uDictDeleteAndGet(mutate self: cell?, keyLen: int, key: int): (slice?, bool) asm(key self keyLen) "DICTUDELGET" "NULLSWAPIFNOT"; @pure -fun sDictDeleteAndGet(mutate self: cell, keyLen: int, key: slice): (slice, bool) +fun sDictDeleteAndGet(mutate self: cell?, keyLen: int, key: slice): (slice?, bool) asm(key self keyLen) "DICTDELGET" "NULLSWAPIFNOT"; @pure -fun iDictSetBuilder(mutate self: cell, keyLen: int, key: int, value: builder): void +fun iDictSetBuilder(mutate self: cell?, keyLen: int, key: int, value: builder): void asm(value key self keyLen) "DICTISETB"; @pure -fun uDictSetBuilder(mutate self: cell, keyLen: int, key: int, value: builder): void +fun uDictSetBuilder(mutate self: cell?, keyLen: int, key: int, value: builder): void asm(value key self keyLen) "DICTUSETB"; @pure -fun sDictSetBuilder(mutate self: cell, keyLen: int, key: slice, value: builder): void +fun sDictSetBuilder(mutate self: cell?, keyLen: int, key: slice, value: builder): void asm(value key self keyLen) "DICTSETB"; @pure -fun iDictSetBuilderIfNotExists(mutate self: cell, keyLen: int, key: int, value: builder): bool +fun iDictSetBuilderIfNotExists(mutate self: cell?, keyLen: int, key: int, value: builder): bool asm(value key self keyLen) "DICTIADDB"; @pure -fun uDictSetBuilderIfNotExists(mutate self: cell, keyLen: int, key: int, value: builder): bool +fun uDictSetBuilderIfNotExists(mutate self: cell?, keyLen: int, key: int, value: builder): bool asm(value key self keyLen) "DICTUADDB"; @pure -fun iDictSetBuilderIfExists(mutate self: cell, keyLen: int, key: int, value: builder): bool +fun iDictSetBuilderIfExists(mutate self: cell?, keyLen: 
int, key: int, value: builder): bool asm(value key self keyLen) "DICTIREPLACEB"; @pure -fun uDictSetBuilderIfExists(mutate self: cell, keyLen: int, key: int, value: builder): bool +fun uDictSetBuilderIfExists(mutate self: cell?, keyLen: int, key: int, value: builder): bool asm(value key self keyLen) "DICTUREPLACEB"; @pure -fun iDictDeleteFirstAndGet(mutate self: cell, keyLen: int): (int, slice, bool) +fun iDictDeleteFirstAndGet(mutate self: cell?, keyLen: int): (int?, slice?, bool) asm(-> 0 2 1 3) "DICTIREMMIN" "NULLSWAPIFNOT2"; @pure -fun uDictDeleteFirstAndGet(mutate self: cell, keyLen: int): (int, slice, bool) +fun uDictDeleteFirstAndGet(mutate self: cell?, keyLen: int): (int?, slice?, bool) asm(-> 0 2 1 3) "DICTUREMMIN" "NULLSWAPIFNOT2"; @pure -fun sDictDeleteFirstAndGet(mutate self: cell, keyLen: int): (slice, slice, bool) +fun sDictDeleteFirstAndGet(mutate self: cell?, keyLen: int): (slice?, slice?, bool) asm(-> 0 2 1 3) "DICTREMMIN" "NULLSWAPIFNOT2"; @pure -fun iDictDeleteLastAndGet(mutate self: cell, keyLen: int): (int, slice, bool) +fun iDictDeleteLastAndGet(mutate self: cell?, keyLen: int): (int?, slice?, bool) asm(-> 0 2 1 3) "DICTIREMMAX" "NULLSWAPIFNOT2"; @pure -fun uDictDeleteLastAndGet(mutate self: cell, keyLen: int): (int, slice, bool) +fun uDictDeleteLastAndGet(mutate self: cell?, keyLen: int): (int?, slice?, bool) asm(-> 0 2 1 3) "DICTUREMMAX" "NULLSWAPIFNOT2"; @pure -fun sDictDeleteLastAndGet(mutate self: cell, keyLen: int): (slice, slice, bool) +fun sDictDeleteLastAndGet(mutate self: cell?, keyLen: int): (slice?, slice?, bool) asm(-> 0 2 1 3) "DICTREMMAX" "NULLSWAPIFNOT2"; @pure -fun iDictGetFirst(self: cell, keyLen: int): (int, slice, bool) +fun iDictGetFirst(self: cell?, keyLen: int): (int?, slice?, bool) asm (-> 1 0 2) "DICTIMIN" "NULLSWAPIFNOT2"; @pure -fun uDictGetFirst(self: cell, keyLen: int): (int, slice, bool) +fun uDictGetFirst(self: cell?, keyLen: int): (int?, slice?, bool) asm (-> 1 0 2) "DICTUMIN" "NULLSWAPIFNOT2"; @pure -fun 
sDictGetFirst(self: cell, keyLen: int): (slice, slice, bool) +fun sDictGetFirst(self: cell?, keyLen: int): (slice?, slice?, bool) asm (-> 1 0 2) "DICTMIN" "NULLSWAPIFNOT2"; @pure -fun iDictGetFirstAsRef(self: cell, keyLen: int): (int, cell, bool) +fun iDictGetFirstAsRef(self: cell?, keyLen: int): (int?, cell?, bool) asm (-> 1 0 2) "DICTIMINREF" "NULLSWAPIFNOT2"; @pure -fun uDictGetFirstAsRef(self: cell, keyLen: int): (int, cell, bool) +fun uDictGetFirstAsRef(self: cell?, keyLen: int): (int?, cell?, bool) asm (-> 1 0 2) "DICTUMINREF" "NULLSWAPIFNOT2"; @pure -fun sDictGetFirstAsRef(self: cell, keyLen: int): (slice, cell, bool) +fun sDictGetFirstAsRef(self: cell?, keyLen: int): (slice?, cell?, bool) asm (-> 1 0 2) "DICTMINREF" "NULLSWAPIFNOT2"; @pure -fun iDictGetLast(self: cell, keyLen: int): (int, slice, bool) +fun iDictGetLast(self: cell?, keyLen: int): (int?, slice?, bool) asm (-> 1 0 2) "DICTIMAX" "NULLSWAPIFNOT2"; @pure -fun uDictGetLast(self: cell, keyLen: int): (int, slice, bool) +fun uDictGetLast(self: cell?, keyLen: int): (int?, slice?, bool) asm (-> 1 0 2) "DICTUMAX" "NULLSWAPIFNOT2"; @pure -fun sDictGetLast(self: cell, keyLen: int): (slice, slice, bool) +fun sDictGetLast(self: cell?, keyLen: int): (slice?, slice?, bool) asm (-> 1 0 2) "DICTMAX" "NULLSWAPIFNOT2"; @pure -fun iDictGetLastAsRef(self: cell, keyLen: int): (int, cell, bool) +fun iDictGetLastAsRef(self: cell?, keyLen: int): (int?, cell?, bool) asm (-> 1 0 2) "DICTIMAXREF" "NULLSWAPIFNOT2"; @pure -fun uDictGetLastAsRef(self: cell, keyLen: int): (int, cell, bool) +fun uDictGetLastAsRef(self: cell?, keyLen: int): (int?, cell?, bool) asm (-> 1 0 2) "DICTUMAXREF" "NULLSWAPIFNOT2"; @pure -fun sDictGetLastAsRef(self: cell, keyLen: int): (slice, cell, bool) +fun sDictGetLastAsRef(self: cell?, keyLen: int): (slice?, cell?, bool) asm (-> 1 0 2) "DICTMAXREF" "NULLSWAPIFNOT2"; @pure -fun iDictGetNext(self: cell, keyLen: int, pivot: int): (int, slice, bool) +fun iDictGetNext(self: cell?, keyLen: int, pivot: 
int): (int?, slice?, bool) asm(pivot self keyLen -> 1 0 2) "DICTIGETNEXT" "NULLSWAPIFNOT2"; @pure -fun uDictGetNext(self: cell, keyLen: int, pivot: int): (int, slice, bool) +fun uDictGetNext(self: cell?, keyLen: int, pivot: int): (int?, slice?, bool) asm(pivot self keyLen -> 1 0 2) "DICTUGETNEXT" "NULLSWAPIFNOT2"; @pure -fun iDictGetNextOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, bool) +fun iDictGetNextOrEqual(self: cell?, keyLen: int, pivot: int): (int?, slice?, bool) asm(pivot self keyLen -> 1 0 2) "DICTIGETNEXTEQ" "NULLSWAPIFNOT2"; @pure -fun uDictGetNextOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, bool) +fun uDictGetNextOrEqual(self: cell?, keyLen: int, pivot: int): (int?, slice?, bool) asm(pivot self keyLen -> 1 0 2) "DICTUGETNEXTEQ" "NULLSWAPIFNOT2"; @pure -fun iDictGetPrev(self: cell, keyLen: int, pivot: int): (int, slice, bool) +fun iDictGetPrev(self: cell?, keyLen: int, pivot: int): (int?, slice?, bool) asm(pivot self keyLen -> 1 0 2) "DICTIGETPREV" "NULLSWAPIFNOT2"; @pure -fun uDictGetPrev(self: cell, keyLen: int, pivot: int): (int, slice, bool) +fun uDictGetPrev(self: cell?, keyLen: int, pivot: int): (int?, slice?, bool) asm(pivot self keyLen -> 1 0 2) "DICTUGETPREV" "NULLSWAPIFNOT2"; @pure -fun iDictGetPrevOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, bool) +fun iDictGetPrevOrEqual(self: cell?, keyLen: int, pivot: int): (int?, slice?, bool) asm(pivot self keyLen -> 1 0 2) "DICTIGETPREVEQ" "NULLSWAPIFNOT2"; @pure -fun uDictGetPrevOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, bool) +fun uDictGetPrevOrEqual(self: cell?, keyLen: int, pivot: int): (int?, slice?, bool) asm(pivot self keyLen -> 1 0 2) "DICTUGETPREVEQ" "NULLSWAPIFNOT2"; @@ -299,13 +300,13 @@ fun uDictGetPrevOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, bool) */ @pure -fun prefixDictGet(self: cell, keyLen: int, key: slice): (slice, slice, slice, bool) +fun prefixDictGet(self: cell?, keyLen: int, key: slice): (slice, slice?, 
slice?, bool) asm(key self keyLen) "PFXDICTGETQ" "NULLSWAPIFNOT2"; @pure -fun prefixDictSet(mutate self: cell, keyLen: int, key: slice, value: slice): bool +fun prefixDictSet(mutate self: cell?, keyLen: int, key: slice, value: slice): bool asm(value key self keyLen) "PFXDICTSET"; @pure -fun prefixDictDelete(mutate self: cell, keyLen: int, key: slice): bool +fun prefixDictDelete(mutate self: cell?, keyLen: int, key: slice): bool asm(key self keyLen) "PFXDICTDEL"; diff --git a/crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk b/crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk index 72a54aac..136eaa4a 100644 --- a/crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk +++ b/crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk @@ -1,5 +1,5 @@ // A part of standard library for Tolk -tolk 0.8 +tolk 0.9 /// Usually `c3` has a continuation initialized by the whole code of the contract. It is used for function calls. /// The primitive returns the current value of `c3`. diff --git a/crypto/tl/tlblib.cpp b/crypto/tl/tlblib.cpp index 05ea8e1c..de5a483c 100644 --- a/crypto/tl/tlblib.cpp +++ b/crypto/tl/tlblib.cpp @@ -196,6 +196,13 @@ bool TLB::print_ref(std::ostream& os, Ref cell_ref, int indent, int re return pp.fail_unless(print_ref(pp, std::move(cell_ref))); } +bool TLB::print_ref(td::StringBuilder& sb, Ref cell_ref, int indent, int rec_limit) const { + std::ostringstream ss; + auto result = print_ref(ss, std::move(cell_ref), indent, rec_limit); + sb << ss.str(); + return result; +} + std::string TLB::as_string_skip(vm::CellSlice& cs, int indent) const { std::ostringstream os; print_skip(os, cs, indent); diff --git a/crypto/tl/tlblib.hpp b/crypto/tl/tlblib.hpp index a6350ece..c10049a9 100644 --- a/crypto/tl/tlblib.hpp +++ b/crypto/tl/tlblib.hpp @@ -246,7 +246,14 @@ class TLB { bool print(std::ostream& os, Ref cs_ref, int indent = 0, int rec_limit = 0) const { return print(os, *cs_ref, indent, rec_limit); } + bool print(td::StringBuilder& sb, Ref cs_ref, int indent = 0, int rec_limit = 0) const 
{ + std::ostringstream ss; + auto result = print(ss, *cs_ref, indent, rec_limit); + sb << ss.str(); + return result; + } bool print_ref(std::ostream& os, Ref cell_ref, int indent = 0, int rec_limit = 0) const; + bool print_ref(td::StringBuilder& sb, Ref cell_ref, int indent = 0, int rec_limit = 0) const; bool print_ref(int rec_limit, std::ostream& os, Ref cell_ref, int indent = 0) const { return print_ref(os, std::move(cell_ref), indent, rec_limit); } diff --git a/crypto/vm/boc.cpp b/crypto/vm/boc.cpp index 7ec8bdd1..72afb998 100644 --- a/crypto/vm/boc.cpp +++ b/crypto/vm/boc.cpp @@ -1153,8 +1153,12 @@ td::Result CellStorageStat::add_used_storage(Refsecond; } } - vm::CellSlice cs{vm::NoVm{}, std::move(cell)}; - return add_used_storage(std::move(cs), kill_dup, skip_count_root); + vm::CellSlice cs{vm::NoVm{}, cell}; + TRY_RESULT(res, add_used_storage(std::move(cs), kill_dup, skip_count_root)); + if (kill_dup) { + seen[cell->get_hash()] = res; + } + return res; } void NewCellStorageStat::add_cell(Ref cell) { diff --git a/crypto/vm/boc.h b/crypto/vm/boc.h index 8adf240f..17e7eb69 100644 --- a/crypto/vm/boc.h +++ b/crypto/vm/boc.h @@ -101,9 +101,9 @@ class NewCellStorageStat { private: const CellUsageTree* usage_tree_; - std::set seen_; + td::HashSet seen_; Stat stat_; - std::set proof_seen_; + td::HashSet proof_seen_; Stat proof_stat_; const NewCellStorageStat* parent_{nullptr}; @@ -117,7 +117,7 @@ struct CellStorageStat { struct CellInfo { td::uint32 max_merkle_depth = 0; }; - std::map seen; + td::HashMap seen; CellStorageStat() : cells(0), bits(0), public_cells(0) { } explicit CellStorageStat(unsigned long long limit_cells) @@ -173,7 +173,7 @@ class ProofStorageStat { enum CellStatus { c_none = 0, c_prunned = 1, c_loaded = 2 }; - std::map cells_; + td::HashMap cells_; td::uint64 proof_size_ = 0; }; diff --git a/crypto/vm/cells/CellSlice.cpp b/crypto/vm/cells/CellSlice.cpp index 4d8c3c5a..9cd3e931 100644 --- a/crypto/vm/cells/CellSlice.cpp +++ 
b/crypto/vm/cells/CellSlice.cpp @@ -1026,6 +1026,13 @@ bool CellSlice::print_rec(std::ostream& os, int indent) const { return print_rec(os, &limit, indent); } +bool CellSlice::print_rec(td::StringBuilder& sb, int indent) const { + std::ostringstream ss; + auto result = print_rec(ss, indent); + sb << ss.str(); + return result; +} + bool CellSlice::print_rec(int limit, std::ostream& os, int indent) const { return print_rec(os, &limit, indent); } diff --git a/crypto/vm/cells/CellSlice.h b/crypto/vm/cells/CellSlice.h index 33fad741..ecce30f5 100644 --- a/crypto/vm/cells/CellSlice.h +++ b/crypto/vm/cells/CellSlice.h @@ -257,6 +257,7 @@ class CellSlice : public td::CntObject { void dump(std::ostream& os, int level = 0, bool endl = true) const; void dump_hex(std::ostream& os, int mode = 0, bool endl = false) const; bool print_rec(std::ostream& os, int indent = 0) const; + bool print_rec(td::StringBuilder& sb, int indent = 0) const; bool print_rec(std::ostream& os, int* limit, int indent = 0) const; bool print_rec(int limit, std::ostream& os, int indent = 0) const; void error() const { diff --git a/crypto/vm/tonops.cpp b/crypto/vm/tonops.cpp index 5d90b8fd..aab1711f 100644 --- a/crypto/vm/tonops.cpp +++ b/crypto/vm/tonops.cpp @@ -1761,6 +1761,10 @@ int exec_send_message(VmState* st) { vm::VmStorageStat stat(max_cells); CellSlice cs = load_cell_slice(msg_cell); cs.skip_first(cs.size()); + if (st->get_global_version() >= 10 && have_extra_currencies) { + // Skip extra currency dict + cs.advance_refs(1); + } stat.add_storage(cs); if (!ext_msg) { @@ -1773,7 +1777,9 @@ int exec_send_message(VmState* st) { if (value.is_null()) { throw VmError{Excno::type_chk, "invalid param BALANCE"}; } - have_extra_currencies |= !tuple_index(balance, 1).as_cell().is_null(); + if (st->get_global_version() < 10) { + have_extra_currencies |= !tuple_index(balance, 1).as_cell().is_null(); + } } else if (mode & 64) { // value += value of incoming message Ref balance = get_param(st, 11).as_tuple(); if 
(balance.is_null()) { @@ -1784,7 +1790,9 @@ int exec_send_message(VmState* st) { throw VmError{Excno::type_chk, "invalid param INCOMINGVALUE"}; } value += balance_grams; - have_extra_currencies |= !tuple_index(balance, 1).as_cell().is_null(); + if (st->get_global_version() < 10) { + have_extra_currencies |= !tuple_index(balance, 1).as_cell().is_null(); + } } } diff --git a/doc/GlobalVersions.md b/doc/GlobalVersions.md index f4156ca0..77963e95 100644 --- a/doc/GlobalVersions.md +++ b/doc/GlobalVersions.md @@ -134,4 +134,25 @@ Example: if the last masterchain block seqno is `19071` then the list contains b - `PFXDICTADD`, `PFXDICTSET`, `PFXDICTREPLACE`, `PFXDICTDEL`, `GETGASFEE`, `GETSTORAGEFEE`, `GETFORWARDFEE`, `GETORIGINALFWDFEE`, `GETGASFEESIMPLE`, `GETFORWARDFEESIMPLE`, `HASHEXT` - Now setting the contract code to a library cell does not consume additional gas on execution of the code. - Temporary increase gas limit for some accounts (see [this post](https://t.me/tondev_news/129) for details, `override_gas_limit` in `transaction.cpp` for the list of accounts). -- Fix recursive jump to continuations with non-null control data. \ No newline at end of file +- Fix recursive jump to continuations with non-null control data. + +## Version 10 + +### Extra currencies +- Internal messages cannot carry more than 2 different extra currencies. The limit can be changed in size limits config (`ConfigParam 43`). +- Amount of an extra currency in an output action "send message" can be zero. + - In action phase zero values are automatically deleted from the dictionary before sending. + - However, the size of the extra currency dictionary in the "send message" action should not be greater than 2 (or the value in size limits config). +- Extra currency dictionary is not counted in message size and does not affect message fees. +- Message mode `+64` (carry all remaining message balance) is now considered as "carry all remaining TONs from message balance". 
+- Message mode `+128` (carry all remaining account balance) is now considered as "carry all remaining TONs from account balance". +- Message mode `+32` (delete account if balance is zero) deletes account if it has zero TONs, regardless of extra currencies. + - Deleted accounts with extra currencies become `account_uninit`, extra currencies remain on the account. +- `SENDMSG` in TVM calculates message size and fees without extra currencies, uses new `+64` and `+128` mode behavior. + - `SENDMSG` does not check the number of extra currencies. +- Extra currency dictionary is not counted in the account size and does not affect storage fees. + - Accounts with already existing extra currencies will get their sizes recomputed without EC only after modifying `AccountState`. + +### TVM changes +- `SENDMSG` calculates message size and fees without extra currencies, uses new `+64` and `+128` mode behavior. + - `SENDMSG` does not check the number of extra currencies. diff --git a/emulator/CMakeLists.txt b/emulator/CMakeLists.txt index a0799541..663c8fd2 100644 --- a/emulator/CMakeLists.txt +++ b/emulator/CMakeLists.txt @@ -1,8 +1,6 @@ cmake_minimum_required(VERSION 3.5 FATAL_ERROR) -if (NOT OPENSSL_FOUND) - find_package(OpenSSL REQUIRED) -endif() +option(EMULATOR_STATIC "Build emulator as static library" OFF) set(EMULATOR_STATIC_SOURCE transaction-emulator.cpp @@ -22,7 +20,7 @@ include(GenerateExportHeader) add_library(emulator_static STATIC ${EMULATOR_STATIC_SOURCE}) target_link_libraries(emulator_static PUBLIC ton_crypto smc-envelope) -if (USE_EMSCRIPTEN) +if (EMULATOR_STATIC OR USE_EMSCRIPTEN) add_library(emulator STATIC ${EMULATOR_SOURCE}) else() add_library(emulator SHARED ${EMULATOR_SOURCE}) @@ -35,7 +33,7 @@ else() endif() generate_export_header(emulator EXPORT_FILE_NAME ${CMAKE_CURRENT_BINARY_DIR}/emulator_export.h) -if (USE_EMSCRIPTEN) +if (EMULATOR_STATIC OR USE_EMSCRIPTEN) target_compile_definitions(emulator PUBLIC EMULATOR_STATIC_DEFINE) endif()
target_include_directories(emulator PUBLIC diff --git a/emulator/transaction-emulator.cpp b/emulator/transaction-emulator.cpp index e87b2dfb..6267f9bd 100644 --- a/emulator/transaction-emulator.cpp +++ b/emulator/transaction-emulator.cpp @@ -16,6 +16,7 @@ td::Result> TransactionEmu block::StoragePhaseConfig storage_phase_cfg{&storage_prices}; block::ComputePhaseConfig compute_phase_cfg; block::ActionPhaseConfig action_phase_cfg; + block::SerializeConfig serialize_config; td::RefInt256 masterchain_create_fee, basechain_create_fee; if (!utime) { @@ -25,11 +26,9 @@ td::Result> TransactionEmu utime = (unsigned)std::time(nullptr); } - auto fetch_res = block::FetchConfigParams::fetch_config_params(*config_, prev_blocks_info_, &old_mparams, - &storage_prices, &storage_phase_cfg, - &rand_seed_, &compute_phase_cfg, - &action_phase_cfg, &masterchain_create_fee, - &basechain_create_fee, account.workchain, utime); + auto fetch_res = block::FetchConfigParams::fetch_config_params( + *config_, prev_blocks_info_, &old_mparams, &storage_prices, &storage_phase_cfg, &rand_seed_, &compute_phase_cfg, + &action_phase_cfg, &serialize_config, &masterchain_create_fee, &basechain_create_fee, account.workchain, utime); if(fetch_res.is_error()) { return fetch_res.move_as_error_prefix("cannot fetch config params "); } @@ -66,7 +65,7 @@ td::Result> TransactionEmu return std::make_unique(std::move(vm_log), vm_exit_code, elapsed); } - if (!trans->serialize()) { + if (!trans->serialize(serialize_config)) { return td::Status::Error(-669,"cannot serialize new transaction for smart contract "s + trans->account.addr.to_hex()); } diff --git a/overlay/overlay-fec.cpp b/overlay/overlay-fec.cpp index b29fce22..817d3b7c 100644 --- a/overlay/overlay-fec.cpp +++ b/overlay/overlay-fec.cpp @@ -32,7 +32,7 @@ void OverlayOutboundFecBroadcast::alarm() { fec_type_.size(), flags_, std::move(X.data), X.id, fec_type_, date_); } - alarm_timestamp() = td::Timestamp::in(0.010); + alarm_timestamp() = 
td::Timestamp::in(delay_); if (seqno_ >= to_send_) { stop(); @@ -46,8 +46,9 @@ void OverlayOutboundFecBroadcast::start_up() { OverlayOutboundFecBroadcast::OverlayOutboundFecBroadcast(td::BufferSlice data, td::uint32 flags, td::actor::ActorId overlay, - PublicKeyHash local_id) + PublicKeyHash local_id, double speed_multiplier) : flags_(flags) { + delay_ /= speed_multiplier; CHECK(data.size() <= (1 << 27)); local_id_ = local_id; overlay_ = std::move(overlay); @@ -63,9 +64,10 @@ OverlayOutboundFecBroadcast::OverlayOutboundFecBroadcast(td::BufferSlice data, t } td::actor::ActorId OverlayOutboundFecBroadcast::create( - td::BufferSlice data, td::uint32 flags, td::actor::ActorId overlay, PublicKeyHash local_id) { - return td::actor::create_actor(td::actor::ActorOptions().with_name("bcast"), - std::move(data), flags, overlay, local_id) + td::BufferSlice data, td::uint32 flags, td::actor::ActorId overlay, PublicKeyHash local_id, + double speed_multiplier) { + return td::actor::create_actor( + td::actor::ActorOptions().with_name("bcast"), std::move(data), flags, overlay, local_id, speed_multiplier) .release(); } diff --git a/overlay/overlay-fec.hpp b/overlay/overlay-fec.hpp index a9cc3634..b72e830e 100644 --- a/overlay/overlay-fec.hpp +++ b/overlay/overlay-fec.hpp @@ -37,6 +37,7 @@ class OverlayOutboundFecBroadcast : public td::actor::Actor { PublicKeyHash local_id_; Overlay::BroadcastDataHash data_hash_; td::uint32 flags_ = 0; + double delay_ = 0.010; td::int32 date_; std::unique_ptr encoder_; td::actor::ActorId overlay_; @@ -45,9 +46,9 @@ class OverlayOutboundFecBroadcast : public td::actor::Actor { public: static td::actor::ActorId create(td::BufferSlice data, td::uint32 flags, td::actor::ActorId overlay, - PublicKeyHash local_id); + PublicKeyHash local_id, double speed_multiplier = 1.0); OverlayOutboundFecBroadcast(td::BufferSlice data, td::uint32 flags, td::actor::ActorId overlay, - PublicKeyHash local_id); + PublicKeyHash local_id, double speed_multiplier = 1.0); void 
alarm() override; void start_up() override; diff --git a/overlay/overlay.cpp b/overlay/overlay.cpp index 429c6a9c..30a40b1c 100644 --- a/overlay/overlay.cpp +++ b/overlay/overlay.cpp @@ -63,7 +63,7 @@ td::actor::ActorOwn Overlay::create_private( return td::actor::create_actor( overlay_actor_name(overlay_id), keyring, adnl, manager, dht_node, local_id, std::move(overlay_id), OverlayType::FixedMemberList, std::move(nodes), std::vector(), OverlayMemberCertificate{}, - std::move(callback), std::move(rules), std::move(scope)); + std::move(callback), std::move(rules), std::move(scope), std::move(opts)); } td::actor::ActorOwn Overlay::create_semiprivate( @@ -99,6 +99,7 @@ OverlayImpl::OverlayImpl(td::actor::ActorId keyring, td::actor overlay_id_ = id_full_.compute_short_id(); frequent_dht_lookup_ = opts_.frequent_dht_lookup_; peer_list_.local_member_flags_ = opts_.local_overlay_member_flags_; + opts_.broadcast_speed_multiplier_ = std::max(opts_.broadcast_speed_multiplier_, 1e-9); VLOG(OVERLAY_INFO) << this << ": creating"; @@ -490,7 +491,8 @@ void OverlayImpl::send_broadcast_fec(PublicKeyHash send_as, td::uint32 flags, td VLOG(OVERLAY_WARNING) << "broadcast source certificate is invalid"; return; } - OverlayOutboundFecBroadcast::create(std::move(data), flags, actor_id(this), send_as); + OverlayOutboundFecBroadcast::create(std::move(data), flags, actor_id(this), send_as, + opts_.broadcast_speed_multiplier_); } void OverlayImpl::print(td::StringBuilder &sb) { diff --git a/overlay/overlays.h b/overlay/overlays.h index c0385fc7..5eb63b13 100644 --- a/overlay/overlays.h +++ b/overlay/overlays.h @@ -269,6 +269,7 @@ struct OverlayOptions { td::uint32 nodes_to_send_ = 4; td::uint32 propagate_broadcast_to_ = 5; td::uint32 default_permanent_members_flags_ = 0; + double broadcast_speed_multiplier_ = 1.0; }; class Overlays : public td::actor::Actor { diff --git a/recent_changelog.md b/recent_changelog.md index dfa39aa6..820d2aa4 100644 --- a/recent_changelog.md +++ 
b/recent_changelog.md @@ -1,12 +1,13 @@ -## 2025.02 Update -1. Series of improvement/fixes for `Config8.version >= 9`, check [GlobalVersions.md](./doc/GlobalVersions.md) -2. Fix for better discovery of updated nodes' (validators') IPs: retry dht queries -3. Series of improvements for extra currency adoption: fixed c7 in rungetmethod, reserve modes -4. TVM: Fix processing continuation control data on deep jump -5. A few fixes of tl-b schemes: crc computation, incorrect tag for merkle proofs, advance_ext, NatWidth print -6. Emulator improvements: fix setting libraries, extracurrency support -7. Increase of gas limit for unlocking highload-v2 wallets locked in the beginning of 2024 -8. Validator console improvement: dashed names, better shard formats +## 2025.03 Update +1. New extracurrency behavior introduced, check [GlobalVersions.md](./doc/GlobalVersions.md#version-10) +2. Optimization of validation process, in particular CellStorageStat. +3. Flag for speeding up broadcasts in various overlays. +4. Fixes for static builds for emulator and tonlibjson +5. Improving getstats output: add + * Liteserver queries count + * Collated/validated blocks count, number of active sessions + * Persistent state sizes + * Initial sync progress +6. Fixes in logging, TON Storage, external message checking, persistent state downloading, UB in tonlib - -Besides the work of the core team, this update is based on the efforts of @dbaranovstonfi from StonFi(libraries in emulator), @Rexagon (ret on deep jumps), @tvorogme from DTon (`advance_ext`), Nan from Zellic (`stk_und` and JNI) +Besides the work of the core team, this update is based on the efforts of @Sild from StonFi(UB in tonlib).
diff --git a/storage/PeerActor.cpp b/storage/PeerActor.cpp index 48d45626..e140b4ce 100644 --- a/storage/PeerActor.cpp +++ b/storage/PeerActor.cpp @@ -251,7 +251,7 @@ void PeerActor::loop_update_init() { } s = s.substr(peer_init_offset_, UPDATE_INIT_BLOCK_SIZE); auto query = create_update_query(ton::create_tl_object( - td::BufferSlice(s), (int)peer_init_offset_, to_ton_api(node_state))); + td::BufferSlice(s), (int)peer_init_offset_ * 8, to_ton_api(node_state))); // take care about update_state_query initial state update_state_query_.state = node_state; @@ -502,11 +502,11 @@ void PeerActor::process_update_peer_parts(const tl_object_ptr(offset + i)); } diff --git a/tdutils/td/utils/Time.h b/tdutils/td/utils/Time.h index ece822d4..c7795ae4 100644 --- a/tdutils/td/utils/Time.h +++ b/tdutils/td/utils/Time.h @@ -128,6 +128,10 @@ inline Timestamp &operator+=(Timestamp &a, double b) { return a; } +inline double operator-(const Timestamp &a, const Timestamp &b) { + return a.at() - b.at(); +} + template void store(const Timestamp ×tamp, StorerT &storer) { storer.store_binary(timestamp.at() - Time::now() + Clocks::system()); diff --git a/tdutils/td/utils/logging.h b/tdutils/td/utils/logging.h index dbf4c64b..bb28f6df 100644 --- a/tdutils/td/utils/logging.h +++ b/tdutils/td/utils/logging.h @@ -264,8 +264,8 @@ class Logger { sb_ << other; return *this; } - LambdaPrintHelper operator<<(const LambdaPrint &) { - return LambdaPrintHelper{*this}; + LambdaPrintHelper operator<<(const LambdaPrint &) { + return LambdaPrintHelper{sb_}; } MutableCSlice as_cslice() { diff --git a/tolk-tester/tests/a10.tolk b/tolk-tester/tests/a10.tolk index 755a3bfb..9d24f38d 100644 --- a/tolk-tester/tests/a10.tolk +++ b/tolk-tester/tests/a10.tolk @@ -2,7 +2,7 @@ import "@stdlib/tvm-lowlevel" fun pair_first(p: [X, Y]): X asm "FIRST"; -fun one(dummy: tuple) { +fun one(dummy: tuple?) { return 1; } @@ -144,15 +144,16 @@ fun test95() { """ test95 PROC:<{ ... 
- next GETGLOB // '10 - 3 PUSHINT // '10 '12=3 - 4 PUSHINT // '10 '12=3 '13=4 - 5 PUSHINT // '10 '12=3 '13=4 '14=5 - TRIPLE // '15 '16 - next SETGLOB + next GETGLOB // g_next + 3 PUSHINT // g_next '14=3 + 4 PUSHINT // g_next '14=3 '15=4 + 5 PUSHINT // g_next '14=3 '15=4 '16=5 + TRIPLE // '10 '11 + SWAP cur SETGLOB - cur GETGLOB // '17 - next GETGLOB // '17 '18 + next SETGLOB + cur GETGLOB // g_cur + next GETGLOB // g_cur g_next }> """ */ diff --git a/tolk-tester/tests/allow_post_modification.tolk b/tolk-tester/tests/allow_post_modification.tolk index df758a1e..e20e8218 100644 --- a/tolk-tester/tests/allow_post_modification.tolk +++ b/tolk-tester/tests/allow_post_modification.tolk @@ -147,5 +147,5 @@ fun main() { // x.0 x.1 """ -@code_hash 7627024945492125068389905298530400936797031708759561372406088054030801992712 +@code_hash 61280273714870328160131559159866470128402169974050439159015534193532598351244 */ diff --git a/tolk-tester/tests/assignment-tests.tolk b/tolk-tester/tests/assignment-tests.tolk index 40761939..bb647652 100644 --- a/tolk-tester/tests/assignment-tests.tolk +++ b/tolk-tester/tests/assignment-tests.tolk @@ -26,10 +26,189 @@ fun typesAsIdentifiers(builder: builder) { return int; } +global callOrder: tuple; + +fun getTensor_12() { + callOrder.tuplePush(100); + return (1, 2); +} +fun getTensor_1X(x: int) { + callOrder.tuplePush(101); + return (1, x); +} +fun getTuple_12() { + callOrder.tuplePush(110); + return [1, 2]; +} +fun getTuple_1X(x: int) { + callOrder.tuplePush(111); + return [1, x]; +} +fun getUntypedTuple_12() { + callOrder.tuplePush(120); + var t = createEmptyTuple(); t.tuplePush(1); t.tuplePush(2); + return t; +} +fun getUntypedTuple_1X(x: int) { + callOrder.tuplePush(121); + var t = createEmptyTuple(); t.tuplePush(1); t.tuplePush(x); + return t; +} +fun getIntValue5() { + callOrder.tuplePush(10); + return 5; +} +fun getIntValueX(x: int) { + callOrder.tuplePush(11); + return x; +} + +@method_id(102) +fun test102() { + callOrder = 
createEmptyTuple(); + var x = 0; + getTensor_12().0 = getIntValue5(); + getTensor_1X(5).1 = getIntValue5(); + getTensor_1X(x = 10).0 = getIntValueX(x); + return (callOrder, x); +} + +@method_id(103) +fun test103() { + callOrder = createEmptyTuple(); + var x = 0; + getTuple_12().0 = getIntValue5(); + getTuple_1X(5).1 = getIntValue5(); + getTuple_1X(x = 10).0 = getIntValueX(x); + return (callOrder, x); +} + +@method_id(104) +fun test104() { + callOrder = createEmptyTuple(); + var x = 0; + getUntypedTuple_12().0 = getIntValue5(); + getUntypedTuple_1X(5).1 = getIntValue5(); + getUntypedTuple_1X(x = 10).0 = getIntValueX(x); + return (callOrder, x); +} + +@method_id(105) +fun test105() { + callOrder = createEmptyTuple(); + getTensor_12().0 = getTensor_1X(getIntValue5()).1 = getIntValueX(getTensor_12().1); + return callOrder; +} + +@method_id(106) +fun test106() { + callOrder = createEmptyTuple(); + getTuple_12().0 = getTuple_1X(getIntValue5()).1 = getIntValueX(getTuple_12().1); + return callOrder; +} + +global t107: (int, int); + +@method_id(107) +fun test107() { + ((t107 = (1, 2)).0, (t107 = (3, 4)).1) = (5, 6); + return t107; +} + +global g108: int; +fun assertEq(a: int, b: int) { + assert(a == b, 10); + return b; +} + +@method_id(108) +fun test108() { + callOrder = createEmptyTuple(); + g108 = 0; + getTensor_1X(g108 = 8).1 = assertEq(g108, 8); + return (callOrder, g108); +} + +@method_id(109) +fun test109() { + callOrder = createEmptyTuple(); + var x = 0; + [getTuple_12().0, getTuple_1X(x = getIntValue5()).1, getTuple_1X(x += 10).0] = [getIntValue5(), getIntValue5(), getIntValueX(x)]; + return (callOrder, x); +} + +global g110: int; +global t110: (int, int); + +@method_id(110) +fun test110() { + callOrder = createEmptyTuple(); + var xy = [0, 0]; + [xy.0, getTuple_1X(g110 = 8).0] = [g110 += 5, getIntValueX(g110 += 10)]; + [xy.1, getTuple_1X((t110 = (8, 9)).0).1] = [t110.0 += 5, getIntValueX(t110.1 += 10)]; + return (xy, callOrder, g110, t110); +} + +@method_id(111) 
+fun test111() { + callOrder = createEmptyTuple(); + var z = -1; + var xy = [0, z = 0]; + var rhs = [getIntValueX(xy.1 += 10), xy.1, xy.0, z += 50]; + [xy.0, getTuple_1X(g110 = 8 + getIntValueX(xy.1)).0, xy.1, z] = rhs; + return (xy, g110, callOrder, z); +} + +@method_id(112) +fun test112() { + var xy = [1, 2]; + ((((xy))).0, ((xy.1))) = ((xy).1, ((xy.0))); + return xy; +} + +@method_id(113) +fun test113() { + var (a, t, z) = (1, [2,3], (-1,-1)); + (a, t, a, z, t.1, z.1) = (10, [a,12], 13, (a, t.1), 14, t.1); + return (a, t, z); +} + +global g114: int; +global t114: [int, int]; +global z114: (int, int); + +@method_id(114) +fun test114() { + g114 = 1; + t114 = [2, 3]; + (g114, t114, g114, z114, t114.1, z114.1) = (10, [g114,12], 13, (g114, t114.1), 14, t114.1); + return (g114, t114, z114); +} + +@method_id(115) +fun test115() { + callOrder = createEmptyTuple(); + var x = 0; + var y = 0; + [getTensor_1X(x = 5).0, y] = getTuple_1X(x = 9); + return (callOrder, x, y); +} + +@method_id(116) +fun test116() { + var (a,b,c,d) = (0,0,0,0); + var rhs = [1, 2, 3, 4]; + var rhs2 = ([a,b,c,d] = rhs); + __expect_type(rhs2, "[int, int, int, int]"); + return (a, b, c, d, rhs2); +} + + + fun main(value: int) { - var (x: int, y) = (autoInferIntNull(value), autoInferIntNull(value * 2)); + var (x: int?, y) = (autoInferIntNull(value), autoInferIntNull(value * 2)); if (x == null && y == null) { return null; } - return x == null || y == null ? -1 : x + y; + return x == null || y == null ? -1 : x! 
+ y!; } /** @@ -37,4 +216,35 @@ fun main(value: int) { @testcase | 0 | 6 | -1 @testcase | 0 | 11 | (null) @testcase | 101 | 78 | 88 +@testcase | 102 | | [ 100 10 101 10 101 11 ] 10 +@testcase | 103 | | [ 110 10 111 10 111 11 ] 10 +@testcase | 104 | | [ 120 10 121 10 121 11 ] 10 +@testcase | 105 | | [ 100 10 101 100 11 ] +@testcase | 106 | | [ 110 10 111 110 11 ] +@testcase | 107 | | 3 4 +@testcase | 108 | | [ 101 ] 8 +@testcase | 109 | | [ 110 10 111 111 10 10 11 ] 15 +@testcase | 110 | | [ 13 13 ] [ 111 11 111 11 ] 23 13 19 +@testcase | 111 | | [ 10 0 ] 18 [ 11 11 111 ] 50 +@testcase | 112 | | [ 2 1 ] +@testcase | 113 | | 13 [ 1 14 ] 1 3 +@testcase | 114 | | 13 [ 1 14 ] 1 3 +@testcase | 115 | | [ 101 111 ] 9 9 +@testcase | 116 | | 1 2 3 4 [ 1 2 3 4 ] + + +@fif_codegen +""" + test116 PROC:<{ + // + 1 PUSHINT // '10=1 + 2 PUSHINT // '10=1 '11=2 + 3 PUSHINT // '10=1 '11=2 '12=3 + 4 PUSHINT // '10=1 '11=2 '12=3 '13=4 + 4 TUPLE // rhs + DUP // rhs rhs + 4 UNTUPLE // rhs2 a b c d + 4 ROLL // a b c d rhs2 + }> +""" */ diff --git a/tolk-tester/tests/c2.tolk b/tolk-tester/tests/c2.tolk index 257aba5b..bcbc6c93 100644 --- a/tolk-tester/tests/c2.tolk +++ b/tolk-tester/tests/c2.tolk @@ -8,7 +8,7 @@ fun unnamed_args(_: int, _: slice, _: int) { return true; } -fun main(x: int, y: int, z: int): bool { +fun main(x: int, y: int, z: int): bool? { op = `_+_`; if (0) { return null; } return check_assoc(x, y, z); diff --git a/tolk-tester/tests/cells-slices.tolk b/tolk-tester/tests/cells-slices.tolk index 19e2e215..772812eb 100644 --- a/tolk-tester/tests/cells-slices.tolk +++ b/tolk-tester/tests/cells-slices.tolk @@ -32,7 +32,8 @@ fun test1(): [int,int,int,int,int] { fun test2(): [int,int,int] { var b: builder = beginCell().myStoreInt(1, 32); b = b.myStoreInt(2, 32); - b.myStoreInt(3, 32); + // operator ! 
here and below is used just for testing purposes, it doesn't affect the result + b!.myStoreInt(3, 32); var cs: slice = b.endCell().beginParse(); var one: int = cs.myLoadInt(32); @@ -43,14 +44,14 @@ fun test2(): [int,int,int] { @method_id(103) fun test3(ret: int): int { - val same: int = beginCell().storeUint(ret,32).endCell().beginParse().loadUint(32); + val same: int = beginCell()!.storeUint(ret,32).endCell().beginParse().loadUint(32); return same; } @method_id(104) fun test4(): [int,int] { - var b: builder = beginCell().myStoreInt(1, 32); - b = b.storeInt(2, 32).storeInt(3, 32); + var b: builder = (beginCell() as builder).myStoreInt(1, 32); + b = b!.storeInt(2, 32)!.storeInt(3, 32); var cs: slice = b.endCell().beginParse(); var (one, _, three) = (cs.getFirstBits(32).loadUint(32), cs.skipBits(64), cs.load_u32()); @@ -116,7 +117,7 @@ fun test10() { fun test11() { var s: slice = beginCell().storeInt(1, 32).storeInt(2, 32).storeInt(3, 32).storeInt(4, 32).storeInt(5, 32).storeInt(6, 32).storeInt(7, 32).endCell().beginParse(); var size1 = getRemainingBitsCount(s); - s.skipBits(32); + s!.skipBits(32); var s1: slice = s.getFirstBits(64); var n1 = s1.loadInt(32); var size2 = getRemainingBitsCount(s); diff --git a/tolk-tester/tests/codegen_check_demo.tolk b/tolk-tester/tests/codegen_check_demo.tolk index b355a9b7..5b46c093 100644 --- a/tolk-tester/tests/codegen_check_demo.tolk +++ b/tolk-tester/tests/codegen_check_demo.tolk @@ -35,7 +35,7 @@ Below, I just give examples of @fif_codegen tag: """ main PROC:<{ // s - 17 PUSHINT // s '1=17 + 17 PUSHINT // s '3=17 OVER // s z=17 t WHILE:<{ ... 
diff --git a/tolk-tester/tests/dicts-demo.tolk b/tolk-tester/tests/dicts-demo.tolk index 291bd2ea..606318cb 100644 --- a/tolk-tester/tests/dicts-demo.tolk +++ b/tolk-tester/tests/dicts-demo.tolk @@ -1,15 +1,15 @@ import "@stdlib/tvm-dicts" -fun addIntToIDict(mutate self: cell, key: int, number: int): void { +fun addIntToIDict(mutate self: cell?, key: int, number: int): void { return self.iDictSetBuilder(32, key, beginCell().storeInt(number, 32)); } -fun calculateDictLen(d: cell) { +fun calculateDictLen(d: cell?) { var len = 0; var (k, v, f) = d.uDictGetFirst(32); while (f) { len += 1; - (k, v, f) = d.uDictGetNext(32, k); + (k, v, f) = d.uDictGetNext(32, k!); } return len; } @@ -25,13 +25,13 @@ fun loadTwoDigitNumberFromSlice(mutate self: slice): int { fun test101(getK1: int, getK2: int, getK3: int) { var dict = createEmptyDict(); dict.uDictSetBuilder(32, 1, beginCell().storeUint(1, 32)); - var (old1: slice, found1) = dict.uDictSetAndGet(32, getK1, beginCell().storeUint(2, 32).endCell().beginParse()); - var (old2: slice, found2) = dict.uDictSetAndGet(32, getK2, beginCell().storeUint(3, 32).endCell().beginParse()); - var (cur3: slice, found3) = dict.uDictGet(32, getK3); + var (old1: slice?, found1) = dict.uDictSetAndGet(32, getK1, beginCell().storeUint(2, 32).endCell().beginParse()); + var (old2: slice?, found2) = dict.uDictSetAndGet(32, getK2, beginCell().storeUint(3, 32).endCell().beginParse()); + var (cur3: slice?, found3) = dict.uDictGet(32, getK3); return ( - found1 ? old1.loadUint(32) : -1, - found2 ? old2.loadUint(32) : -1, - found3 ? cur3.loadUint(32) : -1 + found1 ? old1!.loadUint(32) : -1, + found2 ? old2!.loadUint(32) : -1, + found3 ? 
cur3!.loadUint(32) : -1 ); } @@ -47,7 +47,7 @@ fun test102() { while (!shouldBreak) { var (kDel, kVal, wasDel) = dict.iDictDeleteLastAndGet(32); if (wasDel) { - deleted.tuplePush([kDel, kVal.loadInt(32)]); + deleted.tuplePush([kDel, kVal!.loadInt(32)]); } else { shouldBreak = true; } @@ -82,14 +82,14 @@ fun test104() { var (old2, _) = dict.sDictDeleteAndGet(32, "key1"); var (restK, restV, _) = dict.sDictGetFirst(32); var (restK1, restV1, _) = dict.sDictDeleteLastAndGet(32); - assert (restK.isSliceBitsEqual(restK1)) throw 123; - assert (restV.isSliceBitsEqual(restV1)) throw 123; + assert (restK!.isSliceBitsEqual(restK1!)) throw 123; + assert (restV!.isSliceBitsEqual(restV1!)) throw 123; return ( - old1.loadTwoDigitNumberFromSlice(), - old2.loadTwoDigitNumberFromSlice(), - restV.loadTwoDigitNumberFromSlice(), - restK.loadTwoDigitNumberFromSlice(), - restK.loadTwoDigitNumberFromSlice() + old1!.loadTwoDigitNumberFromSlice(), + old2!.loadTwoDigitNumberFromSlice(), + restV!.loadTwoDigitNumberFromSlice(), + restK!.loadTwoDigitNumberFromSlice(), + restK!.loadTwoDigitNumberFromSlice() ); } diff --git a/tolk-tester/tests/generics-1.tolk b/tolk-tester/tests/generics-1.tolk index 453ec282..ca310927 100644 --- a/tolk-tester/tests/generics-1.tolk +++ b/tolk-tester/tests/generics-1.tolk @@ -49,17 +49,17 @@ fun manyEq(a: T1, b: T2, c: T3): [T1, T2, T3] { fun test104(f: int) { var result = ( manyEq(1 ? 1 : 1, f ? 0 : null, !f ? getTwo() as int : null), - manyEq(f ? null as int : eq2(2), beginCell().storeBool(true).endCell().beginParse().loadBool(), eq4(f)) + manyEq(f ? null as int? : eq2(2), beginCell().storeBool(true).endCell().beginParse().loadBool(), eq4(f)) ); - __expect_type(result, "([int, int, int], [int, bool, int])"); + __expect_type(result, "([int, int?, int?], [int?, bool, int])"); return result; } -fun calcSum(x: X, y: X) { return x + y; } +fun calcSum(x: X, y: X) { return x! 
+ y!; } @method_id(105) fun test105() { - if (0) { calcSum(((0)), null); } + if (0) { calcSum(((0 as int?)), null); } return (calcSum(1, 2)); } diff --git a/tolk-tester/tests/imports/use-dicts.tolk b/tolk-tester/tests/imports/use-dicts.tolk index c9d5dcfe..2daaf2b1 100644 --- a/tolk-tester/tests/imports/use-dicts.tolk +++ b/tolk-tester/tests/imports/use-dicts.tolk @@ -1,21 +1,21 @@ import "@stdlib/tvm-dicts" -fun prepareDict_3_30_4_40_5_x(valueAt5: int): cell { - var dict: cell = createEmptyDict(); +fun prepareDict_3_30_4_40_5_x(valueAt5: int): cell? { + var dict: cell? = createEmptyDict(); dict.iDictSetBuilder(32, 3, beginCell().storeInt(30, 32)); dict.iDictSetBuilder(32, 4, beginCell().storeInt(40, 32)); dict.iDictSetBuilder(32, 5, beginCell().storeInt(valueAt5, 32)); return dict; } -fun lookupIdxByValue(idict32: cell, value: int): int { - var cur_key = -1; +fun lookupIdxByValue(idict32: cell?, value: int): int { + var cur_key: int? = -1; do { - var (cur_key redef, cs: slice, found: bool) = idict32.iDictGetNext(32, cur_key); + var (cur_key redef, cs: slice?, found: bool) = idict32.iDictGetNext(32, cur_key!); // one-line condition (via &) doesn't work, since right side is calculated immediately if (found) { - if (cs.loadInt(32) == value) { - return cur_key; + if (cs!.loadInt(32) == value) { + return cur_key!; } } } while (found); diff --git a/tolk-tester/tests/indexed-access.tolk b/tolk-tester/tests/indexed-access.tolk index 38094fa5..e2bd3dd9 100644 --- a/tolk-tester/tests/indexed-access.tolk +++ b/tolk-tester/tests/indexed-access.tolk @@ -21,6 +21,26 @@ fun plus(mutate self: int, y: int): int { fun eq(v: X): X { return v; } +global gTup: [int]; +global gTens: (int, int); + +@method_id(100) +fun testCodegenSimple() { + var t1 = [1]; + t1.0 = 2; + debugPrintString(""); + var t2 = [[1]]; + t2.0.0 = 2; + debugPrintString(""); + gTup = [1]; + gTup.0 = 2; + debugPrintString(""); + gTens = (1,2); + gTens.1 = 4; + debugPrintString(""); + return (t1, t2, gTup, gTens); +} 
+ @method_id(101) fun test101() { var t = (1, (2, 3), [4, 5, [6, 7]], 8); @@ -66,8 +86,8 @@ fun test104() { } @method_id(105) -fun test105(x: int, y: int): (tuple, int, (int, int), int, int) { - var ab = (createEmptyTuple(), (x, y), tupleSize); +fun test105(x: int, y: int): (tuple, int, (int?, int), int, int) { + var ab = (createEmptyTuple(), (x as int?, y), tupleSize); ab.0.tuplePush(1); tuplePush(mutate ab.0, 2); ab.1.0 = null; @@ -78,7 +98,7 @@ fun test105(x: int, y: int): (tuple, int, (int, int), int, int) { @method_id(106) fun test106(x: int, y: int) { - var ab = [createEmptyTuple(), [x, y], tupleSize]; + var ab = [createEmptyTuple(), [x as int?, y], tupleSize]; ab.0.tuplePush(1); tuplePush(mutate ab.0, 2); ab.1.0 = null; @@ -158,7 +178,7 @@ fun test114(f: int, s: int) { @method_id(115) fun test115() { var y = [[[[true]]]]; - return (y, y.0.0.0.0 = !y.0.0.0.0, y.0); + return (y, ((((y).0).0).0).0 = !y.0.0.0.0, y.0); } @method_id(116) @@ -213,6 +233,25 @@ fun test121(zero: int) { return t; } +fun isFirstComponentGt0(t: (T1, T2)): bool { + return t.0 > 0; +} + +@method_id(122) +fun test122(x: (int, int)) { + return ( + isFirstComponentGt0(x), isFirstComponentGt0((2, beginCell())), isFirstComponentGt0((0, null)), + x.isFirstComponentGt0(), (2, beginCell()).isFirstComponentGt0(), (0, null).isFirstComponentGt0() + ); +} + +@method_id(123) +fun test123() { + var t = [[10, 20]] as [[int,int]]?; + ((t!).0).0 = ((t!).0).1 = 100; + return t; +} + fun main(){} @@ -238,6 +277,58 @@ fun main(){} @testcase | 119 | 1 2 3 4 | 4 1 3 @testcase | 120 | | 3 4 [ 5 6 ] @testcase | 121 | 0 | [ 3 ] +@testcase | 122 | 1 2 | -1 -1 0 -1 -1 0 +@testcase | 123 | | [ [ 100 100 ] ] + +@fif_codegen +""" + testCodegenSimple PROC:<{ + // + 1 PUSHINT // '2=1 + SINGLE // t1 + 2 PUSHINT // t1 '3=2 + 0 SETINDEX // t1 + x{} PUSHSLICE // t1 '6 + STRDUMP DROP + 1 PUSHINT // t1 '10=1 + SINGLE // t1 '9 + SINGLE // t1 t2 + 2 PUSHINT // t1 t2 '11=2 + OVER // t1 t2 '11=2 t2 + 0 INDEX // t1 t2 '11=2 '14 + 
SWAP // t1 t2 '14 '11=2 + 0 SETINDEX // t1 t2 '14 + 0 SETINDEX // t1 t2 + x{} PUSHSLICE // t1 t2 '17 + STRDUMP DROP + 1 PUSHINT // t1 t2 '20=1 + SINGLE // t1 t2 '18 + gTup SETGLOB + 2 PUSHINT // t1 t2 '21=2 + gTup GETGLOB // t1 t2 '21=2 g_gTup + SWAP // t1 t2 g_gTup '21=2 + 0 SETINDEX // t1 t2 g_gTup + gTup SETGLOB + x{} PUSHSLICE // t1 t2 '25 + STRDUMP DROP + 1 PUSHINT // t1 t2 '28=1 + 2 PUSHINT // t1 t2 '26=1 '27=2 + PAIR + gTens SETGLOB + 4 PUSHINT // t1 t2 g_gTens.1=4 + gTens GETGLOB + UNPAIR // t1 t2 g_gTens.1=4 g_gTens.0 g_gTens.1 + DROP // t1 t2 g_gTens.1=4 g_gTens.0 + SWAP // t1 t2 g_gTens.0 g_gTens.1=4 + PAIR + gTens SETGLOB + x{} PUSHSLICE // t1 t2 '36 + STRDUMP DROP + gTup GETGLOB // t1 t2 g_gTup + gTens GETGLOB + UNPAIR // t1 t2 g_gTup g_gTens.0 g_gTens.1 + }> +""" @fif_codegen """ @@ -247,26 +338,6 @@ fun main(){} }> """ -@fif_codegen -""" - test104 PROC:<{ - // - 5 PUSHINT // '2=5 - DUP // '2=5 '3=5 - PAIR // '1 - SINGLE // m - 10 PUSHINT // m '5=10 - 20 PUSHINT // m '5=10 '6=20 - s2 PUSH // m '5=10 '6=20 m - 0 INDEX // m '10=10 '12=20 '8 - SWAP // m '10=10 '8 '12=20 - 1 SETINDEX // m '10=10 '8 - SWAP // m '8 '10=10 - 0 SETINDEX // m '8 - 0 SETINDEX // m - ... -""" - @fif_codegen """ testCodegenIndexPostfix1 PROC:<{ diff --git a/tolk-tester/tests/inference-tests.tolk b/tolk-tester/tests/inference-tests.tolk index 3d451581..5020d0dd 100644 --- a/tolk-tester/tests/inference-tests.tolk +++ b/tolk-tester/tests/inference-tests.tolk @@ -18,10 +18,12 @@ fun test1(x: int, y: int) { __expect_type(random() ? 
x : y, "int"); __expect_type(eq(x), "int"); __expect_type(eq(x), "int"); - __expect_type(eq(null), "int"); + __expect_type(eq(null), "int?"); __expect_type(x as int, "int"); __expect_type(+x, "int"); __expect_type(~x, "int"); + __expect_type(x!, "int"); + __expect_type(x!!!, "int"); { var x: slice = beginCell().endCell().beginParse(); __expect_type(x, "slice"); @@ -62,9 +64,9 @@ fun test5(x: int) { __expect_type([], "[]"); __expect_type([x], "[int]"); __expect_type([x, x >= 1], "[int, bool]"); - __expect_type([x, x >= 1, null as slice], "[int, bool, slice]"); + __expect_type([x, x >= 1, null as slice?], "[int, bool, slice?]"); __expect_type((x, [x], [[x], x]), "(int, [int], [[int], int])"); - __expect_type(getMyOriginalBalanceWithExtraCurrencies(), "[int, cell]"); + __expect_type(getMyOriginalBalanceWithExtraCurrencies(), "[int, cell?]"); } fun test6() { @@ -84,6 +86,17 @@ fun test7() { // __expect_type(eq<(int, slice)>, "(int, slice) -> (int, slice)"); } +fun alwaysThrows(): never { throw 123; } +fun alwaysThrowsNotAnnotated() { throw 123; } +fun alwaysThrowsNotAnnotated2() { alwaysThrows(); } + +fun test9() { + __expect_type(alwaysThrows(), "never"); + __expect_type(alwaysThrows, "() -> never"); + __expect_type(alwaysThrowsNotAnnotated(), "void"); + __expect_type(alwaysThrowsNotAnnotated2(), "void"); +} + fun main() { return 0; diff --git a/tolk-tester/tests/invalid-assign-1.tolk b/tolk-tester/tests/invalid-assign-1.tolk index f605056e..799176df 100644 --- a/tolk-tester/tests/invalid-assign-1.tolk +++ b/tolk-tester/tests/invalid-assign-1.tolk @@ -1,9 +1,9 @@ fun main() { - var c = 1; - (c, c) = (2, 3); + var t = createEmptyTuple(); + t.0 = (1, 2); } /** @compilation_should_fail -@stderr one variable modified twice inside the same expression +@stderr a tuple can not have `(int, int)` inside, because it occupies 2 stack slots in TVM, not 1 */ diff --git a/tolk-tester/tests/invalid-assign-2.tolk b/tolk-tester/tests/invalid-assign-2.tolk index 2838ed9a..6a33e696 
100644 --- a/tolk-tester/tests/invalid-assign-2.tolk +++ b/tolk-tester/tests/invalid-assign-2.tolk @@ -1,11 +1,8 @@ -fun incThree(mutate a: int, mutate b: int, mutate c: int) {} - -fun main() { - var c = [[[1, 2]]]; - incThree(mutate c.0.0.0, mutate c.0.0.1, mutate c.0.0.0); +fun main(cs: slice) { + var cb = cs.tupleSize; } /** @compilation_should_fail -@stderr one variable modified twice inside the same expression +@stderr referencing a method for `tuple` with object of type `slice` */ diff --git a/tolk-tester/tests/invalid-assign-3.tolk b/tolk-tester/tests/invalid-assign-3.tolk index d3f5d1f1..567ace33 100644 --- a/tolk-tester/tests/invalid-assign-3.tolk +++ b/tolk-tester/tests/invalid-assign-3.tolk @@ -1,10 +1,9 @@ -global gg: (int, int); - fun main() { - [gg.0, gg.1, gg.0] = [0, 1, 0]; + var t = createEmptyTuple(); + var xy = t.0 as (int, int); } /** @compilation_should_fail -@stderr one variable modified twice inside the same expression +@stderr a tuple can not have `(int, int)` inside, because it occupies 2 stack slots in TVM, not 1 */ diff --git a/tolk-tester/tests/invalid-assign-4.tolk b/tolk-tester/tests/invalid-assign-4.tolk deleted file mode 100644 index 67340b20..00000000 --- a/tolk-tester/tests/invalid-assign-4.tolk +++ /dev/null @@ -1,10 +0,0 @@ -global gg: (int, [int, int]); - -fun main() { - (gg.1.0, gg.1, gg.1.1) = (0, [1, 2], 3); -} - -/** -@compilation_should_fail -@stderr one variable both modified and read inside the same expression -*/ diff --git a/tolk-tester/tests/invalid-assign-5.tolk b/tolk-tester/tests/invalid-assign-5.tolk deleted file mode 100644 index f3fe59f7..00000000 --- a/tolk-tester/tests/invalid-assign-5.tolk +++ /dev/null @@ -1,9 +0,0 @@ -fun main() { - var ab = (1, 2); - (ab, ab.1) = ((2, 3), 4); -} - -/** -@compilation_should_fail -@stderr one variable both modified and read inside the same expression -*/ diff --git a/tolk-tester/tests/invalid-assign-6.tolk b/tolk-tester/tests/invalid-assign-6.tolk deleted file mode 100644 
index 59d769e9..00000000 --- a/tolk-tester/tests/invalid-assign-6.tolk +++ /dev/null @@ -1,9 +0,0 @@ -fun main() { - var t = createEmptyTuple(); - t.0 = (1, 2); -} - -/** -@compilation_should_fail -@stderr can not put `(int, int)` into a tuple, because it occupies 2 stack slots in TVM, not 1 -*/ diff --git a/tolk-tester/tests/invalid-assign-7.tolk b/tolk-tester/tests/invalid-assign-7.tolk deleted file mode 100644 index 6a33e696..00000000 --- a/tolk-tester/tests/invalid-assign-7.tolk +++ /dev/null @@ -1,8 +0,0 @@ -fun main(cs: slice) { - var cb = cs.tupleSize; -} - -/** -@compilation_should_fail -@stderr referencing a method for `tuple` with object of type `slice` -*/ diff --git a/tolk-tester/tests/invalid-call-10.tolk b/tolk-tester/tests/invalid-call-10.tolk index 9a28c004..4da85f4f 100644 --- a/tolk-tester/tests/invalid-call-10.tolk +++ b/tolk-tester/tests/invalid-call-10.tolk @@ -7,5 +7,5 @@ fun main() { /** @compilation_should_fail -@stderr can not put `(int, builder)` into a tuple, because it occupies 2 stack slots in TVM, not 1 +@stderr a tuple can not have `(int, builder)` inside, because it occupies 2 stack slots in TVM, not 1 */ diff --git a/tolk-tester/tests/invalid-generics-1.tolk b/tolk-tester/tests/invalid-generics-1.tolk index c8ff7fec..0bbdeee6 100644 --- a/tolk-tester/tests/invalid-generics-1.tolk +++ b/tolk-tester/tests/invalid-generics-1.tolk @@ -6,5 +6,5 @@ fun failCantDeduceWithoutArgument() { /** @compilation_should_fail -@stderr can not deduce X for generic function `f` +@stderr too few arguments in call to `f`, expected 2, have 1 */ diff --git a/tolk-tester/tests/invalid-generics-13.tolk b/tolk-tester/tests/invalid-generics-13.tolk new file mode 100644 index 00000000..d10e2174 --- /dev/null +++ b/tolk-tester/tests/invalid-generics-13.tolk @@ -0,0 +1,11 @@ +fun calcSum(x: X, y: X) { return x + y; } + +fun cantApplyPlusOnNullable() { + return calcSum(((0 as int?)), null); +} + +/** +@compilation_should_fail +@stderr in function `calcSum` 
+@stderr can not apply operator `+` to `int?` and `int?` + */ diff --git a/tolk-tester/tests/invalid-generics-14.tolk b/tolk-tester/tests/invalid-generics-14.tolk new file mode 100644 index 00000000..eb3adc92 --- /dev/null +++ b/tolk-tester/tests/invalid-generics-14.tolk @@ -0,0 +1,17 @@ +fun eq(v: X) {} + +fun cantDeduceWhenNotInferred() { + // at type inferring (before type checking) they are unknown + var (x, y) = 2; + + eq(x as int); // ok (since execution doesn't reach type checking) + eq(x); // ok (since execution doesn't reach type checking) + eq(x); +} + +/** +@compilation_should_fail +@stderr in function `cantDeduceWhenNotInferred` +@stderr can not deduce X for generic function `eq` +@stderr eq(x); + */ diff --git a/tolk-tester/tests/invalid-generics-7.tolk b/tolk-tester/tests/invalid-generics-7.tolk index b51bb82c..076b7804 100644 --- a/tolk-tester/tests/invalid-generics-7.tolk +++ b/tolk-tester/tests/invalid-generics-7.tolk @@ -11,8 +11,7 @@ fun foo(value: X) : X { /** @compilation_should_fail -@stderr while instantiating generic function `foo` -@stderr while instantiating generic function `bar` +@stderr in function `bar` @stderr can not convert type `int` to return type `slice` @stderr return 1 */ diff --git a/tolk-tester/tests/invalid-mutate-18.tolk b/tolk-tester/tests/invalid-mutate-18.tolk new file mode 100644 index 00000000..bb8cde05 --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-18.tolk @@ -0,0 +1,10 @@ +fun getNullableTuple(): tuple? { return createEmptyTuple(); } + +fun cantUseLValueUnwrappedNotNull() { + tuplePush(mutate getNullableTuple()!, 1); +} + +/** +@compilation_should_fail +@stderr function call can not be used as lvalue + */ diff --git a/tolk-tester/tests/invalid-mutate-19.tolk b/tolk-tester/tests/invalid-mutate-19.tolk new file mode 100644 index 00000000..bb8cde05 --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-19.tolk @@ -0,0 +1,10 @@ +fun getNullableTuple(): tuple? 
{ return createEmptyTuple(); } + +fun cantUseLValueUnwrappedNotNull() { + tuplePush(mutate getNullableTuple()!, 1); +} + +/** +@compilation_should_fail +@stderr function call can not be used as lvalue + */ diff --git a/tolk-tester/tests/invalid-mutate-20.tolk b/tolk-tester/tests/invalid-mutate-20.tolk new file mode 100644 index 00000000..f6eb2f9f --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-20.tolk @@ -0,0 +1,13 @@ +fun acceptMutateNullableTensor(mutate self: (int, int)?) { +} + +fun cantModifyTupleIndexWithTypeTransition() { + var t = [1, null]; + t.1.acceptMutateNullableTensor(); +} + +/** +@compilation_should_fail +@stderr can not call method for mutate `(int, int)?` with object of type `null` +@stderr because mutation is not type compatible + */ diff --git a/tolk-tester/tests/invalid-never-1.tolk b/tolk-tester/tests/invalid-never-1.tolk new file mode 100644 index 00000000..68c6c804 --- /dev/null +++ b/tolk-tester/tests/invalid-never-1.tolk @@ -0,0 +1,8 @@ +fun invalidNever(): never { + if (random()) { throw 123; } +} + +/** +@compilation_should_fail +@stderr a function returning `never` can not have a reachable endpoint + */ diff --git a/tolk-tester/tests/invalid-typing-11.tolk b/tolk-tester/tests/invalid-typing-11.tolk index d6aa09c3..f6e89d08 100644 --- a/tolk-tester/tests/invalid-typing-11.tolk +++ b/tolk-tester/tests/invalid-typing-11.tolk @@ -3,6 +3,7 @@ fun failBitwiseNotOnBool() { if (~eq) { return 0; } + return -1; } /** diff --git a/tolk-tester/tests/invalid-typing-14.tolk b/tolk-tester/tests/invalid-typing-14.tolk new file mode 100644 index 00000000..657ab5f4 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-14.tolk @@ -0,0 +1,14 @@ + +fun autoGetIntOrNull() { + if (random()) { return 1; } + return null; +} + +fun testAutoInferredIntOrNull() { + var b: builder = autoGetIntOrNull() as builder; +} + +/** +@compilation_should_fail +@stderr type `int?` can not be cast to `builder` + */ diff --git a/tolk-tester/tests/invalid-typing-15.tolk 
b/tolk-tester/tests/invalid-typing-15.tolk new file mode 100644 index 00000000..fbcff8a2 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-15.tolk @@ -0,0 +1,13 @@ + +fun getNullable4(): int? { + return 4; +} + +fun testCantSumNullable() { + return 1 + getNullable4(); +} + +/** +@compilation_should_fail +@stderr can not apply operator `+` to `int` and `int?` + */ diff --git a/tolk-tester/tests/invalid-typing-16.tolk b/tolk-tester/tests/invalid-typing-16.tolk new file mode 100644 index 00000000..1dca7822 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-16.tolk @@ -0,0 +1,13 @@ +@pure +fun myDictDeleteStrict(mutate self: cell, keyLen: int, key: int): bool + asm(key self keyLen) "DICTIDEL"; + + +fun testCantCallDictMethodsOnNullable(c: cell) { + c.beginParse().loadDict().myDictDeleteStrict(16, 1); +} + +/** +@compilation_should_fail +@stderr can not call method for `cell` with object of type `cell?` + */ diff --git a/tolk-tester/tests/invalid-typing-17.tolk b/tolk-tester/tests/invalid-typing-17.tolk new file mode 100644 index 00000000..b7302684 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-17.tolk @@ -0,0 +1,10 @@ + +fun testCantUseNullableAsCondition(x: int?) { + if (x) { return 1; } + return 0; +} + +/** +@compilation_should_fail +@stderr can not use `int?` as a boolean condition + */ diff --git a/tolk-tester/tests/invalid-typing-18.tolk b/tolk-tester/tests/invalid-typing-18.tolk new file mode 100644 index 00000000..cf985add --- /dev/null +++ b/tolk-tester/tests/invalid-typing-18.tolk @@ -0,0 +1,16 @@ +fun incrementOrSetNull(mutate x: int?) { + if (random()) { x! 
+= 1; } + else { x = null; } +} + +fun cantCallMutateMethodNotNullable() { + var x = 1; + incrementOrSetNull(mutate x); + return x; +} + +/** +@compilation_should_fail +@stderr can not pass `int` to mutate `int?` +@stderr because mutation is not type compatible + */ diff --git a/tolk-tester/tests/invalid-typing-19.tolk b/tolk-tester/tests/invalid-typing-19.tolk new file mode 100644 index 00000000..58b6c1fc --- /dev/null +++ b/tolk-tester/tests/invalid-typing-19.tolk @@ -0,0 +1,12 @@ +fun getNullableInt(): int? { return 5; } + +fun testCantApplyNotNullForAlwaysNull() { + var x: int? = getNullableInt(); + if (x != null) { return 0; } + return x! + 1; +} + +/** +@compilation_should_fail +@stderr operator `!` used for always null expression + */ diff --git a/tolk-tester/tests/invalid-typing-20.tolk b/tolk-tester/tests/invalid-typing-20.tolk new file mode 100644 index 00000000..457bc97a --- /dev/null +++ b/tolk-tester/tests/invalid-typing-20.tolk @@ -0,0 +1,15 @@ +fun getNullableInt(): int? { return 5; } + +fun testFlowContextAppliedInBinaryOperator() { + var x: int? = getNullableInt(); + var y: int? = getNullableInt(); + if ((y = null) < y) { + return -100; + } + return 0; +} + +/** +@compilation_should_fail +@stderr can not apply operator `<` to `null` and `null` + */ diff --git a/tolk-tester/tests/invalid-typing-21.tolk b/tolk-tester/tests/invalid-typing-21.tolk new file mode 100644 index 00000000..d2a815ee --- /dev/null +++ b/tolk-tester/tests/invalid-typing-21.tolk @@ -0,0 +1,14 @@ +fun getNullableInt(): int? { return 5; } + +fun testNeverTypeOccurs() { + var x: int? 
= getNullableInt(); + if (x == null && x != null) { + return x + 0; + } + return 0; +} + +/** +@compilation_should_fail +@stderr can not apply operator `+` to `never` and `int` + */ diff --git a/tolk-tester/tests/invalid-typing-22.tolk b/tolk-tester/tests/invalid-typing-22.tolk new file mode 100644 index 00000000..f962f364 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-22.tolk @@ -0,0 +1,9 @@ +fun testLogicalAndNotConditionDoesntAffect(x: int?) { + var gt1 = x != null && x > 1; + return x + 0; +} + +/** +@compilation_should_fail +@stderr can not apply operator `+` to `int?` and `int` + */ diff --git a/tolk-tester/tests/invalid-typing-23.tolk b/tolk-tester/tests/invalid-typing-23.tolk new file mode 100644 index 00000000..74feed52 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-23.tolk @@ -0,0 +1,15 @@ +fun getTensor(): (int?, int?) { return (5, null); } + +fun testSmartCastsForFieldsDropAfterAssign() { + var t = getTensor(); + if (t.0 != null && t.1 != null) { + t = getTensor(); + return t.0 + t.1; + } + return -1; +} + +/** +@compilation_should_fail +@stderr can not apply operator `+` to `int?` and `int?` + */ diff --git a/tolk-tester/tests/invalid-typing-24.tolk b/tolk-tester/tests/invalid-typing-24.tolk new file mode 100644 index 00000000..75f61be9 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-24.tolk @@ -0,0 +1,16 @@ +fun getNullableInt(): int? { return 5; } + +fun getTensor(x: int?): (int?, int) { return (x, 0); } + +fun testSmartCastsDropAfterAssign() { + var x: int? = 0; + var y: int? 
= 0; + (getTensor(x = getNullableInt()).0, getTensor(y = getNullableInt()).0) = (x + y, x - y); + return x+y; +} + +/** +@compilation_should_fail +@stderr can not apply operator `+` to `int?` and `int?` +@stderr x + y, x - y + */ diff --git a/tolk-tester/tests/invalid-typing-25.tolk b/tolk-tester/tests/invalid-typing-25.tolk new file mode 100644 index 00000000..1621bab1 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-25.tolk @@ -0,0 +1,14 @@ +fun takeNullableTensor(mutate ij: (int, int)?) { } + +fun testSmartCastsDropAfterMutate() { + var x: (int, int)? = (1, 2); + return x.0; // ok + takeNullableTensor(mutate x); + return x.1; // error +} + +/** +@compilation_should_fail +@stderr type `(int, int)?` is not indexable +@stderr return x.1 + */ diff --git a/tolk-tester/tests/invalid-typing-26.tolk b/tolk-tester/tests/invalid-typing-26.tolk new file mode 100644 index 00000000..bf5a1165 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-26.tolk @@ -0,0 +1,12 @@ +fun getNullableInt(): int? { return 5; } + +fun testAssertThrowIsConditional() { + var (x, y) = (getNullableInt(), getNullableInt()); + assert(x != null) throw(y = 10); + return x + y; +} + +/** +@compilation_should_fail +@stderr can not apply operator `+` to `int` and `int?` + */ diff --git a/tolk-tester/tests/invalid-typing-27.tolk b/tolk-tester/tests/invalid-typing-27.tolk new file mode 100644 index 00000000..3861403b --- /dev/null +++ b/tolk-tester/tests/invalid-typing-27.tolk @@ -0,0 +1,18 @@ +fun assignNull2(mutate x: T1?, mutate y: T2?) { + if (false) { + x = null; + y = null; + } +} + +fun testSmartCastsDropAfterNullableGeneric() { + var (x: int?, y: int?) 
= (1, 2); + x * y; // ok + assignNull2(x, y); // treated like assignments to nullable + x << y; // error +} + +/** +@compilation_should_fail +@stderr can not apply operator `<<` to `int?` and `int?` + */ diff --git a/tolk-tester/tests/invalid-typing-28.tolk b/tolk-tester/tests/invalid-typing-28.tolk new file mode 100644 index 00000000..5d60ff22 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-28.tolk @@ -0,0 +1,15 @@ +fun getNullableInt(): int? { return 5; } + +fun testReassignInRedef() { + var t1: int? = getNullableInt(); + if (t1 != null) { + var (t1 redef, t2) = (getNullableInt(), 5); + return t1 + t2; + } + return -1; +} + +/** +@compilation_should_fail +@stderr can not apply operator `+` to `int?` and `int` + */ diff --git a/tolk-tester/tests/invalid-typing-29.tolk b/tolk-tester/tests/invalid-typing-29.tolk new file mode 100644 index 00000000..e8a4e5e2 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-29.tolk @@ -0,0 +1,14 @@ +fun getNullableInt(): int? { return 5; } + +fun testTryBodyDontSmartCast() { + var x = getNullableInt(); + try { + x = 5; + } catch {} + return x * 10; // x is not int here; for now, we have no exception edges, assuming it can be anywhere inside try +} + +/** +@compilation_should_fail +@stderr can not apply operator `*` to `int?` and `int` + */ diff --git a/tolk-tester/tests/invalid-typing-30.tolk b/tolk-tester/tests/invalid-typing-30.tolk new file mode 100644 index 00000000..53dfc5ca --- /dev/null +++ b/tolk-tester/tests/invalid-typing-30.tolk @@ -0,0 +1,15 @@ +fun getNullableInt(): int? { return 5; } + +fun testDoWhileCondition() { + var (x: int?, y: int?) 
= (10, 20); + do { + x = getNullableInt(); + y = getNullableInt(); + } while(x == null); + return x * y; // x is 100% int, but y is not +} + +/** +@compilation_should_fail +@stderr can not apply operator `*` to `int` and `int?` + */ diff --git a/tolk-tester/tests/invalid-typing-44.tolk b/tolk-tester/tests/invalid-typing-44.tolk new file mode 100644 index 00000000..2ec5d0e8 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-44.tolk @@ -0,0 +1,9 @@ +fun cantAssignIntToTensor() { + var (x, y) = 2; + x + y; +} + +/** +@compilation_should_fail +@stderr can not assign `int` to a tensor + */ diff --git a/tolk-tester/tests/invalid-typing-45.tolk b/tolk-tester/tests/invalid-typing-45.tolk new file mode 100644 index 00000000..b357b637 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-45.tolk @@ -0,0 +1,9 @@ +fun cantAssignSizesMismatch() { + var [x, y] = [2, 3, 4]; + x + y; +} + +/** +@compilation_should_fail +@stderr can not assign `[int, int, int]`, sizes mismatch + */ diff --git a/tolk-tester/tests/logical-operators.tolk b/tolk-tester/tests/logical-operators.tolk index 29cd1d10..700f2a3c 100644 --- a/tolk-tester/tests/logical-operators.tolk +++ b/tolk-tester/tests/logical-operators.tolk @@ -53,9 +53,8 @@ fun testDict(last: int) { } @method_id(105) -fun testNotNull(x: int) { - // return [x == null, null == x, !(x == null), null == null, +(null != null)]; - return [x == null, null == x, !(x == null)]; +fun testNotNull(x: int?) 
{ + return [x == null, null == x, !(x == null), null == null, (null != null) as int]; } @method_id(106) @@ -170,8 +169,8 @@ fun main() { @testcase | 104 | 50 | 3 5 -1 @testcase | 104 | 100 | 3 5 5 @testcase | 104 | 0 | 3 -1 5 -@testcase | 105 | 0 | [ 0 0 -1 ] -@testcase | 105 | null | [ -1 -1 0 ] +@testcase | 105 | 0 | [ 0 0 -1 -1 0 ] +@testcase | 105 | null | [ -1 -1 0 -1 0 ] @testcase | 106 | | [ 0 0 0 -1 ] [ 0 0 0 ] [ -1 -1 -1 ] [ 0 -1 ] @testcase | 107 | | [ -1 -1 0 -1 ] [ 0 0 0 ] [ -1 -1 -1 ] [ -1 0 ] @testcase | 108 | 1 2 | -1 diff --git a/tolk-tester/tests/mutate-methods.tolk b/tolk-tester/tests/mutate-methods.tolk index ebd07aca..9ebf8b1d 100644 --- a/tolk-tester/tests/mutate-methods.tolk +++ b/tolk-tester/tests/mutate-methods.tolk @@ -307,7 +307,7 @@ fun main(){} ... incrementTwoInPlace CALLDICT // x y sum1 -ROT - 10 PUSHINT // sum1 x y '10=10 + 10 PUSHINT // sum1 x y '11=10 incrementTwoInPlace CALLDICT // sum1 x y sum2 s1 s3 s0 XCHG3 // x y sum1 sum2 }> diff --git a/tolk-tester/tests/never-type-tests.tolk b/tolk-tester/tests/never-type-tests.tolk new file mode 100644 index 00000000..89447389 --- /dev/null +++ b/tolk-tester/tests/never-type-tests.tolk @@ -0,0 +1,28 @@ +fun takeInt(a: int) {} + +@method_id(101) +fun test1(x: int?) { + if (x == null && x != null) { + var y = x; + __expect_type(y, "never"); + __expect_type(y!, "never"); + // `never` type is assignable to anything, flow won't reach this point + var t: (int, int) = x; + t = y; + takeInt(x); + var cb: (int) -> int = x; + x as int?; + x as (int, int)?; + x as never; + return x; + } + return 123; +} + +fun main() { + __expect_type(test1, "(int?) 
-> int"); +} + +/** +@testcase | 101 | null | 123 + */ diff --git a/tolk-tester/tests/null-keyword.tolk b/tolk-tester/tests/null-keyword.tolk index 69678434..65890a92 100644 --- a/tolk-tester/tests/null-keyword.tolk +++ b/tolk-tester/tests/null-keyword.tolk @@ -2,13 +2,13 @@ import "@stdlib/lisp-lists" @method_id(101) fun test1() { - var numbers: tuple = createEmptyList(); + var numbers: tuple? = createEmptyList(); numbers = listPrepend(1, numbers); numbers = listPrepend(2, numbers); numbers = listPrepend(3, numbers); numbers = listPrepend(4, numbers); - var (h: int, numbers redef) = listSplit(numbers); - h += listGetHead(numbers); + var (h: int, numbers redef) = listSplit(numbers!); + h += listGetHead(numbers!); _ = null; (_, _) = (null, null); @@ -22,22 +22,24 @@ fun test1() { } @method_id(102) -fun test2(x: int) { +fun test2(x: int?) { if (null != x) { - var y: int = null; + var y: int? = null; if (y != null) { return 10; } - return y; + if (10 < 20) { // always true at runtime (not at compile-time) + return y; + } } try { - return x + 10; // will throw, since not a number + return x! + 10; // will throw, since not a number } catch { return -1; } return 100; } -fun myIsNull(x: int): int { - return x == null ? -1 : x; +fun myIsNull(x: int?): int { + return x == null ? -1 : x!; } @method_id(103) @@ -45,14 +47,6 @@ fun test3(x: int) { return myIsNull(x > 10 ? null : x); } -fun getUntypedNull() { - var untyped: null = null; - if (true) { - return untyped; - } - return untyped; -} - @method_id(104) fun test4(): null { var (_, (_, untyped: null)) = (3, (createEmptyTuple, null)); @@ -62,23 +56,25 @@ fun test4(): null { return untyped; } -@method_id(105) -fun test5() { - var n: slice = getUntypedNull(); - return !(null == n) ? 
n.loadInt(32) : 100; -} - @method_id(107) fun test7() { - var b = beginCell().storeMaybeRef(null); - var s = b.endCell().beginParse(); + var b = beginCell().storeMaybeRef(null) as builder?; + var s = b!.endCell().beginParse(); var c = s.loadMaybeRef(); return (null == c) as int * 10 + (b != null) as int; } +fun test8() { + __expect_type(null, "null"); + __expect_type([[null]], "[[null]]"); + __expect_type(null as tuple?, "tuple?"); + __expect_type(null as [int]?, "[int]?"); + __expect_type(((null)) as (int, int)?, "(int, int)?"); +} + fun main() { - // now, the compiler doesn't optimize this at compile-time, fif codegen contains ifs - var i: int = null; + // the compiler optimizes this at compile-time + var i: int? = null; if (i == null) { return 1; } @@ -92,7 +88,6 @@ fun main() { @testcase | 103 | 5 | 5 @testcase | 103 | 15 | -1 @testcase | 104 | | (null) -@testcase | 105 | | 100 @testcase | 107 | | -11 @fif_codegen """ @@ -120,12 +115,7 @@ fun main() { """ main PROC:<{ // - PUSHNULL // i - ISNULL // '2 - IFJMP:<{ // - 1 PUSHINT // '3=1 - }> // - 10 PUSHINT // '4=10 + 1 PUSHINT // '3=1 }> """ @@ -133,14 +123,14 @@ fun main() { """ test7 PROC:<{ ... - LDOPTREF // b '8 '7 + LDOPTREF // b '9 '8 DROP // b c ISNULL // b '11 10 MULCONST // b '13 SWAP // '13 b ISNULL // '13 '14 - NOT // '13 '15 - ADD // '16 + NOT // '13 '14 + ADD // '15 }> """ */ diff --git a/tolk-tester/tests/nullable-tensors.tolk b/tolk-tester/tests/nullable-tensors.tolk new file mode 100644 index 00000000..d0720273 --- /dev/null +++ b/tolk-tester/tests/nullable-tensors.tolk @@ -0,0 +1,492 @@ +fun getNullableInt(): int? { return 5; } + +fun sumOfNullableTensorComponents(t: (int, int)?): int { + if (t == null) { return 0; } + return t!.0 + t!.1; +} + +fun isTensorNull(t: (int, int)?) 
{ + return t == null; +} + +fun incrementNullableTensorComponents(mutate self: (int, int)?): self { + if (self != null) { + self!.0 += 1; + self!.1 += 1; + } + return self; +} + +fun incrementTensorComponents(mutate self: (int, int)): self { + self.0 += 1; + self.1 += 1; + return self; +} + +fun assignFirstComponent(mutate t: (int, int), first: int) { + t!.0 = first; +} + +fun assignFirstComponentNullable(mutate t: (int, int)?, first: int) { + if (t == null) { + t = (first, 0); + } else { + t!.0 = first; + } +} + +fun getNullableTensor(firstComponent: int?): (int, int)? { + return firstComponent == null ? null : (firstComponent!, 2); +} + +fun sumOfTensor(x: (int, int)) { + return x.0 + x.1; +} + +fun assignNullTo(mutate x: T?) { + x = null; +} + +fun getTensor12() { + return (1,2); +} + +@method_id(101) +fun test101(): (int, int)? { + return (1, 2); +} + +@method_id(102) +fun test102(): ((int, int)?, (int, int)?) { + var t = (1, 2); + return (t, null); +} + +@method_id(103) +fun test103(t: (int, int)) { + var t2: (int, int)? = t; + return (sumOfNullableTensorComponents(t), sumOfNullableTensorComponents(t2), sumOfNullableTensorComponents(null), t2); +} + +@method_id(104) +fun test104() { + var t1_1: (int, int)? = (1, 2); + var t1_2: (int, int)? = t1_1; + var t1_3: (int, int)? = t1_1!; + var t2_1: (int, int)? = getNullableTensor(null); + var t2_2 = t2_1; + return (t1_3, t2_2); +} + +@method_id(105) +fun test105() { + return (null as (int, slice, cell)?, (1, 2, 3) as (int, int, int)?); +} + +@method_id(106) +fun test106() { + var t: (int?, int?)? = (((((1, 2))) as (int, int))); + return t; +} + +@method_id(107) +fun test107() { + var ab = (1, 2); + var ab2: (int, int)? = ab; + return (isTensorNull(ab), isTensorNull(ab2), isTensorNull(null), ab.isTensorNull(), ab2.isTensorNull(), null.isTensorNull()); +} + +@method_id(108) +fun test108(x1: (int, int)) { + incrementTensorComponents(mutate x1); + x1.incrementTensorComponents(); + var x2: (int, int)? 
= x1; + __expect_type(x2, "(int, int)"); + x2.incrementNullableTensorComponents().incrementNullableTensorComponents(); + incrementNullableTensorComponents(mutate x2); + __expect_type(x2, "(int, int)?"); + var x3: (int, int)? = null; + __expect_type(x3, "null"); + x3.incrementNullableTensorComponents().incrementNullableTensorComponents(); + incrementNullableTensorComponents(mutate x3); + return (x1, x2, x3); +} + +fun isTensorNullGen(t: (T1, T2)?) { + return t == null; +} + +@method_id(109) +fun test109() { + var x1 = (1, 2); + var x2: (int, int)? = x1; + var x3: (int, int)? = x1.1 > 10 ? (1, 2) : null; + return ( + isTensorNullGen(x1), isTensorNullGen(x2), isTensorNullGen(null), + isTensorNullGen(x1), isTensorNullGen(x3), + x1.isTensorNullGen(), x2.isTensorNullGen(), x3.isTensorNullGen(), null.isTensorNullGen() + ); +} + +global g110_1: (int, int); +global g110_2: (int, int)?; + +@method_id(110) +fun test110() { + g110_1 = getNullableTensor(1)!; + incrementTensorComponents(mutate g110_1); + g110_1.incrementTensorComponents(); + g110_2 = g110_1; + g110_2.incrementNullableTensorComponents().incrementNullableTensorComponents(); + incrementNullableTensorComponents(mutate g110_2); + var tmp = g110_2; + g110_2 = null; + g110_2.incrementNullableTensorComponents(); + incrementNullableTensorComponents(mutate g110_2); + return (g110_1, g110_2, tmp); +} + +@method_id(111) +fun test111() { + var x = (1, 2); + assignFirstComponent(mutate x, 50); + var x2: (int, int)? = null; + var x3 = x2 as (int, int)?; + assignFirstComponentNullable(mutate x2, 30); + assignFirstComponentNullable(mutate x3, 70); + g110_1 = (1, 2); + g110_2 = null; + assignFirstComponent(mutate g110_1, 90); + assignFirstComponentNullable(mutate g110_2, 100); + return (x.0, x2!.0, x3!.0, g110_1.0, g110_2!.0); +} + +@method_id(112) +fun test112() { + var x: (int, int)? 
= (10, 20); + incrementTensorComponents(mutate x!); + x!.incrementTensorComponents(); + return x; +} + +@method_id(113) +fun test113() { + var t = [1, null]; // t.1 is always null + return isTensorNull(t.1); +} + +@method_id(114) +fun test114(): ((slice, (cell, [int, slice, tuple]))?, slice?, (int?, bool?)?) { + var t = [[null]]; + return (t.0.0, t.0.0, t.0.0); +} + +@method_id(115) +fun test115() { + var tt = getNullableTensor(null); + assignFirstComponentNullable(mutate tt, 5); + return ( + getNullableTensor(1)!.incrementTensorComponents(), + sumOfNullableTensorComponents(getNullableTensor(1).incrementNullableTensorComponents().incrementNullableTensorComponents()), + getNullableTensor(null).incrementNullableTensorComponents(), + tt, + sumOfNullableTensorComponents(getNullableTensor(null)) + ); +} + +@method_id(116) +fun test116(returnNull: bool) { + var t1: (int, int)? = returnNull ? null : getTensor12(); + var t2 = returnNull ? null as (int, int)? : getTensor12() as (int, int)?; + returnNull ? null : (1, 2); + return (t1, t2); +} + +@method_id(117) +fun test117() { + var (a, b: (int, int)?, c) = (1, null, 3); + return (b, a, c); +} + +fun autoInferNullableTensor(a: int?, b: int) { + if (a != null) { + return (a!, b); + } + return null; +} + +@method_id(118) +fun test118(a: int?) { + return autoInferNullableTensor(a, 10); +} + +@method_id(119) +fun test119() { + var x: (int, int)? = (1, 2); + x = null; + var tt: (int, (int, int)?) = (0, (1, 2)); + tt.1 = null; + var third: (int, (int, int)?, int) = (0, (1, 2), 3); + third.2 = 100; + return (x, tt.1, third.1, third.2); +} + +@method_id(120) +fun test120(setNull: bool) { + var x: (int, int)? = (1, 2); + if (setNull) { + assignNullTo(mutate x); + } + return x; +} + +@method_id(121) +fun test121() { + var t: [int?, [int?, int?]?] = [1, [2, 3]]; + t.1 = [3, 4]; + return t; +} + +@method_id(122) +fun test122(setNull: bool) { + var t: [int?, [int?, int?]?, int?, [int?, int?]?]? 
= [1, [2, 3], 4, null]; + if (setNull) { + assignNullTo(mutate t!.1); + } else { + var rhs = [3, 4]; + t!!.1 = rhs; + } + return t; +} + +@method_id(123) +fun test123() { + var t: (int?, (int?, int?)?) = (1, (2, 3)); + t.1 = (3, 4); + return t; +} + +@method_id(124) +fun test124(setNull: bool) { + var t: (int?, (int?, int?)?, int?, (int?, int?)?)? = (1, (2, 3), 4, null); + if (setNull) { + assignNullTo(mutate t!.1); + } else { + var rhs = (3, 4); + t!!.1 = rhs; + } + return t; +} + +global g125: int; +fun getT125(): (int, (int, int)?, (int?, int)?) { return (g125 += 1, null, null); } + +@method_id(125) +fun test125() { + g125 = 0; + getT125().1 = null; + getT125().2 = (1, 2); + (getT125()!! as (int, (int, int)?, (int?, int)?)).2 = null; + // test that nothing left on a stack + return g125; +} + +@method_id(126) +fun test126() { + var tt1: (int, null, int) = (1, null, 2); + var (a: int, b: (int, int)?, c: int) = tt1; + return (a, b, c); +} + +@method_id(127) +fun test127(choice: int) { + var tt1: (int, null, int) = (1, null, 2); + var tt2: (int, (int, int), int) = (1, (2, 3), 4); + var tt3: (int, (int, int)?, int) = (1, null, 5); + var abc: (int, (int, int)?, int) = choice == 1 ? tt1 : choice == 2 ? tt2 : tt3; + return abc; +} + +fun get128_1() { return (1, null, 2); } +fun get128_2() { return null; } +fun get128_3() { return (1, (2, 3), 4); } +fun takeT128(abc: (int, (int, int)?, int)?) { return abc; } + +@method_id(128) +fun test128(choice: int) { + if (choice == 1) { + return takeT128(get128_1())!; + } + if (choice == 2) { + return takeT128(get128_2()); + } + return takeT128(get128_3()); +} + +@method_id(129) +fun test129(setNull: bool) { + var t: (int?, int?) = (getNullableInt(), getNullableInt()); + var r1 = (t, t == null, t != null); + t = (setNull ? null : 1, setNull ? null : 2); + var r2 = (t, t == null, t != null); + return (r1, r2); +} + +@method_id(130) +fun test130(setNull: bool) { + var os: (int, (int, int)?) = (1, setNull ? 
null : (2, 3)); + return os; +} + +fun getEmptyNullableTensor(getNull: bool): ()? { + return getNull ? null : (); +} + +@method_id(131) +fun test131() { + var nonNullEmptyT = getEmptyNullableTensor(false); + var nullEmptyT = getEmptyNullableTensor(true); + var emptyT = nonNullEmptyT!; + __expect_type(emptyT, "()"); + var doubleNulls1 = (null, null) as (()?, ()?); + var doubleNulls2 = ((), ()) as (()?, ()?); + var doubleNulls3 = ((), ()) as (()?, ()?)?; + var stillEmpty = ((), ()); + return (nonNullEmptyT, 777, nullEmptyT, 777, emptyT, 777, nullEmptyT!, 777, doubleNulls1, doubleNulls2, 777, doubleNulls3, 777, stillEmpty); +} + +@method_id(132) +fun test132() { + var doubleNulls: (()?, ()?) = (getEmptyNullableTensor(true), getEmptyNullableTensor(false)); + var result = ((null as ()?) == null, (() as ()?) == null, doubleNulls.0 == null, doubleNulls.1 == null); + var aln1: int? = (doubleNulls.1 = null); + var aln2: null = (doubleNulls.1 = null); + return (result, 777, aln1, aln2, doubleNulls.1 == null, doubleNulls); +} + +@method_id(133) +fun test133() { + var x: (int, int)? = (10, 20); + return sumOfTensor(x) + x.0 + x.1; // smart casted +} + +@method_id(134) +fun test134(): (int, int)? { + var x: (int, int)? = (10, 20); + incrementTensorComponents(mutate x); // smart casted + return x; +} + + +fun getNormalNullableTensorWidth1(vLess100: int?): ([int?], ())? { + if (vLess100 != null && vLess100 >= 100) { + return null; + } + return ([vLess100], ()); // such a nullable tensor can store NULL in the same slot +} + +fun getTrickyNullableTensorWidth1(vLess100: int?): (int?, ())? { + if (vLess100 != null && vLess100 >= 100) { + return null; + } + return (vLess100, ()); // such a nullable tensor requires an extra stack slot for null presence +} + +fun getEvenTrickierNullableWidth1(vLess100: int?): ((), (int?, ()), ())? 
{ + if (vLess100 != null && vLess100 >= 100) { + return null; + } + return ((), (vLess100, ()), ()); +} + +@method_id(135) +fun test135() { + var n1 = getNormalNullableTensorWidth1(10); // ([10], ()) + var n2 = getNormalNullableTensorWidth1(null); // ([null], ()) + var n3 = getNormalNullableTensorWidth1(100); // null + var t1 = getTrickyNullableTensorWidth1(10); // (10, ()) + var t2 = getTrickyNullableTensorWidth1(null); // (null, ()) + var t3 = getTrickyNullableTensorWidth1(100); // null + var e1 = getEvenTrickierNullableWidth1(10); // ((), (10, ()), ()) + var e2 = getEvenTrickierNullableWidth1(null); // ((), (null, (), ()) + var e3 = getEvenTrickierNullableWidth1(100); // null + return (n1, n2, n3, 777, t1, t2, t3, 777, e1, e2, e3, 777, + n1 == null, n2 == null, n3 == null, t1 == null, t2 == null, t3 == null, e1 == null, e2 == null, e3 == null, 777, + t1!.0 == null, t2!.0 == null, e1!.1.0 == null, e1!.1.1 == null, e2!.1.0 == null, e2!.1.1 == null); +} + + + +fun main(){} + +/** +@testcase | 101 | | 1 2 -1 +@testcase | 102 | | 1 2 -1 (null) (null) 0 +@testcase | 103 | 1 2 | 3 3 0 1 2 +@testcase | 104 | | 1 2 (null) (null) 0 +@testcase | 105 | | (null) (null) (null) 0 1 2 3 -1 +@testcase | 106 | | 1 2 +@testcase | 107 | | 0 0 -1 0 0 -1 +@testcase | 108 | 5 6 | 7 8 10 11 -1 (null) (null) 0 +@testcase | 109 | | 0 0 -1 0 -1 0 0 -1 -1 +@testcase | 110 | | 3 4 (null) (null) 0 6 7 -1 +@testcase | 111 | | 50 30 70 90 100 +@testcase | 112 | | 12 22 +@testcase | 113 | | -1 +@testcase | 114 | | (null) (null) (null) 0 (null) (null) (null) 0 +@testcase | 115 | | 2 3 7 (null) (null) 0 5 0 -1 0 +@testcase | 116 | -1 | (null) (null) 0 (null) (null) 0 +@testcase | 116 | 0 | 1 2 -1 1 2 -1 +@testcase | 117 | | (null) 1 3 +@testcase | 118 | 5 | 5 10 -1 +@testcase | 118 | null | (null) (null) 0 +@testcase | 119 | | (null) (null) 1 2 -1 100 +@testcase | 120 | -1 | (null) (null) 0 +@testcase | 120 | 0 | 1 2 -1 +@testcase | 121 | | [ 1 [ 3 4 ] ] +@testcase | 122 | 0 | [ 1 [ 3 4 ] 4 
(null) ] +@testcase | 122 | -1 | [ 1 (null) 4 (null) ] +@testcase | 123 | | 1 3 4 -1 +@testcase | 124 | 0 | 1 3 4 -1 4 (null) (null) 0 +@testcase | 124 | -1 | 1 (null) (null) 0 4 (null) (null) 0 +@testcase | 125 | | 3 +@testcase | 126 | | 1 (null) 2 +@testcase | 127 | 1 | 1 (null) (null) 0 2 +@testcase | 127 | 2 | 1 2 3 -1 4 +@testcase | 127 | 3 | 1 (null) (null) 0 5 +@testcase | 128 | 1 | 1 (null) (null) 0 2 -1 +@testcase | 128 | 2 | (null) (null) (null) (null) (null) 0 +@testcase | 128 | 3 | 1 2 3 -1 4 -1 +@testcase | 129 | 0 | 5 5 0 -1 1 2 0 -1 +@testcase | 129 | -1 | 5 5 0 -1 (null) (null) 0 -1 +@testcase | 130 | 0 | 1 2 3 -1 +@testcase | 130 | -1 | 1 (null) (null) 0 +@testcase | 131 | | -1 777 0 777 777 777 0 0 -1 -1 777 -1 -1 -1 777 +@testcase | 132 | | -1 0 -1 0 777 (null) (null) -1 0 0 +@testcase | 133 | | 60 +@testcase | 134 | | 11 21 -1 +@testcase | 135 | | [ 10 ] [ (null) ] (null) 777 10 -1 (null) -1 (null) 0 777 10 -1 (null) -1 (null) 0 777 0 0 -1 0 0 -1 0 0 -1 777 0 -1 0 0 -1 0 + +@fif_codegen +""" + isTensorNull PROC:<{ + // t.0 t.1 t.NNFlag + 2 1 BLKDROP2 // t.NNFlag + 0 EQINT // '3 + }> +""" + +@fif_codegen +""" + test113 PROC:<{ + // + 1 PUSHINT // '2=1 + PUSHNULL // '2=1 '3 + PAIR // t + 1 INDEX // '5 + PUSHNULL // '5 '6 + 0 PUSHINT // '5 '6 '7=0 + isTensorNull CALLDICT // '8 + }> +""" +*/ diff --git a/tolk-tester/tests/nullable-types.tolk b/tolk-tester/tests/nullable-types.tolk new file mode 100644 index 00000000..24aa7f8a --- /dev/null +++ b/tolk-tester/tests/nullable-types.tolk @@ -0,0 +1,109 @@ + +fun getNullable4(): int? { return 4; } +fun getNullableIntNull(): int? asm "PUSHNULL"; + +fun eqInt(x: int) { return x; } +fun eq(x: T) { return x; } + +fun unwrap(x: T?): T { return x!; } +fun intOr0(x: int?): int { return null == x ? 0 : x!; } + +@method_id(101) +fun test101(x: int) { + var re = x == 0 ? null : 100; + return re == null ? re : 200 + getNullable4()!; +} + +@method_id(102) +fun test102(a: int) { + try { + throw (123, a > 10 ? 
null : a); + return 0; + } catch (excno, arg) { + var i = arg as int?; + return excno + (i != null ? i!!!!! : -100); + } +} + +@method_id(103) +fun test103(x: int?): (bool, bool, int) { + var x_gt_0 = x != null && eqInt(x!) > 0; + var x_lt_0 = x != null && eq(x)! < 0; + if (x == null) { + return (x_gt_0, x_lt_0, 0); + } + return (x_gt_0, x_lt_0, x!); +} + +@method_id(104) +fun test104(x: int?) { + var x2 = eq(x = 10); + var ab = (x2, getNullableIntNull()); + return (unwrap(ab.0) + (ab.1 == null ? -100 : ab.1!), ab.1); +} + +@method_id(105) +fun test105() { + var xy: (int?, int?) = (5, null); + var ab = [1 ? [xy.0, xy.1] : null]; + ab.0!.0 = intOr0(ab.0!.0); + ab.0!.1 = intOr0(ab.0!.1); + return ab.0!.0! + ab.0!.1!; +} + +global gTup106: tuple?; +global gInt106: int?; + +@method_id(106) +fun test106() { + gInt106 = 0; + gInt106! += 5; + var int106: int? = 0; + var gTup106 = createEmptyTuple(); + gTup106!.tuplePush(createEmptyTuple()); + (gTup106!.0 as tuple?)!.tuplePush(0 as int?); + tuplePush(mutate gTup106!, gInt106); + tuplePush(mutate gTup106!.0, int106! += 1); + return (gTup106 == null, null != gTup106, gTup106, gTup106!.0 as tuple?); +} + +@method_id(107) +fun test107() { + var b: builder? = beginCell(); + b!.storeInt(1, 32).storeInt(2, 32); + b = b!.storeInt(3, 32); + storeInt(mutate b!, 4, 32); + (b! as builder).storeInt(5, 32); + return b!.getBuilderBitsCount(); +} + +@method_id(108) +fun test108() { + var (a, b: cell?, c) = (1, beginCell().endCell(), 3); + if (10>3) { b = null; } + return a + (b == null ? 0 : b!.beginParse().loadInt(32)) + c; +} + +@method_id(109) +fun test109() { + var a = getNullable4(); + var b = getNullable4(); + return ([a, b] = [3, 4], a, b); +} + +fun main(x: int?, y: int?) 
{ +} + +/** +@testcase | 101 | 0 | (null) +@testcase | 101 | -1 | 204 +@testcase | 102 | 5 | 128 +@testcase | 102 | 15 | 23 +@testcase | 103 | 10 | -1 0 10 +@testcase | 104 | 8 | -90 (null) +@testcase | 105 | | 5 +@testcase | 106 | | 0 -1 [ [ 0 1 ] 5 ] [ 0 1 ] +@testcase | 107 | | 160 +@testcase | 108 | | 4 +@testcase | 109 | | [ 3 4 ] 3 4 + */ diff --git a/tolk-tester/tests/smart-cast-tests.tolk b/tolk-tester/tests/smart-cast-tests.tolk new file mode 100644 index 00000000..4d71bb63 --- /dev/null +++ b/tolk-tester/tests/smart-cast-tests.tolk @@ -0,0 +1,678 @@ +// the goal of this file is not only to @testcase results — +// but to check that this file compiles + +fun getNullableInt(): int? { return 5; } +fun getNullableSlice(): slice? { return null; } +fun takeNullableInt(a: int?) {} +fun takeNullableSlice(a: slice?) {} +fun increment(mutate self: int) { self += 1; } +fun assignToInt(mutate self: int, value: int) { self = value; } +fun assignToNullableInt(mutate self: int?, value: int) { self = value; } +fun sameTensor(t: (int, int)) { return t; } +fun sameTensor2(t: (int?, (slice, slice, slice, builder)?)) { return t; } +fun eq(v: T) { return v; } +fun getTwo(): X { return 2 as X; } + +fun test1(): int { + var x = getNullableInt(); + var y = getNullableInt(); + if (x != null && y != null) { + __expect_type(x, "int"); + __expect_type(y, "int"); + return x + y; + } + return -1; +} + +fun test2() { + var (x, y) = (getNullableInt(), getNullableInt()); + if (x == null || y == null) { + return null; + } + __expect_type(x, "int"); + __expect_type(y, "int"); + return x + y; +} + +fun test3(): int { + var ([x, y]) = [getNullableInt(), getNullableInt()]; + if (x != null) { + if (((y)) != null) { + __expect_type(x, "int"); + __expect_type(y, "int"); + return x + y; + } + return x; + } + if (random() > -1) { + if (y == null) { return -1; } + else { return y; } + } + return 0; +} + +fun test4() { + var x = getNullableInt(); + if (x != null && x > 0) { + var x = 
getNullableInt(); + if ((x) != null && x + 10 < 0) { + var x = getNullableInt(); + return 10 > 3 && 10 < 10 && x != null && x + 8 > 10; + } + } + if (x != null && x < 1) { + return false; + } + if (x == null && x == null) { + __expect_type(x, "null"); + return true; + } + return x < x + 3; +} + +fun test5() { + var (a, (b, c)) = (getNullableInt(), (getNullableInt(), getNullableInt())); + if (a == null) { return -1; } + if (!(b != null)) { return -2; } + if (random() ? c == null && c == null : c == null) { return -3; } + return a + b + c; +} + +fun test6() { + var a: int? = 5; + __expect_type(a, "int"); + __expect_type(a != null ? a : null, "int"); + __expect_type(a == null ? "" : a, "int"); + takeNullableInt(a); + __expect_type(a, "int"); + if (random()) { + a = null; + } else { + if (random()) { a = null; } + else { a = null; } + } + __expect_type(a, "null"); + takeNullableSlice(a); // ok, `slice?` is `slice | null`, here a definitely null + var b: int? = true ? null : "sl"; + __expect_type(b, "null"); + takeNullableInt(b); + takeNullableSlice(b); // same reason + var c: int? = 10; + __expect_type(c, "int"); + takeNullableSlice(c = null); +} + +fun test7() { + var (a, b, c, d) = (getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt()); + if (a == null && true) { return -1; } + if (true && true && 1 && !0 && b == null) { return -2; } + if (true ? c == null && (((c))) == null && true : false) { return -3; } + if (!true ? random() > 0 : a != null && (d == null && b != null)) { return -4; } + return a + b + c + d; +} + +fun test8(x: int?, y: int?) 
{ + var allGt1 = x != null && x > 1 && y != null && y > 1; + var xGtY = x != null && y != null && x > y; + var xLtEq0 = x == null || x < 0; + (x = 0) < random() || x > 10; + return x + 0; +} + +fun test9() { + var x = getNullableInt(); + var y = getNullableInt(); + if (x == null || y == null) { + return -1; + } + __expect_type(x, "int"); + __expect_type(y, "int"); + return x + y; +} + +fun test10(): int { + var (x, y) = (getNullableInt(), getNullableInt()); + if (x == null) { + if (y == null) { return -1; } + __expect_type(x, "null"); + __expect_type(y, "int"); + return y; + } + if (y == null) { + return x; + } + __expect_type(x, "int"); + __expect_type(y, "int"); + return x + y; +} + +fun test11() { + var [x, y] = [getNullableInt(), getNullableInt()]; + if (random()) { return x == null || y == null ? -1 : x + y; } + if (true && (x == null || y == null) && !!true) { return 0; } + return x + y; +} + +fun test12() { + var (x, y) = (getNullableInt(), getNullableInt()); + if (random() ? x == null || y == null : x == null || y == null) { return -1; } + __expect_type(x, "int"); + __expect_type(y, "int"); + return x + y; +} + +fun test13() { + var x: int? = getNullableInt(); + var y: int? = 10; + var z = getNullableInt(); + var w = getNullableInt(); + beginCell().storeInt(x!, 32).storeInt(x = getNullableInt()!, 32).storeInt(x, 32) + .storeInt(y, 32).storeInt(z = 10, 32).storeInt(x + y + z, 32) + .storeInt(w == null ? -1 : w, 32).storeInt(!(null == w) ? 
w : -1, 32); +} + +fun test14() { + var (x, y) = (getNullableInt(), getNullableInt()); + if (x == null) { + x = 0; + } + if (y == null) { + if (random()) { return 0; } + else { y = 0; } + } + return x + y; +} + +fun test20() { + var t = (getNullableInt(), getNullableInt()); + if (t.0 != null && t.1 != null) { + __expect_type(t.0, "int"); + __expect_type(t.1, "int"); + return t.0 + t.1; + } + t.0 = 10; + if (t.1 == null) { + t.1 = 20; + } + __expect_type(t.0, "int"); + __expect_type(t.1, "int"); + return t.0 + t.1; +} + +fun test21() { + var t = (getNullableInt(), (getNullableInt(), getNullableInt())); + if (t.0 != null && t.1.0 != null) { + if (t.1.1 != null) { return t.0 + t.1.0 + t.1.1; } + return t.0 + t.1.0; + } + if (t.0 != null) { + return t.0 + 0; + } + __expect_type(t.0, "null"); + __expect_type(t.1.0, "int?"); + return t.1.0 == null ? -1 : t.1.0 + 0; +} + +fun test22() { + var t = (getNullableInt(), (getNullableInt(), getNullableInt())); + if (t.0 == null || t.1.0 == null || t.1.1 == null) { + return -1; + } + return t.0 + t.1.0 + t.1.1; +} + +@method_id(123) +fun test23() { + var (x: int?, y: int?, z: int?) = (getNullableInt(), getNullableInt(), getNullableInt()); + ((x = 1, 0).0, (y = 2, 1).0) = (3, z = 4); + return x + y + z; +} + +@method_id(124) +fun test24(x: int?) { + if (x == null) { + __expect_type(x, "null"); + assignToNullableInt(mutate x, 10); + __expect_type(x, "int?"); + x.assignToNullableInt(x! 
+ 5); + } else { + __expect_type(x, "int"); + increment(mutate x); + x.increment(); + __expect_type(x, "int"); + } + __expect_type(x, "int?"); + return x; +} + +fun test25() { + var x = (getNullableInt(), getNullableInt(), getNullableInt()); + x.0 = x.2 = random(); + return (x.0) + ((x.2)); +} + +fun test26() { + var x = [getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt(), + getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt()]; + if (~(x.0 = random())) { return; } + if ((x.1 = random()) < (x.2 = random())) { return; } + else if (!(x.2 <=> (x.3 = random()))) { return; } + x.5 = (x.4 = random()) ? (x.6 = random()) : (x.6 = random()); + if ((x.7 = random()) as int) { return; } + if (((((x.8 = random()) != null)))) { return; } + if ([x.1, (x.9 = random())!].1) { return; } + val result = x.0+x.1+x.2+x.3+x.4+x.5+x.6+x.7+x.8+x.9; +} + +fun test27() { + var (x, _) = ([getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt(), + getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt()], []); + +(x.0 = random()); + x.0 += [((x.1 = random()) < (x.2 = random() + x.1)) as int].0; + !(x.2 <=> (x.3 = random() + x.2)); + x.5 = (x.4 = random()) ? (x.6 = random()) : (x.6 = random()); + (x.7 = random()) as int; + (((((x.8 = random()) != null)))); + [x.1, (x.9 = random())!].1; + return x.0+x.1+x.2+x.3+x.4+x.5+x.6+x.7+x.8+x.9; +} + +fun test28() { + var x = (getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt()); + __expect_type((x.0 = random(), x.0 += (x.1 = random()) as int, !(x.1 <=> (x.2 = random() + x.0)) == null, (x.3 = random()) ? x.3 : (!x.3) as int), + "(int, int, bool, int)"); +} + +fun test29() { + var x = (getNullableInt(), getNullableInt(), getNullableInt(), getNullableInt()); + __expect_type([x.0 = random(), ((x.0 += (x.1 = random()) as int)), !(x.1 <=> (x.2 = random() + x.0)) == null, (x.3 = random()) ? 
x.3 : (!x.3) as int], + "[int, int, bool, int]"); +} + +@method_id(130) +fun test30(initial5: bool) { + var t: (int?, (int?, (int?, int?))) = initial5 + ? (getNullableInt(), (getNullableInt(), (getNullableInt(), getNullableInt()))) + : (null, (null, (null, null))); + if (t.0 == null || t.1.0 == null || t.1.1.0 == null || t.1.1.1 == null) { + if (t.1.0 == null || t.1.1.0 == null) { + if (t.1.1.0 == null) { + t.1.1.0 = 4; + } + __expect_type(t.1.1.0, "int"); + __expect_type(t.1.1.1, "int?"); + __expect_type(t.1.0, "int?"); + t.1.1.1 = 3; + t.1.0 = 2; + __expect_type(t.1.1.1, "int"); + __expect_type(t.1.0, "int"); + } + if (((((t.1.1.1)))) != null) {} + else { t.1.1.1 = 3; } + t.0 = 1; + } + return t.0 + t.1.0 + t.1.1.0 + t.1.1.1; +} + +fun test31() { + var t = (getNullableInt(), getNullableInt()); + t.0 == null ? (t.0, t.1) = (1, 2) : (t.1, t.0) = (4, 3); + return t.0 + t.1; +} + +@method_id(132) +fun test32() { + var t: (int?, (int?, int?)?, (int?, int?)) = (getNullableInt(), (getNullableInt(), getNullableInt()), (getNullableInt(), getNullableInt())); + if (t.0 == null) { return -1; } + t.1 != null && t.1.0 == null ? t.1 = (1, 2) : t.1 = (3, 4); + if (t.2.1 != null) { t.2.0 = 1; t.2.1 = 2; } + else { [t.2.0, t.2.1] = [3, 4]; } + return t.0 + t.1.0! + t.1.1! 
+ t.2.0 + t.2.1; +} + +@method_id(133) +fun test33(): int { + var x = getNullableInt(); + repeat (eq(x = 5)) { + __expect_type(x, "int"); + increment(mutate x); + } + return x; +} + +fun test34() { + var (x, y) = (getNullableInt(), getNullableInt()); + if (random()) { throw (x = 1, y = 2); } + else { throw (x = 3, y = (1, getNullableInt()!).1); } + return x + y; +} + +fun test35() { + var (x, y, z, t) = (getNullableInt(), getNullableInt(), getNullableInt(), (getNullableInt(), getNullableInt())); + assert (x != null, 404); + assert (t.0 != null && true && !(t.1 == null) && !(z = 4)) throw (y = 404); + __expect_type(y, "int?"); + return x + t.0 + t.1 + z; +} + +fun test36() { + var x = getNullableInt(); + assert (x == null, x + 0); // check that x is int there + __expect_type(x, "null"); +} + +fun test37() { + var (x, code) = (getNullableInt()!, getNullableInt()); + try { + } catch(code) { + x = 20; + return x + code; // code is scoped + } + return code == null ? x : x + code; +} + +fun assignNull2(mutate x: T1?, mutate y: T2?) { + x = null; + y = null; +} + +fun test38() { + var (x: int?, y: int?) = (1, 2); + __expect_type(x, "int"); + __expect_type(y, "int"); + assignNull2(mutate x, mutate y); + __expect_type(x, "int?"); + __expect_type(y, "int?"); + if (x != null) { + if (y == null) { return -1; } + return x + y; + } + var t: (int?, slice?) = (null, null); + if (!false) { t.0 = 1; } + if (true) { t.1 = beginCell().endCell().beginParse(); } + __expect_type(t.0, "int"); + __expect_type(t.1, "slice"); + t.0 + t.1.loadInt(32); + assignNull2(mutate t.0, mutate t.1); + __expect_type(t.0, "int?"); + __expect_type(t.1, "slice?"); + t.0 != null && t.1 != null ? t.0 + loadInt(mutate t.1, 32) : -1; + return t.0 != null && t.1 != null ? t.0 + loadInt(mutate t.1, 32) : -1; +} + +@method_id(139) +fun test39() { + var x: (int?, int?)? = (4, null); + x.1 = 10; + x.1 += 1; + x!.1 += 1; + return (x!.0! + x.1); +} + +@method_id(140) +fun test40(second: int?) { + var x: (int?, int?)? 
= (4, second); + if (x.1 != null) { + val result = x.1 + x!.1 + x!!.1 + x.1! + x!!.1!!; + } + if (x!.1 != null) { + val result = x.1 + x!.1 + x!!.1 + x.1! + x!!.1!!; + } + if (!(x!!.1 != null)) { + return -1; + } + return x.1 + x!.1 + x!!.1 + x.1! + x!!.1!!; +} + +@method_id(141) +fun test41() { + var t: (int, int)? = null; + return sameTensor(t = (1, 2)); +} + +@method_id(142) +fun test42() { + var t: (int?, (int?, (int, int)?)?) = (getNullableInt(), (1, (2, 3))); + t.1 = (3,null); + __expect_type(t.1, "(int?, (int, int)?)"); + __expect_type(t, "(int?, (int?, (int, int)?)?)"); + return (t, t.1); +} + +@method_id(143) +fun test43() { + var t1: ((int, int), int?) = ((1, 2), 3); + var t2: ((int?, int?), (int?,int?)?) = ((null, null), (null, 5)); + t2.0 = t1.0 = (10, 11); + t2.1 = t1.1 = null; + return (t1, t2); +} + +@method_id(144) +fun test44() { + var t1: ((int, int), int?) = ((1, 2), 3); + var t2: ((int?, int?), (int?,int?)?) = ((null, null), (null, 5)); + t1.0 = t2.0 = (10, 11); + t1.1 = t2.1 = null; + __expect_type(t1, "((int, int), int?)"); + __expect_type(t2, "((int?, int?), (int?, int?)?)"); + return (t1, t2); +} + +@method_id(145) +fun test45() { + var t: (int?, (int?, (int, int)?)?) = (getNullableInt(), (1, (2, 3))); + var t2 = sameTensor2(t.1 = (3,null)); + return (t, t2, t.1); +} + +fun autoInfer46() { + var t1: int? = 3; + var t2: (int, int)? = (4, 5); + __expect_type(t1, "int"); + __expect_type(t2, "(int, int)"); + return (t1, t2); // proven to be not null, inferred (int, (int,int)) +} + +@method_id(146) +fun test46() { + var r46_1: (int, (int,int)) = autoInfer46(); + var r46_2: (int, (int,int)?) = autoInfer46(); + return (r46_1, r46_2); +} + +@method_id(147) +fun test47() { + var t1: int? = 3; + var t2: (int, int)? 
= (4, 5); + t1 = t2 = null; + __expect_type(t1, "null"); + __expect_type(t2, "null"); + var result = (t1, t2); // proven to be always null, inferred (null, null), 2 slots on a stack + return (result, 100, result.1, 100, t2 as (int, int)?); +} + +fun test48() { + var t1: int? = getNullableInt(); + if (t1 != null) { + var (t1 redef, t2) = (10, 5); + return t1 + t2; + var t2 redef = getNullableInt()!; + return t1 + t2; + } + return -1; +} + +fun test49(x: int?) { + while (x == null) { + x = getNullableInt(); + } + __expect_type(x, "int"); + return x + 1; +} + +fun test50() { + var (x: int?, y: int?) = (1, 2); + do { + x = getNullableInt(); + y = getNullableInt(); + } while (x == null || y == null); + return x + y; +} + +fun test51() { + while (true) { return; } + // test that no error "control reaches end of function" +} + +fun test52() { + do { } while (true); +} + +fun test53() { + var x1: int? = getNullableInt(); + var x2: int? = 5; + var x3: int? = 5; + var x10: int? = null; + var x11: int? = 5; + var x12: int? = 5; + while (x1 != null) { + __expect_type(x1, "int"); // because condition + __expect_type(x2, "int?"); // because re-assigned + __expect_type(x3, "int?"); // because re-assigned + __expect_type(x10, "null"); + __expect_type(x11, "int"); + x1 = getNullableInt(); + __expect_type(x1, "int?"); + assignToNullableInt(mutate x2, 5); + x3.assignToNullableInt(5); + x11 = 10; + assignToInt(mutate x12, 5); + } + __expect_type(x1, "null"); + __expect_type(x2, "int?"); + __expect_type(x3, "int?"); +} + +fun test54() { + var x1: int? = null; + var x2: int? = 5; + var x3: int? = 5; + var x10: int? = null; + var x11: int? = 5; + var x12: int? 
= 5; + do { + __expect_type(x1, "int?"); // because re-assigned + __expect_type(x2, "int?"); // because re-assigned + __expect_type(x3, "int?"); // because re-assigned + __expect_type(x10, "null"); + __expect_type(x11, "int"); + x1 = getNullableInt(); + __expect_type(x1, "int?"); + assignToNullableInt(mutate x2, 5); + if (random()) { x3.assignToNullableInt(5); } + x11 = 10; + assignToInt(mutate x12, 5); + } while (x1 != null); + __expect_type(x1, "null"); + __expect_type(x2, "int?"); + __expect_type(x3, "int?"); +} + +fun eq55(v: T) { return v; } + +fun test55() { + var x: int? = 4; + while (true) { + // currently, generic functions are instantiated at the type inferring step + // in case of loops, type inferring is re-enterable + // first iteration: x is int, eq instantiated + // second (final) iteration: x is int?, eq instantiated + // (checked via codegen) + eq55(x); + __expect_type(x, "int?"); // types are checked (unlike generics instantiated) after inferring + x = random() ? 1 : null; + } + __expect_type(x, "int?"); +} + +fun test56() { + var i: int? = null; + var (j: int?, k: int?) = (null, null); + __expect_type(i, "null"); + __expect_type(k, "null"); + i = getTwo(); + [j, ((k))] = [getTwo(), ((getTwo()))]; + __expect_type(i, "int?"); + __expect_type(j, "int?"); + __expect_type(k, "int?"); +} + +fun test57(mutate x: int?): int { + if (x == null) { x = 5; } + else { + if (x < 10) { x = 10; } + else { x = 20; } + } + if (x != null) { + return 123; + } + __expect_type(x, "int"); + // no "return" needed, because end of function is unreachable +} + +@method_id(158) +fun test58() { + var (x1, x2: int?) 
= (getNullableInt(), null); + return (test57(mutate x1), x1, test57(mutate x2), x2); +} + +fun test59() { + var (x1: int?, x2, x3) = (getNullableInt()!, getNullableInt(), 5); + if ((x2 = x3) != null) { + __expect_type(x2, "int"); + } + __expect_type(x2, "int"); + if ((x2 = getNullableInt()) != null) { + __expect_type(x2, "int"); + } + __expect_type(x2, "int?"); + if (((x1) = x2) == null) { + return; + } + __expect_type(x1, "int"); +} + + + +fun main(x: int?): int { + return x == null ? -1 : x; +} + +/** +@testcase | 0 | 1 | 1 +@testcase | 123 | | 7 +@testcase | 124 | 4 | 6 +@testcase | 124 | null | 15 +@testcase | 130 | -1 | 20 +@testcase | 130 | 0 | 10 +@testcase | 132 | | 15 +@testcase | 133 | | 10 +@testcase | 139 | | 16 +@testcase | 140 | 5 | 25 +@testcase | 141 | | 1 2 +@testcase | 142 | | 5 3 (null) (null) 0 -1 3 (null) (null) 0 +@testcase | 143 | | 10 11 (null) 10 11 (null) (null) 0 +@testcase | 144 | | 10 11 (null) 10 11 (null) (null) 0 +@testcase | 145 | | 5 3 (null) (null) 0 -1 3 (null) (null) (null) (null) 0 3 (null) (null) 0 +@testcase | 146 | | 3 4 5 3 4 5 -1 +@testcase | 147 | | (null) (null) 100 (null) 100 (null) (null) 0 +@testcase | 158 | | 123 10 123 5 + +@stderr warning: expression of type `int` is always not null, this condition is always true +@stderr warning: unreachable code +@stderr var t2 redef = getNullableInt()!; + +@fif_codegen eq55 PROC:<{ +@fif_codegen eq55 PROC:<{ +*/ diff --git a/tolk-tester/tests/try-func.tolk b/tolk-tester/tests/try-func.tolk index dfd72e9e..4ac86d96 100644 --- a/tolk-tester/tests/try-func.tolk +++ b/tolk-tester/tests/try-func.tolk @@ -164,6 +164,78 @@ fun test109(): (int, int) { return (g_reg, l_reg); } +fun alwaysThrow123(): never { + throw 123; +} + +fun alwaysThrowX(x: int): never { + if (x > 10) { throw (x, beginCell()); } + else { throw (x, null); } +} + +fun anotherNever(throw123: bool): never { + if (throw123) { alwaysThrow123(); } + alwaysThrowX(456); +} + +fun testCodegen1(x: int) { + if (x > 10) { + 
throw 123; + anotherNever(true); // unreachable, will be dropped + } + else if (x < 10) { + throw x; + return -123; // unreachable, will be dropped + } + return 0; +} + +fun testCodegen2(x: int) { + if (x > 10) { + alwaysThrow123(); + anotherNever(true); // unreachable, will be dropped + } + else if (x < 10) { + anotherNever(false); + return -123; // unreachable, will be dropped + } + return 0; +} + +@method_id(110) +fun test110(b: bool) { + try { + if (b == true) { testCodegen1(100); } + testCodegen1(5); + return -1; + } catch (ex) { + return ex; + } +} + +@method_id(111) +fun test111(b: bool) { + try { + if (b == true) { testCodegen2(100); } + testCodegen2(5); + return -1; + } catch (ex) { + return ex; + } +} + +fun mySetCode(newCode: slice): void + asm "SETCODE"; + +fun testCodegen3(numberId: int, paramVal: cell) { + if (numberId == -1000) { + var cs = paramVal.beginParse(); + mySetCode(cs); + throw 0; + } + paramVal.beginParse(); +} + fun main() { } @@ -187,6 +259,65 @@ fun main() { @testcase | 107 | 5 | 5 @testcase | 107 | 20 | 20 @testcase | 108 | | 0 +@testcase | 109 | | 10 10 +@testcase | 110 | -1 | 123 +@testcase | 110 | 0 | 5 +@testcase | 111 | -1 | 123 +@testcase | 111 | 0 | 456 -@code_hash 39307974281105539319288356721945232226028429128341177951717392648324358675585 +@code_hash 57361460846265694653029920796509802052573595128418810728101968091567195330515 + +@fif_codegen +""" + testCodegen1 PROC:<{ + // x + DUP // x x + 10 GTINT // x '2 + IFJMP:<{ // x + 123 THROW + }> // x + DUP // x x + 10 LESSINT // x '6 + IFJMP:<{ // x + THROWANY + }> // x + DROP // + 0 PUSHINT // '8=0 + }> +""" + +@fif_codegen +""" + testCodegen2 PROC:<{ + // x + DUP // x x + 10 GTINT // x '2 + IFJMP:<{ // x + DROP // + alwaysThrow123 CALLDICT + }> // x + 10 LESSINT // '5 + IFJMP:<{ // + FALSE // '6 + anotherNever CALLDICT + }> // + 0 PUSHINT // '8=0 + }> +""" + +@fif_codegen +""" + testCodegen3 PROC:<{ + // numberId paramVal + SWAP + -1000 PUSHINT // paramVal numberId '2=-1000 + 
EQUAL // paramVal '3 + IFJMP:<{ // paramVal + CTOS // cs + SETCODE + 0 THROW + }> // paramVal + DROP // + }> +""" */ diff --git a/tolk-tester/tests/unreachable-3.tolk b/tolk-tester/tests/unreachable-3.tolk new file mode 100644 index 00000000..fab21fd2 --- /dev/null +++ b/tolk-tester/tests/unreachable-3.tolk @@ -0,0 +1,22 @@ +fun main(x: int?) { + if (x != null && x == null) { + return 1 + 2; + } + if (x == null) { + return -1; + } + if (x != null) { + return -2; + } + return 3 + 4; +} + +/** +@testcase | 0 | 5 | -2 +@testcase | 0 | null | -1 + +@stderr warning: variable `x` of type `int` is always not null +@stderr if (x != null) +@stderr warning: unreachable code +@stderr return 3 + 4 + */ diff --git a/tolk-tester/tests/unreachable-4.tolk b/tolk-tester/tests/unreachable-4.tolk new file mode 100644 index 00000000..6b25b3d9 --- /dev/null +++ b/tolk-tester/tests/unreachable-4.tolk @@ -0,0 +1,24 @@ +fun alwaysThrows(): never { + throw 456; +} + +fun testUnreachable(x: int) { + if (x) { throw 123; } + else { alwaysThrows(); } + return 1; +} + +fun main() { + try { + testUnreachable(100); + throw 80; + } catch (excNo) { + return excNo; + } +} + +/** +@testcase | 0 | | 123 +@stderr warning: unreachable code +@stderr return 1; + */ diff --git a/tolk-tester/tests/use-before-declare.tolk b/tolk-tester/tests/use-before-declare.tolk index d3e6b165..2a0e0e7f 100644 --- a/tolk-tester/tests/use-before-declare.tolk +++ b/tolk-tester/tests/use-before-declare.tolk @@ -27,8 +27,8 @@ fun test1(): int { var demo_var: int = demo_10; var demo_slice: int = demo_20; if (demo_var > 0) { - var demo_var: tuple = null; - var demo_slice: tuple = null; + var demo_var: tuple? = null; + var demo_slice: tuple? 
= null; } return demo_var + demo_slice; } diff --git a/tolk-tester/tests/var-apply.tolk b/tolk-tester/tests/var-apply.tolk index 16863560..d189430f 100644 --- a/tolk-tester/tests/var-apply.tolk +++ b/tolk-tester/tests/var-apply.tolk @@ -138,6 +138,43 @@ fun testIndexedAccessApply() { return functions2.0(functions1.1(b)).loadInt(32); } +fun getNullable4(): int? { return 4; } +fun myBeginCell(): builder? asm "NEWC"; + +@method_id(108) +fun testCallingNotNull() { + var n4: () -> int? = getNullable4; + var creator: (() -> builder?)? = myBeginCell; + var end2: [int, (builder -> cell)?] = [0, endCell]; + var c: cell = end2.1!((creator!()!)!.storeInt(getNullable4()!, 32)); + return c.beginParse().loadInt(32); +} + +fun sumOfTensorIfNotNull(t: (int, int)?) { + if (t == null) { return 0; } + return t!.0 + t!.1; +} + +@method_id(109) +fun testTypeTransitionOfVarCall() { + var summer = sumOfTensorIfNotNull; + var hh1 = [1, null]; + var tt1 = (3, 4); + return (summer(null), summer((1,2)), summer(hh1.1), summer(tt1)); +} + +fun makeTensor(x1: int, x2: int, x3: int, x4: int, x5: int) { + return (x1, x2, x3, x4, x5); +} + +fun eq(x: T): T { return x; } + +@method_id(110) +fun testVarsModificationInsideVarCall(x: int) { + var cb = makeTensor; + return x > 3 ? cb(x, x += 5, eq(x *= x), x, eq(x)) : null; +} + fun main() {} /** @@ -148,4 +185,8 @@ fun main() {} @testcase | 105 | | 1 @testcase | 106 | | 1 1 [ 2 ] [ 2 ] @testcase | 107 | | 65537 +@testcase | 108 | | 4 +@testcase | 109 | | 0 3 0 7 +@testcase | 110 | 5 | 5 10 100 100 100 -1 +@testcase | 110 | 0 | (null) (null) (null) (null) (null) 0 */ diff --git a/tolk-tester/tests/warnings-1.tolk b/tolk-tester/tests/warnings-1.tolk new file mode 100644 index 00000000..040057d1 --- /dev/null +++ b/tolk-tester/tests/warnings-1.tolk @@ -0,0 +1,28 @@ +fun getNullableInt(): int? { return null; } + +fun main() { + var c: int? = 6; + __expect_type(c, "int"); + if (c == null) {} + + var d: int? 
= c; + if (((d)) != null && tupleSize(createEmptyTuple())) {} + + var e: int? = getNullableInt(); + if (e != null) { + return true; + } + __expect_type(e, "null"); + null == e; + + return null != null; +} + +/** +@testcase | 0 | | 0 + +@stderr warning: variable `c` of type `int` is always not null, this condition is always false +@stderr warning: variable `d` of type `int` is always not null, this condition is always true +@stderr warning: variable `e` is always null, this condition is always true +@stderr warning: expression is always null, this condition is always false + */ diff --git a/tolk-tester/tests/warnings-2.tolk b/tolk-tester/tests/warnings-2.tolk new file mode 100644 index 00000000..57ecb21a --- /dev/null +++ b/tolk-tester/tests/warnings-2.tolk @@ -0,0 +1,26 @@ +fun main() { + var (a, b, c, d, e) = (1, beginCell(), beginCell().endCell().beginParse(), [1], true as bool?); + + var alwaysInt = a != null ? 1 : null; + __expect_type(alwaysInt, "int"); + + if (!(c == null)) { + if (10 < 3) { assert(b == null, 100); } + } + while (d == null || false) {} + + return e! 
!= null; +} + +/** +@testcase | 0 | | -1 + +@stderr warning: variable `a` of type `int` is always not null, this condition is always true +@stderr warning: condition of ternary operator is always true +@stderr warning: variable `c` of type `slice` is always not null, this condition is always false +@stderr warning: condition of `if` is always true +@stderr warning: variable `b` of type `builder` is always not null, this condition is always false +@stderr warning: condition of `assert` is always false +@stderr warning: condition of `while` is always false +@stderr warning: expression of type `bool` is always not null, this condition is always true + */ diff --git a/tolk/CMakeLists.txt b/tolk/CMakeLists.txt index 9d720024..de408115 100644 --- a/tolk/CMakeLists.txt +++ b/tolk/CMakeLists.txt @@ -12,8 +12,8 @@ set(TOLK_SOURCE pipe-register-symbols.cpp pipe-resolve-identifiers.cpp pipe-calc-rvalue-lvalue.cpp - pipe-detect-unreachable.cpp pipe-infer-types-and-calls.cpp + pipe-check-inferred-types.cpp pipe-refine-lvalue-for-mutate.cpp pipe-check-rvalue-lvalue.cpp pipe-check-pure-impure.cpp @@ -23,6 +23,7 @@ set(TOLK_SOURCE pipe-find-unused-symbols.cpp pipe-generate-fif-output.cpp type-system.cpp + smart-casts-cfg.cpp generics-helpers.cpp abscode.cpp analyzer.cpp diff --git a/tolk/abscode.cpp b/tolk/abscode.cpp index b465b72b..fc160984 100644 --- a/tolk/abscode.cpp +++ b/tolk/abscode.cpp @@ -402,7 +402,7 @@ void CodeBlob::print(std::ostream& os, int flags) const { std::vector CodeBlob::create_var(TypePtr var_type, SrcLocation loc, std::string name) { std::vector ir_idx; - int stack_w = var_type->calc_width_on_stack(); + int stack_w = var_type->get_width_on_stack(); ir_idx.reserve(stack_w); if (const TypeDataTensor* t_tensor = var_type->try_as()) { for (int i = 0; i < t_tensor->size(); ++i) { @@ -410,7 +410,11 @@ std::vector CodeBlob::create_var(TypePtr var_type, SrcLocation loc, s std::vector nested = create_var(t_tensor->items[i], loc, std::move(sub_name)); 
ir_idx.insert(ir_idx.end(), nested.begin(), nested.end()); } - } else if (var_type != TypeDataVoid::create()) { + } else if (const TypeDataNullable* t_nullable = var_type->try_as(); t_nullable && stack_w != 1) { + std::string null_flag_name = name.empty() ? name : name + ".NNFlag"; + ir_idx = create_var(t_nullable->inner, loc, std::move(name)); + ir_idx.emplace_back(create_var(TypeDataBool::create(), loc, std::move(null_flag_name))[0]); + } else if (var_type != TypeDataVoid::create() && var_type != TypeDataNever::create()) { #ifdef TOLK_DEBUG tolk_assert(stack_w == 1); #endif diff --git a/tolk/analyzer.cpp b/tolk/analyzer.cpp index 9303bc83..c38b0bfa 100644 --- a/tolk/analyzer.cpp +++ b/tolk/analyzer.cpp @@ -20,6 +20,13 @@ namespace tolk { +// functions returning "never" are assumed to interrupt flow +// for instance, variables after their call aren't considered used +// its main purpose is `throw` statement, it's a call to a built-in `__throw` function +static bool does_function_always_throw(FunctionPtr fun_ref) { + return fun_ref->declared_return_type == TypeDataNever::create(); +} + /* * * ANALYZE AND PREPROCESS ABSTRACT CODE @@ -262,17 +269,6 @@ VarDescrList& VarDescrList::operator|=(const VarDescrList& y) { } } -VarDescrList& VarDescrList::operator&=(const VarDescrList& values) { - for (const VarDescr& vd : values.list) { - VarDescr* item = operator[](vd.idx); - if (item) { - *item &= vd; - } - } - unreachable |= values.unreachable; - return *this; -} - VarDescrList& VarDescrList::import_values(const VarDescrList& values) { if (values.unreachable) { set_unreachable(); @@ -326,6 +322,17 @@ bool Op::compute_used_vars(const CodeBlob& code, bool edit) { } return std_compute_used_vars(true); } + if (cl == _Call && does_function_always_throw(f_sym)) { + VarDescrList new_var_info; // empty, not next->var_info + if (args.size() == right.size()) { + for (const VarDescr& arg : args) { + new_var_info.add_var(arg.idx, arg.is_unused()); + } + } else { + 
new_var_info.add_vars(right, false); + } + return set_var_info(std::move(new_var_info)); + } return std_compute_used_vars(); } case _SetGlob: { @@ -516,20 +523,19 @@ bool prune_unreachable(std::unique_ptr& ops) { case Op::_SliceConst: case Op::_GlobVar: case Op::_SetGlob: - case Op::_Call: case Op::_CallInd: case Op::_Tuple: case Op::_UnTuple: case Op::_Import: + case Op::_Let: reach = true; break; - case Op::_Let: { - reach = true; - break; - } case Op::_Return: reach = false; break; + case Op::_Call: + reach = !does_function_always_throw(op.f_sym); + break; case Op::_If: { // if left then block0 else block1; ... VarDescr* c_var = op.var_info[op.left[0]]; @@ -712,6 +718,9 @@ VarDescrList Op::fwd_analyze(VarDescrList values) { values.add_newval(i); } } + if (does_function_always_throw(f_sym)) { + values.set_unreachable(); + } break; } case _Tuple: @@ -860,10 +869,11 @@ bool Op::mark_noreturn() { case _SetGlob: case _GlobVar: case _CallInd: - case _Call: return set_noreturn(next->mark_noreturn()); case _Return: return set_noreturn(); + case _Call: + return set_noreturn(next->mark_noreturn() || does_function_always_throw(f_sym)); case _If: case _TryCatch: // note, that & | (not && ||) here and below is mandatory to invoke both left and right calls diff --git a/tolk/ast-from-tokens.cpp b/tolk/ast-from-tokens.cpp index f5855bc1..fcaa1157 100644 --- a/tolk/ast-from-tokens.cpp +++ b/tolk/ast-from-tokens.cpp @@ -111,23 +111,16 @@ static void diagnose_addition_in_bitshift(SrcLocation loc, std::string_view bits } } -// replace (a == null) and similar to isNull(a) (call of a built-in function) -static AnyExprV maybe_replace_eq_null_with_isNull_call(V v) { +// replace (a == null) and similar to ast_is_null_check(a) (special AST vertex) +static AnyExprV maybe_replace_eq_null_with_isNull_check(V v) { bool has_null = v->get_lhs()->type == ast_null_keyword || v->get_rhs()->type == ast_null_keyword; bool replace = has_null && (v->tok == tok_eq || v->tok == tok_neq); if (!replace) 
{ return v; } - auto v_ident = createV(v->loc, "__isNull"); // built-in function - auto v_ref = createV(v->loc, v_ident, nullptr); - AnyExprV v_null = v->get_lhs()->type == ast_null_keyword ? v->get_rhs() : v->get_lhs(); - AnyExprV v_arg = createV(v->loc, v_null, false); - AnyExprV v_isNull = createV(v->loc, v_ref, createV(v->loc, {v_arg})); - if (v->tok == tok_neq) { - v_isNull = createV(v->loc, "!", tok_logical_not, v_isNull); - } - return v_isNull; + AnyExprV v_nullable = v->get_lhs()->type == ast_null_keyword ? v->get_rhs() : v->get_lhs(); + return createV(v->loc, v_nullable, v->tok == tok_neq); } @@ -372,16 +365,31 @@ static AnyExprV parse_expr100(Lexer& lex) { } } -// parse E(...) (left-to-right) +// parse E(...) and E! having parsed E already (left-to-right) +static AnyExprV parse_fun_call_postfix(Lexer& lex, AnyExprV lhs) { + while (true) { + if (lex.tok() == tok_oppar) { + lhs = createV(lhs->loc, lhs, parse_argument_list(lex)); + } else if (lex.tok() == tok_logical_not) { + lex.next(); + lhs = createV(lhs->loc, lhs); + } else { + break; + } + } + return lhs; +} + +// parse E(...) and E! (left-to-right) static AnyExprV parse_expr90(Lexer& lex) { AnyExprV res = parse_expr100(lex); - while (lex.tok() == tok_oppar) { - res = createV(res->loc, res, parse_argument_list(lex)); + if (lex.tok() == tok_oppar || lex.tok() == tok_logical_not) { + res = parse_fun_call_postfix(lex, res); } return res; } -// parse E.field and E.method(...) (left-to-right) +// parse E.field and E.method(...) and E.field! 
(left-to-right) static AnyExprV parse_expr80(Lexer& lex) { AnyExprV lhs = parse_expr90(lex); while (lex.tok() == tok_dot) { @@ -402,8 +410,8 @@ static AnyExprV parse_expr80(Lexer& lex) { lex.unexpected("method name"); } lhs = createV(loc, lhs, v_ident, v_instantiationTs); - while (lex.tok() == tok_oppar) { - lhs = createV(lex.cur_location(), lhs, parse_argument_list(lex)); + if (lex.tok() == tok_oppar || lex.tok() == tok_logical_not) { + lhs = parse_fun_call_postfix(lex, lhs); } } return lhs; @@ -491,7 +499,7 @@ static AnyExprV parse_expr15(Lexer& lex) { AnyExprV rhs = parse_expr17(lex); lhs = createV(loc, operator_name, t, lhs, rhs); if (t == tok_eq || t == tok_neq) { - lhs = maybe_replace_eq_null_with_isNull_call(lhs->as()); + lhs = maybe_replace_eq_null_with_isNull_check(lhs->as()); } } return lhs; diff --git a/tolk/ast-replacer.h b/tolk/ast-replacer.h index c8350747..5103cc92 100644 --- a/tolk/ast-replacer.h +++ b/tolk/ast-replacer.h @@ -108,6 +108,8 @@ protected: virtual AnyExprV replace(V v) { return replace_children(v); } virtual AnyExprV replace(V v) { return replace_children(v); } virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } // statements virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } @@ -144,6 +146,8 @@ protected: case ast_binary_operator: return replace(v->as()); case ast_ternary_operator: return replace(v->as()); case ast_cast_as_operator: return replace(v->as()); + case ast_not_null_operator: return replace(v->as()); + case ast_is_null_check: return replace(v->as()); default: throw UnexpectedASTNodeType(v, "ASTReplacerInFunctionBody::replace"); } @@ -174,20 +178,20 @@ protected: } public: - virtual bool should_visit_function(const FunctionData* fun_ref) = 0; + virtual bool should_visit_function(FunctionPtr fun_ref) = 0; - void 
start_replacing_in_function(const FunctionData* fun_ref, V v_function) { + void start_replacing_in_function(FunctionPtr fun_ref, V v_function) { replace(v_function->get_body()); } }; -const std::vector& get_all_not_builtin_functions(); +const std::vector& get_all_not_builtin_functions(); template void replace_ast_of_all_functions() { BodyReplacerT visitor; - for (const FunctionData* fun_ref : get_all_not_builtin_functions()) { + for (FunctionPtr fun_ref : get_all_not_builtin_functions()) { if (visitor.should_visit_function(fun_ref)) { visitor.start_replacing_in_function(fun_ref, fun_ref->ast_root->as()); } diff --git a/tolk/ast-replicator.h b/tolk/ast-replicator.h index 02198adb..16bbbeb8 100644 --- a/tolk/ast-replicator.h +++ b/tolk/ast-replicator.h @@ -121,6 +121,12 @@ protected: virtual V clone(V v) { return createV(v->loc, clone(v->get_expr()), clone(v->cast_to_type)); } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_expr())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_expr()), v->is_negated); + } // statements @@ -200,6 +206,8 @@ protected: case ast_binary_operator: return clone(v->as()); case ast_ternary_operator: return clone(v->as()); case ast_cast_as_operator: return clone(v->as()); + case ast_not_null_operator: return clone(v->as()); + case ast_is_null_check: return clone(v->as()); default: throw UnexpectedASTNodeType(v, "ASTReplicatorFunction::clone"); } diff --git a/tolk/ast-stringifier.h b/tolk/ast-stringifier.h index 4ec72cdd..a7f260de 100644 --- a/tolk/ast-stringifier.h +++ b/tolk/ast-stringifier.h @@ -56,6 +56,8 @@ class ASTStringifier final : public ASTVisitor { {ast_binary_operator, "ast_binary_operator"}, {ast_ternary_operator, "ast_ternary_operator"}, {ast_cast_as_operator, "ast_cast_as_operator"}, + {ast_not_null_operator, "ast_not_null_operator"}, + {ast_is_null_check, "ast_is_null_check"}, // statements {ast_empty_statement, "ast_empty_statement"}, {ast_sequence, "ast_sequence"}, @@ -193,7 +195,7 @@ 
class ASTStringifier final : public ASTVisitor { } case ast_local_var_lhs: { std::ostringstream os; - os << (v->as()->inferred_type ? v->as()->inferred_type : v->as()->declared_type); + os << (v->as()->inferred_type ? v->as()->inferred_type->as_human_readable() : v->as()->declared_type->as_human_readable()); if (v->as()->get_name().empty()) { return "_: " + os.str(); } @@ -268,6 +270,8 @@ public: case ast_binary_operator: return handle_vertex(v->as()); case ast_ternary_operator: return handle_vertex(v->as()); case ast_cast_as_operator: return handle_vertex(v->as()); + case ast_not_null_operator: return handle_vertex(v->as()); + case ast_is_null_check: return handle_vertex(v->as()); // statements case ast_empty_statement: return handle_vertex(v->as()); case ast_sequence: return handle_vertex(v->as()); diff --git a/tolk/ast-visitor.h b/tolk/ast-visitor.h index a54cb13b..d697aa82 100644 --- a/tolk/ast-visitor.h +++ b/tolk/ast-visitor.h @@ -109,6 +109,8 @@ protected: virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } // statements virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } @@ -146,6 +148,8 @@ protected: case ast_binary_operator: return visit(v->as()); case ast_ternary_operator: return visit(v->as()); case ast_cast_as_operator: return visit(v->as()); + case ast_not_null_operator: return visit(v->as()); + case ast_is_null_check: return visit(v->as()); // statements case ast_empty_statement: return visit(v->as()); case ast_sequence: return visit(v->as()); @@ -167,20 +171,20 @@ protected: } public: - virtual bool should_visit_function(const FunctionData* fun_ref) = 0; + virtual bool should_visit_function(FunctionPtr fun_ref) = 0; - virtual void start_visiting_function(const FunctionData* 
fun_ref, V v_function) { + virtual void start_visiting_function(FunctionPtr fun_ref, V v_function) { visit(v_function->get_body()); } }; -const std::vector& get_all_not_builtin_functions(); +const std::vector& get_all_not_builtin_functions(); template void visit_ast_of_all_functions() { BodyVisitorT visitor; - for (const FunctionData* fun_ref : get_all_not_builtin_functions()) { + for (FunctionPtr fun_ref : get_all_not_builtin_functions()) { if (visitor.should_visit_function(fun_ref)) { visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); } diff --git a/tolk/ast.cpp b/tolk/ast.cpp index 092260ff..26eaacd5 100644 --- a/tolk/ast.cpp +++ b/tolk/ast.cpp @@ -117,11 +117,16 @@ void ASTNodeExpressionBase::assign_lvalue_true() { this->is_lvalue = true; } +void ASTNodeExpressionBase::assign_always_true_or_false(int flow_true_false_state) { + this->is_always_true = flow_true_false_state == 1; // see smart-casts-cfg.h + this->is_always_false = flow_true_false_state == 2; +} + void Vertex::assign_sym(const Symbol* sym) { this->sym = sym; } -void Vertex::assign_fun_ref(const FunctionData* fun_ref) { +void Vertex::assign_fun_ref(FunctionPtr fun_ref) { this->fun_maybe = fun_ref; } @@ -129,7 +134,7 @@ void Vertex::assign_resolved_type(TypePtr cast_to_type) { this->cast_to_type = cast_to_type; } -void Vertex::assign_var_ref(const GlobalVarData* var_ref) { +void Vertex::assign_var_ref(GlobalVarPtr var_ref) { this->var_ref = var_ref; } @@ -137,7 +142,7 @@ void Vertex::assign_resolved_type(TypePtr declared_t this->declared_type = declared_type; } -void Vertex::assign_const_ref(const GlobalConstData* const_ref) { +void Vertex::assign_const_ref(GlobalConstPtr const_ref) { this->const_ref = const_ref; } @@ -149,7 +154,7 @@ void Vertex::assign_resolved_type(TypePtr substituted_t this->substituted_type = substituted_type; } -void Vertex::assign_param_ref(const LocalVarData* param_ref) { +void Vertex::assign_param_ref(LocalVarPtr param_ref) { this->param_ref = param_ref; } @@ 
-157,23 +162,31 @@ void Vertex::assign_resolved_type(TypePtr declared_type) { this->declared_type = declared_type; } -void Vertex::assign_fun_ref(const FunctionData* fun_ref) { +void Vertex::assign_fun_ref(FunctionPtr fun_ref) { this->fun_ref = fun_ref; } -void Vertex::assign_fun_ref(const FunctionData* fun_ref) { +void Vertex::assign_fun_ref(FunctionPtr fun_ref) { this->fun_ref = fun_ref; } -void Vertex::assign_fun_ref(const FunctionData* fun_ref) { +void Vertex::assign_fun_ref(FunctionPtr fun_ref) { this->fun_ref = fun_ref; } +void Vertex::assign_is_negated(bool is_negated) { + this->is_negated = is_negated; +} + +void Vertex::assign_first_unreachable(AnyV first_unreachable) { + this->first_unreachable = first_unreachable; +} + void Vertex::assign_target(const DotTarget& target) { this->target = target; } -void Vertex::assign_fun_ref(const FunctionData* fun_ref) { +void Vertex::assign_fun_ref(FunctionPtr fun_ref) { this->fun_ref = fun_ref; } @@ -181,7 +194,7 @@ void Vertex::assign_resolved_type(TypePtr declared_ret this->declared_return_type = declared_return_type; } -void Vertex::assign_var_ref(const LocalVarData* var_ref) { +void Vertex::assign_var_ref(LocalVarPtr var_ref) { this->var_ref = var_ref; } diff --git a/tolk/ast.h b/tolk/ast.h index d2db49f8..9b7c5d1a 100644 --- a/tolk/ast.h +++ b/tolk/ast.h @@ -88,6 +88,8 @@ enum ASTNodeType { ast_binary_operator, ast_ternary_operator, ast_cast_as_operator, + ast_not_null_operator, + ast_is_null_check, // statements ast_empty_statement, ast_sequence, @@ -184,11 +186,14 @@ struct ASTNodeExpressionBase : ASTNodeBase { TypePtr inferred_type = nullptr; bool is_rvalue: 1 = false; bool is_lvalue: 1 = false; + bool is_always_true: 1 = false; // inside `if`, `while`, ternary condition, `== null`, etc. 
+ bool is_always_false: 1 = false; // (when expression is guaranteed to be always true or always false) ASTNodeExpressionBase* mutate() const { return const_cast(this); } void assign_inferred_type(TypePtr type); void assign_rvalue_true(); void assign_lvalue_true(); + void assign_always_true_or_false(int flow_true_false_state); ASTNodeExpressionBase(ASTNodeType type, SrcLocation loc) : ASTNodeBase(type, loc) {} }; @@ -408,7 +413,7 @@ private: V identifier; public: - const LocalVarData* var_ref = nullptr; // filled on resolve identifiers; for `redef` points to declared above; for underscore, name is empty + LocalVarPtr var_ref = nullptr; // filled on resolve identifiers; for `redef` points to declared above; for underscore, name is empty TypePtr declared_type; // not null for `var x: int = rhs`, otherwise nullptr bool is_immutable; // declared via 'val', not 'var' bool marked_as_redef; // var (existing_var redef, new_var: int) = ... @@ -417,7 +422,7 @@ public: std::string_view get_name() const { return identifier->name; } // empty for underscore Vertex* mutate() const { return const_cast(this); } - void assign_var_ref(const LocalVarData* var_ref); + void assign_var_ref(LocalVarPtr var_ref); void assign_resolved_type(TypePtr declared_type); Vertex(SrcLocation loc, V identifier, TypePtr declared_type, bool is_immutable, bool marked_as_redef) @@ -530,12 +535,12 @@ private: public: typedef std::variant< - const FunctionData*, // for `t.tupleAt` target is `tupleAt` global function + FunctionPtr, // for `t.tupleAt` target is `tupleAt` global function int // for `t.0` target is "indexed access" 0 > DotTarget; DotTarget target = static_cast(nullptr); // filled at type inferring - bool is_target_fun_ref() const { return std::holds_alternative(target); } + bool is_target_fun_ref() const { return std::holds_alternative(target); } bool is_target_indexed_access() const { return std::holds_alternative(target); } AnyExprV get_obj() const { return child; } @@ -560,7 +565,7 @@ 
template<> // example: `getF()()` then callee is another func call (which type is TypeDataFunCallable) // example: `obj.method()` then callee is dot access (resolved while type inferring) struct Vertex final : ASTExprBinary { - const FunctionData* fun_maybe = nullptr; // filled while type inferring for `globalF()` / `obj.f()`; remains nullptr for `local_var()` / `getF()()` + FunctionPtr fun_maybe = nullptr; // filled while type inferring for `globalF()` / `obj.f()`; remains nullptr for `local_var()` / `getF()()` AnyExprV get_callee() const { return lhs; } bool is_dot_call() const { return lhs->type == ast_dot_access; } @@ -570,7 +575,7 @@ struct Vertex final : ASTExprBinary { auto get_arg(int i) const { return rhs->as()->get_arg(i); } Vertex* mutate() const { return const_cast(this); } - void assign_fun_ref(const FunctionData* fun_ref); + void assign_fun_ref(FunctionPtr fun_ref); Vertex(SrcLocation loc, AnyExprV lhs_f, V arguments) : ASTExprBinary(ast_function_call, loc, lhs_f, arguments) {} @@ -603,7 +608,7 @@ template<> // ast_set_assign represents assignment-and-set operation "lhs = rhs" // examples: `a += 4` / `b <<= c` struct Vertex final : ASTExprBinary { - const FunctionData* fun_ref = nullptr; // filled at type inferring, points to `_+_` built-in for += + FunctionPtr fun_ref = nullptr; // filled at type inferring, points to `_+_` built-in for += std::string_view operator_name; // without equal sign, "+" for operator += TokenType tok; // tok_set_* @@ -611,7 +616,7 @@ struct Vertex final : ASTExprBinary { AnyExprV get_rhs() const { return rhs; } Vertex* mutate() const { return const_cast(this); } - void assign_fun_ref(const FunctionData* fun_ref); + void assign_fun_ref(FunctionPtr fun_ref); Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyExprV lhs, AnyExprV rhs) : ASTExprBinary(ast_set_assign, loc, lhs, rhs) @@ -622,14 +627,14 @@ template<> // ast_unary_operator is "some operator over one expression" // examples: `-1` / `~found` 
struct Vertex final : ASTExprUnary { - const FunctionData* fun_ref = nullptr; // filled at type inferring, points to some built-in function + FunctionPtr fun_ref = nullptr; // filled at type inferring, points to some built-in function std::string_view operator_name; TokenType tok; AnyExprV get_rhs() const { return child; } Vertex* mutate() const { return const_cast(this); } - void assign_fun_ref(const FunctionData* fun_ref); + void assign_fun_ref(FunctionPtr fun_ref); Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyExprV rhs) : ASTExprUnary(ast_unary_operator, loc, rhs) @@ -641,7 +646,7 @@ template<> // examples: `a + b` / `x & true` / `(a, b) << g()` // note, that `a = b` is NOT a binary operator, it's ast_assign, also `a += b`, it's ast_set_assign struct Vertex final : ASTExprBinary { - const FunctionData* fun_ref = nullptr; // filled at type inferring, points to some built-in function + FunctionPtr fun_ref = nullptr; // filled at type inferring, points to some built-in function std::string_view operator_name; TokenType tok; @@ -649,7 +654,7 @@ struct Vertex final : ASTExprBinary { AnyExprV get_rhs() const { return rhs; } Vertex* mutate() const { return const_cast(this); } - void assign_fun_ref(const FunctionData* fun_ref); + void assign_fun_ref(FunctionPtr fun_ref); Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyExprV lhs, AnyExprV rhs) : ASTExprBinary(ast_binary_operator, loc, lhs, rhs) @@ -684,6 +689,32 @@ struct Vertex final : ASTExprUnary { , cast_to_type(cast_to_type) {} }; +template<> +// ast_not_null_operator is non-null assertion: like TypeScript ! or Kotlin !! 
+// examples: `nullableInt!` / `getNullableBuilder()!` +struct Vertex final : ASTExprUnary { + AnyExprV get_expr() const { return child; } + + Vertex(SrcLocation loc, AnyExprV expr) + : ASTExprUnary(ast_not_null_operator, loc, expr) {} +}; + +template<> +// ast_is_null_check is an artificial vertex for "expr == null" / "expr != null" / same but null on the left +// it's created instead of a general binary expression to emphasize its purpose +struct Vertex final : ASTExprUnary { + bool is_negated; + + AnyExprV get_expr() const { return child; } + + Vertex* mutate() const { return const_cast(this); } + void assign_is_negated(bool is_negated); + + Vertex(SrcLocation loc, AnyExprV expr, bool is_negated) + : ASTExprUnary(ast_is_null_check, loc, expr) + , is_negated(is_negated) {} +}; + // // --------------------------------------------------------- @@ -706,10 +737,14 @@ template<> // example: do while body is a sequence struct Vertex final : ASTStatementVararg { SrcLocation loc_end; + AnyV first_unreachable = nullptr; const std::vector& get_items() const { return children; } AnyV get_item(int i) const { return children.at(i); } + Vertex* mutate() const { return const_cast(this); } + void assign_first_unreachable(AnyV first_unreachable); + Vertex(SrcLocation loc, SrcLocation loc_end, std::vector items) : ASTStatementVararg(ast_sequence, loc, std::move(items)) , loc_end(loc_end) {} @@ -892,7 +927,7 @@ template<> // ast_parameter is a parameter of a function in its declaration // example: `fun f(a: int, mutate b: slice)` has 2 parameters struct Vertex final : ASTOtherLeaf { - const LocalVarData* param_ref = nullptr; // filled on resolve identifiers + LocalVarPtr param_ref = nullptr; // filled on resolve identifiers std::string_view param_name; TypePtr declared_type; bool declared_as_mutate; // declared as `mutate param_name` @@ -900,7 +935,7 @@ struct Vertex final : ASTOtherLeaf { bool is_underscore() const { return param_name.empty(); } Vertex* mutate() const { return 
const_cast(this); } - void assign_param_ref(const LocalVarData* param_ref); + void assign_param_ref(LocalVarPtr param_ref); void assign_resolved_type(TypePtr declared_type); Vertex(SrcLocation loc, std::string_view param_name, TypePtr declared_type, bool declared_as_mutate) @@ -951,7 +986,7 @@ struct Vertex final : ASTOtherVararg { auto get_param(int i) const { return children.at(1)->as()->get_param(i); } AnyV get_body() const { return children.at(2); } // ast_sequence / ast_asm_body - const FunctionData* fun_ref = nullptr; // filled after register + FunctionPtr fun_ref = nullptr; // filled after register TypePtr declared_return_type; // filled at ast parsing; if unspecified (nullptr), means "auto infer" V genericsT_list; // for non-generics it's nullptr td::RefInt256 method_id; // specified via @method_id annotation @@ -962,7 +997,7 @@ struct Vertex final : ASTOtherVararg { bool is_builtin_function() const { return children.at(2)->type == ast_empty_statement; } Vertex* mutate() const { return const_cast(this); } - void assign_fun_ref(const FunctionData* fun_ref); + void assign_fun_ref(FunctionPtr fun_ref); void assign_resolved_type(TypePtr declared_return_type); Vertex(SrcLocation loc, V name_identifier, V parameters, AnyV body, TypePtr declared_return_type, V genericsT_list, td::RefInt256 method_id, int flags) @@ -975,13 +1010,13 @@ template<> // example: `global g: int;` // note, that globals don't have default values, since there is no single "entrypoint" for a contract struct Vertex final : ASTOtherVararg { - const GlobalVarData* var_ref = nullptr; // filled after register + GlobalVarPtr var_ref = nullptr; // filled after register TypePtr declared_type; // filled always, typing globals is mandatory auto get_identifier() const { return children.at(0)->as(); } Vertex* mutate() const { return const_cast(this); } - void assign_var_ref(const GlobalVarData* var_ref); + void assign_var_ref(GlobalVarPtr var_ref); void assign_resolved_type(TypePtr declared_type); 
Vertex(SrcLocation loc, V name_identifier, TypePtr declared_type) @@ -993,14 +1028,14 @@ template<> // ast_constant_declaration is declaring a global constant, outside a function // example: `const op = 0x123;` struct Vertex final : ASTOtherVararg { - const GlobalConstData* const_ref = nullptr; // filled after register + GlobalConstPtr const_ref = nullptr; // filled after register TypePtr declared_type; // not null for `const op: int = ...` auto get_identifier() const { return children.at(0)->as(); } AnyExprV get_init_value() const { return child_as_expr(1); } Vertex* mutate() const { return const_cast(this); } - void assign_const_ref(const GlobalConstData* const_ref); + void assign_const_ref(GlobalConstPtr const_ref); void assign_resolved_type(TypePtr declared_type); Vertex(SrcLocation loc, V name_identifier, TypePtr declared_type, AnyExprV init_value) diff --git a/tolk/builtins.cpp b/tolk/builtins.cpp index 2b207c25..cb89c984 100644 --- a/tolk/builtins.cpp +++ b/tolk/builtins.cpp @@ -1088,6 +1088,7 @@ void define_builtins() { TypePtr Slice = TypeDataSlice::create(); TypePtr Builder = TypeDataBuilder::create(); TypePtr Tuple = TypeDataTuple::create(); + TypePtr Never = TypeDataNever::create(); std::vector itemsT; itemsT.emplace_back("T"); @@ -1201,10 +1202,10 @@ void define_builtins() { define_builtin_func("__isNull", {typeT}, Bool, declGenericT, compile_is_null, FunctionData::flagMarkedAsPure); - define_builtin_func("__throw", ParamsInt1, Unit, nullptr, + define_builtin_func("__throw", ParamsInt1, Never, nullptr, compile_throw, 0); - define_builtin_func("__throw_arg", {typeT, Int}, Unit, declGenericT, + define_builtin_func("__throw_arg", {typeT, Int}, Never, declGenericT, compile_throw_arg, 0); define_builtin_func("__throw_if_unless", ParamsInt3, Unit, nullptr, diff --git a/tolk/codegen.cpp b/tolk/codegen.cpp index ad61b8a5..ac1cf639 100644 --- a/tolk/codegen.cpp +++ b/tolk/codegen.cpp @@ -274,8 +274,16 @@ void Stack::rearrange_top(var_idx_t top, bool last) { 
bool Op::generate_code_step(Stack& stack) { stack.opt_show(); - stack.drop_vars_except(var_info); - stack.opt_show(); + + // detect `throw 123` (actually _IntConst 123 + _Call __throw) + // don't clear the stack, since dropping unused elements make no sense, an exception is thrown anyway + bool will_now_immediate_throw = (cl == _Call && f_sym->is_builtin_function() && f_sym->name == "__throw") + || (cl == _IntConst && next->cl == _Call && next->f_sym->is_builtin_function() && next->f_sym->name == "__throw"); + if (!will_now_immediate_throw) { + stack.drop_vars_except(var_info); + stack.opt_show(); + } + bool inline_func = stack.mode & Stack::_InlineFunc; switch (cl) { case _Nop: @@ -285,6 +293,7 @@ bool Op::generate_code_step(Stack& stack) { stack.enforce_state(left); if (stack.o.retalt_ && (stack.mode & Stack::_NeedRetAlt)) { stack.o << "RETALT"; + stack.o.retalt_inserted_ = true; } stack.opt_show(); return false; @@ -348,9 +357,9 @@ bool Op::generate_code_step(Stack& stack) { std::vector args0, res; int w_arg = 0; for (const LocalVarData& param : f_sym->parameters) { - w_arg += param.declared_type->calc_width_on_stack(); + w_arg += param.declared_type->get_width_on_stack(); } - int w_ret = f_sym->inferred_return_type->calc_width_on_stack(); + int w_ret = f_sym->inferred_return_type->get_width_on_stack(); tolk_assert(w_ret >= 0 && w_arg >= 0); for (int i = 0; i < w_ret; i++) { res.emplace_back(0); @@ -514,7 +523,7 @@ bool Op::generate_code_step(Stack& stack) { int j = ret_order ? 
ret_order->at(i) : i; stack.push_new_var(left.at(j)); } - return true; + return !f_sym || f_sym->declared_return_type != TypeDataNever::create(); } case _SetGlob: { tolk_assert(g_sym); diff --git a/tolk/compiler-state.cpp b/tolk/compiler-state.cpp index 66fad844..95a7e6a5 100644 --- a/tolk/compiler-state.cpp +++ b/tolk/compiler-state.cpp @@ -66,7 +66,7 @@ void CompilerSettings::parse_experimental_options_cmd_arg(const std::string& cmd } } -const std::vector& get_all_not_builtin_functions() { +const std::vector& get_all_not_builtin_functions() { return G.all_functions; } diff --git a/tolk/compiler-state.h b/tolk/compiler-state.h index d33eec81..1d166a3a 100644 --- a/tolk/compiler-state.h +++ b/tolk/compiler-state.h @@ -95,10 +95,10 @@ struct CompilerState { GlobalSymbolTable symtable; PersistentHeapAllocator persistent_mem; - std::vector all_functions; // all user-defined (not built-in) functions, with generic instantiations - std::vector all_get_methods; - std::vector all_global_vars; - std::vector all_constants; + std::vector all_functions; // all user-defined (not built-in) functions, with generic instantiations + std::vector all_get_methods; + std::vector all_global_vars; + std::vector all_constants; AllRegisteredSrcFiles all_src_files; bool is_verbosity(int gt_eq) const { return settings.verbosity >= gt_eq; } diff --git a/tolk/constant-evaluator.cpp b/tolk/constant-evaluator.cpp index 9ad27381..4d11b922 100644 --- a/tolk/constant-evaluator.cpp +++ b/tolk/constant-evaluator.cpp @@ -255,7 +255,7 @@ struct ConstantEvaluator { if (!sym) { v->error("undefined symbol `" + static_cast(name) + "`"); } - const GlobalConstData* const_ref = sym->try_as(); + GlobalConstPtr const_ref = sym->try_as(); if (!const_ref) { v->error("symbol `" + static_cast(name) + "` is not a constant"); } diff --git a/tolk/fwd-declarations.h b/tolk/fwd-declarations.h index e3599f36..8d3b24a8 100644 --- a/tolk/fwd-declarations.h +++ b/tolk/fwd-declarations.h @@ -32,6 +32,11 @@ struct 
FunctionData; struct GlobalVarData; struct GlobalConstData; +using LocalVarPtr = const LocalVarData*; +using FunctionPtr = const FunctionData*; +using GlobalVarPtr = const GlobalVarData*; +using GlobalConstPtr = const GlobalConstData*; + class TypeData; using TypePtr = const TypeData*; diff --git a/tolk/generics-helpers.cpp b/tolk/generics-helpers.cpp index 7a2dd83f..9dae3f00 100644 --- a/tolk/generics-helpers.cpp +++ b/tolk/generics-helpers.cpp @@ -37,12 +37,38 @@ static TypePtr replace_genericT_with_deduced(TypePtr orig, const GenericsDeclara if (idx == -1) { throw Fatal("can not replace generic " + asT->nameT); } + if (substitutionTs[idx] == nullptr) { + throw GenericDeduceError("can not deduce " + asT->nameT); + } return substitutionTs[idx]; } return child; }); } +GenericSubstitutionsDeduceForCall::GenericSubstitutionsDeduceForCall(FunctionPtr fun_ref) + : fun_ref(fun_ref) { + substitutionTs.resize(fun_ref->genericTs->size()); // filled with nullptr (nothing deduced) +} + +void GenericSubstitutionsDeduceForCall::provide_deducedT(const std::string& nameT, TypePtr deduced) { + if (deduced == TypeDataNullLiteral::create() || deduced->has_unknown_inside()) { + return; // just 'null' doesn't give sensible info + } + + int idx = fun_ref->genericTs->find_nameT(nameT); + if (substitutionTs[idx] == nullptr) { + substitutionTs[idx] = deduced; + } else if (substitutionTs[idx] != deduced) { + throw GenericDeduceError(nameT + " is both " + substitutionTs[idx]->as_human_readable() + " and " + deduced->as_human_readable()); + } +} + +void GenericSubstitutionsDeduceForCall::provide_manually_specified(std::vector&& substitutionTs) { + this->substitutionTs = std::move(substitutionTs); + this->manually_specified = true; +} + // purpose: having `f(value: T)` and call `f(5)`, deduce T = int // generally, there may be many generic Ts for declaration, and many arguments // for every argument, `consider_next_condition()` is called @@ -51,71 +77,67 @@ static TypePtr 
replace_genericT_with_deduced(TypePtr orig, const GenericsDeclara // - next condition: param_type = `T1`, arg_type = `int`, deduce T1 = int // - next condition: param_type = `(T1, T2)`, arg_type = `(int, slice)`, deduce T1 = int, T2 = slice // for call `f(6, cs, (8, cs))` T1 will be both `slice` and `int`, fired an error -class GenericSubstitutionsDeduceForFunctionCall final { - const FunctionData* fun_ref; - std::vector substitutions; - - void provideDeducedT(const std::string& nameT, TypePtr deduced) { - if (deduced == TypeDataNullLiteral::create() || deduced->has_unknown_inside()) { - return; // just 'null' doesn't give sensible info +void GenericSubstitutionsDeduceForCall::consider_next_condition(TypePtr param_type, TypePtr arg_type) { + if (const auto* asT = param_type->try_as()) { + // `(arg: T)` called as `f([1, 2])` => T is [int, int] + provide_deducedT(asT->nameT, arg_type); + } else if (const auto* p_nullable = param_type->try_as()) { + // `arg: T?` called as `f(nullableInt)` => T is int + if (const auto* a_nullable = arg_type->try_as()) { + consider_next_condition(p_nullable->inner, a_nullable->inner); } - - int idx = fun_ref->genericTs->find_nameT(nameT); - if (substitutions[idx] == nullptr) { - substitutions[idx] = deduced; - } else if (substitutions[idx] != deduced) { - throw std::runtime_error(nameT + " is both " + substitutions[idx]->as_human_readable() + " and " + deduced->as_human_readable()); + // `arg: T?` called as `f(int)` => T is int + else { + consider_next_condition(p_nullable->inner, arg_type); } - } - -public: - explicit GenericSubstitutionsDeduceForFunctionCall(const FunctionData* fun_ref) - : fun_ref(fun_ref) { - substitutions.resize(fun_ref->genericTs->size()); // filled with nullptr (nothing deduced) - } - - void consider_next_condition(TypePtr param_type, TypePtr arg_type) { - if (const auto* asT = param_type->try_as()) { - // `(arg: T)` called as `f([1, 2])` => T is [int, int] - provideDeducedT(asT->nameT, arg_type); - } else if 
(const auto* p_tensor = param_type->try_as()) { - // `arg: (int, T)` called as `f((5, cs))` => T is slice - if (const auto* a_tensor = arg_type->try_as(); a_tensor && a_tensor->size() == p_tensor->size()) { - for (int i = 0; i < a_tensor->size(); ++i) { - consider_next_condition(p_tensor->items[i], a_tensor->items[i]); - } - } - } else if (const auto* p_tuple = param_type->try_as()) { - // `arg: [int, T]` called as `f([5, cs])` => T is slice - if (const auto* a_tuple = arg_type->try_as(); a_tuple && a_tuple->size() == p_tuple->size()) { - for (int i = 0; i < a_tuple->size(); ++i) { - consider_next_condition(p_tuple->items[i], a_tuple->items[i]); - } - } - } else if (const auto* p_callable = param_type->try_as()) { - // `arg: fun(TArg) -> TResult` called as `f(calcTupleLen)` => TArg is tuple, TResult is int - if (const auto* a_callable = arg_type->try_as(); a_callable && a_callable->params_size() == p_callable->params_size()) { - for (int i = 0; i < a_callable->params_size(); ++i) { - consider_next_condition(p_callable->params_types[i], a_callable->params_types[i]); - } - consider_next_condition(p_callable->return_type, a_callable->return_type); + } else if (const auto* p_tensor = param_type->try_as()) { + // `arg: (int, T)` called as `f((5, cs))` => T is slice + if (const auto* a_tensor = arg_type->try_as(); a_tensor && a_tensor->size() == p_tensor->size()) { + for (int i = 0; i < a_tensor->size(); ++i) { + consider_next_condition(p_tensor->items[i], a_tensor->items[i]); } } - } - - int get_first_not_deduced_idx() const { - for (int i = 0; i < static_cast(substitutions.size()); ++i) { - if (substitutions[i] == nullptr) { - return i; + } else if (const auto* p_tuple = param_type->try_as()) { + // `arg: [int, T]` called as `f([5, cs])` => T is slice + if (const auto* a_tuple = arg_type->try_as(); a_tuple && a_tuple->size() == p_tuple->size()) { + for (int i = 0; i < a_tuple->size(); ++i) { + consider_next_condition(p_tuple->items[i], a_tuple->items[i]); } } - return 
-1; + } else if (const auto* p_callable = param_type->try_as()) { + // `arg: fun(TArg) -> TResult` called as `f(calcTupleLen)` => TArg is tuple, TResult is int + if (const auto* a_callable = arg_type->try_as(); a_callable && a_callable->params_size() == p_callable->params_size()) { + for (int i = 0; i < a_callable->params_size(); ++i) { + consider_next_condition(p_callable->params_types[i], a_callable->params_types[i]); + } + consider_next_condition(p_callable->return_type, a_callable->return_type); + } } +} - std::vector flush() { - return {std::move(substitutions)}; +TypePtr GenericSubstitutionsDeduceForCall::replace_by_manually_specified(TypePtr param_type) const { + return replace_genericT_with_deduced(param_type, fun_ref->genericTs, substitutionTs); +} + +TypePtr GenericSubstitutionsDeduceForCall::auto_deduce_from_argument(FunctionPtr cur_f, SrcLocation loc, TypePtr param_type, TypePtr arg_type) { + try { + if (!manually_specified) { + consider_next_condition(param_type, arg_type); + } + return replace_genericT_with_deduced(param_type, fun_ref->genericTs, substitutionTs); + } catch (const GenericDeduceError& ex) { + throw ParseError(cur_f, loc, ex.message + " for generic function `" + fun_ref->as_human_readable() + "`; instantiate it manually with `" + fun_ref->name + "<...>()`"); } -}; +} + +int GenericSubstitutionsDeduceForCall::get_first_not_deduced_idx() const { + for (int i = 0; i < static_cast(substitutionTs.size()); ++i) { + if (substitutionTs[i] == nullptr) { + return i; + } + } + return -1; +} // clone the body of `f` replacing T everywhere with a substitution // before: `fun f(v: T) { var cp: [T] = [v]; }` @@ -175,11 +197,10 @@ int GenericsDeclaration::find_nameT(std::string_view nameT) const { // after creating a deep copy of `f` like `f`, its new and fresh body needs the previous pipeline to run // for example, all local vars need to be registered as symbols, etc. 
-static void run_pipeline_for_instantiated_function(const FunctionData* inst_fun_ref) { +static void run_pipeline_for_instantiated_function(FunctionPtr inst_fun_ref) { // these pipes are exactly the same as in tolk.cpp — all preceding (and including) type inferring pipeline_resolve_identifiers_and_assign_symbols(inst_fun_ref); pipeline_calculate_rvalue_lvalue(inst_fun_ref); - pipeline_detect_unreachable_statements(inst_fun_ref); pipeline_infer_types_and_calls_and_fields(inst_fun_ref); } @@ -198,34 +219,12 @@ std::string generate_instantiated_name(const std::string& orig_name, const std:: return name; } -td::Result> deduce_substitutionTs_on_generic_func_call(const FunctionData* called_fun, std::vector&& arg_types, TypePtr return_hint) { - try { - GenericSubstitutionsDeduceForFunctionCall deducing(called_fun); - for (const LocalVarData& param : called_fun->parameters) { - if (param.declared_type->has_genericT_inside() && param.param_idx < static_cast(arg_types.size())) { - deducing.consider_next_condition(param.declared_type, arg_types[param.param_idx]); - } - } - int idx = deducing.get_first_not_deduced_idx(); - if (idx != -1 && return_hint && called_fun->declared_return_type->has_genericT_inside()) { - deducing.consider_next_condition(called_fun->declared_return_type, return_hint); - idx = deducing.get_first_not_deduced_idx(); - } - if (idx != -1) { - return td::Status::Error(td::Slice{"can not deduce " + called_fun->genericTs->get_nameT(idx)}); - } - return deducing.flush(); - } catch (const std::runtime_error& ex) { - return td::Status::Error(td::Slice{ex.what()}); - } -} - -const FunctionData* instantiate_generic_function(SrcLocation loc, const FunctionData* fun_ref, const std::string& inst_name, std::vector&& substitutionTs) { +FunctionPtr instantiate_generic_function(SrcLocation loc, FunctionPtr fun_ref, const std::string& inst_name, std::vector&& substitutionTs) { tolk_assert(fun_ref->genericTs); // if `f` was earlier instantiated, return it if (const auto* 
existing = lookup_global_symbol(inst_name)) { - const FunctionData* inst_ref = existing->try_as(); + FunctionPtr inst_ref = existing->try_as(); tolk_assert(inst_ref); return inst_ref; } diff --git a/tolk/generics-helpers.h b/tolk/generics-helpers.h index 2a304f55..5ed245af 100644 --- a/tolk/generics-helpers.h +++ b/tolk/generics-helpers.h @@ -57,8 +57,46 @@ struct GenericsInstantiation { } }; +// this class helps to deduce Ts on the fly +// purpose: having `f(value: T)` and call `f(5)`, deduce T = int +// while analyzing a call, arguments are handled one by one, by `auto_deduce_from_argument()` +// this class also handles manually specified substitutions like `f(5)` +class GenericSubstitutionsDeduceForCall { + FunctionPtr fun_ref; + std::vector substitutionTs; + bool manually_specified = false; + + void provide_deducedT(const std::string& nameT, TypePtr deduced); + void consider_next_condition(TypePtr param_type, TypePtr arg_type); + +public: + explicit GenericSubstitutionsDeduceForCall(FunctionPtr fun_ref); + + bool is_manually_specified() const { + return manually_specified; + } + + void provide_manually_specified(std::vector&& substitutionTs); + TypePtr replace_by_manually_specified(TypePtr param_type) const; + TypePtr auto_deduce_from_argument(FunctionPtr cur_f, SrcLocation loc, TypePtr param_type, TypePtr arg_type); + int get_first_not_deduced_idx() const; + + std::vector&& flush() { + return std::move(substitutionTs); + } +}; + +struct GenericDeduceError final : std::exception { + std::string message; + explicit GenericDeduceError(std::string message) + : message(std::move(message)) { } + + const char* what() const noexcept override { + return message.c_str(); + } +}; + std::string generate_instantiated_name(const std::string& orig_name, const std::vector& substitutions); -td::Result> deduce_substitutionTs_on_generic_func_call(const FunctionData* called_fun, std::vector&& arg_types, TypePtr return_hint); -const FunctionData* 
instantiate_generic_function(SrcLocation loc, const FunctionData* fun_ref, const std::string& inst_name, std::vector&& substitutionTs); +FunctionPtr instantiate_generic_function(SrcLocation loc, FunctionPtr fun_ref, const std::string& inst_name, std::vector&& substitutionTs); } // namespace tolk diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index f5eca22c..1561aa40 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -30,158 +30,27 @@ * Up to this point, all types have been inferred, all validity checks have been passed, etc. * All properties in AST nodes are assigned and can be safely used (fun_ref, etc.). * So, if execution reaches this pass, the input is (almost) correct, and code generation should succeed. - * The only thing additionally checked during this pass is tricky lvalue, like one and the same variable - * assigned/mutated multiple times in same expression, e.g. `(t.0, t.0) = rhs` / `f(mutate x.1.2, mutate x)`. + * (previously, there was a check for one variable modified twice like `(t.0, t.0) = rhs`, but after changing + * execution order of assignment to "first lhs, then rhs", it was removed for several reasons) +* + * A noticeable property for IR generation is "target_type" used to extend/shrink stack. + * Example: `var a: (int,int)? = null`. This `null` has inferred_type "null literal", but target_type "nullable tensor", + * and when it's assigned, it's "expanded" from 1 stack slot to 3 (int + int + null flag). + * Example: `fun analyze(t: (int,int)?)` and a call `analyze((1,2))`. `(1,2)` is `(int,int)` (2 stack slots), + * and when passed to target (3 slots, one for null flag), this null flag is implicitly added (non-zero value). + * Example: `nullableInt!`; for `nullableInt` inferred_type is `int?`, and target_type is `int` + * (this doesn't lead to stack reorganization, but in case `nullableTensor!` does) + * (inferred_type of `nullableInt!` is `int`, and its target_type depends on its usage).
+ * The same mechanism will work for union types in the future. */ namespace tolk { -// fire error on cases like `(a, a) = rhs` / `f(mutate t.1.0, mutate t.1.0)` -GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_variable_modified_twice_inside_same_expression(SrcLocation loc) { - throw ParseError(loc, "one variable modified twice inside the same expression"); -} +class LValContext; +std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, TypePtr target_type = nullptr, LValContext* lval_ctx = nullptr); +std::vector pre_compile_symbol(SrcLocation loc, const Symbol* sym, CodeBlob& code, LValContext* lval_ctx); +void process_any_statement(AnyV v, CodeBlob& code); -// fire error on cases like `(m.1.0, m.1) = rhs` (m.1 inside m.1.0 is "rval inside lval") -GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_variable_modified_and_read_inside_same_expression(SrcLocation loc) { - throw ParseError(loc, "one variable both modified and read inside the same expression"); -} - -// Main goal of LValContext is to handle non-primitive lvalues. At IR level, a usual local variable -// exists, but on its change, something non-trivial should happen. -// Example: `globalVar = 9` actually does `Const $5 = 9` + `Let $6 = $5` + `SetGlob "globVar" = $6` -// Example: `tupleVar.0 = 9` actually does `Const $5 = 9` + `Let $6 = $5` + `Const $7 = 0` + `Call tupleSetAt($4, $6, $7)` -// Of course, mixing globals with tuples should also be supported. -// To achieve this, treat tupleObj inside "tupleObj.i" like "rvalue inside lvalue". -// For instance, `globalTuple.0 = 9` reads global (like rvalue), assigns 9 to tmp var, modifies tuple, writes global. -// A challenging thing is handling "unique" parts, to be read/updated only once. -// Example: `f(mutate globalTensor.0, mutate globalTensor.1)`, then globalTensor should be read/written once. -// Example: `(t.0.0, t.0.1) = rhs` (m is [[int, int]]), then t.0 should be read/updated once. 
-// Solving this by calculating hashes of every lvalue or rvalue inside lvalue automatically gives an ability -// to detect and fire "multiple writes inside expression", like `(a, a) = rhs` / `[t.0, (t.0.1, c)] = rhs`. -// Note, that tensors (not tuples) `tensorVar.0 = 9` do not emit anything special (unless global). -class LValContext { - // every global variable used as lvalue is registered here - // example: `globalInt = 9`, implicit var is created `$tmp = 9`, and `SetGlob "globalInt" $tmp` is done after - // global tensors are stored as tuples (unpacked on reading, packed on writing), then multiple tmp vars are created - struct ModifiedGlob { - const GlobalVarData* glob_ref; - std::vector local_ir_idx; // typically 1, generally calc_width_on_stack() of global var (tensors) - - void apply(CodeBlob& code, SrcLocation loc) const { - Op& op = code.emplace_back(loc, Op::_SetGlob, std::vector{}, local_ir_idx, glob_ref); - op.set_impure_flag(); - } - }; - - // every tuple index used as lvalue is registered here - // example: `t.0 = 9`, implicit var is created `$tmp = 9`, as well as `$tmp_idx = 0` and `tupleSetAt()` is done after - // for `t.0.0` if t is `[[int, ...]]`, `tupleAt()` for it is done since it's rvalue, and `tupleSetAt()` is done 2 times - struct ModifiedTupleIndex { - uint64_t hash; - var_idx_t tuple_ir_idx; - var_idx_t index_ir_idx; - var_idx_t field_ir_idx; - - void apply(CodeBlob& code, SrcLocation loc) const { - const FunctionData* builtin_sym = lookup_global_symbol("tupleSetAt")->as(); - code.emplace_back(loc, Op::_Call, std::vector{tuple_ir_idx}, std::vector{tuple_ir_idx, field_ir_idx, index_ir_idx}, builtin_sym); - } - }; - - int level_rval_inside_lval = 0; - std::vector> modifications; - std::unordered_set all_modified_hashes; - - void fire_if_one_variable_modified_twice(SrcLocation loc, uint64_t modified_hash) { - if (!is_rval_inside_lval()) { - if (!all_modified_hashes.insert(modified_hash).second) { - 
fire_error_variable_modified_twice_inside_same_expression(loc); - } - if (all_modified_hashes.contains(~modified_hash)) { - fire_error_variable_modified_and_read_inside_same_expression(loc); - } - } else { - all_modified_hashes.insert(~modified_hash); - if (all_modified_hashes.contains(modified_hash)) { - fire_error_variable_modified_and_read_inside_same_expression(loc); - } - } - } - -public: - void enter_rval_inside_lval() { level_rval_inside_lval++; } - void exit_rval_inside_lval() { level_rval_inside_lval--; } - bool is_rval_inside_lval() const { return level_rval_inside_lval > 0; } - - uint64_t register_lval(SrcLocation loc, const LocalVarData* var_ref) { - uint64_t hash = reinterpret_cast(var_ref); - fire_if_one_variable_modified_twice(loc, hash); - return hash; - } - - uint64_t register_lval(SrcLocation loc, const GlobalVarData* glob_ref) { - uint64_t hash = reinterpret_cast(glob_ref); - fire_if_one_variable_modified_twice(loc, hash); - return hash; - } - - uint64_t register_lval(SrcLocation loc, V v) { - uint64_t hash = 7; - AnyExprV leftmost_obj = v; - while (auto v_dot = leftmost_obj->try_as()) { - if (!v_dot->is_target_indexed_access()) { - break; - } - hash = hash * 1915239017 + std::get(v_dot->target); - leftmost_obj = v_dot->get_obj(); - } - if (auto v_ref = leftmost_obj->try_as()) { - hash *= reinterpret_cast(v_ref->sym); // `v.0` and `v.0` in 2 places is the same - } else { - hash *= reinterpret_cast(leftmost_obj); // unlike `f().0` and `f().0` (pointers to AST nodes differ) - } - fire_if_one_variable_modified_twice(loc, hash); - return hash; - } - - const std::vector* exists_already_known_global(const GlobalVarData* glob_ref) const { - for (const auto& m : modifications) { - if (const auto* m_glob = std::get_if(&m); m_glob && m_glob->glob_ref == glob_ref) { - return &m_glob->local_ir_idx; - } - } - return nullptr; - } - - const var_idx_t* exists_already_known_tuple_index(uint64_t hash) const { - for (const auto& m : modifications) { - if (const 
auto* m_tup = std::get_if(&m); m_tup && m_tup->hash == hash) { - return &m_tup->field_ir_idx; - } - } - return nullptr; - } - - void register_modified_global(const GlobalVarData* glob_ref, std::vector local_ir_idx) { - modifications.emplace_back(ModifiedGlob{glob_ref, std::move(local_ir_idx)}); - } - - void register_modified_tuple_index(uint64_t hash, var_idx_t tuple_ir_idx, var_idx_t index_ir_idx, var_idx_t field_ir_idx) { - modifications.emplace_back(ModifiedTupleIndex{hash, tuple_ir_idx, index_ir_idx, field_ir_idx}); - } - - void gen_ops_if_nonempty(CodeBlob& code, SrcLocation loc) const { - for (auto it = modifications.rbegin(); it != modifications.rend(); ++it) { // reverse, it's important - if (const auto* m_glob = std::get_if(&*it)) { - m_glob->apply(code, loc); - } else if (const auto* m_tup = std::get_if(&*it)) { - m_tup->apply(code, loc); - } - } - } -}; // The goal of VarsModificationWatcher is to detect such cases: `return (x, x += y, x)`. // Without any changes, ops will be { _Call $2 = +($0_x, $1_y); _Return $0_x, $2, $0_x } - incorrect @@ -229,18 +98,196 @@ public: static VarsModificationWatcher vars_modification_watcher; -std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, LValContext* lval_ctx = nullptr); -void process_any_statement(AnyV v, CodeBlob& code); + +// Main goal of LValContext is to handle non-primitive lvalues. At IR level, a usual local variable +// exists, but on its change, something non-trivial should happen. +// Example: `globalVar = 9` actually does `Const $5 = 9` + `Let $6 = $5` + `SetGlob "globVar" = $6` +// Example: `tupleVar.0 = 9` actually does `Const $5 = 9` + `Let $6 = $5` + `Const $7 = 0` + `Call tupleSetAt($4, $6, $7)` +// Of course, mixing globals with tuples should also be supported. +// To achieve this, treat tupleObj inside "tupleObj.i" like "rvalue inside lvalue". +// For instance, `globalTuple.0 = 9` reads global (like rvalue), assigns 9 to tmp var, modifies tuple, writes global. 
+// Note, that tensors (not tuples) `tensorVar.0 = 9` do not emit anything special (unless global). +class LValContext { + // every global variable used as lvalue is registered here + // example: `globalInt = 9`, implicit var is created `$tmp = 9`, and `SetGlob "globalInt" $tmp` is done after + struct ModifiedGlobal { + GlobalVarPtr glob_ref; + std::vector lval_ir_idx; // typically 1, generally get_width_on_stack() of global var (tensors) + + // for 1-slot globals int/cell/slice, assigning to them is just SETGLOB + // same for tensors, if they are fully rewritten in an expression: `gTensor = (5,6)` + void apply_fully_rewrite(CodeBlob& code, SrcLocation loc) const { + Op& op = code.emplace_back(loc, Op::_SetGlob, std::vector{}, lval_ir_idx, glob_ref); + op.set_impure_flag(); + } + + // for N-slot globals tensor/struct/union, assigning to their parts, like `gTensor.1 = 6` + // we need to read gTensor as a whole (0-th and 1-th component), rewrite 1-th component, and SETGLOB a whole back + void apply_partially_rewrite(CodeBlob& code, SrcLocation loc, std::vector&& was_modified_by_let) const { + LValContext local_lval; + local_lval.enter_rval_inside_lval(); + std::vector local_ir_idx = pre_compile_symbol(loc, glob_ref, code, &local_lval); + for (size_t i = 0; i < local_ir_idx.size(); ++i) { + if (was_modified_by_let[i]) { + code.emplace_back(loc, Op::_Let, std::vector{local_ir_idx[i]}, std::vector{lval_ir_idx[i]}); + } + } + + Op& op = code.emplace_back(loc, Op::_SetGlob, std::vector{}, local_ir_idx, glob_ref); + op.set_impure_flag(); + } + }; + + // every tensor index, when a tensor is a global, is registered here (same for structs and fields) + // example: `global v: (int, int); v.1 = 5`, implicit var is created `$tmp = 5`, and when it's modified, + // we need to partially update v; essentially, apply_partially_rewrite() above will be called + struct ModifiedFieldOfGlobal { + AnyExprV tensor_obj; + int index_at; + std::vector lval_ir_idx; + + void apply(CodeBlob&
code, SrcLocation loc) const { + LValContext local_lval; + local_lval.enter_rval_inside_lval(); + std::vector obj_ir_idx = pre_compile_expr(tensor_obj, code, nullptr, &local_lval); + const TypeDataTensor* t_tensor = tensor_obj->inferred_type->try_as(); + tolk_assert(t_tensor); + int stack_width = t_tensor->items[index_at]->get_width_on_stack(); + int stack_offset = 0; + for (int i = 0; i < index_at; ++i) { + stack_offset += t_tensor->items[i]->get_width_on_stack(); + } + std::vector field_ir_idx = {obj_ir_idx.begin() + stack_offset, obj_ir_idx.begin() + stack_offset + stack_width}; + tolk_assert(field_ir_idx.size() == lval_ir_idx.size()); + + vars_modification_watcher.trigger_callbacks(field_ir_idx, loc); + code.emplace_back(loc, Op::_Let, field_ir_idx, lval_ir_idx); + local_lval.after_let(std::move(field_ir_idx), code, loc); + } + }; + + // every tuple index used as lvalue is registered here + // example: `t.0 = 9`, implicit var is created `$tmp = 9`, as well as `$tmp_idx = 0` and `tupleSetAt()` is done after + // for `t.0.0` if t is `[[int, ...]]`, `tupleAt()` for it is done since it's rvalue, and `tupleSetAt()` is done 2 times + struct ModifiedTupleIndex { + AnyExprV tuple_obj; + int index_at; + std::vector lval_ir_idx; + + void apply(CodeBlob& code, SrcLocation loc) const { + LValContext local_lval; + local_lval.enter_rval_inside_lval(); + std::vector tuple_ir_idx = pre_compile_expr(tuple_obj, code, nullptr, &local_lval); + std::vector index_ir_idx = code.create_tmp_var(TypeDataInt::create(), loc, "(tuple-idx)"); + code.emplace_back(loc, Op::_IntConst, index_ir_idx, td::make_refint(index_at)); + + vars_modification_watcher.trigger_callbacks(tuple_ir_idx, loc); + FunctionPtr builtin_sym = lookup_global_symbol("tupleSetAt")->try_as(); + code.emplace_back(loc, Op::_Call, std::vector{tuple_ir_idx}, std::vector{tuple_ir_idx[0], lval_ir_idx[0], index_ir_idx[0]}, builtin_sym); + local_lval.after_let(std::move(tuple_ir_idx), code, loc); + } + }; + + int 
level_rval_inside_lval = 0; + std::vector> modifications; + + static bool vector_contains(const std::vector& ir_vars, var_idx_t ir_idx) { + for (var_idx_t var_in_vector : ir_vars) { + if (var_in_vector == ir_idx) { + return true; + } + } + return false; + } + +public: + void enter_rval_inside_lval() { level_rval_inside_lval++; } + void exit_rval_inside_lval() { level_rval_inside_lval--; } + bool is_rval_inside_lval() const { return level_rval_inside_lval > 0; } + + void capture_global_modification(GlobalVarPtr glob_ref, std::vector lval_ir_idx) { + modifications.emplace_back(ModifiedGlobal{glob_ref, std::move(lval_ir_idx)}); + } + + void capture_field_of_global_modification(AnyExprV tensor_obj, int index_at, std::vector lval_ir_idx) { + modifications.emplace_back(ModifiedFieldOfGlobal{tensor_obj, index_at, std::move(lval_ir_idx)}); + } + + void capture_tuple_index_modification(AnyExprV tuple_obj, int index_at, std::vector lval_ir_idx) { + modifications.emplace_back(ModifiedTupleIndex{tuple_obj, index_at, std::move(lval_ir_idx)}); + } + + void after_let(std::vector&& let_left_vars, CodeBlob& code, SrcLocation loc) const { + for (const auto& modification : modifications) { + if (const auto* m_glob = std::get_if(&modification)) { + int n_modified_by_let = 0; + std::vector was_modified_by_let; + was_modified_by_let.resize(m_glob->lval_ir_idx.size()); + for (size_t i = 0; i < m_glob->lval_ir_idx.size(); ++i) { + if (vector_contains(let_left_vars, m_glob->lval_ir_idx[i])) { + was_modified_by_let[i] = true; + n_modified_by_let++; + } + } + if (n_modified_by_let == static_cast(m_glob->lval_ir_idx.size())) { + m_glob->apply_fully_rewrite(code, loc); + } else if (n_modified_by_let > 0) { + m_glob->apply_partially_rewrite(code, loc, std::move(was_modified_by_let)); + } + } else if (const auto* m_tup = std::get_if(&modification)) { + bool was_tuple_index_modified = false; + for (var_idx_t field_ir_idx : m_tup->lval_ir_idx) { + was_tuple_index_modified |= 
vector_contains(let_left_vars, field_ir_idx); + } + if (was_tuple_index_modified) { + m_tup->apply(code, loc); + } + } else if (const auto* m_tens = std::get_if(&modification)) { + bool was_tensor_index_modified = false; + for (var_idx_t field_ir_idx : m_tens->lval_ir_idx) { + was_tensor_index_modified |= vector_contains(let_left_vars, field_ir_idx); + } + if (was_tensor_index_modified) { + m_tens->apply(code, loc); + } + } + } + } +}; + +// given `{some_expr}!`, return some_expr +static AnyExprV unwrap_not_null_operator(AnyExprV v) { + while (auto v_notnull = v->try_as()) { + v = v_notnull->get_expr(); + } + return v; +} + +// given `{some_expr}.{i}`, check it for pattern `some_var.0` / `some_var.0.1` / etc. +// return some_var if satisfies (it may be a local or a global var, a tensor or a tuple) +// return nullptr otherwise: `f().0` / `(v = rhs).0` / `some_var.method().0` / etc. +static V calc_sink_leftmost_obj(V v) { + AnyExprV leftmost_obj = unwrap_not_null_operator(v->get_obj()); + while (auto v_dot = leftmost_obj->try_as()) { + if (!v_dot->is_target_indexed_access()) { + break; + } + leftmost_obj = unwrap_not_null_operator(v_dot->get_obj()); + } + return leftmost_obj->type == ast_reference ? leftmost_obj->as() : nullptr; +} static std::vector> pre_compile_tensor_inner(CodeBlob& code, const std::vector& args, - LValContext* lval_ctx) { + const TypeDataTensor* tensor_target_type, LValContext* lval_ctx) { const int n = static_cast(args.size()); if (n == 0) { // just `()` return {}; } + tolk_assert(!tensor_target_type || tensor_target_type->size() == n); if (n == 1) { // just `(x)`: even if x is modified (e.g. `f(x=x+2)`), there are no next arguments - return {pre_compile_expr(args[0], code, lval_ctx)}; + TypePtr child_target_type = tensor_target_type ? 
tensor_target_type->items[0] : nullptr; + return {pre_compile_expr(args[0], code, child_target_type, lval_ctx)}; } // the purpose is to handle such cases: `return (x, x += y, x)` @@ -294,7 +341,8 @@ static std::vector> pre_compile_tensor_inner(CodeBlob& co WatchingVarList watched_vars(n); for (int arg_idx = 0; arg_idx < n; ++arg_idx) { - std::vector vars_of_ith_arg = pre_compile_expr(args[arg_idx], code, lval_ctx); + TypePtr child_target_type = tensor_target_type ? tensor_target_type->items[arg_idx] : nullptr; + std::vector vars_of_ith_arg = pre_compile_expr(args[arg_idx], code, child_target_type, lval_ctx); watched_vars.add_and_watch_modifications(std::move(vars_of_ith_arg), code); } return watched_vars.clear_and_stop_watching(); @@ -302,7 +350,13 @@ static std::vector> pre_compile_tensor_inner(CodeBlob& co static std::vector pre_compile_tensor(CodeBlob& code, const std::vector& args, LValContext* lval_ctx = nullptr) { - std::vector> res_lists = pre_compile_tensor_inner(code, args, lval_ctx); + std::vector types_list; + types_list.reserve(args.size()); + for (AnyExprV item : args) { + types_list.push_back(item->inferred_type); + } + const TypeDataTensor* tensor_target_type = TypeDataTensor::create(std::move(types_list))->try_as(); + std::vector> res_lists = pre_compile_tensor_inner(code, args, tensor_target_type, lval_ctx); std::vector res; for (const std::vector& list : res_lists) { res.insert(res.end(), list.cbegin(), list.cend()); @@ -313,48 +367,51 @@ static std::vector pre_compile_tensor(CodeBlob& code, const std::vect static std::vector pre_compile_let(CodeBlob& code, AnyExprV lhs, AnyExprV rhs, SrcLocation loc) { // [lhs] = [rhs]; since type checking is ok, it's the same as "lhs = rhs" if (lhs->type == ast_typed_tuple && rhs->type == ast_typed_tuple) { - std::vector right = pre_compile_tensor(code, rhs->as()->get_items()); + // note: there are no type transitions (adding nullability flag, etc.), since only 1-slot elements allowed in tuples LValContext 
local_lval; std::vector left = pre_compile_tensor(code, lhs->as()->get_items(), &local_lval); vars_modification_watcher.trigger_callbacks(left, loc); - code.emplace_back(loc, Op::_Let, std::move(left), right); - local_lval.gen_ops_if_nonempty(code, loc); + std::vector rvect = pre_compile_tensor(code, rhs->as()->get_items()); + code.emplace_back(loc, Op::_Let, left, rvect); + local_lval.after_let(std::move(left), code, loc); + std::vector right = code.create_tmp_var(TypeDataTuple::create(), loc, "(tuple)"); + code.emplace_back(lhs->loc, Op::_Tuple, right, std::move(rvect)); return right; } // [lhs] = rhs; it's un-tuple to N left vars if (lhs->type == ast_typed_tuple) { - std::vector right = pre_compile_expr(rhs, code); + LValContext local_lval; + std::vector left = pre_compile_tensor(code, lhs->as()->get_items(), &local_lval); + vars_modification_watcher.trigger_callbacks(left, loc); + std::vector right = pre_compile_expr(rhs, code, nullptr); const TypeDataTypedTuple* inferred_tuple = rhs->inferred_type->try_as(); std::vector types_list = inferred_tuple->items; std::vector rvect = code.create_tmp_var(TypeDataTensor::create(std::move(types_list)), rhs->loc, "(unpack-tuple)"); code.emplace_back(lhs->loc, Op::_UnTuple, rvect, std::move(right)); - LValContext local_lval; - std::vector left = pre_compile_tensor(code, lhs->as()->get_items(), &local_lval); - vars_modification_watcher.trigger_callbacks(left, loc); - code.emplace_back(loc, Op::_Let, std::move(left), rvect); - local_lval.gen_ops_if_nonempty(code, loc); - return rvect; + code.emplace_back(loc, Op::_Let, left, rvect); + local_lval.after_let(std::move(left), code, loc); + return right; } // small optimization: `var x = rhs` or `local_var = rhs` (90% cases), LValContext not needed actually - if (lhs->type == ast_local_var_lhs || (lhs->type == ast_reference && lhs->as()->sym->try_as())) { - std::vector right = pre_compile_expr(rhs, code); - std::vector left = pre_compile_expr(lhs, code); // effectively, 
local_var->ir_idx + if (lhs->type == ast_local_var_lhs || (lhs->type == ast_reference && lhs->as()->sym->try_as())) { + std::vector left = pre_compile_expr(lhs, code, nullptr); // effectively, local_var->ir_idx vars_modification_watcher.trigger_callbacks(left, loc); + std::vector right = pre_compile_expr(rhs, code, lhs->inferred_type); code.emplace_back(loc, Op::_Let, std::move(left), right); return right; } // lhs = rhs - std::vector right = pre_compile_expr(rhs, code); LValContext local_lval; - std::vector left = pre_compile_expr(lhs, code, &local_lval); + std::vector left = pre_compile_expr(lhs, code, nullptr, &local_lval); vars_modification_watcher.trigger_callbacks(left, loc); - code.emplace_back(loc, Op::_Let, std::move(left), right); - local_lval.gen_ops_if_nonempty(code, loc); + std::vector right = pre_compile_expr(rhs, code, lhs->inferred_type); + code.emplace_back(loc, Op::_Let, left, right); + local_lval.after_let(std::move(left), code, loc); return right; } static std::vector gen_op_call(CodeBlob& code, TypePtr ret_type, SrcLocation loc, - std::vector&& args_vars, const FunctionData* fun_ref, const char* debug_desc) { + std::vector&& args_vars, FunctionPtr fun_ref, const char* debug_desc) { std::vector rvect = code.create_tmp_var(ret_type, loc, debug_desc); Op& op = code.emplace_back(loc, Op::_Call, rvect, std::move(args_vars), fun_ref); if (!fun_ref->is_marked_as_pure()) { @@ -363,31 +420,207 @@ static std::vector gen_op_call(CodeBlob& code, TypePtr ret_type, SrcL return rvect; } - -static std::vector pre_compile_symbol(SrcLocation loc, const Symbol* sym, CodeBlob& code, LValContext* lval_ctx) { - if (const auto* glob_ref = sym->try_as()) { - if (!lval_ctx) { - // `globalVar` is used for reading, just create local IR var to represent its value, Op GlobVar will fill it - // note, that global tensors are stored as a tuple an unpacked to N vars on read, N determined by declared_type - std::vector local_ir_idx = code.create_tmp_var(glob_ref->declared_type, 
loc, "(glob-var)"); - code.emplace_back(loc, Op::_GlobVar, local_ir_idx, std::vector{}, glob_ref); - return local_ir_idx; - } else { - // `globalVar = rhs` / `mutate globalVar` / `globalTuple.0 = rhs` - lval_ctx->register_lval(loc, glob_ref); - if (const std::vector* local_ir_idx = lval_ctx->exists_already_known_global(glob_ref)) { - return *local_ir_idx; // `f(mutate g.0, mutate g.1)`, then g will be read only once - } - std::vector local_ir_idx = code.create_tmp_var(glob_ref->declared_type, loc, "(glob-var)"); - if (lval_ctx->is_rval_inside_lval()) { // for `globalVar.0` "globalVar" is rvalue inside lvalue - // for `globalVar = rhs` don't read a global actually, but for `globalVar.0 = rhs` do - code.emplace_back(loc, Op::_GlobVar, local_ir_idx, std::vector{}, glob_ref); - } - lval_ctx->register_modified_global(glob_ref, local_ir_idx); - return local_ir_idx; - } +// "Transition to target (runtime) type" is the following process. +// Imagine `fun analyze(t: (int,int)?)` and a call `analyze((1,2))`. +// `(1,2)` (inferred_type) is 2 stack slots, but `t` (target_type) is 3 (one for null-flag). +// So, this null flag should be implicitly added (non-zero, since a variable is not null). +// Another example: `var t: (int, int)? = null`. +// `null` (inferred_type) is 1 stack slot, but target_type is 3, we should add 2 more slots (one more null and a zero null flag). +// Another example: `var t1 = (1, null); var t2: (int, (int,int)?) = t1;`. +// Then t1's rvect is 2 vars (1 and null), but t1's `null` should be converted to 3 stack slots (resulting in 4 total). +// The same mechanism will work for union types in the future. +// Here rvect is a list of IR vars for inferred_type, probably patched due to target_type.
+GNU_ATTRIBUTE_NOINLINE +static std::vector transition_expr_to_runtime_type_impl(std::vector&& rvect, CodeBlob& code, TypePtr original_type, TypePtr target_type, SrcLocation loc) { + // pass `T` to `T` + // could occur for passing tensor `(..., T, ...)` to `(..., T, ...)` while traversing tensor's components + if (target_type == original_type) { + return rvect; } - if (const auto* const_ref = sym->try_as()) { + + int target_w = target_type->get_width_on_stack(); + const TypeDataNullable* t_nullable = target_type->try_as(); + const TypeDataNullable* o_nullable = original_type->try_as(); + + // handle `never` + // it may occur due to smart cast and in unreachable branches + // we can't do anything reasonable here, but (hopefully) execution will never reach this point, and stack won't be polluted + if (original_type == TypeDataNever::create()) { + std::vector dummy_rvect; + dummy_rvect.reserve(target_w); + for (int i = 0; i < target_w; ++i) { + dummy_rvect.push_back(code.create_tmp_var(TypeDataUnknown::create(), loc, "(never)")[0]); + } + return dummy_rvect; + } + if (target_type == TypeDataNever::create()) { + return {}; + } + + // pass `null` to `T?` + // for primitives like `int?`, no changes in rvect, null occupies the same TVM slot + // for tensors like `(int,int)?`, `null` is represented as N nulls + 1 null flag, insert N nulls + if (t_nullable && original_type == TypeDataNullLiteral::create()) { + tolk_assert(rvect.size() == 1); + if (target_w == 1 && !t_nullable->is_primitive_nullable()) { // `null` to `()?` + rvect = code.create_tmp_var(TypeDataInt::create(), loc, "(NNFlag)"); + code.emplace_back(loc, Op::_IntConst, rvect, td::make_refint(0)); + } + if (target_w > 1) { + FunctionPtr builtin_sym = lookup_global_symbol("__null")->try_as(); + rvect.reserve(target_w + 1); + for (int i = 1; i < target_w - 1; ++i) { + std::vector ith_null = code.create_tmp_var(TypeDataNullLiteral::create(), loc, "(null-literal)"); + code.emplace_back(loc, Op::_Call, ith_null, 
std::vector{}, builtin_sym); + rvect.push_back(ith_null[0]); + } + std::vector null_flag_ir = code.create_tmp_var(TypeDataInt::create(), loc, "(NNFlag)"); + var_idx_t null_flag_ir_idx = null_flag_ir[0]; + code.emplace_back(loc, Op::_IntConst, std::move(null_flag_ir), td::make_refint(0)); + rvect.push_back(null_flag_ir_idx); + } + return rvect; + } + // pass `T` to `T?` + // for primitives like `int?`, no changes in rvect: `int` and `int?` occupy the same TVM slot (null is represented as NULL TVM value) + // for passing `(int, int)` to `(int, int)?` / `(int, null)` to `(int, (int,int)?)?`, add a null flag equal to -1 (not null) + if (t_nullable && !o_nullable) { + if (!t_nullable->is_primitive_nullable()) { + rvect = transition_expr_to_runtime_type_impl(std::move(rvect), code, original_type, t_nullable->inner, loc); + tolk_assert(target_w == static_cast(rvect.size() + 1)); + std::vector null_flag_ir = code.create_tmp_var(TypeDataInt::create(), loc, "(NNFlag)"); + var_idx_t null_flag_ir_idx = null_flag_ir[0]; + code.emplace_back(loc, Op::_IntConst, std::move(null_flag_ir), td::make_refint(-1)); + rvect.push_back(null_flag_ir_idx); + } + return rvect; + } + // pass `T1?` to `T2?` + // for example, `int8?` to `int16?` + // transition inner types, leaving nullable flag unchanged for tensors + if (t_nullable && o_nullable) { + if (target_w > 1) { + var_idx_t null_flag_ir_idx = rvect.back(); + rvect.pop_back(); + rvect = transition_expr_to_runtime_type_impl(std::move(rvect), code, o_nullable->inner, t_nullable->inner, loc); + rvect.push_back(null_flag_ir_idx); + } + return rvect; + } + // pass `T?` to `null` + // it may occur due to smart cast, when a `T?` variable is guaranteed to be always null + // (for instance, always-null `(int,int)?` will be represented as 1 TVM NULL value, not 3) + if (target_type == TypeDataNullLiteral::create() && original_type->can_rhs_be_assigned(target_type)) { + tolk_assert(o_nullable || original_type == TypeDataUnknown::create()); + if (o_nullable &&
!o_nullable->is_primitive_nullable()) { + FunctionPtr builtin_sym = lookup_global_symbol("__null")->try_as(); + rvect = code.create_tmp_var(TypeDataNullLiteral::create(), loc, "(null-literal)"); + code.emplace_back(loc, Op::_Call, rvect, std::vector{}, builtin_sym); + } + return rvect; + } + // pass `T?` to `T` (or, more generally, `T1?` to `T2`) + // it may occur due to operator `!` or smart cast + // for primitives like `int?`, no changes in rvect + // for passing `(int, int)?` to `(int, int)`, drop the null flag from the tail + // for complex scenarios like passing `(int, (int,int)?)?` to `(int, null)`, recurse the call + // (it may occur on `someF(t = (3,null))` when `(3,null)` at first targeted to lhs, but actually its result is rhs) + if (!t_nullable && o_nullable) { + if (!o_nullable->is_primitive_nullable()) { + rvect.pop_back(); + rvect = transition_expr_to_runtime_type_impl(std::move(rvect), code, original_type->try_as()->inner, target_type, loc); + } + return rvect; + } + // pass `bool` to `int` + // in code, it's done via `as` operator, like `boolVar as int` + // no changes in rvect, boolVar is guaranteed to be -1 or 0 at TVM level + if (target_type == TypeDataInt::create() && original_type == TypeDataBool::create()) { + return rvect; + } + // pass something to `unknown` + // probably, it comes from `_ = rhs`, type of `_` is unknown, it's target_type of rhs + // no changes in rvect + if (target_type == TypeDataUnknown::create()) { + return rvect; + } + // pass `unknown` to something + // probably, it comes from `arg` in exception, it's inferred as `unknown` and could be cast to any value + if (original_type == TypeDataUnknown::create()) { + tolk_assert(rvect.size() == 1); + return rvect; + } + // pass tensor to tensor, e.g. 
`(1, null)` to `(int, slice?)` / `(1, null)` to `(int, (int,int)?)` + // every element of rhs tensor should be transitioned + if (target_type->try_as() && original_type->try_as()) { + const TypeDataTensor* target_tensor = target_type->try_as(); + const TypeDataTensor* inferred_tensor = original_type->try_as(); + tolk_assert(target_tensor->size() == inferred_tensor->size()); + tolk_assert(inferred_tensor->get_width_on_stack() == static_cast(rvect.size())); + std::vector result_rvect; + result_rvect.reserve(target_w); + int stack_offset = 0; + for (int i = 0; i < inferred_tensor->size(); ++i) { + int ith_w = inferred_tensor->items[i]->get_width_on_stack(); + std::vector rvect_i{rvect.begin() + stack_offset, rvect.begin() + stack_offset + ith_w}; + std::vector result_i = transition_expr_to_runtime_type_impl(std::move(rvect_i), code, inferred_tensor->items[i], target_tensor->items[i], loc); + result_rvect.insert(result_rvect.end(), result_i.begin(), result_i.end()); + stack_offset += ith_w; + } + return result_rvect; + } + // pass tuple to tuple, e.g. 
`[1, null]` to `[int, int?]` / `[1, null]` to `[int, [int?,int?]?]` + // no changes to rvect, since tuples contain only 1-slot elements + if (target_type->try_as() && original_type->try_as()) { + tolk_assert(target_type->get_width_on_stack() == original_type->get_width_on_stack()); + return rvect; + } + + throw Fatal("unhandled transition_expr_to_runtime_type_impl() combination"); +} + +// invoke the function above only if potentially needed to +// (if an expression is targeted to another type) +#ifndef TOLK_DEBUG +GNU_ATTRIBUTE_ALWAYS_INLINE +#endif +static std::vector transition_to_target_type(std::vector&& rvect, CodeBlob& code, TypePtr target_type, AnyExprV v) { + if (target_type != nullptr && target_type != v->inferred_type) { + rvect = transition_expr_to_runtime_type_impl(std::move(rvect), code, v->inferred_type, target_type, v->loc); + } + return rvect; +} + +// the second overload of the same function, invoke impl only when original and target differ +#ifndef TOLK_DEBUG +GNU_ATTRIBUTE_ALWAYS_INLINE +#endif +static std::vector transition_to_target_type(std::vector&& rvect, CodeBlob& code, TypePtr original_type, TypePtr target_type, SrcLocation loc) { + if (target_type != original_type) { + rvect = transition_expr_to_runtime_type_impl(std::move(rvect), code, original_type, target_type, loc); + } + return rvect; +} + + +std::vector pre_compile_symbol(SrcLocation loc, const Symbol* sym, CodeBlob& code, LValContext* lval_ctx) { + if (GlobalVarPtr glob_ref = sym->try_as()) { + // handle `globalVar = rhs` / `mutate globalVar` + if (lval_ctx && !lval_ctx->is_rval_inside_lval()) { + std::vector lval_ir_idx = code.create_tmp_var(glob_ref->declared_type, loc, "(lval-glob)"); + lval_ctx->capture_global_modification(glob_ref, lval_ir_idx); + return lval_ir_idx; + } + // `globalVar` is used for reading, just create local IR var to represent its value, Op GlobVar will fill it + // note, that global tensors are stored as a tuple and unpacked to N vars on read, N determined
by declared_type + std::vector local_ir_idx = code.create_var(glob_ref->declared_type, loc, "g_" + glob_ref->name); + code.emplace_back(loc, Op::_GlobVar, local_ir_idx, std::vector{}, glob_ref); + if (lval_ctx) { // `globalVar.0 = rhs`, globalVar is rval inside lval + lval_ctx->capture_global_modification(glob_ref, local_ir_idx); + } + return local_ir_idx; + } + if (GlobalConstPtr const_ref = sym->try_as()) { if (const_ref->is_int_const()) { std::vector rvect = code.create_tmp_var(TypeDataInt::create(), loc, "(glob-const)"); code.emplace_back(loc, Op::_IntConst, rvect, const_ref->as_int_const()); @@ -398,47 +631,72 @@ static std::vector pre_compile_symbol(SrcLocation loc, const Symbol* return rvect; } } - if (const auto* fun_ref = sym->try_as()) { + if (FunctionPtr fun_ref = sym->try_as()) { std::vector rvect = code.create_tmp_var(fun_ref->inferred_full_type, loc, "(glob-var-fun)"); code.emplace_back(loc, Op::_GlobVar, rvect, std::vector{}, fun_ref); return rvect; } - if (const auto* var_ref = sym->try_as()) { + if (LocalVarPtr var_ref = sym->try_as()) { #ifdef TOLK_DEBUG - tolk_assert(static_cast(var_ref->ir_idx.size()) == var_ref->declared_type->calc_width_on_stack()); + tolk_assert(static_cast(var_ref->ir_idx.size()) == var_ref->declared_type->get_width_on_stack()); #endif - if (lval_ctx) { - lval_ctx->register_lval(loc, var_ref); - } return var_ref->ir_idx; } throw Fatal("pre_compile_symbol"); } -static std::vector process_assign(V v, CodeBlob& code) { - if (auto lhs_decl = v->get_lhs()->try_as()) { - return pre_compile_let(code, lhs_decl->get_expr(), v->get_rhs(), v->loc); +static std::vector process_reference(V v, CodeBlob& code, TypePtr target_type, LValContext* lval_ctx) { + std::vector rvect = pre_compile_symbol(v->loc, v->sym, code, lval_ctx); + + // a local variable might be smart cast at this point, for example we're in `if (v != null)` + // it means that we must drop the null flag (if it's a tensor), or maybe perform other stack transformations + // 
(from original var_ref->ir_idx to fit smart cast) + if (LocalVarPtr var_ref = v->sym->try_as()) { + // note, inside `if (v != null)` when `v` is used for writing, v->inferred_type is an original (declared_type) + // (smart casts apply only for rvalue, not for lvalue, we don't check it here, it's a property of inferring) + rvect = transition_to_target_type(std::move(rvect), code, var_ref->declared_type, v->inferred_type, v->loc); + } + + return transition_to_target_type(std::move(rvect), code, target_type, v); +} + +static std::vector process_assignment(V v, CodeBlob& code, TypePtr target_type) { + AnyExprV lhs = v->get_lhs(); + AnyExprV rhs = v->get_rhs(); + + if (auto lhs_decl = lhs->try_as()) { + std::vector rvect = pre_compile_let(code, lhs_decl->get_expr(), rhs, v->loc); + return transition_to_target_type(std::move(rvect), code, target_type, v); } else { - return pre_compile_let(code, v->get_lhs(), v->get_rhs(), v->loc); + std::vector rvect = pre_compile_let(code, lhs, rhs, v->loc); + // now rvect contains rhs IR vars constructed to fit lhs (for correct assignment, lhs type was target_type for rhs) + // but the type of `lhs = rhs` is RHS (see type inferring), so rvect now should fit rhs->inferred_type (= v->inferred_type) + // example: `t1 = t2 = null`, we're at `t2 = null`, earlier declared t1: `int?`, t2: `(int,int)?` + // currently "null" matches t2 (3 null slots), but type of this assignment is "plain null" (1 slot) assigned later to t1 + rvect = transition_to_target_type(std::move(rvect), code, lhs->inferred_type, v->inferred_type, v->loc); + return transition_to_target_type(std::move(rvect), code, target_type, v); } } -static std::vector process_set_assign(V v, CodeBlob& code) { +static std::vector process_set_assign(V v, CodeBlob& code, TypePtr target_type) { // for "a += b", emulate "a = a + b" // seems not beautiful, but it works; probably, this transformation should be done at AST level in advance std::string_view calc_operator = v->operator_name; // 
"+" for operator += auto v_apply = createV(v->loc, calc_operator, static_cast(v->tok - 1), v->get_lhs(), v->get_rhs()); v_apply->assign_inferred_type(v->inferred_type); v_apply->assign_fun_ref(v->fun_ref); - return pre_compile_let(code, v->get_lhs(), v_apply, v->loc); + + std::vector rvect = pre_compile_let(code, v->get_lhs(), v_apply, v->loc); + return transition_to_target_type(std::move(rvect), code, target_type, v); } -static std::vector process_binary_operator(V v, CodeBlob& code) { +static std::vector process_binary_operator(V v, CodeBlob& code, TypePtr target_type) { TokenType t = v->tok; if (v->fun_ref) { // almost all operators, fun_ref was assigned at type inferring std::vector args_vars = pre_compile_tensor(code, {v->get_lhs(), v->get_rhs()}); - return gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), v->fun_ref, "(binary-op)"); + std::vector rvect = gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), v->fun_ref, "(binary-op)"); + return transition_to_target_type(std::move(rvect), code, target_type, v); } if (t == tok_logical_and || t == tok_logical_or) { // do the following transformations: @@ -450,122 +708,183 @@ static std::vector process_binary_operator(V v, v_1->mutate()->assign_inferred_type(TypeDataInt::create()); auto v_b_ne_0 = createV(v->loc, "!=", tok_neq, v->get_rhs(), v_0); v_b_ne_0->mutate()->assign_inferred_type(TypeDataInt::create()); - v_b_ne_0->mutate()->assign_fun_ref(lookup_global_symbol("_!=_")->as()); - std::vector cond = pre_compile_expr(v->get_lhs(), code); + v_b_ne_0->mutate()->assign_fun_ref(lookup_global_symbol("_!=_")->try_as()); + std::vector cond = pre_compile_expr(v->get_lhs(), code, nullptr); tolk_assert(cond.size() == 1); - std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc, "(cond)"); + std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc, "(ternary)"); Op& if_op = code.emplace_back(v->loc, Op::_If, cond); code.push_set_cur(if_op.block0); - code.emplace_back(v->loc, 
Op::_Let, rvect, pre_compile_expr(t == tok_logical_and ? v_b_ne_0 : v_1, code)); + code.emplace_back(v->loc, Op::_Let, rvect, pre_compile_expr(t == tok_logical_and ? v_b_ne_0 : v_1, code, nullptr)); code.close_pop_cur(v->loc); code.push_set_cur(if_op.block1); - code.emplace_back(v->loc, Op::_Let, rvect, pre_compile_expr(t == tok_logical_and ? v_0 : v_b_ne_0, code)); + code.emplace_back(v->loc, Op::_Let, rvect, pre_compile_expr(t == tok_logical_and ? v_0 : v_b_ne_0, code, nullptr)); code.close_pop_cur(v->loc); - return rvect; + return transition_to_target_type(std::move(rvect), code, target_type, v); } throw UnexpectedASTNodeType(v, "process_binary_operator"); } -static std::vector process_unary_operator(V v, CodeBlob& code) { - std::vector args_vars = pre_compile_tensor(code, {v->get_rhs()}); - return gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), v->fun_ref, "(unary-op)"); +static std::vector process_unary_operator(V v, CodeBlob& code, TypePtr target_type) { + std::vector rhs_vars = pre_compile_expr(v->get_rhs(), code, nullptr); + std::vector rvect = gen_op_call(code, v->inferred_type, v->loc, std::move(rhs_vars), v->fun_ref, "(unary-op)"); + return transition_to_target_type(std::move(rvect), code, target_type, v); } -static std::vector process_ternary_operator(V v, CodeBlob& code) { - std::vector cond = pre_compile_expr(v->get_cond(), code); +static std::vector process_ternary_operator(V v, CodeBlob& code, TypePtr target_type) { + std::vector cond = pre_compile_expr(v->get_cond(), code, nullptr); tolk_assert(cond.size() == 1); std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc, "(cond)"); - Op& if_op = code.emplace_back(v->loc, Op::_If, cond); - code.push_set_cur(if_op.block0); - code.emplace_back(v->get_when_true()->loc, Op::_Let, rvect, pre_compile_expr(v->get_when_true(), code)); - code.close_pop_cur(v->get_when_true()->loc); - code.push_set_cur(if_op.block1); - code.emplace_back(v->get_when_false()->loc, Op::_Let, rvect, 
pre_compile_expr(v->get_when_false(), code)); - code.close_pop_cur(v->get_when_false()->loc); - return rvect; + + if (v->get_cond()->is_always_true) { + code.emplace_back(v->get_when_true()->loc, Op::_Let, rvect, pre_compile_expr(v->get_when_true(), code, v->inferred_type)); + } else if (v->get_cond()->is_always_false) { + code.emplace_back(v->get_when_false()->loc, Op::_Let, rvect, pre_compile_expr(v->get_when_false(), code, v->inferred_type)); + } else { + Op& if_op = code.emplace_back(v->loc, Op::_If, cond); + code.push_set_cur(if_op.block0); + code.emplace_back(v->get_when_true()->loc, Op::_Let, rvect, pre_compile_expr(v->get_when_true(), code, v->inferred_type)); + code.close_pop_cur(v->get_when_true()->loc); + code.push_set_cur(if_op.block1); + code.emplace_back(v->get_when_false()->loc, Op::_Let, rvect, pre_compile_expr(v->get_when_false(), code, v->inferred_type)); + code.close_pop_cur(v->get_when_false()->loc); + } + + return transition_to_target_type(std::move(rvect), code, target_type, v); } -static std::vector process_dot_access(V v, CodeBlob& code, LValContext* lval_ctx) { +static std::vector process_cast_as_operator(V v, CodeBlob& code, TypePtr target_type, LValContext* lval_ctx) { + TypePtr child_target_type = v->cast_to_type; + std::vector rvect = pre_compile_expr(v->get_expr(), code, child_target_type, lval_ctx); + return transition_to_target_type(std::move(rvect), code, target_type, v); +} + +static std::vector process_not_null_operator(V v, CodeBlob& code, TypePtr target_type, LValContext* lval_ctx) { + TypePtr child_target_type = v->get_expr()->inferred_type; + if (const auto* as_nullable = child_target_type->try_as()) { + child_target_type = as_nullable->inner; + } + std::vector rvect = pre_compile_expr(v->get_expr(), code, child_target_type, lval_ctx); + return transition_to_target_type(std::move(rvect), code, target_type, v); +} + +static std::vector process_is_null_check(V v, CodeBlob& code, TypePtr target_type) { + std::vector expr_ir_idx = 
pre_compile_expr(v->get_expr(), code, nullptr); + std::vector isnull_ir_idx = code.create_tmp_var(TypeDataBool::create(), v->loc, "(is-null)"); + TypePtr expr_type = v->get_expr()->inferred_type; + + if (const TypeDataNullable* t_nullable = expr_type->try_as()) { + if (!t_nullable->is_primitive_nullable()) { + std::vector zero_ir_idx = code.create_tmp_var(TypeDataInt::create(), v->loc, "(zero)"); + code.emplace_back(v->loc, Op::_IntConst, zero_ir_idx, td::make_refint(0)); + FunctionPtr eq_sym = lookup_global_symbol("_==_")->try_as(); + code.emplace_back(v->loc, Op::_Call, isnull_ir_idx, std::vector{expr_ir_idx.back(), zero_ir_idx[0]}, eq_sym); + } else { + FunctionPtr builtin_sym = lookup_global_symbol("__isNull")->try_as(); + code.emplace_back(v->loc, Op::_Call, isnull_ir_idx, expr_ir_idx, builtin_sym); + } + } else { + bool always_null = expr_type == TypeDataNullLiteral::create(); + code.emplace_back(v->loc, Op::_IntConst, isnull_ir_idx, td::make_refint(always_null ? -1 : 0)); + } + + if (v->is_negated) { + FunctionPtr not_sym = lookup_global_symbol("!b_")->try_as(); + code.emplace_back(v->loc, Op::_Call, isnull_ir_idx, std::vector{isnull_ir_idx}, not_sym); + } + return transition_to_target_type(std::move(isnull_ir_idx), code, target_type, v); +} + +static std::vector process_dot_access(V v, CodeBlob& code, TypePtr target_type, LValContext* lval_ctx) { // it's NOT a method call `t.tupleSize()` (since such cases are handled by process_function_call) // it's `t.0`, `getUser().id`, and `t.tupleSize` (as a reference, not as a call) if (!v->is_target_fun_ref()) { TypePtr obj_type = v->get_obj()->inferred_type; int index_at = std::get(v->target); - // `tensorVar.0`; since a tensor of N elems are N vars on a stack actually, calculate offset + // `tensorVar.0` if (const auto* t_tensor = obj_type->try_as()) { - if (lval_ctx) lval_ctx->register_lval(v->loc, v); - if (lval_ctx) lval_ctx->enter_rval_inside_lval(); - std::vector lhs_vars = pre_compile_expr(v->get_obj(), code, 
lval_ctx); - if (lval_ctx) lval_ctx->exit_rval_inside_lval(); - int stack_width = t_tensor->items[index_at]->calc_width_on_stack(); + // handle `tensorVar.0 = rhs` if tensors is a global, special case, then the global will be read on demand + if (lval_ctx && !lval_ctx->is_rval_inside_lval()) { + if (auto sink = calc_sink_leftmost_obj(v); sink && sink->sym->try_as()) { + std::vector lval_ir_idx = code.create_tmp_var(v->inferred_type, v->loc, "(lval-global-tensor)"); + lval_ctx->capture_field_of_global_modification(v->get_obj(), index_at, lval_ir_idx); + return lval_ir_idx; + } + } + // since a tensor of N elems are N vars on a stack actually, calculate offset + std::vector lhs_vars = pre_compile_expr(v->get_obj(), code, nullptr, lval_ctx); + int stack_width = t_tensor->items[index_at]->get_width_on_stack(); int stack_offset = 0; for (int i = 0; i < index_at; ++i) { - stack_offset += t_tensor->items[i]->calc_width_on_stack(); + stack_offset += t_tensor->items[i]->get_width_on_stack(); } - return {lhs_vars.begin() + stack_offset, lhs_vars.begin() + stack_offset + stack_width}; + std::vector rvect{lhs_vars.begin() + stack_offset, lhs_vars.begin() + stack_offset + stack_width}; + // a tensor index might be smart cast at this point, for example we're in `if (t.1 != null)` + // it means that we must drop the null flag (if `t.1` is a tensor), or maybe perform other stack transformations + // (from original rvect = (vars of t.1) to fit smart cast) + rvect = transition_to_target_type(std::move(rvect), code, t_tensor->items[index_at], v->inferred_type, v->loc); + return transition_to_target_type(std::move(rvect), code, target_type, v); } - // `tupleVar.0`; not to mess up, separate rvalue and lvalue cases + // `tupleVar.0` if (obj_type->try_as() || obj_type->try_as()) { - if (!lval_ctx) { - // `tupleVar.0` as rvalue: the same as "tupleAt(tupleVar, 0)" written in terms of IR vars - std::vector tuple_ir_idx = pre_compile_expr(v->get_obj(), code); - std::vector index_ir_idx = 
code.create_tmp_var(TypeDataInt::create(), v->get_identifier()->loc, "(tuple-idx)"); - code.emplace_back(v->loc, Op::_IntConst, index_ir_idx, td::make_refint(index_at)); - std::vector field_ir_idx = code.create_tmp_var(v->inferred_type, v->loc, "(tuple-field)"); - tolk_assert(tuple_ir_idx.size() == 1 && field_ir_idx.size() == 1); // tuples contain only 1-slot values - const FunctionData* builtin_sym = lookup_global_symbol("tupleAt")->as(); - code.emplace_back(v->loc, Op::_Call, field_ir_idx, std::vector{tuple_ir_idx[0], index_ir_idx[0]}, builtin_sym); - return field_ir_idx; - } else { - // `tupleVar.0 = rhs`: finally "tupleSetAt(tupleVar, rhs, 0)" will be done - uint64_t hash = lval_ctx->register_lval(v->loc, v); - if (const var_idx_t* field_ir_idx = lval_ctx->exists_already_known_tuple_index(hash)) { - return {*field_ir_idx}; // `(t.0.0, t.0.1) = rhs`, then "t.0" will be read (tupleAt) once - } - lval_ctx->enter_rval_inside_lval(); - std::vector tuple_ir_idx = pre_compile_expr(v->get_obj(), code, lval_ctx); - lval_ctx->exit_rval_inside_lval(); - std::vector index_ir_idx = code.create_tmp_var(TypeDataInt::create(), v->get_identifier()->loc, "(tuple-idx)"); - code.emplace_back(v->loc, Op::_IntConst, index_ir_idx, td::make_refint(index_at)); - std::vector field_ir_idx = code.create_tmp_var(v->inferred_type, v->loc, "(tuple-field)"); - if (lval_ctx->is_rval_inside_lval()) { // for `t.0.1 = rhs` "t.0" is rvalue inside lvalue - // for `t.0 = rhs` don't call tupleAt, but for `t.0.1 = rhs` do for t.0 (still don't for t.0.1) - const FunctionData* builtin_sym = lookup_global_symbol("tupleAt")->as(); - code.emplace_back(v->loc, Op::_Call, field_ir_idx, std::vector{tuple_ir_idx[0], index_ir_idx[0]}, builtin_sym); - } - lval_ctx->register_modified_tuple_index(hash, tuple_ir_idx[0], index_ir_idx[0], field_ir_idx[0]); - vars_modification_watcher.trigger_callbacks(tuple_ir_idx, v->loc); - return field_ir_idx; + // handle `tupleVar.0 = rhs`, "0 SETINDEX" will be called when this 
is modified + if (lval_ctx && !lval_ctx->is_rval_inside_lval() && calc_sink_leftmost_obj(v)) { + std::vector lval_ir_idx = code.create_tmp_var(v->inferred_type, v->loc, "(lval-tuple-field)"); + lval_ctx->capture_tuple_index_modification(v->get_obj(), index_at, lval_ir_idx); + return lval_ir_idx; } + // `tupleVar.0` as rvalue: the same as "tupleAt(tupleVar, 0)" written in terms of IR vars + std::vector tuple_ir_idx = pre_compile_expr(v->get_obj(), code); + std::vector index_ir_idx = code.create_tmp_var(TypeDataInt::create(), v->get_identifier()->loc, "(tuple-idx)"); + code.emplace_back(v->loc, Op::_IntConst, index_ir_idx, td::make_refint(index_at)); + std::vector field_ir_idx = code.create_tmp_var(v->inferred_type, v->loc, "(tuple-field)"); + tolk_assert(tuple_ir_idx.size() == 1 && field_ir_idx.size() == 1); // tuples contain only 1-slot values + FunctionPtr builtin_sym = lookup_global_symbol("tupleAt")->try_as(); + code.emplace_back(v->loc, Op::_Call, field_ir_idx, std::vector{tuple_ir_idx[0], index_ir_idx[0]}, builtin_sym); + if (lval_ctx && calc_sink_leftmost_obj(v)) { // `tupleVar.0.1 = rhs`, then `tupleVar.0` is rval inside lval + lval_ctx->capture_tuple_index_modification(v->get_obj(), index_at, field_ir_idx); + } + // like tensor index, `tupleVar.1` also might be smart cast, for example we're in `if (tupleVar.1 != null)` + // but since tuple's elements are only 1-slot width (no tensors and unions), no stack transformations required + return transition_to_target_type(std::move(field_ir_idx), code, target_type, v); } tolk_assert(false); } // okay, v->target refs a function, like `obj.method`, filled at type inferring // (currently, nothing except a global function can be referenced, no object-scope methods exist) - const FunctionData* fun_ref = std::get(v->target); + FunctionPtr fun_ref = std::get(v->target); tolk_assert(fun_ref); - return pre_compile_symbol(v->loc, fun_ref, code, lval_ctx); + std::vector rvect = pre_compile_symbol(v->loc, fun_ref, code,
lval_ctx); + return transition_to_target_type(std::move(rvect), code, target_type, v); } -static std::vector process_function_call(V v, CodeBlob& code) { +static std::vector process_function_call(V v, CodeBlob& code, TypePtr target_type) { // v is `globalF(args)` / `globalF(args)` / `obj.method(args)` / `local_var(args)` / `getF()(args)` - const FunctionData* fun_ref = v->fun_maybe; + FunctionPtr fun_ref = v->fun_maybe; if (!fun_ref) { + // it's `local_var(args)`, treat args like a tensor: + // 1) when variables are modified like `local_var(x, x += 2, x)`, regular mechanism of watching automatically works + // 2) when `null` is passed to `(int, int)?`, or any other type transitions, it automatically works std::vector args; args.reserve(v->get_num_args()); for (int i = 0; i < v->get_num_args(); ++i) { args.push_back(v->get_arg(i)->get_expr()); } - std::vector args_vars = pre_compile_tensor(code, args); - std::vector tfunc = pre_compile_expr(v->get_callee(), code); + std::vector params_types = v->get_callee()->inferred_type->try_as()->params_types; + const TypeDataTensor* tensor_tt = TypeDataTensor::create(std::move(params_types))->try_as(); + std::vector> vars_per_arg = pre_compile_tensor_inner(code, args, tensor_tt, nullptr); + std::vector args_vars; + for (const std::vector& list : vars_per_arg) { + args_vars.insert(args_vars.end(), list.cbegin(), list.cend()); + } + std::vector tfunc = pre_compile_expr(v->get_callee(), code, nullptr); tolk_assert(tfunc.size() == 1); args_vars.push_back(tfunc[0]); std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc, "(call-ind)"); Op& op = code.emplace_back(v->loc, Op::_CallInd, rvect, std::move(args_vars)); op.set_impure_flag(); - return rvect; + return transition_to_target_type(std::move(rvect), code, target_type, v); } int delta_self = v->is_dot_call(); @@ -582,7 +901,11 @@ static std::vector process_function_call(V v, Code for (int i = 0; i < v->get_num_args(); ++i) { args.push_back(v->get_arg(i)->get_expr()); } - 
std::vector> vars_per_arg = pre_compile_tensor_inner(code, args, nullptr); + // the purpose of tensor_tt ("tensor target type") is to transition `null` to `(int, int)?` and so on + // the purpose of calling `pre_compile_tensor_inner` is to have 0-th IR vars to handle return self + std::vector params_types = fun_ref->inferred_full_type->try_as()->params_types; + const TypeDataTensor* tensor_tt = TypeDataTensor::create(std::move(params_types))->try_as(); + std::vector> vars_per_arg = pre_compile_tensor_inner(code, args, tensor_tt, nullptr); TypePtr op_call_type = v->inferred_type; TypePtr real_ret_type = v->inferred_type; @@ -596,7 +919,7 @@ static std::vector process_function_call(V v, Code std::vector types_list; for (int i = 0; i < delta_self + v->get_num_args(); ++i) { if (fun_ref->parameters[i].is_mutate_parameter()) { - types_list.push_back(args[i]->inferred_type); + types_list.push_back(fun_ref->parameters[i].declared_type); } } types_list.push_back(real_ret_type); @@ -617,7 +940,7 @@ static std::vector process_function_call(V v, Code AnyExprV arg_i = obj_leftmost && i == 0 ? 
obj_leftmost : args[i]; tolk_assert(arg_i->is_lvalue || i == 0); if (arg_i->is_lvalue) { - std::vector ith_var_idx = pre_compile_expr(arg_i, code, &local_lval); + std::vector ith_var_idx = pre_compile_expr(arg_i, code, nullptr, &local_lval); left.insert(left.end(), ith_var_idx.begin(), ith_var_idx.end()); } else { left.insert(left.end(), vars_per_arg[0].begin(), vars_per_arg[0].end()); @@ -627,43 +950,46 @@ static std::vector process_function_call(V v, Code std::vector rvect = code.create_tmp_var(real_ret_type, v->loc, "(fun-call)"); left.insert(left.end(), rvect.begin(), rvect.end()); vars_modification_watcher.trigger_callbacks(left, v->loc); - code.emplace_back(v->loc, Op::_Let, std::move(left), rvect_apply); - local_lval.gen_ops_if_nonempty(code, v->loc); + code.emplace_back(v->loc, Op::_Let, left, rvect_apply); + local_lval.after_let(std::move(left), code, v->loc); rvect_apply = rvect; } if (obj_leftmost && fun_ref->does_return_self()) { if (obj_leftmost->is_lvalue) { // to handle if obj is global var, potentially re-assigned inside a chain - rvect_apply = pre_compile_expr(obj_leftmost, code); + rvect_apply = pre_compile_expr(obj_leftmost, code, nullptr); } else { // temporary object, not lvalue, pre_compile_expr rvect_apply = vars_per_arg[0]; } } - return rvect_apply; + return transition_to_target_type(std::move(rvect_apply), code, target_type, v); } -static std::vector process_tensor(V v, CodeBlob& code, LValContext* lval_ctx) { - return pre_compile_tensor(code, v->get_items(), lval_ctx); +static std::vector process_tensor(V v, CodeBlob& code, TypePtr target_type, LValContext* lval_ctx) { + // tensor is compiled "as is", for example `(1, null)` occupies 2 slots + // and if assigned/passed to something other, like `(int, (int,int)?)`, a whole tensor is transitioned, it works + std::vector rvect = pre_compile_tensor(code, v->get_items(), lval_ctx); + return transition_to_target_type(std::move(rvect), code, target_type, v); } -static std::vector 
process_typed_tuple(V v, CodeBlob& code, LValContext* lval_ctx) { +static std::vector process_typed_tuple(V v, CodeBlob& code, TypePtr target_type, LValContext* lval_ctx) { if (lval_ctx) { // todo some time, make "var (a, [b,c]) = (1, [2,3])" work v->error("[...] can not be used as lvalue here"); } std::vector left = code.create_tmp_var(v->inferred_type, v->loc, "(pack-tuple)"); std::vector right = pre_compile_tensor(code, v->get_items(), lval_ctx); code.emplace_back(v->loc, Op::_Tuple, left, std::move(right)); - return left; + return transition_to_target_type(std::move(left), code, target_type, v); } -static std::vector process_int_const(V v, CodeBlob& code) { +static std::vector process_int_const(V v, CodeBlob& code, TypePtr target_type) { std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc, "(int-const)"); code.emplace_back(v->loc, Op::_IntConst, rvect, v->intval); - return rvect; + return transition_to_target_type(std::move(rvect), code, target_type, v); } -static std::vector process_string_const(V v, CodeBlob& code) { +static std::vector process_string_const(V v, CodeBlob& code, TypePtr target_type) { ConstantValue value = eval_const_init_value(v); std::vector rvect = code.create_tmp_var(v->inferred_type, v->loc, "(str-const)"); if (value.is_int()) { @@ -671,27 +997,31 @@ static std::vector process_string_const(V v, CodeBl } else { code.emplace_back(v->loc, Op::_SliceConst, rvect, value.as_slice()); } - return rvect; + return transition_to_target_type(std::move(rvect), code, target_type, v); } -static std::vector process_bool_const(V v, CodeBlob& code) { - const FunctionData* builtin_sym = lookup_global_symbol(v->bool_val ? "__true" : "__false")->as(); - return gen_op_call(code, v->inferred_type, v->loc, {}, builtin_sym, "(bool-const)"); +static std::vector process_bool_const(V v, CodeBlob& code, TypePtr target_type) { + FunctionPtr builtin_sym = lookup_global_symbol(v->bool_val ? 
"__true" : "__false")->try_as(); + std::vector rvect = gen_op_call(code, v->inferred_type, v->loc, {}, builtin_sym, "(bool-const)"); + return transition_to_target_type(std::move(rvect), code, target_type, v); } -static std::vector process_null_keyword(V v, CodeBlob& code) { - const FunctionData* builtin_sym = lookup_global_symbol("__null")->as(); - return gen_op_call(code, v->inferred_type, v->loc, {}, builtin_sym, "(null-literal)"); +static std::vector process_null_keyword(V v, CodeBlob& code, TypePtr target_type) { + FunctionPtr builtin_sym = lookup_global_symbol("__null")->try_as(); + std::vector rvect = gen_op_call(code, v->inferred_type, v->loc, {}, builtin_sym, "(null-literal)"); + return transition_to_target_type(std::move(rvect), code, target_type, v); } -static std::vector process_local_var(V v, CodeBlob& code) { +static std::vector process_local_var(V v, CodeBlob& code, TypePtr target_type) { if (v->marked_as_redef) { - return pre_compile_symbol(v->loc, v->var_ref, code, nullptr); + std::vector rvect = pre_compile_symbol(v->loc, v->var_ref, code, nullptr); + return transition_to_target_type(std::move(rvect), code, target_type, v); } tolk_assert(v->var_ref->ir_idx.empty()); v->var_ref->mutate()->assign_ir_idx(code.create_var(v->inferred_type, v->loc, v->var_ref->name)); - return v->var_ref->ir_idx; + std::vector rvect = v->var_ref->ir_idx; + return transition_to_target_type(std::move(rvect), code, target_type, v); } static std::vector process_local_vars_declaration(V, CodeBlob&) { @@ -705,42 +1035,46 @@ static std::vector process_underscore(V v, CodeBlob& return code.create_tmp_var(v->inferred_type, v->loc, "(underscore)"); } -std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, LValContext* lval_ctx) { +std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, TypePtr target_type, LValContext* lval_ctx) { switch (v->type) { case ast_reference: - return pre_compile_symbol(v->loc, v->as()->sym, code, lval_ctx); + return process_reference(v->as(), code, 
target_type, lval_ctx); case ast_assign: - return process_assign(v->as(), code); + return process_assignment(v->as(), code, target_type); case ast_set_assign: - return process_set_assign(v->as(), code); + return process_set_assign(v->as(), code, target_type); case ast_binary_operator: - return process_binary_operator(v->as(), code); + return process_binary_operator(v->as(), code, target_type); case ast_unary_operator: - return process_unary_operator(v->as(), code); + return process_unary_operator(v->as(), code, target_type); case ast_ternary_operator: - return process_ternary_operator(v->as(), code); + return process_ternary_operator(v->as(), code, target_type); case ast_cast_as_operator: - return pre_compile_expr(v->as()->get_expr(), code, lval_ctx); + return process_cast_as_operator(v->as(), code, target_type, lval_ctx); + case ast_not_null_operator: + return process_not_null_operator(v->as(), code, target_type, lval_ctx); + case ast_is_null_check: + return process_is_null_check(v->as(), code, target_type); case ast_dot_access: - return process_dot_access(v->as(), code, lval_ctx); + return process_dot_access(v->as(), code, target_type, lval_ctx); case ast_function_call: - return process_function_call(v->as(), code); + return process_function_call(v->as(), code, target_type); case ast_parenthesized_expression: - return pre_compile_expr(v->as()->get_expr(), code, lval_ctx); + return pre_compile_expr(v->as()->get_expr(), code, target_type, lval_ctx); case ast_tensor: - return process_tensor(v->as(), code, lval_ctx); + return process_tensor(v->as(), code, target_type, lval_ctx); case ast_typed_tuple: - return process_typed_tuple(v->as(), code, lval_ctx); + return process_typed_tuple(v->as(), code, target_type, lval_ctx); case ast_int_const: - return process_int_const(v->as(), code); + return process_int_const(v->as(), code, target_type); case ast_string_const: - return process_string_const(v->as(), code); + return process_string_const(v->as(), code, target_type); 
case ast_bool_const: - return process_bool_const(v->as(), code); + return process_bool_const(v->as(), code, target_type); case ast_null_keyword: - return process_null_keyword(v->as(), code); + return process_null_keyword(v->as(), code, target_type); case ast_local_var_lhs: - return process_local_var(v->as(), code); + return process_local_var(v->as(), code, target_type); case ast_local_vars_declaration: return process_local_vars_declaration(v->as(), code); case ast_underscore: @@ -771,14 +1105,14 @@ static void process_assert_statement(V v, CodeBlob& code) args[2]->mutate()->assign_inferred_type(TypeDataInt::create()); } - const FunctionData* builtin_sym = lookup_global_symbol("__throw_if_unless")->as(); + FunctionPtr builtin_sym = lookup_global_symbol("__throw_if_unless")->try_as(); std::vector args_vars = pre_compile_tensor(code, args); gen_op_call(code, TypeDataVoid::create(), v->loc, std::move(args_vars), builtin_sym, "(throw-call)"); } static void process_catch_variable(AnyExprV v_catch_var, CodeBlob& code) { if (auto v_ref = v_catch_var->try_as(); v_ref && v_ref->sym) { // not underscore - const LocalVarData* var_ref = v_ref->sym->as(); + LocalVarPtr var_ref = v_ref->sym->try_as(); tolk_assert(var_ref->ir_idx.empty()); var_ref->mutate()->assign_ir_idx(code.create_var(v_catch_var->inferred_type, v_catch_var->loc, var_ref->name)); } @@ -803,7 +1137,7 @@ static void process_try_catch_statement(V v, CodeBlob& } static void process_repeat_statement(V v, CodeBlob& code) { - std::vector tmp_vars = pre_compile_expr(v->get_cond(), code); + std::vector tmp_vars = pre_compile_expr(v->get_cond(), code, nullptr); Op& repeat_op = code.emplace_back(v->loc, Op::_Repeat, tmp_vars); code.push_set_cur(repeat_op.block0); process_any_statement(v->get_body(), code); @@ -811,8 +1145,19 @@ static void process_repeat_statement(V v, CodeBlob& code) } static void process_if_statement(V v, CodeBlob& code) { - std::vector tmp_vars = pre_compile_expr(v->get_cond(), code); - Op& if_op = 
code.emplace_back(v->loc, Op::_If, std::move(tmp_vars)); + std::vector cond = pre_compile_expr(v->get_cond(), code, nullptr); + tolk_assert(cond.size() == 1); + + if (v->get_cond()->is_always_true) { + process_any_statement(v->get_if_body(), code); // v->is_ifnot does not matter here + return; + } + if (v->get_cond()->is_always_false) { + process_any_statement(v->get_else_body(), code); + return; + } + + Op& if_op = code.emplace_back(v->loc, Op::_If, std::move(cond)); code.push_set_cur(if_op.block0); process_any_statement(v->get_if_body(), code); code.close_pop_cur(v->get_if_body()->loc_end); @@ -856,19 +1201,21 @@ static void process_do_while_statement(V v, CodeBlob& co } until_cond->mutate()->assign_inferred_type(TypeDataInt::create()); if (auto v_bin = until_cond->try_as(); v_bin && !v_bin->fun_ref) { - v_bin->mutate()->assign_fun_ref(lookup_global_symbol("_" + static_cast(v_bin->operator_name) + "_")->as()); + v_bin->mutate()->assign_fun_ref(lookup_global_symbol("_" + static_cast(v_bin->operator_name) + "_")->try_as()); } else if (auto v_un = until_cond->try_as(); v_un && !v_un->fun_ref) { - v_un->mutate()->assign_fun_ref(lookup_global_symbol(static_cast(v_un->operator_name) + "_")->as()); + v_un->mutate()->assign_fun_ref(lookup_global_symbol(static_cast(v_un->operator_name) + "_")->try_as()); } - until_op.left = pre_compile_expr(until_cond, code); + until_op.left = pre_compile_expr(until_cond, code, nullptr); + tolk_assert(until_op.left.size() == 1); code.close_pop_cur(v->get_body()->loc_end); } static void process_while_statement(V v, CodeBlob& code) { Op& while_op = code.emplace_back(v->loc, Op::_While); code.push_set_cur(while_op.block0); - while_op.left = pre_compile_expr(v->get_cond(), code); + while_op.left = pre_compile_expr(v->get_cond(), code, nullptr); + tolk_assert(while_op.left.size() == 1); code.close_pop_cur(v->get_body()->loc); code.push_set_cur(while_op.block1); process_any_statement(v->get_body(), code); @@ -877,18 +1224,25 @@ static void 
process_while_statement(V v, CodeBlob& code) { static void process_throw_statement(V v, CodeBlob& code) { if (v->has_thrown_arg()) { - const FunctionData* builtin_sym = lookup_global_symbol("__throw_arg")->as(); + FunctionPtr builtin_sym = lookup_global_symbol("__throw_arg")->try_as(); std::vector args_vars = pre_compile_tensor(code, {v->get_thrown_arg(), v->get_thrown_code()}); gen_op_call(code, TypeDataVoid::create(), v->loc, std::move(args_vars), builtin_sym, "(throw-call)"); } else { - const FunctionData* builtin_sym = lookup_global_symbol("__throw")->as(); + FunctionPtr builtin_sym = lookup_global_symbol("__throw")->try_as(); std::vector args_vars = pre_compile_tensor(code, {v->get_thrown_code()}); gen_op_call(code, TypeDataVoid::create(), v->loc, std::move(args_vars), builtin_sym, "(throw-call)"); } } static void process_return_statement(V v, CodeBlob& code) { - std::vector return_vars = v->has_return_value() ? pre_compile_expr(v->get_return_value(), code) : std::vector{}; + std::vector return_vars; + if (v->has_return_value()) { + TypePtr child_target_type = code.fun_ref->inferred_return_type; + if (code.fun_ref->does_return_self()) { + child_target_type = code.fun_ref->parameters[0].declared_type; + } + return_vars = pre_compile_expr(v->get_return_value(), code, child_target_type); + } if (code.fun_ref->does_return_self()) { return_vars = {}; } @@ -904,6 +1258,10 @@ static void process_return_statement(V v, CodeBlob& code) code.emplace_back(v->loc, Op::_Return, std::move(return_vars)); } +// append "return" (void) to the end of the function +// if it's not reachable, it will be dropped +// (IR cfg reachability may differ from FlowContext in case of "never" types, so there may be situations, +// when IR will consider this "return" reachable and leave it, but actually execution will never reach it) static void append_implicit_return_statement(SrcLocation loc_end, CodeBlob& code) { std::vector mutated_vars; if (code.fun_ref->has_mutate_params()) { @@ -940,18 
+1298,18 @@ void process_any_statement(AnyV v, CodeBlob& code) { case ast_empty_statement: return; default: - pre_compile_expr(reinterpret_cast(v), code); + pre_compile_expr(reinterpret_cast(v), code, nullptr); } } -static void convert_function_body_to_CodeBlob(const FunctionData* fun_ref, FunctionBodyCode* code_body) { +static void convert_function_body_to_CodeBlob(FunctionPtr fun_ref, FunctionBodyCode* code_body) { auto v_body = fun_ref->ast_root->as()->get_body()->as(); CodeBlob* blob = new CodeBlob{fun_ref->name, fun_ref->loc, fun_ref}; std::vector rvect_import; int total_arg_width = 0; for (int i = 0; i < fun_ref->get_num_params(); ++i) { - total_arg_width += fun_ref->parameters[i].declared_type->calc_width_on_stack(); + total_arg_width += fun_ref->parameters[i].declared_type->get_width_on_stack(); } rvect_import.reserve(total_arg_width); @@ -968,18 +1326,16 @@ static void convert_function_body_to_CodeBlob(const FunctionData* fun_ref, Funct for (AnyV item : v_body->get_items()) { process_any_statement(item, *blob); } - if (fun_ref->is_implicit_return()) { - append_implicit_return_statement(v_body->loc_end, *blob); - } + append_implicit_return_statement(v_body->loc_end, *blob); blob->close_blk(v_body->loc_end); code_body->set_code(blob); tolk_assert(vars_modification_watcher.empty()); } -static void convert_asm_body_to_AsmOp(const FunctionData* fun_ref, FunctionBodyAsm* asm_body) { +static void convert_asm_body_to_AsmOp(FunctionPtr fun_ref, FunctionBodyAsm* asm_body) { int cnt = fun_ref->get_num_params(); - int width = fun_ref->inferred_return_type->calc_width_on_stack(); + int width = fun_ref->inferred_return_type->get_width_on_stack(); std::vector asm_ops; for (AnyV v_child : fun_ref->ast_root->as()->get_body()->as()->get_asm_commands()) { std::string_view ops = v_child->as()->str_val; // \n\n... 
@@ -1010,15 +1366,15 @@ static void convert_asm_body_to_AsmOp(const FunctionData* fun_ref, FunctionBodyA class UpdateArgRetOrderConsideringStackWidth final { public: - static bool should_visit_function(const FunctionData* fun_ref) { + static bool should_visit_function(FunctionPtr fun_ref) { return !fun_ref->is_generic_function() && (!fun_ref->ret_order.empty() || !fun_ref->arg_order.empty()); } - static void start_visiting_function(const FunctionData* fun_ref, V v_function) { + static void start_visiting_function(FunctionPtr fun_ref, V v_function) { int total_arg_mutate_width = 0; bool has_arg_width_not_1 = false; for (const LocalVarData& param : fun_ref->parameters) { - int arg_width = param.declared_type->calc_width_on_stack(); + int arg_width = param.declared_type->get_width_on_stack(); has_arg_width_not_1 |= arg_width != 1; total_arg_mutate_width += param.is_mutate_parameter() * arg_width; } @@ -1032,7 +1388,7 @@ public: cum_arg_width.reserve(1 + fun_ref->get_num_params()); cum_arg_width.push_back(0); for (const LocalVarData& param : fun_ref->parameters) { - cum_arg_width.push_back(total_arg_width += param.declared_type->calc_width_on_stack()); + cum_arg_width.push_back(total_arg_width += param.declared_type->get_width_on_stack()); } std::vector arg_order; for (int i = 0; i < fun_ref->get_num_params(); ++i) { @@ -1049,7 +1405,7 @@ public: // ret_order is a shuffled range 0...N // validate N: a function should return value and mutated arguments onto a stack if (!fun_ref->ret_order.empty()) { - size_t expected_width = fun_ref->inferred_return_type->calc_width_on_stack() + total_arg_mutate_width; + size_t expected_width = fun_ref->inferred_return_type->get_width_on_stack() + total_arg_mutate_width; if (expected_width != fun_ref->ret_order.size()) { v_function->get_body()->error("ret_order (after ->) expected to contain " + std::to_string(expected_width) + " numbers"); } @@ -1059,11 +1415,11 @@ public: class ConvertASTToLegacyOpVisitor final { public: - static bool 
should_visit_function(const FunctionData* fun_ref) { + static bool should_visit_function(FunctionPtr fun_ref) { return !fun_ref->is_generic_function(); } - static void start_visiting_function(const FunctionData* fun_ref, V) { + static void start_visiting_function(FunctionPtr fun_ref, V) { tolk_assert(fun_ref->is_type_inferring_done()); if (fun_ref->is_code_function()) { convert_function_body_to_CodeBlob(fun_ref, std::get(fun_ref->body)); diff --git a/tolk/pipe-calc-rvalue-lvalue.cpp b/tolk/pipe-calc-rvalue-lvalue.cpp index 041aec89..1f374bc8 100644 --- a/tolk/pipe-calc-rvalue-lvalue.cpp +++ b/tolk/pipe-calc-rvalue-lvalue.cpp @@ -177,6 +177,18 @@ class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody { parent::visit(v->get_expr()); // leave lvalue state unchanged, for `mutate (t.0 as int)` both `t.0 as int` and `t.0` are lvalue } + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + parent::visit(v->get_expr()); // leave lvalue state unchanged, for `mutate x!` both `x!` and `x` are lvalue + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + MarkingState saved = enter_state(MarkingState::RValue); + parent::visit(v->get_expr()); + restore_state(saved); + } + void visit(V v) override { tolk_assert(cur_state == MarkingState::LValue); mark_vertex_cur_or_rvalue(v); @@ -198,7 +210,7 @@ class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody { } public: - bool should_visit_function(const FunctionData* fun_ref) override { + bool should_visit_function(FunctionPtr fun_ref) override { return fun_ref->is_code_function() && !fun_ref->is_generic_function(); } }; @@ -207,7 +219,7 @@ void pipeline_calculate_rvalue_lvalue() { visit_ast_of_all_functions(); } -void pipeline_calculate_rvalue_lvalue(const FunctionData* fun_ref) { +void pipeline_calculate_rvalue_lvalue(FunctionPtr fun_ref) { CalculateRvalueLvalueVisitor visitor; if (visitor.should_visit_function(fun_ref)) { visitor.start_visiting_function(fun_ref, 
fun_ref->ast_root->as()); diff --git a/tolk/pipe-check-inferred-types.cpp b/tolk/pipe-check-inferred-types.cpp new file mode 100644 index 00000000..bae67c5f --- /dev/null +++ b/tolk/pipe-check-inferred-types.cpp @@ -0,0 +1,586 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "tolk.h" +#include "ast.h" +#include "ast-visitor.h" +#include "type-system.h" + +namespace tolk { + +GNU_ATTRIBUTE_NOINLINE +static std::string to_string(TypePtr type) { + return "`" + type->as_human_readable() + "`"; +} + +GNU_ATTRIBUTE_NOINLINE +static std::string to_string(AnyExprV v_with_type) { + return "`" + v_with_type->inferred_type->as_human_readable() + "`"; +} + +GNU_ATTRIBUTE_NOINLINE +static std::string expression_as_string(AnyExprV v) { + if (auto v_ref = v->try_as()) { + if (v_ref->sym->try_as() || v_ref->sym->try_as()) { + return "variable `" + static_cast(v_ref->get_identifier()->name) + "`"; + } + } + if (auto v_par = v->try_as()) { + return expression_as_string(v_par->get_expr()); + } + return "expression"; +} + +// fire a general "type mismatch" error, just a wrapper over `throw` +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire(FunctionPtr cur_f, SrcLocation loc, const std::string& message) { + throw ParseError(cur_f, loc, message); +} + +// fire an error on `!cell` / `+slice` +GNU_ATTRIBUTE_NORETURN 
GNU_ATTRIBUTE_COLD +static void fire_error_cannot_apply_operator(FunctionPtr cur_f, SrcLocation loc, std::string_view operator_name, AnyExprV unary_expr) { + std::string op = static_cast(operator_name); + fire(cur_f, loc, "can not apply operator `" + op + "` to " + to_string(unary_expr->inferred_type)); +} + +// fire an error on `int + cell` / `slice & int` +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_cannot_apply_operator(FunctionPtr cur_f, SrcLocation loc, std::string_view operator_name, AnyExprV lhs, AnyExprV rhs) { + std::string op = static_cast(operator_name); + fire(cur_f, loc, "can not apply operator `" + op + "` to " + to_string(lhs->inferred_type) + " and " + to_string(rhs->inferred_type)); +} + +GNU_ATTRIBUTE_NOINLINE +static void warning_condition_always_true_or_false(FunctionPtr cur_f, SrcLocation loc, AnyExprV cond, const char* operator_name) { + loc.show_warning("condition of " + static_cast(operator_name) + " is always " + (cond->is_always_true ? "true" : "false")); +} + +// given `f(x: int)` and a call `f(expr)`, check that expr_type is assignable to `int` +static void check_function_argument_passed(FunctionPtr cur_f, TypePtr param_type, AnyExprV ith_arg, bool is_obj_of_dot_call) { + if (!param_type->can_rhs_be_assigned(ith_arg->inferred_type)) { + if (is_obj_of_dot_call) { + fire(cur_f, ith_arg->loc, "can not call method for " + to_string(param_type) + " with object of type " + to_string(ith_arg)); + } else { + fire(cur_f, ith_arg->loc, "can not pass " + to_string(ith_arg) + " to " + to_string(param_type)); + } + } +} + +// given `f(x: mutate int?)` and a call `f(expr)`, check that `int?` is assignable to expr_type +// (for instance, can't call `f(mutate intVal)`, since f can potentially assign null to it) +static void check_function_argument_mutate_back(FunctionPtr cur_f, TypePtr param_type, AnyExprV ith_arg, bool is_obj_of_dot_call) { + if (!ith_arg->inferred_type->can_rhs_be_assigned(param_type)) { + if (is_obj_of_dot_call) 
{ + fire(cur_f, ith_arg->loc,"can not call method for mutate " + to_string(param_type) + " with object of type " + to_string(ith_arg) + ", because mutation is not type compatible"); + } else { + fire(cur_f, ith_arg->loc,"can not pass " + to_string(ith_arg) + " to mutate " + to_string(param_type) + ", because mutation is not type compatible"); + } + } +} + +// fire an error on `var n = null` +// technically it's correct, type of `n` is TypeDataNullLiteral, but it's not what the user wanted +// so, it's better to see an error on assignment, that later, on `n` usage and types mismatch +// (most common is situation above, but generally, `var (x,n) = xn` where xn is a tensor with 2-nd always-null, can be) +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_assign_always_null_to_variable(FunctionPtr cur_f, SrcLocation loc, LocalVarPtr assigned_var, bool is_assigned_null_literal) { + std::string var_name = assigned_var->name; + fire(cur_f, loc, "can not infer type of `" + var_name + "`, it's always null; specify its type with `" + var_name + ": `" + (is_assigned_null_literal ? 
" or use `null as `" : "")); +} + +// fire an error on `untypedTupleVar.0` when inferred as (int,int), or `[int, (int,int)]`, or other non-1 width in a tuple +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_cannot_put_non1_stack_width_arg_to_tuple(FunctionPtr cur_f, SrcLocation loc, TypePtr inferred_type) { + fire(cur_f, loc, "a tuple can not have " + to_string(inferred_type) + " inside, because it occupies " + std::to_string(inferred_type->get_width_on_stack()) + " stack slots in TVM, not 1"); +} + +// handle __expect_type(expr, "type") call +// this is used in compiler tests +GNU_ATTRIBUTE_NOINLINE GNU_ATTRIBUTE_COLD +static void handle_possible_compiler_internal_call(FunctionPtr cur_f, V v) { + FunctionPtr fun_ref = v->fun_maybe; + tolk_assert(fun_ref && fun_ref->is_builtin_function()); + + if (fun_ref->name == "__expect_type") { + tolk_assert(v->get_num_args() == 2); + TypePtr expected_type = parse_type_from_string(v->get_arg(1)->get_expr()->as()->str_val); + TypePtr expr_type = v->get_arg(0)->inferred_type; + if (expected_type != expr_type) { + fire(cur_f, v->loc, "__expect_type failed: expected " + to_string(expected_type) + ", got " + to_string(expr_type)); + } + } +} + +static bool expect_integer(AnyExprV v_inferred) { + return v_inferred->inferred_type == TypeDataInt::create(); +} + +static bool expect_boolean(AnyExprV v_inferred) { + return v_inferred->inferred_type == TypeDataBool::create(); +} + + +class CheckInferredTypesVisitor final : public ASTVisitorFunctionBody { + FunctionPtr cur_f = nullptr; // may be nullptr if checking `const a = ...` init_value + +protected: + void visit(V v) override { + AnyExprV lhs = v->get_lhs(); + AnyExprV rhs = v->get_rhs(); + parent::visit(lhs); + parent::visit(rhs); + + // all operators (+=, etc.) 
can work for integers (if both sides are integers) + bool types_ok = expect_integer(lhs) && expect_integer(rhs); + // bitwise operators &= |= ^= are "overloaded" for booleans also (if both sides are booleans) + if (!types_ok && (v->tok == tok_set_bitwise_and || v->tok == tok_set_bitwise_or || v->tok == tok_set_bitwise_xor)) { + types_ok = expect_boolean(lhs) && expect_boolean(rhs); + } + // using += for other types (e.g. `tensorVar += tensorVar`) is not allowed + if (!types_ok) { + fire_error_cannot_apply_operator(cur_f, v->loc, v->operator_name, lhs, rhs); + } + } + + void visit(V v) override { + AnyExprV rhs = v->get_rhs(); + parent::visit(rhs); + + switch (v->tok) { + case tok_logical_not: + if (!expect_integer(rhs) && !expect_boolean(rhs)) { + fire_error_cannot_apply_operator(cur_f, v->loc, v->operator_name, rhs); + } + break; + default: + if (!expect_integer(rhs)) { + fire_error_cannot_apply_operator(cur_f, v->loc, v->operator_name, rhs); + } + } + } + + void visit(V v) override { + AnyExprV lhs = v->get_lhs(); + AnyExprV rhs = v->get_rhs(); + parent::visit(lhs); + parent::visit(rhs); + + switch (v->tok) { + // == != can compare both integers and booleans, (int == bool) is NOT allowed + // note, that `int?` and `int?` can't be compared, since Fift `EQUAL` works with integers only + // (if to allow `int?` in the future, `==` must be expressed in a complicated Fift code considering TVM NULL) + case tok_eq: + case tok_neq: { + bool both_int = expect_integer(lhs) && expect_integer(rhs); + bool both_bool = expect_boolean(lhs) && expect_boolean(rhs); + if (!both_int && !both_bool) { + if (lhs->inferred_type == rhs->inferred_type) { // compare slice with slice, int? with int? 
+ fire(cur_f, v->loc, "type " + to_string(lhs) + " can not be compared with `== !=`"); + } else { + fire_error_cannot_apply_operator(cur_f, v->loc, v->operator_name, lhs, rhs); + } + } + break; + } + // < > can compare only strict integers + case tok_lt: + case tok_gt: + case tok_leq: + case tok_geq: + case tok_spaceship: + if (!expect_integer(lhs) || !expect_integer(rhs)) { + fire_error_cannot_apply_operator(cur_f, v->loc, v->operator_name, lhs, rhs); + } + break; + // & | ^ are "overloaded" both for integers and booleans, (int & bool) is NOT allowed + case tok_bitwise_and: + case tok_bitwise_or: + case tok_bitwise_xor: { + bool both_int = expect_integer(lhs) && expect_integer(rhs); + bool both_bool = expect_boolean(lhs) && expect_boolean(rhs); + if (!both_int && !both_bool) { + fire_error_cannot_apply_operator(cur_f, v->loc, v->operator_name, lhs, rhs); + } + break; + } + // && || can work with integers and booleans, (int && bool) is allowed, (int16 && int32) also + case tok_logical_and: + case tok_logical_or: { + bool lhs_ok = expect_integer(lhs) || expect_boolean(lhs); + bool rhs_ok = expect_integer(rhs) || expect_boolean(rhs); + if (!lhs_ok || !rhs_ok) { + fire_error_cannot_apply_operator(cur_f, v->loc, v->operator_name, lhs, rhs); + } + break; + } + // others are mathematical: + * ... 
+ default: + if (!expect_integer(lhs) || !expect_integer(rhs)) { + fire_error_cannot_apply_operator(cur_f, v->loc, v->operator_name, lhs, rhs); + } + } + } + + void visit(V v) override { + parent::visit(v->get_expr()); + + if (!v->get_expr()->inferred_type->can_be_casted_with_as_operator(v->cast_to_type)) { + fire(cur_f, v->loc, "type " + to_string(v->get_expr()) + " can not be cast to " + to_string(v->cast_to_type)); + } + } + + void visit(V v) override { + parent::visit(v->get_expr()); + + if (v->get_expr()->inferred_type == TypeDataNullLiteral::create()) { + // operator `!` used for always-null (proven by smart casts, for example), it's an error + fire(cur_f, v->loc, "operator `!` used for always null expression"); + } + // if operator `!` used for non-nullable, probably a warning should be printed + } + + void visit(V v) override { + parent::visit(v->get_expr()); + + if ((v->is_always_true && !v->is_negated) || (v->is_always_false && v->is_negated)) { + v->loc.show_warning(expression_as_string(v->get_expr()) + " is always null, this condition is always " + (v->is_always_true ? "true" : "false")); + } + if ((v->is_always_false && !v->is_negated) || (v->is_always_true && v->is_negated)) { + v->loc.show_warning(expression_as_string(v->get_expr()) + " of type " + to_string(v->get_expr()) + " is always not null, this condition is always " + (v->is_always_true ? 
"true" : "false")); + } + } + + void visit(V v) override { + parent::visit(v); + + for (int i = 0; i < v->size(); ++i) { + AnyExprV item = v->get_item(i); + if (item->inferred_type->get_width_on_stack() != 1) { + fire_error_cannot_put_non1_stack_width_arg_to_tuple(cur_f, v->get_item(i)->loc, item->inferred_type); + } + } + } + + void visit(V v) override { + parent::visit(v); + + TypePtr obj_type = v->get_obj()->inferred_type; + if (v->is_target_indexed_access()) { + if (obj_type->try_as() && v->inferred_type->get_width_on_stack() != 1) { + fire_error_cannot_put_non1_stack_width_arg_to_tuple(cur_f, v->loc, v->inferred_type); + } + } + } + + void visit(V v) override { + parent::visit(v); // check against type mismatch inside nested arguments + + FunctionPtr fun_ref = v->fun_maybe; + if (!fun_ref) { + // `local_var(args)` and similar + const TypeDataFunCallable* f_callable = v->get_callee()->inferred_type->try_as(); + tolk_assert(f_callable && f_callable->params_size() == v->get_num_args()); + for (int i = 0; i < v->get_num_args(); ++i) { + auto arg_i = v->get_arg(i)->get_expr(); + TypePtr param_type = f_callable->params_types[i]; + if (!param_type->can_rhs_be_assigned(arg_i->inferred_type)) { + fire(cur_f, arg_i->loc, "can not pass " + to_string(arg_i) + " to " + to_string(param_type)); + } + } + return; + } + + // so, we have a call `f(args)` or `obj.f(args)`, f is a global function (fun_ref) (code / asm / builtin) + int delta_self = 0; + AnyExprV dot_obj = nullptr; + if (auto v_dot = v->get_callee()->try_as()) { + delta_self = 1; + dot_obj = v_dot->get_obj(); + } + + if (dot_obj) { + const LocalVarData& param_0 = fun_ref->parameters[0]; + TypePtr param_type = param_0.declared_type; + check_function_argument_passed(cur_f, param_type, dot_obj, true); + if (param_0.is_mutate_parameter()) { + check_function_argument_mutate_back(cur_f, param_type, dot_obj, true); + } + } + for (int i = 0; i < v->get_num_args(); ++i) { + const LocalVarData& param_i = 
fun_ref->parameters[delta_self + i]; + AnyExprV arg_i = v->get_arg(i)->get_expr(); + TypePtr param_type = param_i.declared_type; + check_function_argument_passed(cur_f, param_type, arg_i, false); + if (param_i.is_mutate_parameter()) { + check_function_argument_mutate_back(cur_f, param_type, arg_i, false); + } + } + + if (fun_ref->is_builtin_function() && fun_ref->name[0] == '_') { + handle_possible_compiler_internal_call(cur_f, v); + } + } + + void visit(V v) override { + parent::visit(v->get_lhs()); + parent::visit(v->get_rhs()); + + process_assignment_lhs(v->get_lhs(), v->get_rhs()->inferred_type, v->get_rhs()); + } + + // handle (and dig recursively) into `var lhs = rhs` + // examples: `var z = 5`, `var (x, [y]) = (2, [3])`, `var (x, [y]) = xy` + // while recursing, keep track of rhs if lhs and rhs have common shape (5 for z, 2 for x, [3] for [y], 3 for y) + // (so that on type mismatch, point to corresponding rhs, example: `var (x, y:slice) = (1, 2)` point to 2 + void process_assignment_lhs(AnyExprV lhs, TypePtr rhs_type, AnyExprV corresponding_maybe_rhs) { + AnyExprV err_loc = corresponding_maybe_rhs ? corresponding_maybe_rhs : lhs; + + // `var ... 
= rhs` - dig into left part + if (auto lhs_decl = lhs->try_as()) { + process_assignment_lhs(lhs_decl->get_expr(), rhs_type, corresponding_maybe_rhs); + return; + } + + // inside `var v: int = rhs` / `var _ = rhs` / `var v redef = rhs` (lhs is "v" / "_" / "v") + if (auto lhs_var = lhs->try_as()) { + TypePtr declared_type = lhs_var->declared_type; // `var v: int = rhs` (otherwise, nullptr) + if (lhs_var->marked_as_redef) { + tolk_assert(lhs_var->var_ref && lhs_var->var_ref->declared_type); + declared_type = lhs_var->var_ref->declared_type; + } + if (declared_type) { + if (!declared_type->can_rhs_be_assigned(rhs_type)) { + fire(cur_f, err_loc->loc, "can not assign " + to_string(rhs_type) + " to variable of type " + to_string(declared_type)); + } + } else { + if (rhs_type == TypeDataNullLiteral::create()) { + fire_error_assign_always_null_to_variable(cur_f, err_loc->loc, lhs_var->var_ref->try_as(), corresponding_maybe_rhs && corresponding_maybe_rhs->type == ast_null_keyword); + } + } + return; + } + + // `(v1, v2) = rhs` / `var (v1, v2) = rhs` (rhs may be `(1,2)` or `tensorVar` or `someF()`, doesn't matter) + // dig recursively into v1 and v2 with corresponding rhs i-th item of a tensor + if (auto lhs_tensor = lhs->try_as()) { + const TypeDataTensor* rhs_type_tensor = rhs_type->try_as(); + if (!rhs_type_tensor) { + fire(cur_f, err_loc->loc, "can not assign " + to_string(rhs_type) + " to a tensor"); + } + if (lhs_tensor->size() != rhs_type_tensor->size()) { + fire(cur_f, err_loc->loc, "can not assign " + to_string(rhs_type) + ", sizes mismatch"); + } + V rhs_tensor_maybe = corresponding_maybe_rhs ? corresponding_maybe_rhs->try_as() : nullptr; + for (int i = 0; i < lhs_tensor->size(); ++i) { + process_assignment_lhs(lhs_tensor->get_item(i), rhs_type_tensor->items[i], rhs_tensor_maybe ? 
rhs_tensor_maybe->get_item(i) : nullptr); + } + return; + } + + // `[v1, v2] = rhs` / `var [v1, v2] = rhs` (rhs may be `[1,2]` or `tupleVar` or `someF()`, doesn't matter) + // dig recursively into v1 and v2 with corresponding rhs i-th item of a tuple + if (auto lhs_tuple = lhs->try_as()) { + const TypeDataTypedTuple* rhs_type_tuple = rhs_type->try_as(); + if (!rhs_type_tuple) { + fire(cur_f, err_loc->loc, "can not assign " + to_string(rhs_type) + " to a tuple"); + } + if (lhs_tuple->size() != rhs_type_tuple->size()) { + fire(cur_f, err_loc->loc, "can not assign " + to_string(rhs_type) + ", sizes mismatch"); + } + V rhs_tuple_maybe = corresponding_maybe_rhs ? corresponding_maybe_rhs->try_as() : nullptr; + for (int i = 0; i < lhs_tuple->size(); ++i) { + process_assignment_lhs(lhs_tuple->get_item(i), rhs_type_tuple->items[i], rhs_tuple_maybe ? rhs_tuple_maybe->get_item(i) : nullptr); + } + return; + } + + // check `untypedTuple.0 = rhs_tensor` and other non-1 width elements + if (auto lhs_dot = lhs->try_as()) { + if (lhs_dot->is_target_indexed_access() && lhs_dot->get_obj()->inferred_type == TypeDataTuple::create()) { + if (rhs_type->get_width_on_stack() != 1) { + fire_error_cannot_put_non1_stack_width_arg_to_tuple(cur_f, err_loc->loc, rhs_type); + } + } + } + + // here is `v = rhs` (just assignment, not `var v = rhs`) / `a.0 = rhs` / `getObj(z=f()).0 = rhs` etc. 
+ // types were already inferred, so just check their compatibility + // for strange lhs like `f() = rhs` type checking will pass, but will fail lvalue check later + if (!lhs->inferred_type->can_rhs_be_assigned(rhs_type)) { + if (lhs->try_as()) { + fire(cur_f, err_loc->loc, "can not assign " + to_string(rhs_type) + " to variable of type " + to_string(lhs)); + } else { + fire(cur_f, err_loc->loc, "can not assign " + to_string(rhs_type) + " to " + to_string(lhs)); + } + } + } + + void visit(V v) override { + parent::visit(v->get_return_value()); + + if (cur_f->does_return_self()) { + if (!is_expr_valid_as_return_self(v->get_return_value())) { + fire(cur_f, v->loc, "invalid return from `self` function"); + } + return; + } + + TypePtr expr_type = v->get_return_value()->inferred_type; + if (!cur_f->inferred_return_type->can_rhs_be_assigned(expr_type)) { + fire(cur_f, v->get_return_value()->loc, "can not convert type " + to_string(expr_type) + " to return type " + to_string(cur_f->inferred_return_type)); + } + } + + static bool is_expr_valid_as_return_self(AnyExprV return_expr) { + // `return self` + if (return_expr->type == ast_reference && return_expr->as()->get_name() == "self") { + return true; + } + // `return self.someMethod()` + if (auto v_call = return_expr->try_as(); v_call && v_call->is_dot_call()) { + return v_call->fun_maybe && v_call->fun_maybe->does_return_self() && is_expr_valid_as_return_self(v_call->get_dot_obj()); + } + // `return cond ? ... 
: ...` + if (auto v_ternary = return_expr->try_as()) { + return is_expr_valid_as_return_self(v_ternary->get_when_true()) && is_expr_valid_as_return_self(v_ternary->get_when_false()); + } + return false; + } + + void visit(V v) override { + parent::visit(v); + + AnyExprV cond = v->get_cond(); + if (!expect_integer(cond) && !expect_boolean(cond)) { + fire(cur_f, cond->loc, "can not use " + to_string(cond) + " as a boolean condition"); + } + + if (cond->is_always_true || cond->is_always_false) { + warning_condition_always_true_or_false(cur_f, v->loc, cond, "ternary operator"); + } + } + + void visit(V v) override { + parent::visit(v); + + AnyExprV cond = v->get_cond(); + if (!expect_integer(cond) && !expect_boolean(cond)) { + fire(cur_f, cond->loc, "can not use " + to_string(cond) + " as a boolean condition"); + } + + if (cond->is_always_true || cond->is_always_false) { + warning_condition_always_true_or_false(cur_f, v->loc, cond, "`if`"); + } + } + + void visit(V v) override { + parent::visit(v); + + AnyExprV cond = v->get_cond(); + if (!expect_integer(cond)) { + fire(cur_f, cond->loc, "condition of `repeat` must be an integer, got " + to_string(cond)); + } + } + + void visit(V v) override { + parent::visit(v); + + AnyExprV cond = v->get_cond(); + if (!expect_integer(cond) && !expect_boolean(cond)) { + fire(cur_f, cond->loc, "can not use " + to_string(cond) + " as a boolean condition"); + } + + if (cond->is_always_true || cond->is_always_false) { + warning_condition_always_true_or_false(cur_f, v->loc, cond, "`while`"); + } + } + + void visit(V v) override { + parent::visit(v); + + AnyExprV cond = v->get_cond(); + if (!expect_integer(cond) && !expect_boolean(cond)) { + fire(cur_f, cond->loc, "can not use " + to_string(cond) + " as a boolean condition"); + } + + if (cond->is_always_true || cond->is_always_false) { + warning_condition_always_true_or_false(cur_f, v->loc, cond, "`do while`"); + } + } + + void visit(V v) override { + parent::visit(v); + + if 
(!expect_integer(v->get_thrown_code())) { + fire(cur_f, v->get_thrown_code()->loc, "excNo of `throw` must be an integer, got " + to_string(v->get_thrown_code())); + } + if (v->has_thrown_arg() && v->get_thrown_arg()->inferred_type->get_width_on_stack() != 1) { + fire(cur_f, v->get_thrown_arg()->loc, "can not throw " + to_string(v->get_thrown_arg()) + ", exception arg must occupy exactly 1 stack slot"); + } + } + + void visit(V v) override { + parent::visit(v); + + AnyExprV cond = v->get_cond(); + if (!expect_integer(cond) && !expect_boolean(cond)) { + fire(cur_f, cond->loc, "can not use " + to_string(cond) + " as a boolean condition"); + } + if (!expect_integer(v->get_thrown_code())) { + fire(cur_f, v->get_thrown_code()->loc, "thrown excNo of `assert` must be an integer, got " + to_string(v->get_thrown_code())); + } + + if (cond->is_always_true || cond->is_always_false) { + warning_condition_always_true_or_false(cur_f, v->loc, cond, "`assert`"); + } + } + + void visit(V v) override { + parent::visit(v); + + if (v->first_unreachable) { + // it's essential to print "unreachable code" warning AFTER type checking + // (printing it while inferring might be a false positive if types are incorrect, due to smart casts for example) + // a more correct approach would be to access cfg here somehow, but since cfg is now available only while inferring, + // a special v->first_unreachable was set specifically for this warning (again, which is correct if types match) + v->first_unreachable->loc.show_warning("unreachable code"); + } + } + + public: + bool should_visit_function(FunctionPtr fun_ref) override { + return fun_ref->is_code_function() && !fun_ref->is_generic_function(); + } + + void start_visiting_function(FunctionPtr fun_ref, V v_function) override { + cur_f = fun_ref; + parent::visit(v_function->get_body()); + cur_f = nullptr; + + if (fun_ref->is_implicit_return() && fun_ref->declared_return_type) { + if 
(!fun_ref->declared_return_type->can_rhs_be_assigned(TypeDataVoid::create()) || fun_ref->does_return_self()) { + fire(fun_ref, v_function->get_body()->as()->loc_end, "missing return"); + } + } + } +}; + +void pipeline_check_inferred_types() { + visit_ast_of_all_functions(); +} + +} // namespace tolk diff --git a/tolk/pipe-check-pure-impure.cpp b/tolk/pipe-check-pure-impure.cpp index 2b2e1e67..366ff160 100644 --- a/tolk/pipe-check-pure-impure.cpp +++ b/tolk/pipe-check-pure-impure.cpp @@ -34,7 +34,7 @@ static void fire_error_impure_operation_inside_pure_function(AnyV v) { class CheckImpureOperationsInPureFunctionVisitor final : public ASTVisitorFunctionBody { static void fire_if_global_var(AnyExprV v) { if (auto v_ident = v->try_as()) { - if (v_ident->sym->try_as()) { + if (v_ident->sym->try_as()) { fire_error_impure_operation_inside_pure_function(v); } } @@ -81,7 +81,7 @@ class CheckImpureOperationsInPureFunctionVisitor final : public ASTVisitorFuncti } public: - bool should_visit_function(const FunctionData* fun_ref) override { + bool should_visit_function(FunctionPtr fun_ref) override { return fun_ref->is_code_function() && !fun_ref->is_generic_function() && fun_ref->is_marked_as_pure(); } }; diff --git a/tolk/pipe-check-rvalue-lvalue.cpp b/tolk/pipe-check-rvalue-lvalue.cpp index a824cc5d..3ec47a16 100644 --- a/tolk/pipe-check-rvalue-lvalue.cpp +++ b/tolk/pipe-check-rvalue-lvalue.cpp @@ -37,7 +37,7 @@ static void fire_error_cannot_be_used_as_lvalue(AnyV v, const std::string& detai } GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_modifying_immutable_variable(AnyExprV v, const LocalVarData* var_ref) { +static void fire_error_modifying_immutable_variable(AnyExprV v, LocalVarPtr var_ref) { if (var_ref->param_idx == 0 && var_ref->name == "self") { v->error("modifying `self`, which is immutable by default; probably, you want to declare `mutate self`"); } else { @@ -47,7 +47,7 @@ static void fire_error_modifying_immutable_variable(AnyExprV v, const 
LocalVarDa // validate a function used as rvalue, like `var cb = f` // it's not a generic function (ensured earlier at type inferring) and has some more restrictions -static void validate_function_used_as_noncall(AnyExprV v, const FunctionData* fun_ref) { +static void validate_function_used_as_noncall(AnyExprV v, FunctionPtr fun_ref) { if (!fun_ref->arg_order.empty() || !fun_ref->ret_order.empty()) { v->error("saving `" + fun_ref->name + "` into a variable will most likely lead to invalid usage, since it changes the order of variables on the stack"); } @@ -97,6 +97,18 @@ class CheckRValueLvalueVisitor final : public ASTVisitorFunctionBody { parent::visit(v->get_expr()); } + void visit(V v) override { + // if `x!` is lvalue, then `x` is also lvalue, so check that `x` is ok + parent::visit(v->get_expr()); + } + + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, v->is_negated ? "operator !=" : "operator =="); + } + parent::visit(v->get_expr()); + } + void visit(V v) override { if (v->is_lvalue) { fire_error_cannot_be_used_as_lvalue(v, "literal"); @@ -124,7 +136,7 @@ class CheckRValueLvalueVisitor final : public ASTVisitorFunctionBody { void visit(V v) override { // a reference to a method used as rvalue, like `var v = t.tupleAt` if (v->is_rvalue && v->is_target_fun_ref()) { - validate_function_used_as_noncall(v, std::get(v->target)); + validate_function_used_as_noncall(v, std::get(v->target)); } } @@ -158,17 +170,17 @@ class CheckRValueLvalueVisitor final : public ASTVisitorFunctionBody { void visit(V v) override { if (v->is_lvalue) { tolk_assert(v->sym); - if (const auto* var_ref = v->sym->try_as(); var_ref && var_ref->is_immutable()) { + if (LocalVarPtr var_ref = v->sym->try_as(); var_ref && var_ref->is_immutable()) { fire_error_modifying_immutable_variable(v, var_ref); - } else if (v->sym->try_as()) { + } else if (v->sym->try_as()) { v->error("modifying immutable constant"); - } else if (v->sym->try_as()) { + } else if 
(v->sym->try_as()) { v->error("function can't be used as lvalue"); } } // a reference to a function used as rvalue, like `var v = someFunction` - if (const FunctionData* fun_ref = v->sym->try_as(); fun_ref && v->is_rvalue) { + if (FunctionPtr fun_ref = v->sym->try_as(); fun_ref && v->is_rvalue) { validate_function_used_as_noncall(v, fun_ref); } } @@ -186,7 +198,7 @@ class CheckRValueLvalueVisitor final : public ASTVisitorFunctionBody { } public: - bool should_visit_function(const FunctionData* fun_ref) override { + bool should_visit_function(FunctionPtr fun_ref) override { return fun_ref->is_code_function() && !fun_ref->is_generic_function(); } }; diff --git a/tolk/pipe-constant-folding.cpp b/tolk/pipe-constant-folding.cpp index 98996c28..9c27029b 100644 --- a/tolk/pipe-constant-folding.cpp +++ b/tolk/pipe-constant-folding.cpp @@ -25,6 +25,8 @@ * * Currently, it just replaces `-1` (ast_unary_operator ast_int_const) with a number -1 * and `!true` with false. + * Also, all parenthesized `((expr))` are replaced with `expr`, it's a constant transformation. + * (not to handle parenthesized in optimization passes, like `((x)) == true`) * More rich constant folding should be done some day, but even without this, IR optimizations * (operating low-level stack variables) pretty manage to do all related optimizations. * Constant folding in the future, done at AST level, just would slightly reduce amount of work for optimizer. 
@@ -47,6 +49,14 @@ class ConstantFoldingReplacer final : public ASTReplacerInFunctionBody { return v_bool; } + AnyExprV replace(V v) override { + AnyExprV inner = parent::replace(v->get_expr()); + if (v->is_lvalue) { + inner->mutate()->assign_lvalue_true(); + } + return inner; + } + AnyExprV replace(V v) override { parent::replace(v); @@ -78,8 +88,19 @@ class ConstantFoldingReplacer final : public ASTReplacerInFunctionBody { return v; } + AnyExprV replace(V v) override { + parent::replace(v); + + // `null == null` / `null != null` + if (v->get_expr()->type == ast_null_keyword) { + return create_bool_const(v->loc, !v->is_negated); + } + + return v; + } + public: - bool should_visit_function(const FunctionData* fun_ref) override { + bool should_visit_function(FunctionPtr fun_ref) override { return fun_ref->is_code_function() && !fun_ref->is_generic_function(); } }; diff --git a/tolk/pipe-detect-unreachable.cpp b/tolk/pipe-detect-unreachable.cpp deleted file mode 100644 index 15824cf3..00000000 --- a/tolk/pipe-detect-unreachable.cpp +++ /dev/null @@ -1,138 +0,0 @@ -/* - This file is part of TON Blockchain source code. - - TON Blockchain is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - TON Blockchain is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with TON Blockchain. If not, see . 
-*/ -#include "tolk.h" -#include "ast.h" -#include "ast-visitor.h" - -/* - * This pipe does two things: - * 1) detects unreachable code and prints warnings about it - * example: `fun main() { if(1){return;}else{return;} var x = 0; }` — var is unreachable - * 2) if control flow reaches end of function, store a flag to insert an implicit return - * example: `fun main() { assert(...); }` — has an implicit `return ()` statement before a brace - * - * Note, that it does not delete unreachable code, only prints warnings. - * Actual deleting is done much later (in "legacy" part), after AST is converted to Op. - * - * Note, that it's not CFG, it's just a shallow reachability detection. - * In the future, a true CFG should be introduced. For instance, in order to have nullable types, - * I'll need to implement smart casts. Then I'll think of a complicated granular control flow graph, - * considering data flow and exceptions (built before type inferring, of course), - * and detecting unreachable code will be a part of it. 
- */ - -namespace tolk { - -class UnreachableStatementsDetectVisitor final { - bool always_returns(AnyV v) { - switch (v->type) { - case ast_sequence: return always_returns(v->as()); - case ast_return_statement: return always_returns(v->as()); - case ast_throw_statement: return always_returns(v->as()); - case ast_function_call: return always_returns(v->as()); - case ast_repeat_statement: return always_returns(v->as()); - case ast_while_statement: return always_returns(v->as()); - case ast_do_while_statement: return always_returns(v->as()); - case ast_try_catch_statement: return always_returns(v->as()); - case ast_if_statement: return always_returns(v->as()); - default: - // unhandled statements (like assert) and statement expressions - return false; - } - } - - bool always_returns(V v) { - bool always = false; - for (AnyV item : v->get_items()) { - if (always && item->type != ast_empty_statement) { - item->loc.show_warning("unreachable code"); - break; - } - always |= always_returns(item); - } - return always; - } - - static bool always_returns([[maybe_unused]] V v) { - // quite obvious: `return expr` interrupts control flow - return true; - } - - static bool always_returns([[maybe_unused]] V v) { - // todo `throw excNo` currently does not interrupt control flow - // (in other words, `throw 1; something` - something is reachable) - // the reason is that internally it's transformed to a call of built-in function __throw(), - // which is a regular function, like __throw_if() or loadInt() - // to fix this later on, it should be deeper, introducing Op::_Throw for example, - // to make intermediate representations and stack optimizer also be aware that after it there is unreachable - return false; - } - - static bool always_returns([[maybe_unused]] V v) { - // neither annotations like @noreturn nor auto-detection of always-throwing functions also doesn't exist - // in order to do this in the future, it should be handled not only at AST/CFG level, - // but inside Op and 
low-level optimizer (at least if reachability detection is not moved out of there) - // see comments for `throw` above, similar to this case - return false; - } - - bool always_returns(V v) { - return always_returns(v->get_body()); - } - - bool always_returns(V v) { - return always_returns(v->get_body()); - } - - bool always_returns(V v) { - return always_returns(v->get_body()); - } - - bool always_returns(V v) { - return always_returns(v->get_try_body()) && always_returns(v->get_catch_body()); - } - - bool always_returns(V v) { - return always_returns(v->get_if_body()) && always_returns(v->get_else_body()); - } - -public: - static bool should_visit_function(const FunctionData* fun_ref) { - return fun_ref->is_code_function() && !fun_ref->is_generic_function(); - } - - void start_visiting_function(const FunctionData* fun_ref, V v_function) { - bool control_flow_reaches_end = !always_returns(v_function->get_body()->as()); - if (control_flow_reaches_end) { - fun_ref->mutate()->assign_is_implicit_return(); - } - } -}; - - -void pipeline_detect_unreachable_statements() { - visit_ast_of_all_functions(); -} - -void pipeline_detect_unreachable_statements(const FunctionData* fun_ref) { - UnreachableStatementsDetectVisitor visitor; - if (UnreachableStatementsDetectVisitor::should_visit_function(fun_ref)) { - visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); - } -} - -} // namespace tolk diff --git a/tolk/pipe-find-unused-symbols.cpp b/tolk/pipe-find-unused-symbols.cpp index 29584cbf..2b7e5557 100644 --- a/tolk/pipe-find-unused-symbols.cpp +++ b/tolk/pipe-find-unused-symbols.cpp @@ -36,7 +36,7 @@ namespace tolk { static void mark_function_used_dfs(const std::unique_ptr& op); -static void mark_function_used(const FunctionData* fun_ref) { +static void mark_function_used(FunctionPtr fun_ref) { if (!fun_ref->is_code_function() || fun_ref->is_really_used()) { // already handled return; } @@ -45,7 +45,7 @@ static void mark_function_used(const FunctionData* fun_ref) 
{ mark_function_used_dfs(std::get(fun_ref->body)->code->ops); } -static void mark_global_var_used(const GlobalVarData* glob_ref) { +static void mark_global_var_used(GlobalVarPtr glob_ref) { glob_ref->mutate()->assign_is_really_used(); } @@ -66,7 +66,7 @@ static void mark_function_used_dfs(const std::unique_ptr& op) { } void pipeline_find_unused_symbols() { - for (const FunctionData* fun_ref : G.all_functions) { + for (FunctionPtr fun_ref : G.all_functions) { if (fun_ref->is_method_id_not_empty()) { // get methods, main and other entrypoints, regular functions with @method_id mark_function_used(fun_ref); } diff --git a/tolk/pipe-generate-fif-output.cpp b/tolk/pipe-generate-fif-output.cpp index 7ef6ba7b..57f481f0 100644 --- a/tolk/pipe-generate-fif-output.cpp +++ b/tolk/pipe-generate-fif-output.cpp @@ -39,7 +39,7 @@ void FunctionBodyAsm::set_code(std::vector&& code) { } -static void generate_output_func(const FunctionData* fun_ref) { +static void generate_output_func(FunctionPtr fun_ref) { tolk_assert(fun_ref->is_code_function()); if (G.is_verbosity(2)) { std::cerr << "\n\n=========================\nfunction " << fun_ref->name << " : " << fun_ref->inferred_return_type << std::endl; @@ -119,7 +119,7 @@ void pipeline_generate_fif_output_to_std_cout() { std::cout << "PROGRAM{\n"; bool has_main_procedure = false; - for (const FunctionData* fun_ref : G.all_functions) { + for (FunctionPtr fun_ref : G.all_functions) { if (!fun_ref->does_need_codegen()) { if (G.is_verbosity(2) && fun_ref->is_code_function()) { std::cerr << fun_ref->name << ": code not generated, function does not need codegen\n"; @@ -143,7 +143,7 @@ void pipeline_generate_fif_output_to_std_cout() { throw Fatal("the contract has no entrypoint; forgot `fun onInternalMessage(...)`?"); } - for (const GlobalVarData* var_ref : G.all_global_vars) { + for (GlobalVarPtr var_ref : G.all_global_vars) { if (!var_ref->is_really_used() && G.settings.remove_unused_functions) { if (G.is_verbosity(2)) { std::cerr << 
var_ref->name << ": variable not generated, it's unused\n"; @@ -154,7 +154,7 @@ void pipeline_generate_fif_output_to_std_cout() { std::cout << std::string(2, ' ') << "DECLGLOBVAR " << var_ref->name << "\n"; } - for (const FunctionData* fun_ref : G.all_functions) { + for (FunctionPtr fun_ref : G.all_functions) { if (!fun_ref->does_need_codegen()) { continue; } diff --git a/tolk/pipe-infer-types-and-calls.cpp b/tolk/pipe-infer-types-and-calls.cpp index ba5f77a7..5fb12059 100644 --- a/tolk/pipe-infer-types-and-calls.cpp +++ b/tolk/pipe-infer-types-and-calls.cpp @@ -20,20 +20,22 @@ #include "ast-visitor.h" #include "generics-helpers.h" #include "type-system.h" +#include "smart-casts-cfg.h" /* * This is a complicated and crucial part of the pipeline. It simultaneously does the following: * * infers types of all expressions; example: `2 + 3` both are TypeDataInt, result is also - * * AND checks types for assignment, arguments passing, etc.; example: `fInt(cs)` is error passing slice to int * * AND binds function/method calls (assigns fun_ref); example: `globalF()`, fun_ref is assigned to `globalF` (unless generic) * * AND instantiates generic functions; example: `t.tuplePush(2)` creates `tuplePush` and assigns fun_ref to dot field * * AND infers return type of functions if it's omitted (`fun f() { ... }` means "auto infer", not "void") + * * AND builds data flow graph, mostly used for smart casts (right at the time of inferring) + * Note, that type checking (errors about types mismatch) is a later compilation step, due to loops. * * It's important to do all these parts simultaneously, they can't be split or separated. * For example, we can't bind `f(2)` earlier, because if `f` is a generic `f`, we should instantiate it, * and in order to do it, we need to know argument types. - * For example, we can't bind `c.cellHash()` earlier, because in the future we'll have overloads (`cell.hash()` and `slice.hash()`), - * and in order to bind it, we need to know object type. 
+ * For example, we can't bind `c.cellHash()` earlier, because in order to bind it, we need to know object type. + * For example, we can't infer `var y = x` without smart casts, because if x's type is refined, it affects y. * And vice versa, to infer type of expression in the middle, we need to have inferred all expressions preceding it, * which may also include generics, etc. * @@ -52,6 +54,36 @@ * Example: `fun tupleAt(t: tuple, idx: int):T`, just `t.tupleGet(2)` can't be deduced (T left unspecified), * but for assignment with left-defined type, or a call to `fInt(t.tupleGet(2))` hint "int" helps deduce T. * + * Control flow is represented NOT as a "graph with edges". Instead, it's a "structured DFS" for the AST: + * 1) at every point of inferring, we have "current flow facts" (FlowContext) + * 2) when we see an `if (...)`, we create two derived contexts (by cloning current) + * 3) after `if`, finalize them at the end and unify + * 4) if we detect unreachable code, we mark that path's context as "unreachable" + * In other words, we get the effect of a CFG but in a more direct approach. That's enough for AST-level data-flow. + * FlowContext contains "data-flow facts that are definitely known". + * // current facts: x is int?, t is (int, int) + * if (x != null && t.0 > 0) + * // current facts: x is int, t is (int, int), t.0 is positive + * else + * // current facts: x is null, t is (int, int), t.0 is not positive + * When branches rejoin, facts are merged back (int+null = int? and so on, here they would be equal to before if). + * See smart-casts-cfg.cpp for detailed comments. + * + * About loops and partial re-entering. Consider the following: + * var x: int? = 5; + * // <- here x is `int` (smart cast) + * while (true) { + * // <- but here x is `int?` (not `int`) due to assignment in a loop + * if (...) { x = getNullableInt(); } + * } + * When building control flow, loops are inferred twice. 
In the above, at first iteration, x will be `int`, + * but at the second, x will be `int?` (after merged with loop end). + * That's why type checking is done later, not to make false errors on the first iteration. + * Note, that it would also be better to postpone generics "materialization" also: here only to infer type arguments, + * but to instantiate and re-assign fun_ref later. But it complicates the architecture significantly. + * For now, generics may encounter problems within loops on first iteration, though it's unlikely to face this + * in practice. (example: in the loop above, `genericFn(x)` will at first instantiate and then ) + * * Unlike other pipes, inferring can dig recursively on demand. * Example: * fun getInt() { return 1; } @@ -63,9 +95,9 @@ namespace tolk { -static void infer_and_save_return_type_of_function(const FunctionData* fun_ref); +static void infer_and_save_return_type_of_function(FunctionPtr fun_ref); -static TypePtr get_or_infer_return_type(const FunctionData* fun_ref) { +static TypePtr get_or_infer_return_type(FunctionPtr fun_ref) { if (!fun_ref->inferred_return_type) { infer_and_save_return_type_of_function(fun_ref); } @@ -83,193 +115,31 @@ static std::string to_string(AnyExprV v_with_type) { } GNU_ATTRIBUTE_NOINLINE -static std::string to_string(const LocalVarData& var_ref) { - return "`" + var_ref.declared_type->as_human_readable() + "`"; +static std::string to_string(FunctionPtr fun_ref) { + return "`" + fun_ref->as_human_readable() + "`"; } -GNU_ATTRIBUTE_NOINLINE -static std::string to_string(const FunctionData* fun_ref) { - return "`" + fun_ref->as_human_readable() + "`"; +// fire a general error, just a wrapper over `throw` +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire(FunctionPtr cur_f, SrcLocation loc, const std::string& message) { + throw ParseError(cur_f, loc, message); } // fire an error when `fun f(...) 
asm ...` is called with T=(int,int) or other non-1 width on stack // asm functions generally can't handle it, they expect T to be a TVM primitive // (in FunC, `forall` type just couldn't be unified with non-primitives; in Tolk, generic T is expectedly inferred) GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_calling_asm_function_with_non1_stack_width_arg(SrcLocation loc, const FunctionData* fun_ref, const std::vector& substitutions, int arg_idx) { - throw ParseError(loc, "can not call `" + fun_ref->as_human_readable() + "` with " + fun_ref->genericTs->get_nameT(arg_idx) + "=" + substitutions[arg_idx]->as_human_readable() + ", because it occupies " + std::to_string(substitutions[arg_idx]->calc_width_on_stack()) + " stack slots in TVM, not 1"); -} - -// fire an error on `var n = null` -// technically it's correct, type of `n` is TypeDataNullLiteral, but it's not what the user wanted -// so, it's better to see an error on assignment, that later, on `n` usage and types mismatch -// (most common is situation above, but generally, `var (x,n) = xn` where xn is a tensor with 2-nd always-null, can be) -GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_assign_always_null_to_variable(SrcLocation loc, const LocalVarData* assigned_var, bool is_assigned_null_literal) { - std::string var_name = assigned_var->name; - throw ParseError(loc, "can not infer type of `" + var_name + "`, it's always null; specify its type with `" + var_name + ": `" + (is_assigned_null_literal ? 
" or use `null as `" : "")); -} - -// fire an error on `!cell` / `+slice` -GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_cannot_apply_operator(SrcLocation loc, std::string_view operator_name, AnyExprV unary_expr) { - std::string op = static_cast(operator_name); - throw ParseError(loc, "can not apply operator `" + op + "` to " + to_string(unary_expr->inferred_type)); -} - -// fire an error on `int + cell` / `slice & int` -GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_cannot_apply_operator(SrcLocation loc, std::string_view operator_name, AnyExprV lhs, AnyExprV rhs) { - std::string op = static_cast(operator_name); - throw ParseError(loc, "can not apply operator `" + op + "` to " + to_string(lhs->inferred_type) + " and " + to_string(rhs->inferred_type)); +static void fire_error_calling_asm_function_with_non1_stack_width_arg(FunctionPtr cur_f, SrcLocation loc, FunctionPtr fun_ref, const std::vector& substitutions, int arg_idx) { + fire(cur_f, loc, "can not call `" + fun_ref->as_human_readable() + "` with " + fun_ref->genericTs->get_nameT(arg_idx) + "=" + substitutions[arg_idx]->as_human_readable() + ", because it occupies " + std::to_string(substitutions[arg_idx]->get_width_on_stack()) + " stack slots in TVM, not 1"); } // fire an error on `untypedTupleVar.0` when used without a hint GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_cannot_deduce_untyped_tuple_access(SrcLocation loc, int index) { +static void fire_error_cannot_deduce_untyped_tuple_access(FunctionPtr cur_f, SrcLocation loc, int index) { std::string idx_access = "." 
+ std::to_string(index); - throw ParseError(loc, "can not deduce type of `" + idx_access + "`; either assign it to variable like `var c: int = " + idx_access + "` or cast the result like `" + idx_access + " as int`"); + fire(cur_f, loc, "can not deduce type of `" + idx_access + "`; either assign it to variable like `var c: int = " + idx_access + "` or cast the result like `" + idx_access + " as int`"); } -// fire an error on `untypedTupleVar.0` when inferred as (int,int), or `[int, (int,int)]`, or other non-1 width in a tuple -GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_cannot_put_non1_stack_width_arg_to_tuple(SrcLocation loc, TypePtr inferred_type) { - throw ParseError(loc, "can not put " + to_string(inferred_type) + " into a tuple, because it occupies " + std::to_string(inferred_type->calc_width_on_stack()) + " stack slots in TVM, not 1"); -} - -// check correctness of called arguments counts and their type matching -static void check_function_arguments(const FunctionData* fun_ref, V v, AnyExprV lhs_of_dot_call) { - int delta_self = lhs_of_dot_call ? 
1 : 0; - int n_arguments = v->size() + delta_self; - int n_parameters = fun_ref->get_num_params(); - - // Tolk doesn't have optional parameters currently, so just compare counts - if (!n_parameters && lhs_of_dot_call) { - v->error("`" + fun_ref->name + "` has no parameters and can not be called as method"); - } - if (n_parameters < n_arguments) { - v->error("too many arguments in call to `" + fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); - } - if (n_arguments < n_parameters) { - v->error("too few arguments in call to `" + fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); - } - - if (lhs_of_dot_call) { - if (!fun_ref->parameters[0].declared_type->can_rhs_be_assigned(lhs_of_dot_call->inferred_type)) { - lhs_of_dot_call->error("can not call method for " + to_string(fun_ref->parameters[0]) + " with object of type " + to_string(lhs_of_dot_call)); - } - } - for (int i = 0; i < v->size(); ++i) { - if (!fun_ref->parameters[i + delta_self].declared_type->can_rhs_be_assigned(v->get_arg(i)->inferred_type)) { - v->get_arg(i)->error("can not pass " + to_string(v->get_arg(i)) + " to " + to_string(fun_ref->parameters[i + delta_self])); - } - } -} - -/* - * TypeInferringUnifyStrategy unifies types from various branches to a common result (lca). - * It's used to auto infer function return type based on return statements, like in TypeScript. - * Example: `fun f() { ... return 1; ... return null; }` inferred as `int`. - * - * Besides function returns, it's also useful for ternary `return cond ? 1 : null` and `match` expression. - * If types can't be unified (a function returns int and cell, for example), `unify()` returns false, handled outside. - * BTW, don't confuse this way of inferring with Hindley-Milner, they have nothing in common. 
- */ -class TypeInferringUnifyStrategy { - TypePtr unified_result = nullptr; - - static TypePtr calculate_type_lca(TypePtr t1, TypePtr t2) { - if (t1 == t2) { - return t1; - } - if (t1->can_rhs_be_assigned(t2)) { - return t1; - } - if (t2->can_rhs_be_assigned(t1)) { - return t2; - } - - const auto* tensor1 = t1->try_as(); - const auto* tensor2 = t2->try_as(); - if (tensor1 && tensor2 && tensor1->size() == tensor2->size()) { - std::vector types_lca; - types_lca.reserve(tensor1->size()); - for (int i = 0; i < tensor1->size(); ++i) { - TypePtr next = calculate_type_lca(tensor1->items[i], tensor2->items[i]); - if (next == nullptr) { - return nullptr; - } - types_lca.push_back(next); - } - return TypeDataTensor::create(std::move(types_lca)); - } - - const auto* tuple1 = t1->try_as(); - const auto* tuple2 = t2->try_as(); - if (tuple1 && tuple2 && tuple1->size() == tuple2->size()) { - std::vector types_lca; - types_lca.reserve(tuple1->size()); - for (int i = 0; i < tuple1->size(); ++i) { - TypePtr next = calculate_type_lca(tuple1->items[i], tuple2->items[i]); - if (next == nullptr) { - return nullptr; - } - types_lca.push_back(next); - } - return TypeDataTypedTuple::create(std::move(types_lca)); - } - - return nullptr; - } - -public: - bool unify_with(TypePtr next) { - if (unified_result == nullptr) { - unified_result = next; - return true; - } - if (unified_result == next) { - return true; - } - - TypePtr combined = calculate_type_lca(unified_result, next); - if (!combined) { - return false; - } - - unified_result = combined; - return true; - } - - bool unify_with_implicit_return_void() { - if (unified_result == nullptr) { - unified_result = TypeDataVoid::create(); - return true; - } - - return unified_result == TypeDataVoid::create(); - } - - TypePtr get_result() const { return unified_result; } -}; - -// handle __expect_type(expr, "type") call -// this is used in compiler tests -GNU_ATTRIBUTE_NOINLINE GNU_ATTRIBUTE_COLD -static void 
handle_possible_compiler_internal_call(const FunctionData* current_function, V v) { - const FunctionData* fun_ref = v->fun_maybe; - tolk_assert(fun_ref && fun_ref->is_builtin_function()); - static_cast(current_function); - - if (fun_ref->name == "__expect_type") { - tolk_assert(v->get_num_args() == 2); - TypePtr expected_type = parse_type_from_string(v->get_arg(1)->get_expr()->as()->str_val); - TypePtr expr_type = v->get_arg(0)->inferred_type; - if (expected_type != expr_type) { - v->error("__expect_type failed: expected " + to_string(expected_type) + ", got " + to_string(expr_type)); - } - } -} /* * This class handles all types of AST vertices and traverses them, filling all AnyExprV::inferred_type. @@ -278,9 +148,9 @@ static void handle_possible_compiler_internal_call(const FunctionData* current_f * 1) when a new AST node type is introduced, I want it to fail here, not to be left un-inferred with UB at next steps * 2) easy to maintain a hint (see comments at the top of the file) */ -class InferCheckTypesAndCallsAndFieldsVisitor final { - const FunctionData* current_function = nullptr; - TypeInferringUnifyStrategy return_unifier; +class InferTypesAndCallsAndFieldsVisitor final { + FunctionPtr cur_f = nullptr; + std::vector return_statements; GNU_ATTRIBUTE_ALWAYS_INLINE static void assign_inferred_type(AnyExprV dst, AnyExprV src) { @@ -298,14 +168,14 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { dst->mutate()->assign_inferred_type(inferred_type); } - static void assign_inferred_type(const LocalVarData* local_var_or_param, TypePtr inferred_type) { + static void assign_inferred_type(LocalVarPtr local_var_or_param, TypePtr inferred_type) { #ifdef TOLK_DEBUG tolk_assert(inferred_type != nullptr && !inferred_type->has_unresolved_inside() && !inferred_type->has_genericT_inside()); #endif local_var_or_param->mutate()->assign_inferred_type(inferred_type); } - static void assign_inferred_type(const FunctionData* fun_ref, TypePtr inferred_return_type, TypePtr 
inferred_full_type) { + static void assign_inferred_type(FunctionPtr fun_ref, TypePtr inferred_return_type, TypePtr inferred_full_type) { #ifdef TOLK_DEBUG tolk_assert(inferred_return_type != nullptr && !inferred_return_type->has_unresolved_inside() && !inferred_return_type->has_genericT_inside()); #endif @@ -313,233 +183,222 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { } // traverse children in any statement - void process_any_statement(AnyV v) { + FlowContext process_any_statement(AnyV v, FlowContext&& flow) { switch (v->type) { case ast_sequence: - return process_sequence(v->as()); + return process_sequence(v->as(), std::move(flow)); case ast_return_statement: - return process_return_statement(v->as()); + return process_return_statement(v->as(), std::move(flow)); case ast_if_statement: - return process_if_statement(v->as()); + return process_if_statement(v->as(), std::move(flow)); case ast_repeat_statement: - return process_repeat_statement(v->as()); + return process_repeat_statement(v->as(), std::move(flow)); case ast_while_statement: - return process_while_statement(v->as()); + return process_while_statement(v->as(), std::move(flow)); case ast_do_while_statement: - return process_do_while_statement(v->as()); + return process_do_while_statement(v->as(), std::move(flow)); case ast_throw_statement: - return process_throw_statement(v->as()); + return process_throw_statement(v->as(), std::move(flow)); case ast_assert_statement: - return process_assert_statement(v->as()); + return process_assert_statement(v->as(), std::move(flow)); case ast_try_catch_statement: - return process_try_catch_statement(v->as()); + return process_try_catch_statement(v->as(), std::move(flow)); case ast_empty_statement: - return; + return flow; default: - infer_any_expr(reinterpret_cast(v)); + return process_expression_statement(reinterpret_cast(v), std::move(flow)); } } // assigns inferred_type for any expression (by calling assign_inferred_type) - void infer_any_expr(AnyExprV 
v, TypePtr hint = nullptr) { + // returns ExprFlow: out_facts that are "definitely known" after evaluating the whole expression + // if used_as_condition, true_facts/false_facts are also calculated (don't calculate them always for optimization) + ExprFlow infer_any_expr(AnyExprV v, FlowContext&& flow, bool used_as_condition, TypePtr hint = nullptr) { switch (v->type) { case ast_int_const: - return infer_int_const(v->as()); + return infer_int_const(v->as(), std::move(flow), used_as_condition); case ast_string_const: - return infer_string_const(v->as()); + return infer_string_const(v->as(), std::move(flow), used_as_condition); case ast_bool_const: - return infer_bool_const(v->as()); + return infer_bool_const(v->as(), std::move(flow), used_as_condition); case ast_local_vars_declaration: - return infer_local_vars_declaration(v->as()); + return infer_local_vars_declaration(v->as(), std::move(flow), used_as_condition); + case ast_local_var_lhs: + return infer_local_var_lhs(v->as(), std::move(flow), used_as_condition); case ast_assign: - return infer_assignment(v->as()); + return infer_assignment(v->as(), std::move(flow), used_as_condition); case ast_set_assign: - return infer_set_assign(v->as()); + return infer_set_assign(v->as(), std::move(flow), used_as_condition); case ast_unary_operator: - return infer_unary_operator(v->as()); + return infer_unary_operator(v->as(), std::move(flow), used_as_condition); case ast_binary_operator: - return infer_binary_operator(v->as()); + return infer_binary_operator(v->as(), std::move(flow), used_as_condition); case ast_ternary_operator: - return infer_ternary_operator(v->as(), hint); + return infer_ternary_operator(v->as(), std::move(flow), used_as_condition, hint); case ast_cast_as_operator: - return infer_cast_as_operator(v->as()); + return infer_cast_as_operator(v->as(), std::move(flow), used_as_condition); + case ast_not_null_operator: + return infer_not_null_operator(v->as(), std::move(flow), used_as_condition); + case 
ast_is_null_check: + return infer_is_null_check(v->as(), std::move(flow), used_as_condition); case ast_parenthesized_expression: - return infer_parenthesized(v->as(), hint); + return infer_parenthesized(v->as(), std::move(flow), used_as_condition, hint); case ast_reference: - return infer_reference(v->as()); + return infer_reference(v->as(), std::move(flow), used_as_condition); case ast_dot_access: - return infer_dot_access(v->as(), hint); + return infer_dot_access(v->as(), std::move(flow), used_as_condition, hint); case ast_function_call: - return infer_function_call(v->as(), hint); + return infer_function_call(v->as(), std::move(flow), used_as_condition, hint); case ast_tensor: - return infer_tensor(v->as(), hint); + return infer_tensor(v->as(), std::move(flow), used_as_condition, hint); case ast_typed_tuple: - return infer_typed_tuple(v->as(), hint); + return infer_typed_tuple(v->as(), std::move(flow), used_as_condition, hint); case ast_null_keyword: - return infer_null_keyword(v->as()); + return infer_null_keyword(v->as(), std::move(flow), used_as_condition); case ast_underscore: - return infer_underscore(v->as(), hint); + return infer_underscore(v->as(), std::move(flow), used_as_condition, hint); case ast_empty_expression: - return infer_empty_expression(v->as()); + return infer_empty_expression(v->as(), std::move(flow), used_as_condition); default: throw UnexpectedASTNodeType(v, "infer_any_expr"); } } - static bool expect_integer(AnyExprV v_inferred) { - return v_inferred->inferred_type == TypeDataInt::create(); - } - - static bool expect_boolean(AnyExprV v_inferred) { - return v_inferred->inferred_type == TypeDataBool::create(); - } - - static void infer_int_const(V v) { + static ExprFlow infer_int_const(V v, FlowContext&& flow, bool used_as_condition) { assign_inferred_type(v, TypeDataInt::create()); + + ExprFlow after_v(std::move(flow), used_as_condition); + if (used_as_condition) { // `if (0)` always false + if (v->intval == 0) { + 
after_v.true_flow.mark_unreachable(UnreachableKind::CantHappen); + } else { + after_v.false_flow.mark_unreachable(UnreachableKind::CantHappen); + } + } + return after_v; } - static void infer_string_const(V v) { + static ExprFlow infer_string_const(V v, FlowContext&& flow, bool used_as_condition) { if (v->is_bitslice()) { assign_inferred_type(v, TypeDataSlice::create()); } else { assign_inferred_type(v, TypeDataInt::create()); } + + return ExprFlow(std::move(flow), used_as_condition); } - static void infer_bool_const(V v) { + static ExprFlow infer_bool_const(V v, FlowContext&& flow, bool used_as_condition) { assign_inferred_type(v, TypeDataBool::create()); + + ExprFlow after_v(std::move(flow), used_as_condition); + if (used_as_condition) { // `if (false)` always false + if (v->bool_val == false) { + after_v.true_flow.mark_unreachable(UnreachableKind::CantHappen); + } else { + after_v.false_flow.mark_unreachable(UnreachableKind::CantHappen); + } + } + return after_v; } - static void infer_local_vars_declaration(V) { - // it can not appear as a standalone expression - // `var ... = rhs` is handled by ast_assign - tolk_assert(false); + ExprFlow infer_local_vars_declaration(V v, FlowContext&& flow, bool used_as_condition) { + flow = infer_any_expr(v->get_expr(), std::move(flow), used_as_condition).out_flow; + assign_inferred_type(v, v->get_expr()); + return ExprFlow(std::move(flow), used_as_condition); } - void infer_assignment(V v) { + static ExprFlow infer_local_var_lhs(V v, FlowContext&& flow, bool used_as_condition) { + // `var v = rhs`, inferring is called for `v` + // at the moment of inferring left side of assignment, we don't know type of rhs (since lhs is executed first) + // so, mark `v` as unknown + // later, v's inferred_type will be reassigned; see process_assignment_lhs_after_infer_rhs() + if (v->marked_as_redef) { + assign_inferred_type(v, v->var_ref->declared_type); + } else { + assign_inferred_type(v, v->declared_type ? 
v->declared_type : TypeDataUnknown::create()); + } + return ExprFlow(std::move(flow), used_as_condition); + } + + ExprFlow infer_assignment(V v, FlowContext&& flow, bool used_as_condition) { // v is assignment: `x = 5` / `var x = 5` / `var x: slice = 5` / `(cs,_) = f()` / `val (a,[b],_) = (a,t,0)` - // it's a tricky node to handle, because to infer rhs, at first we need to create hint from lhs - // and then to apply/check inferred rhs onto lhs - // about a hint: `var i: int = t.tupleAt(0)` is ok, but `var i = t.tupleAt(0)` not, since `tupleAt(t,i): T` + // execution flow is: lhs first, rhs second (at IR generation, also lhs is evaluated first, unlike FunC) + // after inferring lhs, use it for hint when inferring rhs + // example: `var i: int = t.tupleAt(0)` is ok (hint=int, T=int), but `var i = t.tupleAt(0)` not, since `tupleAt(t,i): T` AnyExprV lhs = v->get_lhs(); AnyExprV rhs = v->get_rhs(); - infer_any_expr(rhs, calc_hint_from_assignment_lhs(lhs)); - process_assignment_lhs_after_infer_rhs(lhs, rhs->inferred_type, rhs); - assign_inferred_type(v, lhs); + flow = infer_left_side_of_assignment(lhs, std::move(flow)); + flow = infer_any_expr(rhs, std::move(flow), false, lhs->inferred_type).out_flow; + process_assignment_lhs_after_infer_rhs(lhs, rhs->inferred_type, flow); + assign_inferred_type(v, rhs); // note, that the resulting type is rhs, not lhs + + return ExprFlow(std::move(flow), used_as_condition); } - // having assignment like `var (i: int, s) = rhs` (its lhs is local vars declaration), - // create a contextual infer hint for rhs, `(int, unknown)` in this case - // this hint helps to deduce generics and to resolve unknown types while inferring rhs - static TypePtr calc_hint_from_assignment_lhs(AnyExprV lhs) { - // `var ... 
= rhs` - dig into left part - if (auto lhs_decl = lhs->try_as()) { - return calc_hint_from_assignment_lhs(lhs_decl->get_expr()); - } - - // inside `var v: int = rhs` / `var _ = rhs` / `var v redef = rhs` (lhs is "v" / "_" / "v") - if (auto lhs_var = lhs->try_as()) { - if (lhs_var->marked_as_redef) { - return lhs_var->var_ref->declared_type; - } - if (lhs_var->declared_type) { - return lhs_var->declared_type; - } - return TypeDataUnknown::create(); - } - - // `v = rhs` / `(c1, c2) = rhs` (lhs is "v" / "_" / "c1" / "c2" after recursion) - if (auto lhs_ref = lhs->try_as()) { - if (const auto* var_ref = lhs_ref->sym->try_as()) { - return var_ref->declared_type; - } - if (const auto* glob_ref = lhs_ref->sym->try_as()) { - return glob_ref->declared_type; - } - return TypeDataUnknown::create(); - } - - // `(v1, v2) = rhs` / `var (v1, v2) = rhs` + // for `v = rhs` (NOT `var v = lhs`), variable `v` may be smart cast at this point + // the purpose of this function is to drop smart casts from expressions used as left side of assignments + // another example: `x.0 = rhs`, smart cast is dropped for `x.0` (not for `x`) + // the goal of dropping smart casts is to have lhs->inferred_type as actually declared, used as hint to infer rhs + FlowContext infer_left_side_of_assignment(AnyExprV lhs, FlowContext&& flow) { if (auto lhs_tensor = lhs->try_as()) { - std::vector sub_hints; - sub_hints.reserve(lhs_tensor->size()); - for (AnyExprV item : lhs_tensor->get_items()) { - sub_hints.push_back(calc_hint_from_assignment_lhs(item)); + std::vector types_list; + types_list.reserve(lhs_tensor->size()); + for (int i = 0; i < lhs_tensor->size(); ++i) { + flow = infer_left_side_of_assignment(lhs_tensor->get_item(i), std::move(flow)); + types_list.push_back(lhs_tensor->get_item(i)->inferred_type); + } + assign_inferred_type(lhs, TypeDataTensor::create(std::move(types_list))); + + } else if (auto lhs_tuple = lhs->try_as()) { + std::vector types_list; + types_list.reserve(lhs_tuple->size()); + for 
(int i = 0; i < lhs_tuple->size(); ++i) { + flow = infer_left_side_of_assignment(lhs_tuple->get_item(i), std::move(flow)); + types_list.push_back(lhs_tuple->get_item(i)->inferred_type); + } + assign_inferred_type(lhs, TypeDataTypedTuple::create(std::move(types_list))); + + } else if (auto lhs_par = lhs->try_as()) { + flow = infer_left_side_of_assignment(lhs_par->get_expr(), std::move(flow)); + assign_inferred_type(lhs, lhs_par->get_expr()->inferred_type); + + } else { + flow = infer_any_expr(lhs, std::move(flow), false).out_flow; + if (extract_sink_expression_from_vertex(lhs)) { + TypePtr lhs_declared_type = calc_declared_type_before_smart_cast(lhs); + assign_inferred_type(lhs, lhs_declared_type); } - return TypeDataTensor::create(std::move(sub_hints)); } - // `[v1, v2] = rhs` / `var [v1, v2] = rhs` - if (auto lhs_tuple = lhs->try_as()) { - std::vector sub_hints; - sub_hints.reserve(lhs_tuple->size()); - for (AnyExprV item : lhs_tuple->get_items()) { - sub_hints.push_back(calc_hint_from_assignment_lhs(item)); - } - return TypeDataTypedTuple::create(std::move(sub_hints)); - } - - // `a.0 = rhs` / `b.1.0 = rhs` (remember, its target is not assigned yet) - if (auto lhs_dot = lhs->try_as()) { - TypePtr obj_hint = calc_hint_from_assignment_lhs(lhs_dot->get_obj()); - std::string_view field_name = lhs_dot->get_field_name(); - if (field_name[0] >= '0' && field_name[0] <= '9') { - int index_at = std::stoi(std::string(field_name)); - if (const auto* t_tensor = obj_hint->try_as(); t_tensor && index_at < t_tensor->size()) { - return t_tensor->items[index_at]; - } - if (const auto* t_tuple = obj_hint->try_as(); t_tuple && index_at < t_tuple->size()) { - return t_tuple->items[index_at]; - } - } - return TypeDataUnknown::create(); - } - - return TypeDataUnknown::create(); + return flow; } // handle (and dig recursively) into `var lhs = rhs` + // at this point, both lhs and rhs are already inferred, but lhs newly-declared vars are unknown (unless have declared_type) // examples: 
`var z = 5`, `var (x, [y]) = (2, [3])`, `var (x, [y]) = xy` + // the purpose is to update inferred_type of lhs vars (z, x, y) + // and to re-assign types of tensors/tuples inside: `var (x,[y]) = ...` was `(unknown,[unknown])`, becomes `(int,[int])` // while recursing, keep track of rhs if lhs and rhs have common shape (5 for z, 2 for x, [3] for [y], 3 for y) // (so that on type mismatch, point to corresponding rhs, example: `var (x, y:slice) = (1, 2)` point to 2 - void process_assignment_lhs_after_infer_rhs(AnyExprV lhs, TypePtr rhs_type, AnyExprV corresponding_maybe_rhs) { - AnyExprV err_loc = corresponding_maybe_rhs ? corresponding_maybe_rhs : lhs; + static void process_assignment_lhs_after_infer_rhs(AnyExprV lhs, TypePtr rhs_type, FlowContext& out_flow) { + tolk_assert(lhs->inferred_type != nullptr); // `var ... = rhs` - dig into left part if (auto lhs_decl = lhs->try_as()) { - process_assignment_lhs_after_infer_rhs(lhs_decl->get_expr(), rhs_type, corresponding_maybe_rhs); - assign_inferred_type(lhs, lhs_decl->get_expr()->inferred_type); + process_assignment_lhs_after_infer_rhs(lhs_decl->get_expr(), rhs_type, out_flow); return; } // inside `var v: int = rhs` / `var _ = rhs` / `var v redef = rhs` (lhs is "v" / "_" / "v") if (auto lhs_var = lhs->try_as()) { - TypePtr declared_type = lhs_var->declared_type; // `var v: int = rhs` (otherwise, nullptr) - if (lhs_var->marked_as_redef) { - tolk_assert(lhs_var->var_ref && lhs_var->var_ref->declared_type); - declared_type = lhs_var->var_ref->declared_type; - } - if (declared_type) { - if (!declared_type->can_rhs_be_assigned(rhs_type)) { - err_loc->error("can not assign " + to_string(rhs_type) + " to variable of type " + to_string(declared_type)); - } - assign_inferred_type(lhs, declared_type); - } else { - if (rhs_type == TypeDataNullLiteral::create()) { - fire_error_assign_always_null_to_variable(err_loc->loc, lhs_var->var_ref->try_as(), corresponding_maybe_rhs && corresponding_maybe_rhs->type == ast_null_keyword); - } - 
assign_inferred_type(lhs, rhs_type); - assign_inferred_type(lhs_var->var_ref, lhs_var->inferred_type); - } - return; - } - - // `v = rhs` / `(c1, c2) = rhs` (lhs is "v" / "_" / "c1" / "c2" after recursion) - if (lhs->try_as()) { - infer_any_expr(lhs); - if (!lhs->inferred_type->can_rhs_be_assigned(rhs_type)) { - err_loc->error("can not assign " + to_string(rhs_type) + " to variable of type " + to_string(lhs)); + TypePtr declared_type = lhs_var->marked_as_redef ? lhs_var->var_ref->declared_type : lhs_var->declared_type; + if (lhs_var->inferred_type == TypeDataUnknown::create()) { + assign_inferred_type(lhs_var, rhs_type); + assign_inferred_type(lhs_var->var_ref, rhs_type); } + TypePtr smart_casted_type = declared_type ? calc_smart_cast_type_on_assignment(declared_type, rhs_type) : rhs_type; + out_flow.register_known_type(SinkExpression(lhs_var->var_ref), smart_casted_type); return; } @@ -547,17 +406,11 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { // dig recursively into v1 and v2 with corresponding rhs i-th item of a tensor if (auto lhs_tensor = lhs->try_as()) { const TypeDataTensor* rhs_type_tensor = rhs_type->try_as(); - if (!rhs_type_tensor) { - err_loc->error("can not assign " + to_string(rhs_type) + " to a tensor"); - } - if (lhs_tensor->size() != rhs_type_tensor->size()) { - err_loc->error("can not assign " + to_string(rhs_type) + ", sizes mismatch"); - } - V rhs_tensor_maybe = corresponding_maybe_rhs ? corresponding_maybe_rhs->try_as() : nullptr; std::vector types_list; types_list.reserve(lhs_tensor->size()); for (int i = 0; i < lhs_tensor->size(); ++i) { - process_assignment_lhs_after_infer_rhs(lhs_tensor->get_item(i), rhs_type_tensor->items[i], rhs_tensor_maybe ? rhs_tensor_maybe->get_item(i) : nullptr); + TypePtr ith_rhs_type = rhs_type_tensor && i < rhs_type_tensor->size() ? 
rhs_type_tensor->items[i] : TypeDataUnknown::create(); + process_assignment_lhs_after_infer_rhs(lhs_tensor->get_item(i), ith_rhs_type, out_flow); types_list.push_back(lhs_tensor->get_item(i)->inferred_type); } assign_inferred_type(lhs, TypeDataTensor::create(std::move(types_list))); @@ -568,76 +421,57 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { // dig recursively into v1 and v2 with corresponding rhs i-th item of a tuple if (auto lhs_tuple = lhs->try_as()) { const TypeDataTypedTuple* rhs_type_tuple = rhs_type->try_as(); - if (!rhs_type_tuple) { - err_loc->error("can not assign " + to_string(rhs_type) + " to a tuple"); - } - if (lhs_tuple->size() != rhs_type_tuple->size()) { - err_loc->error("can not assign " + to_string(rhs_type) + ", sizes mismatch"); - } - V rhs_tuple_maybe = corresponding_maybe_rhs ? corresponding_maybe_rhs->try_as() : nullptr; std::vector types_list; types_list.reserve(lhs_tuple->size()); for (int i = 0; i < lhs_tuple->size(); ++i) { - process_assignment_lhs_after_infer_rhs(lhs_tuple->get_item(i), rhs_type_tuple->items[i], rhs_tuple_maybe ? rhs_tuple_maybe->get_item(i) : nullptr); + TypePtr ith_rhs_type = rhs_type_tuple && i < rhs_type_tuple->size() ? 
rhs_type_tuple->items[i] : TypeDataUnknown::create(); + process_assignment_lhs_after_infer_rhs(lhs_tuple->get_item(i), ith_rhs_type, out_flow); types_list.push_back(lhs_tuple->get_item(i)->inferred_type); } assign_inferred_type(lhs, TypeDataTypedTuple::create(std::move(types_list))); return; } - // `_ = rhs` - if (lhs->type == ast_underscore) { - assign_inferred_type(lhs, TypeDataUnknown::create()); + // `(v) = (rhs)`, just surrounded by parenthesis + if (auto lhs_par = lhs->try_as()) { + process_assignment_lhs_after_infer_rhs(lhs_par->get_expr(), rhs_type, out_flow); + assign_inferred_type(lhs, lhs_par->get_expr()); return; } - // here is something unhandled like `a.0 = rhs`, run regular inferring on rhs - // for something strange like `f() = rhs` type inferring will pass, but will fail later - infer_any_expr(lhs, rhs_type); - if (!lhs->inferred_type->can_rhs_be_assigned(rhs_type)) { - err_loc->error("can not assign " + to_string(rhs_type) + " to " + to_string(lhs)); + // here is `v = rhs` (just assignment, not `var v = rhs`) / `a.0 = rhs` / `getObj(z=f()).0 = rhs` etc. 
+ // for instance, `tensorVar.0 = rhs` / `obj.field = rhs` has already checked index correctness while inferring lhs + // for strange lhs like `f() = rhs` type inferring (and later checking) will pass, but will fail lvalue check later + if (SinkExpression s_expr = extract_sink_expression_from_vertex(lhs)) { + TypePtr lhs_declared_type = calc_declared_type_before_smart_cast(lhs); + TypePtr smart_casted_type = calc_smart_cast_type_on_assignment(lhs_declared_type, rhs_type); + out_flow.register_known_type(s_expr, smart_casted_type); + assign_inferred_type(lhs, lhs_declared_type); } } - void infer_set_assign(V v) { + ExprFlow infer_set_assign(V v, FlowContext&& flow, bool used_as_condition) { AnyExprV lhs = v->get_lhs(); AnyExprV rhs = v->get_rhs(); - infer_any_expr(lhs); - infer_any_expr(rhs, lhs->inferred_type); + ExprFlow after_lhs = infer_any_expr(lhs, std::move(flow), false); + FlowContext rhs_flow = std::move(after_lhs.out_flow); + ExprFlow after_rhs = infer_any_expr(rhs, std::move(rhs_flow), false, lhs->inferred_type); // almost all operators implementation is hardcoded by built-in functions `_+_` and similar std::string_view builtin_func = v->operator_name; // "+" for operator += - switch (v->tok) { - // &= |= ^= are "overloaded" both for integers and booleans, (int &= bool) is NOT allowed - case tok_set_bitwise_and: - case tok_set_bitwise_or: - case tok_set_bitwise_xor: { - bool both_int = expect_integer(lhs) && expect_integer(rhs); - bool both_bool = expect_boolean(lhs) && expect_boolean(rhs); - if (!both_int && !both_bool) { - fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); - } - break; - } - // others are mathematical: += *= ... 
- default: - if (!expect_integer(lhs) || !expect_integer(rhs)) { - fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); - } - } - assign_inferred_type(v, lhs); if (!builtin_func.empty()) { - const FunctionData* builtin_sym = lookup_global_symbol("_" + static_cast(builtin_func) + "_")->as(); - tolk_assert(builtin_sym); + FunctionPtr builtin_sym = lookup_global_symbol("_" + static_cast(builtin_func) + "_")->try_as(); v->mutate()->assign_fun_ref(builtin_sym); } + + return ExprFlow(std::move(after_rhs.out_flow), used_as_condition); } - void infer_unary_operator(V v) { + ExprFlow infer_unary_operator(V v, FlowContext&& flow, bool used_as_condition) { AnyExprV rhs = v->get_rhs(); - infer_any_expr(rhs); + ExprFlow after_rhs = infer_any_expr(rhs, std::move(flow), used_as_condition); // all operators implementation is hardcoded by built-in functions `~_` and similar std::string_view builtin_func = v->operator_name; @@ -646,154 +480,217 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { case tok_minus: case tok_plus: case tok_bitwise_not: - if (!expect_integer(rhs)) { - fire_error_cannot_apply_operator(v->loc, v->operator_name, rhs); - } assign_inferred_type(v, TypeDataInt::create()); break; case tok_logical_not: - if (expect_boolean(rhs)) { + if (rhs->inferred_type == TypeDataBool::create()) { builtin_func = "!b"; // "overloaded" for bool - } else if (!expect_integer(rhs)) { - fire_error_cannot_apply_operator(v->loc, v->operator_name, rhs); } assign_inferred_type(v, TypeDataBool::create()); + std::swap(after_rhs.false_flow, after_rhs.true_flow); break; default: tolk_assert(false); } - if (!builtin_func.empty()) { - const FunctionData* builtin_sym = lookup_global_symbol(static_cast(builtin_func) + "_")->as(); - tolk_assert(builtin_sym); - v->mutate()->assign_fun_ref(builtin_sym); - } + FunctionPtr builtin_sym = lookup_global_symbol(static_cast(builtin_func) + "_")->try_as(); + v->mutate()->assign_fun_ref(builtin_sym); + + return after_rhs; } - void 
infer_binary_operator(V v) { + ExprFlow infer_binary_operator(V v, FlowContext&& flow, bool used_as_condition) { AnyExprV lhs = v->get_lhs(); AnyExprV rhs = v->get_rhs(); - infer_any_expr(lhs); - infer_any_expr(rhs); // almost all operators implementation is hardcoded by built-in functions `_+_` and similar std::string_view builtin_func = v->operator_name; switch (v->tok) { - // == != can compare both integers and booleans, (int == bool) is NOT allowed + // comparison operators, returning bool case tok_eq: - case tok_neq: { - bool both_int = expect_integer(lhs) && expect_integer(rhs); - bool both_bool = expect_boolean(lhs) && expect_boolean(rhs); - if (!both_int && !both_bool) { - if (lhs->inferred_type == rhs->inferred_type) { // compare slice with slice - v->error("type " + to_string(lhs) + " can not be compared with `== !=`"); - } else { - fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); - } - } - assign_inferred_type(v, TypeDataBool::create()); - break; - } - // < > can compare only integers + case tok_neq: case tok_lt: case tok_gt: case tok_leq: case tok_geq: - case tok_spaceship: { - if (!expect_integer(lhs) || !expect_integer(rhs)) { - fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); - } + case tok_spaceship: + flow = infer_any_expr(lhs, std::move(flow), false).out_flow; + flow = infer_any_expr(rhs, std::move(flow), false).out_flow; assign_inferred_type(v, TypeDataBool::create()); break; - } - // & | ^ are "overloaded" both for integers and booleans, (int & bool) is NOT allowed + // & | ^ are "overloaded" both for integers and booleans case tok_bitwise_and: case tok_bitwise_or: - case tok_bitwise_xor: { - bool both_int = expect_integer(lhs) && expect_integer(rhs); - bool both_bool = expect_boolean(lhs) && expect_boolean(rhs); - if (!both_int && !both_bool) { - fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); + case tok_bitwise_xor: + flow = infer_any_expr(lhs, std::move(flow), false).out_flow; + 
flow = infer_any_expr(rhs, std::move(flow), false).out_flow; + if (lhs->inferred_type == TypeDataBool::create() && rhs->inferred_type == TypeDataBool::create()) { + assign_inferred_type(v, TypeDataBool::create()); + } else { + assign_inferred_type(v, TypeDataInt::create()); } assign_inferred_type(v, rhs); // (int & int) is int, (bool & bool) is bool break; - } - // && || can work with integers and booleans, (int && bool) is allowed - case tok_logical_and: - case tok_logical_or: { - bool lhs_ok = expect_integer(lhs) || expect_boolean(lhs); - bool rhs_ok = expect_integer(rhs) || expect_boolean(rhs); - if (!lhs_ok || !rhs_ok) { - fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); - } + // && || result in booleans, but building flow facts is tricky due to short-circuit + case tok_logical_and: { + ExprFlow after_lhs = infer_any_expr(lhs, std::move(flow), true); + ExprFlow after_rhs = infer_any_expr(rhs, std::move(after_lhs.true_flow), true); assign_inferred_type(v, TypeDataBool::create()); - builtin_func = {}; // no built-in functions, logical operators are expressed as IFs at IR level - break; + if (!used_as_condition) { + FlowContext out_flow = FlowContext::merge_flow(std::move(after_lhs.false_flow), std::move(after_rhs.out_flow)); + return ExprFlow(std::move(out_flow), false); + } + FlowContext out_flow = FlowContext::merge_flow(std::move(after_lhs.out_flow), std::move(after_rhs.out_flow)); + FlowContext true_flow = std::move(after_rhs.true_flow); + FlowContext false_flow = FlowContext::merge_flow(std::move(after_lhs.false_flow), std::move(after_rhs.false_flow)); + return ExprFlow(std::move(out_flow), std::move(true_flow), std::move(false_flow)); + } + case tok_logical_or: { + ExprFlow after_lhs = infer_any_expr(lhs, std::move(flow), true); + ExprFlow after_rhs = infer_any_expr(rhs, std::move(after_lhs.false_flow), true); + assign_inferred_type(v, TypeDataBool::create()); + if (!used_as_condition) { + FlowContext out_flow = 
FlowContext::merge_flow(std::move(after_lhs.true_flow), std::move(after_rhs.out_flow)); + return ExprFlow(std::move(after_rhs.out_flow), false); + } + FlowContext out_flow = FlowContext::merge_flow(std::move(after_lhs.out_flow), std::move(after_rhs.out_flow)); + FlowContext true_flow = FlowContext::merge_flow(std::move(after_lhs.true_flow), std::move(after_rhs.true_flow)); + FlowContext false_flow = std::move(after_rhs.false_flow); + return ExprFlow(std::move(out_flow), std::move(true_flow), std::move(false_flow)); } // others are mathematical: + * ... default: - if (!expect_integer(lhs) || !expect_integer(rhs)) { - fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); - } + flow = infer_any_expr(lhs, std::move(flow), false).out_flow; + flow = infer_any_expr(rhs, std::move(flow), false).out_flow; assign_inferred_type(v, TypeDataInt::create()); } if (!builtin_func.empty()) { - const FunctionData* builtin_sym = lookup_global_symbol("_" + static_cast(builtin_func) + "_")->as(); - tolk_assert(builtin_sym); + FunctionPtr builtin_sym = lookup_global_symbol("_" + static_cast(builtin_func) + "_")->try_as(); v->mutate()->assign_fun_ref(builtin_sym); } + + return ExprFlow(std::move(flow), used_as_condition); } - void infer_ternary_operator(V v, TypePtr hint) { - AnyExprV cond = v->get_cond(); - infer_any_expr(cond); - if (!expect_integer(cond) && !expect_boolean(cond)) { - cond->error("can not use " + to_string(cond) + " as a boolean condition"); + ExprFlow infer_ternary_operator(V v, FlowContext&& flow, bool used_as_condition, TypePtr hint) { + ExprFlow after_cond = infer_any_expr(v->get_cond(), std::move(flow), true); + v->get_cond()->mutate()->assign_always_true_or_false(after_cond.get_always_true_false_state()); + + ExprFlow after_true = infer_any_expr(v->get_when_true(), std::move(after_cond.true_flow), used_as_condition, hint); + ExprFlow after_false = infer_any_expr(v->get_when_false(), std::move(after_cond.false_flow), used_as_condition, hint); + + if 
(v->get_cond()->is_always_true) { + assign_inferred_type(v, v->get_when_true()); + return after_true; + } + if (v->get_cond()->is_always_false) { + assign_inferred_type(v, v->get_when_false()); + return after_false; } - infer_any_expr(v->get_when_true(), hint); - infer_any_expr(v->get_when_false(), hint); TypeInferringUnifyStrategy tern_type; tern_type.unify_with(v->get_when_true()->inferred_type); if (!tern_type.unify_with(v->get_when_false()->inferred_type)) { - v->error("types of ternary branches are incompatible"); + fire(cur_f, v->loc, "types of ternary branches are incompatible: " + to_string(v->get_when_true()) + " and " + to_string(v->get_when_false())); } assign_inferred_type(v, tern_type.get_result()); + + FlowContext out_flow = FlowContext::merge_flow(std::move(after_true.out_flow), std::move(after_false.out_flow)); + return ExprFlow(std::move(out_flow), std::move(after_true.true_flow), std::move(after_false.false_flow)); } - void infer_cast_as_operator(V v) { + ExprFlow infer_cast_as_operator(V v, FlowContext&& flow, bool used_as_condition) { // for `expr as `, use this type for hint, so that `t.tupleAt(0) as int` is ok - infer_any_expr(v->get_expr(), v->cast_to_type); - if (!v->get_expr()->inferred_type->can_be_casted_with_as_operator(v->cast_to_type)) { - v->error("type " + to_string(v->get_expr()) + " can not be cast to " + to_string(v->cast_to_type)); - } + ExprFlow after_expr = infer_any_expr(v->get_expr(), std::move(flow), false, v->cast_to_type); assign_inferred_type(v, v->cast_to_type); + + if (!used_as_condition) { + return after_expr; + } + return ExprFlow(std::move(after_expr.out_flow), true); } - void infer_parenthesized(V v, TypePtr hint) { - infer_any_expr(v->get_expr(), hint); + ExprFlow infer_is_null_check(V v, FlowContext&& flow, bool used_as_condition) { + ExprFlow after_expr = infer_any_expr(v->get_expr(), std::move(flow), false); + assign_inferred_type(v, TypeDataBool::create()); + + TypePtr expr_type = v->get_expr()->inferred_type; 
+ TypePtr non_null_type = calculate_type_subtract_null(expr_type); + if (expr_type == TypeDataNullLiteral::create()) { // `expr == null` is always true + v->mutate()->assign_always_true_or_false(v->is_negated ? 2 : 1); + } else if (non_null_type == TypeDataNever::create()) { // `expr == null` is always false + v->mutate()->assign_always_true_or_false(v->is_negated ? 1 : 2); + } else { + v->mutate()->assign_always_true_or_false(0); + } + + if (!used_as_condition) { + return after_expr; + } + + FlowContext true_flow = after_expr.out_flow.clone(); + FlowContext false_flow = after_expr.out_flow.clone(); + if (SinkExpression s_expr = extract_sink_expression_from_vertex(v->get_expr())) { + if (v->is_always_true) { + false_flow.mark_unreachable(UnreachableKind::CantHappen); + false_flow.register_known_type(s_expr, TypeDataNever::create()); + } else if (v->is_always_false) { + true_flow.mark_unreachable(UnreachableKind::CantHappen); + true_flow.register_known_type(s_expr, TypeDataNever::create()); + } else if (!v->is_negated) { + true_flow.register_known_type(s_expr, TypeDataNullLiteral::create()); + false_flow.register_known_type(s_expr, non_null_type); + } else { + true_flow.register_known_type(s_expr, non_null_type); + false_flow.register_known_type(s_expr, TypeDataNullLiteral::create()); + } + } + return ExprFlow(std::move(after_expr.out_flow), std::move(true_flow), std::move(false_flow)); + } + + ExprFlow infer_not_null_operator(V v, FlowContext&& flow, bool used_as_condition) { + ExprFlow after_expr = infer_any_expr(v->get_expr(), std::move(flow), false); + + if (const auto* as_nullable = v->get_expr()->inferred_type->try_as()) { + assign_inferred_type(v, as_nullable->inner); + } else { + assign_inferred_type(v, v->get_expr()); + } + + if (!used_as_condition) { + return after_expr; + } + return ExprFlow(std::move(after_expr.out_flow), true); + } + + ExprFlow infer_parenthesized(V v, FlowContext&& flow, bool used_as_condition, TypePtr hint) { + ExprFlow after_expr = 
infer_any_expr(v->get_expr(), std::move(flow), used_as_condition, hint); assign_inferred_type(v, v->get_expr()); + return after_expr; } - static void infer_reference(V v) { - if (const auto* var_ref = v->sym->try_as()) { - assign_inferred_type(v, var_ref->declared_type); + ExprFlow infer_reference(V v, FlowContext&& flow, bool used_as_condition) { + if (LocalVarPtr var_ref = v->sym->try_as()) { + TypePtr declared_or_smart_casted = flow.smart_cast_if_exists(SinkExpression(var_ref)); + tolk_assert(declared_or_smart_casted != nullptr); // all local vars are presented in flow + assign_inferred_type(v, declared_or_smart_casted); - } else if (const auto* const_ref = v->sym->try_as()) { + } else if (GlobalConstPtr const_ref = v->sym->try_as()) { assign_inferred_type(v, const_ref->is_int_const() ? TypeDataInt::create() : TypeDataSlice::create()); - } else if (const auto* glob_ref = v->sym->try_as()) { + } else if (GlobalVarPtr glob_ref = v->sym->try_as()) { + // there are no smart casts for globals, it's a way of preventing reading one global multiple times, it costs gas assign_inferred_type(v, glob_ref->declared_type); - } else if (const auto* fun_ref = v->sym->try_as()) { + } else if (FunctionPtr fun_ref = v->sym->try_as()) { // it's `globalF` / `globalF` - references to functions used as non-call V v_instantiationTs = v->get_instantiationTs(); if (fun_ref->is_generic_function() && !v_instantiationTs) { // `genericFn` is invalid as non-call, can't be used without - v->error("can not use a generic function " + to_string(fun_ref) + " as non-call"); + fire(cur_f, v->loc, "can not use a generic function " + to_string(fun_ref) + " as non-call"); } else if (fun_ref->is_generic_function()) { // `genericFn` is valid, it's a reference to instantiation @@ -801,15 +698,15 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { fun_ref = check_and_instantiate_generic_function(v->loc, fun_ref, std::move(substitutions)); v->mutate()->assign_sym(fun_ref); - } else if 
(UNLIKELY(v_instantiationTs != nullptr)) { + } else if (v_instantiationTs != nullptr && !fun_ref->is_instantiation_of_generic_function()) { // non-generic function referenced like `return beginCell;` - v_instantiationTs->error("not generic function used with generic T"); + fire(cur_f, v_instantiationTs->loc, "not generic function used with generic T"); } fun_ref->mutate()->assign_is_used_as_noncall(); get_or_infer_return_type(fun_ref); assign_inferred_type(v, fun_ref->inferred_full_type); - return; + return ExprFlow(std::move(flow), used_as_condition); } else { tolk_assert(false); @@ -817,16 +714,17 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { // for non-functions: `local_var` and similar not allowed if (UNLIKELY(v->has_instantiationTs())) { - v->get_instantiationTs()->error("generic T not expected here"); + fire(cur_f, v->get_instantiationTs()->loc, "generic T not expected here"); } + return ExprFlow(std::move(flow), used_as_condition); } // given `genericF` / `t.tupleFirst` (the user manually specified instantiation Ts), // validate and collect them // returns: [int, slice] / [cell] - static std::vector collect_fun_generic_substitutions_from_manually_specified(SrcLocation loc, const FunctionData* fun_ref, V instantiationT_list) { + std::vector collect_fun_generic_substitutions_from_manually_specified(SrcLocation loc, FunctionPtr fun_ref, V instantiationT_list) const { if (fun_ref->genericTs->size() != instantiationT_list->get_items().size()) { - throw ParseError(loc, "wrong count of generic T: expected " + std::to_string(fun_ref->genericTs->size()) + ", got " + std::to_string(instantiationT_list->size())); + fire(cur_f, loc, "wrong count of generic T: expected " + std::to_string(fun_ref->genericTs->size()) + ", got " + std::to_string(instantiationT_list->size())); } std::vector substitutions; @@ -844,30 +742,27 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { // example: was `t.tuplePush(2)`, read , instantiate `tuplePush` (will later fail 
type check) // example: was `var cb = t.tupleFirst;` (used as reference, as non-call), instantiate `tupleFirst` // returns fun_ref to instantiated function - static const FunctionData* check_and_instantiate_generic_function(SrcLocation loc, const FunctionData* fun_ref, std::vector&& substitutionTs) { + FunctionPtr check_and_instantiate_generic_function(SrcLocation loc, FunctionPtr fun_ref, std::vector&& substitutionTs) const { // T for asm function must be a TVM primitive (width 1), otherwise, asm would act incorrectly if (fun_ref->is_asm_function() || fun_ref->is_builtin_function()) { for (int i = 0; i < static_cast(substitutionTs.size()); ++i) { - if (substitutionTs[i]->calc_width_on_stack() != 1) { - fire_error_calling_asm_function_with_non1_stack_width_arg(loc, fun_ref, substitutionTs, i); + if (substitutionTs[i]->get_width_on_stack() != 1) { + fire_error_calling_asm_function_with_non1_stack_width_arg(cur_f, loc, fun_ref, substitutionTs, i); } } } std::string inst_name = generate_instantiated_name(fun_ref->name, substitutionTs); - try { - // make deep clone of `f` with substitutionTs - // (if `f` was already instantiated, it will be immediately returned from a symbol table) - return instantiate_generic_function(loc, fun_ref, inst_name, std::move(substitutionTs)); - } catch (const ParseError& ex) { - throw ParseError(ex.where, "while instantiating generic function `" + inst_name + "` at " + loc.to_string() + ": " + ex.message); - } + // make deep clone of `f` with substitutionTs + // (if `f` was already instantiated, it will be immediately returned from a symbol table) + return instantiate_generic_function(loc, fun_ref, inst_name, std::move(substitutionTs)); } - void infer_dot_access(V v, TypePtr hint) { + ExprFlow infer_dot_access(V v, FlowContext&& flow, bool used_as_condition, TypePtr hint) { // it's NOT a method call `t.tupleSize()` (since such cases are handled by infer_function_call) // it's `t.0`, `getUser().id`, and `t.tupleSize` (as a reference, not as 
a call) - infer_any_expr(v->get_obj()); + flow = infer_any_expr(v->get_obj(), std::move(flow), false).out_flow; + TypePtr obj_type = v->get_obj()->inferred_type; // our goal is to fill v->target knowing type of obj V v_ident = v->get_identifier(); // field/method name vertex @@ -880,49 +775,64 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { int index_at = std::stoi(std::string(field_name)); if (const auto* t_tensor = obj_type->try_as()) { if (index_at >= t_tensor->size()) { - v_ident->error("invalid tensor index, expected 0.." + std::to_string(t_tensor->items.size() - 1)); + fire(cur_f, v_ident->loc, "invalid tensor index, expected 0.." + std::to_string(t_tensor->items.size() - 1)); } v->mutate()->assign_target(index_at); - assign_inferred_type(v, t_tensor->items[index_at]); - return; + TypePtr inferred_type = t_tensor->items[index_at]; + if (SinkExpression s_expr = extract_sink_expression_from_vertex(v)) { + if (TypePtr smart_casted = flow.smart_cast_if_exists(s_expr)) { + inferred_type = smart_casted; + } + } + assign_inferred_type(v, inferred_type); + return ExprFlow(std::move(flow), used_as_condition); } if (const auto* t_tuple = obj_type->try_as()) { if (index_at >= t_tuple->size()) { - v_ident->error("invalid tuple index, expected 0.." + std::to_string(t_tuple->items.size() - 1)); + fire(cur_f, v_ident->loc, "invalid tuple index, expected 0.." 
+ std::to_string(t_tuple->items.size() - 1)); } v->mutate()->assign_target(index_at); - assign_inferred_type(v, t_tuple->items[index_at]); - return; + TypePtr inferred_type = t_tuple->items[index_at]; + if (SinkExpression s_expr = extract_sink_expression_from_vertex(v)) { + if (TypePtr smart_casted = flow.smart_cast_if_exists(s_expr)) { + inferred_type = smart_casted; + } + } + assign_inferred_type(v, inferred_type); + return ExprFlow(std::move(flow), used_as_condition); } if (obj_type->try_as()) { - if (hint == nullptr) { - fire_error_cannot_deduce_untyped_tuple_access(v->loc, index_at); - } - if (hint->calc_width_on_stack() != 1) { - fire_error_cannot_put_non1_stack_width_arg_to_tuple(v->loc, hint); + TypePtr item_type = nullptr; + if (v->is_lvalue && !hint) { // left side of assignment + item_type = TypeDataUnknown::create(); + } else { + if (hint == nullptr) { + fire_error_cannot_deduce_untyped_tuple_access(cur_f, v->loc, index_at); + } + item_type = hint; } v->mutate()->assign_target(index_at); - assign_inferred_type(v, hint); - return; + assign_inferred_type(v, item_type); + return ExprFlow(std::move(flow), used_as_condition); } - v_ident->error("type " + to_string(obj_type) + " is not indexable"); + fire(cur_f, v_ident->loc, "type " + to_string(obj_type) + " is not indexable"); } // for now, Tolk doesn't have fields and object-scoped methods; `t.tupleSize` is a global function `tupleSize` const Symbol* sym = lookup_global_symbol(field_name); - const FunctionData* fun_ref = sym ? sym->try_as() : nullptr; + FunctionPtr fun_ref = sym ? 
sym->try_as() : nullptr; if (!fun_ref) { - v_ident->error("non-existing field `" + static_cast(field_name) + "` of type " + to_string(obj_type)); + fire(cur_f, v_ident->loc, "non-existing field `" + static_cast(field_name) + "` of type " + to_string(obj_type)); } // `t.tupleSize` is ok, `cs.tupleSize` not if (!fun_ref->parameters[0].declared_type->can_rhs_be_assigned(obj_type)) { - v_ident->error("referencing a method for " + to_string(fun_ref->parameters[0]) + " with object of type " + to_string(obj_type)); + fire(cur_f, v_ident->loc, "referencing a method for " + to_string(fun_ref->parameters[0].declared_type) + " with object of type " + to_string(obj_type)); } if (fun_ref->is_generic_function() && !v_instantiationTs) { // `genericFn` and `t.tupleAt` are invalid as non-call, they can't be used without - v->error("can not use a generic function " + to_string(fun_ref) + " as non-call"); + fire(cur_f, v->loc, "can not use a generic function " + to_string(fun_ref) + " as non-call"); } else if (fun_ref->is_generic_function()) { // `t.tupleAt` is valid, it's a reference to instantiation @@ -931,27 +841,28 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { } else if (UNLIKELY(v_instantiationTs != nullptr)) { // non-generic method referenced like `var cb = c.cellHash;` - v_instantiationTs->error("not generic function used with generic T"); + fire(cur_f, v_instantiationTs->loc, "not generic function used with generic T"); } fun_ref->mutate()->assign_is_used_as_noncall(); v->mutate()->assign_target(fun_ref); get_or_infer_return_type(fun_ref); assign_inferred_type(v, fun_ref->inferred_full_type); // type of `t.tupleSize` is TypeDataFunCallable + return ExprFlow(std::move(flow), used_as_condition); } - void infer_function_call(V v, TypePtr hint) { + ExprFlow infer_function_call(V v, FlowContext&& flow, bool used_as_condition, TypePtr hint) { AnyExprV callee = v->get_callee(); // v is `globalF(args)` / `globalF(args)` / `obj.method(args)` / `local_var(args)` / 
`getF()(args)` int delta_self = 0; AnyExprV dot_obj = nullptr; - const FunctionData* fun_ref = nullptr; + FunctionPtr fun_ref = nullptr; V v_instantiationTs = nullptr; if (auto v_ref = callee->try_as()) { // `globalF()` / `globalF()` / `local_var()` / `SOME_CONST()` - fun_ref = v_ref->sym->try_as(); // not null for `globalF` + fun_ref = v_ref->sym->try_as(); // not null for `globalF` v_instantiationTs = v_ref->get_instantiationTs(); // present for `globalF()` } else if (auto v_dot = callee->try_as()) { @@ -960,19 +871,19 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { delta_self = 1; dot_obj = v_dot->get_obj(); v_instantiationTs = v_dot->get_instantiationTs(); // present for `obj.someMethod()` - infer_any_expr(dot_obj); + flow = infer_any_expr(dot_obj, std::move(flow), false).out_flow; // it can be indexed access (`tensorVar.0()`, `tupleVar.1()`) or a method (`t.tupleSize()`) std::string_view field_name = v_dot->get_field_name(); if (field_name[0] >= '0' && field_name[0] <= '9') { // indexed access `ab.2()`, then treat `ab.2` just like an expression, fun_ref remains nullptr - // infer_dot_access() will be called for a callee, it will check type, index correctness, etc. + // infer_dot_access() will be called for a callee, it will check index correctness } else { // for now, Tolk doesn't have fields and object-scoped methods; `t.tupleSize` is a global function `tupleSize` const Symbol* sym = lookup_global_symbol(field_name); - fun_ref = sym ? sym->try_as() : nullptr; + fun_ref = sym ? 
sym->try_as() : nullptr; if (!fun_ref) { - v_dot->get_identifier()->error("non-existing method `" + static_cast(field_name) + "` of type " + to_string(dot_obj)); + fire(cur_f, v_dot->get_identifier()->loc, "non-existing method `" + static_cast(field_name) + "` of type " + to_string(dot_obj)); } } @@ -981,262 +892,305 @@ class InferCheckTypesAndCallsAndFieldsVisitor final { // fun_ref remains nullptr } - // infer argument types, looking at fun_ref's parameters as hints - for (int i = 0; i < v->get_num_args(); ++i) { - TypePtr param_type = fun_ref && i < fun_ref->get_num_params() - delta_self ? fun_ref->parameters[delta_self + i].declared_type : nullptr; - auto arg_i = v->get_arg(i); - infer_any_expr(arg_i->get_expr(), param_type && !param_type->has_genericT_inside() ? param_type : nullptr); - assign_inferred_type(arg_i, arg_i->get_expr()); - } - // handle `local_var()` / `getF()()` / `5()` / `SOME_CONST()` / `obj.method()()()` / `tensorVar.0()` if (!fun_ref) { - // treat callee like a usual expression, which must have "callable" inferred type - infer_any_expr(callee); + // treat callee like a usual expression + flow = infer_any_expr(callee, std::move(flow), false).out_flow; + // it must have "callable" inferred type const TypeDataFunCallable* f_callable = callee->inferred_type->try_as(); if (!f_callable) { // `5()` / `SOME_CONST()` / `null()` - v->error("calling a non-function"); + fire(cur_f, v->loc, "calling a non-function " + to_string(callee->inferred_type)); } - // check arguments count and their types + // check arguments count (their types will be checked in a later pipe) if (v->get_num_args() != static_cast(f_callable->params_types.size())) { - v->error("expected " + std::to_string(f_callable->params_types.size()) + " arguments, got " + std::to_string(v->get_arg_list()->size())); + fire(cur_f, v->loc, "expected " + std::to_string(f_callable->params_types.size()) + " arguments, got " + std::to_string(v->get_arg_list()->size())); } for (int i = 0; i < 
v->get_num_args(); ++i) { - if (!f_callable->params_types[i]->can_rhs_be_assigned(v->get_arg(i)->inferred_type)) { - v->get_arg(i)->error("can not pass " + to_string(v->get_arg(i)) + " to " + to_string(f_callable->params_types[i])); - } + auto arg_i = v->get_arg(i)->get_expr(); + flow = infer_any_expr(arg_i, std::move(flow), false, f_callable->params_types[i]).out_flow; + assign_inferred_type(v->get_arg(i), arg_i); } v->mutate()->assign_fun_ref(nullptr); // no fun_ref to a global function assign_inferred_type(v, f_callable->return_type); - return; + return ExprFlow(std::move(flow), used_as_condition); } // so, we have a call `f(args)` or `obj.f(args)`, f is a global function (fun_ref) (code / asm / builtin) + // we're going to iterate over passed arguments, and (if generic) infer substitutionTs + // at first, check arguments count (Tolk doesn't have optional parameters, so just compare counts) + int n_arguments = v->get_num_args() + delta_self; + int n_parameters = fun_ref->get_num_params(); + if (!n_parameters && dot_obj) { + fire(cur_f, v->loc, "`" + fun_ref->name + "` has no parameters and can not be called as method"); + } + if (n_parameters < n_arguments) { + fire(cur_f, v->loc, "too many arguments in call to `" + fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); + } + if (n_arguments < n_parameters) { + fire(cur_f, v->loc, "too few arguments in call to `" + fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); + } + + // now, for every passed argument, we need to infer its type + // for regular functions, it's obvious + // but for generic functions, we need to infer type arguments (substitutionTs) on the fly + // (unless Ts are specified by a user like `f(args)` / `t.tupleAt()`, take them) + GenericSubstitutionsDeduceForCall* deducingTs = fun_ref->is_generic_function() ? 
new GenericSubstitutionsDeduceForCall(fun_ref) : nullptr; + if (deducingTs && v_instantiationTs) { + deducingTs->provide_manually_specified(collect_fun_generic_substitutions_from_manually_specified(v->loc, fun_ref, v_instantiationTs)); + } + + // loop over every argument, for `obj.method()` obj is the first one + // if genericT deducing has a conflict, ParseError is thrown + // note, that deducing Ts one by one is important to manage control flow (mutate params work like assignments) + // a corner case, e.g. `f(v1:T?, v2:T?)` and `f(null,2)` will fail on first argument, won't try the second one + if (dot_obj) { + const LocalVarData& param_0 = fun_ref->parameters[0]; + TypePtr param_type = param_0.declared_type; + if (param_type->has_genericT_inside()) { + param_type = deducingTs->auto_deduce_from_argument(cur_f, dot_obj->loc, param_type, dot_obj->inferred_type); + } + if (param_0.is_mutate_parameter() && dot_obj->inferred_type != param_type) { + if (SinkExpression s_expr = extract_sink_expression_from_vertex(dot_obj)) { + assign_inferred_type(dot_obj, calc_declared_type_before_smart_cast(dot_obj)); + flow.register_known_type(s_expr, param_type); + } + } + } + for (int i = 0; i < v->get_num_args(); ++i) { + const LocalVarData& param_i = fun_ref->parameters[delta_self + i]; + AnyExprV arg_i = v->get_arg(i)->get_expr(); + TypePtr param_type = param_i.declared_type; + if (param_type->has_genericT_inside() && deducingTs->is_manually_specified()) { // `f(a)` + param_type = deducingTs->replace_by_manually_specified(param_type); + } + if (param_type->has_genericT_inside()) { // `f(a)` where f is generic: use `a` to infer param type + // then arg_i is inferred without any hint + flow = infer_any_expr(arg_i, std::move(flow), false).out_flow; + param_type = deducingTs->auto_deduce_from_argument(cur_f, arg_i->loc, param_type, arg_i->inferred_type); + } else { + // param_type is hint, helps infer arg_i + flow = infer_any_expr(arg_i, std::move(flow), false, param_type).out_flow; 
+ } + assign_inferred_type(v->get_arg(i), arg_i); // arg itself is an expression + if (param_i.is_mutate_parameter() && arg_i->inferred_type != param_type) { + if (SinkExpression s_expr = extract_sink_expression_from_vertex(arg_i)) { + assign_inferred_type(arg_i, calc_declared_type_before_smart_cast(arg_i)); + flow.register_known_type(s_expr, param_type); + } + } + } + // if it's a generic function `f`, we need to instantiate it, like `f` // same for generic methods `t.tupleAt`, need to achieve `t.tupleAt` - if (fun_ref->is_generic_function() && v_instantiationTs) { - // if Ts are specified by a user like `f(args)` / `t.tupleAt()`, take them - std::vector substitutions = collect_fun_generic_substitutions_from_manually_specified(v->loc, fun_ref, v_instantiationTs); - fun_ref = check_and_instantiate_generic_function(v->loc, fun_ref, std::move(substitutions)); - - } else if (fun_ref->is_generic_function()) { - // if `f` called like `f(args)`, deduce T from arg types - std::vector arg_types; - arg_types.reserve(delta_self + v->get_num_args()); - if (dot_obj) { - arg_types.push_back(dot_obj->inferred_type); + if (fun_ref->is_generic_function()) { + // if `f(args)` was called, Ts were inferred; check that all of them are known + int idx = deducingTs->get_first_not_deduced_idx(); + if (idx != -1 && hint && fun_ref->declared_return_type->has_genericT_inside()) { + // example: `t.tupleFirst()`, T doesn't depend on arguments, but is determined by return type + // if used like `var x: int = t.tupleFirst()` / `t.tupleFirst() as int` / etc., use hint + deducingTs->auto_deduce_from_argument(cur_f, v->loc, fun_ref->declared_return_type, hint); + idx = deducingTs->get_first_not_deduced_idx(); } - for (int i = 0; i < v->get_num_args(); ++i) { - arg_types.push_back(v->get_arg(i)->inferred_type); + if (idx != -1) { + fire(cur_f, v->loc, "can not deduce " + fun_ref->genericTs->get_nameT(idx)); } - - td::Result> deduced = deduce_substitutionTs_on_generic_func_call(fun_ref, 
std::move(arg_types), hint); - if (deduced.is_error()) { - v->error(deduced.error().message().str() + " for generic function " + to_string(fun_ref)); - } - fun_ref = check_and_instantiate_generic_function(v->loc, fun_ref, deduced.move_as_ok()); + fun_ref = check_and_instantiate_generic_function(v->loc, fun_ref, deducingTs->flush()); + delete deducingTs; } else if (UNLIKELY(v_instantiationTs != nullptr)) { // non-generic function/method called with type arguments, like `c.cellHash()` / `beginCell()` - v_instantiationTs->error("calling a not generic function with generic T"); + fire(cur_f, v_instantiationTs->loc, "calling a not generic function with generic T"); } v->mutate()->assign_fun_ref(fun_ref); // since for `t.tupleAt()`, infer_dot_access() not called for callee = "t.tupleAt", assign its target here if (v->is_dot_call()) { v->get_callee()->as()->mutate()->assign_target(fun_ref); - v->get_callee()->as()->mutate()->assign_inferred_type(fun_ref->inferred_full_type); } - // check arguments count and their types - check_function_arguments(fun_ref, v->get_arg_list(), dot_obj); // get return type either from user-specified declaration or infer here on demand traversing its body get_or_infer_return_type(fun_ref); TypePtr inferred_type = dot_obj && fun_ref->does_return_self() ? 
dot_obj->inferred_type : fun_ref->inferred_return_type; assign_inferred_type(v, inferred_type); assign_inferred_type(callee, fun_ref->inferred_full_type); - if (fun_ref->is_builtin_function() && fun_ref->name[0] == '_') { - handle_possible_compiler_internal_call(current_function, v); + if (inferred_type == TypeDataNever::create()) { + flow.mark_unreachable(UnreachableKind::CallNeverReturnFunction); } // note, that mutate params don't affect typing, they are handled when converting to IR + return ExprFlow(std::move(flow), used_as_condition); } - void infer_tensor(V v, TypePtr hint) { + ExprFlow infer_tensor(V v, FlowContext&& flow, bool used_as_condition, TypePtr hint) { const TypeDataTensor* tensor_hint = hint ? hint->try_as() : nullptr; std::vector types_list; types_list.reserve(v->get_items().size()); for (int i = 0; i < v->size(); ++i) { AnyExprV item = v->get_item(i); - infer_any_expr(item, tensor_hint && i < tensor_hint->size() ? tensor_hint->items[i] : nullptr); + flow = infer_any_expr(item, std::move(flow), false, tensor_hint && i < tensor_hint->size() ? tensor_hint->items[i] : nullptr).out_flow; types_list.emplace_back(item->inferred_type); } assign_inferred_type(v, TypeDataTensor::create(std::move(types_list))); + return ExprFlow(std::move(flow), used_as_condition); } - void infer_typed_tuple(V v, TypePtr hint) { + ExprFlow infer_typed_tuple(V v, FlowContext&& flow, bool used_as_condition, TypePtr hint) { const TypeDataTypedTuple* tuple_hint = hint ? hint->try_as() : nullptr; std::vector types_list; types_list.reserve(v->get_items().size()); for (int i = 0; i < v->size(); ++i) { AnyExprV item = v->get_item(i); - infer_any_expr(item, tuple_hint && i < tuple_hint->size() ? tuple_hint->items[i] : nullptr); - if (item->inferred_type->calc_width_on_stack() != 1) { - fire_error_cannot_put_non1_stack_width_arg_to_tuple(v->get_item(i)->loc, item->inferred_type); - } + flow = infer_any_expr(item, std::move(flow), false, tuple_hint && i < tuple_hint->size() ? 
tuple_hint->items[i] : nullptr).out_flow; types_list.emplace_back(item->inferred_type); } assign_inferred_type(v, TypeDataTypedTuple::create(std::move(types_list))); + return ExprFlow(std::move(flow), used_as_condition); } - static void infer_null_keyword(V v) { + static ExprFlow infer_null_keyword(V v, FlowContext&& flow, bool used_as_condition) { assign_inferred_type(v, TypeDataNullLiteral::create()); + + return ExprFlow(std::move(flow), used_as_condition); } - static void infer_underscore(V v, TypePtr hint) { + static ExprFlow infer_underscore(V v, FlowContext&& flow, bool used_as_condition, TypePtr hint) { // if execution is here, underscore is either used as lhs of assignment, or incorrectly, like `f(_)` // more precise is to always set unknown here, but for incorrect usages, instead of an error // "can not pass unknown to X" would better be an error it can't be used as a value, at later steps assign_inferred_type(v, hint ? hint : TypeDataUnknown::create()); + return ExprFlow(std::move(flow), used_as_condition); } - static void infer_empty_expression(V v) { + static ExprFlow infer_empty_expression(V v, FlowContext&& flow, bool used_as_condition) { assign_inferred_type(v, TypeDataUnknown::create()); + return ExprFlow(std::move(flow), used_as_condition); } - void process_sequence(V v) { + FlowContext process_sequence(V v, FlowContext&& flow) { + // we'll print a warning if after some statement, control flow became unreachable + // (but don't print a warning if it's already unreachable, for example we're inside always-false if) + bool initially_unreachable = flow.is_unreachable(); for (AnyV item : v->get_items()) { - process_any_statement(item); + if (flow.is_unreachable() && !initially_unreachable && !v->first_unreachable && item->type != ast_empty_statement) { + v->mutate()->assign_first_unreachable(item); // a warning will be printed later, after type checking + } + flow = process_any_statement(item, std::move(flow)); } + return flow; } - static bool 
is_expr_valid_as_return_self(AnyExprV return_expr) { - // `return self` - if (return_expr->type == ast_reference && return_expr->as()->get_name() == "self") { - return true; - } - // `return self.someMethod()` - if (auto v_call = return_expr->try_as(); v_call && v_call->is_dot_call()) { - return v_call->fun_maybe && v_call->fun_maybe->does_return_self() && is_expr_valid_as_return_self(v_call->get_dot_obj()); - } - // `return cond ? ... : ...` - if (auto v_ternary = return_expr->try_as()) { - return is_expr_valid_as_return_self(v_ternary->get_when_true()) && is_expr_valid_as_return_self(v_ternary->get_when_false()); - } - return false; - } - - void process_return_statement(V v) { + FlowContext process_return_statement(V v, FlowContext&& flow) { if (v->has_return_value()) { - infer_any_expr(v->get_return_value(), current_function->declared_return_type); + flow = infer_any_expr(v->get_return_value(), std::move(flow), false, cur_f->declared_return_type).out_flow; } else { assign_inferred_type(v->get_return_value(), TypeDataVoid::create()); } - if (current_function->does_return_self()) { - return_unifier.unify_with(current_function->parameters[0].declared_type); - if (!is_expr_valid_as_return_self(v->get_return_value())) { - v->error("invalid return from `self` function"); - } - return; - } + flow.mark_unreachable(UnreachableKind::ReturnStatement); - TypePtr expr_type = v->get_return_value()->inferred_type; - if (current_function->declared_return_type) { - if (!current_function->declared_return_type->can_rhs_be_assigned(expr_type)) { - v->get_return_value()->error("can not convert type " + to_string(expr_type) + " to return type " + to_string(current_function->declared_return_type)); - } - } else { - if (!return_unifier.unify_with(expr_type)) { - v->get_return_value()->error("can not unify type " + to_string(expr_type) + " with previous return type " + to_string(return_unifier.get_result())); - } + if (!cur_f->declared_return_type) { + 
return_statements.push_back(v->get_return_value()); // for future unification } + return flow; } - void process_if_statement(V v) { - AnyExprV cond = v->get_cond(); - infer_any_expr(cond); - if (!expect_integer(cond) && !expect_boolean(cond)) { - cond->error("can not use " + to_string(cond) + " as a boolean condition"); - } - process_any_statement(v->get_if_body()); - process_any_statement(v->get_else_body()); + FlowContext process_if_statement(V v, FlowContext&& flow) { + ExprFlow after_cond = infer_any_expr(v->get_cond(), std::move(flow), true); + v->get_cond()->mutate()->assign_always_true_or_false(after_cond.get_always_true_false_state()); + + FlowContext true_flow = process_any_statement(v->get_if_body(), std::move(after_cond.true_flow)); + FlowContext false_flow = process_any_statement(v->get_else_body(), std::move(after_cond.false_flow)); + + return FlowContext::merge_flow(std::move(true_flow), std::move(false_flow)); } - void process_repeat_statement(V v) { - AnyExprV cond = v->get_cond(); - infer_any_expr(cond); - if (!expect_integer(cond)) { - cond->error("condition of `repeat` must be an integer, got " + to_string(cond)); - } - process_any_statement(v->get_body()); + FlowContext process_repeat_statement(V v, FlowContext&& flow) { + ExprFlow after_cond = infer_any_expr(v->get_cond(), std::move(flow), false); + + return process_any_statement(v->get_body(), std::move(after_cond.out_flow)); } - void process_while_statement(V v) { - AnyExprV cond = v->get_cond(); - infer_any_expr(cond); - if (!expect_integer(cond) && !expect_boolean(cond)) { - cond->error("can not use " + to_string(cond) + " as a boolean condition"); - } - process_any_statement(v->get_body()); + FlowContext process_while_statement(V v, FlowContext&& flow) { + // loops are inferred twice, to merge body outcome with the state before the loop + // (a more correct approach would be not "twice", but "find a fixed point when state stop changing") + // also remember, we don't have a `break` 
statement, that's why when loop exits, condition became false + FlowContext loop_entry_facts = flow.clone(); + ExprFlow after_cond = infer_any_expr(v->get_cond(), std::move(flow), true); + FlowContext body_out = process_any_statement(v->get_body(), std::move(after_cond.true_flow)); + // second time, to refine all types + flow = FlowContext::merge_flow(std::move(loop_entry_facts), std::move(body_out)); + ExprFlow after_cond2 = infer_any_expr(v->get_cond(), std::move(flow), true); + v->get_cond()->mutate()->assign_always_true_or_false(after_cond2.get_always_true_false_state()); + + process_any_statement(v->get_body(), std::move(after_cond2.true_flow)); + + return std::move(after_cond2.false_flow); } - void process_do_while_statement(V v) { - process_any_statement(v->get_body()); - AnyExprV cond = v->get_cond(); - infer_any_expr(cond); - if (!expect_integer(cond) && !expect_boolean(cond)) { - cond->error("can not use " + to_string(cond) + " as a boolean condition"); - } + FlowContext process_do_while_statement(V v, FlowContext&& flow) { + // do while is also handled twice; read comments above + FlowContext loop_entry_facts = flow.clone(); + flow = process_any_statement(v->get_body(), std::move(flow)); + ExprFlow after_cond = infer_any_expr(v->get_cond(), std::move(flow), true); + // second time + flow = FlowContext::merge_flow(std::move(loop_entry_facts), std::move(after_cond.true_flow)); + flow = process_any_statement(v->get_body(), std::move(flow)); + ExprFlow after_cond2 = infer_any_expr(v->get_cond(), std::move(flow), true); + v->get_cond()->mutate()->assign_always_true_or_false(after_cond2.get_always_true_false_state()); + + return std::move(after_cond2.false_flow); } - void process_throw_statement(V v) { - infer_any_expr(v->get_thrown_code()); - if (!expect_integer(v->get_thrown_code())) { - v->get_thrown_code()->error("excNo of `throw` must be an integer, got " + to_string(v->get_thrown_code())); - } - infer_any_expr(v->get_thrown_arg()); - if 
(v->has_thrown_arg() && v->get_thrown_arg()->inferred_type->calc_width_on_stack() != 1) { - v->get_thrown_arg()->error("can not throw " + to_string(v->get_thrown_arg()) + ", exception arg must occupy exactly 1 stack slot"); - } + FlowContext process_throw_statement(V v, FlowContext&& flow) { + flow = infer_any_expr(v->get_thrown_code(), std::move(flow), false).out_flow; + flow = infer_any_expr(v->get_thrown_arg(), std::move(flow), false).out_flow; + flow.mark_unreachable(UnreachableKind::ThrowStatement); + return flow; } - void process_assert_statement(V v) { - AnyExprV cond = v->get_cond(); - infer_any_expr(cond); - if (!expect_integer(cond) && !expect_boolean(cond)) { - cond->error("can not use " + to_string(cond) + " as a boolean condition"); - } - infer_any_expr(v->get_thrown_code()); - if (!expect_integer(v->get_thrown_code())) { - v->get_cond()->error("thrown excNo of `assert` must be an integer, got " + to_string(v->get_cond())); - } + FlowContext process_assert_statement(V v, FlowContext&& flow) { + ExprFlow after_cond = infer_any_expr(v->get_cond(), std::move(flow), true); + v->get_cond()->mutate()->assign_always_true_or_false(after_cond.get_always_true_false_state()); + + ExprFlow after_throw = infer_any_expr(v->get_thrown_code(), std::move(after_cond.false_flow), false); + return std::move(after_cond.true_flow); } - static void process_catch_variable(AnyExprV catch_var, TypePtr catch_var_type) { + static FlowContext process_catch_variable(AnyExprV catch_var, TypePtr catch_var_type, FlowContext&& flow) { if (auto v_ref = catch_var->try_as(); v_ref && v_ref->sym) { // not underscore - assign_inferred_type(v_ref->sym->as(), catch_var_type); + LocalVarPtr var_ref = v_ref->sym->try_as(); + assign_inferred_type(var_ref, catch_var_type); + flow.register_known_type(SinkExpression(var_ref), catch_var_type); } assign_inferred_type(catch_var, catch_var_type); + return flow; } - void process_try_catch_statement(V v) { - process_any_statement(v->get_try_body()); + 
FlowContext process_try_catch_statement(V v, FlowContext&& flow) { + FlowContext before_try = flow.clone(); + FlowContext try_end = process_any_statement(v->get_try_body(), std::move(flow)); // `catch` has exactly 2 variables: excNo and arg (when missing, they are implicit underscores) // `arg` is a curious thing, it can be any TVM primitive, so assign unknown to it - // hence, using `fInt(arg)` (int from parameter is a hint) or `arg as slice` works well + // hence, using `fInt(arg)` (int from parameter is a target type) or `arg as slice` works well // it's not truly correct, because `arg as (int,int)` also compiles, but can never happen, but let it be user responsibility + FlowContext catch_flow = std::move(before_try); tolk_assert(v->get_catch_expr()->size() == 2); std::vector types_list = {TypeDataInt::create(), TypeDataUnknown::create()}; - process_catch_variable(v->get_catch_expr()->get_item(0), types_list[0]); - process_catch_variable(v->get_catch_expr()->get_item(1), types_list[1]); + catch_flow = process_catch_variable(v->get_catch_expr()->get_item(0), types_list[0], std::move(catch_flow)); + catch_flow = process_catch_variable(v->get_catch_expr()->get_item(1), types_list[1], std::move(catch_flow)); assign_inferred_type(v->get_catch_expr(), TypeDataTensor::create(std::move(types_list))); - process_any_statement(v->get_catch_body()); + FlowContext catch_end = process_any_statement(v->get_catch_body(), std::move(catch_flow)); + return FlowContext::merge_flow(std::move(try_end), std::move(catch_end)); + } + + FlowContext process_expression_statement(AnyExprV v, FlowContext&& flow) { + ExprFlow after_v = infer_any_expr(v, std::move(flow), false); + return std::move(after_v.out_flow); } public: - static void assign_fun_full_type(const FunctionData* fun_ref, TypePtr inferred_return_type) { - // calculate function full type `fun(params) -> ret_type` + static void assign_fun_full_type(FunctionPtr fun_ref, TypePtr inferred_return_type) { + // calculate function full 
type `(params) -> ret_type` std::vector params_types; params_types.reserve(fun_ref->get_num_params()); for (const LocalVarData& param : fun_ref->parameters) { @@ -1245,18 +1199,43 @@ public: assign_inferred_type(fun_ref, inferred_return_type, TypeDataFunCallable::create(std::move(params_types), inferred_return_type)); } - void start_visiting_function(const FunctionData* fun_ref, V v_function) { + void start_visiting_function(FunctionPtr fun_ref, V v_function) { + TypePtr inferred_return_type = fun_ref->declared_return_type; if (fun_ref->is_code_function()) { - current_function = fun_ref; - process_any_statement(v_function->get_body()); - current_function = nullptr; + FlowContext body_start; + for (const LocalVarData& param : fun_ref->parameters) { + body_start.register_known_type(SinkExpression(¶m), param.declared_type); + } - if (fun_ref->is_implicit_return()) { - bool is_ok_with_void = fun_ref->declared_return_type - ? fun_ref->declared_return_type->can_rhs_be_assigned(TypeDataVoid::create()) - : return_unifier.unify_with_implicit_return_void(); - if (!is_ok_with_void || fun_ref->does_return_self()) { - throw ParseError(v_function->get_body()->as()->loc_end, "missing return"); + cur_f = fun_ref; + FlowContext body_end = process_any_statement(v_function->get_body(), std::move(body_start)); + cur_f = nullptr; + + if (!body_end.is_unreachable()) { + fun_ref->mutate()->assign_is_implicit_return(); + if (fun_ref->declared_return_type == TypeDataNever::create()) { // `never` can only be declared, it can't be inferred + fire(fun_ref, v_function->get_body()->as()->loc_end, "a function returning `never` can not have a reachable endpoint"); + } + } + + if (!fun_ref->declared_return_type) { + TypeInferringUnifyStrategy return_unifier; + if (fun_ref->does_return_self()) { + return_unifier.unify_with(fun_ref->parameters[0].declared_type); + } + for (AnyExprV return_value : return_statements) { + if (!return_unifier.unify_with(return_value->inferred_type)) { + fire(cur_f, 
return_value->loc, "can not unify type " + to_string(return_value) + " with previous return type " + to_string(return_unifier.get_result())); + } + } + if (!body_end.is_unreachable()) { + if (!return_unifier.unify_with_implicit_return_void()) { + fire(cur_f, v_function->get_body()->as()->loc_end, "missing return"); + } + } + inferred_return_type = return_unifier.get_result(); + if (inferred_return_type == nullptr && body_end.is_unreachable()) { + inferred_return_type = TypeDataVoid::create(); } } } else { @@ -1264,7 +1243,6 @@ public: tolk_assert(fun_ref->declared_return_type); } - TypePtr inferred_return_type = fun_ref->declared_return_type ? fun_ref->declared_return_type : return_unifier.get_result(); assign_fun_full_type(fun_ref, inferred_return_type); fun_ref->mutate()->assign_is_type_inferring_done(); } @@ -1272,13 +1250,13 @@ public: class LaunchInferTypesAndMethodsOnce final { public: - static bool should_visit_function(const FunctionData* fun_ref) { + static bool should_visit_function(FunctionPtr fun_ref) { // since inferring can be requested on demand, prevent second execution from a regular pipeline launcher return !fun_ref->is_type_inferring_done() && !fun_ref->is_generic_function(); } - static void start_visiting_function(const FunctionData* fun_ref, V v_function) { - InferCheckTypesAndCallsAndFieldsVisitor visitor; + static void start_visiting_function(FunctionPtr fun_ref, V v_function) { + InferTypesAndCallsAndFieldsVisitor visitor; visitor.start_visiting_function(fun_ref, v_function); } }; @@ -1287,26 +1265,26 @@ public: // example: `fun f() { return g(); } fun g() { ... 
}` // when analyzing `f()`, we need to infer what fun_ref=g returns // (if `g` is generic, it was already instantiated, so fun_ref=g is here) -static void infer_and_save_return_type_of_function(const FunctionData* fun_ref) { - static std::vector called_stack; +static void infer_and_save_return_type_of_function(FunctionPtr fun_ref) { + static std::vector called_stack; tolk_assert(!fun_ref->is_generic_function() && !fun_ref->is_type_inferring_done()); // if `g` has return type declared, like `fun g(): int { ... }`, don't traverse its body if (fun_ref->declared_return_type) { - InferCheckTypesAndCallsAndFieldsVisitor::assign_fun_full_type(fun_ref, fun_ref->declared_return_type); + InferTypesAndCallsAndFieldsVisitor::assign_fun_full_type(fun_ref, fun_ref->declared_return_type); return; } // prevent recursion of untyped functions, like `fun f() { return g(); } fun g() { return f(); }` bool contains = std::find(called_stack.begin(), called_stack.end(), fun_ref) != called_stack.end(); if (contains) { - fun_ref->ast_root->error("could not infer return type of " + to_string(fun_ref) + ", because it appears in a recursive call chain; specify `: ` manually"); + fire(fun_ref, fun_ref->loc, "could not infer return type of " + to_string(fun_ref) + ", because it appears in a recursive call chain; specify `: ` manually"); } // dig into g's body; it's safe, since the compiler is single-threaded // on finish, fun_ref->inferred_return_type is filled, and won't be called anymore called_stack.push_back(fun_ref); - InferCheckTypesAndCallsAndFieldsVisitor visitor; + InferTypesAndCallsAndFieldsVisitor visitor; visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); called_stack.pop_back(); } @@ -1315,8 +1293,8 @@ void pipeline_infer_types_and_calls_and_fields() { visit_ast_of_all_functions(); } -void pipeline_infer_types_and_calls_and_fields(const FunctionData* fun_ref) { - InferCheckTypesAndCallsAndFieldsVisitor visitor; +void 
pipeline_infer_types_and_calls_and_fields(FunctionPtr fun_ref) { + InferTypesAndCallsAndFieldsVisitor visitor; visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); } diff --git a/tolk/pipe-optimize-boolean-expr.cpp b/tolk/pipe-optimize-boolean-expr.cpp index 03750256..c4c5d1dc 100644 --- a/tolk/pipe-optimize-boolean-expr.cpp +++ b/tolk/pipe-optimize-boolean-expr.cpp @@ -25,7 +25,6 @@ * * Example: `boolVar == true` -> `boolVar`. * Example: `!!boolVar` -> `boolVar`. - * Also in unwraps parenthesis inside if condition and similar: `assert(((x)), 404)` -> `assert(x, 404)` * * todo some day, replace && || with & | when it's safe (currently, && always produces IFs in Fift) * It's tricky to implement whether replacing is safe. @@ -35,13 +34,6 @@ namespace tolk { -static AnyExprV unwrap_parenthesis(AnyExprV v) { - while (v->type == ast_parenthesized_expression) { - v = v->as()->get_expr(); - } - return v; -} - struct OptimizerBooleanExpressionsReplacer final : ASTReplacerInFunctionBody { static V create_int_const(SrcLocation loc, td::RefInt256&& intval) { auto v_int = createV(loc, std::move(intval), {}); @@ -61,7 +53,7 @@ struct OptimizerBooleanExpressionsReplacer final : ASTReplacerInFunctionBody { auto v_not = createV(loc, "!", tok_logical_not, rhs); v_not->assign_inferred_type(TypeDataBool::create()); v_not->assign_rvalue_true(); - v_not->assign_fun_ref(lookup_global_symbol("!b_")->as()); + v_not->assign_fun_ref(lookup_global_symbol("!b_")->try_as()); return v_not; } @@ -83,7 +75,7 @@ protected: auto v_neq = createV(v->loc, "!=", tok_neq, cond_not_not, v_zero); v_neq->mutate()->assign_rvalue_true(); v_neq->mutate()->assign_inferred_type(TypeDataBool::create()); - v_neq->mutate()->assign_fun_ref(lookup_global_symbol("_!=_")->as()); + v_neq->mutate()->assign_fun_ref(lookup_global_symbol("_!=_")->try_as()); return v_neq; } } @@ -117,9 +109,6 @@ protected: AnyV replace(V v) override { parent::replace(v); - if (v->get_cond()->type == 
ast_parenthesized_expression) { - v = createV(v->loc, v->is_ifnot, unwrap_parenthesis(v->get_cond()), v->get_if_body(), v->get_else_body()); - } // `if (!x)` -> ifnot(x) while (auto v_cond_unary = v->get_cond()->try_as()) { @@ -128,39 +117,17 @@ protected: } v = createV(v->loc, !v->is_ifnot, v_cond_unary->get_rhs(), v->get_if_body(), v->get_else_body()); } - - return v; - } - - AnyV replace(V v) override { - parent::replace(v); - - if (v->get_cond()->type == ast_parenthesized_expression) { - v = createV(v->loc, unwrap_parenthesis(v->get_cond()), v->get_body()); + // `if (x != null)` -> ifnot(x == null) + if (auto v_cond_isnull = v->get_cond()->try_as(); v_cond_isnull && v_cond_isnull->is_negated) { + v_cond_isnull->mutate()->assign_is_negated(!v_cond_isnull->is_negated); + v = createV(v->loc, !v->is_ifnot, v_cond_isnull, v->get_if_body(), v->get_else_body()); } - return v; - } - AnyV replace(V v) override { - parent::replace(v); - - if (v->get_cond()->type == ast_parenthesized_expression) { - v = createV(v->loc, v->get_body(), unwrap_parenthesis(v->get_cond())); - } - return v; - } - - AnyV replace(V v) override { - parent::replace(v); - - if (v->get_cond()->type == ast_parenthesized_expression) { - v = createV(v->loc, unwrap_parenthesis(v->get_cond()), v->get_thrown_code()); - } return v; } public: - bool should_visit_function(const FunctionData* fun_ref) override { + bool should_visit_function(FunctionPtr fun_ref) override { return fun_ref->is_code_function() && !fun_ref->is_generic_function(); } }; diff --git a/tolk/pipe-refine-lvalue-for-mutate.cpp b/tolk/pipe-refine-lvalue-for-mutate.cpp index 540d7413..a8b4f1ae 100644 --- a/tolk/pipe-refine-lvalue-for-mutate.cpp +++ b/tolk/pipe-refine-lvalue-for-mutate.cpp @@ -34,7 +34,7 @@ namespace tolk { GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_invalid_mutate_arg_passed(AnyExprV v, const FunctionData* fun_ref, const LocalVarData& p_sym, bool called_as_method, bool arg_passed_as_mutate, AnyV 
arg_expr) { +static void fire_error_invalid_mutate_arg_passed(AnyExprV v, FunctionPtr fun_ref, const LocalVarData& p_sym, bool called_as_method, bool arg_passed_as_mutate, AnyV arg_expr) { std::string arg_str(arg_expr->type == ast_reference ? arg_expr->as()->get_name() : "obj"); // case: `loadInt(cs, 32)`; suggest: `cs.loadInt(32)` @@ -60,7 +60,7 @@ static void fire_error_invalid_mutate_arg_passed(AnyExprV v, const FunctionData* class RefineLvalueForMutateArgumentsVisitor final : public ASTVisitorFunctionBody { void visit(V v) override { // v is `globalF(args)` / `globalF(args)` / `obj.method(args)` / `local_var(args)` / `getF()(args)` - const FunctionData* fun_ref = v->fun_maybe; + FunctionPtr fun_ref = v->fun_maybe; if (!fun_ref) { parent::visit(v); for (int i = 0; i < v->get_num_args(); ++i) { @@ -86,6 +86,8 @@ class RefineLvalueForMutateArgumentsVisitor final : public ASTVisitorFunctionBod leftmost_obj = as_par->get_expr(); } else if (auto as_cast = leftmost_obj->try_as()) { leftmost_obj = as_cast->get_expr(); + } else if (auto as_nn = leftmost_obj->try_as()) { + leftmost_obj = as_nn->get_expr(); } else { break; } @@ -114,7 +116,7 @@ class RefineLvalueForMutateArgumentsVisitor final : public ASTVisitorFunctionBod } public: - bool should_visit_function(const FunctionData* fun_ref) override { + bool should_visit_function(FunctionPtr fun_ref) override { return fun_ref->is_code_function() && !fun_ref->is_generic_function(); } }; diff --git a/tolk/pipe-register-symbols.cpp b/tolk/pipe-register-symbols.cpp index 2dae0d23..45246d6b 100644 --- a/tolk/pipe-register-symbols.cpp +++ b/tolk/pipe-register-symbols.cpp @@ -176,8 +176,8 @@ static void register_function(V v) { genericTs = construct_genericTs(v->genericsT_list); } if (v->is_builtin_function()) { - const Symbol* builtin_func = lookup_global_symbol(func_name); - const FunctionData* fun_ref = builtin_func ? 
builtin_func->as() : nullptr; + const Symbol* sym = lookup_global_symbol(func_name); + FunctionPtr fun_ref = sym ? sym->try_as() : nullptr; if (!fun_ref || !fun_ref->is_builtin_function()) { v->error("`builtin` used for non-builtin function"); } @@ -202,7 +202,7 @@ static void register_function(V v) { f_sym->method_id = static_cast(v->method_id->to_long()); } else if (v->flags & FunctionData::flagGetMethod) { f_sym->method_id = calculate_method_id_by_func_name(func_name); - for (const FunctionData* other : G.all_get_methods) { + for (FunctionPtr other : G.all_get_methods) { if (other->method_id == f_sym->method_id) { v->error(PSTRING() << "GET methods hash collision: `" << other->name << "` and `" << f_sym->name << "` produce the same hash. Consider renaming one of these functions."); } diff --git a/tolk/pipe-resolve-identifiers.cpp b/tolk/pipe-resolve-identifiers.cpp index 03b23c3c..5a735885 100644 --- a/tolk/pipe-resolve-identifiers.cpp +++ b/tolk/pipe-resolve-identifiers.cpp @@ -59,20 +59,20 @@ namespace tolk { GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_undefined_symbol(V v) { +static void fire_error_undefined_symbol(FunctionPtr cur_f, V v) { if (v->name == "self") { - v->error("using `self` in a non-member function (it does not accept the first `self` parameter)"); + throw ParseError(cur_f, v->loc, "using `self` in a non-member function (it does not accept the first `self` parameter)"); } else { - v->error("undefined symbol `" + static_cast(v->name) + "`"); + throw ParseError(cur_f, v->loc, "undefined symbol `" + static_cast(v->name) + "`"); } } GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_unknown_type_name(SrcLocation loc, const std::string &text) { - throw ParseError(loc, "unknown type name `" + text + "`"); +static void fire_error_unknown_type_name(FunctionPtr cur_f, SrcLocation loc, const std::string &text) { + throw ParseError(cur_f, loc, "unknown type name `" + text + "`"); } -static void 
check_import_exists_when_using_sym(AnyV v_usage, const Symbol* used_sym) { +static void check_import_exists_when_using_sym(FunctionPtr cur_f, AnyV v_usage, const Symbol* used_sym) { SrcLocation sym_loc = used_sym->loc; if (!v_usage->loc.is_symbol_from_same_or_builtin_file(sym_loc)) { const SrcFile* declared_in = sym_loc.get_src_file(); @@ -83,7 +83,7 @@ static void check_import_exists_when_using_sym(AnyV v_usage, const Symbol* used_ } } if (!has_import) { - v_usage->error("Using a non-imported symbol `" + used_sym->name + "`. Forgot to import \"" + declared_in->rel_filename + "\"?"); + throw ParseError(cur_f, v_usage->loc, "Using a non-imported symbol `" + used_sym->name + "`. Forgot to import \"" + declared_in->rel_filename + "\"?"); } } } @@ -119,7 +119,7 @@ struct NameAndScopeResolver { return G.symtable.lookup(name); } - void add_local_var(const LocalVarData* v_sym) { + void add_local_var(LocalVarPtr v_sym) { if (UNLIKELY(scopes.empty())) { throw Fatal("unexpected scope_level = 0"); } @@ -137,40 +137,41 @@ struct NameAndScopeResolver { struct TypeDataResolver { GNU_ATTRIBUTE_NOINLINE - static TypePtr resolve_identifiers_in_type_data(TypePtr type_data, const GenericsDeclaration* genericTs) { - return type_data->replace_children_custom([genericTs](TypePtr child) { + static TypePtr resolve_identifiers_in_type_data(FunctionPtr cur_f, TypePtr type_data, const GenericsDeclaration* genericTs) { + return type_data->replace_children_custom([cur_f, genericTs](TypePtr child) { if (const TypeDataUnresolved* un = child->try_as()) { if (genericTs && genericTs->has_nameT(un->text)) { std::string nameT = un->text; return TypeDataGenericT::create(std::move(nameT)); } if (un->text == "auto") { - throw ParseError(un->loc, "`auto` type does not exist; just omit a type for local variable (will be inferred from assignment); parameters should always be typed"); + throw ParseError(cur_f, un->loc, "`auto` type does not exist; just omit a type for local variable (will be inferred from 
assignment); parameters should always be typed"); } if (un->text == "self") { - throw ParseError(un->loc, "`self` type can be used only as a return type of a function (enforcing it to be chainable)"); + throw ParseError(cur_f, un->loc, "`self` type can be used only as a return type of a function (enforcing it to be chainable)"); } - fire_error_unknown_type_name(un->loc, un->text); + fire_error_unknown_type_name(cur_f, un->loc, un->text); } return child; }); } }; -static TypePtr finalize_type_data(TypePtr type_data, const GenericsDeclaration* genericTs) { +static TypePtr finalize_type_data(FunctionPtr cur_f, TypePtr type_data, const GenericsDeclaration* genericTs) { if (!type_data || !type_data->has_unresolved_inside()) { return type_data; } - return TypeDataResolver::resolve_identifiers_in_type_data(type_data, genericTs); + return TypeDataResolver::resolve_identifiers_in_type_data(cur_f, type_data, genericTs); } class AssignSymInsideFunctionVisitor final : public ASTVisitorFunctionBody { // more correctly this field shouldn't be static, but currently there is no need to make it a part of state static NameAndScopeResolver current_scope; - static const FunctionData* current_function; + static FunctionPtr cur_f; + static const GenericsDeclaration* current_genericTs; - static const LocalVarData* create_local_var_sym(std::string_view name, SrcLocation loc, TypePtr declared_type, bool immutable) { + static LocalVarPtr create_local_var_sym(std::string_view name, SrcLocation loc, TypePtr declared_type, bool immutable) { LocalVarData* v_sym = new LocalVarData(static_cast(name), loc, declared_type, immutable * LocalVarData::flagImmutable, -1); current_scope.add_local_var(v_sym); return v_sym; @@ -178,7 +179,7 @@ class AssignSymInsideFunctionVisitor final : public ASTVisitorFunctionBody { static void process_catch_variable(AnyExprV catch_var) { if (auto v_ref = catch_var->try_as()) { - const LocalVarData* var_ref = create_local_var_sym(v_ref->get_name(), catch_var->loc, 
nullptr, true); + LocalVarPtr var_ref = create_local_var_sym(v_ref->get_name(), catch_var->loc, nullptr, true); v_ref->mutate()->assign_sym(var_ref); } } @@ -188,16 +189,16 @@ protected: if (v->marked_as_redef) { const Symbol* sym = current_scope.lookup_symbol(v->get_name()); if (sym == nullptr) { - v->error("`redef` for unknown variable"); + throw ParseError(cur_f, v->loc, "`redef` for unknown variable"); } - const LocalVarData* var_ref = sym->try_as(); + LocalVarPtr var_ref = sym->try_as(); if (!var_ref) { - v->error("`redef` for unknown variable"); + throw ParseError(cur_f, v->loc, "`redef` for unknown variable"); } v->mutate()->assign_var_ref(var_ref); } else { - TypePtr declared_type = finalize_type_data(v->declared_type, current_function->genericTs); - const LocalVarData* var_ref = create_local_var_sym(v->get_name(), v->loc, declared_type, v->is_immutable); + TypePtr declared_type = finalize_type_data(cur_f, v->declared_type, current_genericTs); + LocalVarPtr var_ref = create_local_var_sym(v->get_name(), v->loc, declared_type, v->is_immutable); v->mutate()->assign_resolved_type(declared_type); v->mutate()->assign_var_ref(var_ref); } @@ -211,20 +212,20 @@ protected: void visit(V v) override { const Symbol* sym = current_scope.lookup_symbol(v->get_name()); if (!sym) { - fire_error_undefined_symbol(v->get_identifier()); + fire_error_undefined_symbol(cur_f, v->get_identifier()); } v->mutate()->assign_sym(sym); // for global functions, global vars and constants, `import` must exist - if (!sym->try_as()) { - check_import_exists_when_using_sym(v, sym); + if (!sym->try_as()) { + check_import_exists_when_using_sym(cur_f, v, sym); } // for `f` / `f`, resolve "MyAlias" and "T" // (for function call `f()`, this v (ast_reference `f`) is callee) if (auto v_instantiationTs = v->get_instantiationTs()) { for (int i = 0; i < v_instantiationTs->size(); ++i) { - TypePtr substituted_type = finalize_type_data(v_instantiationTs->get_item(i)->substituted_type, 
current_function->genericTs); + TypePtr substituted_type = finalize_type_data(cur_f, v_instantiationTs->get_item(i)->substituted_type, current_genericTs); v_instantiationTs->get_item(i)->mutate()->assign_resolved_type(substituted_type); } } @@ -235,7 +236,7 @@ protected: // (for function call `t.tupleAt()`, this v (ast_dot_access `t.tupleAt`) is callee) if (auto v_instantiationTs = v->get_instantiationTs()) { for (int i = 0; i < v_instantiationTs->size(); ++i) { - TypePtr substituted_type = finalize_type_data(v_instantiationTs->get_item(i)->substituted_type, current_function->genericTs); + TypePtr substituted_type = finalize_type_data(cur_f, v_instantiationTs->get_item(i)->substituted_type, current_genericTs); v_instantiationTs->get_item(i)->mutate()->assign_resolved_type(substituted_type); } } @@ -243,7 +244,7 @@ protected: } void visit(V v) override { - TypePtr cast_to_type = finalize_type_data(v->cast_to_type, current_function->genericTs); + TypePtr cast_to_type = finalize_type_data(cur_f, v->cast_to_type, current_genericTs); v->mutate()->assign_resolved_type(cast_to_type); parent::visit(v->get_expr()); } @@ -276,24 +277,25 @@ protected: } public: - bool should_visit_function(const FunctionData* fun_ref) override { + bool should_visit_function(FunctionPtr fun_ref) override { // this pipe is done just after parsing // visit both asm and code functions, resolve identifiers in parameter/return types everywhere // for generic functions, unresolved "T" will be replaced by TypeDataGenericT return true; } - void start_visiting_function(const FunctionData* fun_ref, V v) override { - current_function = fun_ref; + void start_visiting_function(FunctionPtr fun_ref, V v) override { + cur_f = fun_ref; + current_genericTs = fun_ref->genericTs; for (int i = 0; i < v->get_num_params(); ++i) { const LocalVarData& param_var = fun_ref->parameters[i]; - TypePtr declared_type = finalize_type_data(param_var.declared_type, fun_ref->genericTs); + TypePtr declared_type = 
finalize_type_data(cur_f, param_var.declared_type, fun_ref->genericTs); v->get_param(i)->mutate()->assign_param_ref(¶m_var); v->get_param(i)->mutate()->assign_resolved_type(declared_type); param_var.mutate()->assign_resolved_type(declared_type); } - TypePtr return_type = finalize_type_data(fun_ref->declared_return_type, fun_ref->genericTs); + TypePtr return_type = finalize_type_data(cur_f, fun_ref->declared_return_type, fun_ref->genericTs); v->mutate()->assign_resolved_type(return_type); fun_ref->mutate()->assign_resolved_type(return_type); @@ -308,12 +310,14 @@ public: tolk_assert(current_scope.scopes.empty()); } - current_function = nullptr; + current_genericTs = nullptr; + cur_f = nullptr; } }; NameAndScopeResolver AssignSymInsideFunctionVisitor::current_scope; -const FunctionData* AssignSymInsideFunctionVisitor::current_function = nullptr; +FunctionPtr AssignSymInsideFunctionVisitor::cur_f = nullptr; +const GenericsDeclaration* AssignSymInsideFunctionVisitor::current_genericTs = nullptr; void pipeline_resolve_identifiers_and_assign_symbols() { AssignSymInsideFunctionVisitor visitor; @@ -324,20 +328,22 @@ void pipeline_resolve_identifiers_and_assign_symbols() { visitor.start_visiting_function(v_func->fun_ref, v_func); } else if (auto v_global = v->try_as()) { - TypePtr declared_type = finalize_type_data(v_global->var_ref->declared_type, nullptr); + TypePtr declared_type = finalize_type_data(nullptr, v_global->var_ref->declared_type, nullptr); v_global->mutate()->assign_resolved_type(declared_type); v_global->var_ref->mutate()->assign_resolved_type(declared_type); - } else if (auto v_const = v->try_as(); v_const && v_const->declared_type) { - TypePtr declared_type = finalize_type_data(v_const->const_ref->declared_type, nullptr); - v_const->mutate()->assign_resolved_type(declared_type); - v_const->const_ref->mutate()->assign_resolved_type(declared_type); + } else if (auto v_const = v->try_as()) { + if (v_const->declared_type) { + TypePtr declared_type = 
finalize_type_data(nullptr, v_const->const_ref->declared_type, nullptr); + v_const->mutate()->assign_resolved_type(declared_type); + v_const->const_ref->mutate()->assign_resolved_type(declared_type); + } } } } } -void pipeline_resolve_identifiers_and_assign_symbols(const FunctionData* fun_ref) { +void pipeline_resolve_identifiers_and_assign_symbols(FunctionPtr fun_ref) { AssignSymInsideFunctionVisitor visitor; if (visitor.should_visit_function(fun_ref)) { visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); diff --git a/tolk/pipeline.h b/tolk/pipeline.h index 6aec2b5e..0a71d751 100644 --- a/tolk/pipeline.h +++ b/tolk/pipeline.h @@ -35,8 +35,8 @@ void pipeline_discover_and_parse_sources(const std::string& stdlib_filename, con void pipeline_register_global_symbols(); void pipeline_resolve_identifiers_and_assign_symbols(); void pipeline_calculate_rvalue_lvalue(); -void pipeline_detect_unreachable_statements(); void pipeline_infer_types_and_calls_and_fields(); +void pipeline_check_inferred_types(); void pipeline_refine_lvalue_for_mutate_arguments(); void pipeline_check_rvalue_lvalue(); void pipeline_check_pure_impure_operations(); @@ -49,10 +49,10 @@ void pipeline_generate_fif_output_to_std_cout(); // these pipes also can be called per-function individually // they are called for instantiated generics functions, when `f` is deeply cloned as `f` -void pipeline_resolve_identifiers_and_assign_symbols(const FunctionData*); -void pipeline_calculate_rvalue_lvalue(const FunctionData*); -void pipeline_detect_unreachable_statements(const FunctionData*); -void pipeline_infer_types_and_calls_and_fields(const FunctionData*); +void pipeline_resolve_identifiers_and_assign_symbols(FunctionPtr); +void pipeline_calculate_rvalue_lvalue(FunctionPtr); +void pipeline_detect_unreachable_statements(FunctionPtr); +void pipeline_infer_types_and_calls_and_fields(FunctionPtr); } // namespace tolk diff --git a/tolk/smart-casts-cfg.cpp b/tolk/smart-casts-cfg.cpp new file mode 100644 
index 00000000..7b86f519 --- /dev/null +++ b/tolk/smart-casts-cfg.cpp @@ -0,0 +1,472 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "smart-casts-cfg.h" +#include "ast.h" +#include "tolk.h" + +/* + * This file represents internals of AST-level control flow and data flow analysis. + * Data flow is mostly used for smart casts and is calculated AT THE TIME of type inferring. + * Not before, not after, but simultaneously with type inferring, because any local variable can be smart cast, + * which affects other expressions/variables types, generics instantiation, return auto-infer, etc. + * Though it's a part of type inferring, it's extracted as a separate file to keep inferring a bit clearer. + * + * Control flow is represented NOT as a "graph with edges". Instead, it's a "structured DFS" for the AST: + * 1) at every point of inferring, we have "current flow facts" (FlowContext) + * 2) when we see an `if (...)`, we create two derived contexts (by cloning current) + * 3) after `if`, finalize them at the end and unify + * 4) if we detect unreachable code, we mark that path's context as "unreachable" + * In other words, we get the effect of a CFG but in a more direct approach. That's enough for AST-level data-flow. 
+ * + * FlowContext contains "data-flow facts that are definitely known": variables types (original or refined), + * sign state (definitely positive, definitely zero, etc.), boolean state (definitely true, definitely false). + * Each local variable is contained there, and possibly sub-fields of tensors/objects if definitely known: + * // current facts: x is int?, t is (int, int) + * if (x != null && t.0 > 0) + * // current facts: x is int, t is (int, int), t.0 is positive + * else + * // current facts: x is null, t is (int, int), t.0 is not positive + * When branches rejoin, facts are merged back (int+null = int? and so on, here they would be equal to before if). + * Another example: + * // current facts: x is int? + * if (x == null) { + * // current facts: x is null + * x = 1; + * // current facts: x is int + * } // else branch is empty, its facts are: x is int + * // current facts (after rejoin): x is int + * + * Every expression analysis result (performed along with type inferring) returns ExprFlow: + * 1) out_flow: facts after evaluating the whole expression, no matter how it evaluates (true or false) + * 2) true_flow: the environment if expression is definitely true + * 3) false_flow: the environment if expression is definitely false + * + * Note, that globals are NOT analyzed (smart casts work for locals only). The explanation is simple: + * don't encourage to use a global twice, it costs gas, better assign it to a local. + * See SinkExpression. + * + * An important highlight about internal structure of tensors / tuples / objects and `t.1` sink expressions. + * When a tensor/object is assigned, its fields are NOT tracked individually. + * For better understanding, I'll give some examples in TypeScript (having the same behavior): + * interface User { id: number | string, ... } + * var u: User = { id: 123, ... } + * u.id // it's number|string, not number + * u = { id: 'asdf', ... 
} + * u.id // it's number|string, not string + * if (typeof u.id === 'string') { + * // here `u.id` is string (smart cast) + * } + * u.id = 123; + * u.id // now it's number (smart cast) (until `u.id` or `u` are reassigned) + * // but `u` still has type `{ id: number | string, ... }`, not `{ id: number, ... }`; only `u.id` is refined + * The same example, but with nullable tensor in Tolk: + * var t: (int?, ...) = (123, ...) + * t.0 // it's int?, not int + * t = (null, ...) + * t.0 // it's int?, not null + * if (t.0 == null) { + * // here `t.0` is null (smart cast) + * } + * t.0 = 123; + * t.0 // now it's int (smart cast) (until `t.0` or `t` are reassigned) + * // but `t` still has type `(int?, ...)`, not `(int, ...)`; only `t.0` is refined + * + * In the future, not only smart casts, but other data-flow analysis can be implemented. + * 1) detect signs: `if (x > 0) { ... if (x < 0)` to warn always false + * 2) detect always true/false: `if (x) { return; } ... if (!x)` to warn always true + * These potential improvements are SignState and BoolState. Now they are NOT IMPLEMENTED, though declared. + * Their purpose is to show, that data flow is not only about smart casts, but eventually for other facts also. 
+ * (though it's not obvious whether they should be analyzed at AST level or at IR level, like constants now) + */ + +namespace tolk { + +std::string SinkExpression::to_string() const { + std::string result = var_ref->name; + uint64_t cur_path = index_path; + while (cur_path != 0) { + result += "."; + result += std::to_string((cur_path & 0xFF) - 1); + cur_path >>= 8; + } + return result; +} + +static std::string to_string(SignState s) { + static const char* txt[6 + 1] = {"sign=unknown", ">0", "<0", "=0", ">=0", "<=0", "sign=never"}; + return txt[static_cast(s)]; +} + +static std::string to_string(BoolState s) { + static const char* txt[4 + 1] = {"unknown", "always_true", "always_false", "bool=never"}; + return txt[static_cast(s)]; +} + +// from `expr!` get `expr` +static AnyExprV unwrap_not_null_operator(AnyExprV expr) { + while (auto v_not_null = expr->try_as()) { + expr = v_not_null->get_expr(); + } + return expr; +} + +// "type lca" for a and b is T, so that both are assignable to T +// it's used +// 1) for auto-infer return type of the function if not specified +// example: `fun f(x: int?) { ... return 1; ... return x; }`; lca(`int`,`int?`) = `int?` +// 2) for auto-infer type of ternary and `match` expressions +// example: `cond ? beginCell() : null`; lca(`builder`,`null`) = `builder?` +// 3) when two data flows rejoin +// example: `if (tensorVar != null) ... 
else ...` rejoin `(int,int)` and `null` into `(int,int)?` +// when lca can't be calculated (example: `(int,int)` and `(int,int,int)`), nullptr is returned +static TypePtr calculate_type_lca(TypePtr a, TypePtr b) { + if (a == b) { + return a; + } + if (a == TypeDataNever::create()) { + return b; + } + if (b == TypeDataNever::create()) { + return a; + } + + if (a->can_rhs_be_assigned(b)) { + return a; + } + if (b->can_rhs_be_assigned(a)) { + return b; + } + + if (a == TypeDataUnknown::create() || b == TypeDataUnknown::create()) { + return TypeDataUnknown::create(); + } + + if (a == TypeDataNullLiteral::create()) { + return TypeDataNullable::create(b); + } + if (b == TypeDataNullLiteral::create()) { + return TypeDataNullable::create(a); + } + + const auto* tensor1 = a->try_as(); + const auto* tensor2 = b->try_as(); + if (tensor1 && tensor2 && tensor1->size() == tensor2->size()) { + std::vector types_lca; + types_lca.reserve(tensor1->size()); + for (int i = 0; i < tensor1->size(); ++i) { + TypePtr next = calculate_type_lca(tensor1->items[i], tensor2->items[i]); + if (next == nullptr) { + return nullptr; + } + types_lca.push_back(next); + } + return TypeDataTensor::create(std::move(types_lca)); + } + + const auto* tuple1 = a->try_as(); + const auto* tuple2 = b->try_as(); + if (tuple1 && tuple2 && tuple1->size() == tuple2->size()) { + std::vector types_lca; + types_lca.reserve(tuple1->size()); + for (int i = 0; i < tuple1->size(); ++i) { + TypePtr next = calculate_type_lca(tuple1->items[i], tuple2->items[i]); + if (next == nullptr) { + return nullptr; + } + types_lca.push_back(next); + } + return TypeDataTypedTuple::create(std::move(types_lca)); + } + + return nullptr; +} + +// merge (unify) of two sign states: what sign do we definitely have +// it's used on data flow rejoin +// example: `if (x > 0) ... 
else ...`; lca(Positive, NonPositive) = Unknown +SignState calculate_sign_lca(SignState a, SignState b) { + using s = SignState; + // a transformation lookup table, using the following rules: + // 1) if one is Unknown, the result is Unknown ("no definite constraints") + // 2) if one is Never (can't happen), the result is the other + // example: x is known > 0 already, given code `if (x > 0) {} else {}` merges Positive (always true) and Never + // 3) handle all other combinations carefully + static constexpr SignState transformations[7][7] = { + // b= Unknown | Positive | Negative | Zero | NonNegative | NonPositive | Never | + /* a=Unknown */ {s::Unknown, s::Unknown, s::Unknown, s::Unknown, s::Unknown, s::Unknown, s::Unknown }, + /* a=Positive */ {s::Unknown, s::Positive, s::Unknown, s::NonNegative, s::NonNegative, s::Unknown, s::Positive }, + /* a=Negative */ {s::Unknown, s::Unknown, s::Negative, s::NonPositive, s::Unknown, s::NonPositive, s::Negative }, + /* a=Zero */ {s::Unknown, s::NonNegative, s::NonPositive, s::Zero, s::NonNegative, s::NonPositive, s::Zero }, + /* a=NonNegative */ {s::Unknown, s::NonNegative, s::Unknown, s::NonNegative, s::NonNegative, s::Unknown, s::NonNegative}, + /* a=NonPositive */ {s::Unknown, s::Unknown, s::NonPositive, s::NonPositive, s::Unknown, s::NonPositive, s::NonPositive}, + /* a=Never */ {s::Unknown, s::Positive, s::Negative, s::Zero, s::NonNegative, s::NonPositive, s::Never } + }; + + return transformations[static_cast(a)][static_cast(b)]; +} + +// merge (unify) two bool state: what state do we definitely have +// it's used on data flow rejoin +// example: `if (x) ... 
else ...`; lca(AlwaysTrue, AlwaysFalse) = Unknown +BoolState calculate_bool_lca(BoolState a, BoolState b) { + using s = BoolState; + static constexpr BoolState transformations[4][4] = { + // b= Unknown | AlwaysTrue | AlwaysFalse | Never | + /* a=Unknown */ {s::Unknown, s::Unknown, s::Unknown, s::Unknown }, + /* a=AlwaysTrue */ {s::Unknown, s::AlwaysTrue, s::Unknown, s::AlwaysTrue }, + /* a=AlwaysFalse */ {s::Unknown, s::Unknown, s::AlwaysFalse, s::AlwaysFalse}, + /* a=Never */ {s::Unknown, s::AlwaysTrue, s::AlwaysFalse, s::Never } + }; + + return transformations[static_cast(a)][static_cast(b)]; +} + +// see comments above TypeInferringUnifyStrategy +// this function calculates lca of currently stored result and next +bool TypeInferringUnifyStrategy::unify_with(TypePtr next) { + if (unified_result == nullptr) { + unified_result = next; + return true; + } + if (unified_result == next) { + return true; + } + + TypePtr combined = calculate_type_lca(unified_result, next); + if (!combined) { + return false; + } + + unified_result = combined; + return true; +} + +bool TypeInferringUnifyStrategy::unify_with_implicit_return_void() { + if (unified_result == nullptr) { + unified_result = TypeDataVoid::create(); + return true; + } + + return unified_result == TypeDataVoid::create(); +} + +// invalidate knowledge about sub-fields of a variable or its field +// example: `tensorVar = 2`, invalidate facts about `tensorVar`, `tensorVar.0`, `tensorVar.1.2`, and all others +// example: `user.id = rhs`, invalidate facts about `user.id` (sign, etc.)
and `user.id.*` if exist +void FlowContext::invalidate_all_subfields(LocalVarPtr var_ref, uint64_t parent_path, uint64_t parent_mask) { + for (auto it = known_facts.begin(); it != known_facts.end();) { + bool is_self_or_field = it->first.var_ref == var_ref && (it->first.index_path & parent_mask) == parent_path; + if (is_self_or_field) { + it = known_facts.erase(it); + } else { + ++it; + } + } +} + +// update current type of `local_var` / `tensorVar.0` / `obj.field` +// example: `local_var = rhs` +// example: `f(mutate obj.field)` +// example: `if (t.0 != null)`, in true_flow `t.0` assigned to "not-null of current", in false_flow to null +void FlowContext::register_known_type(SinkExpression s_expr, TypePtr assigned_type) { + // having index_path = (some bytes filled in the end), + // calc index_mask: replace every filled byte with 0xFF + // example: `t.0.1`, index_path = (2<<8) + 1, index_mask = 0xFFFF + uint64_t index_path = s_expr.index_path; + uint64_t index_mask = 0; + while (index_path > 0) { + index_mask = index_mask << 8 | 0xFF; + index_path >>= 8; + } + invalidate_all_subfields(s_expr.var_ref, s_expr.index_path, index_mask); + + // if just `int` assigned, we have no considerations about its sign + // so, even if something existed by the key s_expr, drop all knowledge + known_facts[s_expr] = FactsAboutExpr(assigned_type, SignState::Unknown, BoolState::Unknown); +} + +// mark control flow unreachable / interrupted +void FlowContext::mark_unreachable(UnreachableKind reason) { + unreachable = true; + // currently we don't save why control flow became unreachable (it's not obvious how, there may be consequent reasons), + // but it helps debugging and reading outer code + static_cast(reason); +} + + +// "merge" two data-flow contexts occurs on control flow rejoins (if/else branches merging, for example) +// it's generating a new context that describes "knowledge that definitely outcomes from these two" +// example: in one branch x is `int`, in another x is `null`, result
is `int?` unless any of them is unreachable +FlowContext FlowContext::merge_flow(FlowContext&& c1, FlowContext&& c2) { + if (!c1.unreachable && c2.unreachable) { + return merge_flow(std::move(c2), std::move(c1)); + } + + std::map unified; + + if (c1.unreachable && !c2.unreachable) { + // `if (...) return; else ...;` — copy facts about common variables only from else (c2) + for (const auto& [s_expr, i2] : c2.known_facts) { + auto it1 = c1.known_facts.find(s_expr); + bool need_add = it1 != c1.known_facts.end() || s_expr.index_path != 0; + if (need_add) { + unified.emplace(s_expr, i2); + } + } + + } else { + // either both reachable, or both not — merge types and restrictions of common variables and fields + for (const auto& [s_expr, i1] : c1.known_facts) { + if (auto it2 = c2.known_facts.find(s_expr); it2 != c2.known_facts.end()) { + const FactsAboutExpr& i2 = it2->second; + unified.emplace(s_expr, i1 == i2 ? i1 : FactsAboutExpr( + calculate_type_lca(i1.expr_type, i2.expr_type), + calculate_sign_lca(i1.sign_state, i2.sign_state), + calculate_bool_lca(i1.bool_state, i2.bool_state) + )); + } + } + } + + return FlowContext(std::move(unified), c1.unreachable && c2.unreachable); +} + +// return `T`, so that `T?` = type +// what for: `if (x != null)`, to smart cast x inside if +TypePtr calculate_type_subtract_null(TypePtr type) { + if (const auto* as_nullable = type->try_as()) { + return as_nullable->inner; + } + // union types will be handled here + return TypeDataNever::create(); +} + +// given any expression vertex, extract SinkExpression if possible +// example: `x.0` is { var_ref: x, index_path: 1 } +// example: `x.1` is { var_ref: x, index_path: 2 } +// example: `x!.1` is the same +// example: `x.1.2` is { var_ref: x, index_path: (3<<8) + 2 } +// example: `x!.1!.2` is the same +// not SinkExpressions: `globalVar` / `f()` / `obj.method().1` +SinkExpression extract_sink_expression_from_vertex(AnyExprV v) { + if (auto as_ref = v->try_as()) { + if (LocalVarPtr var_ref =
as_ref->sym->try_as()) { + return SinkExpression(var_ref); + } + } + + if (auto as_dot = v->try_as(); as_dot && as_dot->is_target_indexed_access()) { + V cur_dot = as_dot; + uint64_t index_path = 0; + while (cur_dot->is_target_indexed_access()) { + int index_at = std::get(cur_dot->target); + index_path = (index_path << 8) + index_at + 1; + if (auto parent_dot = unwrap_not_null_operator(cur_dot->get_obj())->try_as()) { + cur_dot = parent_dot; + } else { + break; + } + } + if (auto as_ref = unwrap_not_null_operator(cur_dot->get_obj())->try_as()) { + if (LocalVarPtr var_ref = as_ref->sym->try_as()) { + return SinkExpression(var_ref, index_path); + } + } + } + + if (auto as_par = v->try_as()) { + return extract_sink_expression_from_vertex(as_par->get_expr()); + } + + if (auto as_assign = v->try_as()) { + return extract_sink_expression_from_vertex(as_assign->get_lhs()); + } + + return {}; +} + +// given `lhs = rhs`, calculate "original" type of `lhs` +// example: `var x: int? = ...; if (x != null) { x (here) = null; }` +// "(here)" x is `int` (smart cast), but originally declared as `int?` +// example: `if (x is (int,int)?) { x!.0 = rhs }`, here `x!.0` is `int` +TypePtr calc_declared_type_before_smart_cast(AnyExprV v) { + if (auto as_ref = v->try_as()) { + if (LocalVarPtr var_ref = as_ref->sym->try_as()) { + return var_ref->declared_type; + } + } + + if (auto as_dot = v->try_as(); as_dot && as_dot->is_target_indexed_access()) { + TypePtr obj_type = as_dot->get_obj()->inferred_type; // v already inferred; hence, index_at is correct + int index_at = std::get(as_dot->target); + if (const auto* t_tensor = obj_type->try_as()) { + return t_tensor->items[index_at]; + } + if (const auto* t_tuple = obj_type->try_as()) { + return t_tuple->items[index_at]; + } + } + + return v->inferred_type; +} + +// given `lhs = rhs` (and `var x = rhs`), calculate probable smart cast for lhs +// it's NOT directly type of rhs! 
see comment at the top of the file about internal structure of tensors/tuples. +// obvious example: `var x: int? = 5`, it's `int` (most cases are like this) +// obvious example: `var x: (int,int)? = null`, it's `null` (`x == null` is always true, `x` can be passed to any `T?`) +// not obvious example: `var x: (int?, int?)? = (3,null)`, result is `(int?,int?)`, whereas type of rhs is `(int,null)` +TypePtr calc_smart_cast_type_on_assignment(TypePtr lhs_declared_type, TypePtr rhs_inferred_type) { + // assign `T` to `T?` (or at least "assignable-to-T" to "T?") + // smart cast to `T` + if (const auto* lhs_nullable = lhs_declared_type->try_as()) { + if (lhs_nullable->inner->can_rhs_be_assigned(rhs_inferred_type)) { + return lhs_nullable->inner; + } + } + + // assign `null` to `T?` + // smart cast to `null` + if (lhs_declared_type->try_as() && rhs_inferred_type == TypeDataNullLiteral::create()) { + return TypeDataNullLiteral::create(); + } + + // no smart cast, type is the same as declared + // example: `var x: (int?,slice?) = (1, null)`, it's `(int?,slice?)`, not `(int,null)` + return lhs_declared_type; +} + + +std::ostream& operator<<(std::ostream& os, const FlowContext& flow) { + os << "(" << flow.known_facts.size() << " facts) " << (flow.unreachable ? "(unreachable) " : ""); + for (const auto& [s_expr, facts] : flow.known_facts) { + os << ", " << s_expr.to_string() << ": " << facts; + } + return os; +} + +std::ostream& operator<<(std::ostream& os, const FactsAboutExpr& facts) { + os << facts.expr_type; + if (facts.sign_state != SignState::Unknown) { + os << " " << to_string(facts.sign_state); + } + if (facts.bool_state != BoolState::Unknown) { + os << " " << to_string(facts.bool_state); + } + return os; +} + +} // namespace tolk diff --git a/tolk/smart-casts-cfg.h b/tolk/smart-casts-cfg.h new file mode 100644 index 00000000..b97c8864 --- /dev/null +++ b/tolk/smart-casts-cfg.h @@ -0,0 +1,208 @@ +/* + This file is part of TON Blockchain Library. 
+ + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +#include "fwd-declarations.h" +#include "type-system.h" +#include +#include + +namespace tolk { + +/* + * TypeInferringUnifyStrategy unifies types from various branches to a common result (lca). + * It's used to auto infer function return type based on return statements, like in TypeScript. + * Example: `fun f() { ... return 1; ... return null; }` inferred as `int?`. + * + * Besides function returns, it's also used for ternary `return cond ? 1 : null` and `match` expression. + * If types can't be unified (a function returns int and cell, for example), `unify_with()` returns false, handled outside. + * BTW, don't confuse this way of inferring with Hindley-Milner, they have nothing in common. + */ +class TypeInferringUnifyStrategy { + TypePtr unified_result = nullptr; + +public: + bool unify_with(TypePtr next); + bool unify_with_implicit_return_void(); + + TypePtr get_result() const { return unified_result; } +}; + +/* + * SinkExpression is an expression that can be smart cast like `if (x != null)` (x is int inside) + * or analyzed by data flow in some other way like `if (x > 0) ... else ...` (x <= 0 inside else). + * In other words, it "absorbs" data flow facts.
+ * Examples: `localVar`, `localTensor.1`, `localTuple.1.2.3`, `localObj.field` + * These are NOT sink expressions: `globalVar`, `f()`, `f().1` + * Note, that globals are NOT sink: don't encourage to use a global twice, it costs gas, better assign it to a local. + */ +struct SinkExpression { + LocalVarPtr const var_ref; // smart casts and data flow applies only to locals + const uint64_t index_path; // 0 for just `v`; for `v.N` it's (N+1), for `v.N.M` it's (N+1) + ((M+1)<<8), etc. + + SinkExpression() + : var_ref(nullptr), index_path(0) {} + explicit SinkExpression(LocalVarPtr var_ref) + : var_ref(var_ref), index_path(0) {} + explicit SinkExpression(LocalVarPtr var_ref, uint64_t index_path) + : var_ref(var_ref), index_path(index_path) {} + + SinkExpression(const SinkExpression&) = default; + SinkExpression& operator=(const SinkExpression&) = delete; + + bool operator==(const SinkExpression& rhs) const { return var_ref == rhs.var_ref && index_path == rhs.index_path; } + bool operator<(const SinkExpression& rhs) const { return var_ref == rhs.var_ref ? index_path < rhs.index_path : var_ref < rhs.var_ref; } + explicit operator bool() const { return var_ref != nullptr; } + + std::string to_string() const; +}; + +// UnreachableKind is a reason of why control flow is unreachable or interrupted +// example: `return;` interrupts control flow +// example: `if (true) ... else ...` inside "else" flow is unreachable because it can't happen +enum class UnreachableKind { + Unknown, // no definite info or not unreachable + CantHappen, + ThrowStatement, + ReturnStatement, + CallNeverReturnFunction, +}; + +// SignState is "definitely positive", etc.
+// example: inside `if (x > 0)`, x is Positive, in `else` it's NonPositive (if x is local, until reassigned) +enum class SignState { + Unknown, // no definite info + Positive, + Negative, + Zero, + NonNegative, + NonPositive, + Never // can't happen, like "never" type +}; + +// BoolState is "definitely true" or "definitely false" +// example: inside `if (x)`, x is AlwaysTrue, in `else` it's AlwaysFalse +enum class BoolState { + Unknown, // no definite info + AlwaysTrue, + AlwaysFalse, + Never // can't happen, like "never" type +}; + +// FactsAboutExpr represents "everything known about SinkExpression at a given execution point" +// example: after `var x = getNullableInt()`, x is `int?`, sign/bool is Unknown +// example: after `x = 2;`, x is `int`, sign is Positive, bool is AlwaysTrue +// example: inside `if (x != null && x > 0)`, x is `int`, sign is Positive (in else, no definite knowledge) +// remember, that indices/fields are also expressions, `t.1 = 2` or `u.id = 2` also store such facts +// WARNING! Detecting data-flow facts about sign state and bool state is NOT IMPLEMENTED +// (e.g. `if (x > 0)` / `if (!t.1)` is NOT analysed, therefore not updated, always Unknown now) +// it's a potential improvement for the future, for example `if (x > 0) { ... if (x < 0)` to warn always false +// their purpose for now is to show, that data flow is not only about smart casts, but eventually for other facts also +struct FactsAboutExpr { + TypePtr expr_type; // originally declared type or smart cast (Unknown if no info) + SignState sign_state; // definitely positive, etc. 
(Unknown if no info) + BoolState bool_state; // definitely true/false (Unknown if no info) + + FactsAboutExpr() + : expr_type(nullptr), sign_state(SignState::Unknown), bool_state(BoolState::Unknown) {} + FactsAboutExpr(TypePtr smart_cast_type, SignState sign_state, BoolState bool_state) + : expr_type(smart_cast_type), sign_state(sign_state), bool_state(bool_state) {} + + bool operator==(const FactsAboutExpr& rhs) const = default; +}; + +// FlowContext represents "everything known about control flow at a given execution point" +// while traversing AST, each statement node gets "in" FlowContext (prior knowledge) +// and returns "output" FlowContext (representing a state AFTER execution of a statement) +// on branching, like if/else, input context is cloned, two contexts for each branch calculated, and merged to a result +class FlowContext { + // std::map, not std::unordered_map, because LLDB visualises it better, for debugging + std::map known_facts; // all local vars plus (optionally) indices/fields of tensors/tuples/objects + bool unreachable = false; // if execution can't reach this point (after `return`, for example) + + FlowContext(std::map&& known_facts, bool unreachable) + : known_facts(std::move(known_facts)), unreachable(unreachable) {} + + void invalidate_all_subfields(LocalVarPtr var_ref, uint64_t parent_path, uint64_t parent_mask); + + friend std::ostream& operator<<(std::ostream& os, const FlowContext& flow); + +public: + FlowContext() = default; + FlowContext(FlowContext&&) noexcept = default; + FlowContext(const FlowContext&) = delete; + FlowContext& operator=(FlowContext&&) = default; + FlowContext& operator=(const FlowContext&) = delete; + + FlowContext clone() const { + std::map copy = known_facts; + return FlowContext(std::move(copy), unreachable); + } + + bool is_unreachable() const { return unreachable; } + + TypePtr smart_cast_if_exists(SinkExpression s_expr) const { + auto it = known_facts.find(s_expr); + return it == known_facts.end() ? 
nullptr : it->second.expr_type; + } + + void register_known_type(SinkExpression s_expr, TypePtr assigned_type); + void mark_unreachable(UnreachableKind reason); + + static FlowContext merge_flow(FlowContext&& c1, FlowContext&& c2); +}; + +struct ExprFlow { + FlowContext out_flow; + + // only calculated inside `if`, left of `&&`, etc. — there this expression is immediate condition, empty otherwise + FlowContext true_flow; + FlowContext false_flow; + + ExprFlow(FlowContext&& out_flow, FlowContext&& true_flow, FlowContext&& false_flow) + : out_flow(std::move(out_flow)) + , true_flow(std::move(true_flow)) + , false_flow(std::move(false_flow)) {} + ExprFlow(FlowContext&& out_flow, const bool clone_flow_for_condition) + : out_flow(std::move(out_flow)) { + if (clone_flow_for_condition) { + true_flow = this->out_flow.clone(); + false_flow = this->out_flow.clone(); + } + } + + ExprFlow(ExprFlow&&) noexcept = default; + ExprFlow(const ExprFlow&) = delete; + ExprFlow& operator=(ExprFlow&&) = delete; + ExprFlow& operator=(const ExprFlow&) = delete; + + int get_always_true_false_state() const { + if (true_flow.is_unreachable() != false_flow.is_unreachable()) { + return false_flow.is_unreachable() ? 
1 : 2; // 1 is "always true" + } + return 0; + } +}; + +std::ostream& operator<<(std::ostream& os, const FactsAboutExpr& facts); +std::ostream& operator<<(std::ostream& os, const FlowContext& flow); +TypePtr calculate_type_subtract_null(TypePtr type); +SinkExpression extract_sink_expression_from_vertex(AnyExprV v); +TypePtr calc_declared_type_before_smart_cast(AnyExprV v); +TypePtr calc_smart_cast_type_on_assignment(TypePtr lhs_declared_type, TypePtr rhs_inferred_type); + +} // namespace tolk diff --git a/tolk/src-file.cpp b/tolk/src-file.cpp index 52ac3821..1286c1f9 100644 --- a/tolk/src-file.cpp +++ b/tolk/src-file.cpp @@ -18,6 +18,7 @@ #include "compiler-state.h" #include #include +#include namespace tolk { @@ -146,9 +147,10 @@ void SrcLocation::show_context(std::ostream& os) const { return; } SrcFile::SrcPosition pos = src_file->convert_offset(char_offset); - os << " " << pos.line_str << "\n"; + os << std::right << std::setw(4) << pos.line_no << " | "; + os << pos.line_str << "\n"; - os << " "; + os << " " << " | "; for (int i = 1; i < pos.char_no; ++i) { os << ' '; } @@ -193,8 +195,11 @@ std::ostream& operator<<(std::ostream& os, const ParseError& error) { } void ParseError::show(std::ostream& os) const { - os << where << ": error: " << message << std::endl; - where.show_context(os); + os << loc << ": error: " << message << std::endl; + if (current_function) { + os << " // in function `" << current_function->as_human_readable() << "`" << std::endl; + } + loc.show_context(os); } } // namespace tolk diff --git a/tolk/src-file.h b/tolk/src-file.h index 0c82bf18..b0f9cba3 100644 --- a/tolk/src-file.h +++ b/tolk/src-file.h @@ -124,10 +124,14 @@ struct Fatal final : std::exception { std::ostream& operator<<(std::ostream& os, const Fatal& fatal); struct ParseError : std::exception { - SrcLocation where; + FunctionPtr current_function; + SrcLocation loc; std::string message; - ParseError(SrcLocation _where, std::string _msg) : where(_where), message(std::move(_msg)) { 
- } + + ParseError(SrcLocation loc, std::string message) + : current_function(nullptr), loc(loc), message(std::move(message)) {} + ParseError(FunctionPtr current_function, SrcLocation loc, std::string message) + : current_function(current_function), loc(loc), message(std::move(message)) {} const char* what() const noexcept override { return message.c_str(); diff --git a/tolk/symtable.cpp b/tolk/symtable.cpp index c56dc6ed..51dc3440 100644 --- a/tolk/symtable.cpp +++ b/tolk/symtable.cpp @@ -102,9 +102,6 @@ void LocalVarData::assign_resolved_type(TypePtr declared_type) { } void LocalVarData::assign_inferred_type(TypePtr inferred_type) { -#ifdef TOLK_DEBUG - assert(this->declared_type == nullptr); // called when type declaration omitted, inferred from assigned value -#endif this->declared_type = inferred_type; } @@ -120,7 +117,7 @@ static void fire_error_redefinition_of_symbol(SrcLocation loc, const Symbol* pre throw ParseError(loc, "redefinition of built-in symbol"); } -void GlobalSymbolTable::add_function(const FunctionData* f_sym) { +void GlobalSymbolTable::add_function(FunctionPtr f_sym) { auto key = key_hash(f_sym->name); auto [it, inserted] = entries.emplace(key, f_sym); if (!inserted) { @@ -128,7 +125,7 @@ void GlobalSymbolTable::add_function(const FunctionData* f_sym) { } } -void GlobalSymbolTable::add_global_var(const GlobalVarData* g_sym) { +void GlobalSymbolTable::add_global_var(GlobalVarPtr g_sym) { auto key = key_hash(g_sym->name); auto [it, inserted] = entries.emplace(key, g_sym); if (!inserted) { @@ -136,7 +133,7 @@ void GlobalSymbolTable::add_global_var(const GlobalVarData* g_sym) { } } -void GlobalSymbolTable::add_global_const(const GlobalConstData* c_sym) { +void GlobalSymbolTable::add_global_const(GlobalConstPtr c_sym) { auto key = key_hash(c_sym->name); auto [it, inserted] = entries.emplace(key, c_sym); if (!inserted) { diff --git a/tolk/symtable.h b/tolk/symtable.h index 27753ceb..9419afce 100644 --- a/tolk/symtable.h +++ b/tolk/symtable.h @@ 
-37,17 +37,12 @@ struct Symbol { virtual ~Symbol() = default; - template - const T* as() const { + template + ConstTPtr try_as() const { #ifdef TOLK_DEBUG - assert(dynamic_cast(this) != nullptr); + assert(this != nullptr); #endif - return dynamic_cast(this); - } - - template - const T* try_as() const { - return dynamic_cast(this); + return dynamic_cast(this); } }; @@ -229,9 +224,9 @@ class GlobalSymbolTable { } public: - void add_function(const FunctionData* f_sym); - void add_global_var(const GlobalVarData* g_sym); - void add_global_const(const GlobalConstData* c_sym); + void add_function(FunctionPtr f_sym); + void add_global_var(GlobalVarPtr g_sym); + void add_global_const(GlobalConstPtr c_sym); const Symbol* lookup(std::string_view name) const { const auto it = entries.find(key_hash(name)); diff --git a/tolk/tolk-version.h b/tolk/tolk-version.h index 84326012..bbea63ff 100644 --- a/tolk/tolk-version.h +++ b/tolk/tolk-version.h @@ -18,6 +18,6 @@ namespace tolk { -constexpr const char* TOLK_VERSION = "0.8.0"; +constexpr const char* TOLK_VERSION = "0.9.0"; } // namespace tolk diff --git a/tolk/tolk.cpp b/tolk/tolk.cpp index cc867c52..71d1969d 100644 --- a/tolk/tolk.cpp +++ b/tolk/tolk.cpp @@ -58,8 +58,8 @@ int tolk_proceed(const std::string &entrypoint_filename) { pipeline_register_global_symbols(); pipeline_resolve_identifiers_and_assign_symbols(); pipeline_calculate_rvalue_lvalue(); - pipeline_detect_unreachable_statements(); pipeline_infer_types_and_calls_and_fields(); + pipeline_check_inferred_types(); pipeline_refine_lvalue_for_mutate_arguments(); pipeline_check_rvalue_lvalue(); pipeline_check_pure_impure_operations(); diff --git a/tolk/tolk.h b/tolk/tolk.h index 4086d7f7..3f00d0d4 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h @@ -45,7 +45,7 @@ typedef int const_idx_t; struct TmpVar { var_idx_t ir_idx; // every var in IR represents 1 stack slot - TypePtr v_type; // calc_width_on_stack() is 1 + TypePtr v_type; // get_width_on_stack() is 1 std::string name; // "x" 
for vars originated from user sources; "x.0" for tensor components; empty for implicitly created tmp vars SrcLocation loc; // location of var declaration in sources or where a tmp var was originated #ifdef TOLK_DEBUG @@ -205,7 +205,6 @@ struct VarDescrList { std::size_t count_used(const std::vector idx_list) const; VarDescr& add(var_idx_t idx); VarDescr& add_newval(var_idx_t idx); - VarDescrList& operator&=(const VarDescrList& values); VarDescrList& import_values(const VarDescrList& values); VarDescrList operator|(const VarDescrList& y) const; VarDescrList& operator|=(const VarDescrList& values); @@ -283,8 +282,8 @@ struct Op { enum { _Disabled = 1, _NoReturn = 4, _Impure = 24 }; int flags; std::unique_ptr next; - const FunctionData* f_sym = nullptr; - const GlobalVarData* g_sym = nullptr; + FunctionPtr f_sym = nullptr; + GlobalVarPtr g_sym = nullptr; SrcLocation where; VarDescrList var_info; std::vector args; @@ -313,19 +312,19 @@ struct Op { : cl(_cl), flags(0), f_sym(nullptr), where(_where), left(std::move(_left)), right(std::move(_right)) { } Op(SrcLocation _where, OpKind _cl, const std::vector& _left, const std::vector& _right, - const FunctionData* _fun) + FunctionPtr _fun) : cl(_cl), flags(0), f_sym(_fun), where(_where), left(_left), right(_right) { } Op(SrcLocation _where, OpKind _cl, std::vector&& _left, std::vector&& _right, - const FunctionData* _fun) + FunctionPtr _fun) : cl(_cl), flags(0), f_sym(_fun), where(_where), left(std::move(_left)), right(std::move(_right)) { } Op(SrcLocation _where, OpKind _cl, const std::vector& _left, const std::vector& _right, - const GlobalVarData* _gvar) + GlobalVarPtr _gvar) : cl(_cl), flags(0), g_sym(_gvar), where(_where), left(_left), right(_right) { } Op(SrcLocation _where, OpKind _cl, std::vector&& _left, std::vector&& _right, - const GlobalVarData* _gvar) + GlobalVarPtr _gvar) : cl(_cl), flags(0), g_sym(_gvar), where(_where), left(std::move(_left)), right(std::move(_right)) { } @@ -575,6 +574,7 @@ struct AsmOpList { 
const std::vector* var_names_{nullptr}; std::vector constants_; bool retalt_{false}; + bool retalt_inserted_{false}; void out(std::ostream& os, int mode = 0) const; AsmOpList(int indent = 0, const std::vector* var_names = nullptr) : indent_(indent), var_names_(var_names) { } @@ -1030,7 +1030,7 @@ struct Stack { } void apply_wrappers(int callxargs_count) { bool is_inline = mode & _InlineFunc; - if (o.retalt_) { + if (o.retalt_inserted_) { o.insert(0, "SAMEALTSAVE"); o.insert(0, "c2 SAVE"); } @@ -1083,7 +1083,7 @@ struct FunctionBodyAsm { struct CodeBlob { int var_cnt, in_var_cnt; - const FunctionData* fun_ref; + FunctionPtr fun_ref; std::string name; SrcLocation loc; std::vector vars; @@ -1094,7 +1094,7 @@ struct CodeBlob { #endif std::stack*> cur_ops_stack; bool require_callxargs = false; - CodeBlob(std::string name, SrcLocation loc, const FunctionData* fun_ref) + CodeBlob(std::string name, SrcLocation loc, FunctionPtr fun_ref) : var_cnt(0), in_var_cnt(0), fun_ref(fun_ref), name(std::move(name)), loc(loc), cur_ops(&ops) { } template diff --git a/tolk/type-system.cpp b/tolk/type-system.cpp index c7122e10..d73625c2 100644 --- a/tolk/type-system.cpp +++ b/tolk/type-system.cpp @@ -84,6 +84,7 @@ TypePtr TypeDataTuple::singleton; TypePtr TypeDataContinuation::singleton; TypePtr TypeDataNullLiteral::singleton; TypePtr TypeDataUnknown::singleton; +TypePtr TypeDataNever::singleton; TypePtr TypeDataVoid::singleton; void type_system_init() { @@ -96,6 +97,7 @@ void type_system_init() { TypeDataContinuation::singleton = new TypeDataContinuation; TypeDataNullLiteral::singleton = new TypeDataNullLiteral; TypeDataUnknown::singleton = new TypeDataUnknown; + TypeDataNever::singleton = new TypeDataNever; TypeDataVoid::singleton = new TypeDataVoid; } @@ -108,6 +110,19 @@ void type_system_init() { // and creates an object only if it isn't found in a global hashtable // +TypePtr TypeDataNullable::create(TypePtr inner) { + TypeDataTypeIdCalculation hash(1774084920039440885ULL); + 
hash.feed_child(inner); + + if (TypePtr existing = hash.get_existing()) { + return existing; + } + // most types (int?, slice?, etc.), when nullable, still occupy 1 stack slot (holding TVM NULL at runtime) + // but for example for `(int, int)` we need an extra stack slot "null flag" + int width_on_stack = inner->can_hold_tvm_null_instead() ? 1 : inner->get_width_on_stack() + 1; + return hash.register_unique(new TypeDataNullable(hash.type_id(), hash.children_flags(), width_on_stack, inner)); +} + TypePtr TypeDataFunCallable::create(std::vector&& params_types, TypePtr return_type) { TypeDataTypeIdCalculation hash(3184039965511020991ULL); for (TypePtr param : params_types) { @@ -143,7 +158,11 @@ TypePtr TypeDataTensor::create(std::vector&& items) { if (TypePtr existing = hash.get_existing()) { return existing; } - return hash.register_unique(new TypeDataTensor(hash.type_id(), hash.children_flags(), std::move(items))); + int width_on_stack = 0; + for (TypePtr item : items) { + width_on_stack += item->get_width_on_stack(); + } + return hash.register_unique(new TypeDataTensor(hash.type_id(), hash.children_flags(), width_on_stack, std::move(items))); } TypePtr TypeDataTypedTuple::create(std::vector&& items) { @@ -178,6 +197,12 @@ TypePtr TypeDataUnresolved::create(std::string&& text, SrcLocation loc) { // only non-trivial implementations are here; trivial are defined in .h file // +std::string TypeDataNullable::as_human_readable() const { + std::string nested = inner->as_human_readable(); + bool embrace = inner->try_as(); + return embrace ? "(" + nested + ")?" 
: nested + "?"; +} + std::string TypeDataFunCallable::as_human_readable() const { std::string result = "("; for (TypePtr param : params_types) { @@ -223,6 +248,11 @@ std::string TypeDataTypedTuple::as_human_readable() const { // only non-trivial implementations are here; by default (no children), `callback(this)` is executed // +void TypeDataNullable::traverse(const TraverserCallbackT& callback) const { + callback(this); + inner->traverse(callback); +} + void TypeDataFunCallable::traverse(const TraverserCallbackT& callback) const { callback(this); for (TypePtr param : params_types) { @@ -254,6 +284,10 @@ void TypeDataTypedTuple::traverse(const TraverserCallbackT& callback) const { // only non-trivial implementations are here; by default (no children), `return callback(this)` is executed // +TypePtr TypeDataNullable::replace_children_custom(const ReplacerCallbackT& callback) const { + return callback(create(inner->replace_children_custom(callback))); +} + TypePtr TypeDataFunCallable::replace_children_custom(const ReplacerCallbackT& callback) const { std::vector mapped; mapped.reserve(params_types.size()); @@ -282,122 +316,90 @@ TypePtr TypeDataTypedTuple::replace_children_custom(const ReplacerCallbackT& cal } -// -------------------------------------------- -// calc_width_on_stack() -// -// returns the number of stack slots occupied by a variable of this type -// only non-trivial implementations are here; by default (most types) occupy 1 stack slot -// - -int TypeDataGenericT::calc_width_on_stack() const { - // this function is invoked only in functions with generics already instantiated - assert(false); - return -999999; -} - -int TypeDataTensor::calc_width_on_stack() const { - int sum = 0; - for (TypePtr item : items) { - sum += item->calc_width_on_stack(); - } - return sum; -} - -int TypeDataUnresolved::calc_width_on_stack() const { - // since early pipeline stages, no unresolved types left - assert(false); - return -999999; -} - -int 
TypeDataVoid::calc_width_on_stack() const { - return 0; -} - - // -------------------------------------------- // can_rhs_be_assigned() // // on `var lhs: = rhs`, having inferred rhs_type, check that it can be assigned without any casts // the same goes for passing arguments, returning values, etc. — where the "receiver" (lhs) checks "applier" (rhs) -// for now, `null` can be assigned to any TVM primitive, be later we'll have T? types and null safety // bool TypeDataInt::can_rhs_be_assigned(TypePtr rhs) const { if (rhs == this) { return true; } - if (rhs == TypeDataNullLiteral::create()) { - return true; - } - return false; + return rhs == TypeDataNever::create(); } bool TypeDataBool::can_rhs_be_assigned(TypePtr rhs) const { if (rhs == this) { return true; } - if (rhs == TypeDataNullLiteral::create()) { - return true; - } - return false; + return rhs == TypeDataNever::create(); } bool TypeDataCell::can_rhs_be_assigned(TypePtr rhs) const { if (rhs == this) { return true; } - if (rhs == TypeDataNullLiteral::create()) { - return true; - } - return false; + return rhs == TypeDataNever::create(); } bool TypeDataSlice::can_rhs_be_assigned(TypePtr rhs) const { if (rhs == this) { return true; } - if (rhs == TypeDataNullLiteral::create()) { - return true; - } - return false; + return rhs == TypeDataNever::create(); } bool TypeDataBuilder::can_rhs_be_assigned(TypePtr rhs) const { if (rhs == this) { return true; } - if (rhs == TypeDataNullLiteral::create()) { - return true; - } - return false; + return rhs == TypeDataNever::create(); } bool TypeDataTuple::can_rhs_be_assigned(TypePtr rhs) const { if (rhs == this) { return true; } - if (rhs == TypeDataNullLiteral::create()) { - return true; - } - return false; + return rhs == TypeDataNever::create(); } bool TypeDataContinuation::can_rhs_be_assigned(TypePtr rhs) const { if (rhs == this) { return true; } - if (rhs == TypeDataNullLiteral::create()) { - return true; - } - return false; + return rhs == TypeDataNever::create(); } 
bool TypeDataNullLiteral::can_rhs_be_assigned(TypePtr rhs) const { - return rhs == this; + if (rhs == this) { + return true; + } + return rhs == TypeDataNever::create(); +} + +bool TypeDataNullable::can_rhs_be_assigned(TypePtr rhs) const { + if (rhs == this) { + return true; + } + if (rhs == TypeDataNullLiteral::create()) { + return true; + } + if (const TypeDataNullable* rhs_nullable = rhs->try_as()) { + return inner->can_rhs_be_assigned(rhs_nullable->inner); + } + if (inner->can_rhs_be_assigned(rhs)) { + return true; + } + return rhs == TypeDataNever::create(); } bool TypeDataFunCallable::can_rhs_be_assigned(TypePtr rhs) const { - return rhs == this; + if (rhs == this) { + return true; + } + return rhs == TypeDataNever::create(); } bool TypeDataGenericT::can_rhs_be_assigned(TypePtr rhs) const { @@ -414,8 +416,7 @@ bool TypeDataTensor::can_rhs_be_assigned(TypePtr rhs) const { } return true; } - // note, that tensors can not accept null - return false; + return rhs == TypeDataNever::create(); } bool TypeDataTypedTuple::can_rhs_be_assigned(TypePtr rhs) const { @@ -427,10 +428,7 @@ bool TypeDataTypedTuple::can_rhs_be_assigned(TypePtr rhs) const { } return true; } - if (rhs == TypeDataNullLiteral::create()) { - return true; - } - return false; + return rhs == TypeDataNever::create(); } bool TypeDataUnknown::can_rhs_be_assigned(TypePtr rhs) const { @@ -442,8 +440,15 @@ bool TypeDataUnresolved::can_rhs_be_assigned(TypePtr rhs) const { return false; } +bool TypeDataNever::can_rhs_be_assigned(TypePtr rhs) const { + return true; +} + bool TypeDataVoid::can_rhs_be_assigned(TypePtr rhs) const { - return rhs == this; + if (rhs == this) { + return true; + } + return rhs == TypeDataNever::create(); } @@ -455,41 +460,69 @@ bool TypeDataVoid::can_rhs_be_assigned(TypePtr rhs) const { // bool TypeDataInt::can_be_casted_with_as_operator(TypePtr cast_to) const { + if (const auto* to_nullable = cast_to->try_as()) { // `int` as `int?` + return 
can_be_casted_with_as_operator(to_nullable->inner); + } return cast_to == this; } bool TypeDataBool::can_be_casted_with_as_operator(TypePtr cast_to) const { + if (const auto* to_nullable = cast_to->try_as()) { + return can_be_casted_with_as_operator(to_nullable->inner); + } return cast_to == this || cast_to == TypeDataInt::create(); } bool TypeDataCell::can_be_casted_with_as_operator(TypePtr cast_to) const { + if (const auto* to_nullable = cast_to->try_as()) { + return can_be_casted_with_as_operator(to_nullable->inner); + } return cast_to == this; } bool TypeDataSlice::can_be_casted_with_as_operator(TypePtr cast_to) const { + if (const auto* to_nullable = cast_to->try_as()) { + return can_be_casted_with_as_operator(to_nullable->inner); + } return cast_to == this; } bool TypeDataBuilder::can_be_casted_with_as_operator(TypePtr cast_to) const { + if (const auto* to_nullable = cast_to->try_as()) { + return can_be_casted_with_as_operator(to_nullable->inner); + } return cast_to == this; } bool TypeDataTuple::can_be_casted_with_as_operator(TypePtr cast_to) const { + if (const auto* to_nullable = cast_to->try_as()) { + return can_be_casted_with_as_operator(to_nullable->inner); + } return cast_to == this; } bool TypeDataContinuation::can_be_casted_with_as_operator(TypePtr cast_to) const { + if (const auto* to_nullable = cast_to->try_as()) { + return can_be_casted_with_as_operator(to_nullable->inner); + } return cast_to == this; } bool TypeDataNullLiteral::can_be_casted_with_as_operator(TypePtr cast_to) const { - return cast_to == this - || cast_to == TypeDataInt::create() || cast_to == TypeDataBool::create() || cast_to == TypeDataCell::create() || cast_to == TypeDataSlice::create() - || cast_to == TypeDataBuilder::create() || cast_to == TypeDataContinuation::create() || cast_to == TypeDataTuple::create() - || cast_to->try_as(); + return cast_to == this || cast_to->try_as(); +} + +bool TypeDataNullable::can_be_casted_with_as_operator(TypePtr cast_to) const { + if (const 
auto* to_nullable = cast_to->try_as()) { + return inner->can_be_casted_with_as_operator(to_nullable->inner); + } + return false; } bool TypeDataFunCallable::can_be_casted_with_as_operator(TypePtr cast_to) const { + if (const auto* to_nullable = cast_to->try_as()) { + return can_be_casted_with_as_operator(to_nullable->inner); + } return this == cast_to; } @@ -506,6 +539,9 @@ bool TypeDataTensor::can_be_casted_with_as_operator(TypePtr cast_to) const { } return true; } + if (const auto* to_nullable = cast_to->try_as()) { + return can_be_casted_with_as_operator(to_nullable->inner); + } return false; } @@ -518,31 +554,73 @@ bool TypeDataTypedTuple::can_be_casted_with_as_operator(TypePtr cast_to) const { } return true; } + if (const auto* to_nullable = cast_to->try_as()) { + return can_be_casted_with_as_operator(to_nullable->inner); + } return false; } bool TypeDataUnknown::can_be_casted_with_as_operator(TypePtr cast_to) const { - // 'unknown' can be cast to any type - // (though it's not valid for exception arguments when casting them to non-1 stack width, - // but to ensure it, we need a special type "unknown TVM primitive", which is overwhelming I think) - return true; + // 'unknown' can be cast to any TVM value + return cast_to->get_width_on_stack() == 1; } bool TypeDataUnresolved::can_be_casted_with_as_operator(TypePtr cast_to) const { return false; } +bool TypeDataNever::can_be_casted_with_as_operator(TypePtr cast_to) const { + return true; +} + bool TypeDataVoid::can_be_casted_with_as_operator(TypePtr cast_to) const { return cast_to == this; } +// -------------------------------------------- +// can_hold_tvm_null_instead() +// +// assigning `null` to a primitive variable like `int?` / `cell?` can store TVM NULL inside the same slot +// (that's why the default implementation is just "return true", and most of types occupy 1 slot) +// but for complex variables, like `(int, int)?`, "null presence" is kept in a separate slot (UTag for union types) +// though still, 
tricky situations like `(int, ())?` can still "embed" TVM NULL in parallel with original value +// + +bool TypeDataNullable::can_hold_tvm_null_instead() const { + if (get_width_on_stack() != 1) { // `(int, int)?` / `()?` can not hold null instead + return false; // only `int?` / `cell?` / `StructWith1IntField?` can + } // and some tricky situations like `(int, ())?`, but not `(int?, ())?` + return !inner->can_hold_tvm_null_instead(); +} + +bool TypeDataTensor::can_hold_tvm_null_instead() const { + if (get_width_on_stack() != 1) { // `(int, int)` / `()` can not hold null instead, since null is 1 slot + return false; // only `((), int)` and similar can: + } // one item is width 1 (and not nullable), others are 0 + for (TypePtr item : items) { + if (item->get_width_on_stack() == 1 && !item->can_hold_tvm_null_instead()) { + return false; + } + } + return true; +} + +bool TypeDataNever::can_hold_tvm_null_instead() const { + return false; +} + +bool TypeDataVoid::can_hold_tvm_null_instead() const { + return false; +} + + // -------------------------------------------- // parsing type from tokens // // here we implement parsing types (mostly after colon) to TypeData // example: `var v: int` is TypeDataInt -// example: `var v: (builder, [cell])` is TypeDataTensor(TypeDataBuilder, TypeDataTypedTuple(TypeDataCell)) +// example: `var v: (builder?, [cell])` is TypeDataTensor(TypeDataNullable(TypeDataBuilder), TypeDataTypedTuple(TypeDataCell)) // example: `fun f(): ()` is TypeDataTensor() (an empty one) // // note, that unrecognized type names (MyEnum, MyStruct, T) are parsed as TypeDataUnresolved, @@ -598,6 +676,7 @@ static TypePtr parse_simple_type(Lexer& lex) { case 5: if (str == "slice") return TypeDataSlice::create(); if (str == "tuple") return TypeDataTuple::create(); + if (str == "never") return TypeDataNever::create(); break; case 7: if (str == "builder") return TypeDataBuilder::create(); @@ -633,7 +712,8 @@ static TypePtr parse_type_nullable(Lexer& lex) { TypePtr 
result = parse_simple_type(lex); if (lex.tok() == tok_question) { - lex.error("nullable types are not supported yet"); + lex.next(); + result = TypeDataNullable::create(result); } return result; diff --git a/tolk/type-system.h b/tolk/type-system.h index 482039e6..4b671e30 100644 --- a/tolk/type-system.h +++ b/tolk/type-system.h @@ -50,6 +50,8 @@ class TypeData { const uint64_t type_id; // bits of flag_mask, to store often-used properties and return them without tree traversing const int flags; + // how many slots on a stack this type occupies (calculated on creation), e.g. `int`=1, `(int,int)`=2, `(int,int)?`=3 + const int width_on_stack; friend class TypeDataTypeIdCalculation; @@ -60,9 +62,10 @@ protected: flag_contains_unresolved_inside = 1 << 3, }; - explicit TypeData(uint64_t type_id, int flags_with_children) + explicit TypeData(uint64_t type_id, int flags_with_children, int width_on_stack) : type_id(type_id) - , flags(flags_with_children) { + , flags(flags_with_children) + , width_on_stack(width_on_stack) { } public: @@ -74,6 +77,7 @@ public: } uint64_t get_type_id() const { return type_id; } + int get_width_on_stack() const { return width_on_stack; } bool has_unknown_inside() const { return flags & flag_contains_unknown_inside; } bool has_genericT_inside() const { return flags & flag_contains_genericT_inside; } @@ -86,6 +90,10 @@ public: virtual bool can_rhs_be_assigned(TypePtr rhs) const = 0; virtual bool can_be_casted_with_as_operator(TypePtr cast_to) const = 0; + virtual bool can_hold_tvm_null_instead() const { + return true; + } + virtual void traverse(const TraverserCallbackT& callback) const { callback(this); } @@ -93,17 +101,13 @@ public: virtual TypePtr replace_children_custom(const ReplacerCallbackT& callback) const { return callback(this); } - - virtual int calc_width_on_stack() const { - return 1; - } }; /* * `int` is TypeDataInt, representation of TVM int. 
*/ class TypeDataInt final : public TypeData { - TypeDataInt() : TypeData(1ULL, 0) {} + TypeDataInt() : TypeData(1ULL, 0, 1) {} static TypePtr singleton; friend void type_system_init(); @@ -121,7 +125,7 @@ public: * From the type system point of view, int and bool are different, not-autocastable types. */ class TypeDataBool final : public TypeData { - TypeDataBool() : TypeData(2ULL, 0) {} + TypeDataBool() : TypeData(2ULL, 0, 1) {} static TypePtr singleton; friend void type_system_init(); @@ -138,7 +142,7 @@ public: * `cell` is TypeDataCell, representation of TVM cell. */ class TypeDataCell final : public TypeData { - TypeDataCell() : TypeData(3ULL, 0) {} + TypeDataCell() : TypeData(3ULL, 0, 1) {} static TypePtr singleton; friend void type_system_init(); @@ -155,7 +159,7 @@ public: * `slice` is TypeDataSlice, representation of TVM slice. */ class TypeDataSlice final : public TypeData { - TypeDataSlice() : TypeData(4ULL, 0) {} + TypeDataSlice() : TypeData(4ULL, 0, 1) {} static TypePtr singleton; friend void type_system_init(); @@ -172,7 +176,7 @@ public: * `builder` is TypeDataBuilder, representation of TVM builder. */ class TypeDataBuilder final : public TypeData { - TypeDataBuilder() : TypeData(5ULL, 0) {} + TypeDataBuilder() : TypeData(5ULL, 0, 1) {} static TypePtr singleton; friend void type_system_init(); @@ -191,7 +195,7 @@ public: * so getting its element results in TypeDataUnknown (which must be assigned/cast explicitly). */ class TypeDataTuple final : public TypeData { - TypeDataTuple() : TypeData(6ULL, 0) {} + TypeDataTuple() : TypeData(6ULL, 0, 1) {} static TypePtr singleton; friend void type_system_init(); @@ -209,7 +213,7 @@ public: * It's like "untyped callable", not compatible with other types. 
*/ class TypeDataContinuation final : public TypeData { - TypeDataContinuation() : TypeData(7ULL, 0) {} + TypeDataContinuation() : TypeData(7ULL, 0, 1) {} static TypePtr singleton; friend void type_system_init(); @@ -224,12 +228,12 @@ public: /* * `null` has TypeDataNullLiteral type. - * Currently, it can be assigned to int/slice/etc., but later Tolk will have T? types and null safety. + * It can be assigned only to nullable types (`int?`, etc.), to ensure null safety. * Note, that `var i = null`, though valid (i would be constant null), fires an "always-null" compilation error * (it's much better for user to see an error here than when he passes this variable somewhere). */ class TypeDataNullLiteral final : public TypeData { - TypeDataNullLiteral() : TypeData(8ULL, 0) {} + TypeDataNullLiteral() : TypeData(8ULL, 0, 1) {} static TypePtr singleton; friend void type_system_init(); @@ -242,6 +246,30 @@ public: bool can_be_casted_with_as_operator(TypePtr cast_to) const override; }; +/* + * `T?` is "nullable T". + * It can be converted to T either with ! (non-null assertion operator) or with smart casts. 
+ */ +class TypeDataNullable final : public TypeData { + TypeDataNullable(uint64_t type_id, int children_flags, int width_on_stack, TypePtr inner) + : TypeData(type_id, children_flags, width_on_stack) + , inner(inner) {} + +public: + const TypePtr inner; + + static TypePtr create(TypePtr inner); + + bool is_primitive_nullable() const { return get_width_on_stack() == 1 && inner->get_width_on_stack() == 1; } + + std::string as_human_readable() const override; + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; + void traverse(const TraverserCallbackT& callback) const override; + TypePtr replace_children_custom(const ReplacerCallbackT& callback) const override; + bool can_hold_tvm_null_instead() const override; +}; + /* * `fun(int, int) -> void` is TypeDataFunCallable, think of is as a typed continuation. * A type of function `fun f(x: int) { return x; }` is actually `fun(int) -> int`. @@ -249,7 +277,7 @@ public: */ class TypeDataFunCallable final : public TypeData { TypeDataFunCallable(uint64_t type_id, int children_flags, std::vector&& params_types, TypePtr return_type) - : TypeData(type_id, children_flags) + : TypeData(type_id, children_flags, 1) , params_types(std::move(params_types)) , return_type(return_type) {} @@ -275,7 +303,7 @@ public: */ class TypeDataGenericT final : public TypeData { TypeDataGenericT(uint64_t type_id, std::string&& nameT) - : TypeData(type_id, flag_contains_genericT_inside) + : TypeData(type_id, flag_contains_genericT_inside, -999999) // width undefined until instantiated , nameT(std::move(nameT)) {} public: @@ -286,7 +314,6 @@ public: std::string as_human_readable() const override { return nameT; } bool can_rhs_be_assigned(TypePtr rhs) const override; bool can_be_casted_with_as_operator(TypePtr cast_to) const override; - int calc_width_on_stack() const override; }; /* @@ -296,8 +323,8 @@ public: * A tensor can be empty. 
*/ class TypeDataTensor final : public TypeData { - TypeDataTensor(uint64_t type_id, int children_flags, std::vector&& items) - : TypeData(type_id, children_flags) + TypeDataTensor(uint64_t type_id, int children_flags, int width_on_stack, std::vector&& items) + : TypeData(type_id, children_flags, width_on_stack) , items(std::move(items)) {} public: @@ -312,7 +339,7 @@ public: bool can_be_casted_with_as_operator(TypePtr cast_to) const override; void traverse(const TraverserCallbackT& callback) const override; TypePtr replace_children_custom(const ReplacerCallbackT& callback) const override; - int calc_width_on_stack() const override; + bool can_hold_tvm_null_instead() const override; }; /* @@ -322,7 +349,7 @@ public: */ class TypeDataTypedTuple final : public TypeData { TypeDataTypedTuple(uint64_t type_id, int children_flags, std::vector&& items) - : TypeData(type_id, children_flags) + : TypeData(type_id, children_flags, 1) , items(std::move(items)) {} public: @@ -346,7 +373,7 @@ public: * The only thing available to do with unknown is to cast it: `catch (excNo, arg) { var i = arg as int; }` */ class TypeDataUnknown final : public TypeData { - TypeDataUnknown() : TypeData(20ULL, flag_contains_unknown_inside) {} + TypeDataUnknown() : TypeData(20ULL, flag_contains_unknown_inside, 1) {} static TypePtr singleton; friend void type_system_init(); @@ -367,7 +394,7 @@ public: */ class TypeDataUnresolved final : public TypeData { TypeDataUnresolved(uint64_t type_id, std::string&& text, SrcLocation loc) - : TypeData(type_id, flag_contains_unresolved_inside) + : TypeData(type_id, flag_contains_unresolved_inside, -999999) , text(std::move(text)) , loc(loc) {} @@ -380,7 +407,27 @@ public: std::string as_human_readable() const override { return text + "*"; } bool can_rhs_be_assigned(TypePtr rhs) const override; bool can_be_casted_with_as_operator(TypePtr cast_to) const override; - int calc_width_on_stack() const override; +}; + +/* + * `never` is a special type meaning "no value 
can be hold". + * Is may appear due to smart casts, for example `if (x == null && x != null)` makes x "never". + * Functions returning "never" assume to never exit, calling them interrupts control flow. + * Such variables can not be cast to any other types, all their usage will trigger type mismatch errors. + */ +class TypeDataNever final : public TypeData { + TypeDataNever() : TypeData(19ULL, 0, 0) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "never"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; + bool can_hold_tvm_null_instead() const override; }; /* @@ -389,7 +436,7 @@ public: * Empty tensor is not compatible with void, although at IR level they are similar, 0 stack slots. */ class TypeDataVoid final : public TypeData { - TypeDataVoid() : TypeData(10ULL, 0) {} + TypeDataVoid() : TypeData(10ULL, 0, 0) {} static TypePtr singleton; friend void type_system_init(); @@ -400,7 +447,7 @@ public: std::string as_human_readable() const override { return "void"; } bool can_rhs_be_assigned(TypePtr rhs) const override; bool can_be_casted_with_as_operator(TypePtr cast_to) const override; - int calc_width_on_stack() const override; + bool can_hold_tvm_null_instead() const override; }; diff --git a/ton/ton-types.h b/ton/ton-types.h index 2447a8c5..c7aff644 100644 --- a/ton/ton-types.h +++ b/ton/ton-types.h @@ -493,6 +493,7 @@ struct CatChainOptions { td::uint64 max_block_height_coeff = 0; bool debug_disable_db = false; + double broadcast_speed_multiplier = 1.0; }; struct ValidatorSessionConfig { diff --git a/tonlib/CMakeLists.txt b/tonlib/CMakeLists.txt index eb538361..3dbd628d 100644 --- a/tonlib/CMakeLists.txt +++ b/tonlib/CMakeLists.txt @@ -1,5 +1,7 @@ cmake_minimum_required(VERSION 3.5 FATAL_ERROR) +option(TONLIBJSON_STATIC "Build tonlibjson as static 
library" OFF) + if (NOT OPENSSL_FOUND) find_package(OpenSSL REQUIRED) endif() @@ -90,7 +92,7 @@ set(TONLIB_JSON_HEADERS tonlib/tonlib_client_json.h) set(TONLIB_JSON_SOURCE tonlib/tonlib_client_json.cpp) include(GenerateExportHeader) -if (USE_EMSCRIPTEN) +if (TONLIBJSON_STATIC OR USE_EMSCRIPTEN) add_library(tonlibjson STATIC ${TONLIB_JSON_SOURCE}) else() add_library(tonlibjson SHARED ${TONLIB_JSON_SOURCE}) @@ -103,7 +105,7 @@ else() endif() generate_export_header(tonlibjson EXPORT_FILE_NAME ${CMAKE_CURRENT_BINARY_DIR}/tonlib/tonlibjson_export.h) -if (USE_EMSCRIPTEN) +if (TONLIBJSON_STATIC OR USE_EMSCRIPTEN) target_compile_definitions(tonlibjson PUBLIC TONLIBJSON_STATIC_DEFINE) endif() target_include_directories(tonlibjson PUBLIC @@ -157,7 +159,7 @@ endif() install(FILES ${TONLIB_JSON_HEADERS} ${CMAKE_CURRENT_BINARY_DIR}/tonlib/tonlibjson_export.h DESTINATION include/tonlib/) -if (NOT USE_EMSCRIPTEN) +if (NOT USE_EMSCRIPTEN AND NOT TONLIBJSON_STATIC) install(EXPORT Tonlib FILE TonlibTargets.cmake NAMESPACE Tonlib:: diff --git a/tonlib/tonlib/TonlibClient.cpp b/tonlib/tonlib/TonlibClient.cpp index d917a57a..d73e715c 100644 --- a/tonlib/tonlib/TonlibClient.cpp +++ b/tonlib/tonlib/TonlibClient.cpp @@ -4619,6 +4619,8 @@ void TonlibClient::get_libraries(ton::BlockIdExt blkid, std::vector std::vector> result_entries; result_entries.reserve(library_list.size()); std::vector not_cached_hashes; + not_cached_hashes.reserve(library_list.size()); + for (auto& library_hash : library_list) { if (libraries.key_exists(library_hash)) { auto library_content = vm::std_boc_serialize(libraries.lookup_ref(library_hash)).move_as_ok().as_slice().str(); @@ -4633,7 +4635,8 @@ void TonlibClient::get_libraries(ton::BlockIdExt blkid, std::vector return; } - client_.send_query(ton::lite_api::liteServer_getLibrariesWithProof(ton::create_tl_lite_block_id(blkid), 1, std::move(not_cached_hashes)), + auto missed_lib_ids = not_cached_hashes; + 
client_.send_query(ton::lite_api::liteServer_getLibrariesWithProof(ton::create_tl_lite_block_id(blkid), 1, std::move(missed_lib_ids)), promise.wrap([self=this, blkid, result_entries = std::move(result_entries), not_cached_hashes] (td::Result> r_libraries) mutable -> td::Result> { diff --git a/validator-engine/validator-engine.cpp b/validator-engine/validator-engine.cpp index cc7c57b3..2ea04e18 100644 --- a/validator-engine/validator-engine.cpp +++ b/validator-engine/validator-engine.cpp @@ -1504,6 +1504,7 @@ td::Status ValidatorEngine::load_global_config() { } validator_options_.write().set_hardforks(std::move(h)); validator_options_.write().set_fast_state_serializer_enabled(fast_state_serializer_enabled_); + validator_options_.write().set_catchain_broadcast_speed_multiplier(broadcast_speed_multiplier_catchain_); return td::Status::OK(); } @@ -1957,7 +1958,8 @@ void ValidatorEngine::started_overlays() { void ValidatorEngine::start_validator() { validator_options_.write().set_allow_blockchain_init(config_.validators.size() > 0); - validator_options_.write().set_state_serializer_enabled(config_.state_serializer_enabled); + validator_options_.write().set_state_serializer_enabled(config_.state_serializer_enabled && + !state_serializer_disabled_flag_); load_collator_options(); validator_manager_ = ton::validator::ValidatorManagerFactory::create( @@ -2003,9 +2005,13 @@ void ValidatorEngine::start_full_node() { R.ensure(); td::actor::send_closure(SelfId, &ValidatorEngine::started_full_node); }); + ton::validator::fullnode::FullNodeOptions full_node_options{ + .config_ = config_.full_node_config, + .public_broadcast_speed_multiplier_ = broadcast_speed_multiplier_public_, + .private_broadcast_speed_multiplier_ = broadcast_speed_multiplier_private_}; full_node_ = ton::validator::fullnode::FullNode::create( short_id, ton::adnl::AdnlNodeIdShort{config_.full_node}, validator_options_->zero_block_id().file_hash, - config_.full_node_config, keyring_.get(), adnl_.get(), 
rldp_.get(), rldp2_.get(), + full_node_options, keyring_.get(), adnl_.get(), rldp_.get(), rldp2_.get(), default_dht_node_.is_zero() ? td::actor::ActorId{} : dht_nodes_[default_dht_node_].get(), overlay_manager_.get(), validator_manager_.get(), full_node_client_.get(), db_root_, std::move(P)); for (auto &v : config_.validators) { @@ -3973,7 +3979,7 @@ void ValidatorEngine::run_control_query(ton::ton_api::engine_validator_setStateS promise.set_value(ton::create_serialize_tl_object()); return; } - validator_options_.write().set_state_serializer_enabled(query.enabled_); + validator_options_.write().set_state_serializer_enabled(query.enabled_ && !state_serializer_disabled_flag_); td::actor::send_closure(validator_manager_, &ton::validator::ValidatorManagerInterface::update_options, validator_options_); config_.state_serializer_enabled = query.enabled_; @@ -4556,6 +4562,47 @@ int main(int argc, char *argv[]) { td::actor::send_closure(x, &ValidatorEngine::set_validator_telemetry_filename, s); }); }); + p.add_option( + '\0', "disable-state-serializer", + "disable persistent state serializer (similar to set-state-serializer-enabled 0 in validator console)", [&]() { + acts.push_back([&x]() { td::actor::send_closure(x, &ValidatorEngine::set_state_serializer_disabled_flag); }); + }); + p.add_checked_option( + '\0', "broadcast-speed-catchain", + "multiplier for broadcast speed in catchain overlays (experimental, default is 1.0, which is ~300 KB/s)", + [&](td::Slice s) -> td::Status { + auto v = td::to_double(s); + if (v <= 0.0) { + return td::Status::Error("broadcast-speed-catchain should be positive"); + } + acts.push_back( + [&x, v]() { td::actor::send_closure(x, &ValidatorEngine::set_broadcast_speed_multiplier_catchain, v); }); + return td::Status::OK(); + }); + p.add_checked_option( + '\0', "broadcast-speed-public", + "multiplier for broadcast speed in public shard overlays (experimental, default is 1.0, which is ~300 KB/s)", + [&](td::Slice s) -> td::Status { + auto v = 
td::to_double(s); + if (v <= 0.0) { + return td::Status::Error("broadcast-speed-public should be positive"); + } + acts.push_back( + [&x, v]() { td::actor::send_closure(x, &ValidatorEngine::set_broadcast_speed_multiplier_public, v); }); + return td::Status::OK(); + }); + p.add_checked_option( + '\0', "broadcast-speed-private", + "multiplier for broadcast speed in private block overlays (experimental, default is 1.0, which is ~300 KB/s)", + [&](td::Slice s) -> td::Status { + auto v = td::to_double(s); + if (v <= 0.0) { + return td::Status::Error("broadcast-speed-private should be positive"); + } + acts.push_back( + [&x, v]() { td::actor::send_closure(x, &ValidatorEngine::set_broadcast_speed_multiplier_private, v); }); + return td::Status::OK(); + }); auto S = p.run(argc, argv); if (S.is_error()) { LOG(ERROR) << "failed to parse options: " << S.move_as_error(); diff --git a/validator-engine/validator-engine.hpp b/validator-engine/validator-engine.hpp index b7abb0b1..e0dc91f1 100644 --- a/validator-engine/validator-engine.hpp +++ b/validator-engine/validator-engine.hpp @@ -228,6 +228,10 @@ class ValidatorEngine : public td::actor::Actor { std::string validator_telemetry_filename_; bool not_all_shards_ = false; std::vector add_shard_cmds_; + bool state_serializer_disabled_flag_ = false; + double broadcast_speed_multiplier_catchain_ = 1.0; + double broadcast_speed_multiplier_public_ = 1.0; + double broadcast_speed_multiplier_private_ = 1.0; std::set unsafe_catchains_; std::map> unsafe_catchain_rotations_; @@ -325,6 +329,18 @@ class ValidatorEngine : public td::actor::Actor { void add_shard_cmd(ton::ShardIdFull shard) { add_shard_cmds_.push_back(shard); } + void set_state_serializer_disabled_flag() { + state_serializer_disabled_flag_ = true; + } + void set_broadcast_speed_multiplier_catchain(double value) { + broadcast_speed_multiplier_catchain_ = value; + } + void set_broadcast_speed_multiplier_public(double value) { + broadcast_speed_multiplier_public_ = value; + } + 
void set_broadcast_speed_multiplier_private(double value) { + broadcast_speed_multiplier_private_ = value; + } void start_up() override; ValidatorEngine() { diff --git a/validator/db/archive-manager.cpp b/validator/db/archive-manager.cpp index d349f9d8..8c7cde17 100644 --- a/validator/db/archive-manager.cpp +++ b/validator/db/archive-manager.cpp @@ -1196,6 +1196,30 @@ void ArchiveManager::set_async_mode(bool mode, td::Promise promise) { } } +void ArchiveManager::prepare_stats(td::Promise>> promise) { + std::vector> stats; + { + std::map states; + for (auto &[key, file] : perm_states_) { + BlockSeqno seqno = key.first; + auto r_stat = td::stat(db_root_ + "/archive/states/" + file.filename_short()); + if (r_stat.is_error()) { + LOG(WARNING) << "Cannot stat persistent state file " << file.filename_short() << " : " << r_stat.move_as_error(); + } else { + states[seqno] += r_stat.move_as_ok().size_; + } + } + td::StringBuilder sb; + for (auto &[seqno, size] : states) { + sb << seqno << ":" << td::format::as_size(size) << " "; + } + if (!sb.as_cslice().empty()) { + stats.emplace_back("persistent_states", sb.as_cslice().str()); + } + } + promise.set_value(std::move(stats)); +} + void ArchiveManager::truncate(BlockSeqno masterchain_seqno, ConstBlockHandle handle, td::Promise promise) { index_->begin_transaction().ensure(); td::MultiPromise mp; diff --git a/validator/db/archive-manager.hpp b/validator/db/archive-manager.hpp index 90fc6a0b..d919e32e 100644 --- a/validator/db/archive-manager.hpp +++ b/validator/db/archive-manager.hpp @@ -81,6 +81,8 @@ class ArchiveManager : public td::actor::Actor { cur_shard_split_depth_ = value; } + void prepare_stats(td::Promise>> promise); + static constexpr td::uint32 archive_size() { return 20000; } diff --git a/validator/db/celldb.cpp b/validator/db/celldb.cpp index 9dcecdb3..e86a373d 100644 --- a/validator/db/celldb.cpp +++ b/validator/db/celldb.cpp @@ -158,6 +158,17 @@ void CellDbIn::start_up() { }, td::Timestamp::now()); } + + { + 
std::string key = "stats.last_deleted_mc_seqno", value; + auto R = cell_db_->get(td::as_slice(key), value); + R.ensure(); + if (R.ok() == td::KeyValue::GetStatus::Ok) { + auto r_value = td::to_integer_safe(value); + r_value.ensure(); + last_deleted_mc_state_ = r_value.move_as_ok(); + } + } } void CellDbIn::load_cell(RootHash hash, td::Promise> promise) { @@ -452,6 +463,11 @@ void CellDbIn::gc_cont2(BlockHandle handle) { cell_db_->erase(get_key(key_hash)).ensure(); set_block(F.prev, std::move(P)); set_block(F.next, std::move(N)); + if (handle->id().is_masterchain()) { + last_deleted_mc_state_ = handle->id().seqno(); + std::string key = "stats.last_deleted_mc_seqno", value = td::to_string(last_deleted_mc_state_); + cell_db_->set(td::as_slice(key), td::as_slice(value)); + } cell_db_->commit_write_batch().ensure(); alarm_timestamp() = td::Timestamp::now(); timer_write_batch.reset(); @@ -475,9 +491,6 @@ void CellDbIn::gc_cont2(BlockHandle handle) { if (!opts_->get_disable_rocksdb_stats()) { cell_db_statistics_.gc_cell_time_.insert(timer.elapsed() * 1e6); } - if (handle->id().is_masterchain()) { - last_deleted_mc_state_ = handle->id().seqno(); - } LOG(DEBUG) << "Deleted state " << handle->id().to_str(); timer_finish.reset(); timer_all.reset(); diff --git a/validator/db/rootdb.cpp b/validator/db/rootdb.cpp index e0579d57..8d83e7a7 100644 --- a/validator/db/rootdb.cpp +++ b/validator/db/rootdb.cpp @@ -438,6 +438,7 @@ void RootDb::allow_block_gc(BlockIdExt block_id, td::Promise promise) { void RootDb::prepare_stats(td::Promise>> promise) { auto merger = StatsMerger::create(std::move(promise)); td::actor::send_closure(cell_db_, &CellDb::prepare_stats, merger.make_promise("celldb.")); + td::actor::send_closure(archive_db_, &ArchiveManager::prepare_stats, merger.make_promise("archive.")); } void RootDb::truncate(BlockSeqno seqno, ConstBlockHandle handle, td::Promise promise) { diff --git a/validator/downloaders/download-state.cpp b/validator/downloaders/download-state.cpp 
index 32978ea5..8473cb22 100644 --- a/validator/downloaders/download-state.cpp +++ b/validator/downloaders/download-state.cpp @@ -38,6 +38,7 @@ DownloadShardState::DownloadShardState(BlockIdExt block_id, BlockIdExt mastercha } void DownloadShardState::start_up() { + status_ = ProcessStatus(manager_, "process.download_state"); alarm_timestamp() = timeout_; auto P = td::PromiseCreator::lambda([SelfId = actor_id(this)](td::Result R) { @@ -81,6 +82,7 @@ void DownloadShardState::download_state() { }); td::actor::send_closure(manager_, &ValidatorManager::send_get_block_proof_link_request, block_id_, priority_, std::move(P)); + status_.set_status(PSTRING() << block_id_.id.to_str() << " : downloading proof"); } void DownloadShardState::downloaded_proof_link(td::BufferSlice data) { @@ -123,6 +125,7 @@ void DownloadShardState::checked_proof_link() { td::actor::send_closure(manager_, &ValidatorManager::send_get_persistent_state_request, block_id_, masterchain_block_id_, priority_, std::move(P)); } + status_.set_status(PSTRING() << block_id_.id.to_str() << " : downloading state"); } void DownloadShardState::download_zero_state() { @@ -152,6 +155,7 @@ void DownloadShardState::downloaded_zero_state(td::BufferSlice data) { } void DownloadShardState::downloaded_shard_state(td::BufferSlice data) { + status_.set_status(PSTRING() << block_id_.id.to_str() << " : processing downloaded state"); auto S = create_shard_state(block_id_, data.clone()); if (S.is_error()) { fail_handler(actor_id(this), S.move_as_error()); @@ -174,6 +178,7 @@ void DownloadShardState::downloaded_shard_state(td::BufferSlice data) { } void DownloadShardState::checked_shard_state() { + status_.set_status(PSTRING() << block_id_.id.to_str() << " : storing state file"); LOG(WARNING) << "checked shard state " << block_id_.to_str(); auto P = td::PromiseCreator::lambda([SelfId = actor_id(this)](td::Result R) { R.ensure(); @@ -189,6 +194,7 @@ void DownloadShardState::checked_shard_state() { } void 
DownloadShardState::written_shard_state_file() { + status_.set_status(PSTRING() << block_id_.id.to_str() << " : storing state to celldb"); LOG(WARNING) << "written shard state file " << block_id_.to_str(); auto P = td::PromiseCreator::lambda([SelfId = actor_id(this)](td::Result> R) { R.ensure(); @@ -198,6 +204,7 @@ void DownloadShardState::written_shard_state_file() { } void DownloadShardState::written_shard_state(td::Ref state) { + status_.set_status(PSTRING() << block_id_.id.to_str() << " : finishing"); state_ = std::move(state); handle_->set_unix_time(state_->get_unix_time()); handle_->set_is_key_block(block_id_.is_masterchain()); diff --git a/validator/downloaders/download-state.hpp b/validator/downloaders/download-state.hpp index 02984c53..bde80aae 100644 --- a/validator/downloaders/download-state.hpp +++ b/validator/downloaders/download-state.hpp @@ -19,6 +19,7 @@ #pragma once #include "validator/interfaces/validator-manager.h" +#include "stats-provider.h" namespace ton { @@ -67,6 +68,8 @@ class DownloadShardState : public td::actor::Actor { td::BufferSlice data_; td::Ref state_; + + ProcessStatus status_; }; } // namespace validator diff --git a/validator/full-node-private-overlay.cpp b/validator/full-node-private-overlay.cpp index 1acfbd4e..f86323fc 100644 --- a/validator/full-node-private-overlay.cpp +++ b/validator/full-node-private-overlay.cpp @@ -264,8 +264,11 @@ void FullNodePrivateBlockOverlay::init() { overlay::OverlayPrivacyRules rules{overlay::Overlays::max_fec_broadcast_size(), overlay::CertificateFlags::AllowFec | overlay::CertificateFlags::Trusted, {}}; - td::actor::send_closure(overlays_, &overlay::Overlays::create_private_overlay, local_id_, overlay_id_full_.clone(), - nodes_, std::make_unique(actor_id(this)), rules, R"({ "type": "private-blocks" })"); + overlay::OverlayOptions overlay_options; + overlay_options.broadcast_speed_multiplier_ = opts_.private_broadcast_speed_multiplier_; + td::actor::send_closure(overlays_, 
&overlay::Overlays::create_private_overlay_ex, local_id_, overlay_id_full_.clone(), + nodes_, std::make_unique(actor_id(this)), rules, R"({ "type": "private-blocks" })", + overlay_options); td::actor::send_closure(rldp_, &rldp::Rldp::add_id, local_id_); td::actor::send_closure(rldp2_, &rldp2::Rldp::add_id, local_id_); @@ -366,7 +369,7 @@ void FullNodeCustomOverlay::receive_broadcast(PublicKeyHash src, td::BufferSlice } void FullNodeCustomOverlay::send_external_message(td::BufferSlice data) { - if (!inited_ || config_.ext_messages_broadcast_disabled_) { + if (!inited_ || opts_.config_.ext_messages_broadcast_disabled_) { return; } VLOG(FULL_NODE_DEBUG) << "Sending external message to custom overlay \"" << name_ << "\""; @@ -472,10 +475,13 @@ void FullNodeCustomOverlay::init() { authorized_keys[sender.pubkey_hash()] = overlay::Overlays::max_fec_broadcast_size(); } overlay::OverlayPrivacyRules rules{overlay::Overlays::max_fec_broadcast_size(), 0, std::move(authorized_keys)}; + overlay::OverlayOptions overlay_options; + overlay_options.broadcast_speed_multiplier_ = opts_.private_broadcast_speed_multiplier_; td::actor::send_closure( - overlays_, &overlay::Overlays::create_private_overlay, local_id_, overlay_id_full_.clone(), nodes_, + overlays_, &overlay::Overlays::create_private_overlay_ex, local_id_, overlay_id_full_.clone(), nodes_, std::make_unique(actor_id(this)), rules, - PSTRING() << R"({ "type": "custom-overlay", "name": ")" << td::format::Escaped{name_} << R"(" })"); + PSTRING() << R"({ "type": "custom-overlay", "name": ")" << td::format::Escaped{name_} << R"(" })", + overlay_options); td::actor::send_closure(rldp_, &rldp::Rldp::add_id, local_id_); td::actor::send_closure(rldp2_, &rldp2::Rldp::add_id, local_id_); diff --git a/validator/full-node-private-overlay.hpp b/validator/full-node-private-overlay.hpp index 1e29f8c9..70e196ea 100644 --- a/validator/full-node-private-overlay.hpp +++ b/validator/full-node-private-overlay.hpp @@ -50,14 +50,14 @@ class 
FullNodePrivateBlockOverlay : public td::actor::Actor { void collect_validator_telemetry(std::string filename); void set_config(FullNodeConfig config) { - config_ = std::move(config); + opts_.config_ = std::move(config); } void start_up() override; void tear_down() override; FullNodePrivateBlockOverlay(adnl::AdnlNodeIdShort local_id, std::vector nodes, - FileHash zero_state_file_hash, FullNodeConfig config, + FileHash zero_state_file_hash, FullNodeOptions opts, td::actor::ActorId keyring, td::actor::ActorId adnl, td::actor::ActorId rldp, td::actor::ActorId rldp2, td::actor::ActorId overlays, @@ -66,7 +66,7 @@ class FullNodePrivateBlockOverlay : public td::actor::Actor { : local_id_(local_id) , nodes_(std::move(nodes)) , zero_state_file_hash_(zero_state_file_hash) - , config_(config) + , opts_(opts) , keyring_(keyring) , adnl_(adnl) , rldp_(rldp) @@ -80,7 +80,7 @@ class FullNodePrivateBlockOverlay : public td::actor::Actor { adnl::AdnlNodeIdShort local_id_; std::vector nodes_; FileHash zero_state_file_hash_; - FullNodeConfig config_; + FullNodeOptions opts_; bool enable_compression_ = true; td::actor::ActorId keyring_; @@ -126,14 +126,14 @@ class FullNodeCustomOverlay : public td::actor::Actor { td::BufferSlice data); void set_config(FullNodeConfig config) { - config_ = std::move(config); + opts_.config_ = std::move(config); } void start_up() override; void tear_down() override; FullNodeCustomOverlay(adnl::AdnlNodeIdShort local_id, CustomOverlayParams params, FileHash zero_state_file_hash, - FullNodeConfig config, td::actor::ActorId keyring, + FullNodeOptions opts, td::actor::ActorId keyring, td::actor::ActorId adnl, td::actor::ActorId rldp, td::actor::ActorId rldp2, td::actor::ActorId overlays, td::actor::ActorId validator_manager, @@ -144,7 +144,7 @@ class FullNodeCustomOverlay : public td::actor::Actor { , msg_senders_(std::move(params.msg_senders_)) , block_senders_(std::move(params.block_senders_)) , zero_state_file_hash_(zero_state_file_hash) - , 
config_(config) + , opts_(opts) , keyring_(keyring) , adnl_(adnl) , rldp_(rldp) @@ -161,7 +161,7 @@ class FullNodeCustomOverlay : public td::actor::Actor { std::map msg_senders_; std::set block_senders_; FileHash zero_state_file_hash_; - FullNodeConfig config_; + FullNodeOptions opts_; td::actor::ActorId keyring_; td::actor::ActorId adnl_; diff --git a/validator/full-node-shard.cpp b/validator/full-node-shard.cpp index 7d33a195..ac0eb768 100644 --- a/validator/full-node-shard.cpp +++ b/validator/full-node-shard.cpp @@ -105,6 +105,7 @@ void FullNodeShardImpl::create_overlay() { }; overlay::OverlayOptions opts; opts.announce_self_ = active_; + opts.broadcast_speed_multiplier_ = opts_.public_broadcast_speed_multiplier_; td::actor::send_closure(overlays_, &overlay::Overlays::create_public_overlay_ex, adnl_id_, overlay_id_full_.clone(), std::make_unique(actor_id(this)), rules_, PSTRING() << "{ \"type\": \"shard\", \"shard_id\": " << get_shard() @@ -132,7 +133,7 @@ void FullNodeShardImpl::check_broadcast(PublicKeyHash src, td::BufferSlice broad if (!processed_ext_msg_broadcasts_.insert(hash).second) { return promise.set_error(td::Status::Error("duplicate external message broadcast")); } - if (config_.ext_messages_broadcast_disabled_) { + if (opts_.config_.ext_messages_broadcast_disabled_) { promise.set_error(td::Status::Error("rebroadcasting external messages is disabled")); promise = [manager = validator_manager_, message = q->message_->data_.clone()](td::Result R) mutable { if (R.is_ok()) { @@ -850,7 +851,7 @@ void FullNodeShardImpl::send_ihr_message(td::BufferSlice data) { } void FullNodeShardImpl::send_external_message(td::BufferSlice data) { - if (config_.ext_messages_broadcast_disabled_) { + if (opts_.config_.ext_messages_broadcast_disabled_) { return; } if (!client_.empty()) { @@ -1367,7 +1368,7 @@ void FullNodeShardImpl::get_stats_extra(td::Promise promise) { } FullNodeShardImpl::FullNodeShardImpl(ShardIdFull shard, PublicKeyHash local_id, adnl::AdnlNodeIdShort 
adnl_id, - FileHash zero_state_file_hash, FullNodeConfig config, + FileHash zero_state_file_hash, FullNodeOptions opts, td::actor::ActorId keyring, td::actor::ActorId adnl, td::actor::ActorId rldp, td::actor::ActorId rldp2, td::actor::ActorId overlays, @@ -1387,17 +1388,17 @@ FullNodeShardImpl::FullNodeShardImpl(ShardIdFull shard, PublicKeyHash local_id, , client_(client) , full_node_(full_node) , active_(active) - , config_(config) { + , opts_(opts) { } td::actor::ActorOwn FullNodeShard::create( ShardIdFull shard, PublicKeyHash local_id, adnl::AdnlNodeIdShort adnl_id, FileHash zero_state_file_hash, - FullNodeConfig config, td::actor::ActorId keyring, td::actor::ActorId adnl, + FullNodeOptions opts, td::actor::ActorId keyring, td::actor::ActorId adnl, td::actor::ActorId rldp, td::actor::ActorId rldp2, td::actor::ActorId overlays, td::actor::ActorId validator_manager, td::actor::ActorId client, td::actor::ActorId full_node, bool active) { return td::actor::create_actor(PSTRING() << "tonnode" << shard.to_str(), shard, local_id, adnl_id, - zero_state_file_hash, config, keyring, adnl, rldp, rldp2, overlays, + zero_state_file_hash, opts, keyring, adnl, rldp, rldp2, overlays, validator_manager, client, full_node, active); } diff --git a/validator/full-node-shard.h b/validator/full-node-shard.h index 16945325..5898db80 100644 --- a/validator/full-node-shard.h +++ b/validator/full-node-shard.h @@ -76,7 +76,7 @@ class FullNodeShard : public td::actor::Actor { static td::actor::ActorOwn create( ShardIdFull shard, PublicKeyHash local_id, adnl::AdnlNodeIdShort adnl_id, FileHash zero_state_file_hash, - FullNodeConfig config, td::actor::ActorId keyring, td::actor::ActorId adnl, + FullNodeOptions opts, td::actor::ActorId keyring, td::actor::ActorId adnl, td::actor::ActorId rldp, td::actor::ActorId rldp2, td::actor::ActorId overlays, td::actor::ActorId validator_manager, td::actor::ActorId client, td::actor::ActorId full_node, bool active); diff --git 
a/validator/full-node-shard.hpp b/validator/full-node-shard.hpp index 86748134..fb3eef76 100644 --- a/validator/full-node-shard.hpp +++ b/validator/full-node-shard.hpp @@ -86,7 +86,7 @@ class FullNodeShardImpl : public FullNodeShard { void set_active(bool active) override; void set_config(FullNodeConfig config) override { - config_ = config; + opts_.config_ = config; } void try_get_next_block(td::Timestamp timestamp, td::Promise promise); @@ -222,7 +222,7 @@ class FullNodeShardImpl : public FullNodeShard { } FullNodeShardImpl(ShardIdFull shard, PublicKeyHash local_id, adnl::AdnlNodeIdShort adnl_id, - FileHash zero_state_file_hash, FullNodeConfig config, td::actor::ActorId keyring, + FileHash zero_state_file_hash, FullNodeOptions opts, td::actor::ActorId keyring, td::actor::ActorId adnl, td::actor::ActorId rldp, td::actor::ActorId rldp2, td::actor::ActorId overlays, td::actor::ActorId validator_manager, @@ -269,7 +269,7 @@ class FullNodeShardImpl : public FullNodeShard { bool active_; - FullNodeConfig config_; + FullNodeOptions opts_; std::set my_ext_msg_broadcasts_; std::set processed_ext_msg_broadcasts_; diff --git a/validator/full-node.cpp b/validator/full-node.cpp index 658cb34e..e1951c36 100644 --- a/validator/full-node.cpp +++ b/validator/full-node.cpp @@ -139,7 +139,7 @@ void FullNodeImpl::update_adnl_id(adnl::AdnlNodeIdShort adnl_id, td::Promise state, std void FullNodeImpl::update_shard_actor(ShardIdFull shard, bool active) { ShardInfo &info = shards_[shard]; if (info.actor.empty()) { - info.actor = FullNodeShard::create(shard, local_id_, adnl_id_, zero_state_file_hash_, config_, keyring_, adnl_, rldp_, + info.actor = FullNodeShard::create(shard, local_id_, adnl_id_, zero_state_file_hash_, opts_, keyring_, adnl_, rldp_, rldp2_, overlays_, validator_manager_, client_, actor_id(this), active); if (!all_validators_.empty()) { td::actor::send_closure(info.actor, &FullNodeShard::update_validators, all_validators_, sign_cert_by_); @@ -717,7 +717,7 @@ void 
FullNodeImpl::create_private_block_overlay(PublicKeyHash key) { nodes.push_back(p.second); } private_block_overlays_[key] = td::actor::create_actor( - "BlocksPrivateOverlay", current_validators_[key], std::move(nodes), zero_state_file_hash_, config_, keyring_, + "BlocksPrivateOverlay", current_validators_[key], std::move(nodes), zero_state_file_hash_, opts_, keyring_, adnl_, rldp_, rldp2_, overlays_, validator_manager_, actor_id(this)); update_validator_telemetry_collector(); } @@ -735,7 +735,7 @@ void FullNodeImpl::update_custom_overlay(CustomOverlayInfo &overlay) { old_actors.erase(it); } else { overlay.actors_[local_id] = td::actor::create_actor( - "CustomOverlay", local_id, params, zero_state_file_hash_, config_, keyring_, adnl_, rldp_, rldp2_, + "CustomOverlay", local_id, params, zero_state_file_hash_, opts_, keyring_, adnl_, rldp_, rldp2_, overlays_, validator_manager_, actor_id(this)); } } @@ -794,7 +794,7 @@ void FullNodeImpl::send_block_candidate_broadcast_to_custom_overlays(const Block } FullNodeImpl::FullNodeImpl(PublicKeyHash local_id, adnl::AdnlNodeIdShort adnl_id, FileHash zero_state_file_hash, - FullNodeConfig config, td::actor::ActorId keyring, + FullNodeOptions opts, td::actor::ActorId keyring, td::actor::ActorId adnl, td::actor::ActorId rldp, td::actor::ActorId rldp2, td::actor::ActorId dht, td::actor::ActorId overlays, @@ -814,16 +814,16 @@ FullNodeImpl::FullNodeImpl(PublicKeyHash local_id, adnl::AdnlNodeIdShort adnl_id , client_(client) , db_root_(db_root) , started_promise_(std::move(started_promise)) - , config_(config) { + , opts_(opts) { } td::actor::ActorOwn FullNode::create( - ton::PublicKeyHash local_id, adnl::AdnlNodeIdShort adnl_id, FileHash zero_state_file_hash, FullNodeConfig config, + ton::PublicKeyHash local_id, adnl::AdnlNodeIdShort adnl_id, FileHash zero_state_file_hash, FullNodeOptions opts, td::actor::ActorId keyring, td::actor::ActorId adnl, td::actor::ActorId rldp, td::actor::ActorId rldp2, td::actor::ActorId dht, 
td::actor::ActorId overlays, td::actor::ActorId validator_manager, td::actor::ActorId client, std::string db_root, td::Promise started_promise) { - return td::actor::create_actor("fullnode", local_id, adnl_id, zero_state_file_hash, config, keyring, + return td::actor::create_actor("fullnode", local_id, adnl_id, zero_state_file_hash, opts, keyring, adnl, rldp, rldp2, dht, overlays, validator_manager, client, db_root, std::move(started_promise)); } diff --git a/validator/full-node.h b/validator/full-node.h index fdb1bf3b..555082dc 100644 --- a/validator/full-node.h +++ b/validator/full-node.h @@ -55,6 +55,12 @@ struct FullNodeConfig { bool ext_messages_broadcast_disabled_ = false; }; +struct FullNodeOptions { + FullNodeConfig config_; + double public_broadcast_speed_multiplier_ = 1.0; + double private_broadcast_speed_multiplier_ = 1.0; +}; + struct CustomOverlayParams { std::string name_; std::vector nodes_; @@ -107,7 +113,7 @@ class FullNode : public td::actor::Actor { enum { broadcast_mode_public = 1, broadcast_mode_private_block = 2, broadcast_mode_custom = 4 }; static td::actor::ActorOwn create( - ton::PublicKeyHash local_id, adnl::AdnlNodeIdShort adnl_id, FileHash zero_state_file_hash, FullNodeConfig config, + ton::PublicKeyHash local_id, adnl::AdnlNodeIdShort adnl_id, FileHash zero_state_file_hash, FullNodeOptions opts, td::actor::ActorId keyring, td::actor::ActorId adnl, td::actor::ActorId rldp, td::actor::ActorId rldp2, td::actor::ActorId dht, td::actor::ActorId overlays, td::actor::ActorId validator_manager, diff --git a/validator/full-node.hpp b/validator/full-node.hpp index 0ea6fa0b..b4c79363 100644 --- a/validator/full-node.hpp +++ b/validator/full-node.hpp @@ -98,7 +98,7 @@ class FullNodeImpl : public FullNode { void start_up() override; FullNodeImpl(PublicKeyHash local_id, adnl::AdnlNodeIdShort adnl_id, FileHash zero_state_file_hash, - FullNodeConfig config, td::actor::ActorId keyring, td::actor::ActorId adnl, + FullNodeOptions opts, td::actor::ActorId 
keyring, td::actor::ActorId adnl, td::actor::ActorId rldp, td::actor::ActorId rldp2, td::actor::ActorId dht, td::actor::ActorId overlays, td::actor::ActorId validator_manager, @@ -141,7 +141,7 @@ class FullNodeImpl : public FullNode { std::set local_keys_; td::Promise started_promise_; - FullNodeConfig config_; + FullNodeOptions opts_; std::map> private_block_overlays_; bool broadcast_block_candidates_in_public_overlay_ = false; diff --git a/validator/impl/accept-block.cpp b/validator/impl/accept-block.cpp index a9dd7fe2..de48626d 100644 --- a/validator/impl/accept-block.cpp +++ b/validator/impl/accept-block.cpp @@ -308,8 +308,11 @@ bool AcceptBlockQuery::create_new_proof() { } // 10. check resulting object if (!block::gen::t_BlockProof.validate_ref(bs_cell)) { - block::gen::t_BlockProof.print_ref(std::cerr, bs_cell); - vm::load_cell_slice(bs_cell).print_rec(std::cerr); + FLOG(WARNING) { + sb << "BlockProof object just created failed to pass automated consistency checks: "; + block::gen::t_BlockProof.print_ref(sb, bs_cell); + vm::load_cell_slice(bs_cell).print_rec(sb); + }; return fatal_error("BlockProof object just created failed to pass automated consistency checks"); } // 11. 
create a proof object from this cell @@ -851,15 +854,12 @@ bool AcceptBlockQuery::create_top_shard_block_description() { && (root.is_null() || cb.store_ref_bool(std::move(root))) && cb.finalize_to(td_cell))) { return fatal_error("cannot serialize ShardTopBlockDescription for the newly-accepted block "s + id_.to_str()); } - if (false) { - // debug output - std::cerr << "new ShardTopBlockDescription: "; - block::gen::t_TopBlockDescr.print_ref(std::cerr, td_cell); - vm::load_cell_slice(td_cell).print_rec(std::cerr); - } if (!block::gen::t_TopBlockDescr.validate_ref(td_cell)) { - block::gen::t_TopBlockDescr.print_ref(std::cerr, td_cell); - vm::load_cell_slice(td_cell).print_rec(std::cerr); + FLOG(WARNING) { + sb << "just created ShardTopBlockDescription is invalid: "; + block::gen::t_TopBlockDescr.print_ref(sb, td_cell); + vm::load_cell_slice(td_cell).print_rec(sb); + }; return fatal_error("just created ShardTopBlockDescription for "s + id_.to_str() + " is invalid"); } auto res = vm::std_boc_serialize(td_cell, 0); diff --git a/validator/impl/collator-impl.h b/validator/impl/collator-impl.h index a781968d..340e3a40 100644 --- a/validator/impl/collator-impl.h +++ b/validator/impl/collator-impl.h @@ -50,7 +50,7 @@ class Collator final : public td::actor::Actor { using LtCellRef = block::LtCellRef; using NewOutMsg = block::NewOutMsg; const ShardIdFull shard_; - ton::BlockId new_id; + ton::BlockId new_id{workchainInvalid, 0, 0}; bool busy_{false}; bool before_split_{false}; bool after_split_{false}; @@ -109,14 +109,11 @@ class Collator final : public td::actor::Actor { return 2; } - static td::Result> - impl_create_ordinary_transaction(Ref msg_root, - block::Account* acc, - UnixTime utime, LogicalTime lt, - block::StoragePhaseConfig* storage_phase_cfg, - block::ComputePhaseConfig* compute_phase_cfg, - block::ActionPhaseConfig* action_phase_cfg, - bool external, LogicalTime after_lt); + static td::Result> impl_create_ordinary_transaction( + Ref msg_root, block::Account* acc, 
UnixTime utime, LogicalTime lt, + block::StoragePhaseConfig* storage_phase_cfg, block::ComputePhaseConfig* compute_phase_cfg, + block::ActionPhaseConfig* action_phase_cfg, block::SerializeConfig* serialize_cfg, bool external, + LogicalTime after_lt); private: void start_up() override; @@ -177,6 +174,7 @@ class Collator final : public td::actor::Actor { block::StoragePhaseConfig storage_phase_cfg_{&storage_prices_}; block::ComputePhaseConfig compute_phase_cfg_; block::ActionPhaseConfig action_phase_cfg_; + block::SerializeConfig serialize_cfg_; td::RefInt256 masterchain_create_fee_, basechain_create_fee_; std::unique_ptr block_limits_; std::unique_ptr block_limit_status_; diff --git a/validator/impl/collator.cpp b/validator/impl/collator.cpp index e171eaa1..2a6d7a2b 100644 --- a/validator/impl/collator.cpp +++ b/validator/impl/collator.cpp @@ -53,16 +53,6 @@ static constexpr int HIGH_PRIORITY_EXTERNAL = 10; // don't skip high priority e static constexpr int MAX_ATTEMPTS = 5; -#define DBG(__n) dbg(__n)&& -#define DSTART int __dcnt = 0; -#define DEB DBG(++__dcnt) - -static inline bool dbg(int c) TD_UNUSED; -static inline bool dbg(int c) { - std::cerr << '[' << (char)('0' + c / 10) << (char)('0' + c % 10) << ']'; - return true; -} - /** * Constructs a Collator object. 
* @@ -362,6 +352,8 @@ bool Collator::fatal_error(td::Status error) { attempt_idx_ + 1); } else { main_promise(std::move(error)); + td::actor::send_closure(manager, &ValidatorManager::record_collate_query_stats, BlockIdExt{new_id, RootHash::zero(), FileHash::zero()}, + work_timer_.elapsed(), cpu_work_timer_.elapsed(), td::optional{}); } busy_ = false; } @@ -761,8 +753,6 @@ bool Collator::unpack_last_mc_state() { << " (upgrade validator software?)"; } // TODO: extract start_lt and end_lt from prev_mc_block as well - // std::cerr << " block::gen::ShardState::print_ref(mc_state_root) = "; - // block::gen::t_ShardState.print_ref(std::cerr, mc_state_root, 2); return true; } @@ -888,8 +878,10 @@ void Collator::got_neighbor_out_queue(int i, td::Result> res) // unpack ProcessedUpto LOG(DEBUG) << "unpacking ProcessedUpto of neighbor " << descr.blk_.to_str(); if (verbosity >= 2) { - block::gen::t_ProcessedInfo.print(std::cerr, qinfo.proc_info); - qinfo.proc_info->print_rec(std::cerr); + FLOG(INFO) { + block::gen::t_ProcessedInfo.print(sb, qinfo.proc_info); + qinfo.proc_info->print_rec(sb); + }; } descr.processed_upto = block::MsgProcessedUptoCollection::unpack(descr.shard(), qinfo.proc_info); if (!descr.processed_upto) { @@ -1756,9 +1748,11 @@ bool Collator::import_new_shard_top_blocks() { shard_conf_adjusted_ = true; } if (tb_act && verbosity >= 0) { // DEBUG - LOG(INFO) << "updated shard block configuration to "; - auto csr = shard_conf_->get_root_csr(); - block::gen::t_ShardHashes.print(std::cerr, csr.write()); + FLOG(INFO) { + sb << "updated shard block configuration to "; + auto csr = shard_conf_->get_root_csr(); + block::gen::t_ShardHashes.print(sb, csr); + }; } block::gen::ShardFeeCreated::Record fc; if (!(tlb::csr_unpack(fees_import_dict_->get_root_extra(), @@ -2001,12 +1995,9 @@ bool Collator::init_lt() { * @returns True if the configuration parameters were successfully fetched and initialized, false otherwise. 
*/ bool Collator::fetch_config_params() { - auto res = block::FetchConfigParams::fetch_config_params(*config_, - &old_mparams_, &storage_prices_, &storage_phase_cfg_, - &rand_seed_, &compute_phase_cfg_, &action_phase_cfg_, - &masterchain_create_fee_, &basechain_create_fee_, - workchain(), now_ - ); + auto res = block::FetchConfigParams::fetch_config_params( + *config_, &old_mparams_, &storage_prices_, &storage_phase_cfg_, &rand_seed_, &compute_phase_cfg_, + &action_phase_cfg_, &serialize_cfg_, &masterchain_create_fee_, &basechain_create_fee_, workchain(), now_); if (res.is_error()) { return fatal_error(res.move_as_error()); } @@ -2279,10 +2270,12 @@ bool Collator::dequeue_message(Ref msg_envelope, ton::LogicalTime deli bool Collator::out_msg_queue_cleanup() { LOG(INFO) << "cleaning outbound queue from messages already imported by neighbors"; if (verbosity >= 2) { - auto rt = out_msg_queue_->get_root(); - std::cerr << "old out_msg_queue is "; - block::gen::t_OutMsgQueue.print(std::cerr, *rt); - rt->print_rec(std::cerr); + FLOG(INFO) { + auto rt = out_msg_queue_->get_root(); + sb << "old out_msg_queue is "; + block::gen::t_OutMsgQueue.print(sb, rt); + rt->print_rec(sb); + }; } if (after_merge_) { @@ -2422,10 +2415,12 @@ bool Collator::out_msg_queue_cleanup() { << out_msg_queue_size_; } if (verbosity >= 2) { - auto rt = out_msg_queue_->get_root(); - std::cerr << "new out_msg_queue is "; - block::gen::t_OutMsgQueue.print(std::cerr, *rt); - rt->print_rec(std::cerr); + FLOG(INFO) { + auto rt = out_msg_queue_->get_root(); + sb << "new out_msg_queue is "; + block::gen::t_OutMsgQueue.print(sb, rt); + rt->print_rec(sb); + }; } return register_out_msg_queue_op(true); } @@ -2524,19 +2519,27 @@ bool Collator::combine_account_transactions() { auto cell = cb.finalize(); auto csr = vm::load_cell_slice_ref(cell); if (verbosity > 2) { - std::cerr << "new AccountBlock for " << z.first.to_hex() << ": "; - block::gen::t_AccountBlock.print_ref(std::cerr, cell); - 
csr->print_rec(std::cerr); + FLOG(INFO) { + sb << "new AccountBlock for " << z.first.to_hex() << ": "; + block::gen::t_AccountBlock.print_ref(sb, cell); + csr->print_rec(sb); + }; } if (!block::gen::t_AccountBlock.validate_ref(100000, cell)) { - block::gen::t_AccountBlock.print_ref(std::cerr, cell); - csr->print_rec(std::cerr); + FLOG(WARNING) { + sb << "AccountBlock failed to pass automatic validation tests: "; + block::gen::t_AccountBlock.print_ref(sb, cell); + csr->print_rec(sb); + }; return fatal_error(std::string{"new AccountBlock for "} + z.first.to_hex() + " failed to pass automatic validation tests"); } if (!block::tlb::t_AccountBlock.validate_ref(100000, cell)) { - block::gen::t_AccountBlock.print_ref(std::cerr, cell); - csr->print_rec(std::cerr); + FLOG(WARNING) { + sb << "AccountBlock failed to pass handwritten validation tests: "; + block::gen::t_AccountBlock.print_ref(sb, cell); + csr->print_rec(sb); + }; return fatal_error(std::string{"new AccountBlock for "} + z.first.to_hex() + " failed to pass handwritten validation tests"); } @@ -2561,8 +2564,10 @@ bool Collator::combine_account_transactions() { } else if (acc.status == block::Account::acc_nonexist) { // account deleted if (verbosity > 2) { - std::cerr << "deleting account " << acc.addr.to_hex() << " with empty new value "; - block::gen::t_Account.print_ref(std::cerr, acc.total_state); + FLOG(INFO) { + sb << "deleting account " << acc.addr.to_hex() << " with empty new value "; + block::gen::t_Account.print_ref(sb, acc.total_state); + }; } if (account_dict->lookup_delete(acc.addr).is_null()) { return fatal_error(std::string{"cannot delete account "} + acc.addr.to_hex() + " from ShardAccounts"); @@ -2570,8 +2575,10 @@ bool Collator::combine_account_transactions() { } else { // existing account modified if (verbosity > 4) { - std::cerr << "modifying account " << acc.addr.to_hex() << " to "; - block::gen::t_Account.print_ref(std::cerr, acc.total_state); + FLOG(INFO) { + sb << "modifying account " << 
acc.addr.to_hex() << " to "; + block::gen::t_Account.print_ref(sb, acc.total_state); + }; } if (!(cb.store_ref_bool(acc.total_state) // account_descr$_ account:^Account && cb.store_bits_bool(acc.last_trans_hash_) // last_trans_hash:bits256 @@ -2594,9 +2601,11 @@ bool Collator::combine_account_transactions() { return fatal_error("cannot serialize ShardAccountBlocks"); } if (verbosity > 2) { - std::cerr << "new ShardAccountBlocks: "; - block::gen::t_ShardAccountBlocks.print_ref(std::cerr, shard_account_blocks_); - vm::load_cell_slice(shard_account_blocks_).print_rec(std::cerr); + FLOG(INFO) { + sb << "new ShardAccountBlocks: "; + block::gen::t_ShardAccountBlocks.print_ref(sb, shard_account_blocks_); + vm::load_cell_slice(shard_account_blocks_).print_rec(sb); + }; } if (!block::gen::t_ShardAccountBlocks.validate_ref(100000, shard_account_blocks_)) { return fatal_error("new ShardAccountBlocks failed to pass automatic validity tests"); @@ -2606,9 +2615,11 @@ bool Collator::combine_account_transactions() { } auto shard_accounts = account_dict->get_root(); if (verbosity > 2) { - std::cerr << "new ShardAccounts: "; - block::gen::t_ShardAccounts.print(std::cerr, *shard_accounts); - shard_accounts->print_rec(std::cerr); + FLOG(INFO) { + sb << "new ShardAccounts: "; + block::gen::t_ShardAccounts.print(sb, shard_accounts); + shard_accounts->print_rec(sb); + }; } if (verify >= 2) { LOG(INFO) << "verifying new ShardAccounts"; @@ -2659,7 +2670,9 @@ bool Collator::create_special_transaction(block::CurrencyCollection amount, Ref< addr.to_hex()); } if (verbosity >= 4) { - block::gen::t_Message_Any.print_ref(std::cerr, msg); + FLOG(INFO) { + block::gen::t_Message_Any.print_ref(sb, msg); + }; } CHECK(block::gen::t_Message_Any.validate_ref(msg)); CHECK(block::tlb::t_Message.validate_ref(msg)); @@ -2734,7 +2747,7 @@ bool Collator::create_ticktock_transaction(const ton::StdSmcAddress& smc_addr, t return fatal_error(td::Status::Error( -666, std::string{"cannot create action phase of a new 
transaction for smart contract "} + smc_addr.to_hex())); } - if (!trans->serialize()) { + if (!trans->serialize(serialize_cfg_)) { return fatal_error(td::Status::Error( -666, std::string{"cannot serialize new transaction for smart contract "} + smc_addr.to_hex())); } @@ -2818,7 +2831,7 @@ Ref Collator::create_ordinary_transaction(Ref msg_root, after_lt = std::max(after_lt, it->second); } auto res = impl_create_ordinary_transaction(msg_root, acc, now_, start_lt, &storage_phase_cfg_, &compute_phase_cfg_, - &action_phase_cfg_, external, after_lt); + &action_phase_cfg_, &serialize_cfg_, external, after_lt); if (res.is_error()) { auto error = res.move_as_error(); if (error.code() == -701) { @@ -2869,6 +2882,7 @@ Ref Collator::create_ordinary_transaction(Ref msg_root, * @param storage_phase_cfg The configuration for the storage phase of the transaction. * @param compute_phase_cfg The configuration for the compute phase of the transaction. * @param action_phase_cfg The configuration for the action phase of the transaction. + * @param serialize_cfg The configuration for the serialization of the transaction. * @param external Flag indicating if the message is external. * @param after_lt The logical time after which the transaction should occur. Used only for external messages. 
* @@ -2882,6 +2896,7 @@ td::Result> Collator::impl_crea block::StoragePhaseConfig* storage_phase_cfg, block::ComputePhaseConfig* compute_phase_cfg, block::ActionPhaseConfig* action_phase_cfg, + block::SerializeConfig* serialize_cfg, bool external, LogicalTime after_lt) { if (acc->last_trans_end_lt_ >= lt && acc->transactions.empty()) { return td::Status::Error(-669, PSTRING() << "last transaction time in the state of account " << acc->workchain @@ -2949,7 +2964,7 @@ td::Result> Collator::impl_crea return td::Status::Error( -669, "cannot create bounce phase of a new transaction for smart contract "s + acc->addr.to_hex()); } - if (!trans->serialize()) { + if (!trans->serialize(*serialize_cfg)) { return td::Status::Error(-669, "cannot serialize new transaction for smart contract "s + acc->addr.to_hex()); } return std::move(trans); @@ -3163,8 +3178,10 @@ int Collator::process_one_new_message(block::NewOutMsg msg, bool enqueue_only, R Ref msg_env; CHECK(block::tlb::pack_cell(msg_env, msg_env_rec)); if (verbosity > 2) { - std::cerr << "new (processed outbound) message envelope: "; - block::gen::t_MsgEnvelope.print_ref(std::cerr, msg_env); + FLOG(INFO) { + sb << "new (processed outbound) message envelope: "; + block::gen::t_MsgEnvelope.print_ref(sb, msg_env); + }; } // 3. create InMsg, referring to this MsgEnvelope and this Transaction vm::CellBuilder cb; @@ -3286,16 +3303,20 @@ bool Collator::enqueue_transit_message(Ref msg, Ref old_msg_ Ref out_msg = cb.finalize(); // 4.1. insert OutMsg into OutMsgDescr if (verbosity > 2) { - std::cerr << "OutMsg for a transit message: "; - block::gen::t_OutMsg.print_ref(std::cerr, out_msg); + FLOG(INFO) { + sb << "OutMsg for a transit message: "; + block::gen::t_OutMsg.print_ref(sb, out_msg); + }; } if (!insert_out_msg(out_msg)) { return fatal_error("cannot insert a new OutMsg into OutMsgDescr"); } // 4.2. 
insert InMsg into InMsgDescr if (verbosity > 2) { - std::cerr << "InMsg for a transit message: "; - block::gen::t_InMsg.print_ref(std::cerr, in_msg); + FLOG(INFO) { + sb << "InMsg for a transit message: "; + block::gen::t_InMsg.print_ref(sb, in_msg); + }; } if (!insert_in_msg(in_msg)) { return fatal_error("cannot insert a new InMsg into InMsgDescr"); @@ -3366,7 +3387,10 @@ bool Collator::process_inbound_message(Ref enq_msg, ton::LogicalT if (enq_msg.is_null() || enq_msg->size_ext() != 0x10040 || (enqueued_lt = enq_msg->prefetch_ulong(64)) < /* 0 */ 1 * lt) { // DEBUG if (enq_msg.not_null()) { - block::gen::t_EnqueuedMsg.print(std::cerr, *enq_msg); + FLOG(WARNING) { + sb << "inbound internal message is not a valid EnqueuedMsg: "; + block::gen::t_EnqueuedMsg.print(sb, enq_msg); + }; } LOG(ERROR) << "inbound internal message is not a valid EnqueuedMsg (created lt " << lt << ", enqueued " << enqueued_lt << ")"; @@ -3590,14 +3614,18 @@ bool Collator::process_inbound_internal_messages() { LOG(DEBUG) << "processing inbound message with (lt,hash)=(" << kv->lt << "," << kv->key.to_hex() << ") from neighbor #" << kv->source; if (verbosity > 2) { - std::cerr << "inbound message: lt=" << kv->lt << " from=" << kv->source << " key=" << kv->key.to_hex() << " msg="; - block::gen::t_EnqueuedMsg.print(std::cerr, *(kv->msg)); + FLOG(INFO) { + sb << "inbound message: lt=" << kv->lt << " from=" << kv->source << " key=" << kv->key.to_hex() << " msg="; + block::gen::t_EnqueuedMsg.print(sb, kv->msg); + }; } if (!process_inbound_message(kv->msg, kv->lt, kv->key.cbits(), neighbors_.at(kv->source))) { if (verbosity > 1) { - std::cerr << "invalid inbound message: lt=" << kv->lt << " from=" << kv->source << " key=" << kv->key.to_hex() - << " msg="; - block::gen::t_EnqueuedMsg.print(std::cerr, *(kv->msg)); + FLOG(INFO) { + sb << "invalid inbound message: lt=" << kv->lt << " from=" << kv->source << " key=" << kv->key.to_hex() + << " msg="; + block::gen::t_EnqueuedMsg.print(sb, kv->msg); + }; } 
return fatal_error("error processing inbound internal message"); } @@ -3884,7 +3912,10 @@ bool Collator::process_deferred_message(Ref enq_msg, StdSmcAddres LogicalTime enqueued_lt = 0; if (enq_msg.is_null() || enq_msg->size_ext() != 0x10040 || (enqueued_lt = enq_msg->prefetch_ulong(64)) != lt) { if (enq_msg.not_null()) { - block::gen::t_EnqueuedMsg.print(std::cerr, *enq_msg); + FLOG(WARNING) { + sb << "internal message in DispatchQueue is not a valid EnqueuedMsg: "; + block::gen::t_EnqueuedMsg.print(sb, enq_msg); + }; } LOG(ERROR) << "internal message in DispatchQueue is not a valid EnqueuedMsg (created lt " << lt << ", enqueued " << enqueued_lt << ")"; @@ -3986,8 +4017,10 @@ bool Collator::process_deferred_message(Ref enq_msg, StdSmcAddres */ bool Collator::insert_in_msg(Ref in_msg) { if (verbosity > 2) { - std::cerr << "InMsg being inserted into InMsgDescr: "; - block::gen::t_InMsg.print_ref(std::cerr, in_msg); + FLOG(INFO) { + sb << "InMsg being inserted into InMsgDescr: "; + block::gen::t_InMsg.print_ref(sb, in_msg); + }; } auto cs = load_cell_slice(in_msg); if (!cs.size_refs()) { @@ -4028,8 +4061,10 @@ bool Collator::insert_in_msg(Ref in_msg) { */ bool Collator::insert_out_msg(Ref out_msg) { if (verbosity > 2) { - std::cerr << "OutMsg being inserted into OutMsgDescr: "; - block::gen::t_OutMsg.print_ref(std::cerr, out_msg); + FLOG(INFO) { + sb << "OutMsg being inserted into OutMsgDescr: "; + block::gen::t_OutMsg.print_ref(sb, out_msg); + }; } auto cs = load_cell_slice(out_msg); if (!cs.size_refs()) { @@ -4125,8 +4160,10 @@ bool Collator::enqueue_message(block::NewOutMsg msg, td::RefInt256 fwd_fees_rema } // 4. 
insert OutMsg into OutMsgDescr if (verbosity > 2) { - std::cerr << "OutMsg for a newly-generated message: "; - block::gen::t_OutMsg.print_ref(std::cerr, out_msg); + FLOG(INFO) { + sb << "OutMsg for a newly-generated message: "; + block::gen::t_OutMsg.print_ref(sb, out_msg); + }; } if (!insert_out_msg(out_msg)) { return fatal_error("cannot insert a new OutMsg into OutMsgDescr"); @@ -4419,9 +4456,12 @@ bool Collator::create_mc_state_extra() { bool ignore_cfg_changes = false; Ref cfg0; if (!block::valid_config_data(cfg_smc_config, config_addr, true, true, old_mparams_)) { - block::gen::t_Hashmap_32_Ref_Cell.print_ref(std::cerr, cfg_smc_config); LOG(ERROR) << "configuration smart contract "s + config_addr.to_hex() + " contains an invalid configuration in its data, IGNORING CHANGES"; + FLOG(WARNING) { + sb << "ignored configuration: "; + block::gen::t_Hashmap_32_Ref_Cell.print_ref(sb, cfg_smc_config); + }; ignore_cfg_changes = true; } else { cfg0 = cfg_dict.lookup_ref(td::BitArray<32>{(long long)0}); @@ -4459,34 +4499,26 @@ bool Collator::create_mc_state_extra() { return fatal_error(wset_res.move_as_error()); } bool update_shard_cc = is_key_block_ || (now_ / ccvc.shard_cc_lifetime > prev_now_ / ccvc.shard_cc_lifetime); - // temp debug - if (verbosity >= 3 * 1) { - auto csr = shard_conf_->get_root_csr(); - LOG(INFO) << "new shard configuration before post-processing is"; - std::ostringstream os; - csr->print_rec(os); - block::gen::t_ShardHashes.print(os, csr.write()); - LOG(INFO) << os.str(); - } - // end (temp debug) if (!update_shard_config(wset_res.move_as_ok(), ccvc, update_shard_cc)) { auto csr = shard_conf_->get_root_csr(); if (csr.is_null()) { LOG(WARNING) << "new shard configuration is null (!)"; } else { LOG(WARNING) << "invalid new shard configuration is"; - std::ostringstream os; - csr->print_rec(os); - block::gen::t_ShardHashes.print(os, csr.write()); - LOG(WARNING) << os.str(); + FLOG(WARNING) { + csr->print_rec(sb); + block::gen::t_ShardHashes.print(sb, 
csr); + }; } return fatal_error("cannot post-process shard configuration"); } // 3. save new shard_hashes state_extra.shard_hashes = shard_conf_->get_root_csr(); - if (verbosity >= 3 * 0) { // DEBUG - std::cerr << "updated shard configuration to "; - block::gen::t_ShardHashes.print(std::cerr, *state_extra.shard_hashes); + if (verbosity >= 3) { + FLOG(INFO) { + sb << "updated shard configuration to "; + block::gen::t_ShardHashes.print(sb, state_extra.shard_hashes); + }; } if (!block::gen::t_ShardHashes.validate_upto(10000, *state_extra.shard_hashes)) { return fatal_error("new ShardHashes is invalid"); @@ -4587,13 +4619,18 @@ bool Collator::create_mc_state_extra() { if (verify >= 2) { LOG(INFO) << "verifying new BlockCreateStats"; if (!block::gen::t_BlockCreateStats.validate_csr(100000, cs)) { - cs->print_rec(std::cerr); - block::gen::t_BlockCreateStats.print(std::cerr, *cs); + FLOG(WARNING) { + sb << "BlockCreateStats in the new masterchain state failed to pass automated validity checks: "; + cs->print_rec(sb); + block::gen::t_BlockCreateStats.print(sb, cs); + }; return fatal_error("BlockCreateStats in the new masterchain state failed to pass automated validity checks"); } } if (verbosity >= 4 * 1) { - block::gen::t_BlockCreateStats.print(std::cerr, *cs); + FLOG(INFO) { + block::gen::t_BlockCreateStats.print(sb, cs); + }; } } else { state_extra.r1.block_create_stats.clear(); @@ -4628,7 +4665,6 @@ bool Collator::update_block_creator_count(td::ConstBitPtr key, unsigned shard_in if (!block::unpack_CreatorStats(std::move(cs), mc_cnt, shard_cnt)) { return fatal_error("cannot unpack CreatorStats for "s + key.to_hex(256) + " from previous masterchain state"); } - // std::cerr << mc_cnt.to_str() << " " << shard_cnt.to_str() << std::endl; if (mc_incr && !mc_cnt.increase_by(mc_incr, now_)) { return fatal_error(PSTRING() << "cannot increase masterchain block counter in CreatorStats for " << key.to_hex(256) << " by " << mc_incr << " (old value is " << mc_cnt.to_str() << ")"); 
@@ -4999,9 +5035,11 @@ bool Collator::update_public_libraries() { } } if (libraries_changed_ && verbosity >= 2) { - std::cerr << "New public libraries: "; - block::gen::t_HashmapE_256_LibDescr.print(std::cerr, shard_libraries_->get_root()); - shard_libraries_->get_root()->print_rec(std::cerr); + FLOG(INFO) { + sb << "New public libraries: "; + block::gen::t_HashmapE_256_LibDescr.print(sb, shard_libraries_->get_root()); + shard_libraries_->get_root()->print_rec(sb); + }; } return true; } @@ -5124,9 +5162,11 @@ bool Collator::create_shard_state() { } LOG(DEBUG) << "min_ref_mc_seqno is " << min_ref_mc_seqno_; if (verbosity > 2) { - std::cerr << "new ShardState: "; - block::gen::t_ShardState.print_ref(std::cerr, state_root); - vm::load_cell_slice(state_root).print_rec(std::cerr); + FLOG(INFO) { + sb << "new ShardState: "; + block::gen::t_ShardState.print_ref(sb, state_root); + vm::load_cell_slice(state_root).print_rec(sb); + }; } if (verify >= 2) { LOG(INFO) << "verifying new ShardState"; @@ -5139,9 +5179,11 @@ bool Collator::create_shard_state() { return fatal_error("cannot create Merkle update for ShardState"); } if (verbosity > 2) { - std::cerr << "Merkle Update for ShardState: "; - vm::CellSlice cs{vm::NoVm{}, state_update}; - cs.print_rec(std::cerr); + FLOG(INFO) { + sb << "Merkle Update for ShardState: "; + vm::CellSlice cs{vm::NoVm{}, state_update}; + cs.print_rec(sb); + }; } LOG(INFO) << "updating block profile statistics"; block_limit_status_->add_proof(state_root); @@ -5186,10 +5228,12 @@ bool Collator::update_processed_upto() { */ bool Collator::compute_out_msg_queue_info(Ref& out_msg_queue_info) { if (verbosity >= 2) { - auto rt = out_msg_queue_->get_root(); - std::cerr << "resulting out_msg_queue is "; - block::gen::t_OutMsgQueue.print(std::cerr, *rt); - rt->print_rec(std::cerr); + FLOG(INFO) { + auto rt = out_msg_queue_->get_root(); + sb << "resulting out_msg_queue is "; + block::gen::t_OutMsgQueue.print(sb, rt); + rt->print_rec(sb); + }; } 
vm::CellBuilder cb; // out_msg_queue_extra#0 dispatch_queue:DispatchQueue out_queue_size:(Maybe uint48) = OutMsgQueueExtra; @@ -5239,8 +5283,10 @@ bool Collator::compute_total_balance() { } vm::CellSlice cs{*(in_msg_dict->get_root_extra())}; if (verbosity > 2) { - block::gen::t_ImportFees.print(std::cerr, vm::CellSlice{*(in_msg_dict->get_root_extra())}); - cs.print_rec(std::cerr); + FLOG(INFO) { + block::gen::t_ImportFees.print(sb, in_msg_dict->get_root_extra()); + cs.print_rec(sb); + }; } auto new_import_fees = block::tlb::t_Grams.as_integer_skip(cs); if (new_import_fees.is_null()) { @@ -5468,9 +5514,11 @@ bool Collator::create_block() { return fatal_error("cannot create new Block"); } if (verbosity >= 3 * 1) { - std::cerr << "new Block: "; - block::gen::t_Block.print_ref(std::cerr, new_block); - vm::load_cell_slice(new_block).print_rec(std::cerr); + FLOG(INFO) { + sb << "new Block: "; + block::gen::t_Block.print_ref(sb, new_block); + vm::load_cell_slice(new_block).print_rec(sb); + }; } if (verify >= 1) { LOG(INFO) << "verifying new Block"; @@ -5508,9 +5556,11 @@ Ref Collator::collate_shard_block_descr_set() { return {}; } if (verbosity >= 4 * 1) { - std::cerr << "serialized TopBlockDescrSet for collated data is: "; - block::gen::t_TopBlockDescrSet.print_ref(std::cerr, cell); - vm::load_cell_slice(cell).print_rec(std::cerr); + FLOG(INFO) { + sb << "serialized TopBlockDescrSet for collated data is: "; + block::gen::t_TopBlockDescrSet.print_ref(sb, cell); + vm::load_cell_slice(cell).print_rec(sb); + }; } return cell; } @@ -5717,8 +5767,10 @@ td::Result Collator::register_external_message_cell(Ref ext_msg, return td::Status::Error("inbound external message has destination address not in this shard"); } if (verbosity > 2) { - std::cerr << "registered external message: "; - block::gen::t_Message_Any.print_ref(std::cerr, ext_msg); + FLOG(INFO) { + sb << "registered external message: "; + block::gen::t_Message_Any.print_ref(sb, ext_msg); + }; } ext_msg_map.emplace(hash, 
1); ext_msg_list_.push_back({std::move(ext_msg), ext_hash, priority}); diff --git a/validator/impl/external-message.cpp b/validator/impl/external-message.cpp index 2fdb491b..8b1f5eb7 100644 --- a/validator/impl/external-message.cpp +++ b/validator/impl/external-message.cpp @@ -136,13 +136,12 @@ td::Status ExtMessageQ::run_message_on_account(ton::WorkchainId wc, td::BitArray<256> rand_seed_; block::ComputePhaseConfig compute_phase_cfg_; block::ActionPhaseConfig action_phase_cfg_; + block::SerializeConfig serialize_config_; td::RefInt256 masterchain_create_fee, basechain_create_fee; - auto fetch_res = block::FetchConfigParams::fetch_config_params(*config, &old_mparams, - &storage_prices_, &storage_phase_cfg_, - &rand_seed_, &compute_phase_cfg_, - &action_phase_cfg_, &masterchain_create_fee, - &basechain_create_fee, wc, utime); + auto fetch_res = block::FetchConfigParams::fetch_config_params( + *config, &old_mparams, &storage_prices_, &storage_phase_cfg_, &rand_seed_, &compute_phase_cfg_, + &action_phase_cfg_, &serialize_config_, &masterchain_create_fee, &basechain_create_fee, wc, utime); if(fetch_res.is_error()) { auto error = fetch_res.move_as_error(); LOG(DEBUG) << "Cannot fetch config params: " << error.message(); @@ -152,10 +151,9 @@ td::Status ExtMessageQ::run_message_on_account(ton::WorkchainId wc, compute_phase_cfg_.with_vm_log = true; compute_phase_cfg_.stop_on_accept_message = true; - auto res = Collator::impl_create_ordinary_transaction(msg_root, acc, utime, lt, - &storage_phase_cfg_, &compute_phase_cfg_, - &action_phase_cfg_, - true, lt); + auto res = + Collator::impl_create_ordinary_transaction(msg_root, acc, utime, lt, &storage_phase_cfg_, &compute_phase_cfg_, + &action_phase_cfg_, &serialize_config_, true, lt); if(res.is_error()) { auto error = res.move_as_error(); LOG(DEBUG) << "Cannot run message on account: " << error.message(); diff --git a/validator/impl/liteserver.cpp b/validator/impl/liteserver.cpp index 50938cd5..06f40b8f 100644 --- 
a/validator/impl/liteserver.cpp +++ b/validator/impl/liteserver.cpp @@ -85,19 +85,13 @@ void LiteQuery::abort_query(td::Status reason) { if (acc_state_promise_) { acc_state_promise_.set_error(std::move(reason)); } else if (promise_) { + td::actor::send_closure(manager_, &ValidatorManager::add_lite_query_stats, query_obj_ ? query_obj_->get_id() : 0, + false); promise_.set_error(std::move(reason)); } stop(); } -void LiteQuery::abort_query_ext(td::Status reason, std::string comment) { - LOG(INFO) << "aborted liteserver query: " << comment << " : " << reason.to_string(); - if (promise_) { - promise_.set_error(reason.move_as_error_prefix(comment + " : ")); - } - stop(); -} - bool LiteQuery::fatal_error(td::Status error) { abort_query(std::move(error)); return false; @@ -120,6 +114,8 @@ bool LiteQuery::finish_query(td::BufferSlice result, bool skip_cache_update) { td::actor::send_closure(cache_, &LiteServerCache::update, cache_key_, result.clone()); } if (promise_) { + td::actor::send_closure(manager_, &ValidatorManager::add_lite_query_stats, query_obj_ ? 
query_obj_->get_id() : 0, + true); promise_.set_result(std::move(result)); stop(); return true; @@ -139,7 +135,6 @@ void LiteQuery::start_up() { auto F = fetch_tl_object(query_, true); if (F.is_error()) { - td::actor::send_closure(manager_, &ValidatorManager::add_lite_query_stats, 0); // unknown abort_query(F.move_as_error()); return; } @@ -192,7 +187,6 @@ bool LiteQuery::use_cache() { } void LiteQuery::perform() { - td::actor::send_closure(manager_, &ValidatorManager::add_lite_query_stats, query_obj_->get_id()); lite_api::downcast_call( *query_obj_, td::overloaded( diff --git a/validator/impl/liteserver.hpp b/validator/impl/liteserver.hpp index 447e1dad..fc873533 100644 --- a/validator/impl/liteserver.hpp +++ b/validator/impl/liteserver.hpp @@ -97,7 +97,6 @@ class LiteQuery : public td::actor::Actor { bool fatal_error(std::string err_msg, int err_code = -400); bool fatal_error(int err_code, std::string err_msg = ""); void abort_query(td::Status reason); - void abort_query_ext(td::Status reason, std::string err_msg); bool finish_query(td::BufferSlice result, bool skip_cache_update = false); void alarm() override; void start_up() override; diff --git a/validator/impl/signature-set.cpp b/validator/impl/signature-set.cpp index c7298216..0078a115 100644 --- a/validator/impl/signature-set.cpp +++ b/validator/impl/signature-set.cpp @@ -42,9 +42,6 @@ td::BufferSlice BlockSignatureSetQ::serialize() const { } Ref root; CHECK(serialize_to(root)); - //std::cerr << "serializing BlockSignatureSet: "; - //vm::CellSlice{vm::NoVm{}, root}.print_rec(std::cerr); - //std::cerr << std::endl; auto res = vm::std_boc_serialize(std::move(root)); LOG_CHECK(res.is_ok()) << res.move_as_error(); return res.move_as_ok(); diff --git a/validator/impl/top-shard-descr.cpp b/validator/impl/top-shard-descr.cpp index 8ff8862d..9eadeef3 100644 --- a/validator/impl/top-shard-descr.cpp +++ b/validator/impl/top-shard-descr.cpp @@ -175,9 +175,11 @@ td::Status ShardTopBlockDescrQ::unpack() { 
block::gen::TopBlockDescr::Record rec; if (!(block::gen::t_TopBlockDescr.force_validate_ref(root_) && tlb::unpack_cell(root_, rec) && block::tlb::t_BlockIdExt.unpack(rec.proof_for.write(), block_id_))) { - std::cerr << "invalid ShardTopBlockDescr: "; - block::gen::t_TopBlockDescr.print_ref(std::cerr, root_); - vm::load_cell_slice(root_).print_rec(std::cerr); + FLOG(INFO) { + sb << "invalid ShardTopBlockDescr: "; + block::gen::t_TopBlockDescr.print_ref(sb, root_); + vm::load_cell_slice(root_).print_rec(sb); + }; return td::Status::Error(-666, "Shard top block description is not a valid TopBlockDescr TL-B object"); } LOG(DEBUG) << "unpacking a ShardTopBlockDescr for " << block_id_.to_str() << " with " << rec.len << " links"; diff --git a/validator/impl/validate-query.cpp b/validator/impl/validate-query.cpp index 9e4d406e..90966d82 100644 --- a/validator/impl/validate-query.cpp +++ b/validator/impl/validate-query.cpp @@ -115,7 +115,7 @@ bool ValidateQuery::reject_query(std::string error, td::BufferSlice reason) { error = error_ctx() + error; LOG(ERROR) << "REJECT: aborting validation of block candidate for " << shard_.to_str() << " : " << error; if (main_promise) { - record_stats(); + record_stats(false); errorlog::ErrorLog::log(PSTRING() << "REJECT: aborting validation of block candidate for " << shard_.to_str() << " : " << error << ": data=" << block_candidate.id.file_hash.to_hex() << " collated_data=" << block_candidate.collated_file_hash.to_hex()); @@ -153,7 +153,7 @@ bool ValidateQuery::soft_reject_query(std::string error, td::BufferSlice reason) error = error_ctx() + error; LOG(ERROR) << "SOFT REJECT: aborting validation of block candidate for " << shard_.to_str() << " : " << error; if (main_promise) { - record_stats(); + record_stats(false); errorlog::ErrorLog::log(PSTRING() << "SOFT REJECT: aborting validation of block candidate for " << shard_.to_str() << " : " << error << ": data=" << block_candidate.id.file_hash.to_hex() << " collated_data=" << 
block_candidate.collated_file_hash.to_hex()); @@ -176,7 +176,7 @@ bool ValidateQuery::fatal_error(td::Status error) { error.ensure_error(); LOG(ERROR) << "aborting validation of block candidate for " << shard_.to_str() << " : " << error.to_string(); if (main_promise) { - record_stats(); + record_stats(false); auto c = error.code(); if (c <= -667 && c >= -670) { errorlog::ErrorLog::log(PSTRING() << "FATAL ERROR: aborting validation of block candidate for " << shard_.to_str() @@ -234,7 +234,7 @@ bool ValidateQuery::fatal_error(std::string err_msg, int err_code) { */ void ValidateQuery::finish_query() { if (main_promise) { - record_stats(); + record_stats(true); LOG(WARNING) << "validate query done"; main_promise.set_result(now_); } @@ -1004,6 +1004,10 @@ bool ValidateQuery::fetch_config_params() { action_phase_cfg_.disable_custom_fess = config_->get_global_version() >= 8; action_phase_cfg_.reserve_extra_enabled = config_->get_global_version() >= 9; action_phase_cfg_.mc_blackhole_addr = config_->get_burning_config().blackhole_addr; + action_phase_cfg_.extra_currency_v2 = config_->get_global_version() >= 10; + } + { + serialize_cfg_.extra_currency_v2 = config_->get_global_version() >= 10; } { // fetch block_grams_created @@ -1553,8 +1557,10 @@ void ValidateQuery::got_neighbor_out_queue(int i, td::Result> // unpack ProcessedUpto LOG(DEBUG) << "unpacking ProcessedUpto of neighbor " << descr.blk_.to_str(); if (verbosity >= 2) { - block::gen::t_ProcessedInfo.print(std::cerr, qinfo.proc_info); - qinfo.proc_info->print_rec(std::cerr); + FLOG(INFO) { + block::gen::t_ProcessedInfo.print(sb, qinfo.proc_info); + qinfo.proc_info->print_rec(sb); + }; } descr.processed_upto = block::MsgProcessedUptoCollection::unpack(descr.shard(), qinfo.proc_info); if (!descr.processed_upto) { @@ -2656,7 +2662,6 @@ bool ValidateQuery::unpack_precheck_value_flow(Ref value_flow_root) { " but the sum over all accounts present in the new state is " + cc.to_str()); } auto msg_extra = 
in_msg_dict_->get_root_extra(); - // block::gen::t_ImportFees.print(std::cerr, msg_extra); if (!(block::tlb::t_Grams.as_integer_skip_to(msg_extra.write(), import_fees_) && cc.unpack(std::move(msg_extra)))) { return reject_query("cannot unpack ImportFees from the augmentation of the InMsgDescr dictionary"); } @@ -2760,20 +2765,22 @@ bool ValidateQuery::precheck_one_account_update(td::ConstBitPtr acc_id, Reflookup(acc_id, 256); if (acc_blk_root.is_null()) { if (verbosity >= 3 * 0) { - std::cerr << "state of account " << workchain() << ":" << acc_id.to_hex(256) - << " in the old shardchain state:" << std::endl; - if (old_value.not_null()) { - block::gen::t_ShardAccount.print(std::cerr, *old_value); - } else { - std::cerr << "" << std::endl; - } - std::cerr << "state of account " << workchain() << ":" << acc_id.to_hex(256) - << " in the new shardchain state:" << std::endl; - if (new_value.not_null()) { - block::gen::t_ShardAccount.print(std::cerr, *new_value); - } else { - std::cerr << "" << std::endl; - } + FLOG(INFO) { + sb << "state of account " << workchain() << ":" << acc_id.to_hex(256) + << " in the old shardchain state:" << "\n"; + if (old_value.not_null()) { + block::gen::t_ShardAccount.print(sb, old_value); + } else { + sb << "" << "\n"; + } + sb << "state of account " << workchain() << ":" << acc_id.to_hex(256) + << " in the new shardchain state:" << "\n"; + if (new_value.not_null()) { + block::gen::t_ShardAccount.print(sb, new_value); + } else { + sb << "" << "\n"; + } + }; } return reject_query("the state of account "s + acc_id.to_hex(256) + " changed in the new state with respect to the old state, but the block contains no " @@ -2931,8 +2938,6 @@ bool ValidateQuery::precheck_one_account_block(td::ConstBitPtr acc_id, Refprint_rec(std::cerr); - // block::gen::t_AccountBlock.print(std::cerr, acc_blk_root); block::gen::AccountBlock::Record acc_blk; block::gen::HASH_UPDATE::Record hash_upd; if (!(tlb::csr_unpack(acc_blk_root, acc_blk) && @@ -3860,7 +3865,9 @@ 
bool ValidateQuery::check_in_msg(td::ConstBitPtr key, Ref in_msg) ton::LogicalTime trans_lt; CHECK(block::get_transaction_id(transaction, trans_addr, trans_lt)); if (dest_addr != trans_addr) { - block::gen::t_InMsg.print(std::cerr, *in_msg); + FLOG(INFO) { + block::gen::t_InMsg.print(sb, in_msg); + }; return reject_query(PSTRING() << "InMsg corresponding to inbound message with hash " << key.to_hex(256) << " and destination address " << dest_addr.to_hex() << " claims that the message is processed by transaction " << trans_lt @@ -4408,7 +4415,9 @@ bool ValidateQuery::check_out_msg(td::ConstBitPtr key, Ref out_ms ton::LogicalTime trans_lt; CHECK(block::get_transaction_id(transaction, trans_addr, trans_lt)); if (src_addr != trans_addr) { - block::gen::t_OutMsg.print(std::cerr, *out_msg); + FLOG(INFO) { + block::gen::t_OutMsg.print(sb, out_msg); + }; return reject_query(PSTRING() << "OutMsg corresponding to outbound message with hash " << key.to_hex(256) << " and source address " << src_addr.to_hex() << " claims that the message was created by transaction " << trans_lt @@ -5022,15 +5031,19 @@ bool ValidateQuery::check_in_queue() { LOG(DEBUG) << "processing inbound message with (lt,hash)=(" << kv->lt << "," << kv->key.to_hex() << ") from neighbor #" << kv->source; if (verbosity > 3) { - std::cerr << "inbound message: lt=" << kv->lt << " from=" << kv->source << " key=" << kv->key.to_hex() << " msg="; - block::gen::t_EnqueuedMsg.print(std::cerr, *(kv->msg)); + FLOG(INFO) { + sb << "inbound message: lt=" << kv->lt << " from=" << kv->source << " key=" << kv->key.to_hex() << " msg="; + block::gen::t_EnqueuedMsg.print(sb, kv->msg); + }; } bool unprocessed = false; if (!check_neighbor_outbound_message(kv->msg, kv->lt, kv->key.cbits(), neighbors_.at(kv->source), unprocessed)) { if (verbosity > 1) { - std::cerr << "invalid neighbor outbound message: lt=" << kv->lt << " from=" << kv->source - << " key=" << kv->key.to_hex() << " msg="; - block::gen::t_EnqueuedMsg.print(std::cerr, 
*(kv->msg)); + FLOG(INFO) { + sb << "invalid neighbor outbound message: lt=" << kv->lt << " from=" << kv->source + << " key=" << kv->key.to_hex() << " msg="; + block::gen::t_EnqueuedMsg.print(sb, kv->msg); + }; } return reject_query("error processing outbound internal message "s + kv->key.to_hex() + " of neighbor " + neighbors_.at(kv->source).blk_.to_str()); @@ -5599,7 +5612,7 @@ bool ValidateQuery::check_one_transaction(block::Account& account, ton::LogicalT return reject_query(PSTRING() << "cannot re-create bounce phase of transaction " << lt << " for smart contract " << addr.to_hex()); } - if (!trs->serialize()) { + if (!trs->serialize(serialize_cfg_)) { return reject_query(PSTRING() << "cannot re-create the serialization of transaction " << lt << " for smart contract " << addr.to_hex()); } @@ -5636,10 +5649,12 @@ bool ValidateQuery::check_one_transaction(block::Account& account, ton::LogicalT // now compare the re-created transaction with the one we have if (trans_root2->get_hash() != trans_root->get_hash()) { if (verbosity >= 3 * 0) { - std::cerr << "original transaction " << lt << " of " << addr.to_hex() << ": "; - block::gen::t_Transaction.print_ref(std::cerr, trans_root); - std::cerr << "re-created transaction " << lt << " of " << addr.to_hex() << ": "; - block::gen::t_Transaction.print_ref(std::cerr, trans_root2); + FLOG(INFO) { + sb << "original transaction " << lt << " of " << addr.to_hex() << ": "; + block::gen::t_Transaction.print_ref(sb, trans_root); + sb << "re-created transaction " << lt << " of " << addr.to_hex() << ": "; + block::gen::t_Transaction.print_ref(sb, trans_root2); + }; } return reject_query(PSTRING() << "the transaction " << lt << " of " << addr.to_hex() << " has hash " << trans_root->get_hash().to_hex() @@ -6917,13 +6932,13 @@ void ValidateQuery::written_candidate() { /** * Sends validation work time to manager. 
*/ -void ValidateQuery::record_stats() { +void ValidateQuery::record_stats(bool success) { double work_time = work_timer_.elapsed(); double cpu_work_time = cpu_work_timer_.elapsed(); LOG(WARNING) << "validation took " << perf_timer_.elapsed() << "s"; LOG(WARNING) << "Validate query work time = " << work_time << "s, cpu time = " << cpu_work_time << "s"; td::actor::send_closure(manager, &ValidatorManager::record_validate_query_stats, block_candidate.id, work_time, - cpu_work_time); + cpu_work_time, success); } } // namespace validator diff --git a/validator/impl/validate-query.hpp b/validator/impl/validate-query.hpp index 98cd2493..60f0cc8a 100644 --- a/validator/impl/validate-query.hpp +++ b/validator/impl/validate-query.hpp @@ -205,6 +205,7 @@ class ValidateQuery : public td::actor::Actor { block::StoragePhaseConfig storage_phase_cfg_{&storage_prices_}; block::ComputePhaseConfig compute_phase_cfg_; block::ActionPhaseConfig action_phase_cfg_; + block::SerializeConfig serialize_cfg_; td::RefInt256 masterchain_create_fee_, basechain_create_fee_; std::vector neighbors_; @@ -400,7 +401,7 @@ class ValidateQuery : public td::actor::Actor { td::Timer work_timer_{true}; td::ThreadCpuTimer cpu_work_timer_{true}; - void record_stats(); + void record_stats(bool success); }; } // namespace validator diff --git a/validator/interfaces/validator-manager.h b/validator/interfaces/validator-manager.h index 20d4bd62..00fb77e1 100644 --- a/validator/interfaces/validator-manager.h +++ b/validator/interfaces/validator-manager.h @@ -205,13 +205,13 @@ class ValidatorManager : public ValidatorManagerInterface { td::optional shard, td::Promise> promise) = 0; - virtual void add_lite_query_stats(int lite_query_id) { + virtual void add_lite_query_stats(int lite_query_id, bool success) { } virtual void record_collate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time, - CollationStats stats) { + td::optional stats) { } - virtual void record_validate_query_stats(BlockIdExt 
block_id, double work_time, double cpu_work_time) { + virtual void record_validate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time, bool success) { } virtual void add_persistent_state_description(td::Ref desc) = 0; diff --git a/validator/manager-init.cpp b/validator/manager-init.cpp index c2944b25..6f304680 100644 --- a/validator/manager-init.cpp +++ b/validator/manager-init.cpp @@ -32,6 +32,8 @@ namespace ton { namespace validator { void ValidatorManagerMasterchainReiniter::start_up() { + status_ = ProcessStatus(manager_, "process.initial_sync"); + status_.set_status(PSTRING() << "starting, init block seqno " << block_id_.seqno()); LOG(INFO) << "init_block_id=" << block_id_; CHECK(block_id_.is_masterchain()); CHECK(block_id_.id.shard == shardIdAll); @@ -58,6 +60,7 @@ void ValidatorManagerMasterchainReiniter::got_masterchain_handle(BlockHandle han key_blocks_.push_back(handle_); if (opts_->initial_sync_disabled()) { + status_.set_status(PSTRING() << "downloading masterchain state " << handle_->id().seqno()); auto P = td::PromiseCreator::lambda([SelfId = actor_id(this)](td::Result> R) { R.ensure(); td::actor::send_closure(SelfId, &ValidatorManagerMasterchainReiniter::download_masterchain_state); @@ -181,6 +184,7 @@ void ValidatorManagerMasterchainReiniter::got_next_key_blocks(std::vector(key_blocks_.size()); key_blocks_.resize(key_blocks_.size() + vec.size(), nullptr); @@ -247,6 +251,7 @@ void ValidatorManagerMasterchainReiniter::choose_masterchain_state() { } void ValidatorManagerMasterchainReiniter::download_masterchain_state() { + status_.set_status(PSTRING() << "downloading masterchain state " << block_id_.seqno()); auto P = td::PromiseCreator::lambda([SelfId = actor_id(this)](td::Result> R) { if (R.is_error()) { LOG(WARNING) << "failed to download masterchain state: " << R.move_as_error(); @@ -274,6 +279,7 @@ void ValidatorManagerMasterchainReiniter::downloaded_masterchain_state(td::Ref("shardclient", opts_, handle_, state_, manager_, 
std::move(P)); + status_.set_status(PSTRING() << "downloading all shard states, mc seqno " << block_id_.seqno()); } void ValidatorManagerMasterchainReiniter::downloaded_all_shards() { diff --git a/validator/manager-init.hpp b/validator/manager-init.hpp index 7dce4e47..901b826b 100644 --- a/validator/manager-init.hpp +++ b/validator/manager-init.hpp @@ -27,6 +27,8 @@ #include "manager-init.h" +#include + namespace ton { namespace validator { @@ -77,6 +79,8 @@ class ValidatorManagerMasterchainReiniter : public td::actor::Actor { td::uint32 pending_ = 0; td::actor::ActorOwn client_; + + ProcessStatus status_; }; class ValidatorManagerMasterchainStarter : public td::actor::Actor { diff --git a/validator/manager.cpp b/validator/manager.cpp index 068ea5eb..b0ac5409 100644 --- a/validator/manager.cpp +++ b/validator/manager.cpp @@ -430,6 +430,10 @@ void ValidatorManagerImpl::add_external_message(td::Ref msg, int pri ext_messages_hashes_[id.hash] = {priority, id}; } void ValidatorManagerImpl::check_external_message(td::BufferSlice data, td::Promise> promise) { + if (!started_) { + promise.set_error(td::Status::Error(ErrorCode::notready, "node not synced")); + return; + } auto state = do_get_last_liteserver_state(); if (state.is_null()) { promise.set_error(td::Status::Error(ErrorCode::notready, "not ready")); @@ -451,11 +455,9 @@ void ValidatorManagerImpl::check_external_message(td::BufferSlice data, td::Prom promise = [self = this, wc, addr, promise = std::move(promise), SelfId = actor_id(this)](td::Result> R) mutable { - if (R.is_error()) { - promise.set_error(R.move_as_error()); - return; - } - td::actor::send_lambda(SelfId, [=, promise = std::move(promise), message = R.move_as_ok()]() mutable { + td::actor::send_lambda(SelfId, [=, promise = std::move(promise), R = std::move(R)]() mutable { + ++(R.is_ok() ? 
self->total_check_ext_messages_ok_ : self->total_check_ext_messages_error_); + TRY_RESULT_PROMISE(promise, message, std::move(R)); if (self->checked_ext_msg_counter_.inc_msg_count(wc, addr) > max_ext_msg_per_addr()) { promise.set_error( td::Status::Error(PSTRING() << "too many external messages to address " << wc << ":" << addr.to_hex())); @@ -698,11 +700,10 @@ void ValidatorManagerImpl::wait_block_state(BlockHandle handle, td::uint32 prior auto P = td::PromiseCreator::lambda([SelfId = actor_id(this), handle](td::Result> R) { td::actor::send_closure(SelfId, &ValidatorManagerImpl::finished_wait_state, handle, std::move(R)); }); - auto id = - td::actor::create_actor("waitstate", handle, priority, actor_id(this), - td::Timestamp::at(timeout.at() + 10.0), std::move(P), - get_block_persistent_state(handle->id())) - .release(); + auto id = td::actor::create_actor("waitstate", handle, priority, actor_id(this), + td::Timestamp::at(timeout.at() + 10.0), std::move(P), + get_block_persistent_state_to_download(handle->id())) + .release(); wait_state_[handle->id()].actor_ = id; it = wait_state_.find(handle->id()); } @@ -1148,9 +1149,10 @@ void ValidatorManagerImpl::finished_wait_state(BlockHandle handle, td::Result> R) { td::actor::send_closure(SelfId, &ValidatorManagerImpl::finished_wait_state, handle, std::move(R)); }); - auto id = td::actor::create_actor("waitstate", handle, X.second, actor_id(this), X.first, - std::move(P), get_block_persistent_state(handle->id())) - .release(); + auto id = + td::actor::create_actor("waitstate", handle, X.second, actor_id(this), X.first, + std::move(P), get_block_persistent_state_to_download(handle->id())) + .release(); it->second.actor_ = id; return; } @@ -2131,7 +2133,7 @@ void ValidatorManagerImpl::update_shards() { } } - bool validating_masterchain = false; + active_validator_groups_master_ = active_validator_groups_shard_ = 0; if (allow_validate_) { for (auto &desc : new_shards) { auto shard = desc.first; @@ -2148,9 +2150,7 @@ void 
ValidatorManagerImpl::update_shards() { auto validator_id = get_validator(shard, val_set); if (!validator_id.is_zero()) { - if (shard.is_masterchain()) { - validating_masterchain = true; - } + ++(shard.is_masterchain() ? active_validator_groups_master_ : active_validator_groups_shard_); auto val_group_id = get_validator_set_id(shard, val_set, opts_hash, key_seqno, opts); if (force_recover) { @@ -2845,8 +2845,8 @@ void ValidatorManagerImpl::prepare_stats(td::Promiseid().to_str()); vec.emplace_back("rotatemasterchainblock", last_rotate_block_id_.to_str()); //vec.emplace_back("shardclientmasterchainseqno", td::to_string(min_confirmed_masterchain_seqno_)); - vec.emplace_back("stateserializermasterchainseqno", td::to_string(state_serializer_masterchain_seqno_)); } + td::NamedThreadSafeCounter::get_default().for_each([&](auto key, auto value) { vec.emplace_back("counter." + key, PSTRING() << value); }); @@ -2864,9 +2864,48 @@ void ValidatorManagerImpl::prepare_stats(td::Promiseget_state_serializer_enabled(); + if (is_validator() && last_masterchain_state_->get_global_id() == -239) { + serializer_enabled = false; + } + vec.emplace_back("stateserializerenabled", serializer_enabled ? 
"true" : "false"); + merger.make_promise("").set_value(std::move(vec)); + if (!serializer_.empty()) { + td::actor::send_closure(serializer_, &AsyncStateSerializer::prepare_stats, merger.make_promise("")); + } + td::actor::send_closure(db_, &Db::prepare_stats, merger.make_promise("db.")); + for (auto &[_, p] : stats_providers_) { + p.second(merger.make_promise(p.first)); + } } void ValidatorManagerImpl::prepare_perf_timer_stats(td::Promise> promise) { @@ -3336,11 +3375,18 @@ void ValidatorManagerImpl::got_persistent_state_descriptions(std::vector ValidatorManagerImpl::get_block_persistent_state(BlockIdExt block_id) { +td::Ref ValidatorManagerImpl::get_block_persistent_state_to_download(BlockIdExt block_id) { + if (block_id.is_masterchain()) { + return {}; + } auto it = persistent_state_blocks_.find(block_id); if (it == persistent_state_blocks_.end()) { return {}; } + if (it->second->masterchain_id.seqno() + 16 >= min_confirmed_masterchain_seqno_) { + // Do not download persistent states during ordinary shard client sync + return {}; + } return it->second; } @@ -3353,17 +3399,28 @@ td::actor::ActorOwn ValidatorManagerFactory::create( } void ValidatorManagerImpl::record_collate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time, - CollationStats stats) { + td::optional stats) { + if (!stats) { + ++(block_id.is_masterchain() ? total_collated_blocks_master_error_ : total_collated_blocks_shard_error_); + return; + } auto &record = new_block_stats_record(block_id); record.collator_work_time_ = work_time; record.collator_cpu_work_time_ = cpu_work_time; - record.collator_stats_ = std::move(stats); + record.collator_stats_ = std::move(stats.value()); + ++(block_id.is_masterchain() ? 
total_collated_blocks_master_ok_ : total_collated_blocks_shard_ok_); } -void ValidatorManagerImpl::record_validate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time) { +void ValidatorManagerImpl::record_validate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time, + bool success) { auto &record = new_block_stats_record(block_id); record.validator_work_time_ = work_time; record.validator_cpu_work_time_ = cpu_work_time; + if (success) { + ++(block_id.is_masterchain() ? total_validated_blocks_master_ok_ : total_validated_blocks_shard_ok_); + } else { + ++(block_id.is_masterchain() ? total_validated_blocks_master_error_ : total_validated_blocks_shard_error_); + } } ValidatorManagerImpl::RecordedBlockStats &ValidatorManagerImpl::new_block_stats_record(BlockIdExt block_id) { @@ -3377,6 +3434,16 @@ ValidatorManagerImpl::RecordedBlockStats &ValidatorManagerImpl::new_block_stats_ return recorded_block_stats_[block_id]; } +void ValidatorManagerImpl::register_stats_provider( + td::uint64 idx, std::string prefix, + std::function>>)> callback) { + stats_providers_[idx] = {std::move(prefix), std::move(callback)}; +} + +void ValidatorManagerImpl::unregister_stats_provider(td::uint64 idx) { + stats_providers_.erase(idx); +} + size_t ValidatorManagerImpl::CheckedExtMsgCounter::get_msg_count(WorkchainId wc, StdSmcAddress addr) { before_query(); auto it1 = counter_cur_.find({wc, addr}); diff --git a/validator/manager.hpp b/validator/manager.hpp index 519cab12..418deb35 100644 --- a/validator/manager.hpp +++ b/validator/manager.hpp @@ -655,8 +655,9 @@ class ValidatorManagerImpl : public ValidatorManager { td::optional shard, td::Promise> promise) override; - void add_lite_query_stats(int lite_query_id) override { + void add_lite_query_stats(int lite_query_id, bool success) override { ++ls_stats_[lite_query_id]; + ++(success ? 
total_ls_queries_ok_ : total_ls_queries_error_)[lite_query_id]; } private: @@ -733,7 +734,7 @@ class ValidatorManagerImpl : public ValidatorManager { void got_persistent_state_descriptions(std::vector> descs); void add_persistent_state_description_impl(td::Ref desc); - td::Ref get_block_persistent_state(BlockIdExt block_id); + td::Ref get_block_persistent_state_to_download(BlockIdExt block_id); private: bool need_monitor(ShardIdFull shard) const { @@ -747,6 +748,16 @@ class ValidatorManagerImpl : public ValidatorManager { std::map ls_stats_; // lite_api ID -> count, 0 for unknown td::uint32 ls_stats_check_ext_messages_{0}; + UnixTime started_at_ = (UnixTime)td::Clocks::system(); + std::map total_ls_queries_ok_, total_ls_queries_error_; // lite_api ID -> count, 0 for unknown + td::uint64 total_check_ext_messages_ok_{0}, total_check_ext_messages_error_{0}; + td::uint64 total_collated_blocks_master_ok_{0}, total_collated_blocks_master_error_{0}; + td::uint64 total_validated_blocks_master_ok_{0}, total_validated_blocks_master_error_{0}; + td::uint64 total_collated_blocks_shard_ok_{0}, total_collated_blocks_shard_error_{0}; + td::uint64 total_validated_blocks_shard_ok_{0}, total_validated_blocks_shard_error_{0}; + + size_t active_validator_groups_master_{0}, active_validator_groups_shard_{0}; + td::actor::ActorOwn candidates_buffer_; struct RecordedBlockStats { @@ -760,16 +771,25 @@ class ValidatorManagerImpl : public ValidatorManager { std::queue recorded_block_stats_lru_; void record_collate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time, - CollationStats stats) override; - void record_validate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time) override; + td::optional stats) override; + void record_validate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time, bool success) override; RecordedBlockStats &new_block_stats_record(BlockIdExt block_id); + void register_stats_provider( + td::uint64 idx, 
std::string prefix, + std::function>>)> callback) override; + void unregister_stats_provider(td::uint64 idx) override; + std::map> validator_telemetry_; void init_validator_telemetry(); std::map> persistent_state_descriptions_; std::map> persistent_state_blocks_; + + std::map>>)>>> + stats_providers_; }; } // namespace validator diff --git a/validator/net/download-state.cpp b/validator/net/download-state.cpp index 2b373ef3..6735a2b5 100644 --- a/validator/net/download-state.cpp +++ b/validator/net/download-state.cpp @@ -70,6 +70,7 @@ void DownloadState::finish_query() { } void DownloadState::start_up() { + status_ = ProcessStatus(validator_manager_, "process.download_state_net"); alarm_timestamp() = timeout_; td::actor::send_closure(validator_manager_, &ValidatorManagerInterface::get_persistent_state, block_id_, @@ -190,6 +191,7 @@ void DownloadState::got_block_state_description(td::BufferSlice data) { td::Timestamp::in(3.0), std::move(P)); } })); + status_.set_status(PSTRING() << block_id_.id.to_str() << " : 0 bytes, 0B/s"); } void DownloadState::got_block_state_part(td::BufferSlice data, td::uint32 requested_size) { @@ -198,14 +200,18 @@ void DownloadState::got_block_state_part(td::BufferSlice data, td::uint32 reques parts_.push_back(std::move(data)); double elapsed = prev_logged_timer_.elapsed(); - if (elapsed > 10.0) { + if (elapsed > 5.0) { prev_logged_timer_ = td::Timer(); + auto speed = (td::uint64)((double)(sum_ - prev_logged_sum_) / elapsed); LOG(WARNING) << "downloading state " << block_id_.to_str() << ": " << td::format::as_size(sum_) << " (" - << td::format::as_size((td::uint64)(double(sum_ - prev_logged_sum_) / elapsed)) << "/s)"; + << td::format::as_size(speed) << "/s)"; + status_.set_status(PSTRING() << block_id_.id.to_str() << " : " << sum_ << " bytes, " << td::format::as_size(speed) + << "/s"); prev_logged_sum_ = sum_; } if (last_part) { + status_.set_status(PSTRING() << block_id_.id.to_str() << " : " << sum_ << " bytes, finishing"); 
td::BufferSlice res{td::narrow_cast(sum_)}; auto S = res.as_slice(); for (auto &p : parts_) { diff --git a/validator/net/download-state.hpp b/validator/net/download-state.hpp index 19c44beb..470c5431 100644 --- a/validator/net/download-state.hpp +++ b/validator/net/download-state.hpp @@ -23,6 +23,8 @@ #include "validator/validator.h" #include "adnl/adnl-ext-client.h" +#include + namespace ton { namespace validator { @@ -75,6 +77,8 @@ class DownloadState : public td::actor::Actor { td::uint64 prev_logged_sum_ = 0; td::Timer prev_logged_timer_; + + ProcessStatus status_; }; } // namespace fullnode diff --git a/validator/state-serializer.cpp b/validator/state-serializer.cpp index b693232b..bc3d7b5e 100644 --- a/validator/state-serializer.cpp +++ b/validator/state-serializer.cpp @@ -58,6 +58,12 @@ void AsyncStateSerializer::got_self_state(AsyncSerializerState state) { }); td::actor::send_closure(manager_, &ValidatorManager::get_block_handle, last_block_id_, true, std::move(P)); } + + inited_block_id_ = true; + for (auto& promise : wait_init_block_id_) { + promise.set_value(td::Unit()); + } + wait_init_block_id_.clear(); } void AsyncStateSerializer::got_init_handle(BlockHandle handle) { @@ -186,6 +192,9 @@ void AsyncStateSerializer::next_iteration() { td::actor::send_closure(SelfId, &AsyncStateSerializer::request_previous_state_files); }, td::Timestamp::in(delay)); + current_status_ = PSTRING() << "delay before serializing seqno=" << masterchain_handle_->id().seqno() << " " + << (int)delay << "s"; + current_status_ts_ = td::Timestamp::now(); return; } if (next_idx_ < shards_.size()) { @@ -379,9 +388,14 @@ void AsyncStateSerializer::got_masterchain_state(td::Ref state td::actor::send_closure(manager_, &ValidatorManager::store_persistent_state_file_gen, masterchain_handle_->id(), masterchain_handle_->id(), write_data, std::move(P)); + + current_status_ = PSTRING() << "serializing masterchain state " << state->get_block_id().id.to_str(); + current_status_ts_ = 
td::Timestamp::now(); } void AsyncStateSerializer::stored_masterchain_state() { + current_status_ = "pending"; + current_status_ts_ = {}; LOG(ERROR) << "finished serializing masterchain state " << masterchain_handle_->id().id.to_str(); running_ = false; next_iteration(); @@ -444,9 +458,14 @@ void AsyncStateSerializer::got_shard_state(BlockHandle handle, td::Refid(), masterchain_handle_->id(), write_data, std::move(P)); + current_status_ = PSTRING() << "serializing shard state " << next_idx_ << "/" << shards_.size() << " " + << state->get_block_id().id.to_str(); + current_status_ts_ = td::Timestamp::now(); } void AsyncStateSerializer::fail_handler(td::Status reason) { + current_status_ = PSTRING() << "pending, " << reason; + current_status_ts_ = {}; VLOG(VALIDATOR_NOTICE) << "failure: " << reason; attempt_++; delay_action( @@ -460,6 +479,8 @@ void AsyncStateSerializer::fail_handler_cont() { } void AsyncStateSerializer::success_handler() { + current_status_ = "pending"; + current_status_ts_ = {}; running_ = false; next_iteration(); } @@ -478,6 +499,29 @@ void AsyncStateSerializer::auto_disable_serializer(bool disabled) { } } +void AsyncStateSerializer::prepare_stats(td::Promise>> promise) { + if (!inited_block_id_) { + wait_init_block_id_.push_back( + [SelfId = actor_id(this), promise = std::move(promise)](td::Result R) mutable { + TRY_STATUS_PROMISE(promise, R.move_as_status()); + td::actor::send_closure(SelfId, &AsyncStateSerializer::prepare_stats, std::move(promise)); + }); + return; + } + std::vector> vec; + vec.emplace_back("stateserializermasterchainseqno", td::to_string(last_block_id_.seqno())); + td::StringBuilder sb; + sb << current_status_; + if (current_status_ts_) { + sb << " (started " << (int)(td::Timestamp::now() - current_status_ts_) << "s ago)"; + } + if (!opts_->get_state_serializer_enabled() || auto_disabled_) { + sb << " (disabled)"; + } + vec.emplace_back("stateserializerstatus", sb.as_cslice().str()); + promise.set_result(std::move(vec)); +} + 
bool AsyncStateSerializer::need_serialize(BlockHandle handle) { if (handle->id().id.seqno == 0 || !handle->is_key_block()) { return false; diff --git a/validator/state-serializer.hpp b/validator/state-serializer.hpp index 1e7f5c9c..406ac350 100644 --- a/validator/state-serializer.hpp +++ b/validator/state-serializer.hpp @@ -36,6 +36,9 @@ class AsyncStateSerializer : public td::actor::Actor { UnixTime last_key_block_ts_ = 0; bool saved_to_db_ = true; + bool inited_block_id_ = false; + std::vector> wait_init_block_id_; + td::Ref opts_; bool auto_disabled_ = false; td::CancellationTokenSource cancellation_token_source_; @@ -95,6 +98,8 @@ class AsyncStateSerializer : public td::actor::Actor { promise.set_result(last_block_id_.id.seqno); } + void prepare_stats(td::Promise>> promise); + void update_last_known_key_block_ts(UnixTime ts) { last_known_key_block_ts_ = std::max(last_known_key_block_ts_, ts); } @@ -111,6 +116,9 @@ class AsyncStateSerializer : public td::actor::Actor { void update_options(td::Ref opts); void auto_disable_serializer(bool disabled); + + std::string current_status_ = "pending"; + td::Timestamp current_status_ts_ = td::Timestamp::never(); }; } // namespace validator diff --git a/validator/stats-provider.h b/validator/stats-provider.h new file mode 100644 index 00000000..e0a7f565 --- /dev/null +++ b/validator/stats-provider.h @@ -0,0 +1,105 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +#include "validator.h" +#include "common/AtomicRef.h" + +#include + +namespace ton { + +namespace validator { + +class StatsProvider { + public: + StatsProvider() = default; + StatsProvider(td::actor::ActorId manager, std::string prefix, + std::function>>)> callback) + : inited_(true), manager_(std::move(manager)) { + static std::atomic cur_idx{0}; + idx_ = cur_idx.fetch_add(1); + td::actor::send_closure(manager_, &ValidatorManagerInterface::register_stats_provider, idx_, std::move(prefix), + std::move(callback)); + } + StatsProvider(const StatsProvider&) = delete; + StatsProvider(StatsProvider&& other) noexcept + : inited_(other.inited_), idx_(other.idx_), manager_(std::move(other.manager_)) { + other.inited_ = false; + } + ~StatsProvider() { + if (inited_) { + td::actor::send_closure(manager_, &ValidatorManagerInterface::unregister_stats_provider, idx_); + } + } + + StatsProvider& operator=(const StatsProvider&) = delete; + StatsProvider& operator=(StatsProvider&& other) noexcept { + if (this != &other) { + inited_ = other.inited_; + idx_ = other.idx_; + manager_ = std::move(other.manager_); + other.inited_ = false; + } + return *this; + } + + bool inited() const { + return inited_; + } + + private: + bool inited_ = false; + td::uint64 idx_ = 0; + td::actor::ActorId manager_; +}; + +class ProcessStatus { + public: + ProcessStatus() = default; + ProcessStatus(td::actor::ActorId manager, std::string name) + : stats_provider_(std::move(manager), std::move(name), [value = value_](auto promise) { + auto status = value->load(); + if (status.is_null()) { + promise.set_error(td::Status::Error("empty")); + return; + } + std::vector> vec; + vec.emplace_back("", *status); + promise.set_value(std::move(vec)); + }) { + } + ProcessStatus(const ProcessStatus&) = delete; + ProcessStatus(ProcessStatus&& other) noexcept = default; + 
ProcessStatus& operator=(const ProcessStatus&) = delete; + ProcessStatus& operator=(ProcessStatus&& other) noexcept = default; + + void set_status(std::string s) { + if (!value_) { + return; + } + value_->store(td::Ref>(true, std::move(s))); + } + + private: + std::shared_ptr>> value_ = std::make_shared>>(); + StatsProvider stats_provider_; +}; + +} // namespace validator + +} // namespace ton diff --git a/validator/validator-group.cpp b/validator/validator-group.cpp index 1817180d..110ccd81 100644 --- a/validator/validator-group.cpp +++ b/validator/validator-group.cpp @@ -373,6 +373,7 @@ void ValidatorGroup::create_session() { } CHECK(found); + config_.catchain_opts.broadcast_speed_multiplier = opts_->get_catchain_broadcast_speed_multiplier(); if (!config_.new_catchain_ids) { session_ = validatorsession::ValidatorSession::create(session_id_, config_, local_id_, std::move(vec), make_validator_session_callback(), keyring_, adnl_, rldp_, diff --git a/validator/validator-options.hpp b/validator/validator-options.hpp index e958d886..ace6b106 100644 --- a/validator/validator-options.hpp +++ b/validator/validator-options.hpp @@ -154,6 +154,9 @@ struct ValidatorManagerOptionsImpl : public ValidatorManagerOptions { bool get_fast_state_serializer_enabled() const override { return fast_state_serializer_enabled_; } + double get_catchain_broadcast_speed_multiplier() const override { + return catchain_broadcast_speed_multipliers_; + } void set_zero_block_id(BlockIdExt block_id) override { zero_block_id_ = block_id; @@ -249,6 +252,9 @@ struct ValidatorManagerOptionsImpl : public ValidatorManagerOptions { void set_fast_state_serializer_enabled(bool value) override { fast_state_serializer_enabled_ = value; } + void set_catchain_broadcast_speed_multiplier(double value) override { + catchain_broadcast_speed_multipliers_ = value; + } ValidatorManagerOptionsImpl *make_copy() const override { return new ValidatorManagerOptionsImpl(*this); @@ -302,6 +308,7 @@ struct 
ValidatorManagerOptionsImpl : public ValidatorManagerOptions { bool state_serializer_enabled_ = true; td::Ref collator_options_{true}; bool fast_state_serializer_enabled_ = false; + double catchain_broadcast_speed_multipliers_; }; } // namespace validator diff --git a/validator/validator.h b/validator/validator.h index 73065aa9..5d6c0173 100644 --- a/validator/validator.h +++ b/validator/validator.h @@ -20,6 +20,7 @@ #include #include +#include #include "td/actor/actor.h" @@ -115,6 +116,7 @@ struct ValidatorManagerOptions : public td::CntObject { virtual bool get_state_serializer_enabled() const = 0; virtual td::Ref get_collator_options() const = 0; virtual bool get_fast_state_serializer_enabled() const = 0; + virtual double get_catchain_broadcast_speed_multiplier() const = 0; virtual void set_zero_block_id(BlockIdExt block_id) = 0; virtual void set_init_block_id(BlockIdExt block_id) = 0; @@ -147,6 +149,7 @@ struct ValidatorManagerOptions : public td::CntObject { virtual void set_state_serializer_enabled(bool value) = 0; virtual void set_collator_options(td::Ref value) = 0; virtual void set_fast_state_serializer_enabled(bool value) = 0; + virtual void set_catchain_broadcast_speed_multiplier(double value) = 0; static td::Ref create( BlockIdExt zero_block_id, BlockIdExt init_block_id, @@ -292,6 +295,13 @@ class ValidatorManagerInterface : public td::actor::Actor { virtual void get_out_msg_queue_size(BlockIdExt block_id, td::Promise promise) = 0; virtual void update_options(td::Ref opts) = 0; + + virtual void register_stats_provider( + td::uint64 idx, std::string prefix, + std::function>>)> callback) { + } + virtual void unregister_stats_provider(td::uint64 idx) { + } }; } // namespace validator