From 21ce145af2aeb628c154ac5d805ac5f5b7c20941 Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Wed, 12 Jul 2023 13:29:59 +0300 Subject: [PATCH] Limit collated data size in collator --- crypto/block/block.cpp | 23 ++++++++++++++++------- crypto/block/block.h | 7 +++++-- crypto/vm/boc.cpp | 31 +++++++++++++++++++++++++++++++ crypto/vm/boc.h | 13 +++++++++++++ crypto/vm/cells/CellUsageTree.cpp | 12 +++++++++--- crypto/vm/cells/CellUsageTree.h | 13 +++++++++++-- crypto/vm/cells/MerkleProof.h | 4 ++++ crypto/vm/cells/UsageCell.h | 2 +- validator/impl/collator.cpp | 28 ++++++++++++++++++++++++++-- 9 files changed, 116 insertions(+), 17 deletions(-) diff --git a/crypto/block/block.cpp b/crypto/block/block.cpp index 1131213c..b9ecfe60 100644 --- a/crypto/block/block.cpp +++ b/crypto/block/block.cpp @@ -701,12 +701,19 @@ int BlockLimits::classify_lt(ton::LogicalTime lt) const { return lt_delta.classify(lt - start_lt); } -int BlockLimits::classify(td::uint64 size, td::uint64 gas, ton::LogicalTime lt) const { - return std::max(std::max(classify_size(size), classify_gas(gas)), classify_lt(lt)); +int BlockLimits::classify_collated_data_size(td::uint64 size) const { + return bytes.classify(size); // TODO: Maybe separate limits in config } -bool BlockLimits::fits(unsigned cls, td::uint64 size, td::uint64 gas_value, ton::LogicalTime lt) const { - return bytes.fits(cls, size) && gas.fits(cls, gas_value) && lt_delta.fits(cls, lt - start_lt); +int BlockLimits::classify(td::uint64 size, td::uint64 gas, ton::LogicalTime lt, td::uint64 collated_size) const { + return std::max( + {classify_size(size), classify_gas(gas), classify_lt(lt), classify_collated_data_size(collated_size)}); +} + +bool BlockLimits::fits(unsigned cls, td::uint64 size, td::uint64 gas_value, ton::LogicalTime lt, + td::uint64 collated_size) const { + return bytes.fits(cls, size) && gas.fits(cls, gas_value) && lt_delta.fits(cls, lt - start_lt) && + bytes.fits(cls, collated_size); } td::uint64 BlockLimitStatus::estimate_block_size(const vm::NewCellStorageStat::Stat* extra) const { @@ -719,20 +726,22 @@ td::uint64 BlockLimitStatus::estimate_block_size(const vm::NewCellStorageStat::S } int BlockLimitStatus::classify() const { - return limits.classify(estimate_block_size(), gas_used, cur_lt); + return limits.classify(estimate_block_size(), gas_used, cur_lt, collated_data_stat.estimate_proof_size()); } bool BlockLimitStatus::fits(unsigned cls) const { return cls >= ParamLimits::limits_cnt || (limits.gas.fits(cls, gas_used) && limits.lt_delta.fits(cls, cur_lt - limits.start_lt) && - limits.bytes.fits(cls, estimate_block_size())); + limits.bytes.fits(cls, estimate_block_size()) && + limits.bytes.fits(cls, collated_data_stat.estimate_proof_size())); } bool BlockLimitStatus::would_fit(unsigned cls, ton::LogicalTime end_lt, td::uint64 more_gas, const vm::NewCellStorageStat::Stat* extra) const { return cls >= ParamLimits::limits_cnt || (limits.gas.fits(cls, gas_used + more_gas) && limits.lt_delta.fits(cls, std::max(cur_lt, end_lt) - limits.start_lt) && - limits.bytes.fits(cls, estimate_block_size(extra))); + limits.bytes.fits(cls, estimate_block_size(extra)) && + limits.bytes.fits(cls, collated_data_stat.estimate_proof_size())); } // SETS: account_dict, shard_libraries_, mc_state_extra diff --git a/crypto/block/block.h b/crypto/block/block.h index 19d99e6a..b6b46a3d 100644 --- a/crypto/block/block.h +++ b/crypto/block/block.h @@ -252,8 +252,9 @@ struct BlockLimits { int classify_size(td::uint64 size) const; int classify_gas(td::uint64 gas) const; int classify_lt(ton::LogicalTime lt) const; - int classify(td::uint64 size, td::uint64 gas, ton::LogicalTime lt) const; - bool fits(unsigned cls, td::uint64 size, td::uint64 gas, ton::LogicalTime lt) const; + int classify_collated_data_size(td::uint64 size) const; + int classify(td::uint64 size, td::uint64 gas, ton::LogicalTime lt, td::uint64 collated_size) const; + bool fits(unsigned cls, td::uint64 size, td::uint64 gas, ton::LogicalTime lt, td::uint64 collated_size) const; }; struct BlockLimitStatus { @@ -262,6 +263,7 @@ struct BlockLimitStatus { td::uint64 gas_used{}; vm::NewCellStorageStat st_stat; unsigned accounts{}, transactions{}, extra_out_msgs{}; + vm::ProofStorageStat collated_data_stat; BlockLimitStatus(const BlockLimits& limits_, ton::LogicalTime lt = 0) : limits(limits_), cur_lt(std::max(limits_.start_lt, lt)) { } @@ -271,6 +273,7 @@ struct BlockLimitStatus { transactions = accounts = 0; gas_used = 0; extra_out_msgs = 0; + collated_data_stat = {}; } td::uint64 estimate_block_size(const vm::NewCellStorageStat::Stat* extra = nullptr) const; int classify() const; diff --git a/crypto/vm/boc.cpp b/crypto/vm/boc.cpp index 11583ede..0d84c118 100644 --- a/crypto/vm/boc.cpp +++ b/crypto/vm/boc.cpp @@ -1214,4 +1214,35 @@ bool VmStorageStat::add_storage(const CellSlice& cs) { return true; } +static td::uint64 estimate_prunned_size() { + return 41; +} + +static td::uint64 estimate_serialized_size(const Ref& cell) { + return cell->get_serialized_size() + cell->size_refs() * 3 + 3; +} + +void ProofStorageStat::add_cell(const Ref& cell) { + auto& status = cells_[cell->get_hash()]; + if (status == c_loaded) { + return; + } + if (status == c_prunned) { + proof_size_ -= estimate_prunned_size(); + } + status = c_loaded; + proof_size_ += estimate_serialized_size(cell); + for (unsigned i = 0; i < cell->size_refs(); ++i) { + auto& child_status = cells_[cell->get_ref(i)->get_hash()]; + if (child_status == c_none) { + child_status = c_prunned; + proof_size_ += estimate_prunned_size(); + } + } +} + +td::uint64 ProofStorageStat::estimate_proof_size() const { + return proof_size_; +} + } // namespace vm diff --git a/crypto/vm/boc.h b/crypto/vm/boc.h index c7a1810d..d6a7f9ad 100644 --- a/crypto/vm/boc.h +++ b/crypto/vm/boc.h @@ -41,6 +41,7 @@ class NewCellStorageStat { Stat(td::uint64 cells_, td::uint64 bits_, td::uint64 internal_refs_ = 0, td::uint64 external_refs_ = 0) : cells(cells_), bits(bits_), internal_refs(internal_refs_), external_refs(external_refs_) { } + Stat(const Stat&) = default; td::uint64 cells{0}; td::uint64 bits{0}; td::uint64 internal_refs{0}; @@ -160,6 +161,18 @@ struct VmStorageStat { } }; +class ProofStorageStat { + public: + void add_cell(const Ref& cell); + td::uint64 estimate_proof_size() const; + private: + enum CellStatus { + c_none = 0, c_prunned = 1, c_loaded = 2 + }; + std::map cells_; + td::uint64 proof_size_ = 0; +}; + struct CellSerializationInfo { bool special; Cell::LevelMask level_mask; diff --git a/crypto/vm/cells/CellUsageTree.cpp b/crypto/vm/cells/CellUsageTree.cpp index 3f43ec6b..410b3fcd 100644 --- a/crypto/vm/cells/CellUsageTree.cpp +++ b/crypto/vm/cells/CellUsageTree.cpp @@ -22,12 +22,12 @@ namespace vm { // // CellUsageTree::NodePtr // -bool CellUsageTree::NodePtr::on_load() const { +bool CellUsageTree::NodePtr::on_load(const td::Ref& cell) const { auto tree = tree_weak_.lock(); if (!tree) { return false; } - tree->on_load(node_id_); + tree->on_load(node_id_, cell); return true; } @@ -111,8 +111,14 @@ void CellUsageTree::set_use_mark_for_is_loaded(bool use_mark) { use_mark_ = use_mark; } -void CellUsageTree::on_load(NodeId node_id) { +void CellUsageTree::on_load(NodeId node_id, const td::Ref& cell) { + if (nodes_[node_id].is_loaded) { + return; + } nodes_[node_id].is_loaded = true; + if (cell_load_callback_) { + cell_load_callback_(cell); + } } CellUsageTree::NodeId CellUsageTree::create_child(NodeId node_id, unsigned ref_id) { diff --git a/crypto/vm/cells/CellUsageTree.h b/crypto/vm/cells/CellUsageTree.h index 150dd2bd..af0f21f5 100644 --- a/crypto/vm/cells/CellUsageTree.h +++ b/crypto/vm/cells/CellUsageTree.h @@ -22,8 +22,12 @@ #include "td/utils/int_types.h" #include "td/utils/logging.h" +#include namespace vm { + +class DataCell; + class CellUsageTree : public std::enable_shared_from_this { public: using NodeId = td::uint32; @@ -38,7 +42,7 @@ class CellUsageTree : public std::enable_shared_from_this { return node_id_ == 0 || tree_weak_.expired(); } - bool on_load() const; + bool on_load(const td::Ref& cell) const; NodePtr create_child(unsigned ref_id) const; bool mark_path(CellUsageTree* master_tree) const; bool is_from_tree(const CellUsageTree* master_tree) const; @@ -59,6 +63,10 @@ class CellUsageTree : public std::enable_shared_from_this { void set_use_mark_for_is_loaded(bool use_mark = true); NodeId create_child(NodeId node_id, unsigned ref_id); + void set_cell_load_callback(std::function&)> f) { + cell_load_callback_ = std::move(f); + } + private: struct Node { bool is_loaded{false}; @@ -68,8 +76,9 @@ class CellUsageTree : public std::enable_shared_from_this { }; bool use_mark_{false}; std::vector nodes_{2}; + std::function&)> cell_load_callback_; - void on_load(NodeId node_id); + void on_load(NodeId node_id, const td::Ref& cell); NodeId create_node(NodeId parent); }; } // namespace vm diff --git a/crypto/vm/cells/MerkleProof.h b/crypto/vm/cells/MerkleProof.h index 9c50fd07..fc2cb6eb 100644 --- a/crypto/vm/cells/MerkleProof.h +++ b/crypto/vm/cells/MerkleProof.h @@ -66,6 +66,10 @@ class MerkleProofBuilder { td::Result> extract_proof() const; bool extract_proof_to(Ref &proof_root) const; td::Result extract_proof_boc() const; + + void set_cell_load_callback(std::function&)> f) { + usage_tree->set_cell_load_callback(std::move(f)); + } }; } // namespace vm diff --git a/crypto/vm/cells/UsageCell.h b/crypto/vm/cells/UsageCell.h index bf15bb56..3e6e8898 100644 --- a/crypto/vm/cells/UsageCell.h +++ b/crypto/vm/cells/UsageCell.h @@ -39,7 +39,7 @@ class UsageCell : public Cell { // load interface td::Result load_cell() const override { TRY_RESULT(loaded_cell, cell_->load_cell()); - if (tree_node_.on_load()) { + if (tree_node_.on_load(loaded_cell.data_cell)) { CHECK(loaded_cell.tree_node.empty()); loaded_cell.tree_node = tree_node_; } diff --git a/validator/impl/collator.cpp b/validator/impl/collator.cpp index 1ce0a4b4..fc31d264 100644 --- a/validator/impl/collator.cpp +++ b/validator/impl/collator.cpp @@ -661,6 +661,13 @@ void Collator::got_neighbor_msg_queue(unsigned i, td::Resultstate_root_}); state_root = neighbor_proof_builders_.back().root(); + if (full_collated_data_ && !block_id.is_masterchain()) { + neighbor_proof_builders_.back().set_cell_load_callback([&](const td::Ref& cell) { + if (block_limit_status_) { + block_limit_status_->collated_data_stat.add_cell(cell); + } + }); + } } auto state = ShardStateQ::fetch(block_id, {}, state_root); if (state.is_error()) { @@ -745,6 +752,13 @@ bool Collator::unpack_merge_last_state() { } // 1. prepare for creating a MerkleUpdate based on previous state state_usage_tree_ = std::make_shared(); + if (full_collated_data_ && !is_masterchain()) { + state_usage_tree_->set_cell_load_callback([&](const td::Ref& cell) { + if (block_limit_status_) { + block_limit_status_->collated_data_stat.add_cell(cell); + } + }); + } prev_state_root_ = vm::UsageCell::create(prev_state_root_pure_, state_usage_tree_->root_ptr()); // 2. extract back slightly virtualized roots of the two original states Ref root0, root1; @@ -783,6 +797,13 @@ bool Collator::unpack_last_state() { prev_state_root_pure_ = prev_states.at(0)->root_cell(); // prepare for creating a MerkleUpdate based on previous state state_usage_tree_ = std::make_shared(); + if (full_collated_data_ && !is_masterchain()) { + state_usage_tree_->set_cell_load_callback([&](const td::Ref& cell) { + if (block_limit_status_) { + block_limit_status_->collated_data_stat.add_cell(cell); + } + }); + } prev_state_root_ = vm::UsageCell::create(prev_state_root_pure_, state_usage_tree_->root_ptr()); // unpack previous state block::ShardState ss; @@ -3547,7 +3568,8 @@ bool Collator::check_block_overload() { block_size_estimate_ = block_limit_status_->estimate_block_size(); LOG(INFO) << "block load statistics: gas=" << block_limit_status_->gas_used << " lt_delta=" << block_limit_status_->cur_lt - block_limit_status_->limits.start_lt - << " size_estimate=" << block_size_estimate_; + << " size_estimate=" << block_size_estimate_ + << " collated_size_estimate=" << block_limit_status_->collated_data_stat.estimate_proof_size(); auto cl = block_limit_status_->classify(); if (cl <= block::ParamLimits::cl_underload) { underload_history_ |= 1; @@ -4153,11 +4175,13 @@ bool Collator::create_block_candidate() { cdata_slice = cdata_res.move_as_ok(); } LOG(INFO) << "serialized block size " << blk_slice.size() << " bytes (preliminary estimate was " - << block_size_estimate_ << "), collated data " << cdata_slice.size() << " bytes"; + << block_size_estimate_ << ")"; auto st = block_limit_status_->st_stat.get_total_stat(); LOG(INFO) << "size regression stats: " << blk_slice.size() << " " << st.cells << " " << st.bits << " " << st.internal_refs << " " << st.external_refs << " " << block_limit_status_->accounts << " " << block_limit_status_->transactions; + LOG(INFO) << "serialized collated data size " << cdata_slice.size() << " bytes (preliminary estimate was " + << block_limit_status_->collated_data_stat.estimate_proof_size() << ")"; // 3. create a BlockCandidate block_candidate = std::make_unique( created_by_,