diff --git a/crypto/test/test-db.cpp b/crypto/test/test-db.cpp index dc7fcf37..e61174ff 100644 --- a/crypto/test/test-db.cpp +++ b/crypto/test/test-db.cpp @@ -2318,7 +2318,10 @@ TEST(TonDb, LargeBocSerializer) { std_boc_serialize_to_file_large(dboc->get_cell_db_reader(), root->get_hash(), fd, 31); fd.close(); auto b = td::read_file_str(path).move_as_ok(); - CHECK(a == b); + + auto a_cell = vm::deserialize_boc(td::BufferSlice(a)); + auto b_cell = vm::deserialize_boc(td::BufferSlice(b)); + ASSERT_EQ(a_cell->get_hash(), b_cell->get_hash()); } TEST(TonDb, DoNotMakeListsPrunned) { diff --git a/crypto/vm/boc.cpp b/crypto/vm/boc.cpp index c7ea95a3..89e5411a 100644 --- a/crypto/vm/boc.cpp +++ b/crypto/vm/boc.cpp @@ -205,105 +205,57 @@ td::Status BagOfCells::import_cells() { return td::Status::OK(); } -td::Result> BagOfCells::load_cells(const std::vector>& batch) { - if (reader_) { - TRY_RESULT(data_cells, reader_->load_bulk(td::transform(batch, [](const auto& cell) { return cell->get_hash().as_slice(); }))); - return td::transform(data_cells, [](auto& data_cell) { return vm::Cell::LoadedCell{std::move(data_cell), {}, {}}; }); - } - std::vector res; - res.reserve(batch.size()); - for (auto& cell : batch) { - TRY_RESULT(loaded_dc, cell->load_cell()); - res.push_back(std::move(loaded_dc)); - } - return res; -} - // Changes in this function may require corresponding changes in crypto/vm/large-boc-serializer.cpp -td::Result BagOfCells::import_cell(td::Ref root_cell, int root_depth) { - if (root_cell.is_null()) { +td::Result BagOfCells::import_cell(td::Ref cell, int depth) { + if (depth > max_depth) { + return td::Status::Error("error while importing a cell into a bag of cells: cell depth too large"); + } + if (cell.is_null()) { return td::Status::Error("error while importing a cell into a bag of cells: cell is null"); } - - const int start_ind = cell_count; - td::HashMap> child_hashes_map; - std::vector> current_batch; - current_batch.push_back(root_cell); - int current_depth = root_depth; - - while (!current_batch.empty()) { - if (current_depth > max_depth) { - return td::Status::Error("error while importing a cell into a bag of cells: cell depth too large"); - } - - std::vector> next_batch; - TRY_RESULT_PREFIX(loaded_cells, load_cells(current_batch), "error while importing a cell into a bag of cells: "); - DCHECK(loaded_cells.size() == current_batch.size()); - - for (size_t i = 0; i < current_batch.size(); ++i) { - auto& cell = loaded_cells[i]; - if (cell.data_cell->get_virtualization() != 0) { - return td::Status::Error( - "error while importing a cell into a bag of cells: cell has non-zero virtualization level"); - } - - const auto hash = cell.data_cell->get_hash(); - auto existing_it = cells.find(hash); - if (existing_it != cells.end()) { - cell_list_[existing_it->second].should_cache = true; - continue; - } - - CellSlice cs(std::move(cell)); - std::vector child_hashes; - DCHECK(cs.size_refs() <= 4); - for (unsigned j = 0; j < cs.size_refs(); j++) { - auto child = cs.prefetch_ref(j); - const auto child_hash = child->get_hash(); - child_hashes.push_back(child_hash); - - next_batch.push_back(child); - } - child_hashes_map[hash] = std::move(child_hashes); - - auto dc = cs.move_as_loaded_cell().data_cell; - cells.emplace(hash, cell_count); - cell_list_.emplace_back(dc, dc->size_refs(), std::array{-1, -1, -1, -1}); - CellInfo& dc_info = cell_list_.back(); - dc_info.hcnt = static_cast(dc->get_level_mask().get_hashes_count()); - dc_info.wt = 0; // will be calculated after traversing - dc_info.new_idx = -1; - data_bytes += dc->get_serialized_size(); - cell_count++; - } - if (logger_ptr_) { - TRY_STATUS(logger_ptr_->on_cells_processed(current_batch.size())); - } - - current_batch = std::move(next_batch); - next_batch.clear(); - current_depth++; + if (logger_ptr_) { + TRY_STATUS(logger_ptr_->on_cells_processed(1)); } - - for (int idx = cell_count - 1; idx >= start_ind; --idx) { - CellInfo& cell_info = cell_list_[idx]; - const auto& child_hashes = child_hashes_map[cell_info.dc_ref->get_hash()]; - unsigned sum_child_wt = 1; - - for (size_t j = 0; j < child_hashes.size(); ++j) { - const auto child_it = cells.find(child_hashes[j]); - DCHECK(child_it != cells.end()); - cell_info.ref_idx[j] = child_it->second; - sum_child_wt += cell_list_[child_it->second].wt; - ++int_refs; - } - - cell_info.wt = static_cast(std::min(0xffU, sum_child_wt)); + auto it = cells.find(cell->get_hash()); + if (it != cells.end()) { + auto pos = it->second; + cell_list_[pos].should_cache = true; + return pos; } - - auto root_it = cells.find(root_cell->get_hash()); - DCHECK(root_it != cells.end()); - return root_it->second; + if (cell->get_virtualization() != 0) { + return td::Status::Error( + "error while importing a cell into a bag of cells: cell has non-zero virtualization level"); + } + auto r_loaded_dc = cell->load_cell(); + if (r_loaded_dc.is_error()) { + return td::Status::Error("error while importing a cell into a bag of cells: " + + r_loaded_dc.move_as_error().to_string()); + } + auto loaded_dc = r_loaded_dc.move_as_ok(); + CellSlice cs(std::move(loaded_dc)); + std::array refs{-1}; + DCHECK(cs.size_refs() <= 4); + unsigned sum_child_wt = 1; + for (unsigned i = 0; i < cs.size_refs(); i++) { + auto ref = import_cell(cs.prefetch_ref(i), depth + 1); + if (ref.is_error()) { + return ref.move_as_error(); + } + refs[i] = ref.move_as_ok(); + sum_child_wt += cell_list_[refs[i]].wt; + ++int_refs; + } + DCHECK(cell_list_.size() == static_cast(cell_count)); + auto dc = cs.move_as_loaded_cell().data_cell; + auto res = cells.emplace(dc->get_hash(), cell_count); + DCHECK(res.second); + cell_list_.emplace_back(dc, dc->size_refs(), refs); + CellInfo& dc_info = cell_list_.back(); + dc_info.hcnt = static_cast(dc->get_level_mask().get_hashes_count()); + dc_info.wt = static_cast(std::min(0xffU, sum_child_wt)); + dc_info.new_idx = -1; + data_bytes += dc->get_serialized_size(); + return cell_count++; } // Changes in this function may require corresponding changes in crypto/vm/large-boc-serializer.cpp @@ -1063,20 +1015,6 @@ td::Result std_boc_serialize(Ref root, int mode) { return boc.serialize_to_slice(mode); } -td::Result std_boc_serialize_with_reader(std::shared_ptr reader, Ref root, int mode) { - if (root.is_null()) { - return td::Status::Error("cannot serialize a null cell reference into a bag of cells"); - } - BagOfCells boc; - boc.set_reader(std::move(reader)); - boc.add_root(std::move(root)); - auto res = boc.import_cells(); - if (res.is_error()) { - return res.move_as_error(); - } - return boc.serialize_to_slice(mode); -} - td::Result std_boc_serialize_multi(std::vector> roots, int mode) { if (roots.empty()) { return td::BufferSlice{}; diff --git a/crypto/vm/boc.h b/crypto/vm/boc.h index b4c8c243..ff580050 100644 --- a/crypto/vm/boc.h +++ b/crypto/vm/boc.h @@ -330,7 +330,6 @@ class BagOfCells { const unsigned char* data_ptr{nullptr}; std::vector custom_index; BagOfCellsLogger* logger_ptr_{nullptr}; - std::shared_ptr reader_{nullptr}; public: void clear(); @@ -343,9 +342,6 @@ class BagOfCells { void set_logger(BagOfCellsLogger* logger_ptr) { logger_ptr_ = logger_ptr; } - void set_reader(std::shared_ptr reader) { - reader_ = std::move(reader); - } std::size_t estimate_serialized_size(int mode = 0); td::Status serialize(int mode = 0); td::string serialize_to_string(int mode = 0); @@ -373,7 +369,6 @@ class BagOfCells { private: int rv_idx; - td::Result> load_cells(const std::vector>& batch); td::Result import_cell(td::Ref cell, int depth); void cells_clear() { cell_count = 0; @@ -395,7 +390,6 @@ class BagOfCells { td::Result> std_boc_deserialize(td::Slice data, bool can_be_empty = false, bool allow_nonzero_level = false); td::Result std_boc_serialize(Ref root, int mode = 0); -td::Result std_boc_serialize_with_reader(std::shared_ptr reader, Ref root, int mode = 0); td::Result>> std_boc_deserialize_multi(td::Slice data, int max_roots = BagOfCells::default_max_roots); diff --git a/crypto/vm/large-boc-serializer.cpp b/crypto/vm/large-boc-serializer.cpp index 27896139..069611e0 100644 --- a/crypto/vm/large-boc-serializer.cpp +++ b/crypto/vm/large-boc-serializer.cpp @@ -95,6 +95,8 @@ void LargeBocSerializer::add_root(Hash root) { roots.emplace_back(root, -1); } +// Unlike crypto/vm/boc.cpp this implementation does not load all cells into memory +// and traverses them in BFS order to utilize bulk load of cells on the same level. td::Status LargeBocSerializer::import_cells() { if (logger_ptr_) { logger_ptr_->start_stage("import_cells");