diff --git a/crypto/CMakeLists.txt b/crypto/CMakeLists.txt index 76c8ae9d..58b98088 100644 --- a/crypto/CMakeLists.txt +++ b/crypto/CMakeLists.txt @@ -36,6 +36,7 @@ set(TON_CRYPTO_SOURCE vm/debugops.cpp vm/tonops.cpp vm/boc.cpp + vm/large-boc-serializer.cpp vm/utils.cpp vm/vm.cpp tl/tlblib.cpp @@ -68,6 +69,7 @@ set(TON_CRYPTO_SOURCE vm/arithops.h vm/atom.h vm/boc.h + vm/boc-writers.h vm/box.hpp vm/cellops.h vm/continuation.h diff --git a/crypto/vm/boc-writers.h b/crypto/vm/boc-writers.h new file mode 100644 index 00000000..e33886df --- /dev/null +++ b/crypto/vm/boc-writers.h @@ -0,0 +1,146 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>. 
+*/ +#pragma once +#include "td/utils/port/FileFd.h" +#include "td/utils/crypto.h" +#include + +namespace vm { +namespace boc_writers { +struct BufferWriter { + BufferWriter(unsigned char* store_start, unsigned char* store_end) + : store_start(store_start), store_ptr(store_start), store_end(store_end) {} + + size_t position() const { + return store_ptr - store_start; + } + size_t remaining() const { + return store_end - store_ptr; + } + void chk() const { + DCHECK(store_ptr <= store_end); + } + bool empty() const { + return store_ptr == store_end; + } + void store_uint(unsigned long long value, unsigned bytes) { + unsigned char* ptr = store_ptr += bytes; + chk(); + while (bytes) { + *--ptr = value & 0xff; + value >>= 8; + --bytes; + } + DCHECK(!bytes); + } + void store_bytes(unsigned char const* data, size_t s) { + store_ptr += s; + chk(); + memcpy(store_ptr - s, data, s); + } + unsigned get_crc32() const { + return td::crc32c(td::Slice{store_start, store_ptr}); + } + + private: + unsigned char* store_start; + unsigned char* store_ptr; + unsigned char* store_end; +}; + +struct FileWriter { + FileWriter(td::FileFd& fd, size_t expected_size) + : fd(fd), expected_size(expected_size) {} + + ~FileWriter() { + flush(); + } + + size_t position() const { + return flushed_size + writer.position(); + } + size_t remaining() const { + return expected_size - position(); + } + void chk() const { + DCHECK(position() <= expected_size); + } + bool empty() const { + return remaining() == 0; + } + void store_uint(unsigned long long value, unsigned bytes) { + flush_if_needed(bytes); + writer.store_uint(value, bytes); + } + void store_bytes(unsigned char const* data, size_t s) { + flush_if_needed(s); + writer.store_bytes(data, s); + } + unsigned get_crc32() const { + unsigned char const* start = buf.data(); + unsigned char const* end = start + writer.position(); + return td::crc32c_extend(current_crc32, td::Slice(start, end)); + } + + td::Status finalize() { + flush(); + return 
std::move(res); + } + + private: + void flush_if_needed(size_t s) { + DCHECK(s <= BUF_SIZE); + if (s > BUF_SIZE - writer.position()) { + flush(); + } + } + + void flush() { + chk(); + unsigned char* start = buf.data(); + unsigned char* end = start + writer.position(); + if (start == end) { + return; + } + flushed_size += end - start; + current_crc32 = td::crc32c_extend(current_crc32, td::Slice(start, end)); + if (res.is_ok()) { + while (end > start) { + auto R = fd.write(td::Slice(start, end)); + if (R.is_error()) { + res = R.move_as_error(); + break; + } + size_t s = R.move_as_ok(); + start += s; + } + } + writer = BufferWriter(buf.data(), buf.data() + buf.size()); + } + + td::FileFd& fd; + size_t expected_size; + size_t flushed_size = 0; + unsigned current_crc32 = td::crc32c(td::Slice()); + + static const size_t BUF_SIZE = 1 << 22; + std::vector buf = std::vector(BUF_SIZE, '\0'); + BufferWriter writer = BufferWriter(buf.data(), buf.data() + buf.size()); + td::Status res = td::Status::OK(); +}; +} +} \ No newline at end of file diff --git a/crypto/vm/boc.cpp b/crypto/vm/boc.cpp index 8ff244d0..d35291d3 100644 --- a/crypto/vm/boc.cpp +++ b/crypto/vm/boc.cpp @@ -20,6 +20,7 @@ #include #include #include "vm/boc.h" +#include "vm/boc-writers.h" #include "vm/cells.h" #include "vm/cellslice.h" #include "td/utils/bits.h" @@ -180,6 +181,7 @@ int BagOfCells::add_root(td::Ref add_root) { return 1; } +// Changes in this function may require corresponding changes in crypto/vm/large-boc-serializer.cpp td::Status BagOfCells::import_cells() { cells_clear(); for (auto& root : roots) { @@ -197,6 +199,7 @@ td::Status BagOfCells::import_cells() { return td::Status::OK(); } +// Changes in this function may require corresponding changes in crypto/vm/large-boc-serializer.cpp td::Result BagOfCells::import_cell(td::Ref cell, int depth) { if (depth > max_depth) { return td::Status::Error("error while importing a cell into a bag of cells: cell depth too large"); @@ -246,6 +249,7 @@ 
td::Result BagOfCells::import_cell(td::Ref cell, int depth) { return cell_count++; } +// Changes in this function may require corresponding changes in crypto/vm/large-boc-serializer.cpp void BagOfCells::reorder_cells() { int_hashes = 0; for (int i = cell_count - 1; i >= 0; --i) { @@ -323,6 +327,7 @@ void BagOfCells::reorder_cells() { // force=0 : previsit (recursively until special cells are found; then visit them) // force=1 : visit (allocate and process all children) // force=2 : allocate (assign a new index; can be run only after visiting) +// Changes in this function may require corresponding changes in crypto/vm/large-boc-serializer.cpp int BagOfCells::revisit(int cell_idx, int force) { DCHECK(cell_idx >= 0 && cell_idx < cell_count); CellInfo& dci = cell_list_[cell_idx]; @@ -369,6 +374,7 @@ int BagOfCells::revisit(int cell_idx, int force) { return dci.new_idx = -3; // mark as visited (and all children processed) } +// Changes in this function may require corresponding changes in crypto/vm/large-boc-serializer.cpp td::uint64 BagOfCells::compute_sizes(int mode, int& r_size, int& o_size) { int rs = 0, os = 0; if (!root_count || !data_bytes) { @@ -395,6 +401,7 @@ td::uint64 BagOfCells::compute_sizes(int mode, int& r_size, int& o_size) { return data_bytes_adj; } +// Changes in this function may require corresponding changes in crypto/vm/large-boc-serializer.cpp std::size_t BagOfCells::estimate_serialized_size(int mode) { if ((mode & Mode::WithCacheBits) && !(mode & Mode::WithIndex)) { info.invalidate(); @@ -475,130 +482,6 @@ std::string BagOfCells::extract_string() const { return std::string{serialized.data(), serialized.data() + serialized.size()}; } -namespace { -struct BufferWriter { - BufferWriter(unsigned char* store_start, unsigned char* store_end) - : store_start(store_start), store_ptr(store_start), store_end(store_end) {} - - size_t position() const { - return store_ptr - store_start; - } - size_t remaining() const { - return store_end - store_ptr; - } - 
void chk() const { - DCHECK(store_ptr <= store_end); - } - bool empty() const { - return store_ptr == store_end; - } - void store_uint(unsigned long long value, unsigned bytes) { - unsigned char* ptr = store_ptr += bytes; - chk(); - while (bytes) { - *--ptr = value & 0xff; - value >>= 8; - --bytes; - } - DCHECK(!bytes); - } - void store_bytes(unsigned char const* data, size_t s) { - store_ptr += s; - chk(); - memcpy(store_ptr - s, data, s); - } - unsigned get_crc32() const { - return td::crc32c(td::Slice{store_start, store_ptr}); - } - - private: - unsigned char* store_start; - unsigned char* store_ptr; - unsigned char* store_end; -}; - -struct FileWriter { - FileWriter(td::FileFd& fd, size_t expected_size) - : fd(fd), expected_size(expected_size) {} - - ~FileWriter() { - flush(); - } - - size_t position() const { - return flushed_size + writer.position(); - } - size_t remaining() const { - return expected_size - position(); - } - void chk() const { - DCHECK(position() <= expected_size); - } - bool empty() const { - return remaining() == 0; - } - void store_uint(unsigned long long value, unsigned bytes) { - flush_if_needed(bytes); - writer.store_uint(value, bytes); - } - void store_bytes(unsigned char const* data, size_t s) { - flush_if_needed(s); - writer.store_bytes(data, s); - } - unsigned get_crc32() const { - unsigned char const* start = buf.data(); - unsigned char const* end = start + writer.position(); - return td::crc32c_extend(current_crc32, td::Slice(start, end)); - } - - td::Status finalize() { - flush(); - return std::move(res); - } - - private: - void flush_if_needed(size_t s) { - DCHECK(s <= BUF_SIZE); - if (s > BUF_SIZE - writer.position()) { - flush(); - } - } - - void flush() { - chk(); - unsigned char* start = buf.data(); - unsigned char* end = start + writer.position(); - if (start == end) { - return; - } - flushed_size += end - start; - current_crc32 = td::crc32c_extend(current_crc32, td::Slice(start, end)); - if (res.is_ok()) { - while (end > 
start) { - auto R = fd.write(td::Slice(start, end)); - if (R.is_error()) { - res = R.move_as_error(); - break; - } - size_t s = R.move_as_ok(); - start += s; - } - } - writer = BufferWriter(buf.data(), buf.data() + buf.size()); - } - - td::FileFd& fd; - size_t expected_size; - size_t flushed_size = 0; - unsigned current_crc32 = td::crc32c(td::Slice()); - - static const size_t BUF_SIZE = 1 << 22; - std::vector buf = std::vector(BUF_SIZE, '\0'); - BufferWriter writer = BufferWriter(buf.data(), buf.data() + buf.size()); - td::Status res = td::Status::OK(); -}; -} - //serialized_boc#672fb0ac has_idx:(## 1) has_crc32c:(## 1) // has_cache_bits:(## 1) flags:(## 2) { flags = 0 } // size:(## 3) { size <= 4 } @@ -610,6 +493,7 @@ struct FileWriter { // index:(cells * ##(off_bytes * 8)) // cell_data:(tot_cells_size * [ uint8 ]) // = BagOfCells; +// Changes in this function may require corresponding changes in crypto/vm/large-boc-serializer.cpp template std::size_t BagOfCells::serialize_to_impl(WriterT& writer, int mode) { auto store_ref = [&](unsigned long long value) { @@ -705,7 +589,7 @@ std::size_t BagOfCells::serialize_to(unsigned char* buffer, std::size_t buff_siz if (!size_est || size_est > buff_size) { return 0; } - BufferWriter writer{buffer, buffer + size_est}; + boc_writers::BufferWriter writer{buffer, buffer + size_est}; return serialize_to_impl(writer, mode); } @@ -714,7 +598,7 @@ td::Status BagOfCells::serialize_to_file(td::FileFd& fd, int mode) { if (!size_est) { return td::Status::Error("no cells to serialize to this bag of cells"); } - FileWriter writer{fd, size_est}; + boc_writers::FileWriter writer{fd, size_est}; size_t s = serialize_to_impl(writer, mode); TRY_STATUS(writer.finalize()); if (s != size_est) { diff --git a/crypto/vm/boc.h b/crypto/vm/boc.h index 02078e27..2fae1846 100644 --- a/crypto/vm/boc.h +++ b/crypto/vm/boc.h @@ -18,6 +18,7 @@ */ #pragma once #include +#include "vm/db/DynamicBagOfCellsDb.h" #include "vm/cells.h" #include "td/utils/Status.h" 
#include "td/utils/buffer.h" @@ -314,4 +315,7 @@ td::Result>> std_boc_deserialize_multi(td::Slice data, int max_roots = BagOfCells::default_max_roots); td::Result std_boc_serialize_multi(std::vector> root, int mode = 0); +td::Status std_boc_serialize_to_file_large(std::shared_ptr reader, Cell::Hash root_hash, + td::FileFd& fd, int mode = 0); + } // namespace vm diff --git a/crypto/vm/db/DynamicBagOfCellsDb.cpp b/crypto/vm/db/DynamicBagOfCellsDb.cpp index 5fcc3923..636be9c9 100644 --- a/crypto/vm/db/DynamicBagOfCellsDb.cpp +++ b/crypto/vm/db/DynamicBagOfCellsDb.cpp @@ -31,12 +31,6 @@ namespace vm { namespace { -class CellDbReader { - public: - virtual ~CellDbReader() = default; - virtual td::Result> load_cell(td::Slice hash) = 0; -}; - struct DynamicBocExtCellExtra { std::shared_ptr reader; }; @@ -176,6 +170,10 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat return td::Status::OK(); } + std::shared_ptr get_cell_db_reader() override { + return cell_db_reader_; + } + td::Status set_loader(std::unique_ptr loader) override { reset_cell_db_reader(); loader_ = std::move(loader); diff --git a/crypto/vm/db/DynamicBagOfCellsDb.h b/crypto/vm/db/DynamicBagOfCellsDb.h index 67d92ecf..9a87c619 100644 --- a/crypto/vm/db/DynamicBagOfCellsDb.h +++ b/crypto/vm/db/DynamicBagOfCellsDb.h @@ -34,6 +34,12 @@ class ExtCellCreator { virtual td::Result> ext_cell(Cell::LevelMask level_mask, td::Slice hash, td::Slice depth) = 0; }; +class CellDbReader { + public: + virtual ~CellDbReader() = default; + virtual td::Result> load_cell(td::Slice hash) = 0; +}; + class DynamicBagOfCellsDb { public: virtual ~DynamicBagOfCellsDb() = default; @@ -52,6 +58,7 @@ class DynamicBagOfCellsDb { virtual td::Status prepare_commit() = 0; virtual Stats get_stats_diff() = 0; virtual td::Status commit(CellStorer &) = 0; + virtual std::shared_ptr get_cell_db_reader() = 0; // restart with new loader will also reset stats_diff virtual td::Status set_loader(std::unique_ptr loader) = 
0; diff --git a/crypto/vm/large-boc-serializer.cpp b/crypto/vm/large-boc-serializer.cpp new file mode 100644 index 00000000..31a393ec --- /dev/null +++ b/crypto/vm/large-boc-serializer.cpp @@ -0,0 +1,411 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>. +*/ +#include "vm/boc.h" +#include "vm/boc-writers.h" +#include "vm/cellslice.h" +#include "td/utils/misc.h" + +namespace vm { + +namespace { +// LargeBocSerializer implements serialization of the bag of cells in the standard way +// (equivalent to the implementation in crypto/vm/boc.cpp) +// Changes in this file may require corresponding changes in boc.cpp +class LargeBocSerializer { + public: + using Hash = Cell::Hash; + + explicit LargeBocSerializer(std::shared_ptr reader) : reader(std::move(reader)) {} + + void add_root(Hash root); + td::Status import_cells(); + td::Status serialize(td::FileFd& fd, int mode); + + private: + std::shared_ptr reader; + struct CellInfo { + std::array ref_idx; + int idx; + unsigned short serialized_size; + unsigned char wt; + unsigned char hcnt : 6; + bool should_cache : 1; + bool is_root_cell : 1; + CellInfo(int idx, const std::array& ref_list) : ref_idx(ref_list), idx(idx) { + hcnt = 0; + should_cache = is_root_cell = 0; + } + bool is_special() const { + return !wt; + } + unsigned get_ref_num() const { + for (unsigned i = 0; i < 4; 
++i) { + if (ref_idx[i] == -1) { + return i; + } + } + return 4; + } + }; + std::map cells; + std::vector*> cell_list; + struct RootInfo { + RootInfo(Hash hash, int idx) : hash(hash), idx(idx) {} + Hash hash; + int idx; + }; + std::vector roots; + int cell_count = 0, int_refs = 0, int_hashes = 0, top_hashes = 0; + int rv_idx = 0; + unsigned long long data_bytes = 0; + + td::Result import_cell(Hash hash, int depth = 0); + void reorder_cells(); + int revisit(int cell_idx, int force = 0); + td::uint64 compute_sizes(int mode, int& r_size, int& o_size); +}; + +void LargeBocSerializer::add_root(Hash root) { + roots.emplace_back(root, -1); +} + +td::Status LargeBocSerializer::import_cells() { + for (auto& root : roots) { + TRY_RESULT(idx, import_cell(root.hash)); + root.idx = idx; + } + reorder_cells(); + CHECK(!cell_list.empty()); + return td::Status::OK(); +} + +td::Result LargeBocSerializer::import_cell(Hash hash, int depth) { + if (depth > Cell::max_depth) { + return td::Status::Error("error while importing a cell into a bag of cells: cell depth too large"); + } + auto it = cells.find(hash); + if (it != cells.end()) { + it->second.should_cache = true; + return it->second.idx; + } + TRY_RESULT(cell, reader->load_cell(hash.as_slice())); + if (cell->get_virtualization() != 0) { + return td::Status::Error( + "error while importing a cell into a bag of cells: cell has non-zero virtualization level"); + } + CellSlice cs(std::move(cell)); + std::array refs; + std::fill(refs.begin(), refs.end(), -1); + DCHECK(cs.size_refs() <= 4); + unsigned sum_child_wt = 1; + for (unsigned i = 0; i < cs.size_refs(); i++) { + TRY_RESULT(ref, import_cell(cs.prefetch_ref(i)->get_hash(), depth + 1)); + refs[i] = ref; + sum_child_wt += cell_list[ref]->second.wt; + ++int_refs; + } + auto dc = cs.move_as_loaded_cell().data_cell; + auto res = cells.emplace(hash, CellInfo(cell_count, refs)); + DCHECK(res.second); + cell_list.push_back(&*res.first); + CellInfo& dc_info = res.first->second; + 
dc_info.wt = (unsigned char)std::min(0xffU, sum_child_wt); + unsigned hcnt = dc->get_level_mask().get_hashes_count(); + DCHECK(hcnt <= 4); + dc_info.hcnt = (unsigned char)hcnt; + TRY_RESULT(serialized_size, td::narrow_cast_safe(dc->get_serialized_size())); + data_bytes += dc_info.serialized_size = serialized_size; + return cell_count++; +} + +void LargeBocSerializer::reorder_cells() { + for (auto ptr : cell_list) { + ptr->second.idx = -1; + } + int_hashes = 0; + for (int i = cell_count - 1; i >= 0; --i) { + CellInfo& dci = cell_list[i]->second; + int s = dci.get_ref_num(), c = s, sum = BagOfCells::max_cell_whs - 1, mask = 0; + for (int j = 0; j < s; ++j) { + CellInfo& dcj = cell_list[dci.ref_idx[j]]->second; + int limit = (BagOfCells::max_cell_whs - 1 + j) / s; + if (dcj.wt <= limit) { + sum -= dcj.wt; + --c; + mask |= (1 << j); + } + } + if (c) { + for (int j = 0; j < s; ++j) { + if (!(mask & (1 << j))) { + CellInfo& dcj = cell_list[dci.ref_idx[j]]->second; + int limit = sum++ / c; + if (dcj.wt > limit) { + dcj.wt = (unsigned char)limit; + } + } + } + } + } + for (int i = 0; i < cell_count; i++) { + CellInfo& dci = cell_list[i]->second; + int s = dci.get_ref_num(), sum = 1; + for (int j = 0; j < s; ++j) { + sum += cell_list[dci.ref_idx[j]]->second.wt; + } + DCHECK(sum <= BagOfCells::max_cell_whs); + if (sum <= dci.wt) { + dci.wt = (unsigned char)sum; + } else { + dci.wt = 0; + int_hashes += dci.hcnt; + } + } + top_hashes = 0; + for (auto& root_info : roots) { + auto& cell_info = cell_list[root_info.idx]->second; + if (cell_info.is_root_cell) { + cell_info.is_root_cell = true; + if (cell_info.wt) { + top_hashes += cell_info.hcnt; + } + } + } + if (cell_count > 0) { + rv_idx = 0; + + for (const auto& root_info : roots) { + revisit(root_info.idx, 0); + revisit(root_info.idx, 1); + } + for (const auto& root_info : roots) { + revisit(root_info.idx, 2); + } + for (auto& root_info : roots) { + root_info.idx = cell_list[root_info.idx]->second.idx; + } + + DCHECK(rv_idx == 
cell_count); + for (int i = 0; i < cell_count; ++i) { + while (cell_list[i]->second.idx != i) { + std::swap(cell_list[i], cell_list[cell_list[i]->second.idx]); + } + } + } +} + +int LargeBocSerializer::revisit(int cell_idx, int force) { + DCHECK(cell_idx >= 0 && cell_idx < cell_count); + CellInfo& dci = cell_list[cell_idx]->second; + if (dci.idx >= 0) { + return dci.idx; + } + if (!force) { + // previsit + if (dci.idx != -1) { + // already previsited or visited + return dci.idx; + } + int n = dci.get_ref_num(); + for (int j = n - 1; j >= 0; --j) { + int child_idx = dci.ref_idx[j]; + // either previsit or visit child, depending on whether it is special + revisit(dci.ref_idx[j], cell_list[child_idx]->second.is_special()); + } + return dci.idx = -2; // mark as previsited + } + if (force > 1) { + // time to allocate + auto i = dci.idx = rv_idx++; + return i; + } + if (dci.idx == -3) { + // already visited + return dci.idx; + } + if (dci.is_special()) { + // if current cell is special, previsit it first + revisit(cell_idx, 0); + } + // visit children + int n = dci.get_ref_num(); + for (int j = n - 1; j >= 0; --j) { + revisit(dci.ref_idx[j], 1); + } + // allocate children + for (int j = n - 1; j >= 0; --j) { + dci.ref_idx[j] = revisit(dci.ref_idx[j], 2); + } + return dci.idx = -3; // mark as visited (and all children processed) +} + +td::uint64 LargeBocSerializer::compute_sizes(int mode, int& r_size, int& o_size) { + using Mode = BagOfCells::Mode; + int rs = 0, os = 0; + if (roots.empty() || !data_bytes) { + r_size = o_size = 0; + return 0; + } + while (cell_count >= (1LL << (rs << 3))) { + rs++; + } + td::uint64 hashes = + (((mode & Mode::WithTopHash) ? top_hashes : 0) + ((mode & Mode::WithIntHashes) ? int_hashes : 0)) * + (Cell::hash_bytes + Cell::depth_bytes); + td::uint64 data_bytes_adj = data_bytes + (unsigned long long)int_refs * rs + hashes; + td::uint64 max_offset = (mode & Mode::WithCacheBits) ? 
data_bytes_adj * 2 : data_bytes_adj; + while (max_offset >= (1ULL << (os << 3))) { + os++; + } + if (rs > 4 || os > 8) { + r_size = o_size = 0; + return 0; + } + r_size = rs; + o_size = os; + return data_bytes_adj; +} + +td::Status LargeBocSerializer::serialize(td::FileFd& fd, int mode) { + using Mode = BagOfCells::Mode; + BagOfCells::Info info; + if ((mode & Mode::WithCacheBits) && !(mode & Mode::WithIndex)) { + return td::Status::Error("invalid flags"); + } + auto data_bytes_adj = compute_sizes(mode, info.ref_byte_size, info.offset_byte_size); + if (data_bytes_adj == 0) { + return td::Status::Error("no cells to serialize"); + } + info.valid = true; + info.has_crc32c = mode & Mode::WithCRC32C; + info.has_index = mode & Mode::WithIndex; + info.has_cache_bits = mode & Mode::WithCacheBits; + info.root_count = (int)roots.size(); + info.cell_count = cell_count; + info.absent_count = 0; + int crc_size = info.has_crc32c ? 4 : 0; + info.roots_offset = 4 + 1 + 1 + 3 * info.ref_byte_size + info.offset_byte_size; + info.index_offset = info.roots_offset + info.root_count * info.ref_byte_size; + info.data_offset = info.index_offset; + if (info.has_index) { + info.data_offset += (long long)cell_count * info.offset_byte_size; + } + info.magic = BagOfCells::Info::boc_generic; + info.data_size = data_bytes_adj; + info.total_size = info.data_offset + data_bytes_adj + crc_size; + auto res = td::narrow_cast_safe(info.total_size); + if (res.is_error()) { + return td::Status::Error("bag of cells is too large"); + } + + boc_writers::FileWriter writer{fd, info.total_size}; + auto store_ref = [&](unsigned long long value) { + writer.store_uint(value, info.ref_byte_size); + }; + auto store_offset = [&](unsigned long long value) { + writer.store_uint(value, info.offset_byte_size); + }; + + writer.store_uint(info.magic, 4); + + td::uint8 byte{0}; + if (info.has_index) { + byte |= 1 << 7; + } + if (info.has_crc32c) { + byte |= 1 << 6; + } + if (info.has_cache_bits) { + byte |= 1 << 5; + } + 
byte |= (td::uint8)info.ref_byte_size; + writer.store_uint(byte, 1); + + writer.store_uint(info.offset_byte_size, 1); + store_ref(cell_count); + store_ref(roots.size()); + store_ref(0); + store_offset(info.data_size); + for (const auto& root_info : roots) { + int k = cell_count - 1 - root_info.idx; + DCHECK(k >= 0 && k < cell_count); + store_ref(k); + } + DCHECK(writer.position() == info.index_offset); + DCHECK((unsigned)cell_count == cell_list.size()); + if (info.has_index) { + std::size_t offs = 0; + for (int i = cell_count - 1; i >= 0; --i) { + const auto& dc_info = cell_list[i]->second; + bool with_hash = (mode & Mode::WithIntHashes) && !dc_info.wt; + if (dc_info.is_root_cell && (mode & Mode::WithTopHash)) { + with_hash = true; + } + int hash_size = 0; + if (with_hash) { + hash_size = (Cell::hash_bytes + Cell::depth_bytes) * dc_info.hcnt; + } + offs += dc_info.serialized_size + hash_size + dc_info.get_ref_num() * info.ref_byte_size; + auto fixed_offset = offs; + if (info.has_cache_bits) { + fixed_offset = offs * 2 + dc_info.should_cache; + } + store_offset(fixed_offset); + } + DCHECK(offs == info.data_size); + } + DCHECK(writer.position() == info.data_offset); + size_t keep_position = writer.position(); + for (int i = 0; i < cell_count; ++i) { + auto hash = cell_list[cell_count - 1 - i]->first; + const auto& dc_info = cell_list[cell_count - 1 - i]->second; + TRY_RESULT(dc, reader->load_cell(hash.as_slice())); + bool with_hash = (mode & Mode::WithIntHashes) && !dc_info.wt; + if (dc_info.is_root_cell && (mode & Mode::WithTopHash)) { + with_hash = true; + } + unsigned char buf[256]; + int s = dc->serialize(buf, 256, with_hash); + writer.store_bytes(buf, s); + DCHECK(dc->size_refs() == dc_info.get_ref_num()); + unsigned ref_num = dc_info.get_ref_num(); + for (unsigned j = 0; j < ref_num; ++j) { + int k = cell_count - 1 - dc_info.ref_idx[j]; + DCHECK(k > i && k < cell_count); + store_ref(k); + } + } + DCHECK(writer.position() - keep_position == info.data_size); + if 
(info.has_crc32c) { + unsigned crc = writer.get_crc32(); + writer.store_uint(td::bswap32(crc), 4); + } + DCHECK(writer.empty()); + return writer.finalize(); +} +} + +td::Status std_boc_serialize_to_file_large(std::shared_ptr reader, Cell::Hash root_hash, + td::FileFd& fd, int mode) { + CHECK(reader != nullptr) + LargeBocSerializer serializer(reader); + serializer.add_root(root_hash); + TRY_STATUS(serializer.import_cells()); + return serializer.serialize(fd, mode); +} + +} diff --git a/validator/db/celldb.cpp b/validator/db/celldb.cpp index 6b1d99d8..e7458407 100644 --- a/validator/db/celldb.cpp +++ b/validator/db/celldb.cpp @@ -102,6 +102,10 @@ void CellDbIn::store_cell(BlockIdExt block_id, td::Ref cell, td::Promi promise.set_result(boc_->load_cell(cell->get_hash().as_slice())); } +void CellDbIn::get_cell_db_reader(td::Promise> promise) { + promise.set_result(boc_->get_cell_db_reader()); +} + void CellDbIn::alarm() { auto R = get_block(last_gc_); R.ensure(); @@ -264,6 +268,10 @@ void CellDb::store_cell(BlockIdExt block_id, td::Ref cell, td::Promise td::actor::send_closure(cell_db_, &CellDbIn::store_cell, block_id, std::move(cell), std::move(promise)); } +void CellDb::get_cell_db_reader(td::Promise> promise) { + td::actor::send_closure(cell_db_, &CellDbIn::get_cell_db_reader, std::move(promise)); +} + void CellDb::start_up() { boc_ = vm::DynamicBagOfCellsDb::create(); cell_db_ = td::actor::create_actor("celldbin", root_db_, actor_id(this), path_); diff --git a/validator/db/celldb.hpp b/validator/db/celldb.hpp index 691d8848..4eb1d6a8 100644 --- a/validator/db/celldb.hpp +++ b/validator/db/celldb.hpp @@ -40,6 +40,7 @@ class CellDbIn : public td::actor::Actor { void load_cell(RootHash hash, td::Promise> promise); void store_cell(BlockIdExt block_id, td::Ref cell, td::Promise> promise); + void get_cell_db_reader(td::Promise> promise); CellDbIn(td::actor::ActorId root_db, td::actor::ActorId parent, std::string path); @@ -96,6 +97,7 @@ class CellDb : public 
td::actor::Actor { started_ = true; boc_->set_loader(std::make_unique(std::move(snapshot))).ensure(); } + void get_cell_db_reader(td::Promise> promise); CellDb(td::actor::ActorId root_db, std::string path) : root_db_(root_db), path_(path) { } diff --git a/validator/db/rootdb.cpp b/validator/db/rootdb.cpp index 58e3dd2f..a7a1becf 100644 --- a/validator/db/rootdb.cpp +++ b/validator/db/rootdb.cpp @@ -270,6 +270,10 @@ void RootDb::get_block_state(ConstBlockHandle handle, td::Promise> promise) { + td::actor::send_closure(cell_db_, &CellDb::get_cell_db_reader, std::move(promise)); +} + void RootDb::store_persistent_state_file(BlockIdExt block_id, BlockIdExt masterchain_block_id, td::BufferSlice state, td::Promise promise) { td::actor::send_closure(archive_db_, &ArchiveManager::add_persistent_state, block_id, masterchain_block_id, diff --git a/validator/db/rootdb.hpp b/validator/db/rootdb.hpp index 2654d482..9b0d52a6 100644 --- a/validator/db/rootdb.hpp +++ b/validator/db/rootdb.hpp @@ -60,6 +60,7 @@ class RootDb : public Db { void store_block_state(BlockHandle handle, td::Ref state, td::Promise> promise) override; void get_block_state(ConstBlockHandle handle, td::Promise> promise) override; + void get_cell_db_reader(td::Promise> promise) override; void store_block_handle(BlockHandle handle, td::Promise promise) override; void get_block_handle(BlockIdExt id, td::Promise promise) override; diff --git a/validator/interfaces/db.h b/validator/interfaces/db.h index 9983572b..ba4d9dda 100644 --- a/validator/interfaces/db.h +++ b/validator/interfaces/db.h @@ -50,6 +50,7 @@ class Db : public td::actor::Actor { virtual void store_block_state(BlockHandle handle, td::Ref state, td::Promise> promise) = 0; virtual void get_block_state(ConstBlockHandle handle, td::Promise> promise) = 0; + virtual void get_cell_db_reader(td::Promise> promise) = 0; virtual void store_persistent_state_file(BlockIdExt block_id, BlockIdExt masterchain_block_id, td::BufferSlice state, td::Promise promise) = 
0; diff --git a/validator/interfaces/validator-manager.h b/validator/interfaces/validator-manager.h index 42e99ecc..77f728eb 100644 --- a/validator/interfaces/validator-manager.h +++ b/validator/interfaces/validator-manager.h @@ -27,6 +27,7 @@ #include "message-queue.h" #include "validator/validator.h" #include "liteserver.h" +#include "crypto/vm/db/DynamicBagOfCellsDb.h" namespace ton { @@ -55,6 +56,7 @@ class ValidatorManager : public ValidatorManagerInterface { public: virtual void set_block_state(BlockHandle handle, td::Ref state, td::Promise> promise) = 0; + virtual void get_cell_db_reader(td::Promise> promise) = 0; virtual void store_persistent_state_file(BlockIdExt block_id, BlockIdExt masterchain_block_id, td::BufferSlice state, td::Promise promise) = 0; virtual void store_persistent_state_file_gen(BlockIdExt block_id, BlockIdExt masterchain_block_id, diff --git a/validator/manager-disk.cpp b/validator/manager-disk.cpp index 404f7fe8..46ad50fa 100644 --- a/validator/manager-disk.cpp +++ b/validator/manager-disk.cpp @@ -674,6 +674,10 @@ void ValidatorManagerImpl::set_block_state(BlockHandle handle, td::Ref> promise) { + td::actor::send_closure(db_, &Db::get_cell_db_reader, std::move(promise)); +} + void ValidatorManagerImpl::store_persistent_state_file(BlockIdExt block_id, BlockIdExt masterchain_block_id, td::BufferSlice state, td::Promise promise) { td::actor::send_closure(db_, &Db::store_persistent_state_file, block_id, masterchain_block_id, std::move(state), diff --git a/validator/manager-disk.hpp b/validator/manager-disk.hpp index bb740b4c..59475723 100644 --- a/validator/manager-disk.hpp +++ b/validator/manager-disk.hpp @@ -141,6 +141,7 @@ class ValidatorManagerImpl : public ValidatorManager { void set_block_state(BlockHandle handle, td::Ref state, td::Promise> promise) override; + void get_cell_db_reader(td::Promise> promise) override; void store_persistent_state_file(BlockIdExt block_id, BlockIdExt masterchain_block_id, td::BufferSlice state, 
td::Promise promise) override; void store_persistent_state_file_gen(BlockIdExt block_id, BlockIdExt masterchain_block_id, diff --git a/validator/manager-hardfork.cpp b/validator/manager-hardfork.cpp index 7b6ab8e9..d9718fa7 100644 --- a/validator/manager-hardfork.cpp +++ b/validator/manager-hardfork.cpp @@ -530,6 +530,10 @@ void ValidatorManagerImpl::get_block_handle(BlockIdExt id, bool force, td::Promi td::actor::send_closure(db_, &Db::get_block_handle, id, std::move(P)); } +void ValidatorManagerImpl::get_cell_db_reader(td::Promise> promise) { + td::actor::send_closure(db_, &Db::get_cell_db_reader, std::move(promise)); +} + void ValidatorManagerImpl::register_block_handle(BlockHandle handle, td::Promise promise) { auto it = handles_.find(handle->id()); if (it != handles_.end()) { diff --git a/validator/manager-hardfork.hpp b/validator/manager-hardfork.hpp index 71157dcc..98539f7e 100644 --- a/validator/manager-hardfork.hpp +++ b/validator/manager-hardfork.hpp @@ -165,6 +165,7 @@ class ValidatorManagerImpl : public ValidatorManager { td::Promise> promise) override { UNREACHABLE(); } + void get_cell_db_reader(td::Promise> promise) override; void store_persistent_state_file(BlockIdExt block_id, BlockIdExt masterchain_block_id, td::BufferSlice state, td::Promise promise) override { UNREACHABLE(); diff --git a/validator/manager.cpp b/validator/manager.cpp index 28e5cd3a..c33d23c2 100644 --- a/validator/manager.cpp +++ b/validator/manager.cpp @@ -1045,6 +1045,10 @@ void ValidatorManagerImpl::set_block_state(BlockHandle handle, td::Ref> promise) { + td::actor::send_closure(db_, &Db::get_cell_db_reader, std::move(promise)); +} + void ValidatorManagerImpl::store_persistent_state_file(BlockIdExt block_id, BlockIdExt masterchain_block_id, td::BufferSlice state, td::Promise promise) { td::actor::send_closure(db_, &Db::store_persistent_state_file, block_id, masterchain_block_id, std::move(state), diff --git a/validator/manager.hpp b/validator/manager.hpp index 
123625b2..e053fa94 100644 --- a/validator/manager.hpp +++ b/validator/manager.hpp @@ -345,6 +345,7 @@ class ValidatorManagerImpl : public ValidatorManager { void set_block_state(BlockHandle handle, td::Ref state, td::Promise> promise) override; + void get_cell_db_reader(td::Promise> promise) override; void store_persistent_state_file(BlockIdExt block_id, BlockIdExt masterchain_block_id, td::BufferSlice state, td::Promise promise) override; void store_persistent_state_file_gen(BlockIdExt block_id, BlockIdExt masterchain_block_id, diff --git a/validator/state-serializer.cpp b/validator/state-serializer.cpp index 82cf31cc..d5d2f000 100644 --- a/validator/state-serializer.cpp +++ b/validator/state-serializer.cpp @@ -82,24 +82,24 @@ void AsyncStateSerializer::alarm() { } void AsyncStateSerializer::request_masterchain_state() { - auto P = td::PromiseCreator::lambda([SelfId = actor_id(this)](td::Result> R) { - if (R.is_error()) { - td::actor::send_closure(SelfId, &AsyncStateSerializer::fail_handler, - R.move_as_error_prefix("failed to get masterchain state: ")); - } else { - td::actor::send_closure(SelfId, &AsyncStateSerializer::got_masterchain_state, - td::Ref(R.move_as_ok())); - } - }); - td::actor::send_closure(manager_, &ValidatorManager::get_shard_state_from_db, masterchain_handle_, std::move(P)); + auto P = td::PromiseCreator::lambda([SelfId = actor_id(this)](td::Result> R) { + if (R.is_error()) { + td::actor::send_closure(SelfId, &AsyncStateSerializer::fail_handler, + R.move_as_error_prefix("failed to get masterchain state: ")); + } else { + td::actor::send_closure(SelfId, &AsyncStateSerializer::got_masterchain_state, + td::Ref(R.move_as_ok())); + } + }); + td::actor::send_closure(manager_, &ValidatorManager::get_shard_state_from_db, masterchain_handle_, std::move(P)); } void AsyncStateSerializer::request_shard_state(BlockIdExt shard) { - auto P = td::PromiseCreator::lambda([SelfId = actor_id(this)](td::Result R) { - R.ensure(); - td::actor::send_closure(SelfId, 
&AsyncStateSerializer::got_shard_handle, R.move_as_ok()); - }); - return td::actor::send_closure(manager_, &ValidatorManager::get_block_handle, shard, true, std::move(P)); + auto P = td::PromiseCreator::lambda([SelfId = actor_id(this)](td::Result R) { + R.ensure(); + td::actor::send_closure(SelfId, &AsyncStateSerializer::got_shard_handle, R.move_as_ok()); + }); + return td::actor::send_closure(manager_, &ValidatorManager::get_block_handle, shard, true, std::move(P)); } void AsyncStateSerializer::next_iteration() { @@ -122,8 +122,21 @@ void AsyncStateSerializer::next_iteration() { CHECK(masterchain_handle_->id() == last_block_id_); if (attempt_ < max_attempt() && last_key_block_id_.id.seqno < last_block_id_.id.seqno && need_serialize(masterchain_handle_)) { + if (!cell_db_reader_) { + running_ = true; + auto P = td::PromiseCreator::lambda([SelfId = actor_id(this)](td::Result> R) { + if (R.is_error()) { + td::actor::send_closure(SelfId, &AsyncStateSerializer::fail_handler, + R.move_as_error_prefix("failed to get cell db reader: ")); + } else { + td::actor::send_closure(SelfId, &AsyncStateSerializer::got_cell_db_reader, R.move_as_ok()); + } + }); + td::actor::send_closure(manager_, &ValidatorManager::get_cell_db_reader, std::move(P)); + return; + } if (!have_masterchain_state_) { - LOG(INFO) << "started serializing persistent state for " << masterchain_handle_->id().seqno(); + LOG(INFO) << "started serializing persistent state for " << masterchain_handle_->id().id; // block next attempts immediately, but send actual request later running_ = true; delay_action( @@ -143,9 +156,10 @@ void AsyncStateSerializer::next_iteration() { return; } } - LOG(INFO) << "finished serializing persistent state for " << masterchain_handle_->id().seqno(); + LOG(INFO) << "finished serializing persistent state for " << masterchain_handle_->id().id; last_key_block_ts_ = masterchain_handle_->unix_time(); last_key_block_id_ = masterchain_handle_->id(); + cell_db_reader_ = nullptr; } if 
(!saved_to_db_) { running_ = true; @@ -175,6 +189,13 @@ void AsyncStateSerializer::got_top_masterchain_handle(BlockIdExt block_id) { } } +void AsyncStateSerializer::got_cell_db_reader(std::shared_ptr cell_db_reader) { + cell_db_reader_ = std::move(cell_db_reader); + running_ = false; + attempt_ = 0; + next_iteration(); +} + void AsyncStateSerializer::got_masterchain_handle(BlockHandle handle) { CHECK(!masterchain_handle_); masterchain_handle_ = std::move(handle); @@ -184,7 +205,7 @@ void AsyncStateSerializer::got_masterchain_handle(BlockHandle handle) { } void AsyncStateSerializer::got_masterchain_state(td::Ref state) { - LOG(INFO) << "serializing masterchain state " << masterchain_handle_->id().seqno(); + LOG(INFO) << "serializing masterchain state " << masterchain_handle_->id().id; have_masterchain_state_ = true; CHECK(next_idx_ == 0); CHECK(shards_.size() == 0); @@ -194,8 +215,8 @@ void AsyncStateSerializer::got_masterchain_state(td::Ref state shards_.push_back(v->top_block_id()); } - auto write_data = [state] (td::FileFd& fd) { - return state->serialize_to_file(fd); + auto write_data = [hash = state->root_cell()->get_hash(), cell_db_reader = cell_db_reader_] (td::FileFd& fd) { + return vm::std_boc_serialize_to_file_large(cell_db_reader, hash, fd, 31); }; auto P = td::PromiseCreator::lambda([SelfId = actor_id(this)](td::Result R) { R.ensure(); @@ -207,7 +228,7 @@ void AsyncStateSerializer::got_masterchain_state(td::Ref state } void AsyncStateSerializer::stored_masterchain_state() { - LOG(INFO) << "finished serializing masterchain state " << masterchain_handle_->id().seqno(); + LOG(INFO) << "finished serializing masterchain state " << masterchain_handle_->id().id; running_ = false; next_iteration(); } @@ -225,13 +246,13 @@ void AsyncStateSerializer::got_shard_handle(BlockHandle handle) { } void AsyncStateSerializer::got_shard_state(BlockHandle handle, td::Ref state) { - LOG(INFO) << "serializing shard state " << handle->id().seqno(); - auto write_data = [state] 
(td::FileFd& fd) { - return state->serialize_to_file(fd); + LOG(INFO) << "serializing shard state " << handle->id().id; + auto write_data = [hash = state->root_cell()->get_hash(), cell_db_reader = cell_db_reader_] (td::FileFd& fd) { + return vm::std_boc_serialize_to_file_large(cell_db_reader, hash, fd, 31); }; auto P = td::PromiseCreator::lambda([SelfId = actor_id(this), handle](td::Result R) { R.ensure(); - LOG(INFO) << "finished serializing shard state " << handle->id().seqno(); + LOG(INFO) << "finished serializing shard state " << handle->id().id; td::actor::send_closure(SelfId, &AsyncStateSerializer::success_handler); }); td::actor::send_closure(manager_, &ValidatorManager::store_persistent_state_file_gen, handle->id(), diff --git a/validator/state-serializer.hpp b/validator/state-serializer.hpp index 14261df7..e971a3f8 100644 --- a/validator/state-serializer.hpp +++ b/validator/state-serializer.hpp @@ -42,6 +42,7 @@ class AsyncStateSerializer : public td::actor::Actor { td::uint32 next_idx_ = 0; + std::shared_ptr cell_db_reader_ = nullptr; BlockHandle masterchain_handle_; bool have_masterchain_state_ = false; @@ -70,6 +71,7 @@ class AsyncStateSerializer : public td::actor::Actor { void next_iteration(); void got_top_masterchain_handle(BlockIdExt block_id); + void got_cell_db_reader(std::shared_ptr cell_db_reader); void got_masterchain_handle(BlockHandle handle_); void got_masterchain_state(td::Ref state); void stored_masterchain_state();