mirror of
https://github.com/ton-blockchain/ton
synced 2025-03-09 15:40:10 +00:00
Merge dfdb036ccd
into cf50b4b5da
This commit is contained in:
commit
b54ff05b3c
15 changed files with 343 additions and 83 deletions
|
@ -2315,10 +2315,13 @@ TEST(TonDb, LargeBocSerializer) {
|
|||
td::unlink(path).ignore();
|
||||
fd = td::FileFd::open(path, td::FileFd::Flags::Create | td::FileFd::Flags::Truncate | td::FileFd::Flags::Write)
|
||||
.move_as_ok();
|
||||
std_boc_serialize_to_file_large(dboc->get_cell_db_reader(), root->get_hash(), fd, 31);
|
||||
boc_serialize_to_file_large(dboc->get_cell_db_reader(), root->get_hash(), fd, 31);
|
||||
fd.close();
|
||||
auto b = td::read_file_str(path).move_as_ok();
|
||||
CHECK(a == b);
|
||||
|
||||
auto a_cell = vm::deserialize_boc(td::BufferSlice(a));
|
||||
auto b_cell = vm::deserialize_boc(td::BufferSlice(b));
|
||||
ASSERT_EQ(a_cell->get_hash(), b_cell->get_hash());
|
||||
}
|
||||
|
||||
TEST(TonDb, DoNotMakeListsPrunned) {
|
||||
|
|
|
@ -214,7 +214,7 @@ td::Result<int> BagOfCells::import_cell(td::Ref<vm::Cell> cell, int depth) {
|
|||
return td::Status::Error("error while importing a cell into a bag of cells: cell is null");
|
||||
}
|
||||
if (logger_ptr_) {
|
||||
TRY_STATUS(logger_ptr_->on_cell_processed());
|
||||
TRY_STATUS(logger_ptr_->on_cells_processed(1));
|
||||
}
|
||||
auto it = cells.find(cell->get_hash());
|
||||
if (it != cells.end()) {
|
||||
|
@ -560,7 +560,7 @@ td::Result<std::size_t> BagOfCells::serialize_to_impl(WriterT& writer, int mode)
|
|||
}
|
||||
store_offset(fixed_offset);
|
||||
if (logger_ptr_) {
|
||||
TRY_STATUS(logger_ptr_->on_cell_processed());
|
||||
TRY_STATUS(logger_ptr_->on_cells_processed(1));
|
||||
}
|
||||
}
|
||||
if (logger_ptr_) {
|
||||
|
@ -593,7 +593,7 @@ td::Result<std::size_t> BagOfCells::serialize_to_impl(WriterT& writer, int mode)
|
|||
}
|
||||
// std::cerr << std::endl;
|
||||
if (logger_ptr_) {
|
||||
TRY_STATUS(logger_ptr_->on_cell_processed());
|
||||
TRY_STATUS(logger_ptr_->on_cells_processed(1));
|
||||
}
|
||||
}
|
||||
writer.chk();
|
||||
|
|
|
@ -210,6 +210,7 @@ class BagOfCellsLogger {
|
|||
|
||||
void start_stage(std::string stage) {
|
||||
log_speed_at_ = td::Timestamp::in(LOG_SPEED_PERIOD);
|
||||
last_speed_log_ = td::Timestamp::now();
|
||||
processed_cells_ = 0;
|
||||
timer_ = {};
|
||||
stage_ = std::move(stage);
|
||||
|
@ -217,15 +218,19 @@ class BagOfCellsLogger {
|
|||
void finish_stage(td::Slice desc) {
|
||||
LOG(ERROR) << "serializer: " << stage_ << " took " << timer_.elapsed() << "s, " << desc;
|
||||
}
|
||||
td::Status on_cell_processed() {
|
||||
++processed_cells_;
|
||||
if (processed_cells_ % 1000 == 0) {
|
||||
td::Status on_cells_processed(size_t count) {
|
||||
processed_cells_ += count;
|
||||
if (processed_cells_ / 1000 > last_token_check_) {
|
||||
TRY_STATUS(cancellation_token_.check());
|
||||
last_token_check_ = processed_cells_ / 1000;
|
||||
}
|
||||
if (log_speed_at_.is_in_past()) {
|
||||
log_speed_at_ += LOG_SPEED_PERIOD;
|
||||
LOG(WARNING) << "serializer: " << stage_ << " " << (double)processed_cells_ / LOG_SPEED_PERIOD << " cells/s";
|
||||
double period = td::Timestamp::now().at() - last_speed_log_.at();
|
||||
|
||||
LOG(WARNING) << "serializer: " << stage_ << " " << (double)processed_cells_ / period << " cells/s";
|
||||
processed_cells_ = 0;
|
||||
last_speed_log_ = td::Timestamp::now();
|
||||
log_speed_at_ = td::Timestamp::in(LOG_SPEED_PERIOD);
|
||||
}
|
||||
return td::Status::OK();
|
||||
}
|
||||
|
@ -236,6 +241,8 @@ class BagOfCellsLogger {
|
|||
td::CancellationToken cancellation_token_;
|
||||
td::Timestamp log_speed_at_;
|
||||
size_t processed_cells_ = 0;
|
||||
size_t last_token_check_ = 0;
|
||||
td::Timestamp last_speed_log_;
|
||||
static constexpr double LOG_SPEED_PERIOD = 120.0;
|
||||
};
|
||||
class BagOfCells {
|
||||
|
@ -390,7 +397,7 @@ td::Result<td::BufferSlice> std_boc_serialize_multi(std::vector<Ref<Cell>> root,
|
|||
|
||||
td::Status std_boc_serialize_to_file(Ref<Cell> root, td::FileFd& fd, int mode = 0,
|
||||
td::CancellationToken cancellation_token = {});
|
||||
td::Status std_boc_serialize_to_file_large(std::shared_ptr<CellDbReader> reader, Cell::Hash root_hash, td::FileFd& fd,
|
||||
td::Status boc_serialize_to_file_large(std::shared_ptr<CellDbReader> reader, Cell::Hash root_hash, td::FileFd& fd,
|
||||
int mode = 0, td::CancellationToken cancellation_token = {});
|
||||
|
||||
} // namespace vm
|
||||
|
|
|
@ -166,6 +166,28 @@ td::Result<CellLoader::LoadResult> CellLoader::load(td::Slice hash, bool need_da
|
|||
return res;
|
||||
}
|
||||
|
||||
td::Result<std::vector<CellLoader::LoadResult>> CellLoader::load_bulk(td::Span<td::Slice> hashes, bool need_data,
|
||||
ExtCellCreator &ext_cell_creator) {
|
||||
std::vector<std::string> values;
|
||||
TRY_RESULT(get_statuses, reader_->get_multi(hashes, &values));
|
||||
std::vector<LoadResult> res;
|
||||
res.reserve(hashes.size());
|
||||
for (size_t i = 0; i < hashes.size(); i++) {
|
||||
auto get_status = get_statuses[i];
|
||||
if (get_status != KeyValue::GetStatus::Ok) {
|
||||
DCHECK(get_status == KeyValue::GetStatus::NotFound);
|
||||
res.push_back(LoadResult{});
|
||||
continue;
|
||||
}
|
||||
TRY_RESULT(load_res, load(hashes[i], values[i], need_data, ext_cell_creator));
|
||||
if (on_load_callback_) {
|
||||
on_load_callback_(load_res);
|
||||
}
|
||||
res.push_back(std::move(load_res));
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
td::Result<CellLoader::LoadResult> CellLoader::load(td::Slice hash, td::Slice value, bool need_data,
|
||||
ExtCellCreator &ext_cell_creator) {
|
||||
LoadResult res;
|
||||
|
|
|
@ -49,6 +49,7 @@ class CellLoader {
|
|||
};
|
||||
CellLoader(std::shared_ptr<KeyValueReader> reader, std::function<void(const LoadResult &)> on_load_callback = {});
|
||||
td::Result<LoadResult> load(td::Slice hash, bool need_data, ExtCellCreator &ext_cell_creator);
|
||||
td::Result<std::vector<LoadResult>> load_bulk(td::Span<td::Slice> hashes, bool need_data, ExtCellCreator &ext_cell_creator);
|
||||
static td::Result<LoadResult> load(td::Slice hash, td::Slice value, bool need_data, ExtCellCreator &ext_cell_creator);
|
||||
td::Result<LoadResult> load_refcnt(td::Slice hash); // This only loads refcnt_, cell_ == null
|
||||
|
||||
|
|
|
@ -100,22 +100,15 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat
|
|||
return get_cell_info_lazy(level_mask, hash, depth).cell;
|
||||
}
|
||||
td::Result<Ref<DataCell>> load_cell(td::Slice hash) override {
|
||||
auto info = hash_table_.get_if_exists(hash);
|
||||
if (info && info->sync_with_db) {
|
||||
TRY_RESULT(loaded_cell, info->cell->load_cell());
|
||||
return std::move(loaded_cell.data_cell);
|
||||
}
|
||||
TRY_RESULT(res, loader_->load(hash, true, *this));
|
||||
if (res.status != CellLoader::LoadResult::Ok) {
|
||||
return td::Status::Error("cell not found");
|
||||
}
|
||||
Ref<DataCell> cell = res.cell();
|
||||
hash_table_.apply(hash, [&](CellInfo &info) { update_cell_info_loaded(info, hash, std::move(res)); });
|
||||
return cell;
|
||||
TRY_RESULT(loaded_cell, get_cell_info_force(hash).cell->load_cell());
|
||||
return std::move(loaded_cell.data_cell);
|
||||
}
|
||||
td::Result<Ref<DataCell>> load_root(td::Slice hash) override {
|
||||
return load_cell(hash);
|
||||
}
|
||||
td::Result<std::vector<Ref<DataCell>>> load_bulk(td::Span<td::Slice> hashes) override {
|
||||
return td::Status::Error("Not implemented");
|
||||
}
|
||||
td::Result<Ref<DataCell>> load_root_thread_safe(td::Slice hash) const override {
|
||||
return td::Status::Error("Not implemented");
|
||||
}
|
||||
|
@ -155,6 +148,9 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat
|
|||
promise->set_result(std::move(cell));
|
||||
});
|
||||
}
|
||||
CellInfo &get_cell_info_force(td::Slice hash) {
|
||||
return hash_table_.apply(hash, [&](CellInfo &info) { update_cell_info_force(info, hash); });
|
||||
}
|
||||
CellInfo &get_cell_info_lazy(Cell::LevelMask level_mask, td::Slice hash, td::Slice depth) {
|
||||
return hash_table_.apply(hash.substr(hash.size() - Cell::hash_bytes),
|
||||
[&](CellInfo &info) { update_cell_info_lazy(info, level_mask, hash, depth); });
|
||||
|
@ -334,6 +330,23 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat
|
|||
return std::move(load_result.cell());
|
||||
}
|
||||
|
||||
td::Result<std::vector<Ref<DataCell>>> load_bulk(td::Span<td::Slice> hashes) override {
|
||||
if (db_) {
|
||||
return db_->load_bulk(hashes);
|
||||
}
|
||||
TRY_RESULT(load_result, cell_loader_->load_bulk(hashes, true, *this));
|
||||
|
||||
std::vector<Ref<DataCell>> res;
|
||||
res.reserve(load_result.size());
|
||||
for (auto &load_res : load_result) {
|
||||
if (load_res.status != CellLoader::LoadResult::Ok) {
|
||||
return td::Status::Error("cell not found");
|
||||
}
|
||||
res.push_back(std::move(load_res.cell()));
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
private:
|
||||
static td::NamedThreadSafeCounter::CounterRef get_thread_safe_counter() {
|
||||
static auto res = td::NamedThreadSafeCounter::get_default().get_counter("DynamicBagOfCellsDbLoader");
|
||||
|
|
|
@ -44,12 +44,14 @@ class CellDbReader {
|
|||
public:
|
||||
virtual ~CellDbReader() = default;
|
||||
virtual td::Result<Ref<DataCell>> load_cell(td::Slice hash) = 0;
|
||||
virtual td::Result<std::vector<Ref<DataCell>>> load_bulk(td::Span<td::Slice> hashes) = 0;
|
||||
};
|
||||
|
||||
class DynamicBagOfCellsDb {
|
||||
public:
|
||||
virtual ~DynamicBagOfCellsDb() = default;
|
||||
virtual td::Result<Ref<DataCell>> load_cell(td::Slice hash) = 0;
|
||||
virtual td::Result<std::vector<Ref<DataCell>>> load_bulk(td::Span<td::Slice> hashes) = 0;
|
||||
virtual td::Result<Ref<DataCell>> load_root(td::Slice hash) = 0;
|
||||
virtual td::Result<Ref<DataCell>> load_root_thread_safe(td::Slice hash) const = 0;
|
||||
struct Stats {
|
||||
|
|
|
@ -461,6 +461,16 @@ class CellStorage {
|
|||
return td::Status::Error("not found");
|
||||
}
|
||||
|
||||
td::Result<std::vector<Ref<DataCell>>> load_bulk(td::Span<CellHash> hashes) const {
|
||||
std::vector<Ref<DataCell>> res;
|
||||
res.reserve(hashes.size());
|
||||
for (auto &hash : hashes) {
|
||||
TRY_RESULT(cell, load_cell(hash));
|
||||
res.push_back(std::move(cell));
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
td::Result<Ref<DataCell>> load_root_local(const CellHash &hash) const {
|
||||
auto lock = local_access_.lock();
|
||||
if (auto it = local_roots_.find(hash); it != local_roots_.end()) {
|
||||
|
@ -769,6 +779,9 @@ class InMemoryBagOfCellsDb : public DynamicBagOfCellsDb {
|
|||
td::Result<Ref<DataCell>> load_root(td::Slice hash) override {
|
||||
return storage_->load_root_local(CellHash::from_slice(hash));
|
||||
}
|
||||
td::Result<std::vector<Ref<DataCell>>> load_bulk(td::Span<td::Slice> hashes) override {
|
||||
return storage_->load_bulk(td::transform(hashes, [](auto &hash) { return CellHash::from_slice(hash); }));
|
||||
}
|
||||
td::Result<Ref<DataCell>> load_root_thread_safe(td::Slice hash) const override {
|
||||
return storage_->load_root_shared(CellHash::from_slice(hash));
|
||||
}
|
||||
|
|
|
@ -32,6 +32,7 @@ namespace {
|
|||
class LargeBocSerializer {
|
||||
public:
|
||||
using Hash = Cell::Hash;
|
||||
constexpr static int load_batch_size = 4'000'000;
|
||||
|
||||
explicit LargeBocSerializer(std::shared_ptr<CellDbReader> reader) : reader(std::move(reader)) {
|
||||
}
|
||||
|
@ -46,7 +47,6 @@ class LargeBocSerializer {
|
|||
private:
|
||||
std::shared_ptr<CellDbReader> reader;
|
||||
struct CellInfo {
|
||||
Cell::Hash hash;
|
||||
std::array<int, 4> ref_idx;
|
||||
int idx;
|
||||
unsigned short serialized_size;
|
||||
|
@ -95,6 +95,8 @@ void LargeBocSerializer::add_root(Hash root) {
|
|||
roots.emplace_back(root, -1);
|
||||
}
|
||||
|
||||
// Unlike crypto/vm/boc.cpp this implementation does not load all cells into memory
|
||||
// and traverses them in BFS order to utilize bulk load of cells on the same level.
|
||||
td::Status LargeBocSerializer::import_cells() {
|
||||
if (logger_ptr_) {
|
||||
logger_ptr_->start_stage("import_cells");
|
||||
|
@ -111,46 +113,124 @@ td::Status LargeBocSerializer::import_cells() {
|
|||
return td::Status::OK();
|
||||
}
|
||||
|
||||
td::Result<int> LargeBocSerializer::import_cell(Hash hash, int depth) {
|
||||
if (depth > Cell::max_depth) {
|
||||
return td::Status::Error("error while importing a cell into a bag of cells: cell depth too large");
|
||||
td::Result<int> LargeBocSerializer::import_cell(Hash root_hash, int root_depth) {
|
||||
const int start_ind = cell_count;
|
||||
td::HashMap<Hash, std::pair<int, bool>> current_depth_hashes;
|
||||
|
||||
auto existing_it = cells.find(root_hash);
|
||||
if (existing_it != cells.end()) {
|
||||
existing_it->second.should_cache = true;
|
||||
} else {
|
||||
current_depth_hashes.emplace(root_hash, std::make_pair(cell_count, false));
|
||||
}
|
||||
if (logger_ptr_) {
|
||||
TRY_STATUS(logger_ptr_->on_cell_processed());
|
||||
int current_depth = root_depth;
|
||||
int next_child_idx = cell_count + 1;
|
||||
while (!current_depth_hashes.empty()) {
|
||||
if (current_depth > Cell::max_depth) {
|
||||
return td::Status::Error("error while importing a cell into a bag of cells: cell depth too large");
|
||||
}
|
||||
|
||||
cell_list.resize(cell_list.size() + current_depth_hashes.size());
|
||||
td::HashMap<Hash, std::pair<int, bool>> next_depth_hashes;
|
||||
auto batch_start = current_depth_hashes.begin();
|
||||
while (batch_start != current_depth_hashes.end()) {
|
||||
std::vector<td::Slice> batch_hashes;
|
||||
batch_hashes.reserve(load_batch_size);
|
||||
std::vector<std::pair<int, bool>*> batch_idxs_should_cache;
|
||||
batch_idxs_should_cache.reserve(load_batch_size);
|
||||
|
||||
while (batch_hashes.size() < load_batch_size && batch_start != current_depth_hashes.end()) {
|
||||
batch_hashes.push_back(batch_start->first.as_slice());
|
||||
batch_idxs_should_cache.push_back(&batch_start->second);
|
||||
++batch_start;
|
||||
}
|
||||
|
||||
TRY_RESULT_PREFIX(loaded_results, reader->load_bulk(batch_hashes),
|
||||
"error while importing a cell into a bag of cells: ");
|
||||
DCHECK(loaded_results.size() == batch_hashes.size());
|
||||
|
||||
for (size_t i = 0; i < loaded_results.size(); ++i) {
|
||||
auto& cell = loaded_results[i];
|
||||
|
||||
if (cell->get_virtualization() != 0) {
|
||||
return td::Status::Error(
|
||||
"error while importing a cell into a bag of cells: cell has non-zero virtualization level");
|
||||
}
|
||||
|
||||
const auto hash = cell->get_hash();
|
||||
CellSlice cs(std::move(cell));
|
||||
|
||||
DCHECK(cs.size_refs() <= 4);
|
||||
std::array<int, 4> refs{-1, -1, -1, -1};
|
||||
for (unsigned j = 0; j < cs.size_refs(); j++) {
|
||||
auto child = cs.prefetch_ref(j);
|
||||
const auto child_hash = child->get_hash();
|
||||
|
||||
auto existing_global_it = cells.find(child_hash);
|
||||
if (existing_global_it != cells.end()) {
|
||||
existing_global_it->second.should_cache = true;
|
||||
refs[j] = existing_global_it->second.idx;
|
||||
continue;
|
||||
}
|
||||
auto current_depth_it = current_depth_hashes.find(child_hash);
|
||||
if (current_depth_it != current_depth_hashes.end()) {
|
||||
current_depth_it->second.second = true;
|
||||
refs[j] = current_depth_it->second.first;
|
||||
continue;
|
||||
}
|
||||
auto next_depth_it = next_depth_hashes.find(child_hash);
|
||||
if (next_depth_it != next_depth_hashes.end()) {
|
||||
next_depth_it->second.second = true;
|
||||
refs[j] = next_depth_it->second.first;
|
||||
continue;
|
||||
}
|
||||
auto res = next_depth_hashes.emplace(child_hash, std::make_pair(next_child_idx, false));
|
||||
refs[j] = next_child_idx++;
|
||||
}
|
||||
|
||||
auto dc = cs.move_as_loaded_cell().data_cell;
|
||||
auto idx_should_cache = batch_idxs_should_cache[i];
|
||||
auto res = cells.emplace(hash, CellInfo(idx_should_cache->first, std::move(refs)));
|
||||
DCHECK(res.second);
|
||||
cell_list[idx_should_cache->first] = &*res.first;
|
||||
CellInfo& dc_info = res.first->second;
|
||||
dc_info.should_cache = idx_should_cache->second;
|
||||
dc_info.hcnt = static_cast<unsigned char>(dc->get_level_mask().get_hashes_count());
|
||||
DCHECK(dc_info.hcnt <= 4);
|
||||
dc_info.wt = 0; // will be calculated after traversing
|
||||
TRY_RESULT(serialized_size, td::narrow_cast_safe<unsigned short>(dc->get_serialized_size()));
|
||||
data_bytes += dc_info.serialized_size = serialized_size;
|
||||
cell_count++;
|
||||
}
|
||||
if (logger_ptr_) {
|
||||
TRY_STATUS(logger_ptr_->on_cells_processed(batch_hashes.size()));
|
||||
}
|
||||
}
|
||||
|
||||
current_depth_hashes = std::move(next_depth_hashes);
|
||||
next_depth_hashes.clear();
|
||||
current_depth++;
|
||||
}
|
||||
auto it = cells.find(hash);
|
||||
if (it != cells.end()) {
|
||||
it->second.should_cache = true;
|
||||
return it->second.idx;
|
||||
DCHECK(next_child_idx == cell_count);
|
||||
|
||||
for (int idx = cell_count - 1; idx >= start_ind; --idx) {
|
||||
CellInfo& cell_info = cell_list[idx]->second;
|
||||
|
||||
unsigned sum_child_wt = 1;
|
||||
for (size_t j = 0; j < cell_info.ref_idx.size(); ++j) {
|
||||
int child_idx = cell_info.ref_idx[j];
|
||||
if (child_idx == -1) {
|
||||
continue;
|
||||
}
|
||||
sum_child_wt += cell_list[child_idx]->second.wt;
|
||||
++int_refs;
|
||||
}
|
||||
cell_info.wt = static_cast<unsigned char>(std::min(0xffU, sum_child_wt));
|
||||
}
|
||||
TRY_RESULT(cell, reader->load_cell(hash.as_slice()));
|
||||
if (cell->get_virtualization() != 0) {
|
||||
return td::Status::Error(
|
||||
"error while importing a cell into a bag of cells: cell has non-zero virtualization level");
|
||||
}
|
||||
CellSlice cs(std::move(cell));
|
||||
std::array<int, 4> refs;
|
||||
std::fill(refs.begin(), refs.end(), -1);
|
||||
DCHECK(cs.size_refs() <= 4);
|
||||
unsigned sum_child_wt = 1;
|
||||
for (unsigned i = 0; i < cs.size_refs(); i++) {
|
||||
TRY_RESULT(ref, import_cell(cs.prefetch_ref(i)->get_hash(), depth + 1));
|
||||
refs[i] = ref;
|
||||
sum_child_wt += cell_list[ref]->second.wt;
|
||||
++int_refs;
|
||||
}
|
||||
auto dc = cs.move_as_loaded_cell().data_cell;
|
||||
auto res = cells.emplace(hash, CellInfo(cell_count, refs));
|
||||
DCHECK(res.second);
|
||||
cell_list.push_back(&*res.first);
|
||||
CellInfo& dc_info = res.first->second;
|
||||
dc_info.wt = (unsigned char)std::min(0xffU, sum_child_wt);
|
||||
unsigned hcnt = dc->get_level_mask().get_hashes_count();
|
||||
DCHECK(hcnt <= 4);
|
||||
dc_info.hcnt = (unsigned char)hcnt;
|
||||
TRY_RESULT(serialized_size, td::narrow_cast_safe<unsigned short>(dc->get_serialized_size()));
|
||||
data_bytes += dc_info.serialized_size = serialized_size;
|
||||
return cell_count++;
|
||||
|
||||
auto root_it = cells.find(root_hash);
|
||||
DCHECK(root_it != cells.end());
|
||||
return root_it->second.idx;
|
||||
}
|
||||
|
||||
void LargeBocSerializer::reorder_cells() {
|
||||
|
@ -386,7 +466,7 @@ td::Status LargeBocSerializer::serialize(td::FileFd& fd, int mode) {
|
|||
}
|
||||
store_offset(fixed_offset);
|
||||
if (logger_ptr_) {
|
||||
TRY_STATUS(logger_ptr_->on_cell_processed());
|
||||
TRY_STATUS(logger_ptr_->on_cells_processed(1));
|
||||
}
|
||||
}
|
||||
DCHECK(offs == info.data_size);
|
||||
|
@ -399,26 +479,42 @@ td::Status LargeBocSerializer::serialize(td::FileFd& fd, int mode) {
|
|||
if (logger_ptr_) {
|
||||
logger_ptr_->start_stage("serialize");
|
||||
}
|
||||
for (int i = 0; i < cell_count; ++i) {
|
||||
auto hash = cell_list[cell_count - 1 - i]->first;
|
||||
const auto& dc_info = cell_list[cell_count - 1 - i]->second;
|
||||
TRY_RESULT(dc, reader->load_cell(hash.as_slice()));
|
||||
bool with_hash = (mode & Mode::WithIntHashes) && !dc_info.wt;
|
||||
if (dc_info.is_root_cell && (mode & Mode::WithTopHash)) {
|
||||
with_hash = true;
|
||||
for (int batch_start = 0; batch_start < cell_count; batch_start += load_batch_size) {
|
||||
int batch_end = std::min(batch_start + static_cast<int>(load_batch_size), cell_count);
|
||||
|
||||
std::vector<td::Slice> batch_hashes;
|
||||
batch_hashes.reserve(batch_end - batch_start);
|
||||
for (int i = batch_start; i < batch_end; ++i) {
|
||||
int cell_index = cell_count - 1 - i;
|
||||
batch_hashes.push_back(cell_list[cell_index]->first.as_slice());
|
||||
}
|
||||
unsigned char buf[256];
|
||||
int s = dc->serialize(buf, 256, with_hash);
|
||||
writer.store_bytes(buf, s);
|
||||
DCHECK(dc->size_refs() == dc_info.get_ref_num());
|
||||
unsigned ref_num = dc_info.get_ref_num();
|
||||
for (unsigned j = 0; j < ref_num; ++j) {
|
||||
int k = cell_count - 1 - dc_info.ref_idx[j];
|
||||
DCHECK(k > i && k < cell_count);
|
||||
store_ref(k);
|
||||
|
||||
TRY_RESULT(batch_cells, reader->load_bulk(std::move(batch_hashes)));
|
||||
|
||||
for (int i = batch_start; i < batch_end; ++i) {
|
||||
int idx_in_batch = i - batch_start;
|
||||
int cell_index = cell_count - 1 - i;
|
||||
|
||||
const auto& dc_info = cell_list[cell_index]->second;
|
||||
auto& dc = batch_cells[idx_in_batch];
|
||||
|
||||
bool with_hash = (mode & Mode::WithIntHashes) && !dc_info.wt;
|
||||
if (dc_info.is_root_cell && (mode & Mode::WithTopHash)) {
|
||||
with_hash = true;
|
||||
}
|
||||
unsigned char buf[256];
|
||||
int s = dc->serialize(buf, 256, with_hash);
|
||||
writer.store_bytes(buf, s);
|
||||
DCHECK(dc->size_refs() == dc_info.get_ref_num());
|
||||
unsigned ref_num = dc_info.get_ref_num();
|
||||
for (unsigned j = 0; j < ref_num; ++j) {
|
||||
int k = cell_count - 1 - dc_info.ref_idx[j];
|
||||
DCHECK(k > i && k < cell_count);
|
||||
store_ref(k);
|
||||
}
|
||||
}
|
||||
if (logger_ptr_) {
|
||||
TRY_STATUS(logger_ptr_->on_cell_processed());
|
||||
TRY_STATUS(logger_ptr_->on_cells_processed(batch_hashes.size()));
|
||||
}
|
||||
}
|
||||
DCHECK(writer.position() - keep_position == info.data_size);
|
||||
|
@ -435,7 +531,7 @@ td::Status LargeBocSerializer::serialize(td::FileFd& fd, int mode) {
|
|||
}
|
||||
} // namespace
|
||||
|
||||
td::Status std_boc_serialize_to_file_large(std::shared_ptr<CellDbReader> reader, Cell::Hash root_hash, td::FileFd& fd,
|
||||
td::Status boc_serialize_to_file_large(std::shared_ptr<CellDbReader> reader, Cell::Hash root_hash, td::FileFd& fd,
|
||||
int mode, td::CancellationToken cancellation_token) {
|
||||
td::Timer timer;
|
||||
CHECK(reader != nullptr)
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "td/utils/Status.h"
|
||||
#include "td/utils/Time.h"
|
||||
#include "td/utils/logging.h"
|
||||
#include "td/utils/Span.h"
|
||||
#include <functional>
|
||||
namespace td {
|
||||
class KeyValueReader {
|
||||
|
@ -28,6 +29,7 @@ class KeyValueReader {
|
|||
enum class GetStatus : int32 { Ok, NotFound };
|
||||
|
||||
virtual Result<GetStatus> get(Slice key, std::string &value) = 0;
|
||||
virtual Result<std::vector<GetStatus>> get_multi(td::Span<Slice> keys, std::vector<std::string> *values) = 0;
|
||||
virtual Result<size_t> count(Slice prefix) = 0;
|
||||
virtual Status for_each(std::function<Status(Slice, Slice)> f) {
|
||||
return Status::Error("for_each is not supported");
|
||||
|
@ -45,6 +47,14 @@ class PrefixedKeyValueReader : public KeyValueReader {
|
|||
Result<GetStatus> get(Slice key, std::string &value) override {
|
||||
return reader_->get(PSLICE() << prefix_ << key, value);
|
||||
}
|
||||
Result<std::vector<GetStatus>> get_multi(td::Span<Slice> keys, std::vector<std::string> *values) override {
|
||||
std::vector<Slice> prefixed_keys;
|
||||
prefixed_keys.reserve(keys.size());
|
||||
for (auto &key : keys) {
|
||||
prefixed_keys.push_back(PSLICE() << prefix_ << key);
|
||||
}
|
||||
return reader_->get_multi(prefixed_keys, values);
|
||||
}
|
||||
Result<size_t> count(Slice prefix) override {
|
||||
return reader_->count(PSLICE() << prefix_ << prefix);
|
||||
}
|
||||
|
@ -88,6 +98,14 @@ class PrefixedKeyValue : public KeyValue {
|
|||
Result<GetStatus> get(Slice key, std::string &value) override {
|
||||
return kv_->get(PSLICE() << prefix_ << key, value);
|
||||
}
|
||||
Result<std::vector<GetStatus>> get_multi(td::Span<Slice> keys, std::vector<std::string> *values) override {
|
||||
std::vector<Slice> prefixed_keys;
|
||||
prefixed_keys.reserve(keys.size());
|
||||
for (auto &key : keys) {
|
||||
prefixed_keys.push_back(PSLICE() << prefix_ << key);
|
||||
}
|
||||
return kv_->get_multi(prefixed_keys, values);
|
||||
}
|
||||
Result<size_t> count(Slice prefix) override {
|
||||
return kv_->count(PSLICE() << prefix_ << prefix);
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "td/db/MemoryKeyValue.h"
|
||||
|
||||
#include "td/utils/format.h"
|
||||
#include "td/utils/Span.h"
|
||||
|
||||
namespace td {
|
||||
Result<MemoryKeyValue::GetStatus> MemoryKeyValue::get(Slice key, std::string &value) {
|
||||
|
@ -30,6 +31,23 @@ Result<MemoryKeyValue::GetStatus> MemoryKeyValue::get(Slice key, std::string &va
|
|||
return GetStatus::Ok;
|
||||
}
|
||||
|
||||
Result<std::vector<MemoryKeyValue::GetStatus>> MemoryKeyValue::get_multi(td::Span<Slice> keys,
|
||||
std::vector<std::string> *values) {
|
||||
values->resize(keys.size());
|
||||
std::vector<GetStatus> res;
|
||||
for (size_t i = 0; i < keys.size(); i++) {
|
||||
auto it = map_.find(keys[i]);
|
||||
if (it == map_.end()) {
|
||||
res.push_back(GetStatus::NotFound);
|
||||
(*values)[i] = "";
|
||||
} else {
|
||||
res.push_back(GetStatus::Ok);
|
||||
(*values)[i] = it->second;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
Status MemoryKeyValue::for_each(std::function<Status(Slice, Slice)> f) {
|
||||
for (auto &it : map_) {
|
||||
TRY_STATUS(f(it.first, it.second));
|
||||
|
|
|
@ -25,6 +25,7 @@ namespace td {
|
|||
class MemoryKeyValue : public KeyValue {
|
||||
public:
|
||||
Result<GetStatus> get(Slice key, std::string &value) override;
|
||||
Result<std::vector<GetStatus>> get_multi(td::Span<Slice> keys, std::vector<std::string> *values) override;
|
||||
Status for_each(std::function<Status(Slice, Slice)> f) override;
|
||||
Status for_each_in_range(Slice begin, Slice end, std::function<Status(Slice, Slice)> f) override;
|
||||
Status set(Slice key, Slice value) override;
|
||||
|
|
|
@ -153,6 +153,40 @@ Result<RocksDb::GetStatus> RocksDb::get(Slice key, std::string &value) {
|
|||
return from_rocksdb(status);
|
||||
}
|
||||
|
||||
Result<std::vector<RocksDb::GetStatus>> RocksDb::get_multi(td::Span<Slice> keys, std::vector<std::string> *values) {
|
||||
std::vector<rocksdb::Status> statuses(keys.size());
|
||||
std::vector<rocksdb::Slice> keys_rocksdb;
|
||||
keys_rocksdb.reserve(keys.size());
|
||||
for (auto &key : keys) {
|
||||
keys_rocksdb.push_back(to_rocksdb(key));
|
||||
}
|
||||
std::vector<rocksdb::PinnableSlice> values_rocksdb(keys.size());
|
||||
rocksdb::ReadOptions options;
|
||||
if (snapshot_) {
|
||||
options.snapshot = snapshot_.get();
|
||||
db_->MultiGet(options, db_->DefaultColumnFamily(), keys_rocksdb.size(), keys_rocksdb.data(), values_rocksdb.data(), statuses.data());
|
||||
} else if (transaction_) {
|
||||
transaction_->MultiGet(options, db_->DefaultColumnFamily(), keys_rocksdb.size(), keys_rocksdb.data(), values_rocksdb.data(), statuses.data());
|
||||
} else {
|
||||
db_->MultiGet(options, db_->DefaultColumnFamily(), keys_rocksdb.size(), keys_rocksdb.data(), values_rocksdb.data(), statuses.data());
|
||||
}
|
||||
std::vector<GetStatus> res(statuses.size());
|
||||
values->resize(statuses.size());
|
||||
for (size_t i = 0; i < statuses.size(); i++) {
|
||||
auto &status = statuses[i];
|
||||
if (status.ok()) {
|
||||
res[i] = GetStatus::Ok;
|
||||
values->at(i) = values_rocksdb[i].ToString();
|
||||
} else if (status.code() == rocksdb::Status::kNotFound) {
|
||||
res[i] = GetStatus::NotFound;
|
||||
values->at(i) = "";
|
||||
} else {
|
||||
return from_rocksdb(status);
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
Status RocksDb::set(Slice key, Slice value) {
|
||||
if (write_batch_) {
|
||||
return from_rocksdb(write_batch_->Put(to_rocksdb(key), to_rocksdb(value)));
|
||||
|
|
|
@ -72,6 +72,7 @@ class RocksDb : public KeyValue {
|
|||
static Result<RocksDb> open(std::string path, RocksDbOptions options = {});
|
||||
|
||||
Result<GetStatus> get(Slice key, std::string &value) override;
|
||||
Result<std::vector<RocksDb::GetStatus>> get_multi(td::Span<Slice> keys, std::vector<std::string> *values) override;
|
||||
Status set(Slice key, Slice value) override;
|
||||
Status erase(Slice key) override;
|
||||
Result<size_t> count(Slice prefix) override;
|
||||
|
|
|
@ -282,8 +282,38 @@ class CachedCellDbReader : public vm::CellDbReader {
|
|||
}
|
||||
return parent_->load_cell(hash);
|
||||
}
|
||||
td::Result<std::vector<Ref<vm::DataCell>>> load_bulk(td::Span<td::Slice> hashes) override {
|
||||
total_reqs_ += hashes.size();
|
||||
if (!cache_) {
|
||||
++bulk_reqs_;
|
||||
return parent_->load_bulk(hashes);
|
||||
}
|
||||
std::vector<td::Slice> missing_hashes;
|
||||
std::vector<size_t> missing_indices;
|
||||
std::vector<td::Ref<vm::DataCell>> res(hashes.size());
|
||||
for (size_t i = 0; i < hashes.size(); i++) {
|
||||
auto it = cache_->find(hashes[i]);
|
||||
if (it != cache_->end()) {
|
||||
++cached_reqs_;
|
||||
TRY_RESULT(loaded_cell, (*it)->load_cell());
|
||||
res[i] = loaded_cell.data_cell;
|
||||
continue;
|
||||
}
|
||||
missing_hashes.push_back(hashes[i]);
|
||||
missing_indices.push_back(i);
|
||||
}
|
||||
if (missing_hashes.empty()) {
|
||||
return std::move(res);
|
||||
}
|
||||
TRY_RESULT(missing_cells, parent_->load_bulk(missing_hashes));
|
||||
for (size_t i = 0; i < missing_indices.size(); i++) {
|
||||
res[missing_indices[i]] = missing_cells[i];
|
||||
}
|
||||
return res;
|
||||
};
|
||||
void print_stats() const {
|
||||
LOG(WARNING) << "CachedCellDbReader stats : " << total_reqs_ << " reads, " << cached_reqs_ << " cached";
|
||||
LOG(WARNING) << "CachedCellDbReader stats : " << total_reqs_ << " reads, " << cached_reqs_ << " cached, "
|
||||
<< bulk_reqs_ << " bulk reqs";
|
||||
}
|
||||
private:
|
||||
std::shared_ptr<vm::CellDbReader> parent_;
|
||||
|
@ -291,6 +321,7 @@ class CachedCellDbReader : public vm::CellDbReader {
|
|||
|
||||
td::uint64 total_reqs_ = 0;
|
||||
td::uint64 cached_reqs_ = 0;
|
||||
td::uint64 bulk_reqs_ = 0;
|
||||
};
|
||||
|
||||
void AsyncStateSerializer::PreviousStateCache::prepare_cache(ShardIdFull shard) {
|
||||
|
@ -373,7 +404,7 @@ void AsyncStateSerializer::got_masterchain_state(td::Ref<MasterchainState> state
|
|||
previous_state_cache->prepare_cache(shard);
|
||||
}
|
||||
auto new_cell_db_reader = std::make_shared<CachedCellDbReader>(cell_db_reader, previous_state_cache->cache);
|
||||
auto res = vm::std_boc_serialize_to_file_large(new_cell_db_reader, root->get_hash(), fd, 31, std::move(cancellation_token));
|
||||
auto res = vm::boc_serialize_to_file_large(new_cell_db_reader, root->get_hash(), fd, 31, std::move(cancellation_token));
|
||||
new_cell_db_reader->print_stats();
|
||||
return res;
|
||||
};
|
||||
|
@ -443,7 +474,7 @@ void AsyncStateSerializer::got_shard_state(BlockHandle handle, td::Ref<ShardStat
|
|||
previous_state_cache->prepare_cache(shard);
|
||||
}
|
||||
auto new_cell_db_reader = std::make_shared<CachedCellDbReader>(cell_db_reader, previous_state_cache->cache);
|
||||
auto res = vm::std_boc_serialize_to_file_large(new_cell_db_reader, root->get_hash(), fd, 31, std::move(cancellation_token));
|
||||
auto res = vm::boc_serialize_to_file_large(new_cell_db_reader, root->get_hash(), fd, 31, std::move(cancellation_token));
|
||||
new_cell_db_reader->print_stats();
|
||||
return res;
|
||||
};
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue