mirror of https://github.com/ton-blockchain/ton (synced 2025-03-09 15:40:10 +00:00)

serialize boc with bfs (with multiget)

parent 3c245c6146
commit fbb9954391

14 changed files with 448 additions and 122 deletions
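
In short: the recursive, one-cell-per-read import_cell (each cell fetched with reader->load_cell) is replaced by an iterative breadth-first traversal that collects the hashes of each depth level and fetches them with reader->load_bulk, a multiget, in batches of at most load_batch_size; the final write-out pass in serialize is batched through load_bulk the same way. The hunks below cover the serializer itself.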
@@ -32,6 +32,7 @@ namespace {
 class LargeBocSerializer {
  public:
   using Hash = Cell::Hash;
+  constexpr static int load_batch_size = 4'000'000;
 
   explicit LargeBocSerializer(std::shared_ptr<CellDbReader> reader) : reader(std::move(reader)) {
   }
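
The new constant bounds every bulk read below: at most load_batch_size hashes (4 million, written with a C++14 digit separator) are handed to a single reader->load_bulk call, both per BFS level during import and per batch during serialization.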
@@ -46,7 +47,6 @@ class LargeBocSerializer {
  private:
   std::shared_ptr<CellDbReader> reader;
   struct CellInfo {
-    Cell::Hash hash;
     std::array<int, 4> ref_idx;
     int idx;
     unsigned short serialized_size;
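
If the hash member is indeed the dropped line (the hunk counts allow exactly one removal), it is redundant in the new layout: cells is keyed by the hash and cell_list stores pointers to the map entries, so ->first already yields a cell's hash; the serialization hunk below reads it exactly that way via cell_list[cell_index]->first.as_slice().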
@@ -111,46 +111,124 @@ td::Status LargeBocSerializer::import_cells() {
   return td::Status::OK();
 }
 
-td::Result<int> LargeBocSerializer::import_cell(Hash hash, int depth) {
-  if (depth > Cell::max_depth) {
-    return td::Status::Error("error while importing a cell into a bag of cells: cell depth too large");
-  }
-  auto it = cells.find(hash);
-  if (it != cells.end()) {
-    it->second.should_cache = true;
-    return it->second.idx;
-  }
-  if (logger_ptr_) {
-    TRY_STATUS(logger_ptr_->on_cell_processed());
-  }
-  TRY_RESULT(cell, reader->load_cell(hash.as_slice()));
-  if (cell->get_virtualization() != 0) {
-    return td::Status::Error(
-        "error while importing a cell into a bag of cells: cell has non-zero virtualization level");
-  }
-  CellSlice cs(std::move(cell));
-  std::array<int, 4> refs;
-  std::fill(refs.begin(), refs.end(), -1);
-  DCHECK(cs.size_refs() <= 4);
-  unsigned sum_child_wt = 1;
-  for (unsigned i = 0; i < cs.size_refs(); i++) {
-    TRY_RESULT(ref, import_cell(cs.prefetch_ref(i)->get_hash(), depth + 1));
-    refs[i] = ref;
-    sum_child_wt += cell_list[ref]->second.wt;
-    ++int_refs;
-  }
-  auto dc = cs.move_as_loaded_cell().data_cell;
-  auto res = cells.emplace(hash, CellInfo(cell_count, refs));
-  DCHECK(res.second);
-  cell_list.push_back(&*res.first);
-  CellInfo& dc_info = res.first->second;
-  dc_info.wt = (unsigned char)std::min(0xffU, sum_child_wt);
-  unsigned hcnt = dc->get_level_mask().get_hashes_count();
-  DCHECK(hcnt <= 4);
-  dc_info.hcnt = (unsigned char)hcnt;
-  TRY_RESULT(serialized_size, td::narrow_cast_safe<unsigned short>(dc->get_serialized_size()));
-  data_bytes += dc_info.serialized_size = serialized_size;
-  return cell_count++;
+td::Result<int> LargeBocSerializer::import_cell(Hash root_hash, int root_depth) {
+  const int start_ind = cell_count;
+  td::HashMap<Hash, std::pair<int, bool>> current_depth_hashes;
+
+  auto existing_it = cells.find(root_hash);
+  if (existing_it != cells.end()) {
+    existing_it->second.should_cache = true;
+  } else {
+    current_depth_hashes.emplace(root_hash, std::make_pair(cell_count, false));
+  }
+  int current_depth = root_depth;
+  int next_child_idx = cell_count + 1;
+  while (!current_depth_hashes.empty()) {
+    if (current_depth > Cell::max_depth) {
+      return td::Status::Error("error while importing a cell into a bag of cells: cell depth too large");
+    }
+
+    cell_list.resize(cell_list.size() + current_depth_hashes.size());
+    td::HashMap<Hash, std::pair<int, bool>> next_depth_hashes;
+    auto batch_start = current_depth_hashes.begin();
+    while (batch_start != current_depth_hashes.end()) {
+      std::vector<td::Slice> batch_hashes;
+      batch_hashes.reserve(load_batch_size);
+      std::vector<std::pair<int, bool>*> batch_idxs_should_cache;
+      batch_idxs_should_cache.reserve(load_batch_size);
+
+      while (batch_hashes.size() < load_batch_size && batch_start != current_depth_hashes.end()) {
+        batch_hashes.push_back(batch_start->first.as_slice());
+        batch_idxs_should_cache.push_back(&batch_start->second);
+        ++batch_start;
+      }
+
+      TRY_RESULT_PREFIX(loaded_results, reader->load_bulk(batch_hashes),
+                        "error while importing a cell into a bag of cells: ");
+      DCHECK(loaded_results.size() == batch_hashes.size());
+
+      for (size_t i = 0; i < loaded_results.size(); ++i) {
+        auto& cell = loaded_results[i];
+
+        if (cell->get_virtualization() != 0) {
+          return td::Status::Error(
+              "error while importing a cell into a bag of cells: cell has non-zero virtualization level");
+        }
+
+        const auto hash = cell->get_hash();
+        CellSlice cs(std::move(cell));
+
+        DCHECK(cs.size_refs() <= 4);
+        std::array<int, 4> refs{-1, -1, -1, -1};
+        for (unsigned j = 0; j < cs.size_refs(); j++) {
+          auto child = cs.prefetch_ref(j);
+          const auto child_hash = child->get_hash();
+
+          auto existing_global_it = cells.find(child_hash);
+          if (existing_global_it != cells.end()) {
+            existing_global_it->second.should_cache = true;
+            refs[j] = existing_global_it->second.idx;
+            continue;
+          }
+          auto current_depth_it = current_depth_hashes.find(child_hash);
+          if (current_depth_it != current_depth_hashes.end()) {
+            current_depth_it->second.second = true;
+            refs[j] = current_depth_it->second.first;
+            continue;
+          }
+          auto next_depth_it = next_depth_hashes.find(child_hash);
+          if (next_depth_it != next_depth_hashes.end()) {
+            next_depth_it->second.second = true;
+            refs[j] = next_depth_it->second.first;
+            continue;
+          }
+          auto res = next_depth_hashes.emplace(child_hash, std::make_pair(next_child_idx, false));
+          refs[j] = next_child_idx++;
+        }
+
+        auto dc = cs.move_as_loaded_cell().data_cell;
+        auto idx_should_cache = batch_idxs_should_cache[i];
+        auto res = cells.emplace(hash, CellInfo(idx_should_cache->first, std::move(refs)));
+        DCHECK(res.second);
+        cell_list[idx_should_cache->first] = &*res.first;
+        CellInfo& dc_info = res.first->second;
+        dc_info.should_cache = idx_should_cache->second;
+        dc_info.hcnt = static_cast<unsigned char>(dc->get_level_mask().get_hashes_count());
+        DCHECK(dc_info.hcnt <= 4);
+        dc_info.wt = 0;  // will be calculated after traversing
+        TRY_RESULT(serialized_size, td::narrow_cast_safe<unsigned short>(dc->get_serialized_size()));
+        data_bytes += dc_info.serialized_size = serialized_size;
+        cell_count++;
+      }
+      if (logger_ptr_) {
+        TRY_STATUS(logger_ptr_->on_cells_processed(batch_hashes.size()));
+      }
+    }
+
+    current_depth_hashes = std::move(next_depth_hashes);
+    next_depth_hashes.clear();
+    current_depth++;
+  }
+  DCHECK(next_child_idx == cell_count);
+
+  for (int idx = cell_count - 1; idx >= start_ind; --idx) {
+    CellInfo& cell_info = cell_list[idx]->second;
+
+    unsigned sum_child_wt = 1;
+    for (size_t j = 0; j < cell_info.ref_idx.size(); ++j) {
+      int child_idx = cell_info.ref_idx[j];
+      if (child_idx == -1) {
+        continue;
+      }
+      sum_child_wt += cell_list[child_idx]->second.wt;
+      ++int_refs;
+    }
+    cell_info.wt = static_cast<unsigned char>(std::min(0xffU, sum_child_wt));
+  }
+
+  auto root_it = cells.find(root_hash);
+  DCHECK(root_it != cells.end());
+  return root_it->second.idx;
 }
 
 void LargeBocSerializer::reorder_cells() {
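
To make the new traversal concrete outside the TON codebase: walk the DAG level by level, fetch each frontier through one bulk lookup per batch, and deduplicate children against everything already seen. The following is a minimal, self-contained sketch of that pattern; Store, Node, and bfs_import are hypothetical stand-ins, not the CellDbReader interface, and the real code additionally records per-cell metadata (indices, sizes, caching flags) as it goes.

#include <algorithm>
#include <cstdio>
#include <string>
#include <unordered_map>
#include <vector>

struct Node {
  std::vector<std::string> children;  // keys of child nodes
};

// Hypothetical key-value store standing in for a bulk ("multiget") reader.
struct Store {
  std::unordered_map<std::string, Node> data;
  std::vector<Node> load_bulk(const std::vector<std::string>& keys) const {
    std::vector<Node> out;
    out.reserve(keys.size());
    for (const auto& k : keys) out.push_back(data.at(k));  // one round trip in the real thing
    return out;
  }
};

// Returns keys in BFS (level) order; each level is fetched in batches of at
// most batch_size keys, mirroring load_batch_size in the commit.
std::vector<std::string> bfs_import(const Store& store, const std::string& root,
                                    size_t batch_size = 2) {
  std::vector<std::string> order;
  std::unordered_map<std::string, bool> seen{{root, true}};
  std::vector<std::string> frontier{root};
  while (!frontier.empty()) {
    std::vector<std::string> next;
    for (size_t off = 0; off < frontier.size(); off += batch_size) {
      size_t end = std::min(off + batch_size, frontier.size());
      std::vector<std::string> batch(frontier.begin() + off, frontier.begin() + end);
      auto nodes = store.load_bulk(batch);  // one multiget per batch, not one get per cell
      for (size_t i = 0; i < nodes.size(); ++i) {
        order.push_back(batch[i]);
        for (const auto& child : nodes[i].children) {
          if (!seen[child]) {  // dedup within and across levels
            seen[child] = true;
            next.push_back(child);
          }
        }
      }
    }
    frontier = std::move(next);
  }
  return order;
}

int main() {
  Store s;
  s.data = {{"a", {{"b", "c"}}}, {"b", {{"d"}}}, {"c", {{"d"}}}, {"d", {}}};
  for (const auto& k : bfs_import(s, "a")) std::printf("%s ", k.c_str());
  std::printf("\n");  // prints: a b c d
}

Note also why the weight pass can run after the loop: next_child_idx hands out cell_list indices in level order, so within one root's import every child has a larger index than its parent, and the single reverse scan (for (int idx = cell_count - 1; idx >= start_ind; --idx)) finalizes each child's wt before any parent reads it.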
@@ -386,7 +464,7 @@ td::Status LargeBocSerializer::serialize(td::FileFd& fd, int mode) {
     }
     store_offset(fixed_offset);
     if (logger_ptr_) {
-      TRY_STATUS(logger_ptr_->on_cell_processed());
+      TRY_STATUS(logger_ptr_->on_cells_processed(1));
     }
   }
   DCHECK(offs == info.data_size);
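
This one-line change tracks the logger interface used by the batched code paths: on_cell_processed() reported progress one cell at a time, while on_cells_processed(n) reports a whole batch at once (n is 1 at this call site and batch_hashes.size() in the import and serialize loops).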
@@ -399,26 +477,42 @@ td::Status LargeBocSerializer::serialize(td::FileFd& fd, int mode) {
   if (logger_ptr_) {
     logger_ptr_->start_stage("serialize");
   }
-  for (int i = 0; i < cell_count; ++i) {
-    auto hash = cell_list[cell_count - 1 - i]->first;
-    const auto& dc_info = cell_list[cell_count - 1 - i]->second;
-    TRY_RESULT(dc, reader->load_cell(hash.as_slice()));
-    bool with_hash = (mode & Mode::WithIntHashes) && !dc_info.wt;
-    if (dc_info.is_root_cell && (mode & Mode::WithTopHash)) {
-      with_hash = true;
-    }
-    unsigned char buf[256];
-    int s = dc->serialize(buf, 256, with_hash);
-    writer.store_bytes(buf, s);
-    DCHECK(dc->size_refs() == dc_info.get_ref_num());
-    unsigned ref_num = dc_info.get_ref_num();
-    for (unsigned j = 0; j < ref_num; ++j) {
-      int k = cell_count - 1 - dc_info.ref_idx[j];
-      DCHECK(k > i && k < cell_count);
-      store_ref(k);
-    }
-    if (logger_ptr_) {
-      TRY_STATUS(logger_ptr_->on_cell_processed());
-    }
-  }
+  for (int batch_start = 0; batch_start < cell_count; batch_start += load_batch_size) {
+    int batch_end = std::min(batch_start + static_cast<int>(load_batch_size), cell_count);
+
+    std::vector<td::Slice> batch_hashes;
+    batch_hashes.reserve(batch_end - batch_start);
+    for (int i = batch_start; i < batch_end; ++i) {
+      int cell_index = cell_count - 1 - i;
+      batch_hashes.push_back(cell_list[cell_index]->first.as_slice());
+    }
+
+    TRY_RESULT(batch_cells, reader->load_bulk(std::move(batch_hashes)));
+
+    for (int i = batch_start; i < batch_end; ++i) {
+      int idx_in_batch = i - batch_start;
+      int cell_index = cell_count - 1 - i;
+
+      const auto& dc_info = cell_list[cell_index]->second;
+      auto& dc = batch_cells[idx_in_batch];
+
+      bool with_hash = (mode & Mode::WithIntHashes) && !dc_info.wt;
+      if (dc_info.is_root_cell && (mode & Mode::WithTopHash)) {
+        with_hash = true;
+      }
+      unsigned char buf[256];
+      int s = dc->serialize(buf, 256, with_hash);
+      writer.store_bytes(buf, s);
+      DCHECK(dc->size_refs() == dc_info.get_ref_num());
+      unsigned ref_num = dc_info.get_ref_num();
+      for (unsigned j = 0; j < ref_num; ++j) {
+        int k = cell_count - 1 - dc_info.ref_idx[j];
+        DCHECK(k > i && k < cell_count);
+        store_ref(k);
+      }
+    }
+    if (logger_ptr_) {
+      TRY_STATUS(logger_ptr_->on_cells_processed(batch_hashes.size()));
+    }
+  }
   DCHECK(writer.position() - keep_position == info.data_size);
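
One piece of index arithmetic in this hunk is worth spelling out: cell_list keeps roots at the end, the output loop walks it backwards (cell_index = cell_count - 1 - i), and DCHECK(k > i) asserts that every stored reference points at a cell emitted later in the file. A toy illustration, with output_pos as a hypothetical helper rather than anything from the source:

#include <cassert>

// Hypothetical helper: a cell stored at cell_list position p is written at
// output position cell_count - 1 - p, so cells near the end of cell_list
// (the roots) are written first.
int output_pos(int cell_count, int list_pos) {
  return cell_count - 1 - list_pos;
}

int main() {
  const int cell_count = 4;
  int parent_out = output_pos(cell_count, 3);  // root at list position 3 -> output 0
  int child_out = output_pos(cell_count, 1);   // child at list position 1 -> output 2
  assert(child_out > parent_out);  // mirrors DCHECK(k > i): references point forward
  return 0;
}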