1
0
Fork 0
mirror of https://github.com/ton-blockchain/ton synced 2025-03-09 15:40:10 +00:00

Use parallel write to celldb (#1264)

* Parallel write in celldb

* Add TD_PERF_COUNTER to gc_cell and store_cell

* More error handling

* Tests for prepare_commit_async

* Install g++11 for ubuntu 20.04

---------

Co-authored-by: SpyCheese <mikle98@yandex.ru>
This commit is contained in:
EmelyanenkoK 2024-10-11 15:31:59 +03:00 committed by GitHub
parent fd1735f6ec
commit d04cdfa0dc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 582 additions and 127 deletions

View file

@ -61,10 +61,81 @@
#include "openssl/digest.hpp"
#include "vm/dict.h"
#include <condition_variable>
#include <latch>
#include <numeric>
#include <optional>
#include <queue>
namespace vm {
// Test-only AsyncExecutor backed by a fixed pool of worker threads.
//
// * execute_async() enqueues a task that one of the workers will run.
// * execute_sync() runs the callable on the calling thread while holding
//   sync_mutex_, so "sync" sections never overlap each other.
// * generation_ is a sanity counter bumped by the test via inc_generation().
//   Tasks and sync sections are CHECKed to run only while it is odd, and every
//   queued task must be popped in the same generation it was pushed in.
class ThreadExecutor : public DynamicBagOfCellsDb::AsyncExecutor {
 public:
  // Spawns `threads_n` workers; each loops until it pops an empty task.
  explicit ThreadExecutor(size_t threads_n) {
    for (size_t i = 0; i < threads_n; ++i) {
      threads_.emplace_back([this]() {
        while (true) {
          auto task = pop_task();
          if (!task) {
            // An empty std::function is the shutdown signal (see destructor).
            break;
          }
          CHECK(generation_.load() % 2 == 1);
          task();
        }
      });
    }
  }

  // Sends one shutdown signal per worker, then joins all of them.
  ~ThreadExecutor() override {
    for (size_t i = 0; i < threads_.size(); ++i) {
      push_task({});
    }
    for (auto &t : threads_) {
      t.join();
    }
  }

  // Queues `f` for execution on a worker thread.
  void execute_async(std::function<void()> f) override {
    push_task(std::move(f));
  }

  // Runs `f` on the calling thread under sync_mutex_. The generation must be
  // odd and must not change while waiting for the mutex or while `f` runs.
  void execute_sync(std::function<void()> f) override {
    auto x = generation_.load();
    std::scoped_lock lock(sync_mutex_);
    CHECK(x == generation_);
    CHECK(generation_.load() % 2 == 1);
    f();
    CHECK(generation_.load() % 2 == 1);
  }

  // Advances the generation counter by one.
  void inc_generation() {
    generation_.fetch_add(1);
  }

 private:
  std::atomic<size_t> generation_{0};
  // Each entry is (task, generation at push time).
  std::queue<std::pair<std::function<void()>, size_t>> queue_;
  std::mutex queue_mutex_;
  std::condition_variable cv_;
  std::mutex sync_mutex_;
  // Declared last so every other member is constructed before workers start.
  std::vector<td::thread> threads_;

  // Blocks until a task is available and returns it (possibly empty).
  std::function<void()> pop_task() {
    std::unique_lock lock(queue_mutex_);
    cv_.wait(lock, [&] { return !queue_.empty(); });
    CHECK(!queue_.empty());
    // Move the callable out of the queue slot instead of copying it: a copy
    // would duplicate every captured object of the task on each pop.
    std::function<void()> task = std::move(queue_.front().first);
    const size_t pushed_generation = queue_.front().second;
    queue_.pop();
    CHECK(pushed_generation == generation_);
    return task;
  }

  // Appends `task` tagged with the current generation and wakes one worker.
  void push_task(std::function<void()> task) {
    {
      std::scoped_lock lock(queue_mutex_);
      queue_.emplace(std::move(task), generation_.load());
    }
    cv_.notify_one();
  }
};
std::vector<int> do_get_serialization_modes() {
std::vector<int> res;
@ -890,25 +961,91 @@ TEST(TonDb, InMemoryDynamicBocSimple) {
boc = DynamicBagOfCellsDb::create_in_memory(kv.get(), {});
}
void test_dynamic_boc(std::optional<DynamicBagOfCellsDb::CreateInMemoryOptions> o_in_memory) {
int VERBOSITY_NAME(boc) = VERBOSITY_NAME(DEBUG) + 10;
// Describes which DynamicBagOfCellsDb flavour a test should run against and
// how commits are prepared for it.
struct BocOptions {
  // When set, prepare_commit() goes through prepare_commit_async() on this executor.
  std::shared_ptr<ThreadExecutor> async_executor;
  // When set, the db is created with create_in_memory() using these options.
  std::optional<DynamicBagOfCellsDb::CreateInMemoryOptions> o_in_memory;
  // Seed for the per-test random generator.
  td::uint64 seed{123};

  // Creates the db described by these options.
  // For the in-memory flavour, `o_root_n` (if provided) is asserted to equal the
  // number of roots reported by get_stats() right after creation.
  auto create_dboc(td::KeyValueReader *kv, std::optional<td::int64> o_root_n) {
    if (o_in_memory) {
      auto res = DynamicBagOfCellsDb::create_in_memory(kv, *o_in_memory);
      auto stats = res->get_stats().move_as_ok();
      if (o_root_n) {
        ASSERT_EQ(*o_root_n, stats.roots_total_count);
      }
      VLOG(boc) << "reset roots_n=" << stats.roots_total_count << " cells_n=" << stats.cells_total_count;
      return res;
    }
    return DynamicBagOfCellsDb::create();
  }

  // Prepares `dboc` for commit, synchronously or through the async executor,
  // and fails the test if preparation reports an error.
  void prepare_commit(DynamicBagOfCellsDb &dboc) {
    if (async_executor) {
      // The executor only accepts work while its generation is odd.
      async_executor->inc_generation();
      std::latch latch(1);
      td::Result<td::Unit> res;
      async_executor->execute_sync([&] {
        dboc.prepare_commit_async(async_executor, [&](auto r) {
          res = std::move(r);
          latch.count_down();
        });
      });
      latch.wait();
      // Empty sync section: waits until the section that resolved the promise
      // has released the executor's sync mutex.
      async_executor->execute_sync([&] {});
      // Previously the result was stored but never inspected, so a failed
      // async preparation went unnoticed.
      res.ensure();
      async_executor->inc_generation();
    } else {
      // Do not silently drop the returned status.
      dboc.prepare_commit().ensure();
    }
  }
};
template <class F>
void with_all_boc_options(F &&f, size_t tests_n = 500) {
LOG(INFO) << "Test dynamic boc";
auto counter = [] { return td::NamedThreadSafeCounter::get_default().get_counter("DataCell").sum(); };
auto run = [&](BocOptions options) {
LOG(INFO) << "\t" << (options.o_in_memory ? "in memory" : "on disk") << (options.async_executor ? " async" : "");
if (options.o_in_memory) {
LOG(INFO) << "\t\tuse_arena=" << options.o_in_memory->use_arena
<< " less_memory=" << options.o_in_memory->use_less_memory_during_creation;
}
for (td::uint32 i = 0; i < tests_n; i++) {
auto before = counter();
options.seed = i == 0 ? 123 : i;
f(options);
auto after = counter();
LOG_CHECK((options.o_in_memory && options.o_in_memory->use_arena) || before == after)
<< before << " vs " << after;
}
};
run({.async_executor = std::make_shared<ThreadExecutor>(4)});
run({});
for (auto use_arena : {false, true}) {
for (auto less_memory : {false, true}) {
run({.o_in_memory =
DynamicBagOfCellsDb::CreateInMemoryOptions{.extra_threads = std::thread::hardware_concurrency(),
.verbose = false,
.use_arena = use_arena,
.use_less_memory_during_creation = less_memory}});
}
}
}
void test_dynamic_boc(BocOptions options) {
auto counter = [] { return td::NamedThreadSafeCounter::get_default().get_counter("DataCell").sum(); };
auto before = counter();
SCOPE_EXIT {
LOG_CHECK((o_in_memory && o_in_memory->use_arena) || before == counter()) << before << " vs " << counter();
;
LOG_CHECK((options.o_in_memory && options.o_in_memory->use_arena) || before == counter())
<< before << " vs " << counter();
};
td::Random::Xorshift128plus rnd{123};
td::Random::Xorshift128plus rnd{options.seed};
std::string old_root_hash;
std::string old_root_serialization;
auto kv = std::make_shared<td::MemoryKeyValue>();
auto create_dboc = [&]() {
if (o_in_memory) {
auto res = DynamicBagOfCellsDb::create_in_memory(kv.get(), *o_in_memory);
auto roots_n = old_root_hash.empty() ? 0 : 1;
ASSERT_EQ(roots_n, res->get_stats().ok().roots_total_count);
return res;
}
return DynamicBagOfCellsDb::create();
auto roots_n = old_root_hash.empty() ? 0 : 1;
return options.create_dboc(kv.get(), roots_n);
};
auto dboc = create_dboc();
dboc->set_loader(std::make_unique<CellLoader>(kv));
@ -947,51 +1084,28 @@ void test_dynamic_boc(std::optional<DynamicBagOfCellsDb::CreateInMemoryOptions>
ASSERT_EQ(0u, kv->count("").ok());
}
// Calls `f` once for the on-disk db (empty options) and once for each of the
// four in-memory (use_arena x less_memory) configurations.
template <class F>
void with_all_boc_options(F &&f) {
  LOG(INFO) << "Test dynamic boc";
  LOG(INFO) << "\ton disk";
  f({});

  // combo bit 1 -> use_arena, bit 0 -> less_memory; the visiting order is
  // (false,false), (false,true), (true,false), (true,true).
  for (int combo = 0; combo < 4; ++combo) {
    const bool use_arena = (combo & 2) != 0;
    const bool less_memory = (combo & 1) != 0;
    LOG(INFO) << "\tuse_arena=" << use_arena << " less_memory=" << less_memory;
    f(DynamicBagOfCellsDb::CreateInMemoryOptions{.extra_threads = std::thread::hardware_concurrency(),
                                                 .verbose = false,
                                                 .use_arena = use_arena,
                                                 .use_less_memory_during_creation = less_memory});
  }
}
TEST(TonDb, DynamicBoc) {
with_all_boc_options(test_dynamic_boc);
with_all_boc_options(test_dynamic_boc, 1);
};
void test_dynamic_boc2(std::optional<DynamicBagOfCellsDb::CreateInMemoryOptions> o_in_memory) {
int VERBOSITY_NAME(boc) = VERBOSITY_NAME(DEBUG) + 10;
td::Random::Xorshift128plus rnd{123};
int total_roots = 10000;
int max_roots = 20;
void test_dynamic_boc2(BocOptions options) {
td::Random::Xorshift128plus rnd{options.seed};
int total_roots = rnd.fast(1, !rnd.fast(0, 10) * 100 + 10);
int max_roots = rnd.fast(1, 20);
int last_commit_at = 0;
int first_root_id = 0;
int last_root_id = 0;
auto kv = std::make_shared<td::MemoryKeyValue>();
auto create_dboc = [&](td::int64 root_n) {
if (o_in_memory) {
auto res = DynamicBagOfCellsDb::create_in_memory(kv.get(), *o_in_memory);
auto stats = res->get_stats().move_as_ok();
ASSERT_EQ(root_n, stats.roots_total_count);
VLOG(boc) << "reset roots_n=" << stats.roots_total_count << " cells_n=" << stats.cells_total_count;
return res;
}
return DynamicBagOfCellsDb::create();
};
auto create_dboc = [&](td::int64 root_n) { return options.create_dboc(kv.get(), root_n); };
auto dboc = create_dboc(0);
dboc->set_loader(std::make_unique<CellLoader>(kv));
auto counter = [] { return td::NamedThreadSafeCounter::get_default().get_counter("DataCell").sum(); };
auto before = counter();
SCOPE_EXIT {
LOG_CHECK((o_in_memory && o_in_memory->use_arena) || before == counter()) << before << " vs " << counter();
SCOPE_EXIT{
// LOG_CHECK((options.o_in_memory && options.o_in_memory->use_arena) || before == counter())
// << before << " vs " << counter();
};
std::vector<Ref<Cell>> roots(max_roots);
@ -1009,7 +1123,7 @@ void test_dynamic_boc2(std::optional<DynamicBagOfCellsDb::CreateInMemoryOptions>
if (from_root.is_null()) {
VLOG(boc) << " from db";
auto from_root_hash = root_hashes[root_id % max_roots];
if (o_in_memory && (rnd() % 2 == 0)) {
if (rnd() % 2 == 0) {
from_root = dboc->load_root(from_root_hash).move_as_ok();
} else {
from_root = dboc->load_cell(from_root_hash).move_as_ok();
@ -1041,7 +1155,8 @@ void test_dynamic_boc2(std::optional<DynamicBagOfCellsDb::CreateInMemoryOptions>
auto commit = [&] {
VLOG(boc) << "commit";
dboc->prepare_commit();
//rnd.fast(0, 1);
options.prepare_commit(*dboc);
{
CellStorer cell_storer(*kv);
dboc->commit(cell_storer);
@ -2147,18 +2262,18 @@ TEST(TonDb, BocRespectsUsageCell) {
ASSERT_STREQ(serialization, serialization_of_virtualized_cell);
}
void test_dynamic_boc_respectes_usage_cell(std::optional<vm::DynamicBagOfCellsDb::CreateInMemoryOptions> o_in_memory) {
td::Random::Xorshift128plus rnd(123);
void test_dynamic_boc_respectes_usage_cell(vm::BocOptions options) {
td::Random::Xorshift128plus rnd(options.seed);
auto cell = vm::gen_random_cell(20, rnd, true);
auto usage_tree = std::make_shared<vm::CellUsageTree>();
auto usage_cell = vm::UsageCell::create(cell, usage_tree->root_ptr());
auto kv = std::make_shared<td::MemoryKeyValue>();
auto dboc = o_in_memory ? vm::DynamicBagOfCellsDb::create_in_memory(kv.get(), *o_in_memory)
: vm::DynamicBagOfCellsDb::create();
auto dboc = options.create_dboc(kv.get(), {});
dboc->set_loader(std::make_unique<vm::CellLoader>(kv));
dboc->inc(usage_cell);
{
options.prepare_commit(*dboc);
vm::CellStorer cell_storer(*kv);
dboc->commit(cell_storer);
}
@ -2171,7 +2286,7 @@ void test_dynamic_boc_respectes_usage_cell(std::optional<vm::DynamicBagOfCellsDb
}
TEST(TonDb, DynamicBocRespectsUsageCell) {
vm::with_all_boc_options(test_dynamic_boc_respectes_usage_cell);
vm::with_all_boc_options(test_dynamic_boc_respectes_usage_cell, 20);
}
TEST(TonDb, LargeBocSerializer) {

View file

@ -43,7 +43,7 @@ class CellHashTable {
template <class F>
void for_each(F &&f) {
for (auto &info : set_) {
f(info);
f(const_cast<InfoT &>(info));
}
}
template <class F>

View file

@ -184,6 +184,24 @@ td::Result<CellLoader::LoadResult> CellLoader::load(td::Slice hash, td::Slice va
return res;
}
// Reads only the reference counter stored under `hash`.
// Returns a default-constructed LoadResult when the key is absent, a result
// with status Ok and refcnt_ filled in when it is present, or an error when
// the read or the parse fails.
td::Result<CellLoader::LoadResult> CellLoader::load_refcnt(td::Slice hash) {
  std::string raw_value;
  TRY_RESULT(lookup, reader_->get(hash, raw_value));

  LoadResult result;
  if (lookup == KeyValue::GetStatus::Ok) {
    result.status = LoadResult::Ok;
    td::TlParser parser(raw_value);
    td::parse(result.refcnt_, parser);
    // When the first parsed value is -1, a second value is parsed into the
    // same field and overwrites it.
    if (result.refcnt_ == -1) {
      parse(result.refcnt_, parser);
    }
    TRY_STATUS(parser.get_status());
    return result;
  }

  DCHECK(lookup == KeyValue::GetStatus::NotFound);
  return result;
}
CellStorer::CellStorer(KeyValue &kv) : kv_(kv) {
}

View file

@ -50,6 +50,7 @@ class CellLoader {
CellLoader(std::shared_ptr<KeyValueReader> reader, std::function<void(const LoadResult &)> on_load_callback = {});
td::Result<LoadResult> load(td::Slice hash, bool need_data, ExtCellCreator &ext_cell_creator);
static td::Result<LoadResult> load(td::Slice hash, td::Slice value, bool need_data, ExtCellCreator &ext_cell_creator);
td::Result<LoadResult> load_refcnt(td::Slice hash); // This only loads refcnt_, cell_ == null
private:
std::shared_ptr<KeyValueReader> reader_;

View file

@ -27,6 +27,9 @@
#include "td/utils/ThreadSafeCounter.h"
#include "vm/cellslice.h"
#include <queue>
#include "td/actor/actor.h"
#include "common/delay.h"
namespace vm {
namespace {
@ -180,6 +183,9 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat
}
td::Status prepare_commit() override {
if (pca_state_) {
return td::Status::Error("prepare_commit_async is not finished");
}
if (is_prepared_for_commit()) {
return td::Status::OK();
}
@ -585,6 +591,221 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat
DynamicBocExtCellExtra{cell_db_reader_}));
return std::move(res);
}
// Bookkeeping for one in-flight prepare_commit_async() call.
// The owning db holds it in pca_state_; a non-null pca_state_ means an async
// prepare is still running.
struct PrepareCommitAsyncState {
// Number of outstanding work items; the next stage of the prepare is started
// when this drops to zero (see pca_set_in_db / dfs_old_cells_async).
size_t remaining_ = 0;
// Executor used for both the async loads and the sync continuations.
std::shared_ptr<AsyncExecutor> executor_;
// Resolved once the whole prepare has finished.
td::Promise<td::Unit> promise_;
// Per-cell node of the dependency graph built by dfs_new_cells_in_db_async.
struct CellInfo2 {
// Points at the db's CellInfo for this cell; null until first visited.
CellInfo *info{};
// Nodes that reference this cell (one entry per referencing edge).
std::vector<CellInfo2 *> parents;
// Number of child edges not yet resolved; incremented while the graph is
// built and decremented in pca_set_in_db.
unsigned remaining_children = 0;
// Hash of the underlying cell; requires `info` to be non-null.
Cell::Hash key() const {
return info->key();
}
bool operator<(const CellInfo2 &other) const {
return key() < other.key();
}
friend bool operator<(const CellInfo2 &a, td::Slice b) {
return a.key().as_slice() < b;
}
friend bool operator<(td::Slice a, const CellInfo2 &b) {
return a < b.key().as_slice();
}
// Equality predicate that also compares a node against a raw hash slice.
struct Eq {
using is_transparent = void; // Pred to use
bool operator()(const CellInfo2 &info, const CellInfo2 &other_info) const {
return info.key() == other_info.key();
}
bool operator()(const CellInfo2 &info, td::Slice hash) const {
return info.key().as_slice() == hash;
}
bool operator()(td::Slice hash, const CellInfo2 &info) const {
return info.key().as_slice() == hash;
}
};
// Hasher accepting either a node or a raw hash slice; both forms go through
// cell_hash_slice_hash on the cell hash bytes.
struct Hash {
using is_transparent = void; // Pred to use
using transparent_key_equal = Eq;
size_t operator()(td::Slice hash) const {
return cell_hash_slice_hash(hash);
}
size_t operator()(const CellInfo2 &info) const {
return cell_hash_slice_hash(info.key().as_slice());
}
};
};
// All nodes discovered for the current prepare, keyed by cell hash.
CellHashTable<CellInfo2> cells_;
// Nodes waiting for a free load slot (see pca_load_from_db).
std::queue<CellInfo2*> load_queue_;
// Number of refcount loads currently dispatched to the executor.
td::uint32 active_load_ = 0;
// Upper bound for active_load_.
td::uint32 max_parallel_load_ = 4;
};
// State of the in-flight async prepare; null when none is running.
std::unique_ptr<PrepareCommitAsyncState> pca_state_;
// Starts an asynchronous prepare-for-commit.
//
// `executor` runs the database reads (execute_async) and the continuations
// that touch db state (execute_sync); `promise` is resolved when preparation
// is complete, or receives an error if another async prepare is still running.
void prepare_commit_async(std::shared_ptr<AsyncExecutor> executor, td::Promise<td::Unit> promise) override {
  if (pca_state_) {
    // Reject before touching any state: the running operation holds raw
    // CellInfo pointers into hash_table_ (via CellInfo2::info), so resetting
    // the table here would leave them dangling.
    promise.set_error(td::Status::Error("Other prepare_commit_async is not finished"));
    return;
  }
  hash_table_ = {};
  if (is_prepared_for_commit()) {
    promise.set_result(td::Unit());
    return;
  }
  pca_state_ = std::make_unique<PrepareCommitAsyncState>();
  pca_state_->executor_ = std::move(executor);
  pca_state_->promise_ = std::move(promise);
  // Build the graph of cells reachable from the roots being added.
  for (auto &new_cell : to_inc_) {
    dfs_new_cells_in_db_async(new_cell);
  }
  // Every node counts as one outstanding item; nodes without children can be
  // looked up in the database right away.
  pca_state_->cells_.for_each([&](PrepareCommitAsyncState::CellInfo2 &info) {
    ++pca_state_->remaining_;
    if (info.remaining_children == 0) {
      pca_load_from_db(&info);
    }
  });
  if (pca_state_->remaining_ == 0) {
    prepare_commit_async_cont();
  }
}
// Registers `cell` (and, on first visit, everything reachable from it through
// loaded cells) in pca_state_->cells_, recording `parent` as one of its
// referrers and bumping the parent's pending-children counter.
void dfs_new_cells_in_db_async(const td::Ref<vm::Cell> &cell, PrepareCommitAsyncState::CellInfo2 *parent = nullptr) {
  const auto cell_hash = cell->get_hash();

  // The node counts as a first visit when its CellInfo pointer was still unset.
  bool first_visit = false;
  pca_state_->cells_.apply(cell_hash.as_slice(), [&](PrepareCommitAsyncState::CellInfo2 &entry) {
    if (entry.info != nullptr) {
      return;
    }
    first_visit = true;
    entry.info = &get_cell_info(cell);
  });

  auto node = pca_state_->cells_.get_if_exists(cell_hash.as_slice());
  if (parent != nullptr) {
    node->parents.push_back(parent);
    ++parent->remaining_children;
  }

  // Children are expanded only once per node, and only for loaded cells.
  if (!first_visit || !cell->is_loaded()) {
    return;
  }
  vm::CellSlice slice(vm::NoVm{}, cell);
  for (unsigned ref_idx = 0; ref_idx < slice.size_refs(); ++ref_idx) {
    dfs_new_cells_in_db_async(slice.prefetch_ref(ref_idx), node);
  }
}
// Schedules a database refcount lookup for `info`, limited to
// max_parallel_load_ concurrent lookups; excess requests wait in load_queue_.
void pca_load_from_db(PrepareCommitAsyncState::CellInfo2 *info) {
// No free slot: park the request, pca_process_load_queue() will retry it.
if (pca_state_->active_load_ >= pca_state_->max_parallel_load_) {
pca_state_->load_queue_.push(info);
return;
}
++pca_state_->active_load_;
pca_state_->executor_->execute_async(
// The task works on its own copy of *loader_ and keeps the executor alive
// through a captured shared_ptr.
[db = this, info, executor = pca_state_->executor_, loader = *loader_]() mutable {
// NOTE(review): move_as_ok() is applied unconditionally, so a failed read is
// not forwarded to the promise — confirm that this is intended.
auto res = loader.load_refcnt(info->info->cell->get_hash().as_slice()).move_as_ok();
// All db state is touched only inside execute_sync.
executor->execute_sync([db, info, res = std::move(res)]() {
// Release the slot and refill it before applying the result.
--db->pca_state_->active_load_;
db->pca_process_load_queue();
db->pca_set_in_db(info, std::move(res));
});
});
}
void pca_process_load_queue() {
while (pca_state_->active_load_ < pca_state_->max_parallel_load_ && !pca_state_->load_queue_.empty()) {
PrepareCommitAsyncState::CellInfo2 *info = pca_state_->load_queue_.front();
pca_state_->load_queue_.pop();
pca_load_from_db(info);
}
}
// Records the outcome of the database lookup for `info` and propagates it to
// the nodes that reference it. Counts `info` as finished; when it was the last
// outstanding node, the next stage (prepare_commit_async_cont) is started.
void pca_set_in_db(PrepareCommitAsyncState::CellInfo2 *info, CellLoader::LoadResult result) {
info->info->sync_with_db = true;
if (result.status == CellLoader::LoadResult::Ok) {
info->info->in_db = true;
info->info->db_refcnt = result.refcnt();
} else {
info->info->in_db = false;
}
for (PrepareCommitAsyncState::CellInfo2 *parent_info : info->parents) {
// Parent already resolved (e.g. through another child): nothing to do.
if (parent_info->info->sync_with_db) {
continue;
}
if (!info->info->in_db) {
// This child is not in the database, so the parent is marked as absent too
// without issuing a lookup (a default-constructed result is passed).
pca_set_in_db(parent_info, {});
} else if (--parent_info->remaining_children == 0) {
// All children of the parent were found in the database: look the parent up.
pca_load_from_db(parent_info);
}
}
CHECK(pca_state_->remaining_ != 0);
if (--pca_state_->remaining_ == 0) {
prepare_commit_async_cont();
}
}
void prepare_commit_async_cont() {
for (auto &new_cell : to_inc_) {
auto &new_cell_info = get_cell_info(new_cell);
dfs_new_cells(new_cell_info);
}
CHECK(pca_state_->remaining_ == 0);
for (auto &old_cell : to_dec_) {
auto &old_cell_info = get_cell_info(old_cell);
dfs_old_cells_async(old_cell_info);
}
if (pca_state_->remaining_ == 0) {
prepare_commit_async_cont2();
}
}
// Applies one reference decrement to `info` and, when its resulting refcount
// reaches zero, recurses into its children. Cells whose database state is not
// known yet are loaded asynchronously first and the traversal resumes from the
// load callback.
void dfs_old_cells_async(CellInfo &info) {
if (!info.was) {
// First visit: remember the cell in visited_.
info.was = true;
visited_.push_back(&info);
if (!info.sync_with_db) {
// Database state unknown: count one outstanding item and load the cell.
++pca_state_->remaining_;
load_cell_async(
info.cell->get_hash().as_slice(), pca_state_->executor_,
[executor = pca_state_->executor_, db = this, info = &info](td::Result<td::Ref<vm::DataCell>> R) {
R.ensure();
executor->execute_sync([db, info]() {
// The load is expected to have marked the cell as synced with the database.
CHECK(info->sync_with_db);
// Re-enter: `was` is already set, so this call goes straight to the decrement.
db->dfs_old_cells_async(*info);
if (--db->pca_state_->remaining_ == 0) {
db->prepare_commit_async_cont2();
}
});
});
return;
}
}
info.refcnt_diff--;
if (!info.sync_with_db) {
return;
}
auto new_refcnt = info.refcnt_diff + info.db_refcnt;
CHECK(new_refcnt >= 0);
// The cell is still referenced: its children are left untouched.
if (new_refcnt != 0) {
return;
}
for_each(info, [this](auto &child_info) { dfs_old_cells_async(child_info); });
}
// Final stage of the async prepare: builds the diff, clears the pending
// root lists, tears down the async state and only then resolves the promise.
void prepare_commit_async_cont2() {
  save_diff_prepare();
  to_inc_.clear();
  to_dec_.clear();
  // Detach the promise and drop the state *before* resolving it. Otherwise a
  // continuation (or a thread woken by it) that immediately calls
  // prepare_commit()/prepare_commit_async() would still observe pca_state_ as
  // set and get a "not finished" error, or race with the reset below.
  auto promise = std::move(pca_state_->promise_);
  pca_state_.reset();
  promise.set_result(td::Unit());
}
};
} // namespace

View file

@ -105,6 +105,7 @@ class DynamicBagOfCellsDb {
virtual void load_cell_async(td::Slice hash, std::shared_ptr<AsyncExecutor> executor,
td::Promise<Ref<DataCell>> promise) = 0;
virtual void prepare_commit_async(std::shared_ptr<AsyncExecutor> executor, td::Promise<td::Unit> promise) = 0;
};
} // namespace vm

View file

@ -848,6 +848,10 @@ class InMemoryBagOfCellsDb : public DynamicBagOfCellsDb {
to_inc_ = {};
return td::Status::OK();
}
void prepare_commit_async(std::shared_ptr<AsyncExecutor> executor, td::Promise<td::Unit> promise) override {
TRY_STATUS_PROMISE(promise, prepare_commit());
promise.set_value(td::Unit());
}
Stats get_stats_diff() override {
LOG(FATAL) << "Not implemented";
return {};