1
0
Fork 0
mirror of https://github.com/ton-blockchain/ton synced 2025-03-09 15:40:10 +00:00

celldb in-memory mode (--celldb-in-memory option)

This commit is contained in:
birydrad 2024-09-09 18:08:15 +02:00
parent 420029b056
commit 1723562748
48 changed files with 1966 additions and 201 deletions

View file

@ -17,10 +17,12 @@
Copyright 2017-2020 Telegram Systems LLP
*/
#include "celldb.hpp"
#include "files-async.hpp"
#include "rootdb.hpp"
#include "td/db/RocksDb.h"
#include "td/utils/filesystem.h"
#include "rocksdb/utilities/optimistic_transaction_db.h"
#include "ton/ton-tl.hpp"
#include "ton/ton-io.hpp"
@ -29,7 +31,6 @@
namespace ton {
namespace validator {
class CellDbAsyncExecutor : public vm::DynamicBagOfCellsDb::AsyncExecutor {
public:
explicit CellDbAsyncExecutor(td::actor::ActorId<CellDbBase> cell_db) : cell_db_(std::move(cell_db)) {
@ -38,11 +39,13 @@ class CellDbAsyncExecutor : public vm::DynamicBagOfCellsDb::AsyncExecutor {
void execute_async(std::function<void()> f) override {
class Runner : public td::actor::Actor {
public:
explicit Runner(std::function<void()> f) : f_(std::move(f)) {}
explicit Runner(std::function<void()> f) : f_(std::move(f)) {
}
void start_up() override {
f_();
stop();
}
private:
std::function<void()> f_;
};
@ -52,6 +55,7 @@ class CellDbAsyncExecutor : public vm::DynamicBagOfCellsDb::AsyncExecutor {
void execute_sync(std::function<void()> f) override {
td::actor::send_closure(cell_db_, &CellDbBase::execute_sync, std::move(f));
}
private:
td::actor::ActorId<CellDbBase> cell_db_;
};
@ -97,13 +101,30 @@ void CellDbIn::start_up() {
LOG(WARNING) << "Set CellDb block cache size to " << td::format::as_size(opts_->get_celldb_cache_size().value());
}
db_options.use_direct_reads = opts_->get_celldb_direct_io();
cell_db_ = std::make_shared<td::RocksDb>(td::RocksDb::open(path_, std::move(db_options)).move_as_ok());
if (opts_->get_celldb_in_memory()) {
td::RocksDbOptions read_db_options;
read_db_options.use_direct_reads = true;
read_db_options.no_block_cache = true;
read_db_options.block_cache = {};
LOG(WARNING) << "Loading all cells in memory (because of --celldb-in-memory)";
td::Timer timer;
auto read_cell_db =
std::make_shared<td::RocksDb>(td::RocksDb::open(path_, std::move(read_db_options)).move_as_ok());
boc_ = vm::DynamicBagOfCellsDb::create_in_memory(read_cell_db.get(), {});
in_memory_load_time_ = timer.elapsed();
td::actor::send_closure(parent_, &CellDb::set_in_memory_boc, boc_);
}
boc_ = vm::DynamicBagOfCellsDb::create();
boc_->set_celldb_compress_depth(opts_->get_celldb_compress_depth());
boc_->set_loader(std::make_unique<vm::CellLoader>(cell_db_->snapshot(), on_load_callback_)).ensure();
td::actor::send_closure(parent_, &CellDb::update_snapshot, cell_db_->snapshot());
auto rocks_db = std::make_shared<td::RocksDb>(td::RocksDb::open(path_, std::move(db_options)).move_as_ok());
rocks_db_ = rocks_db->raw_db();
cell_db_ = std::move(rocks_db);
if (!opts_->get_celldb_in_memory()) {
boc_ = vm::DynamicBagOfCellsDb::create();
boc_->set_celldb_compress_depth(opts_->get_celldb_compress_depth());
boc_->set_loader(std::make_unique<vm::CellLoader>(cell_db_->snapshot(), on_load_callback_)).ensure();
td::actor::send_closure(parent_, &CellDb::update_snapshot, cell_db_->snapshot());
}
alarm_timestamp() = td::Timestamp::in(10.0);
@ -117,27 +138,34 @@ void CellDbIn::start_up() {
if (opts_->get_celldb_preload_all()) {
// Iterate whole DB in a separate thread
delay_action([snapshot = cell_db_->snapshot()]() {
LOG(WARNING) << "CellDb: pre-loading all keys";
td::uint64 total = 0;
td::Timer timer;
auto S = snapshot->for_each([&](td::Slice, td::Slice) {
++total;
if (total % 1000000 == 0) {
LOG(INFO) << "CellDb: iterated " << total << " keys";
}
return td::Status::OK();
});
if (S.is_error()) {
LOG(ERROR) << "CellDb: pre-load failed: " << S.move_as_error();
} else {
LOG(WARNING) << "CellDb: iterated " << total << " keys in " << timer.elapsed() << "s";
}
}, td::Timestamp::now());
delay_action(
[snapshot = cell_db_->snapshot()]() {
LOG(WARNING) << "CellDb: pre-loading all keys";
td::uint64 total = 0;
td::Timer timer;
auto S = snapshot->for_each([&](td::Slice, td::Slice) {
++total;
if (total % 1000000 == 0) {
LOG(INFO) << "CellDb: iterated " << total << " keys";
}
return td::Status::OK();
});
if (S.is_error()) {
LOG(ERROR) << "CellDb: pre-load failed: " << S.move_as_error();
} else {
LOG(WARNING) << "CellDb: iterated " << total << " keys in " << timer.elapsed() << "s";
}
},
td::Timestamp::now());
}
}
void CellDbIn::load_cell(RootHash hash, td::Promise<td::Ref<vm::DataCell>> promise) {
if (opts_->get_celldb_in_memory()) {
auto result = boc_->load_root(hash.as_slice());
async_apply("load_cell_result", std::move(promise), std::move(result));
return;
}
boc_->load_cell_async(hash.as_slice(), async_executor, std::move(promise));
}
@ -182,8 +210,10 @@ void CellDbIn::store_cell(BlockIdExt block_id, td::Ref<vm::Cell> cell, td::Promi
set_block(key_hash, std::move(D));
cell_db_->commit_write_batch().ensure();
boc_->set_loader(std::make_unique<vm::CellLoader>(cell_db_->snapshot(), on_load_callback_)).ensure();
td::actor::send_closure(parent_, &CellDb::update_snapshot, cell_db_->snapshot());
if (!opts_->get_celldb_in_memory()) {
boc_->set_loader(std::make_unique<vm::CellLoader>(cell_db_->snapshot(), on_load_callback_)).ensure();
td::actor::send_closure(parent_, &CellDb::update_snapshot, cell_db_->snapshot());
}
promise.set_result(boc_->load_cell(cell->get_hash().as_slice()));
if (!opts_->get_disable_rocksdb_stats()) {
@ -200,12 +230,50 @@ void CellDbIn::get_last_deleted_mc_state(td::Promise<BlockSeqno> promise) {
promise.set_result(last_deleted_mc_state_);
}
std::vector<std::pair<std::string, std::string>> CellDbIn::prepare_stats() {
TD_PERF_COUNTER(celldb_prepare_stats);
auto r_boc_stats = boc_->get_stats();
if (r_boc_stats.is_ok()) {
cell_db_statistics_.boc_stats_ = r_boc_stats.move_as_ok();
}
cell_db_statistics_.in_memory_load_time_ = in_memory_load_time_;
auto stats = cell_db_statistics_.prepare_stats();
auto add_stat = [&](const auto& key, const auto& value) { stats.emplace_back(key, PSTRING() << value); };
add_stat("started", "true");
auto r_mem_stat = td::mem_stat();
auto r_total_mem_stat = td::get_total_mem_stat();
td::uint64 celldb_size = 0;
bool ok_celldb_size = rocks_db_->GetIntProperty("rocksdb.total-sst-files-size", &celldb_size);
if (celldb_size > 0 && r_mem_stat.is_ok() && r_total_mem_stat.is_ok() && ok_celldb_size) {
auto mem_stat = r_mem_stat.move_as_ok();
auto total_mem_stat = r_total_mem_stat.move_as_ok();
add_stat("rss", td::format::as_size(mem_stat.resident_size_));
add_stat("available_ram", td::format::as_size(total_mem_stat.available_ram));
add_stat("total_ram", td::format::as_size(total_mem_stat.total_ram));
add_stat("actual_ram_to_celldb_ratio", double(total_mem_stat.available_ram) / double(celldb_size));
add_stat("if_restarted_ram_to_celldb_ratio",
double(total_mem_stat.available_ram + mem_stat.resident_size_ - 10 * (td::uint64(1) << 30)) /
double(celldb_size));
add_stat("max_possible_ram_to_celldb_ratio", double(total_mem_stat.total_ram) / double(celldb_size));
}
return stats;
// do not clear statistics, it is needed for flush_db_stats
}
void CellDbIn::flush_db_stats() {
if (opts_->get_disable_rocksdb_stats()) {
return;
}
auto stats = td::RocksDb::statistics_to_string(statistics_) + snapshot_statistics_->to_string() +
cell_db_statistics_.to_string();
auto celldb_stats = prepare_stats();
td::StringBuilder ss;
for (auto& [key, value] : celldb_stats) {
ss << "ton.celldb." << key << " " << value << "\n";
}
auto stats =
td::RocksDb::statistics_to_string(statistics_) + snapshot_statistics_->to_string() + ss.as_cslice().str();
auto to_file_r =
td::FileFd::open(path_ + "/db_stats.txt", td::FileFd::Truncate | td::FileFd::Create | td::FileFd::Write, 0644);
if (to_file_r.is_error()) {
@ -287,7 +355,9 @@ void CellDbIn::gc_cont(BlockHandle handle) {
void CellDbIn::gc_cont2(BlockHandle handle) {
TD_PERF_COUNTER(celldb_gc_cell);
td::PerfWarningTimer timer{"gccell", 0.1};
td::PerfWarningTimer timer_all{"gccell_all", 0.05};
td::PerfWarningTimer timer_get_keys{"gccell_get_keys", 0.05};
auto key_hash = get_key_hash(handle->id());
auto FR = get_block(key_hash);
FR.ensure();
@ -306,22 +376,41 @@ void CellDbIn::gc_cont2(BlockHandle handle) {
P.prev = P.next;
N.next = N.prev;
}
timer_get_keys.reset();
td::PerfWarningTimer timer_boc{"gccell_boc", 0.05};
auto cell = boc_->load_cell(F.root_hash.as_slice()).move_as_ok();
boc_->dec(cell);
boc_->prepare_commit().ensure();
vm::CellStorer stor{*cell_db_};
timer_boc.reset();
td::PerfWarningTimer timer_write_batch{"gccell_write_batch", 0.05};
cell_db_->begin_write_batch().ensure();
boc_->commit(stor).ensure();
cell_db_->erase(get_key(key_hash)).ensure();
set_block(F.prev, std::move(P));
set_block(F.next, std::move(N));
cell_db_->commit_write_batch().ensure();
alarm_timestamp() = td::Timestamp::now();
timer_write_batch.reset();
boc_->set_loader(std::make_unique<vm::CellLoader>(cell_db_->snapshot(), on_load_callback_)).ensure();
td::actor::send_closure(parent_, &CellDb::update_snapshot, cell_db_->snapshot());
td::PerfWarningTimer timer_free_cells{"gccell_free_cells", 0.05};
auto before = td::ref_get_delete_count();
cell = {};
auto after = td::ref_get_delete_count();
if (timer_free_cells.elapsed() > 0.04) {
LOG(ERROR) << "deleted " << after - before << " cells";
}
timer_free_cells.reset();
td::PerfWarningTimer timer_finish{"gccell_finish", 0.05};
if (!opts_->get_celldb_in_memory()) {
boc_->set_loader(std::make_unique<vm::CellLoader>(cell_db_->snapshot(), on_load_callback_)).ensure();
td::actor::send_closure(parent_, &CellDb::update_snapshot, cell_db_->snapshot());
}
DCHECK(get_block(key_hash).is_error());
if (!opts_->get_disable_rocksdb_stats()) {
@ -331,6 +420,8 @@ void CellDbIn::gc_cont2(BlockHandle handle) {
last_deleted_mc_state_ = handle->id().seqno();
}
LOG(DEBUG) << "Deleted state " << handle->id().to_str();
timer_finish.reset();
timer_all.reset();
}
void CellDbIn::skip_gc() {
@ -403,7 +494,7 @@ void CellDbIn::migrate_cells() {
boc_->set_loader(std::make_unique<vm::CellLoader>(*loader)).ensure();
cell_db_->begin_write_batch().ensure();
td::uint32 checked = 0, migrated = 0;
for (auto it = cells_to_migrate_.begin(); it != cells_to_migrate_.end() && checked < 128; ) {
for (auto it = cells_to_migrate_.begin(); it != cells_to_migrate_.end() && checked < 128;) {
++checked;
td::Bits256 hash = *it;
it = cells_to_migrate_.erase(it);
@ -440,7 +531,32 @@ void CellDbIn::migrate_cells() {
}
}
void CellDb::prepare_stats(td::Promise<std::vector<std::pair<std::string, std::string>>> promise) {
promise.set_value(decltype(prepared_stats_)(prepared_stats_));
}
void CellDb::update_stats(td::Result<std::vector<std::pair<std::string, std::string>>> r_stats) {
if (r_stats.is_error()) {
LOG(ERROR) << "error updating stats: " << r_stats.error();
} else {
prepared_stats_ = r_stats.move_as_ok();
}
alarm_timestamp() = td::Timestamp::in(2.0);
}
void CellDb::alarm() {
send_closure(cell_db_, &CellDbIn::prepare_stats, td::promise_send_closure(actor_id(this), &CellDb::update_stats));
}
void CellDb::load_cell(RootHash hash, td::Promise<td::Ref<vm::DataCell>> promise) {
if (in_memory_boc_) {
auto result = in_memory_boc_->load_root_thread_safe(hash.as_slice());
if (result.is_ok()) {
return async_apply("load_cell_result", std::move(promise), std::move(result));
} else {
LOG(ERROR) << "load_root_thread_safe failed - this is suspicious";
}
}
if (!started_) {
td::actor::send_closure(cell_db_, &CellDbIn::load_cell, hash, std::move(promise));
} else {
@ -498,12 +614,24 @@ td::BufferSlice CellDbIn::DbEntry::release() {
return create_serialize_tl_object<ton_api::db_celldb_value>(create_tl_block_id(block_id), prev, next, root_hash);
}
std::string CellDbIn::CellDbStatistics::to_string() {
td::StringBuilder ss;
ss << "ton.celldb.store_cell.micros " << store_cell_time_.to_string() << "\n";
ss << "ton.celldb.gc_cell.micros " << gc_cell_time_.to_string() << "\n";
ss << "ton.celldb.total_time.micros : " << (td::Timestamp::now().at() - stats_start_time_.at()) * 1e6 << "\n";
return ss.as_cslice().str();
std::vector<std::pair<std::string, std::string>> CellDbIn::CellDbStatistics::prepare_stats() {
std::vector<std::pair<std::string, std::string>> stats;
stats.emplace_back("store_cell.micros", PSTRING() << store_cell_time_.to_string());
stats.emplace_back("gc_cell.micros", PSTRING() << gc_cell_time_.to_string());
stats.emplace_back("total_time.micros", PSTRING() << (td::Timestamp::now().at() - stats_start_time_.at()) * 1e6);
stats.emplace_back("in_memory", PSTRING() << bool(in_memory_load_time_));
if (in_memory_load_time_) {
stats.emplace_back("in_memory_load_time", PSTRING() << in_memory_load_time_.value());
}
if (boc_stats_) {
stats.emplace_back("cells_count", PSTRING() << boc_stats_->cells_total_count);
stats.emplace_back("cells_size", PSTRING() << boc_stats_->cells_total_size);
stats.emplace_back("roots_count", PSTRING() << boc_stats_->roots_total_count);
for (auto& [key, value] : boc_stats_->custom_stats) {
stats.emplace_back(key, value);
}
}
return stats;
}
} // namespace validator