1
0
Fork 0
mirror of https://github.com/ton-blockchain/ton synced 2025-03-09 15:40:10 +00:00

celldb: version 2

- thread safe cache
- parallel commit
- multiple optimizations
- support of key-value merge operations
- improved tests and benchmarks
- in-memory version won't read from key value after start - uses vector in-memory table now
- use rocksdb::WALRecoveryMode::kTolerateCorruptedTailRecords - do not silently ignore errors during recovery
This commit is contained in:
birydrad 2024-12-11 14:48:48 +03:00
parent 1b70e48327
commit c863c42ed1
32 changed files with 3276 additions and 318 deletions

View file

@ -156,6 +156,7 @@ if (TON_USE_ROCKSDB)
set(WITH_GFLAGS OFF CACHE BOOL "build with GFlags") set(WITH_GFLAGS OFF CACHE BOOL "build with GFlags")
set(WITH_TESTS OFF CACHE BOOL "build with tests") set(WITH_TESTS OFF CACHE BOOL "build with tests")
set(WITH_TOOLS OFF CACHE BOOL "build with tools") set(WITH_TOOLS OFF CACHE BOOL "build with tools")
set(USE_RTTI ON CACHE BOOL "use rtti")
set(FAIL_ON_WARNINGS OFF CACHE BOOL "fail on warnings") set(FAIL_ON_WARNINGS OFF CACHE BOOL "fail on warnings")
message("Add rocksdb") message("Add rocksdb")
add_subdirectory(third-party/rocksdb EXCLUDE_FROM_ALL) add_subdirectory(third-party/rocksdb EXCLUDE_FROM_ALL)

View file

@ -144,6 +144,7 @@ set(TON_CRYPTO_SOURCE
set(TON_DB_SOURCE set(TON_DB_SOURCE
vm/db/DynamicBagOfCellsDb.cpp vm/db/DynamicBagOfCellsDb.cpp
vm/db/DynamicBagOfCellsDbV2.cpp
vm/db/CellStorage.cpp vm/db/CellStorage.cpp
vm/db/TonDb.cpp vm/db/TonDb.cpp
@ -541,7 +542,7 @@ target_include_directories(create-state PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT
if (INTERNAL_COMPILE) if (INTERNAL_COMPILE)
target_link_libraries(create-state PUBLIC ton_crypto fift-lib tonlib git) target_link_libraries(create-state PUBLIC ton_crypto fift-lib tonlib git)
else() else()
if (TONLIB_COMPILE) if (TONLIB_COMPILE)
target_link_libraries(create-state PUBLIC ton_crypto fift-lib tonlib git) target_link_libraries(create-state PUBLIC ton_crypto fift-lib tonlib git)
else() else()
target_link_libraries(create-state PUBLIC ton_crypto fift-lib git) target_link_libraries(create-state PUBLIC ton_crypto fift-lib git)

File diff suppressed because it is too large Load diff

View file

@ -55,6 +55,7 @@ class Cell : public CellTraits {
} }
// load interface // load interface
virtual td::Status set_data_cell(Ref<DataCell> &&data_cell) const = 0;
virtual td::Result<LoadedCell> load_cell() const = 0; virtual td::Result<LoadedCell> load_cell() const = 0;
virtual Ref<Cell> virtualize(VirtualizationParameters virt) const; virtual Ref<Cell> virtualize(VirtualizationParameters virt) const;
virtual td::uint32 get_virtualization() const = 0; virtual td::uint32 get_virtualization() const = 0;

View file

@ -36,7 +36,8 @@ struct ArenaAllocator {
T* obj = new (ptr) T(std::forward<ArgsT>(args)...); T* obj = new (ptr) T(std::forward<ArgsT>(args)...);
return std::unique_ptr<T>(obj); return std::unique_ptr<T>(obj);
} }
private:
private:
td::MutableSlice alloc_batch() { td::MutableSlice alloc_batch() {
size_t batch_size = 1 << 20; size_t batch_size = 1 << 20;
auto batch = std::make_unique<char[]>(batch_size); auto batch = std::make_unique<char[]>(batch_size);
@ -53,7 +54,7 @@ private:
return res; return res;
} }
}; };
} } // namespace
std::unique_ptr<DataCell> DataCell::create_empty_data_cell(Info info) { std::unique_ptr<DataCell> DataCell::create_empty_data_cell(Info info) {
if (use_arena) { if (use_arena) {
ArenaAllocator<DataCell> allocator; ArenaAllocator<DataCell> allocator;

View file

@ -31,6 +31,9 @@ class DataCell : public Cell {
static thread_local bool use_arena; static thread_local bool use_arena;
DataCell(const DataCell& other) = delete; DataCell(const DataCell& other) = delete;
DataCell(DataCell&& other) = delete;
DataCell& operator=(const DataCell& other) = delete;
DataCell& operator=(DataCell&& other) = delete;
~DataCell() override; ~DataCell() override;
static void store_depth(td::uint8* dest, td::uint16 depth) { static void store_depth(td::uint8* dest, td::uint16 depth) {
@ -126,6 +129,10 @@ class DataCell : public Cell {
explicit DataCell(Info info); explicit DataCell(Info info);
public: public:
td::Status set_data_cell(Ref<DataCell>&& data_cell) const override {
CHECK(get_hash() == data_cell->get_hash());
return td::Status::OK();
}
td::Result<LoadedCell> load_cell() const override { td::Result<LoadedCell> load_cell() const override {
return LoadedCell{Ref<DataCell>{this}, {}, {}}; return LoadedCell{Ref<DataCell>{this}, {}, {}};
} }
@ -228,4 +235,3 @@ inline CellHash as_cell_hash(const Ref<DataCell>& cell) {
} }
} // namespace vm } // namespace vm

View file

@ -65,6 +65,9 @@ class ExtCell : public Cell {
bool is_loaded() const override { bool is_loaded() const override {
return CellView(this)->is_loaded(); return CellView(this)->is_loaded();
} }
Ref<PrunnedCell<ExtraT>> get_prunned_cell() const {
return prunned_cell_.load();
}
private: private:
mutable td::AtomicRef<DataCell> data_cell_; mutable td::AtomicRef<DataCell> data_cell_;
@ -112,6 +115,23 @@ class ExtCell : public Cell {
return CellView(this)->get_depth(level); return CellView(this)->get_depth(level);
} }
td::Status set_data_cell(Ref<DataCell>&& new_data_cell) const override {
auto prunned_cell = prunned_cell_.load();
if (prunned_cell.is_null()) {
auto old_data_cell = data_cell_.get_unsafe();
DCHECK(old_data_cell);
TRY_STATUS(old_data_cell->check_equals_unloaded(new_data_cell));
return td::Status::OK();
}
TRY_STATUS(prunned_cell->check_equals_unloaded(new_data_cell));
if (data_cell_.store_if_empty(new_data_cell)) {
prunned_cell_.store({});
get_thread_safe_counter_unloaded().add(-1);
}
return td::Status::OK();
}
td::Result<Ref<DataCell>> load_data_cell() const { td::Result<Ref<DataCell>> load_data_cell() const {
auto data_cell = data_cell_.get_unsafe(); auto data_cell = data_cell_.get_unsafe();
if (data_cell) { if (data_cell) {

View file

@ -142,6 +142,10 @@ class PrunnedCell : public Cell {
return info_.get_depth(get_storage())[get_level_mask().apply(level).get_hash_i()]; return info_.get_depth(get_storage())[get_level_mask().apply(level).get_hash_i()];
} }
td::Status set_data_cell(Ref<DataCell> &&data_cell) const override {
return td::Status::OK();
}
td::Result<LoadedCell> load_cell() const override { td::Result<LoadedCell> load_cell() const override {
return td::Status::Error("Can't load prunned branch"); return td::Status::Error("Can't load prunned branch");
} }

View file

@ -36,6 +36,9 @@ class UsageCell : public Cell {
return Ref<UsageCell>{true, std::move(cell), std::move(tree_node), PrivateTag{}}; return Ref<UsageCell>{true, std::move(cell), std::move(tree_node), PrivateTag{}};
} }
td::Status set_data_cell(Ref<DataCell> &&data_cell) const override {
return cell_->set_data_cell(std::move(data_cell));
}
// load interface // load interface
td::Result<LoadedCell> load_cell() const override { td::Result<LoadedCell> load_cell() const override {
TRY_RESULT(loaded_cell, cell_->load_cell()); TRY_RESULT(loaded_cell, cell_->load_cell());

View file

@ -37,6 +37,9 @@ class VirtualCell : public Cell {
} }
// load interface // load interface
td::Status set_data_cell(Ref<DataCell> &&data_cell) const override {
return cell_->set_data_cell(std::move(data_cell));
}
td::Result<LoadedCell> load_cell() const override { td::Result<LoadedCell> load_cell() const override {
TRY_RESULT(loaded_cell, cell_->load_cell()); TRY_RESULT(loaded_cell, cell_->load_cell());
loaded_cell.virt = loaded_cell.virt.apply(virt_); loaded_cell.virt = loaded_cell.virt.apply(virt_);

View file

@ -40,6 +40,17 @@ class CellHashTable {
return res; return res;
} }
template <class... ArgsT>
std::pair<InfoT &, bool> emplace(td::Slice hash, ArgsT &&...args) {
auto it = set_.find(hash);
if (it != set_.end()) {
return std::pair<InfoT &, bool>(const_cast<InfoT &>(*it), false);
}
auto res = set_.emplace(std::forward<ArgsT>(args)...);
CHECK(res.second);
return std::pair<InfoT &, bool>(const_cast<InfoT &>(*res.first), res.second);
}
template <class F> template <class F>
void for_each(F &&f) { void for_each(F &&f) {
for (auto &info : set_) { for (auto &info : set_) {
@ -64,7 +75,7 @@ class CellHashTable {
size_t size() const { size_t size() const {
return set_.size(); return set_.size();
} }
InfoT* get_if_exists(td::Slice hash) { InfoT *get_if_exists(td::Slice hash) {
auto it = set_.find(hash); auto it = set_.find(hash);
if (it != set_.end()) { if (it != set_.end()) {
return &const_cast<InfoT &>(*it); return &const_cast<InfoT &>(*it);

View file

@ -17,14 +17,19 @@
Copyright 2017-2020 Telegram Systems LLP Copyright 2017-2020 Telegram Systems LLP
*/ */
#include "vm/db/CellStorage.h" #include "vm/db/CellStorage.h"
#include "td/utils/Parser.h"
#include "vm/db/DynamicBagOfCellsDb.h" #include "vm/db/DynamicBagOfCellsDb.h"
#include "vm/boc.h" #include "vm/boc.h"
#include "td/utils/base64.h" #include "td/utils/base64.h"
#include "td/utils/tl_parsers.h" #include "td/utils/tl_parsers.h"
#include "td/utils/tl_helpers.h" #include "td/utils/tl_helpers.h"
#include <block-auto.h>
namespace vm { namespace vm {
namespace { namespace {
class RefcntCellStorer { class RefcntCellStorer {
public: public:
RefcntCellStorer(td::int32 refcnt, const td::Ref<DataCell> &cell, bool as_boc) RefcntCellStorer(td::int32 refcnt, const td::Ref<DataCell> &cell, bool as_boc)
@ -43,7 +48,9 @@ class RefcntCellStorer {
storer.store_slice(data); storer.store_slice(data);
return; return;
} }
CHECK(refcnt_ > 0);
store(refcnt_, storer); store(refcnt_, storer);
CHECK(cell_.not_null())
store(*cell_, storer); store(*cell_, storer);
for (unsigned i = 0; i < cell_->size_refs(); i++) { for (unsigned i = 0; i < cell_->size_refs(); i++) {
auto cell = cell_->get_ref(i); auto cell = cell_->get_ref(i);
@ -91,6 +98,7 @@ class RefcntCellParser {
stored_boc_ = true; stored_boc_ = true;
parse(refcnt, parser); parse(refcnt, parser);
} }
CHECK(refcnt > 0);
if (!need_data_) { if (!need_data_) {
return; return;
} }
@ -159,6 +167,9 @@ td::Result<CellLoader::LoadResult> CellLoader::load(td::Slice hash, bool need_da
DCHECK(get_status == KeyValue::GetStatus::NotFound); DCHECK(get_status == KeyValue::GetStatus::NotFound);
return LoadResult{}; return LoadResult{};
} }
if (serialized.empty()) {
return LoadResult{};
}
TRY_RESULT(res, load(hash, serialized, need_data, ext_cell_creator)); TRY_RESULT(res, load(hash, serialized, need_data, ext_cell_creator));
if (on_load_callback_) { if (on_load_callback_) {
on_load_callback_(res); on_load_callback_(res);
@ -198,6 +209,7 @@ td::Result<CellLoader::LoadResult> CellLoader::load_refcnt(td::Slice hash) {
if (res.refcnt_ == -1) { if (res.refcnt_ == -1) {
parse(res.refcnt_, parser); parse(res.refcnt_, parser);
} }
CHECK(res.refcnt_ > 0);
TRY_STATUS(parser.get_status()); TRY_STATUS(parser.get_status());
return res; return res;
} }
@ -216,4 +228,77 @@ std::string CellStorer::serialize_value(td::int32 refcnt, const td::Ref<DataCell
td::Status CellStorer::set(td::int32 refcnt, const td::Ref<DataCell> &cell, bool as_boc) { td::Status CellStorer::set(td::int32 refcnt, const td::Ref<DataCell> &cell, bool as_boc) {
return kv_.set(cell->get_hash().as_slice(), serialize_value(refcnt, cell, as_boc)); return kv_.set(cell->get_hash().as_slice(), serialize_value(refcnt, cell, as_boc));
} }
td::Status CellStorer::merge(td::Slice hash, td::int32 refcnt_diff) {
return kv_.merge(hash, serialize_refcnt_diffs(refcnt_diff));
}
void CellStorer::merge_value_and_refcnt_diff(std::string &left, td::Slice right) {
if (right.empty()) {
return;
}
CHECK(left.size() > 4);
CHECK(right.size() == 4);
td::int32 left_refcnt = td::as<td::int32>(left.data());
size_t shift = 0;
if (left_refcnt == -1) {
CHECK(left.size() >= 8);
left_refcnt = td::as<td::int32>(left.data() + 4);
shift = 4;
}
td::int32 right_refcnt_diff = td::as<td::int32>(right.data());
td::int32 new_refcnt = left_refcnt + right_refcnt_diff;
CHECK(new_refcnt > 0);
td::as<td::int32>(left.data() + shift) = new_refcnt;
}
void CellStorer::merge_refcnt_diffs(std::string &left, td::Slice right) {
if (right.empty()) {
return;
}
if (left.empty()) {
left = right.str();
return;
}
CHECK(left.size() == 4);
CHECK(right.size() == 4);
td::int32 left_refcnt_diff = td::as<td::int32>(left.data());
td::int32 right_refcnt_diff = td::as<td::int32>(right.data());
td::int32 total_refcnt_diff = left_refcnt_diff + right_refcnt_diff;
td::as<td::int32>(left.data()) = total_refcnt_diff;
}
std::string CellStorer::serialize_refcnt_diffs(td::int32 refcnt_diff) {
TD_PERF_COUNTER(cell_store_refcnt_diff);
std::string s(4, 0);
td::as<td::int32>(s.data()) = refcnt_diff;
return s;
}
td::Status CellStorer::apply_diff(const Diff &diff) {
switch (diff.type) {
case Diff::Set:
return kv_.set(diff.key.as_slice(), diff.value);
case Diff::Erase:
return kv_.erase(diff.key.as_slice());
case Diff::Merge:
return kv_.merge(diff.key.as_slice(), diff.value);
default:
UNREACHABLE();
}
}
td::Status CellStorer::apply_meta_diff(const MetaDiff &diff) {
switch (diff.type) {
case MetaDiff::Set:
CHECK(diff.key.size() != CellTraits::hash_bytes);
CHECK(!diff.value.empty());
return kv_.set(diff.key, diff.value);
case MetaDiff::Erase:
CHECK(diff.key.size() != CellTraits::hash_bytes);
CHECK(diff.value.empty());
return kv_.erase(diff.key);
default:
UNREACHABLE();
}
}
} // namespace vm } // namespace vm

View file

@ -51,6 +51,9 @@ class CellLoader {
td::Result<LoadResult> load(td::Slice hash, bool need_data, ExtCellCreator &ext_cell_creator); td::Result<LoadResult> load(td::Slice hash, bool need_data, ExtCellCreator &ext_cell_creator);
static td::Result<LoadResult> load(td::Slice hash, td::Slice value, bool need_data, ExtCellCreator &ext_cell_creator); static td::Result<LoadResult> load(td::Slice hash, td::Slice value, bool need_data, ExtCellCreator &ext_cell_creator);
td::Result<LoadResult> load_refcnt(td::Slice hash); // This only loads refcnt_, cell_ == null td::Result<LoadResult> load_refcnt(td::Slice hash); // This only loads refcnt_, cell_ == null
KeyValueReader &key_value_reader() const {
return *reader_;
}
private: private:
std::shared_ptr<KeyValueReader> reader_; std::shared_ptr<KeyValueReader> reader_;
@ -62,8 +65,28 @@ class CellStorer {
CellStorer(KeyValue &kv); CellStorer(KeyValue &kv);
td::Status erase(td::Slice hash); td::Status erase(td::Slice hash);
td::Status set(td::int32 refcnt, const td::Ref<DataCell> &cell, bool as_boc); td::Status set(td::int32 refcnt, const td::Ref<DataCell> &cell, bool as_boc);
td::Status merge(td::Slice hash, td::int32 refcnt_diff);
static void merge_value_and_refcnt_diff(std::string &value, td::Slice right);
static void merge_refcnt_diffs(std::string &left, td::Slice right);
static std::string serialize_refcnt_diffs(td::int32 refcnt_diff);
static std::string serialize_value(td::int32 refcnt, const td::Ref<DataCell> &cell, bool as_boc); static std::string serialize_value(td::int32 refcnt, const td::Ref<DataCell> &cell, bool as_boc);
struct Diff {
enum Type { Set, Erase, Merge } type{Set};
CellHash key;
std::string value{};
};
td::Status apply_diff(const Diff &diff);
struct MetaDiff {
enum Type { Set, Erase } type{Set};
std::string key;
std::string value{};
};
td::Status apply_meta_diff(const MetaDiff &diff);
private: private:
KeyValue &kv_; KeyValue &kv_;
}; };

View file

@ -66,19 +66,27 @@ struct CellInfo {
struct Eq { struct Eq {
using is_transparent = void; // Pred to use using is_transparent = void; // Pred to use
bool operator()(const CellInfo &info, const CellInfo &other_info) const { return info.key() == other_info.key();} bool operator()(const CellInfo &info, const CellInfo &other_info) const {
bool operator()(const CellInfo &info, td::Slice hash) const { return info.key().as_slice() == hash;} return info.key() == other_info.key();
bool operator()(td::Slice hash, const CellInfo &info) const { return info.key().as_slice() == hash;} }
bool operator()(const CellInfo &info, td::Slice hash) const {
return info.key().as_slice() == hash;
}
bool operator()(td::Slice hash, const CellInfo &info) const {
return info.key().as_slice() == hash;
}
}; };
struct Hash { struct Hash {
using is_transparent = void; // Pred to use using is_transparent = void; // Pred to use
using transparent_key_equal = Eq; using transparent_key_equal = Eq;
size_t operator()(td::Slice hash) const { return cell_hash_slice_hash(hash); } size_t operator()(td::Slice hash) const {
size_t operator()(const CellInfo &info) const { return cell_hash_slice_hash(info.key().as_slice());} return cell_hash_slice_hash(hash);
}
size_t operator()(const CellInfo &info) const {
return cell_hash_slice_hash(info.key().as_slice());
}
}; };
}; };
bool operator<(const CellInfo &a, td::Slice b) { bool operator<(const CellInfo &a, td::Slice b) {
return a.key().as_slice() < b; return a.key().as_slice() < b;
} }
@ -99,6 +107,30 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat
td::Result<Ref<Cell>> ext_cell(Cell::LevelMask level_mask, td::Slice hash, td::Slice depth) override { td::Result<Ref<Cell>> ext_cell(Cell::LevelMask level_mask, td::Slice hash, td::Slice depth) override {
return get_cell_info_lazy(level_mask, hash, depth).cell; return get_cell_info_lazy(level_mask, hash, depth).cell;
} }
td::Result<std::vector<std::pair<std::string, std::string>>> meta_get_all() const override {
std::vector<std::pair<std::string, std::string>> result;
auto s = loader_->key_value_reader().for_each_in_range("desc", "desd",
[&](const td::Slice &key, const td::Slice &value) {
if (td::begins_with(key, "desc") && key.size() != 32) {
result.emplace_back(key.str(), value.str());
}
return td::Status::OK();
});
TRY_STATUS(std::move(s));
return result;
}
td::Result<KeyValue::GetStatus> meta_get(td::Slice key, std::string &value) override {
return loader_->key_value_reader().get(key, value);
}
td::Status meta_set(td::Slice key, td::Slice value) override {
meta_diffs_.push_back(
CellStorer::MetaDiff{.type = CellStorer::MetaDiff::Set, .key = key.str(), .value = value.str()});
return td::Status::OK();
}
td::Status meta_erase(td::Slice key) override {
meta_diffs_.push_back(CellStorer::MetaDiff{.type = CellStorer::MetaDiff::Erase, .key = key.str()});
return td::Status::OK();
}
td::Result<Ref<DataCell>> load_cell(td::Slice hash) override { td::Result<Ref<DataCell>> load_cell(td::Slice hash) override {
auto info = hash_table_.get_if_exists(hash); auto info = hash_table_.get_if_exists(hash);
if (info && info->sync_with_db) { if (info && info->sync_with_db) {
@ -198,21 +230,29 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat
if (is_prepared_for_commit()) { if (is_prepared_for_commit()) {
return td::Status::OK(); return td::Status::OK();
} }
td::PerfWarningTimer timer_dfs_new_cells_in_db("dfs_new_cells_in_db");
for (auto &new_cell : to_inc_) { for (auto &new_cell : to_inc_) {
auto &new_cell_info = get_cell_info(new_cell); auto &new_cell_info = get_cell_info(new_cell);
dfs_new_cells_in_db(new_cell_info); dfs_new_cells_in_db(new_cell_info);
} }
timer_dfs_new_cells_in_db.reset();
td::PerfWarningTimer timer_dfs_new_cells("dfs_new_cells");
for (auto &new_cell : to_inc_) { for (auto &new_cell : to_inc_) {
auto &new_cell_info = get_cell_info(new_cell); auto &new_cell_info = get_cell_info(new_cell);
dfs_new_cells(new_cell_info); dfs_new_cells(new_cell_info);
} }
timer_dfs_new_cells.reset();
td::PerfWarningTimer timer_dfs_old_cells("dfs_old_cells");
for (auto &old_cell : to_dec_) { for (auto &old_cell : to_dec_) {
auto &old_cell_info = get_cell_info(old_cell); auto &old_cell_info = get_cell_info(old_cell);
dfs_old_cells(old_cell_info); dfs_old_cells(old_cell_info);
} }
timer_dfs_old_cells.reset();
td::PerfWarningTimer timer_save_diff_prepare("save_diff_prepare");
save_diff_prepare(); save_diff_prepare();
timer_save_diff_prepare.reset();
to_inc_.clear(); to_inc_.clear();
to_dec_.clear(); to_dec_.clear();
@ -222,6 +262,7 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat
td::Status commit(CellStorer &storer) override { td::Status commit(CellStorer &storer) override {
prepare_commit(); prepare_commit();
td::PerfWarningTimer times_save_diff("save diff", 0.01);
save_diff(storer); save_diff(storer);
// Some elements are erased from hash table, to keep it small. // Some elements are erased from hash table, to keep it small.
// Hash table is no longer represents the difference between the loader and // Hash table is no longer represents the difference between the loader and
@ -249,7 +290,7 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat
celldb_compress_depth_ = value; celldb_compress_depth_ = value;
} }
vm::ExtCellCreator& as_ext_cell_creator() override { vm::ExtCellCreator &as_ext_cell_creator() override {
return *this; return *this;
} }
@ -259,6 +300,7 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat
std::vector<Ref<Cell>> to_dec_; std::vector<Ref<Cell>> to_dec_;
CellHashTable<CellInfo> hash_table_; CellHashTable<CellInfo> hash_table_;
std::vector<CellInfo *> visited_; std::vector<CellInfo *> visited_;
std::vector<CellStorer::MetaDiff> meta_diffs_;
Stats stats_diff_; Stats stats_diff_;
td::uint32 celldb_compress_depth_{0}; td::uint32 celldb_compress_depth_{0};
@ -269,8 +311,9 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat
class SimpleExtCellCreator : public ExtCellCreator { class SimpleExtCellCreator : public ExtCellCreator {
public: public:
explicit SimpleExtCellCreator(std::shared_ptr<CellDbReader> cell_db_reader) : explicit SimpleExtCellCreator(std::shared_ptr<CellDbReader> cell_db_reader)
cell_db_reader_(std::move(cell_db_reader)) {} : cell_db_reader_(std::move(cell_db_reader)) {
}
td::Result<Ref<Cell>> ext_cell(Cell::LevelMask level_mask, td::Slice hash, td::Slice depth) override { td::Result<Ref<Cell>> ext_cell(Cell::LevelMask level_mask, td::Slice hash, td::Slice depth) override {
TRY_RESULT(ext_cell, DynamicBocExtCell::create(PrunnedCellInfo{level_mask, hash, depth}, TRY_RESULT(ext_cell, DynamicBocExtCell::create(PrunnedCellInfo{level_mask, hash, depth},
@ -279,7 +322,7 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat
return std::move(ext_cell); return std::move(ext_cell);
} }
std::vector<Ref<Cell>>& get_created_cells() { std::vector<Ref<Cell>> &get_created_cells() {
return created_cells_; return created_cells_;
} }
@ -382,8 +425,7 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat
} }
bool not_in_db = false; bool not_in_db = false;
for_each( for_each(info, [&not_in_db, this](auto &child_info) { not_in_db |= !dfs_new_cells_in_db(child_info); }, false);
info, [&not_in_db, this](auto &child_info) { not_in_db |= !dfs_new_cells_in_db(child_info); }, false);
if (not_in_db) { if (not_in_db) {
CHECK(!info.in_db); CHECK(!info.in_db);
@ -441,6 +483,10 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat
for (auto info_ptr : visited_) { for (auto info_ptr : visited_) {
save_cell(*info_ptr, storer); save_cell(*info_ptr, storer);
} }
for (auto meta_diff : meta_diffs_) {
storer.apply_meta_diff(meta_diff);
}
meta_diffs_.clear();
visited_.clear(); visited_.clear();
} }
@ -558,6 +604,8 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat
} }
auto res = r_res.move_as_ok(); auto res = r_res.move_as_ok();
if (res.status != CellLoader::LoadResult::Ok) { if (res.status != CellLoader::LoadResult::Ok) {
LOG_CHECK(info.cell.not_null()) << "Trying to load nonexistent cell from db "
<< CellHash::from_slice(hash).to_hex();
break; break;
} }
info.cell = std::move(res.cell()); info.cell = std::move(res.cell());
@ -651,7 +699,7 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat
CellHashTable<CellInfo2> cells_; CellHashTable<CellInfo2> cells_;
std::queue<CellInfo2*> load_queue_; std::queue<CellInfo2 *> load_queue_;
td::uint32 active_load_ = 0; td::uint32 active_load_ = 0;
td::uint32 max_parallel_load_ = 4; td::uint32 max_parallel_load_ = 4;
}; };
@ -814,11 +862,10 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat
pca_state_->promise_.set_result(td::Unit()); pca_state_->promise_.set_result(td::Unit());
pca_state_ = {}; pca_state_ = {};
} }
}; };
} // namespace } // namespace
std::unique_ptr<DynamicBagOfCellsDb> DynamicBagOfCellsDb::create() { std::unique_ptr<DynamicBagOfCellsDb> DynamicBagOfCellsDb::create(CreateV1Options) {
return std::make_unique<DynamicBagOfCellsDbImpl>(); return std::make_unique<DynamicBagOfCellsDbImpl>();
} }
} // namespace vm } // namespace vm

View file

@ -17,6 +17,7 @@
Copyright 2017-2020 Telegram Systems LLP Copyright 2017-2020 Telegram Systems LLP
*/ */
#pragma once #pragma once
#include "td/db/KeyValue.h"
#include "vm/cells.h" #include "vm/cells.h"
#include "td/utils/Slice.h" #include "td/utils/Slice.h"
@ -49,13 +50,23 @@ class CellDbReader {
class DynamicBagOfCellsDb { class DynamicBagOfCellsDb {
public: public:
virtual ~DynamicBagOfCellsDb() = default; virtual ~DynamicBagOfCellsDb() = default;
virtual td::Result<std::vector<std::pair<std::string, std::string>>> meta_get_all() const = 0;
virtual td::Result<td::KeyValue::GetStatus> meta_get(td::Slice key, std::string &value) = 0;
virtual td::Status meta_set(td::Slice key, td::Slice value) = 0;
virtual td::Status meta_erase(td::Slice key) = 0;
virtual td::Result<Ref<DataCell>> load_cell(td::Slice hash) = 0; virtual td::Result<Ref<DataCell>> load_cell(td::Slice hash) = 0;
virtual td::Result<Ref<DataCell>> load_root(td::Slice hash) = 0; virtual td::Result<Ref<DataCell>> load_root(td::Slice hash) = 0;
virtual td::Result<Ref<DataCell>> load_root_thread_safe(td::Slice hash) const = 0; virtual td::Result<Ref<DataCell>> load_root_thread_safe(td::Slice hash) const = 0;
virtual td::Result<std::vector<Ref<DataCell>>> load_known_roots() const {
return std::vector<Ref<DataCell>>();
}
struct Stats { struct Stats {
td::int64 roots_total_count{0}; td::int64 roots_total_count{0};
td::int64 cells_total_count{0}; td::int64 cells_total_count{0};
td::int64 cells_total_size{0}; td::int64 cells_total_size{0};
td::NamedStats named_stats;
std::vector<std::pair<std::string, std::string>> custom_stats; std::vector<std::pair<std::string, std::string>> custom_stats;
void apply_diff(const Stats &diff) { void apply_diff(const Stats &diff) {
roots_total_count += diff.roots_total_count; roots_total_count += diff.roots_total_count;
@ -64,6 +75,20 @@ class DynamicBagOfCellsDb {
CHECK(roots_total_count >= 0); CHECK(roots_total_count >= 0);
CHECK(cells_total_count >= 0); CHECK(cells_total_count >= 0);
CHECK(cells_total_size >= 0); CHECK(cells_total_size >= 0);
named_stats.apply_diff(diff.named_stats);
}
friend td::StringBuilder &operator<<(td::StringBuilder &sb, const Stats &stats) {
sb << "STATS\n";
for (auto &p : stats.custom_stats) {
sb << "\t" << p.first << "\t" << p.second << "\n";
}
for (auto &p : stats.named_stats.stats_int) {
sb << "\t" << p.first << "\t" << p.second << "\n";
}
for (auto &p : stats.named_stats.stats_str) {
sb << "\t" << p.first << "\t" << p.second << "\n";
}
return sb;
} }
}; };
virtual void inc(const Ref<Cell> &old_root) = 0; virtual void inc(const Ref<Cell> &old_root) = 0;
@ -72,7 +97,7 @@ class DynamicBagOfCellsDb {
virtual td::Status prepare_commit() = 0; virtual td::Status prepare_commit() = 0;
virtual Stats get_stats_diff() = 0; virtual Stats get_stats_diff() = 0;
virtual td::Result<Stats> get_stats() { virtual td::Result<Stats> get_stats() {
return td::Status::Error("Not implemented"); return Stats{};
} }
virtual td::Status commit(CellStorer &) = 0; virtual td::Status commit(CellStorer &) = 0;
virtual std::shared_ptr<CellDbReader> get_cell_db_reader() = 0; virtual std::shared_ptr<CellDbReader> get_cell_db_reader() = 0;
@ -83,26 +108,50 @@ class DynamicBagOfCellsDb {
virtual void set_celldb_compress_depth(td::uint32 value) = 0; virtual void set_celldb_compress_depth(td::uint32 value) = 0;
virtual vm::ExtCellCreator &as_ext_cell_creator() = 0; virtual vm::ExtCellCreator &as_ext_cell_creator() = 0;
static std::unique_ptr<DynamicBagOfCellsDb> create();
struct CreateInMemoryOptions {
size_t extra_threads{std::thread::hardware_concurrency()};
bool verbose{true};
// Allocated DataCels will never be deleted
bool use_arena{false};
// Almost no overhead in memory during creation, but will scan database twice
bool use_less_memory_during_creation{true};
};
static std::unique_ptr<DynamicBagOfCellsDb> create_in_memory(td::KeyValueReader *kv, CreateInMemoryOptions options);
class AsyncExecutor { class AsyncExecutor {
public: public:
virtual ~AsyncExecutor() { virtual ~AsyncExecutor() {
} }
virtual void execute_async(std::function<void()> f) = 0; virtual void execute_async(std::function<void()> f) = 0;
virtual void execute_sync(std::function<void()> f) = 0; virtual void execute_sync(std::function<void()> f) = 0;
virtual std::string describe() const {
return "AsyncExecutor";
}
}; };
struct CreateV1Options {
friend td::StringBuilder &operator<<(td::StringBuilder &sb, const CreateV1Options &options) {
return sb << "V1{}";
}
};
static std::unique_ptr<DynamicBagOfCellsDb> create(CreateV1Options = {});
struct CreateV2Options {
size_t extra_threads{std::thread::hardware_concurrency()};
std::shared_ptr<AsyncExecutor> executor{};
size_t cache_ttl_max{2000};
size_t cache_size_max{1000000};
friend td::StringBuilder &operator<<(td::StringBuilder &sb, const CreateV2Options &options) {
return sb << "V2{extra_threads=" << options.extra_threads << ", cache_ttl_max=" << options.cache_ttl_max
<< ", cache_size_max=" << options.cache_size_max << "}";
}
};
static std::unique_ptr<DynamicBagOfCellsDb> create_v2(CreateV2Options options);
struct CreateInMemoryOptions {
size_t extra_threads{std::thread::hardware_concurrency()};
bool verbose{true};
// Allocated DataCells will never be deleted
bool use_arena{false};
// Almost no overhead in memory during creation, but will scan database twice
bool use_less_memory_during_creation{true};
friend td::StringBuilder &operator<<(td::StringBuilder &sb, const CreateInMemoryOptions &options) {
return sb << "InMemory{extra_threads=" << options.extra_threads << ", use_arena=" << options.use_arena
<< ", use_less_memory_during_creation=" << options.use_less_memory_during_creation << "}";
}
};
static std::unique_ptr<DynamicBagOfCellsDb> create_in_memory(td::KeyValueReader *kv, CreateInMemoryOptions options);
virtual void load_cell_async(td::Slice hash, std::shared_ptr<AsyncExecutor> executor, virtual void load_cell_async(td::Slice hash, std::shared_ptr<AsyncExecutor> executor,
td::Promise<Ref<DataCell>> promise) = 0; td::Promise<Ref<DataCell>> promise) = 0;
virtual void prepare_commit_async(std::shared_ptr<AsyncExecutor> executor, td::Promise<td::Unit> promise) = 0; virtual void prepare_commit_async(std::shared_ptr<AsyncExecutor> executor, td::Promise<td::Unit> promise) = 0;

File diff suppressed because it is too large Load diff

View file

@ -413,6 +413,7 @@ class CellStorage {
size_t dense_ht_size = 0; size_t dense_ht_size = 0;
size_t new_ht_size = 0; size_t new_ht_size = 0;
for_each_bucket(0, [&](auto bucket_id, CellBucket &bucket) { for_each_bucket(0, [&](auto bucket_id, CellBucket &bucket) {
// TODO: this leads to CE when use_dense_hash_map == false
dense_ht_capacity += bucket.infos_.dense_ht_values_.size(); dense_ht_capacity += bucket.infos_.dense_ht_values_.size();
dense_ht_size += bucket.infos_.dense_ht_size_; dense_ht_size += bucket.infos_.dense_ht_size_;
new_ht_capacity += bucket.infos_.new_ht_.bucket_count(); new_ht_capacity += bucket.infos_.new_ht_.bucket_count();
@ -468,6 +469,14 @@ class CellStorage {
} }
return td::Status::Error("not found"); return td::Status::Error("not found");
} }
td::Result<std::vector<Ref<DataCell>>> load_known_roots_local() const {
auto lock = local_access_.lock();
std::vector<Ref<DataCell>> result;
for (auto &root : roots_) {
result.emplace_back(root);
}
return result;
}
td::Result<Ref<DataCell>> load_root_shared(const CellHash &hash) const { td::Result<Ref<DataCell>> load_root_shared(const CellHash &hash) const {
std::lock_guard<std::mutex> lock(root_mutex_); std::lock_guard<std::mutex> lock(root_mutex_);
if (auto it = roots_.find(hash); it != roots_.end()) { if (auto it = roots_.find(hash); it != roots_.end()) {
@ -620,7 +629,7 @@ class CellStorage {
sb << "\n\t" << key << "=" << value; sb << "\n\t" << key << "=" << value;
} }
LOG_IF(ERROR, desc_count != 0 && desc_count != stats.roots_total_count + 1) LOG_IF(ERROR, desc_count != 0 && desc_count != stats.roots_total_count + 1)
<< "desc<> keys count is " << desc_count << " wich is different from roots count " << stats.roots_total_count; << "desc<> keys count is " << desc_count << " which is different from roots count " << stats.roots_total_count;
LOG_IF(WARNING, verbose) LOG_IF(WARNING, verbose)
<< P << "done in " << full_timer.elapsed() << "\n\troots_count=" << stats.roots_total_count << "\n\t" << P << "done in " << full_timer.elapsed() << "\n\troots_count=" << stats.roots_total_count << "\n\t"
<< desc_count << "\n\tcells_count=" << stats.cells_total_count << desc_count << "\n\tcells_count=" << stats.cells_total_count
@ -757,15 +766,77 @@ class CellStorage {
} }
}; };
class MetaStorage {
public:
explicit MetaStorage(std::vector<std::pair<std::string, std::string>> values)
: meta_(std::move_iterator(values.begin()), std::move_iterator(values.end())) {
for (auto &p : meta_) {
CHECK(p.first.size() != CellTraits::hash_bytes);
}
}
std::vector<std::pair<std::string, std::string>> meta_get_all() const {
return td::transform(meta_, [](const auto &p) { return std::make_pair(p.first, p.second); });
}
KeyValue::GetStatus meta_get(td::Slice key, std::string &value) const {
auto lock = local_access_.lock();
auto it = meta_.find(key.str());
if (it == meta_.end()) {
return KeyValue::GetStatus::NotFound;
}
value = it->second;
return KeyValue::GetStatus::Ok;
}
void meta_set(td::Slice key, td::Slice value) {
auto lock = local_access_.lock();
meta_[key.str()] = value.str();
meta_diffs_.push_back(
CellStorer::MetaDiff{.type = CellStorer::MetaDiff::Set, .key = key.str(), .value = value.str()});
}
void meta_erase(td::Slice key) {
auto lock = local_access_.lock();
meta_.erase(key.str());
meta_diffs_.push_back(CellStorer::MetaDiff{.type = CellStorer::MetaDiff::Erase, .key = key.str()});
}
std::vector<CellStorer::MetaDiff> extract_diffs() {
auto lock = local_access_.lock();
return std::move(meta_diffs_);
}
private:
mutable UniqueAccess local_access_;
std::unordered_map<std::string, std::string> meta_;
std::vector<CellStorer::MetaDiff> meta_diffs_;
};
class InMemoryBagOfCellsDb : public DynamicBagOfCellsDb { class InMemoryBagOfCellsDb : public DynamicBagOfCellsDb {
public: public:
explicit InMemoryBagOfCellsDb(td::unique_ptr<CellStorage> storage) : storage_(std::move(storage)) { explicit InMemoryBagOfCellsDb(td::unique_ptr<CellStorage> storage, td::unique_ptr<MetaStorage> meta_storage)
: storage_(std::move(storage)), meta_storage_(std::move(meta_storage)) {
}
td::Result<std::vector<std::pair<std::string, std::string>>> meta_get_all() const override {
return meta_storage_->meta_get_all();
}
td::Result<KeyValue::GetStatus> meta_get(td::Slice key, std::string &value) override {
CHECK(key.size() != CellTraits::hash_bytes);
return meta_storage_->meta_get(key, value);
}
td::Status meta_set(td::Slice key, td::Slice value) override {
meta_storage_->meta_set(key, value);
return td::Status::OK();
}
td::Status meta_erase(td::Slice key) override {
meta_storage_->meta_erase(key);
return td::Status::OK();
} }
td::Result<Ref<DataCell>> load_cell(td::Slice hash) override { td::Result<Ref<DataCell>> load_cell(td::Slice hash) override {
return storage_->load_cell(CellHash::from_slice(hash)); return storage_->load_cell(CellHash::from_slice(hash));
} }
td::Result<std::vector<Ref<DataCell>>> load_known_roots() const override {
return storage_->load_known_roots_local();
}
td::Result<Ref<DataCell>> load_root(td::Slice hash) override { td::Result<Ref<DataCell>> load_root(td::Slice hash) override {
return storage_->load_root_local(CellHash::from_slice(hash)); return storage_->load_root_local(CellHash::from_slice(hash));
} }
@ -798,29 +869,37 @@ class InMemoryBagOfCellsDb : public DynamicBagOfCellsDb {
TRY_STATUS(prepare_commit()); TRY_STATUS(prepare_commit());
} }
td::PerfWarningTimer times_save_diff("save diff");
Stats diff; Stats diff;
CHECK(to_dec_.empty()); CHECK(to_dec_.empty());
for (auto &it : info_) { for (auto &info : info_) {
auto &info = it.second;
if (info.diff_refcnt == 0) { if (info.diff_refcnt == 0) {
continue; continue;
} }
auto refcnt = td::narrow_cast<td::int32>(static_cast<td::int64>(info.db_refcnt) + info.diff_refcnt); auto refcnt = td::narrow_cast<td::int32>(static_cast<td::int64>(info.db_refcnt) + info.diff_refcnt);
CHECK(refcnt >= 0); LOG_CHECK(refcnt >= 0) << info.db_refcnt << " + " << info.diff_refcnt;
if (refcnt > 0) { if (refcnt > 0) {
cell_storer.set(refcnt, info.cell, false); if (info.db_refcnt == 0) {
TRY_STATUS(cell_storer.set(refcnt, info.cell, false));
} else {
TRY_STATUS(cell_storer.merge(info.cell->get_hash().as_slice(), info.diff_refcnt));
}
storage_->set(refcnt, info.cell); storage_->set(refcnt, info.cell);
if (info.db_refcnt == 0) { if (info.db_refcnt == 0) {
diff.cells_total_count++; diff.cells_total_count++;
diff.cells_total_size += static_cast<td::int64>(info.cell->get_storage_size()); diff.cells_total_size += static_cast<td::int64>(info.cell->get_storage_size());
} }
} else { } else {
cell_storer.erase(info.cell->get_hash().as_slice()); TRY_STATUS(cell_storer.erase(info.cell->get_hash().as_slice()));
storage_->erase(info.cell->get_hash()); storage_->erase(info.cell->get_hash());
diff.cells_total_count--; diff.cells_total_count--;
diff.cells_total_size -= static_cast<td::int64>(info.cell->get_storage_size()); diff.cells_total_size -= static_cast<td::int64>(info.cell->get_storage_size());
} }
} }
auto meta_diffs = meta_storage_->extract_diffs();
for (const auto &meta_diff : meta_diffs) {
TRY_STATUS(cell_storer.apply_meta_diff(meta_diff));
}
storage_->apply_stats_diff(diff); storage_->apply_stats_diff(diff);
info_ = {}; info_ = {};
return td::Status::OK(); return td::Status::OK();
@ -872,13 +951,39 @@ class InMemoryBagOfCellsDb : public DynamicBagOfCellsDb {
private: private:
td::unique_ptr<CellStorage> storage_; td::unique_ptr<CellStorage> storage_;
td::unique_ptr<MetaStorage> meta_storage_;
struct Info { struct Info {
td::int32 db_refcnt{0}; mutable td::int32 db_refcnt{0};
td::int32 diff_refcnt{0}; mutable td::int32 diff_refcnt{0};
Ref<DataCell> cell; Ref<DataCell> cell;
vm::CellHash key() const {
return cell->get_hash();
}
struct Eq {
using is_transparent = void; // Pred to use
bool operator()(const Info &info, const Info &other_info) const {
return info.key() == other_info.key();
}
bool operator()(const Info &info, td::Slice hash) const {
return info.key().as_slice() == hash;
}
bool operator()(td::Slice hash, const Info &info) const {
return info.key().as_slice() == hash;
}
};
struct Hash {
using is_transparent = void; // Pred to use
using transparent_key_equal = Eq;
size_t operator()(td::Slice hash) const {
return cell_hash_slice_hash(hash);
}
size_t operator()(const Info &info) const {
return cell_hash_slice_hash(info.key().as_slice());
}
};
}; };
td::HashMap<CellHash, Info> info_; td::HashSet<Info, Info::Hash, Info::Eq> info_;
std::unique_ptr<CellLoader> loader_; std::unique_ptr<CellLoader> loader_;
std::vector<Ref<Cell>> to_inc_; std::vector<Ref<Cell>> to_inc_;
@ -886,13 +991,13 @@ class InMemoryBagOfCellsDb : public DynamicBagOfCellsDb {
Ref<DataCell> do_inc(Ref<Cell> cell) { Ref<DataCell> do_inc(Ref<Cell> cell) {
auto cell_hash = cell->get_hash(); auto cell_hash = cell->get_hash();
if (auto it = info_.find(cell_hash); it != info_.end()) { if (auto it = info_.find(cell_hash.as_slice()); it != info_.end()) {
CHECK(it->second.diff_refcnt != std::numeric_limits<td::int32>::max()); CHECK(it->diff_refcnt != std::numeric_limits<td::int32>::max());
it->second.diff_refcnt++; it->diff_refcnt++;
return it->second.cell; return it->cell;
} }
if (auto o_info = storage_->get_info(cell_hash)) { if (auto o_info = storage_->get_info(cell_hash)) {
info_.emplace(cell_hash, Info{.db_refcnt = o_info->db_refcnt, .diff_refcnt = 1, .cell = o_info->cell}); info_.emplace(Info{.db_refcnt = o_info->db_refcnt, .diff_refcnt = 1, .cell = o_info->cell});
return std::move(o_info->cell); return std::move(o_info->cell);
} }
@ -905,21 +1010,21 @@ class InMemoryBagOfCellsDb : public DynamicBagOfCellsDb {
} }
auto res = cb.finalize(cs.is_special()); auto res = cb.finalize(cs.is_special());
CHECK(res->get_hash() == cell_hash); CHECK(res->get_hash() == cell_hash);
info_.emplace(cell_hash, Info{.db_refcnt = 0, .diff_refcnt = 1, .cell = res}); info_.emplace(Info{.db_refcnt = 0, .diff_refcnt = 1, .cell = res});
return res; return res;
} }
void do_dec(Ref<Cell> cell) { void do_dec(Ref<Cell> cell) {
auto cell_hash = cell->get_hash(); auto cell_hash = cell->get_hash();
auto it = info_.find(cell_hash); auto it = info_.find(cell_hash.as_slice());
if (it != info_.end()) { if (it != info_.end()) {
CHECK(it->second.diff_refcnt != std::numeric_limits<td::int32>::min()); CHECK(it->diff_refcnt != std::numeric_limits<td::int32>::min());
--it->second.diff_refcnt; --it->diff_refcnt;
} else { } else {
auto info = *storage_->get_info(cell_hash); auto info = *storage_->get_info(cell_hash);
it = info_.emplace(cell_hash, Info{.db_refcnt = info.db_refcnt, .diff_refcnt = -1, .cell = info.cell}).first; it = info_.emplace(Info{.db_refcnt = info.db_refcnt, .diff_refcnt = -1, .cell = info.cell}).first;
} }
if (it->second.diff_refcnt + it->second.db_refcnt != 0) { if (it->diff_refcnt + it->db_refcnt != 0) {
return; return;
} }
CellSlice cs(NoVm{}, std::move(cell)); CellSlice cs(NoVm{}, std::move(cell));
@ -936,7 +1041,8 @@ std::unique_ptr<DynamicBagOfCellsDb> DynamicBagOfCellsDb::create_in_memory(td::K
if (kv == nullptr) { if (kv == nullptr) {
LOG_IF(WARNING, options.verbose) << "Create empty in-memory cells database (no key value is given)"; LOG_IF(WARNING, options.verbose) << "Create empty in-memory cells database (no key value is given)";
auto storage = CellStorage::build(options, [](auto, auto, auto) { return std::make_pair(0, 0); }); auto storage = CellStorage::build(options, [](auto, auto, auto) { return std::make_pair(0, 0); });
return std::make_unique<InMemoryBagOfCellsDb>(std::move(storage)); auto meta_storage = td::make_unique<MetaStorage>(std::vector<std::pair<std::string, std::string>>{});
return std::make_unique<InMemoryBagOfCellsDb>(std::move(storage), std::move(meta_storage));
} }
std::vector<std::string> keys; std::vector<std::string> keys;
@ -962,6 +1068,9 @@ std::unique_ptr<DynamicBagOfCellsDb> DynamicBagOfCellsDb::create_in_memory(td::K
local_desc_count++; local_desc_count++;
return td::Status::OK(); return td::Status::OK();
} }
if (key.size() != 32) {
return td::Status::OK();
}
auto r_res = CellLoader::load(key, value.str(), true, pc_creator); auto r_res = CellLoader::load(key, value.str(), true, pc_creator);
if (r_res.is_error()) { if (r_res.is_error()) {
LOG(ERROR) << r_res.error() << " at " << td::format::escaped(key); LOG(ERROR) << r_res.error() << " at " << td::format::escaped(key);
@ -983,6 +1092,24 @@ std::unique_ptr<DynamicBagOfCellsDb> DynamicBagOfCellsDb::create_in_memory(td::K
}; };
auto storage = CellStorage::build(options, parallel_scan_cells); auto storage = CellStorage::build(options, parallel_scan_cells);
return std::make_unique<InMemoryBagOfCellsDb>(std::move(storage));
std::vector<std::pair<std::string, std::string>> meta;
// NB: it scans 1/(2^32) of the database which is not much
kv->for_each_in_range("desc", "desd", [&meta](td::Slice key, td::Slice value) {
if (key.size() != 32) {
meta.emplace_back(key.str(), value.str());
}
return td::Status::OK();
});
// this is for tests mostly. desc* keys are expected to correspond to roots
kv->for_each_in_range("meta", "metb", [&meta](td::Slice key, td::Slice value) {
if (key.size() != 32) {
meta.emplace_back(key.str(), value.str());
}
return td::Status::OK();
});
auto meta_storage = td::make_unique<MetaStorage>(std::move(meta));
return std::make_unique<InMemoryBagOfCellsDb>(std::move(storage), std::move(meta_storage));
} }
} // namespace vm } // namespace vm

View file

@ -40,6 +40,9 @@ class RootCell : public Cell {
struct PrivateTag {}; struct PrivateTag {};
public: public:
td::Status set_data_cell(Ref<DataCell> &&data_cell) const override {
return cell_->set_data_cell(std::move(data_cell));
}
td::Result<LoadedCell> load_cell() const override { td::Result<LoadedCell> load_cell() const override {
return cell_->load_cell(); return cell_->load_cell();
} }
@ -94,11 +97,11 @@ class DataCellCacheNoop {
class DataCellCacheMutex { class DataCellCacheMutex {
public: public:
Ref<DataCell> store(int idx, Ref<DataCell> cell) { Ref<DataCell> store(int idx, Ref<DataCell> cell) {
auto lock = cells_rw_mutex_.lock_write(); std::lock_guard lock(mutex_);
return cells_.emplace(idx, std::move(cell)).first->second; return cells_.emplace(idx, std::move(cell)).first->second;
} }
Ref<DataCell> load(int idx) { Ref<DataCell> load(int idx) {
auto lock = cells_rw_mutex_.lock_read(); std::lock_guard lock(mutex_);
auto it = cells_.find(idx); auto it = cells_.find(idx);
if (it != cells_.end()) { if (it != cells_.end()) {
return it->second; return it->second;
@ -106,12 +109,13 @@ class DataCellCacheMutex {
return {}; return {};
} }
void clear() { void clear() {
auto guard = cells_rw_mutex_.lock_write(); std::lock_guard lock(mutex_);
cells_.clear(); cells_.clear();
} }
private: private:
td::RwMutex cells_rw_mutex_; std::mutex mutex_;
// NB: in case of high contention, one should use multiple buckets with per bucket mutexes
td::HashMap<int, Ref<DataCell>> cells_; td::HashMap<int, Ref<DataCell>> cells_;
}; };
@ -246,7 +250,7 @@ class StaticBagOfCellsDbLazyImpl : public StaticBagOfCellsDb {
BagOfCells::Info info_; BagOfCells::Info info_;
std::mutex index_i_mutex_; std::mutex index_i_mutex_;
td::RwMutex index_data_rw_mutex_; std::mutex index_mutex_;
std::string index_data_; std::string index_data_;
std::atomic<int> index_i_{0}; std::atomic<int> index_i_{0};
size_t index_offset_{0}; size_t index_offset_{0};
@ -319,7 +323,7 @@ class StaticBagOfCellsDbLazyImpl : public StaticBagOfCellsDb {
info_.index_offset + (td::int64)idx * info_.offset_byte_size)); info_.index_offset + (td::int64)idx * info_.offset_byte_size));
offset_view = new_offset_view; offset_view = new_offset_view;
} else { } else {
guard = index_data_rw_mutex_.lock_read().move_as_ok(); std::lock_guard guard(index_mutex_);
offset_view = td::Slice(index_data_).substr((td::int64)idx * info_.offset_byte_size, info_.offset_byte_size); offset_view = td::Slice(index_data_).substr((td::int64)idx * info_.offset_byte_size, info_.offset_byte_size);
} }
@ -432,7 +436,7 @@ class StaticBagOfCellsDbLazyImpl : public StaticBagOfCellsDb {
} }
td::uint8 tmp[8]; td::uint8 tmp[8];
info_.write_offset(tmp, index_offset_); info_.write_offset(tmp, index_offset_);
auto guard = index_data_rw_mutex_.lock_write(); std::lock_guard guard(index_mutex_);
index_data_.append(reinterpret_cast<const char*>(tmp), info_.offset_byte_size); index_data_.append(reinterpret_cast<const char*>(tmp), info_.offset_byte_size);
} }
return td::Status::OK(); return td::Status::OK();

View file

@ -20,19 +20,51 @@
#include "td/utils/Status.h" #include "td/utils/Status.h"
#include "td/utils/Time.h" #include "td/utils/Time.h"
#include "td/utils/logging.h" #include "td/utils/logging.h"
#include "td/utils/ThreadSafeCounter.h"
#include <functional> #include <functional>
namespace td { namespace td {
struct UsageStats {
size_t get_count{};
size_t get_found_count{};
size_t get_not_found_count{};
size_t set_count{};
UsageStats operator+(const UsageStats& other) const {
return UsageStats{.get_count = get_count + other.get_count,
.get_found_count = get_found_count + other.get_found_count,
.get_not_found_count = get_not_found_count + other.get_not_found_count,
.set_count = set_count + other.set_count};
}
UsageStats operator-(const UsageStats& other) const {
return UsageStats{.get_count = get_count - other.get_count,
.get_found_count = get_found_count - other.get_found_count,
.get_not_found_count = get_not_found_count - other.get_not_found_count,
.set_count = set_count - other.set_count};
}
NamedStats to_named_stats() const {
NamedStats ns;
ns.stats_int["usage_get_count"] += get_count;
ns.stats_int["usage_get_found_count"] += get_found_count;
ns.stats_int["usage_get_not_found_count"] += get_not_found_count;
ns.stats_int["usage_set_count"] += set_count;
return ns;
}
};
inline td::StringBuilder& operator<<(td::StringBuilder& sb, const UsageStats& stats) {
sb << "get: " << stats.get_count << ", +" << stats.get_found_count << ", -" << stats.get_not_found_count;
return sb;
}
class KeyValueReader { class KeyValueReader {
public: public:
virtual ~KeyValueReader() = default; virtual ~KeyValueReader() = default;
enum class GetStatus : int32 { Ok, NotFound }; enum class GetStatus : int32 { Ok, NotFound };
virtual Result<GetStatus> get(Slice key, std::string &value) = 0; virtual Result<GetStatus> get(Slice key, std::string& value) = 0;
virtual Result<size_t> count(Slice prefix) = 0; virtual Result<size_t> count(Slice prefix) = 0;
virtual Status for_each(std::function<Status(Slice, Slice)> f) { virtual Status for_each(std::function<Status(Slice, Slice)> f) {
return Status::Error("for_each is not supported"); return Status::Error("for_each is not supported");
} }
virtual Status for_each_in_range (Slice begin, Slice end, std::function<Status(Slice, Slice)> f) { virtual Status for_each_in_range(Slice begin, Slice end, std::function<Status(Slice, Slice)> f) {
return td::Status::Error("foreach_range is not supported"); return td::Status::Error("foreach_range is not supported");
} }
}; };
@ -42,7 +74,7 @@ class PrefixedKeyValueReader : public KeyValueReader {
PrefixedKeyValueReader(std::shared_ptr<KeyValueReader> reader, Slice prefix) PrefixedKeyValueReader(std::shared_ptr<KeyValueReader> reader, Slice prefix)
: reader_(std::move(reader)), prefix_(prefix.str()) { : reader_(std::move(reader)), prefix_(prefix.str()) {
} }
Result<GetStatus> get(Slice key, std::string &value) override { Result<GetStatus> get(Slice key, std::string& value) override {
return reader_->get(PSLICE() << prefix_ << key, value); return reader_->get(PSLICE() << prefix_ << key, value);
} }
Result<size_t> count(Slice prefix) override { Result<size_t> count(Slice prefix) override {
@ -54,14 +86,16 @@ class PrefixedKeyValueReader : public KeyValueReader {
std::string prefix_; std::string prefix_;
}; };
class KeyValueUtils {
public:
};
class KeyValue : public KeyValueReader { class KeyValue : public KeyValueReader {
public: public:
virtual Status set(Slice key, Slice value) = 0; virtual Status set(Slice key, Slice value) = 0;
virtual Status erase(Slice key) = 0; virtual Status erase(Slice key) = 0;
virtual Status merge(Slice key, Slice value) {
return Status::Error("merge is not supported");
}
virtual Status run_gc() {
return Status::OK();
}
virtual Status begin_write_batch() = 0; virtual Status begin_write_batch() = 0;
virtual Status commit_write_batch() = 0; virtual Status commit_write_batch() = 0;
@ -80,12 +114,15 @@ class KeyValue : public KeyValueReader {
virtual Status flush() { virtual Status flush() {
return Status::OK(); return Status::OK();
} }
virtual UsageStats get_usage_stats() {
return {};
}
}; };
class PrefixedKeyValue : public KeyValue { class PrefixedKeyValue : public KeyValue {
public: public:
PrefixedKeyValue(std::shared_ptr<KeyValue> kv, Slice prefix) : kv_(std::move(kv)), prefix_(prefix.str()) { PrefixedKeyValue(std::shared_ptr<KeyValue> kv, Slice prefix) : kv_(std::move(kv)), prefix_(prefix.str()) {
} }
Result<GetStatus> get(Slice key, std::string &value) override { Result<GetStatus> get(Slice key, std::string& value) override {
return kv_->get(PSLICE() << prefix_ << key, value); return kv_->get(PSLICE() << prefix_ << key, value);
} }
Result<size_t> count(Slice prefix) override { Result<size_t> count(Slice prefix) override {

View file

@ -22,57 +22,99 @@
namespace td { namespace td {
Result<MemoryKeyValue::GetStatus> MemoryKeyValue::get(Slice key, std::string &value) { Result<MemoryKeyValue::GetStatus> MemoryKeyValue::get(Slice key, std::string &value) {
auto it = map_.find(key); auto bucket = lock(key);
if (it == map_.end()) { auto &map = bucket->map;
usage_stats_.get_count++;
auto it = map.find(key);
if (it == map.end()) {
usage_stats_.get_not_found_count++;
return GetStatus::NotFound; return GetStatus::NotFound;
} }
value = it->second; value = it->second;
usage_stats_.get_found_count++;
return GetStatus::Ok; return GetStatus::Ok;
} }
std::unique_ptr<MemoryKeyValue::Bucket, MemoryKeyValue::Unlock> MemoryKeyValue::lock(td::Slice key) {
auto bucket_id = std::hash<std::string_view>()(std::string_view(key.data(), key.size())) % buckets_.size();
return lock(buckets_[bucket_id]);
}
Status MemoryKeyValue::for_each(std::function<Status(Slice, Slice)> f) { Status MemoryKeyValue::for_each(std::function<Status(Slice, Slice)> f) {
for (auto &it : map_) { for (auto &unlocked_bucket : buckets_) {
TRY_STATUS(f(it.first, it.second)); auto bucket = lock(unlocked_bucket);
for (auto &it : bucket->map) {
TRY_STATUS(f(it.first, it.second));
}
} }
return Status::OK(); return Status::OK();
} }
Status MemoryKeyValue::for_each_in_range(Slice begin, Slice end, std::function<Status(Slice, Slice)> f) { Status MemoryKeyValue::for_each_in_range(Slice begin, Slice end, std::function<Status(Slice, Slice)> f) {
for (auto it = map_.lower_bound(begin); it != map_.end(); it++) { for (auto &unlocked_bucket : buckets_) {
if (it->first < end) { auto bucket = lock(unlocked_bucket);
TRY_STATUS(f(it->first, it->second)); auto &map = bucket->map;
} else { for (auto it = map.lower_bound(begin); it != map.end(); it++) {
break; if (it->first < end) {
TRY_STATUS(f(it->first, it->second));
} else {
break;
}
} }
} }
return Status::OK(); return Status::OK();
} }
Status MemoryKeyValue::set(Slice key, Slice value) { Status MemoryKeyValue::set(Slice key, Slice value) {
map_[key.str()] = value.str(); auto bucket = lock(key);
auto &map = bucket->map;
usage_stats_.set_count++;
map[key.str()] = value.str();
return Status::OK(); return Status::OK();
} }
Status MemoryKeyValue::merge(Slice key, Slice update) {
CHECK(merger_);
auto bucket = lock(key);
auto &map = bucket->map;
auto &value = map[key.str()];
merger_->merge_value_and_update(value, update);
if (value.empty()) {
map.erase(key.str());
}
return td::Status::OK();
}
Status MemoryKeyValue::erase(Slice key) { Status MemoryKeyValue::erase(Slice key) {
auto it = map_.find(key); auto bucket = lock(key);
if (it != map_.end()) { auto &map = bucket->map;
map_.erase(it); auto it = map.find(key);
if (it != map.end()) {
map.erase(it);
} }
return Status::OK(); return Status::OK();
} }
Result<size_t> MemoryKeyValue::count(Slice prefix) { Result<size_t> MemoryKeyValue::count(Slice prefix) {
size_t res = 0; size_t res = 0;
for (auto it = map_.lower_bound(prefix); it != map_.end(); it++) { for (auto &unlocked_bucket : buckets_) {
if (Slice(it->first).truncate(prefix.size()) != prefix) { auto bucket = lock(unlocked_bucket);
break; auto &map = bucket->map;
for (auto it = map.lower_bound(prefix); it != map.end(); it++) {
if (Slice(it->first).truncate(prefix.size()) != prefix) {
break;
}
res++;
} }
res++;
} }
return res; return res;
} }
std::unique_ptr<KeyValueReader> MemoryKeyValue::snapshot() { std::unique_ptr<KeyValueReader> MemoryKeyValue::snapshot() {
auto res = std::make_unique<MemoryKeyValue>(); auto res = std::make_unique<MemoryKeyValue>();
res->map_ = map_; for (size_t i = 0; i < buckets_.size(); i++) {
auto bucket = lock(buckets_[i]);
res->buckets_[i].map = bucket->map;
}
return std::move(res); return std::move(res);
} }
@ -80,10 +122,10 @@ std::string MemoryKeyValue::stats() const {
return PSTRING() << "MemoryKeyValueStats{" << tag("get_count", get_count_) << "}"; return PSTRING() << "MemoryKeyValueStats{" << tag("get_count", get_count_) << "}";
} }
Status MemoryKeyValue::begin_write_batch() { Status MemoryKeyValue::begin_write_batch() {
UNREACHABLE(); return Status::OK();
} }
Status MemoryKeyValue::commit_write_batch() { Status MemoryKeyValue::commit_write_batch() {
UNREACHABLE(); return Status::OK();
} }
Status MemoryKeyValue::abort_write_batch() { Status MemoryKeyValue::abort_write_batch() {
UNREACHABLE(); UNREACHABLE();

View file

@ -22,12 +22,22 @@
#include <map> #include <map>
namespace td { namespace td {
struct Merger {
virtual ~Merger() = default;
virtual void merge_value_and_update(std::string &value, Slice update) = 0;
virtual void merge_update_and_update(std::string &left_update, Slice right_update) = 0;
};
class MemoryKeyValue : public KeyValue { class MemoryKeyValue : public KeyValue {
public: public:
Result<GetStatus> get(Slice key, std::string &value) override; MemoryKeyValue() = default;
MemoryKeyValue(std::shared_ptr<Merger> merger) : merger_(std::move(merger)) {
}
Result<GetStatus> get(Slice key, std::string& value) override;
Status for_each(std::function<Status(Slice, Slice)> f) override; Status for_each(std::function<Status(Slice, Slice)> f) override;
Status for_each_in_range(Slice begin, Slice end, std::function<Status(Slice, Slice)> f) override; Status for_each_in_range(Slice begin, Slice end, std::function<Status(Slice, Slice)> f) override;
Status set(Slice key, Slice value) override; Status set(Slice key, Slice value) override;
Status merge(Slice key, Slice value) override;
Status erase(Slice key) override; Status erase(Slice key) override;
Result<size_t> count(Slice prefix) override; Result<size_t> count(Slice prefix) override;
@ -43,8 +53,30 @@ class MemoryKeyValue : public KeyValue {
std::string stats() const override; std::string stats() const override;
UsageStats get_usage_stats() override {
return usage_stats_;
}
private: private:
std::map<std::string, std::string, std::less<>> map_; static constexpr size_t buckets_n = 64;
struct Bucket {
std::mutex mutex;
std::map<std::string, std::string, std::less<>> map;
};
struct Unlock {
void operator()(Bucket* bucket) const {
bucket->mutex.unlock();
}
};
std::array<Bucket, buckets_n> buckets_{};
int64 get_count_{0}; int64 get_count_{0};
UsageStats usage_stats_{};
std::shared_ptr<Merger> merger_;
std::unique_ptr<Bucket, Unlock> lock(Bucket& bucket) {
bucket.mutex.lock();
return std::unique_ptr<Bucket, Unlock>(&bucket);
}
std::unique_ptr<Bucket, Unlock> lock(td::Slice key);
}; };
} // namespace td } // namespace td

View file

@ -24,10 +24,13 @@
#include "rocksdb/write_batch.h" #include "rocksdb/write_batch.h"
#include "rocksdb/utilities/optimistic_transaction_db.h" #include "rocksdb/utilities/optimistic_transaction_db.h"
#include "rocksdb/utilities/transaction.h" #include "rocksdb/utilities/transaction.h"
#include "td/utils/misc.h"
#include <rocksdb/filter_policy.h>
namespace td { namespace td {
namespace { namespace {
static Status from_rocksdb(rocksdb::Status status) { static Status from_rocksdb(const rocksdb::Status &status) {
if (status.ok()) { if (status.ok()) {
return Status::OK(); return Status::OK();
} }
@ -56,62 +59,83 @@ RocksDb::~RocksDb() {
} }
RocksDb RocksDb::clone() const { RocksDb RocksDb::clone() const {
if (transaction_db_) {
return RocksDb{transaction_db_, options_};
}
return RocksDb{db_, options_}; return RocksDb{db_, options_};
} }
Result<RocksDb> RocksDb::open(std::string path, RocksDbOptions options) { Result<RocksDb> RocksDb::open(std::string path, RocksDbOptions options) {
rocksdb::OptimisticTransactionDB *db; rocksdb::Options db_options;
{ db_options.merge_operator = options.merge_operator;
rocksdb::Options db_options; db_options.compaction_filter = options.compaction_filter;
static auto default_cache = rocksdb::NewLRUCache(1 << 30); static auto default_cache = rocksdb::NewLRUCache(1 << 30);
if (!options.no_block_cache && options.block_cache == nullptr) { if (!options.no_block_cache && options.block_cache == nullptr) {
options.block_cache = default_cache; options.block_cache = default_cache;
} }
rocksdb::BlockBasedTableOptions table_options; rocksdb::BlockBasedTableOptions table_options;
if (options.no_block_cache) { if (options.no_block_cache) {
table_options.no_block_cache = true; table_options.no_block_cache = true;
} else { } else {
table_options.block_cache = options.block_cache; table_options.block_cache = options.block_cache;
} }
db_options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_options)); db_options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_options));
db_options.use_direct_reads = options.use_direct_reads; // table_options.block_align = true;
db_options.manual_wal_flush = true; if (options.no_reads) {
db_options.create_if_missing = true; db_options.memtable_factory.reset(new rocksdb::VectorRepFactory());
db_options.max_background_compactions = 4; db_options.allow_concurrent_memtable_write = false;
db_options.max_background_flushes = 2; }
db_options.bytes_per_sync = 1 << 20;
db_options.writable_file_max_buffer_size = 2 << 14; db_options.wal_recovery_mode = rocksdb::WALRecoveryMode::kTolerateCorruptedTailRecords;
db_options.statistics = options.statistics; db_options.use_direct_reads = options.use_direct_reads;
db_options.max_log_file_size = 100 << 20; db_options.manual_wal_flush = true;
db_options.keep_log_file_num = 1; db_options.create_if_missing = true;
rocksdb::OptimisticTransactionDBOptions occ_options; db_options.max_background_compactions = 4;
occ_options.validate_policy = rocksdb::OccValidationPolicy::kValidateSerial; db_options.max_background_flushes = 2;
db_options.bytes_per_sync = 1 << 20;
db_options.writable_file_max_buffer_size = 2 << 14;
db_options.statistics = options.statistics;
db_options.max_log_file_size = 100 << 20;
db_options.keep_log_file_num = 1;
if (options.experimental) {
// Place your experimental options here
}
if (options.no_transactions) {
rocksdb::DB *db{nullptr};
TRY_STATUS(from_rocksdb(rocksdb::DB::Open(db_options, std::move(path), &db)));
return RocksDb(std::shared_ptr<rocksdb::DB>(db), std::move(options));
} else {
rocksdb::OptimisticTransactionDB *db{nullptr};
rocksdb::ColumnFamilyOptions cf_options(db_options); rocksdb::ColumnFamilyOptions cf_options(db_options);
std::vector<rocksdb::ColumnFamilyDescriptor> column_families; std::vector<rocksdb::ColumnFamilyDescriptor> column_families;
column_families.push_back(rocksdb::ColumnFamilyDescriptor(rocksdb::kDefaultColumnFamilyName, cf_options)); column_families.push_back(rocksdb::ColumnFamilyDescriptor(rocksdb::kDefaultColumnFamilyName, cf_options));
std::vector<rocksdb::ColumnFamilyHandle *> handles; std::vector<rocksdb::ColumnFamilyHandle *> handles;
rocksdb::OptimisticTransactionDBOptions occ_options;
occ_options.validate_policy = rocksdb::OccValidationPolicy::kValidateSerial;
TRY_STATUS(from_rocksdb(rocksdb::OptimisticTransactionDB::Open(db_options, occ_options, std::move(path), TRY_STATUS(from_rocksdb(rocksdb::OptimisticTransactionDB::Open(db_options, occ_options, std::move(path),
column_families, &handles, &db))); column_families, &handles, &db)));
CHECK(handles.size() == 1); CHECK(handles.size() == 1);
// i can delete the handle since DBImpl is always holding a reference to // i can delete the handle since DBImpl is always holding a reference to
// default column family // default column family
delete handles[0]; delete handles[0];
return RocksDb(std::shared_ptr<rocksdb::OptimisticTransactionDB>(db), std::move(options));
} }
return RocksDb(std::shared_ptr<rocksdb::OptimisticTransactionDB>(db), std::move(options));
} }
std::shared_ptr<rocksdb::Statistics> RocksDb::create_statistics() { std::shared_ptr<rocksdb::Statistics> RocksDb::create_statistics() {
return rocksdb::CreateDBStatistics(); return rocksdb::CreateDBStatistics();
} }
std::string RocksDb::statistics_to_string(const std::shared_ptr<rocksdb::Statistics> statistics) { std::string RocksDb::statistics_to_string(const std::shared_ptr<rocksdb::Statistics> &statistics) {
return statistics->ToString(); return statistics->ToString();
} }
void RocksDb::reset_statistics(const std::shared_ptr<rocksdb::Statistics> statistics) { void RocksDb::reset_statistics(const std::shared_ptr<rocksdb::Statistics> &statistics) {
statistics->Reset(); statistics->Reset();
} }
@ -133,7 +157,9 @@ std::string RocksDb::stats() const {
} }
Result<RocksDb::GetStatus> RocksDb::get(Slice key, std::string &value) { Result<RocksDb::GetStatus> RocksDb::get(Slice key, std::string &value) {
//LOG(ERROR) << "GET"; if (options_.no_reads) {
return td::Status::Error("trying to read from write-only database");
}
rocksdb::Status status; rocksdb::Status status;
if (snapshot_) { if (snapshot_) {
rocksdb::ReadOptions options; rocksdb::ReadOptions options;
@ -162,6 +188,18 @@ Status RocksDb::set(Slice key, Slice value) {
} }
return from_rocksdb(db_->Put({}, to_rocksdb(key), to_rocksdb(value))); return from_rocksdb(db_->Put({}, to_rocksdb(key), to_rocksdb(value)));
} }
Status RocksDb::merge(Slice key, Slice value) {
if (write_batch_) {
return from_rocksdb(write_batch_->Merge(to_rocksdb(key), to_rocksdb(value)));
}
if (transaction_) {
return from_rocksdb(transaction_->Merge(to_rocksdb(key), to_rocksdb(value)));
}
return from_rocksdb(db_->Merge({}, to_rocksdb(key), to_rocksdb(value)));
}
Status RocksDb::run_gc() {
return from_rocksdb(db_->CompactRange({}, nullptr, nullptr));
}
Status RocksDb::erase(Slice key) { Status RocksDb::erase(Slice key) {
if (write_batch_) { if (write_batch_) {
@ -174,7 +212,11 @@ Status RocksDb::erase(Slice key) {
} }
Result<size_t> RocksDb::count(Slice prefix) { Result<size_t> RocksDb::count(Slice prefix) {
if (options_.no_reads) {
return td::Status::Error("trying to read from write-only database");
}
rocksdb::ReadOptions options; rocksdb::ReadOptions options;
options.auto_prefix_mode = true;
options.snapshot = snapshot_.get(); options.snapshot = snapshot_.get();
std::unique_ptr<rocksdb::Iterator> iterator; std::unique_ptr<rocksdb::Iterator> iterator;
if (snapshot_ || !transaction_) { if (snapshot_ || !transaction_) {
@ -197,7 +239,11 @@ Result<size_t> RocksDb::count(Slice prefix) {
} }
Status RocksDb::for_each(std::function<Status(Slice, Slice)> f) { Status RocksDb::for_each(std::function<Status(Slice, Slice)> f) {
if (options_.no_reads) {
return td::Status::Error("trying to read from write-only database");
}
rocksdb::ReadOptions options; rocksdb::ReadOptions options;
options.auto_prefix_mode = true;
options.snapshot = snapshot_.get(); options.snapshot = snapshot_.get();
std::unique_ptr<rocksdb::Iterator> iterator; std::unique_ptr<rocksdb::Iterator> iterator;
if (snapshot_ || !transaction_) { if (snapshot_ || !transaction_) {
@ -219,7 +265,11 @@ Status RocksDb::for_each(std::function<Status(Slice, Slice)> f) {
} }
Status RocksDb::for_each_in_range(Slice begin, Slice end, std::function<Status(Slice, Slice)> f) { Status RocksDb::for_each_in_range(Slice begin, Slice end, std::function<Status(Slice, Slice)> f) {
if (options_.no_reads) {
return td::Status::Error("trying to read from write-only database");
}
rocksdb::ReadOptions options; rocksdb::ReadOptions options;
options.auto_prefix_mode = true;
options.snapshot = snapshot_.get(); options.snapshot = snapshot_.get();
std::unique_ptr<rocksdb::Iterator> iterator; std::unique_ptr<rocksdb::Iterator> iterator;
if (snapshot_ || !transaction_) { if (snapshot_ || !transaction_) {
@ -252,9 +302,10 @@ Status RocksDb::begin_write_batch() {
Status RocksDb::begin_transaction() { Status RocksDb::begin_transaction() {
CHECK(!write_batch_); CHECK(!write_batch_);
CHECK(transaction_db_);
rocksdb::WriteOptions options; rocksdb::WriteOptions options;
options.sync = true; options.sync = true;
transaction_.reset(db_->BeginTransaction(options, {})); transaction_.reset(transaction_db_->BeginTransaction(options, {}));
return Status::OK(); return Status::OK();
} }
@ -307,7 +358,11 @@ Status RocksDb::end_snapshot() {
} }
RocksDb::RocksDb(std::shared_ptr<rocksdb::OptimisticTransactionDB> db, RocksDbOptions options) RocksDb::RocksDb(std::shared_ptr<rocksdb::OptimisticTransactionDB> db, RocksDbOptions options)
: db_(std::move(db)), options_(options) { : transaction_db_{db}, db_(std::move(db)), options_(std::move(options)) {
}
RocksDb::RocksDb(std::shared_ptr<rocksdb::DB> db, RocksDbOptions options)
: db_(std::move(db)), options_(std::move(options)) {
} }
void RocksDbSnapshotStatistics::begin_snapshot(const rocksdb::Snapshot *snapshot) { void RocksDbSnapshotStatistics::begin_snapshot(const rocksdb::Snapshot *snapshot) {

View file

@ -36,12 +36,16 @@
#include <functional> #include <functional>
namespace rocksdb { namespace rocksdb {
class DB;
class Comparator;
class Cache; class Cache;
class OptimisticTransactionDB; class OptimisticTransactionDB;
class Transaction; class Transaction;
class WriteBatch; class WriteBatch;
class Snapshot; class Snapshot;
class Statistics; class Statistics;
class MergeOperator;
class CompactionFilter;
} // namespace rocksdb } // namespace rocksdb
namespace td { namespace td {
@ -61,6 +65,14 @@ struct RocksDbOptions {
std::shared_ptr<rocksdb::Statistics> statistics = nullptr; std::shared_ptr<rocksdb::Statistics> statistics = nullptr;
std::shared_ptr<rocksdb::Cache> block_cache; // Default - one 1G cache for all RocksDb std::shared_ptr<rocksdb::Cache> block_cache; // Default - one 1G cache for all RocksDb
std::shared_ptr<RocksDbSnapshotStatistics> snapshot_statistics = nullptr; std::shared_ptr<RocksDbSnapshotStatistics> snapshot_statistics = nullptr;
std::shared_ptr<rocksdb::MergeOperator> merge_operator = nullptr;
const rocksdb::CompactionFilter *compaction_filter = nullptr;
bool experimental = false;
bool no_reads = false;
bool no_transactions = false;
bool use_direct_reads = false; bool use_direct_reads = false;
bool no_block_cache = false; bool no_block_cache = false;
}; };
@ -73,10 +85,12 @@ class RocksDb : public KeyValue {
Result<GetStatus> get(Slice key, std::string &value) override; Result<GetStatus> get(Slice key, std::string &value) override;
Status set(Slice key, Slice value) override; Status set(Slice key, Slice value) override;
Status merge(Slice key, Slice value) override;
Status erase(Slice key) override; Status erase(Slice key) override;
Status run_gc() override;
Result<size_t> count(Slice prefix) override; Result<size_t> count(Slice prefix) override;
Status for_each(std::function<Status(Slice, Slice)> f) override; Status for_each(std::function<Status(Slice, Slice)> f) override;
Status for_each_in_range (Slice begin, Slice end, std::function<Status(Slice, Slice)> f) override; Status for_each_in_range(Slice begin, Slice end, std::function<Status(Slice, Slice)> f) override;
Status begin_write_batch() override; Status begin_write_batch() override;
Status commit_write_batch() override; Status commit_write_batch() override;
@ -94,8 +108,8 @@ class RocksDb : public KeyValue {
std::string stats() const override; std::string stats() const override;
static std::shared_ptr<rocksdb::Statistics> create_statistics(); static std::shared_ptr<rocksdb::Statistics> create_statistics();
static std::string statistics_to_string(const std::shared_ptr<rocksdb::Statistics> statistics); static std::string statistics_to_string(const std::shared_ptr<rocksdb::Statistics> &statistics);
static void reset_statistics(const std::shared_ptr<rocksdb::Statistics> statistics); static void reset_statistics(const std::shared_ptr<rocksdb::Statistics> &statistics);
static std::shared_ptr<rocksdb::Cache> create_cache(size_t capacity); static std::shared_ptr<rocksdb::Cache> create_cache(size_t capacity);
@ -103,12 +117,13 @@ class RocksDb : public KeyValue {
RocksDb &operator=(RocksDb &&); RocksDb &operator=(RocksDb &&);
~RocksDb(); ~RocksDb();
std::shared_ptr<rocksdb::OptimisticTransactionDB> raw_db() const { std::shared_ptr<rocksdb::DB> raw_db() const {
return db_; return db_;
}; };
private: private:
std::shared_ptr<rocksdb::OptimisticTransactionDB> db_; std::shared_ptr<rocksdb::OptimisticTransactionDB> transaction_db_;
std::shared_ptr<rocksdb::DB> db_;
RocksDbOptions options_; RocksDbOptions options_;
std::unique_ptr<rocksdb::Transaction> transaction_; std::unique_ptr<rocksdb::Transaction> transaction_;
@ -123,5 +138,6 @@ class RocksDb : public KeyValue {
std::unique_ptr<const rocksdb::Snapshot, UnreachableDeleter> snapshot_; std::unique_ptr<const rocksdb::Snapshot, UnreachableDeleter> snapshot_;
explicit RocksDb(std::shared_ptr<rocksdb::OptimisticTransactionDB> db, RocksDbOptions options); explicit RocksDb(std::shared_ptr<rocksdb::OptimisticTransactionDB> db, RocksDbOptions options);
explicit RocksDb(std::shared_ptr<rocksdb::DB> db, RocksDbOptions options);
}; };
} // namespace td } // namespace td

View file

@ -414,7 +414,9 @@ class MpmcQueue {
while (true) { while (true) {
auto node = hazard_pointers_.protect(thread_id, 0, read_pos_); auto node = hazard_pointers_.protect(thread_id, 0, read_pos_);
auto &block = node->block; auto &block = node->block;
if (block.write_pos <= block.read_pos && node->next.load(std::memory_order_relaxed) == nullptr) { auto read_pos = block.read_pos.load();
auto write_pos = block.write_pos.load();
if (write_pos <= read_pos && node->next.load(std::memory_order_relaxed) == nullptr) {
return false; return false;
} }
auto pos = block.read_pos++; auto pos = block.read_pos++;

View file

@ -619,6 +619,13 @@ inline Result<Unit>::Result(Status &&status) : status_(std::move(status)) {
inline StringBuilder &operator<<(StringBuilder &string_builder, const Status &status) { inline StringBuilder &operator<<(StringBuilder &string_builder, const Status &status) {
return status.print(string_builder); return status.print(string_builder);
} }
template <class T>
StringBuilder &operator<<(StringBuilder &sb, const Result<T> &result) {
if (result.is_ok()) {
return sb << "Ok{" << result.ok() << "}";
}
return sb << result.error();
}
namespace detail { namespace detail {

View file

@ -19,6 +19,7 @@
#pragma once #pragma once
#include "port/thread.h"
#include "td/utils/common.h" #include "td/utils/common.h"
#include "td/utils/Slice.h" #include "td/utils/Slice.h"
#include "td/utils/StringBuilder.h" #include "td/utils/StringBuilder.h"
@ -26,6 +27,7 @@
#include <array> #include <array>
#include <atomic> #include <atomic>
#include <map>
#include <mutex> #include <mutex>
namespace td { namespace td {
@ -69,6 +71,50 @@ class ThreadSafeCounter {
ThreadSafeMultiCounter<1> counter_; ThreadSafeMultiCounter<1> counter_;
}; };
struct NamedStats {
std::map<std::string, td::int64> stats_int;
std::map<std::string, std::string> stats_str;
NamedStats with_suffix(const std::string &suffix) const {
NamedStats res;
for (auto &p : stats_int) {
res.stats_int[p.first + suffix] = p.second;
}
for (auto &p : stats_str) {
res.stats_str[p.first + suffix] = p.second;
}
return res;
}
NamedStats with_prefix(const std::string &prefix) const {
NamedStats res;
for (auto &p : stats_int) {
res.stats_int[prefix + p.first] = p.second;
}
for (auto &p : stats_str) {
res.stats_str[prefix + p.first] = p.second;
}
return res;
}
void apply_diff(const NamedStats &other) {
for (auto &p : other.stats_int) {
stats_int[p.first] += p.second;
}
for (auto &p : other.stats_str) {
stats_str[p.first] = p.second;
}
}
void subtract_diff(const NamedStats &other) {
for (auto &p : other.stats_int) {
stats_int[p.first] -= p.second;
}
}
NamedStats combine_with(const NamedStats &other) const {
NamedStats res = *this;
res.apply_diff(other);
return res;
}
};
class NamedThreadSafeCounter { class NamedThreadSafeCounter {
static constexpr int N = 128; static constexpr int N = 128;
using Counter = ThreadSafeMultiCounter<N>; using Counter = ThreadSafeMultiCounter<N>;
@ -79,6 +125,9 @@ class NamedThreadSafeCounter {
CounterRef() = default; CounterRef() = default;
CounterRef(size_t index, Counter *counter) : index_(index), counter_(counter) { CounterRef(size_t index, Counter *counter) : index_(index), counter_(counter) {
} }
void inc() {
add(1);
}
void add(int64 diff) { void add(int64 diff) {
counter_->add(index_, diff); counter_->add(index_, diff);
} }
@ -119,6 +168,11 @@ class NamedThreadSafeCounter {
f(names_[i], counter_.sum(i)); f(names_[i], counter_.sum(i));
} }
} }
NamedStats get_stats() const {
NamedStats res;
for_each([&](Slice name, int64 cnt) { res.stats_int.emplace(name.str(), cnt); });
return res;
}
void clear() { void clear() {
std::unique_lock<std::mutex> guard(mutex_); std::unique_lock<std::mutex> guard(mutex_);
@ -181,11 +235,11 @@ struct NamedPerfCounter {
} // namespace td } // namespace td
#define TD_PERF_COUNTER(name) \ #define TD_PERF_COUNTER(name) \
static auto perf_##name = td::NamedPerfCounter::get_default().get_counter(td::Slice(#name)); \ static auto perf_##name = td::NamedPerfCounter::get_default().get_counter(td::Slice(#name)); \
auto scoped_perf_##name = td::NamedPerfCounter::ScopedPerfCounterRef{.perf_counter = perf_##name}; auto scoped_perf_##name = td::NamedPerfCounter::ScopedPerfCounterRef{.perf_counter = perf_##name};
#define TD_PERF_COUNTER_SINCE(name, since) \ #define TD_PERF_COUNTER_SINCE(name, since) \
static auto perf_##name = td::NamedPerfCounter::get_default().get_counter(td::Slice(#name)); \ static auto perf_##name = td::NamedPerfCounter::get_default().get_counter(td::Slice(#name)); \
auto scoped_perf_##name = \ auto scoped_perf_##name = \
td::NamedPerfCounter::ScopedPerfCounterRef{.perf_counter = perf_##name, .started_at_ticks = since}; td::NamedPerfCounter::ScopedPerfCounterRef{.perf_counter = perf_##name, .started_at_ticks = since};

View file

@ -1414,6 +1414,9 @@ td::Status ValidatorEngine::load_global_config() {
if (zero_state.root_hash.is_zero() || zero_state.file_hash.is_zero()) { if (zero_state.root_hash.is_zero() || zero_state.file_hash.is_zero()) {
return td::Status::Error(ton::ErrorCode::error, "[validator] section contains incomplete [zero_state]"); return td::Status::Error(ton::ErrorCode::error, "[validator] section contains incomplete [zero_state]");
} }
if (celldb_in_memory_ && celldb_v2_) {
return td::Status::Error(ton::ErrorCode::error, "at most one of --celldb-in-memory --celldb-v2 could be used");
}
ton::BlockIdExt init_block; ton::BlockIdExt init_block;
if (!conf.validator_->init_block_) { if (!conf.validator_->init_block_) {
@ -1461,11 +1464,12 @@ td::Status ValidatorEngine::load_global_config() {
if (!session_logs_file_.empty()) { if (!session_logs_file_.empty()) {
validator_options_.write().set_session_logs_file(session_logs_file_); validator_options_.write().set_session_logs_file(session_logs_file_);
} }
if (celldb_in_memory_) { if (celldb_in_memory_ || celldb_v2_) {
celldb_compress_depth_ = 0; celldb_compress_depth_ = 0;
} }
validator_options_.write().set_celldb_compress_depth(celldb_compress_depth_); validator_options_.write().set_celldb_compress_depth(celldb_compress_depth_);
validator_options_.write().set_celldb_in_memory(celldb_in_memory_); validator_options_.write().set_celldb_in_memory(celldb_in_memory_);
validator_options_.write().set_celldb_v2(celldb_v2_);
validator_options_.write().set_max_open_archive_files(max_open_archive_files_); validator_options_.write().set_max_open_archive_files(max_open_archive_files_);
validator_options_.write().set_archive_preload_period(archive_preload_period_); validator_options_.write().set_archive_preload_period(archive_preload_period_);
validator_options_.write().set_disable_rocksdb_stats(disable_rocksdb_stats_); validator_options_.write().set_disable_rocksdb_stats(disable_rocksdb_stats_);
@ -4526,6 +4530,12 @@ int main(int argc, char *argv[]) {
[&]() { [&]() {
acts.push_back([&x]() { td::actor::send_closure(x, &ValidatorEngine::set_celldb_in_memory, true); }); acts.push_back([&x]() { td::actor::send_closure(x, &ValidatorEngine::set_celldb_in_memory, true); });
}); });
p.add_option(
'\0', "celldb-v2",
"use new version off celldb",
[&]() {
acts.push_back([&x]() { td::actor::send_closure(x, &ValidatorEngine::set_celldb_v2, true); });
});
p.add_checked_option( p.add_checked_option(
'\0', "catchain-max-block-delay", "delay before creating a new catchain block, in seconds (default: 0.4)", '\0', "catchain-max-block-delay", "delay before creating a new catchain block, in seconds (default: 0.4)",
[&](td::Slice s) -> td::Status { [&](td::Slice s) -> td::Status {

View file

@ -218,6 +218,7 @@ class ValidatorEngine : public td::actor::Actor {
bool celldb_direct_io_ = false; bool celldb_direct_io_ = false;
bool celldb_preload_all_ = false; bool celldb_preload_all_ = false;
bool celldb_in_memory_ = false; bool celldb_in_memory_ = false;
bool celldb_v2_ = false;
td::optional<double> catchain_max_block_delay_, catchain_max_block_delay_slow_; td::optional<double> catchain_max_block_delay_, catchain_max_block_delay_slow_;
bool read_config_ = false; bool read_config_ = false;
bool started_keyring_ = false; bool started_keyring_ = false;
@ -311,6 +312,9 @@ class ValidatorEngine : public td::actor::Actor {
void set_celldb_in_memory(bool value) { void set_celldb_in_memory(bool value) {
celldb_in_memory_ = value; celldb_in_memory_ = value;
} }
void set_celldb_v2(bool value) {
celldb_v2_ = value;
}
void set_catchain_max_block_delay(double value) { void set_catchain_max_block_delay(double value) {
catchain_max_block_delay_ = value; catchain_max_block_delay_ = value;
} }

View file

@ -28,6 +28,9 @@
#include "ton/ton-io.hpp" #include "ton/ton-io.hpp"
#include "common/delay.h" #include "common/delay.h"
#include <block-auto.h>
#include <rocksdb/merge_operator.h>
namespace ton { namespace ton {
namespace validator { namespace validator {
@ -73,6 +76,41 @@ CellDbIn::CellDbIn(td::actor::ActorId<RootDb> root_db, td::actor::ActorId<CellDb
: root_db_(root_db), parent_(parent), path_(std::move(path)), opts_(opts) { : root_db_(root_db), parent_(parent), path_(std::move(path)), opts_(opts) {
} }
struct MergeOperatorAddCellRefcnt : public rocksdb::MergeOperator {
const char* Name() const override {
return "MergeOperatorAddCellRefcnt";
}
static auto to_td(rocksdb::Slice value) -> td::Slice {
return td::Slice(value.data(), value.size());
}
bool FullMergeV2(const MergeOperationInput& merge_in, MergeOperationOutput* merge_out) const override {
CHECK(merge_in.existing_value);
auto& value = *merge_in.existing_value;
CHECK(merge_in.operand_list.size() >= 1);
td::Slice diff;
std::string diff_buf;
if (merge_in.operand_list.size() == 1) {
diff = to_td(merge_in.operand_list[0]);
} else {
diff_buf = merge_in.operand_list[0].ToString();
for (size_t i = 1; i < merge_in.operand_list.size(); ++i) {
vm::CellStorer::merge_refcnt_diffs(diff_buf, to_td(merge_in.operand_list[i]));
}
diff = diff_buf;
}
merge_out->new_value = value.ToString();
vm::CellStorer::merge_value_and_refcnt_diff(merge_out->new_value, diff);
return true;
}
bool PartialMerge(const rocksdb::Slice& /*key*/, const rocksdb::Slice& left, const rocksdb::Slice& right,
std::string* new_value, rocksdb::Logger* logger) const override {
*new_value = left.ToString();
vm::CellStorer::merge_refcnt_diffs(*new_value, to_td(right));
return true;
}
};
void CellDbIn::start_up() { void CellDbIn::start_up() {
on_load_callback_ = [actor = std::make_shared<td::actor::ActorOwn<MigrationProxy>>( on_load_callback_ = [actor = std::make_shared<td::actor::ActorOwn<MigrationProxy>>(
td::actor::create_actor<MigrationProxy>("celldbmigration", actor_id(this))), td::actor::create_actor<MigrationProxy>("celldbmigration", actor_id(this))),
@ -96,46 +134,147 @@ void CellDbIn::start_up() {
db_options.snapshot_statistics = snapshot_statistics_; db_options.snapshot_statistics = snapshot_statistics_;
} }
db_options.statistics = statistics_; db_options.statistics = statistics_;
if (opts_->get_celldb_cache_size()) { auto o_celldb_cache_size = opts_->get_celldb_cache_size();
db_options.block_cache = td::RocksDb::create_cache(opts_->get_celldb_cache_size().value());
LOG(WARNING) << "Set CellDb block cache size to " << td::format::as_size(opts_->get_celldb_cache_size().value()); std::optional<vm::DynamicBagOfCellsDb::CreateInMemoryOptions> boc_in_memory_options;
std::optional<vm::DynamicBagOfCellsDb::CreateV1Options> boc_v1_options;
std::optional<vm::DynamicBagOfCellsDb::CreateV2Options> boc_v2_options;
if (opts_->get_celldb_v2()) {
boc_v2_options =
vm::DynamicBagOfCellsDb::CreateV2Options{.extra_threads = std::clamp(std::thread::hardware_concurrency() / 2, 1u, 8u),
.executor = {},
.cache_ttl_max = 2000,
.cache_size_max = 1000000};
size_t min_rocksdb_cache = std::max(size_t{1} << 30, boc_v2_options->cache_size_max * 5000);
if (!o_celldb_cache_size || o_celldb_cache_size.value() < min_rocksdb_cache) {
LOG(WARNING) << "Increase CellDb block cache size to " << td::format::as_size(min_rocksdb_cache) << " from "
<< td::format::as_size(o_celldb_cache_size.value());
o_celldb_cache_size = min_rocksdb_cache;
}
LOG(WARNING) << "Using V2 DynamicBagOfCells with options " << *boc_v2_options;
} else if (opts_->get_celldb_in_memory()) {
// default options
boc_in_memory_options = vm::DynamicBagOfCellsDb::CreateInMemoryOptions{
.extra_threads = std::thread::hardware_concurrency(),
.verbose = true,
.use_arena = false,
.use_less_memory_during_creation = true,
};
LOG(WARNING) << "Using InMemory DynamicBagOfCells with options " << *boc_v2_options;
} else {
boc_v1_options = vm::DynamicBagOfCellsDb::CreateV1Options{};
LOG(WARNING) << "Using V1 DynamicBagOfCells with options " << *boc_v1_options;
}
if (o_celldb_cache_size) {
db_options.block_cache = td::RocksDb::create_cache(o_celldb_cache_size.value());
LOG(WARNING) << "Set CellDb block cache size to " << td::format::as_size(o_celldb_cache_size.value());
} }
db_options.use_direct_reads = opts_->get_celldb_direct_io(); db_options.use_direct_reads = opts_->get_celldb_direct_io();
// NB: from now on we MUST use this merge operator
// Only V2 and InMemory BoC actually use them, but it still should be kept for V1,
// to handle updates written by V2 or InMemory BoCs
db_options.merge_operator = std::make_shared<MergeOperatorAddCellRefcnt>();
if (opts_->get_celldb_in_memory()) { if (opts_->get_celldb_in_memory()) {
td::RocksDbOptions read_db_options; td::RocksDbOptions read_db_options;
read_db_options.use_direct_reads = true; read_db_options.use_direct_reads = true;
read_db_options.no_block_cache = true; read_db_options.no_block_cache = true;
read_db_options.block_cache = {}; read_db_options.block_cache = {};
read_db_options.merge_operator = std::make_shared<MergeOperatorAddCellRefcnt>();
LOG(WARNING) << "Loading all cells in memory (because of --celldb-in-memory)"; LOG(WARNING) << "Loading all cells in memory (because of --celldb-in-memory)";
td::Timer timer; td::Timer timer;
auto read_cell_db = auto read_cell_db =
std::make_shared<td::RocksDb>(td::RocksDb::open(path_, std::move(read_db_options)).move_as_ok()); std::make_shared<td::RocksDb>(td::RocksDb::open(path_, std::move(read_db_options)).move_as_ok());
boc_ = vm::DynamicBagOfCellsDb::create_in_memory(read_cell_db.get(), {}); boc_ = vm::DynamicBagOfCellsDb::create_in_memory(read_cell_db.get(), *boc_in_memory_options);
in_memory_load_time_ = timer.elapsed(); in_memory_load_time_ = timer.elapsed();
td::actor::send_closure(parent_, &CellDb::set_in_memory_boc, boc_);
// no reads will be allowed from rocksdb, only writes
db_options.no_reads = true;
} }
auto rocks_db = std::make_shared<td::RocksDb>(td::RocksDb::open(path_, std::move(db_options)).move_as_ok()); auto rocks_db = std::make_shared<td::RocksDb>(td::RocksDb::open(path_, std::move(db_options)).move_as_ok());
rocks_db_ = rocks_db->raw_db(); rocks_db_ = rocks_db->raw_db();
cell_db_ = std::move(rocks_db); cell_db_ = std::move(rocks_db);
if (!opts_->get_celldb_in_memory()) { if (!opts_->get_celldb_in_memory()) {
boc_ = vm::DynamicBagOfCellsDb::create(); if (opts_->get_celldb_v2()) {
boc_ = vm::DynamicBagOfCellsDb::create_v2(*boc_v2_options);
} else {
boc_ = vm::DynamicBagOfCellsDb::create(*boc_v1_options);
}
boc_->set_celldb_compress_depth(opts_->get_celldb_compress_depth()); boc_->set_celldb_compress_depth(opts_->get_celldb_compress_depth());
boc_->set_loader(std::make_unique<vm::CellLoader>(cell_db_->snapshot(), on_load_callback_)).ensure(); boc_->set_loader(std::make_unique<vm::CellLoader>(cell_db_->snapshot(), on_load_callback_)).ensure();
td::actor::send_closure(parent_, &CellDb::update_snapshot, cell_db_->snapshot());
} }
auto meta = boc_->meta_get_all().move_as_ok();
size_t missing_roots = 0;
size_t unknown_roots = 0;
std::set<vm::CellHash> root_hashes;
for (auto [k, v] : meta) {
if (k == "desczero") {
continue;
}
auto obj = fetch_tl_object<ton_api::db_celldb_value>(td::BufferSlice{v}, true);
obj.ensure();
auto entry = DbEntry{obj.move_as_ok()};
root_hashes.insert(vm::CellHash::from_slice(entry.root_hash.as_slice()));
auto cell = boc_->load_cell(entry.root_hash.as_slice());
missing_roots += cell.is_error();
LOG_IF(ERROR, cell.is_error()) << "Cannot load root from meta: " << entry.block_id.to_str() << " " << cell.error();
}
auto known_roots = boc_->load_known_roots().move_as_ok();
for (auto& root : known_roots) {
block::gen::ShardStateUnsplit::Record info;
block::gen::OutMsgQueueInfo::Record qinfo;
block::ShardId shard;
if (!(tlb::unpack_cell(root, info) && shard.deserialize(info.shard_id.write()) &&
tlb::unpack_cell(info.out_msg_queue_info, qinfo))) {
LOG(FATAL) << "cannot create ShardDescr from a root in celldb";
}
if (!root_hashes.contains(root->get_hash())) {
unknown_roots++;
LOG(ERROR) << "Unknown root" << ShardIdFull(shard).to_str() << ":" << info.seq_no;
constexpr bool delete_unknown_roots = false;
if (delete_unknown_roots) {
vm::CellStorer stor{*cell_db_};
cell_db_->begin_write_batch().ensure();
boc_->dec(root);
boc_->commit(stor).ensure();
cell_db_->commit_write_batch().ensure();
if (!opts_->get_celldb_in_memory()) {
boc_->set_loader(std::make_unique<vm::CellLoader>(cell_db_->snapshot(), on_load_callback_)).ensure();
}
LOG(ERROR) << "Unknown root" << ShardIdFull(shard).to_str() << ":" << info.seq_no << " REMOVED";
}
}
}
LOG_IF(ERROR, missing_roots != 1) << "Missing root hashes: " << missing_roots;
LOG_IF(ERROR, unknown_roots != 0) << "Unknown roots: " << unknown_roots;
LOG_IF(FATAL, missing_roots > 1) << "Missing root hashes: " << missing_roots;
LOG_IF(FATAL, unknown_roots != 0) << "Unknown roots: " << unknown_roots;
alarm_timestamp() = td::Timestamp::in(10.0); alarm_timestamp() = td::Timestamp::in(10.0);
auto empty = get_empty_key_hash(); auto empty = get_empty_key_hash();
if (get_block(empty).is_error()) { if (get_block(empty).is_error()) {
DbEntry e{get_empty_key(), empty, empty, RootHash::zero()}; DbEntry e{get_empty_key(), empty, empty, RootHash::zero()};
vm::CellStorer stor{*cell_db_};
cell_db_->begin_write_batch().ensure(); cell_db_->begin_write_batch().ensure();
set_block(empty, std::move(e)); set_block(empty, std::move(e));
boc_->commit(stor);
cell_db_->commit_write_batch().ensure(); cell_db_->commit_write_batch().ensure();
} }
if (opts_->get_celldb_v2() || opts_->get_celldb_in_memory()) {
send_closure(parent_, &CellDb::set_thread_safe_boc, boc_);
} else {
send_closure(parent_, &CellDb::update_snapshot, cell_db_->snapshot());
}
if (opts_->get_celldb_preload_all()) { if (opts_->get_celldb_preload_all()) {
// Iterate whole DB in a separate thread // Iterate whole DB in a separate thread
delay_action( delay_action(
@ -161,7 +300,7 @@ void CellDbIn::start_up() {
{ {
std::string key = "stats.last_deleted_mc_seqno", value; std::string key = "stats.last_deleted_mc_seqno", value;
auto R = cell_db_->get(td::as_slice(key), value); auto R = boc_->meta_get(td::as_slice(key), value);
R.ensure(); R.ensure();
if (R.ok() == td::KeyValue::GetStatus::Ok) { if (R.ok() == td::KeyValue::GetStatus::Ok) {
auto r_value = td::to_integer_safe<BlockSeqno>(value); auto r_value = td::to_integer_safe<BlockSeqno>(value);
@ -240,10 +379,10 @@ void CellDbIn::store_cell(BlockIdExt block_id, td::Ref<vm::Cell> cell, td::Promi
td::Timer timer_write; td::Timer timer_write;
vm::CellStorer stor{*cell_db_}; vm::CellStorer stor{*cell_db_};
cell_db_->begin_write_batch().ensure(); cell_db_->begin_write_batch().ensure();
boc_->commit(stor).ensure();
set_block(get_empty_key_hash(), std::move(E)); set_block(get_empty_key_hash(), std::move(E));
set_block(D.prev, std::move(P)); set_block(D.prev, std::move(P));
set_block(key_hash, std::move(D)); set_block(key_hash, std::move(D));
boc_->commit(stor).ensure();
cell_db_->commit_write_batch().ensure(); cell_db_->commit_write_batch().ensure();
timer_write.pause(); timer_write.pause();
@ -266,11 +405,10 @@ void CellDbIn::store_cell(BlockIdExt block_id, td::Ref<vm::Cell> cell, td::Promi
void CellDbIn::get_cell_db_reader(td::Promise<std::shared_ptr<vm::CellDbReader>> promise) { void CellDbIn::get_cell_db_reader(td::Promise<std::shared_ptr<vm::CellDbReader>> promise) {
if (db_busy_) { if (db_busy_) {
action_queue_.push( action_queue_.push([self = this, promise = std::move(promise)](td::Result<td::Unit> R) mutable {
[self = this, promise = std::move(promise)](td::Result<td::Unit> R) mutable { R.ensure();
R.ensure(); self->get_cell_db_reader(std::move(promise));
self->get_cell_db_reader(std::move(promise)); });
});
return; return;
} }
promise.set_result(boc_->get_cell_db_reader()); promise.set_result(boc_->get_cell_db_reader());
@ -440,9 +578,16 @@ void CellDbIn::gc_cont2(BlockHandle handle) {
timer_get_keys.reset(); timer_get_keys.reset();
td::PerfWarningTimer timer_boc{"gccell_boc", 0.05}; td::PerfWarningTimer timer_boc{"gccell_boc", 0.05};
auto cell = boc_->load_cell(F.root_hash.as_slice()).move_as_ok(); auto r_cell = boc_->load_cell(F.root_hash.as_slice());
td::Ref<vm::Cell> cell;
if (r_cell.is_ok()) {
cell = r_cell.move_as_ok();
boc_->dec(cell);
LOG(ERROR) << "GC of " << handle->id().to_str();
} else {
LOG(ERROR) << "GC of UNKNOWN root: " << handle->id().to_str();
}
boc_->dec(cell);
db_busy_ = true; db_busy_ = true;
boc_->prepare_commit_async( boc_->prepare_commit_async(
async_executor, [this, SelfId = actor_id(this), timer_boc = std::move(timer_boc), F = std::move(F), key_hash, async_executor, [this, SelfId = actor_id(this), timer_boc = std::move(timer_boc), F = std::move(F), key_hash,
@ -458,17 +603,19 @@ void CellDbIn::gc_cont2(BlockHandle handle) {
td::PerfWarningTimer timer_write_batch{"gccell_write_batch", 0.05}; td::PerfWarningTimer timer_write_batch{"gccell_write_batch", 0.05};
cell_db_->begin_write_batch().ensure(); cell_db_->begin_write_batch().ensure();
boc_->commit(stor).ensure();
cell_db_->erase(get_key(key_hash)).ensure(); boc_->meta_erase(get_key(key_hash)).ensure();
set_block(F.prev, std::move(P)); set_block(F.prev, std::move(P));
set_block(F.next, std::move(N)); set_block(F.next, std::move(N));
if (handle->id().is_masterchain()) { if (handle->id().is_masterchain()) {
last_deleted_mc_state_ = handle->id().seqno(); last_deleted_mc_state_ = handle->id().seqno();
std::string key = "stats.last_deleted_mc_seqno", value = td::to_string(last_deleted_mc_state_); std::string key = "stats.last_deleted_mc_seqno", value = td::to_string(last_deleted_mc_state_);
cell_db_->set(td::as_slice(key), td::as_slice(value)); boc_->meta_set(td::as_slice(key), td::as_slice(value));
} }
boc_->commit(stor).ensure();
cell_db_->commit_write_batch().ensure(); cell_db_->commit_write_batch().ensure();
alarm_timestamp() = td::Timestamp::now(); alarm_timestamp() = td::Timestamp::now();
timer_write_batch.reset(); timer_write_batch.reset();
@ -530,7 +677,7 @@ CellDbIn::KeyHash CellDbIn::get_empty_key_hash() {
td::Result<CellDbIn::DbEntry> CellDbIn::get_block(KeyHash key_hash) { td::Result<CellDbIn::DbEntry> CellDbIn::get_block(KeyHash key_hash) {
const auto key = get_key(key_hash); const auto key = get_key(key_hash);
std::string value; std::string value;
auto R = cell_db_->get(td::as_slice(key), value); auto R = boc_->meta_get(td::as_slice(key), value);
R.ensure(); R.ensure();
auto S = R.move_as_ok(); auto S = R.move_as_ok();
if (S == td::KeyValue::GetStatus::NotFound) { if (S == td::KeyValue::GetStatus::NotFound) {
@ -543,7 +690,7 @@ td::Result<CellDbIn::DbEntry> CellDbIn::get_block(KeyHash key_hash) {
void CellDbIn::set_block(KeyHash key_hash, DbEntry e) { void CellDbIn::set_block(KeyHash key_hash, DbEntry e) {
const auto key = get_key(key_hash); const auto key = get_key(key_hash);
cell_db_->set(td::as_slice(key), e.release()).ensure(); boc_->meta_set(td::as_slice(key), e.release());
} }
void CellDbIn::migrate_cell(td::Bits256 hash) { void CellDbIn::migrate_cell(td::Bits256 hash) {
@ -631,12 +778,14 @@ void CellDb::alarm() {
} }
void CellDb::load_cell(RootHash hash, td::Promise<td::Ref<vm::DataCell>> promise) { void CellDb::load_cell(RootHash hash, td::Promise<td::Ref<vm::DataCell>> promise) {
if (in_memory_boc_) { if (thread_safe_boc_) {
auto result = in_memory_boc_->load_root_thread_safe(hash.as_slice()); auto result = thread_safe_boc_->load_root_thread_safe(hash.as_slice());
if (result.is_ok()) { if (result.is_ok()) {
return async_apply("load_cell_result", std::move(promise), std::move(result)); return async_apply("load_cell_result", std::move(promise), std::move(result));
} else { } else {
LOG(ERROR) << "load_root_thread_safe failed - this is suspicious"; LOG(ERROR) << "load_root_thread_safe failed - this is suspicious";
send_closure(cell_db_, &CellDbIn::load_cell, hash, std::move(promise));
return;
} }
} }
if (!started_) { if (!started_) {
@ -710,6 +859,13 @@ std::vector<std::pair<std::string, std::string>> CellDbIn::CellDbStatistics::pre
for (auto& [key, value] : boc_stats_->custom_stats) { for (auto& [key, value] : boc_stats_->custom_stats) {
stats.emplace_back(key, value); stats.emplace_back(key, value);
} }
for (auto& [key, value] : boc_stats_->named_stats.stats_str) {
stats.emplace_back(key, value);
}
for (auto& [key, value] : boc_stats_->named_stats.stats_int) {
stats.emplace_back(key, td::to_string(value));
}
} }
return stats; return stats;
} }

View file

@ -195,13 +195,13 @@ class CellDb : public CellDbBase {
started_ = true; started_ = true;
boc_->set_loader(std::make_unique<vm::CellLoader>(std::move(snapshot), on_load_callback_)).ensure(); boc_->set_loader(std::make_unique<vm::CellLoader>(std::move(snapshot), on_load_callback_)).ensure();
} }
void set_in_memory_boc(std::shared_ptr<const vm::DynamicBagOfCellsDb> in_memory_boc) { void set_thread_safe_boc(std::shared_ptr<const vm::DynamicBagOfCellsDb> thread_safe_boc) {
CHECK(opts_->get_celldb_in_memory()); CHECK(opts_->get_celldb_in_memory() || opts_->get_celldb_v2());
if (!started_) { if (!started_) {
alarm(); alarm();
} }
started_ = true; started_ = true;
in_memory_boc_ = std::move(in_memory_boc); thread_safe_boc_ = std::move(thread_safe_boc);
} }
void get_cell_db_reader(td::Promise<std::shared_ptr<vm::CellDbReader>> promise); void get_cell_db_reader(td::Promise<std::shared_ptr<vm::CellDbReader>> promise);
@ -219,7 +219,7 @@ class CellDb : public CellDbBase {
td::actor::ActorOwn<CellDbIn> cell_db_; td::actor::ActorOwn<CellDbIn> cell_db_;
std::unique_ptr<vm::DynamicBagOfCellsDb> boc_; std::unique_ptr<vm::DynamicBagOfCellsDb> boc_;
std::shared_ptr<const vm::DynamicBagOfCellsDb> in_memory_boc_; std::shared_ptr<const vm::DynamicBagOfCellsDb> thread_safe_boc_;
bool started_ = false; bool started_ = false;
std::vector<std::pair<std::string, std::string>> prepared_stats_{{"started", "false"}}; std::vector<std::pair<std::string, std::string>> prepared_stats_{{"started", "false"}};

View file

@ -139,6 +139,9 @@ struct ValidatorManagerOptionsImpl : public ValidatorManagerOptions {
bool get_celldb_in_memory() const override { bool get_celldb_in_memory() const override {
return celldb_in_memory_; return celldb_in_memory_;
} }
bool get_celldb_v2() const override {
return celldb_v2_;
}
td::optional<double> get_catchain_max_block_delay() const override { td::optional<double> get_catchain_max_block_delay() const override {
return catchain_max_block_delay_; return catchain_max_block_delay_;
} }
@ -237,6 +240,9 @@ struct ValidatorManagerOptionsImpl : public ValidatorManagerOptions {
void set_celldb_in_memory(bool value) override { void set_celldb_in_memory(bool value) override {
celldb_in_memory_ = value; celldb_in_memory_ = value;
} }
void set_celldb_v2(bool value) override {
celldb_v2_ = value;
}
void set_catchain_max_block_delay(double value) override { void set_catchain_max_block_delay(double value) override {
catchain_max_block_delay_ = value; catchain_max_block_delay_ = value;
} }
@ -304,6 +310,7 @@ struct ValidatorManagerOptionsImpl : public ValidatorManagerOptions {
bool celldb_direct_io_ = false; bool celldb_direct_io_ = false;
bool celldb_preload_all_ = false; bool celldb_preload_all_ = false;
bool celldb_in_memory_ = false; bool celldb_in_memory_ = false;
bool celldb_v2_ = false;
td::optional<double> catchain_max_block_delay_, catchain_max_block_delay_slow_; td::optional<double> catchain_max_block_delay_, catchain_max_block_delay_slow_;
bool state_serializer_enabled_ = true; bool state_serializer_enabled_ = true;
td::Ref<CollatorOptions> collator_options_{true}; td::Ref<CollatorOptions> collator_options_{true};

View file

@ -104,6 +104,7 @@ struct ValidatorManagerOptions : public td::CntObject {
virtual std::string get_session_logs_file() const = 0; virtual std::string get_session_logs_file() const = 0;
virtual td::uint32 get_celldb_compress_depth() const = 0; virtual td::uint32 get_celldb_compress_depth() const = 0;
virtual bool get_celldb_in_memory() const = 0; virtual bool get_celldb_in_memory() const = 0;
virtual bool get_celldb_v2() const = 0;
virtual size_t get_max_open_archive_files() const = 0; virtual size_t get_max_open_archive_files() const = 0;
virtual double get_archive_preload_period() const = 0; virtual double get_archive_preload_period() const = 0;
virtual bool get_disable_rocksdb_stats() const = 0; virtual bool get_disable_rocksdb_stats() const = 0;
@ -144,6 +145,7 @@ struct ValidatorManagerOptions : public td::CntObject {
virtual void set_celldb_direct_io(bool value) = 0; virtual void set_celldb_direct_io(bool value) = 0;
virtual void set_celldb_preload_all(bool value) = 0; virtual void set_celldb_preload_all(bool value) = 0;
virtual void set_celldb_in_memory(bool value) = 0; virtual void set_celldb_in_memory(bool value) = 0;
virtual void set_celldb_v2(bool value) = 0;
virtual void set_catchain_max_block_delay(double value) = 0; virtual void set_catchain_max_block_delay(double value) = 0;
virtual void set_catchain_max_block_delay_slow(double value) = 0; virtual void set_catchain_max_block_delay_slow(double value) = 0;
virtual void set_state_serializer_enabled(bool value) = 0; virtual void set_state_serializer_enabled(bool value) = 0;