mirror of
https://github.com/ton-blockchain/ton
synced 2025-03-09 15:40:10 +00:00

- thread safe cache - parallel commit - multiple optimizations - support of key-value merge operations - improved tests and benchmarks - in-memory version won't read from key value after start - uses vector in-memory table now - use rocksdb::WALRecoveryMode::kTolerateCorruptedTailRecords - do not silently ignore errors during recovery
1505 lines
48 KiB
C++
1505 lines
48 KiB
C++
#include "vm/db/DynamicBagOfCellsDb.h"
|
|
#include "vm/db/CellStorage.h"
|
|
#include "vm/db/CellHashTable.h"
|
|
|
|
#include "vm/cells/ExtCell.h"
|
|
|
|
#include "td/utils/base64.h"
|
|
#include "td/utils/format.h"
|
|
#include "td/utils/ThreadSafeCounter.h"
|
|
#include "td/utils/misc.h"
|
|
#include "validator/validator.h"
|
|
|
|
#include "vm/cellslice.h"
|
|
|
|
#include <optional>
|
|
|
|
namespace vm {
|
|
namespace {
|
|
|
|
// Very stupid Vector/MpmcQueue
|
|
template <class T>
|
|
struct TsVector {
|
|
TsVector() {
|
|
first_block_size_ = 64;
|
|
blocks_[0].data.resize(first_block_size_);
|
|
blocks_[0].is_ready = true;
|
|
}
|
|
TsVector(std::vector<T> base) {
|
|
first_block_size_ = base.size();
|
|
blocks_[0].data = std::move(base);
|
|
blocks_[0].is_ready = true;
|
|
}
|
|
struct Block {
|
|
std::mutex mutex;
|
|
std::atomic<bool> is_ready{false};
|
|
std::vector<T> data;
|
|
};
|
|
T &at(size_t i) {
|
|
td::uint64 j = i / first_block_size_;
|
|
td::int32 hb = 63 - td::count_leading_zeroes64(j); // hb = -1 if j=0, else hb>=0
|
|
|
|
// If j=0, hb<0, so hb>>31 = -1 => mask=0
|
|
// If j>0, hb>=0, so hb>>31=0 => mask=~0 (all ones)
|
|
td::uint64 mask = ~(td::uint64)(hb >> 31);
|
|
|
|
size_t block_i = hb + 1;
|
|
uint64_t shift = hb & 63ULL;
|
|
uint64_t start = ((1ULL << shift) * first_block_size_) & mask;
|
|
size_t pos_in_block = i - start;
|
|
auto &block = blocks_[block_i];
|
|
if (block.is_ready.load(std::memory_order_acquire)) {
|
|
return block.data.at(pos_in_block);
|
|
}
|
|
|
|
std::unique_lock<std::mutex> lock(block.mutex);
|
|
if (block.is_ready.load(std::memory_order_acquire)) {
|
|
return block.data.at(pos_in_block);
|
|
}
|
|
block.resize(start);
|
|
block.is_ready.store(true, std::memory_order_release);
|
|
return block.data.at(pos_in_block);
|
|
}
|
|
template <class S>
|
|
void push_back(S &&value) {
|
|
at(end_.fetch_add(1, std::memory_order_relaxed)) = std::forward<S>(value);
|
|
}
|
|
T pop_front() {
|
|
auto pos = begin_.fetch_add(1, std::memory_order_relaxed);
|
|
while (pos >= end_.load(std::memory_order_acquire)) {
|
|
// This may (or may not) use too much CPU
|
|
td::this_thread::yield();
|
|
}
|
|
return std::move(at(pos));
|
|
}
|
|
size_t size() const {
|
|
return end_.load();
|
|
}
|
|
|
|
std::array<Block, 64> blocks_;
|
|
size_t first_block_size_{0};
|
|
std::atomic<size_t> begin_{0};
|
|
std::atomic<size_t> end_{0};
|
|
};
|
|
struct CellInfo;
|
|
|
|
class CellDbReaderExt;
|
|
struct DynamicBocExtCellExtra {
|
|
std::shared_ptr<CellDbReaderExt> reader;
|
|
};
|
|
|
|
class DynamicBocCellLoader {
|
|
public:
|
|
static td::Result<Ref<DataCell>> load_data_cell(const ExtCell<DynamicBocExtCellExtra, DynamicBocCellLoader> &cell,
|
|
const DynamicBocExtCellExtra &extra);
|
|
};
|
|
// External cell type used throughout this file.
using DynamicBocExtCell = ExtCell<DynamicBocExtCellExtra, DynamicBocCellLoader>;
|
|
|
|
class CellDbReaderExt : public CellDbReader {
|
|
public:
|
|
virtual td::Result<Ref<DataCell>> load_ext_cell(Ref<DynamicBocExtCell> cell) = 0;
|
|
};
|
|
|
|
// Forwards the load request to the reader stored in the cell's extra data.
td::Result<Ref<DataCell>> DynamicBocCellLoader::load_data_cell(const DynamicBocExtCell &cell,
                                                               const DynamicBocExtCellExtra &extra) {
  auto &reader = *extra.reader;
  return reader.load_ext_cell(Ref(&cell));
}
|
|
|
|
// Declares a counter member named `x`, obtained from the `nc` in scope at the point of use and
// registered under the stringified name of `x`.
#define S(x) td::NamedThreadSafeCounter::CounterRef x{nc.get_counter(#x)}
|
|
|
|
struct CacheStats {
|
|
td::NamedThreadSafeCounter nc;
|
|
S(load_cell_ext);
|
|
S(load_cell_ext_cache_hits);
|
|
S(load_cell_sync);
|
|
S(load_cell_sync_cache_hits);
|
|
S(load_cell_async);
|
|
S(load_cell_async_cache_hits);
|
|
S(ext_cells);
|
|
S(ext_cells_load);
|
|
S(ext_cells_load_cache_hits);
|
|
|
|
S(kv_read_found);
|
|
S(kv_read_not_found);
|
|
|
|
S(sync_with_db);
|
|
S(sync_with_db_only_ref);
|
|
S(load_cell_no_cache);
|
|
};
|
|
|
|
struct CommitStats {
|
|
td::NamedThreadSafeCounter nc;
|
|
|
|
S(to_inc);
|
|
S(to_dec);
|
|
|
|
S(gather_new_cells_calls);
|
|
S(gather_new_cells_calls_it);
|
|
S(update_parents_calls);
|
|
S(update_parents_calls_it);
|
|
S(dec_calls);
|
|
S(dec_calls_it);
|
|
|
|
S(new_cells);
|
|
S(new_cells_leaves);
|
|
|
|
S(new_cells_loaded_not_in_db);
|
|
S(new_cells_loaded_in_db);
|
|
S(new_cells_not_in_db_fast);
|
|
|
|
S(dec_loaded);
|
|
S(dec_to_zero);
|
|
|
|
S(changes_loaded);
|
|
|
|
// new diff logic
|
|
S(diff_zero);
|
|
S(diff_full);
|
|
S(diff_erase);
|
|
S(diff_ref_cnt);
|
|
|
|
// old full data logic
|
|
S(inc_save);
|
|
S(inc_save_full);
|
|
S(inc_save_only_ref_cnt);
|
|
S(inc_new_cell);
|
|
S(inc_just_ref_cnt);
|
|
|
|
S(dec_save);
|
|
S(dec_save_full);
|
|
S(dec_save_only_refcnt);
|
|
S(dec_save_erase);
|
|
S(dec_erase_cell);
|
|
S(dec_just_ref_cnt);
|
|
};
|
|
|
|
// Small value of type T protected by a version counter: an even counter means the value is stable,
// an odd counter means a writer is in the middle of replacing it.
template <class T>
struct AtomicPod {
  // Returns a snapshot of the value, retrying until a read is not overlapped by a write.
  T load() const {
    for (;;) {
      auto snapshot = try_read_stable();
      if (snapshot) {
        return snapshot->second;
      }
    }
  }

  // Applies `f` to the current value. `f` returns an optional replacement:
  //  - empty: nothing is written, the result is {current value, false};
  //  - engaged: the replacement is stored, the result is {replacement, true}.
  // `f` may be invoked several times when the update races with another writer.
  template <class F>
  std::pair<T, bool> update(F &&f) {
    for (;;) {
      auto snapshot = try_read_stable();
      if (!snapshot) {
        continue;
      }
      auto version = snapshot->first;
      T current = snapshot->second;

      auto replacement = f(current);
      if (!replacement) {
        return {current, false};
      }

      // Move the counter to an odd value to claim the write; retry if the version changed meanwhile.
      const bool claimed = lock_.compare_exchange_weak(version, version + 1, std::memory_order_acq_rel,
                                                       std::memory_order_relaxed);
      if (!claimed) {
        continue;
      }

      data_ = *replacement;  // plain store, performed while the counter is odd
      lock_.fetch_add(1, std::memory_order_release);
      return {*replacement, true};
    }
  }

 private:
  mutable std::atomic<std::uint64_t> lock_{0};
  T data_{};

  // Reads the value once; returns {version, value} or nothing if a write was in progress or happened
  // during the read.
  std::optional<std::pair<std::uint64_t, T>> try_read_stable() const {
    const auto version_before = lock_.load(std::memory_order_acquire);
    if (version_before % 2 != 0) {
      return std::nullopt;
    }
    T copy = data_;  // plain read, validated by comparing versions below
    const auto version_after = lock_.load(std::memory_order_acquire);
    if (version_after == version_before) {
      return std::make_pair(version_before, copy);
    }
    return std::nullopt;
  }
};
|
|
|
|
struct InDbInfo {
|
|
std::vector<CellInfo *> parents;
|
|
std::atomic<td::uint32> pending_children{0};
|
|
std::atomic<bool> maybe_in_db{true};
|
|
std::atomic<bool> visited_in_gather_new_cells{false};
|
|
};
|
|
// Debug formatter: prints maybe_in_db, the pending children counter and the number of parents.
td::StringBuilder &operator<<(td::StringBuilder &sb, const InDbInfo &info) {
  sb << "mb_in_db:" << info.maybe_in_db.load();
  sb << " chld_n:" << info.pending_children;
  sb << " prnt_n:" << info.parents.size();
  return sb;
}
|
|
struct CellInfo {
|
|
struct State {
|
|
// db_ref_cnt and in_db are correct
|
|
bool sync_with_db{false};
|
|
|
|
// ignore if sync_with_db is false
|
|
td::int32 db_ref_cnt{0};
|
|
td::int32 db_refcnt_fixup{0};
|
|
|
|
// if true - cell is definitely in db
|
|
// if false - we know that cell is not in db only is sync_with_db=true
|
|
bool in_db{false};
|
|
|
|
// diff to be applied
|
|
};
|
|
AtomicPod<State> state;
|
|
std::atomic<td::int32> ref_cnt_diff{0};
|
|
|
|
std::atomic<bool> visited{false};
|
|
td::unique_ptr<InDbInfo> in_db_info_ptr;
|
|
std::mutex mutex;
|
|
|
|
// Could be AtomicRef<Cell>, but is am not sure that it is worth it
|
|
const Ref<Cell> cell;
|
|
|
|
explicit CellInfo(Ref<Cell> cell) : cell(std::move(cell)) {
|
|
}
|
|
|
|
InDbInfo &in_db_info() {
|
|
return *in_db_info_ptr;
|
|
}
|
|
const InDbInfo &in_db_info() const {
|
|
return *in_db_info_ptr;
|
|
}
|
|
InDbInfo &in_db_info_create() { // NOT thread safe
|
|
if (!in_db_info_ptr) {
|
|
in_db_info_ptr = td::make_unique<InDbInfo>();
|
|
}
|
|
return in_db_info();
|
|
}
|
|
InDbInfo &in_db_info_create(CellInfo *parent) { // Thread Safe
|
|
std::unique_lock lock(mutex);
|
|
if (!in_db_info_ptr) {
|
|
in_db_info_ptr = td::make_unique<InDbInfo>();
|
|
}
|
|
auto &res = *in_db_info_ptr;
|
|
if (parent != nullptr) {
|
|
res.parents.emplace_back(parent);
|
|
}
|
|
lock.unlock();
|
|
return res;
|
|
}
|
|
void in_db_info_destroy() {
|
|
in_db_info_ptr = nullptr;
|
|
}
|
|
td::int32 inc_ref_cnt() {
|
|
return ref_cnt_diff.fetch_add(1, std::memory_order_relaxed) + 1;
|
|
}
|
|
td::int32 dec_ref_cnt() {
|
|
return ref_cnt_diff.fetch_sub(1, std::memory_order_relaxed) - 1;
|
|
}
|
|
td::int32 get_ref_cnt_diff() const {
|
|
return ref_cnt_diff.load(std::memory_order_relaxed);
|
|
}
|
|
|
|
void set_not_in_db() {
|
|
state.update([&](State state) -> std::optional<State> {
|
|
if (state.sync_with_db) {
|
|
CHECK(state.db_ref_cnt == 0);
|
|
CHECK(!state.in_db);
|
|
return {};
|
|
}
|
|
state.sync_with_db = true;
|
|
state.in_db = false;
|
|
state.db_ref_cnt = 0;
|
|
return state;
|
|
});
|
|
}
|
|
void set_in_db() {
|
|
state.update([&](State state) -> std::optional<State> {
|
|
if (state.sync_with_db) {
|
|
//LOG_CHECK(state.in_db) << *this;
|
|
return {};
|
|
}
|
|
state.in_db = true;
|
|
return state;
|
|
});
|
|
}
|
|
void synced_with_db(td::int32 db_ref_cnt) {
|
|
state.update([&](State state) -> std::optional<State> {
|
|
if (state.sync_with_db) {
|
|
CHECK(state.in_db);
|
|
CHECK(state.db_ref_cnt == db_ref_cnt);
|
|
return {};
|
|
}
|
|
state.in_db = true;
|
|
state.db_ref_cnt = db_ref_cnt;
|
|
return state;
|
|
});
|
|
}
|
|
bool visit() {
|
|
return !visited.exchange(true);
|
|
}
|
|
void on_written_to_db() {
|
|
auto diff = ref_cnt_diff.exchange(0);
|
|
state.update([&](State state) -> std::optional<State> {
|
|
if (diff == 0) {
|
|
return {};
|
|
}
|
|
if (state.sync_with_db) {
|
|
state.db_ref_cnt += diff;
|
|
CHECK(state.db_ref_cnt >= 0);
|
|
state.in_db = state.db_ref_cnt > 0;
|
|
} else {
|
|
CHECK(diff > 0);
|
|
state.in_db = true;
|
|
state.db_refcnt_fixup += diff;
|
|
}
|
|
return state;
|
|
});
|
|
}
|
|
|
|
td::Result<Ref<DataCell>> get_data_cell() {
|
|
TRY_RESULT(loaded_cell, cell->load_cell());
|
|
return loaded_cell.data_cell;
|
|
}
|
|
Cell::Hash key() const {
|
|
return cell->get_hash();
|
|
}
|
|
bool operator<(const CellInfo &other) const {
|
|
return key() < other.key();
|
|
}
|
|
|
|
struct Eq {
|
|
using is_transparent = void; // Pred to use
|
|
bool operator()(const CellInfo &info, const CellInfo &other_info) const {
|
|
return info.key() == other_info.key();
|
|
}
|
|
bool operator()(const CellInfo &info, td::Slice hash) const {
|
|
return info.key().as_slice() == hash;
|
|
}
|
|
bool operator()(td::Slice hash, const CellInfo &info) const {
|
|
return info.key().as_slice() == hash;
|
|
}
|
|
};
|
|
struct Hash {
|
|
using is_transparent = void; // Pred to use
|
|
using transparent_key_equal = Eq;
|
|
size_t operator()(td::Slice hash) const {
|
|
return cell_hash_slice_hash(hash);
|
|
}
|
|
size_t operator()(const CellInfo &info) const {
|
|
return cell_hash_slice_hash(info.key().as_slice());
|
|
}
|
|
};
|
|
};
|
|
// Debug formatter for CellInfo: hash prefix (plus contents when the cell is loaded), the entry
// address, the database state and, when present, the attached InDbInfo.
td::StringBuilder &operator<<(td::StringBuilder &sb, const CellInfo &info) {
  if (!info.cell->is_loaded()) {
    sb << info.cell->get_hash().to_hex().substr(0, 8);
  } else {
    auto data_cell = info.cell->load_cell().move_as_ok().data_cell;
    vm::CellSlice cs(vm::NoVm{}, data_cell);
    sb << data_cell->get_hash().to_hex().substr(0, 8);
    sb << " refs:" << data_cell->size_refs();
    sb << " data:" << cs.data_bits().to_hex(cs.size());
    sb << " data_ptr=" << data_cell.get();
    sb << " data_ref_cnt(" << data_cell->get_refcnt() << ")";
  }

  const auto snapshot = info.state.load();
  sb << " " << &info;
  sb << "\n\tin_db=" << snapshot.in_db << " sync_with_db=" << snapshot.sync_with_db;
  sb << " ref_cnt_diff=" << info.get_ref_cnt_diff() << " db_ref_cnt=" << snapshot.db_ref_cnt;
  sb << " db_ref_cnt_fixup=" << snapshot.db_refcnt_fixup;
  if (snapshot.sync_with_db) {
    // Effective count = stored count + pending delta.
    sb << " REFS(" << info.get_ref_cnt_diff() + snapshot.db_ref_cnt << ")";
  }
  if (info.in_db_info_ptr) {
    sb << " " << info.in_db_info();
  }
  sb << " visited=" << info.visited.load();
  return sb;
}
|
|
|
|
struct ExecutorOptions {
|
|
size_t extra_threads_n{0};
|
|
std::shared_ptr<DynamicBagOfCellsDb::AsyncExecutor> async_executor;
|
|
};
|
|
template <class InputT = CellInfo *, class OutputT = CellInfo *>
|
|
class ExecutorImpl {
|
|
public:
|
|
ExecutorImpl(ExecutorOptions options) : options_(options) {
|
|
}
|
|
ExecutorOptions options_;
|
|
using InputData = std::vector<std::vector<InputT>>;
|
|
using OutputData = std::vector<std::vector<OutputT>>;
|
|
struct InputChunk {
|
|
td::Span<InputT> infos;
|
|
size_t begin{};
|
|
size_t end{};
|
|
};
|
|
|
|
template <class F>
|
|
OutputData process(const InputData &data, const F &process_task_f) {
|
|
if (options_.extra_threads_n > 0) {
|
|
return process_parallel(data, process_task_f);
|
|
} else {
|
|
return process_sequential(data, process_task_f);
|
|
}
|
|
}
|
|
template <class F>
|
|
struct SingleThreadWorker {
|
|
const F &process_task_f;
|
|
mutable std::vector<OutputT> results{};
|
|
void add_task(InputT input) const {
|
|
process_task_f(input, *this);
|
|
}
|
|
void add_result(OutputT output) const {
|
|
results.push_back(output);
|
|
}
|
|
};
|
|
template <class F>
|
|
OutputData process_sequential(const InputData &data, const F &process_task_f) {
|
|
auto w = SingleThreadWorker<F>{process_task_f};
|
|
for (auto &chunk : data) {
|
|
for (auto &info : chunk) {
|
|
process_task_f(info, w);
|
|
}
|
|
}
|
|
|
|
return {std::move(w.results)};
|
|
}
|
|
|
|
template <class ProcessTaskF>
|
|
struct Shared;
|
|
|
|
template <class ProcessTaskF>
|
|
struct Worker {
|
|
size_t worker_i{};
|
|
std::shared_ptr<Shared<ProcessTaskF>> shared;
|
|
|
|
void add_task(InputT input) const {
|
|
shared->delay_or_process_task(input, *this);
|
|
}
|
|
void add_result(OutputT value) const {
|
|
shared->add_result(value, worker_i);
|
|
}
|
|
void loop() const {
|
|
shared->loop(*this);
|
|
}
|
|
};
|
|
|
|
template <class ProcessTaskF>
|
|
struct Shared {
|
|
Shared(size_t workers_n, const InputData &input_data, const ProcessTaskF &process_task_f)
|
|
: input_chunks(prepare_input_chunks(input_data))
|
|
, workers_n(workers_n)
|
|
, input_size(input_chunks.empty() ? 0 : input_chunks.back().end)
|
|
, batch_size(std::clamp(input_size / workers_n / 4, size_t(1), size_t(128)))
|
|
, process_task_f(process_task_f) {
|
|
}
|
|
|
|
const std::vector<InputChunk> input_chunks;
|
|
|
|
const size_t workers_n{0};
|
|
const size_t input_size{0};
|
|
const size_t batch_size{128};
|
|
|
|
const ProcessTaskF &process_task_f;
|
|
|
|
// Position in input
|
|
std::atomic<size_t> next_input_i{0};
|
|
|
|
// Shared queue
|
|
// Probably a simpler queue would also work fine
|
|
td::MpmcQueue<InputT> mpmc_queue{workers_n};
|
|
using Waiter = td::MpmcSleepyWaiter;
|
|
Waiter waiter;
|
|
std::atomic<size_t> mpmc_queue_size{workers_n}; // guard
|
|
|
|
// Output vectors
|
|
struct ThreadData {
|
|
std::vector<OutputT> output;
|
|
char pad[TD_CONCURRENCY_PAD - sizeof(output)];
|
|
};
|
|
std::vector<ThreadData> thread_data{workers_n};
|
|
|
|
auto prepare_input_chunks(const InputData &input_data) {
|
|
std::vector<InputChunk> chunks;
|
|
for (auto &chunk : input_data) {
|
|
size_t prev_end = chunks.empty() ? 0 : chunks.back().end;
|
|
chunks.push_back({.infos = td::as_span(chunk), .begin = prev_end, .end = prev_end + chunk.size()});
|
|
}
|
|
return chunks;
|
|
}
|
|
|
|
void delay_or_process_task(InputT input, const Worker<ProcessTaskF> &worker) {
|
|
// if there is enough tasks in queue, we continue recursion
|
|
if (mpmc_queue_size.load(std::memory_order_acquire) > 256) {
|
|
process_task_f(input, worker);
|
|
} else {
|
|
mpmc_queue_size.fetch_add(1, std::memory_order_acq_rel);
|
|
mpmc_queue.push(input, worker.worker_i);
|
|
waiter.notify();
|
|
}
|
|
}
|
|
|
|
void add_result(OutputT result, size_t worker_i) {
|
|
thread_data[worker_i].output.push_back(std::move(result));
|
|
}
|
|
|
|
void process_initial_input(const Worker<ProcessTaskF> &worker) {
|
|
size_t input_chunk_i = 0;
|
|
while (true) {
|
|
auto begin_i = next_input_i.fetch_add(batch_size, std::memory_order_relaxed);
|
|
auto end_i = begin_i + batch_size;
|
|
if (begin_i >= input_size) {
|
|
break;
|
|
}
|
|
for (size_t i = begin_i; i < end_i && i < input_size; i++) {
|
|
while (input_chunks[input_chunk_i].end <= i) {
|
|
input_chunk_i++;
|
|
}
|
|
auto offset = i - input_chunks[input_chunk_i].begin;
|
|
auto task = input_chunks[input_chunk_i].infos[offset];
|
|
process_task_f(task, worker);
|
|
}
|
|
}
|
|
}
|
|
|
|
void on_processed_task_from_queue(size_t worker_i) {
|
|
if (mpmc_queue_size.fetch_add(-1, std::memory_order_acq_rel) == 1) {
|
|
for (size_t i = 0; i < workers_n; i++) {
|
|
mpmc_queue.push(nullptr, worker_i);
|
|
waiter.notify();
|
|
}
|
|
}
|
|
}
|
|
|
|
void process_queue(const Worker<ProcessTaskF> &worker) {
|
|
on_processed_task_from_queue(worker.worker_i);
|
|
|
|
Waiter::Slot slot;
|
|
waiter.init_slot(slot, td::narrow_cast<td::int32>(worker.worker_i));
|
|
|
|
while (true) {
|
|
InputT input{};
|
|
if (mpmc_queue.try_pop(input, worker.worker_i)) {
|
|
waiter.stop_wait(slot);
|
|
if (!input) {
|
|
break;
|
|
}
|
|
process_task_f(input, worker);
|
|
on_processed_task_from_queue(worker.worker_i);
|
|
} else {
|
|
waiter.wait(slot);
|
|
}
|
|
}
|
|
}
|
|
void loop(const Worker<ProcessTaskF> &worker) {
|
|
process_initial_input(worker);
|
|
process_queue(worker);
|
|
}
|
|
void finish() const {
|
|
CHECK(mpmc_queue_size == 0);
|
|
}
|
|
};
|
|
|
|
template <class F>
|
|
OutputData process_parallel(const InputData &input_data, const F &process_task_f) {
|
|
const size_t workers_n = options_.extra_threads_n + 1;
|
|
auto shared = std::make_shared<Shared<F>>(workers_n, input_data, process_task_f);
|
|
|
|
CHECK(workers_n >= 1);
|
|
for (size_t i = 0; i < workers_n; i++) {
|
|
auto to_run = [worker = Worker<F>{.worker_i = i, .shared = shared}] { worker.loop(); };
|
|
|
|
if (i + 1 == workers_n) {
|
|
to_run();
|
|
} else if (options_.async_executor) {
|
|
options_.async_executor->execute_async(std::move(to_run));
|
|
} else {
|
|
// NB: td::thread, NOT std::thread
|
|
td::thread(std::move(to_run)).detach();
|
|
}
|
|
}
|
|
shared->finish();
|
|
return td::transform(shared->thread_data, [](auto &&x) { return std::move(x.output); });
|
|
}
|
|
};
|
|
struct Executor {
|
|
Executor(ExecutorOptions options = {}) : options_(options) {
|
|
}
|
|
|
|
template <class InputT = CellInfo *, class OutputT = CellInfo *, class F>
|
|
auto operator()(const std::vector<std::vector<InputT>> &data, const F &process_task_f) {
|
|
return ExecutorImpl<InputT, OutputT>(options_).process(data, process_task_f);
|
|
}
|
|
|
|
private:
|
|
ExecutorOptions options_;
|
|
};
|
|
|
|
// Thread safe storage for CellInfo
|
|
// Will be used by everybody as shared cache. Yes there is some overhead, but it don't want to create other hash table
|
|
struct CellInfoStorage {
|
|
public:
|
|
// All methods are thead safe
|
|
// All CellInfo pointers lives as long as CellInfoStorage
|
|
|
|
// returns CellInfo, only if it is already exists
|
|
CellInfo *get_cell_info(td::Slice hash) {
|
|
return lock(hash)->hash_table.get_if_exists(hash);
|
|
}
|
|
|
|
CellInfo &create_cell_info_from_db(Ref<DataCell> data_cell, td::int32 ref_cnt) {
|
|
auto &info = create_cell_info_from_data_cell(std::move(data_cell));
|
|
info.synced_with_db(ref_cnt);
|
|
return info;
|
|
}
|
|
|
|
// Creates CellInfo from data_cell, or updates existing CellInfo if is not yet loaded
|
|
CellInfo &create_cell_info_from_data_cell(Ref<DataCell> cell) {
|
|
CHECK(cell.not_null());
|
|
CHECK(cell->is_loaded());
|
|
|
|
auto hash = cell->get_hash();
|
|
auto [info, created] = lock(hash.as_slice())->hash_table.emplace(hash.as_slice(), std::move(cell));
|
|
|
|
if (!created) {
|
|
info.cell->set_data_cell(std::move(cell));
|
|
}
|
|
return info;
|
|
}
|
|
|
|
// Creates CellInfo from cell. If cell is loaded, it will be used to rewrite or udpate current cell
|
|
CellInfo &create_cell_info(Ref<Cell> cell, CellDbReaderExt *from_reader, CacheStats &stats) {
|
|
if (cell->is_loaded()) {
|
|
return create_cell_info_from_data_cell(cell->load_cell().move_as_ok().data_cell);
|
|
}
|
|
|
|
bool our_ext_cell = false;
|
|
auto ext_cell = dynamic_cast<const DynamicBocExtCell *>(cell.get());
|
|
if (ext_cell) {
|
|
auto prunned_cell = ext_cell->get_prunned_cell();
|
|
if (prunned_cell.not_null()) {
|
|
our_ext_cell = prunned_cell->get_extra().reader.get() == from_reader;
|
|
}
|
|
our_ext_cell = true;
|
|
} else if (!cell->is_loaded()) {
|
|
// if we cached cell from OTHER db is good idea to drop it ASAP
|
|
force_drop_cache_.store(true, std::memory_order_relaxed);
|
|
}
|
|
|
|
auto hash = cell->get_hash();
|
|
auto [info, created] = lock(hash.as_slice())->hash_table.emplace(hash.as_slice(), std::move(cell));
|
|
if (our_ext_cell) {
|
|
stats.ext_cells_load.inc();
|
|
if (info.cell->is_loaded()) {
|
|
stats.ext_cells_load_cache_hits.inc();
|
|
}
|
|
info.set_in_db();
|
|
}
|
|
return info;
|
|
}
|
|
|
|
void dump() {
|
|
LOG(ERROR) << "===========BEGIN DUMP===========";
|
|
for (auto &bucket : buckets_) {
|
|
std::lock_guard guard(bucket.mutex);
|
|
bucket.hash_table.for_each([&](auto &info) { LOG(INFO) << info; });
|
|
}
|
|
LOG(ERROR) << "===========END DUMP===========";
|
|
}
|
|
|
|
size_t cache_size() {
|
|
size_t res = 0;
|
|
for (auto &bucket : buckets_) {
|
|
std::lock_guard guard(bucket.mutex);
|
|
res += bucket.hash_table.size();
|
|
}
|
|
return res;
|
|
}
|
|
bool force_drop_cache() {
|
|
return force_drop_cache_.load(std::memory_order_relaxed);
|
|
}
|
|
|
|
private:
|
|
struct Bucket {
|
|
std::mutex mutex;
|
|
CellHashTable<CellInfo> hash_table;
|
|
};
|
|
constexpr static size_t buckets_n = 8192;
|
|
std::array<Bucket, buckets_n> bucket_;
|
|
|
|
struct Unlock {
|
|
void operator()(Bucket *bucket) const {
|
|
bucket->mutex.unlock();
|
|
}
|
|
};
|
|
std::array<Bucket, buckets_n> buckets_{};
|
|
std::atomic<bool> force_drop_cache_{false};
|
|
|
|
std::unique_ptr<Bucket, Unlock> lock(Bucket &bucket) {
|
|
bucket.mutex.lock();
|
|
return std::unique_ptr<Bucket, Unlock>(&bucket);
|
|
}
|
|
std::unique_ptr<Bucket, Unlock> lock(td::Slice key) {
|
|
auto hash = td::as<size_t>(key.substr(16, 8).ubegin());
|
|
auto bucket_i = hash % buckets_n;
|
|
return lock(buckets_[bucket_i]);
|
|
}
|
|
};
|
|
|
|
class DynamicBagOfCellsDbImplV2 : public DynamicBagOfCellsDb {
|
|
public:
|
|
// Stores the options and bumps the live-instance counter.
explicit DynamicBagOfCellsDbImplV2(CreateV2Options options) : options_(options) {
  // LOG(ERROR) << "Constructor called for DynamicBagOfCellsDbImplV2";
  get_thread_safe_counter().inc();
}
|
|
// Decrements the live-instance counter and drops the reader's cache, if a reader exists.
~DynamicBagOfCellsDbImplV2() {
  // LOG(ERROR) << "Destructor called for DynamicBagOfCellsDbImplV2";
  get_thread_safe_counter().add(-1);

  if (!cell_db_reader_) {
    return;
  }
  cell_db_reader_->drop_cache();
}
|
|
|
|
// Returns every key/value pair in the key range ["desc", "desd") whose key starts with "desc" and
// is not exactly 32 bytes long. Requires that there are no pending meta fixups.
td::Result<std::vector<std::pair<std::string, std::string>>> meta_get_all() const override {
  CHECK(meta_db_fixup_.empty());
  std::vector<std::pair<std::string, std::string>> result;
  auto status = cell_db_reader_->key_value_reader().for_each_in_range(
      "desc", "desd", [&](const td::Slice &key, const td::Slice &value) {
        const bool is_meta_key = td::begins_with(key, "desc") && key.size() != 32;
        if (is_meta_key) {
          result.emplace_back(key.str(), value.str());
        }
        return td::Status::OK();
      });
  TRY_STATUS(std::move(status));
  return result;
}
|
|
// Looks `key` up in the in-memory fixups first (an empty fixup value means "not found") and falls
// back to the key-value reader otherwise.
td::Result<KeyValue::GetStatus> meta_get(td::Slice key, std::string &value) override {
  auto fixup = meta_db_fixup_.find(key);
  if (fixup == meta_db_fixup_.end()) {
    return cell_db_reader_->key_value_reader().get(key, value);
  }
  if (fixup->second.empty()) {
    return KeyValue::GetStatus::NotFound;
  }
  value = fixup->second;
  return KeyValue::GetStatus::Ok;
}
|
|
// Queues a "set key = value" meta change; nothing is written here.
td::Status meta_set(td::Slice key, td::Slice value) override {
  CellStorer::MetaDiff diff{.type = CellStorer::MetaDiff::Set, .key = key.str(), .value = value.str()};
  meta_diffs_.push_back(std::move(diff));
  return td::Status::OK();
}
|
|
// Queues an "erase key" meta change; nothing is written here.
td::Status meta_erase(td::Slice key) override {
  CellStorer::MetaDiff diff{.type = CellStorer::MetaDiff::Erase, .key = key.str()};
  meta_diffs_.push_back(std::move(diff));
  return td::Status::OK();
}
|
|
// Loads a cell by hash through the current reader; a reader must have been set.
td::Result<Ref<DataCell>> load_cell(td::Slice hash) override {
  CHECK(cell_db_reader_);
  auto &reader = *cell_db_reader_;
  return reader.load_cell(hash);
}
|
|
// Roots are loaded exactly like any other cell.
td::Result<Ref<DataCell>> load_root(td::Slice hash) override {
  auto result = load_cell(hash);
  return result;
}
|
|
// Loads a cell through a copy of the reader pointer taken under atomic_cell_db_reader_mutex_;
// fails with "Empty reader" when no reader is set.
td::Result<Ref<DataCell>> load_root_thread_safe(td::Slice hash) const override {
  // TODO: it is better to use AtomicRef, or atomic shared pointer
  // But to use AtomicRef we need a little refactoring
  // And std::atomic<std::shared_ptr<>> is still unsupported by clang
  auto reader = [&] {
    std::unique_lock lock(atomic_cell_db_reader_mutex_);
    return atomic_cell_db_reader_;
  }();
  if (reader) {
    return reader->load_cell(hash);
  }
  return td::Status::Error("Empty reader");
}
|
|
// Asynchronous variant of load_cell(); delegates to the current reader, which must have been set.
void load_cell_async(td::Slice hash, std::shared_ptr<AsyncExecutor> executor,
                     td::Promise<Ref<DataCell>> promise) override {
  CHECK(cell_db_reader_);
  cell_db_reader_->load_cell_async(hash, std::move(executor), std::move(promise));
}
|
|
void prepare_commit_async(std::shared_ptr<AsyncExecutor> executor, td::Promise<td::Unit> promise) override {
|
|
auto promise_ptr = std::make_shared<td::Promise<td::Unit>>(std::move(promise));
|
|
executor->execute_async([this, promise_ptr = std::move(promise_ptr)] {
|
|
prepare_commit();
|
|
promise_ptr->set_value(td::Unit());
|
|
});
|
|
}
|
|
|
|
// Queues `cell` for a reference count increment; null and virtualized cells are ignored.
void inc(const Ref<Cell> &cell) override {
  const bool skip = cell.is_null() || cell->get_virtualization() != 0;
  if (!skip) {
    to_inc_.push_back(cell);
  }
}
|
|
// Queues `cell` for a reference count decrement; null and virtualized cells are ignored.
void dec(const Ref<Cell> &cell) override {
  const bool skip = cell.is_null() || cell->get_virtualization() != 0;
  if (!skip) {
    to_dec_.push_back(cell);
  }
}
|
|
|
|
bool is_prepared_for_commit() {
|
|
return to_inc_.empty() && to_dec_.empty();
|
|
}
|
|
|
|
// Always returns a default-constructed Stats.
Stats get_stats_diff() override {
  return Stats{};
}
|
|
|
|
// Turns the queued inc()/dec() requests into serialized diffs stored in diff_chunks_, then clears
// both queues. Does nothing when both queues are already empty.
td::Status prepare_commit() override {
  if (is_prepared_for_commit()) {
    return td::Status::OK();
  }
  // NB: we don't use options.executor, because it is prone to deadlocks. We need extra_threads_n threads
  // available for blocking
  Executor executor{{.extra_threads_n = options_.extra_threads, .async_executor = {}}};
  // calculate in_db for all vertices reachable from to_inc_ roots
  //  - for ext cells we already know they are in db
  //  - calculate in_db up from leaves
  //  - if at least one child is not in db, then the cell is definitely not in db
  //  - so in best case only leaves will be loaded from db
  //  - this is optional step. All other logic must work in any case
  //  - only already loaded cells are loaded from db

  stats_.to_inc.add(to_inc_.size());
  stats_.to_dec.add(to_dec_.size());

  // Every cell touched by the steps below ends up here and is serialized at the end.
  std::vector<std::vector<CellInfo *>> visited_cells;
  auto add_visited_cells = [&](std::vector<std::vector<CellInfo *>> new_visited_cells) {
    for (auto &group : new_visited_cells) {
      visited_cells.push_back(std::move(group));
    }
  };

  // Step 1: apply the root increments and collect the leaves of the new subtrees.
  std::vector<std::vector<CellInfo *>> new_cells_leaves;
  {
    td::PerfWarningTimer timer("celldb_v2: gather_new_cells");
    std::vector<CellInfo *> prepared_to_inc;
    std::vector<CellInfo *> visited_roots;
    for (auto &root : to_inc_) {
      auto &info = cell_db_reader_->cell_info(root);
      if (info.inc_ref_cnt() == 1 && info.visit()) {
        visited_roots.push_back(&info);
      }
      if (info.state.load().in_db) {
        continue;
      }
      auto &in_db_info = info.in_db_info_create(nullptr);
      const bool already_queued = in_db_info.visited_in_gather_new_cells.exchange(true);
      if (!already_queued) {
        prepared_to_inc.push_back(&info);
      }
    }
    new_cells_leaves =
        executor({std::move(prepared_to_inc)}, [&](CellInfo *info, auto &worker) { gather_new_cells(info, worker); });
    visited_cells.push_back(std::move(visited_roots));
  }

  // LOG(WARNING) << "new_cells_leaves: " << new_cells_leaves.size();
  // Step 2: walk up from the leaves.
  {
    td::PerfWarningTimer timer("celldb_v2: update_parents");
    add_visited_cells(
        executor({std::move(new_cells_leaves)}, [&](CellInfo *info, auto &worker) { update_parents(info, worker); }));
  }
  // Step 3: apply the decrements.
  {
    td::PerfWarningTimer timer("dec");
    std::vector<CellInfo *> prepared_to_dec;
    for (auto &root : to_dec_) {
      prepared_to_dec.push_back(&cell_db_reader_->cell_info(root));
    }
    add_visited_cells(
        executor({std::move(prepared_to_dec)}, [&](CellInfo *info, auto &worker) { dec_cell(info, worker); }));
  }

  // Step 4: serialize a diff for every visited cell.
  td::PerfWarningTimer timer_serialize("celldb_v2: save_diff_serialize", 0.01);
  // LOG(INFO) << "threads_n = " << options_.extra_threads + 1;
  diff_chunks_ = executor.operator()<CellInfo *, CellStorer::Diff>(
      visited_cells, [&](CellInfo *info, auto &worker) { serialize_diff(info, worker); });
  timer_serialize.reset();

  {
    td::PerfWarningTimer timer("celldb_v2: clear");
    to_inc_.clear();
    to_dec_.clear();
  }

  //cell_db_reader_->dump();
  return td::Status::OK();
}
|
|
|
|
// Prepares the pending changes (if not prepared yet) and saves the resulting diff through `storer`.
// Returns the preparation error, if any, without saving anything; otherwise OK.
td::Status commit(CellStorer &storer) override {
  // Propagate a preparation failure instead of silently discarding the returned status.
  TRY_STATUS(prepare_commit());
  // NOTE(review): save_diff()'s declaration is not visible here; if it reports errors they should be
  // propagated as well.
  save_diff(storer);
  // We DON'T delete entries from cache, so cache actually represents diff with snapshot in reader
  // But we don't want to keep old snapshot forever
  LOG_IF(ERROR, dbg) << "clear cell_db_reader";
  //cell_db_reader_->dump();
  //TODO: call drop_cache reliably via rtti

  // Compile-time switch, currently off: when enabled the reader is dropped after every commit.
  constexpr bool always_drop_cache = false;
  if (always_drop_cache) {
    td::PerfWarningTimer timer("celldb_v2: reset reader");
    cell_db_reader_->drop_cache();
    cache_stats_.apply_diff(cell_db_reader_->get_stats());
    cache_stats_.stats_int["commits"] += 1;
    cell_db_reader_ = {};
    // keep atomic reader, so it will be reused
  }
  return td::Status::OK();
}
|
|
|
|
// Returns the current reader, upcast to CellDbReader; a reader must have been set.
std::shared_ptr<CellDbReader> get_cell_db_reader() override {
  CHECK(cell_db_reader_);
  std::shared_ptr<CellDbReader> reader = cell_db_reader_;
  return reader;
}
|
|
|
|
// Installs a new loader. An existing reader (and its cache) is kept when a loader is supplied, the
// cache is below cache_size_max, the reader's TTL is below cache_ttl_max and no forced drop was
// requested; in that case the new loader is not used and only the TTL is incremented. Otherwise the
// old reader is discarded and, if `loader` is not null, a fresh reader is created from it.
td::Status set_loader(std::unique_ptr<CellLoader> loader) override {
  if (cell_db_reader_) {
    auto cache_size = cell_db_reader_->cache_size();
    bool force_drop_cache = cell_db_reader_->force_drop_cache();
    const bool keep_cache = loader && cache_size < options_.cache_size_max &&
                            cell_db_reader_ttl_ < options_.cache_ttl_max && !force_drop_cache;
    if (keep_cache) {
      cell_db_reader_ttl_++;
      return td::Status::OK();
    }

    td::PerfWarningTimer timer(PSTRING() << "celldb_v2: reset reader, TTL=" << cell_db_reader_ttl_ << "/"
                                         << options_.cache_ttl_max << ", cache_size=" << cache_size
                                         << ", force_drop_cache=" << force_drop_cache);
    // Fold the reader's statistics into the cumulative ones before it goes away.
    cache_stats_.apply_diff(cell_db_reader_->get_stats());
    cell_db_reader_->drop_cache();
    cell_db_reader_ = {};
    meta_db_fixup_ = {};
    cell_db_reader_ttl_ = 0;
  }

  if (loader) {
    cell_db_reader_ = std::make_shared<CellDbReaderImpl>(std::move(loader));
    cell_db_reader_ttl_ = 0;
  }

  // Publish the (possibly empty) reader for load_root_thread_safe().
  {
    std::lock_guard guard(atomic_cell_db_reader_mutex_);
    atomic_cell_db_reader_ = cell_db_reader_;
  }
  return td::Status::OK();
}
|
|
|
|
// Only a compress depth of 0 is accepted by this implementation.
void set_celldb_compress_depth(td::uint32 value) override {
  const bool is_supported = value == 0;
  CHECK(is_supported);
}
|
|
|
|
// Exposes the current reader as an ExtCellCreator; a reader must have been set.
vm::ExtCellCreator &as_ext_cell_creator() override {
  CHECK(cell_db_reader_);
  auto &reader = *cell_db_reader_;
  return reader;
}
|
|
// Collects commit counters ("storage_" prefix), cumulative cache counters ("cache_cum_") and, when a
// reader exists, its counters under both "cache_now_" and "cache_cum_", plus cache size/TTL gauges.
td::Result<Stats> get_stats() override {
  auto named = stats_.nc.get_stats().with_prefix("storage_");
  named.apply_diff(cache_stats_.with_prefix("cache_cum_"));
  if (cell_db_reader_) {
    named.apply_diff(cell_db_reader_->get_stats().with_prefix("cache_now_"));
    named.apply_diff(cell_db_reader_->get_stats().with_prefix("cache_cum_"));
  }
  Stats res;
  res.named_stats = std::move(named);
  auto &gauges = res.named_stats.stats_int;
  gauges["cache.size"] = cell_db_reader_ ? cell_db_reader_->cache_size() : 0;
  gauges["cache.size_max"] = options_.cache_size_max;
  gauges["cache.ttl"] = cell_db_reader_ttl_;
  gauges["cache.ttl_max"] = options_.cache_ttl_max;
  return res;
}
|
|
|
|
private:
|
|
// Returns the counter registered under the name "DynamicBagOfCellsDb" in the
// default NamedThreadSafeCounter; it is looked up once and then reused.
static td::NamedThreadSafeCounter::CounterRef get_thread_safe_counter() {
  static auto counter = td::NamedThreadSafeCounter::get_default().get_counter("DynamicBagOfCellsDb");
  return counter;
}
|
|
|
|
class CellDbReaderImpl : public CellDbReaderExt,
|
|
public ExtCellCreator,
|
|
public std::enable_shared_from_this<CellDbReaderImpl> {
|
|
public:
|
|
explicit CellDbReaderImpl(std::unique_ptr<CellLoader> cell_loader) : cell_loader_(std::move(cell_loader)) {
|
|
}
|
|
|
|
size_t cache_size() const {
|
|
// NOT thread safe
|
|
if (internal_storage_) {
|
|
return internal_storage_->cache_size();
|
|
}
|
|
return 0;
|
|
}
|
|
bool force_drop_cache() const {
|
|
// NOT thread safe
|
|
if (internal_storage_) {
|
|
return internal_storage_->force_drop_cache();
|
|
}
|
|
return false;
|
|
}
|
|
void drop_cache() {
|
|
// NOT thread safe
|
|
internal_storage_.reset();
|
|
}
|
|
|
|
td::Result<Ref<Cell>> ext_cell(Cell::LevelMask level_mask, td::Slice hash, td::Slice depth) override {
|
|
// thread safe function
|
|
stats_.ext_cells.inc();
|
|
TRY_RESULT(ext_cell, DynamicBocExtCell::create(PrunnedCellInfo{level_mask, hash, depth},
|
|
DynamicBocExtCellExtra{shared_from_this()}));
|
|
|
|
return ext_cell;
|
|
}
|
|
CellInfo *register_ext_cell_inner(Ref<DynamicBocExtCell> ext_cell, CellInfoStorage &storage) {
|
|
auto &info = storage.create_cell_info(std::move(ext_cell), this, stats_);
|
|
return &info;
|
|
}
|
|
|
|
void load_cell_async(td::Slice hash, std::shared_ptr<AsyncExecutor> executor, td::Promise<Ref<DataCell>> promise) {
|
|
// thread safe function
|
|
stats_.load_cell_async.inc();
|
|
auto maybe_cell = load_cell_fast_path(hash, false, nullptr);
|
|
if (maybe_cell.not_null()) {
|
|
stats_.load_cell_async_cache_hits.inc();
|
|
return promise.set_value(std::move(maybe_cell));
|
|
}
|
|
auto promise_ptr = std::make_shared<td::Promise<Ref<DataCell>>>(std::move(promise));
|
|
|
|
executor->execute_async(
|
|
[self = shared_from_this(), promise_ptr = std::move(promise_ptr), hash = CellHash::from_slice(hash)]() {
|
|
promise_ptr->set_result(self->load_cell(hash.as_slice()));
|
|
});
|
|
}
|
|
|
|
td::Result<Ref<DataCell>> load_cell(td::Slice hash) override {
|
|
// thread safe function
|
|
stats_.load_cell_sync.inc();
|
|
bool loaded{false};
|
|
auto maybe_cell = load_cell_fast_path(hash, true, &loaded);
|
|
if (maybe_cell.not_null()) {
|
|
if (!loaded) {
|
|
stats_.load_cell_sync_cache_hits.inc();
|
|
}
|
|
return maybe_cell;
|
|
}
|
|
return load_cell_slow_path(hash);
|
|
}
|
|
|
|
td::Result<Ref<DataCell>> load_ext_cell(Ref<DynamicBocExtCell> ext_cell) override {
|
|
// thread safe function.
|
|
// Called by external cell
|
|
stats_.load_cell_ext.inc();
|
|
auto storage = weak_storage_.lock();
|
|
if (!storage) {
|
|
TRY_RESULT(load_result, load_cell_no_cache(ext_cell->get_hash().as_slice()));
|
|
return load_result.cell_;
|
|
}
|
|
// we delayed registering ext cell till this moment
|
|
auto cell_info = register_ext_cell_inner(std::move(ext_cell), *storage);
|
|
|
|
CHECK(cell_info != nullptr); // currently all ext_cells are registered in cache
|
|
if (!cell_info->cell->is_loaded()) {
|
|
sync_with_db(*cell_info, true);
|
|
CHECK(cell_info->cell->is_loaded()); // critical, better to fail
|
|
} else {
|
|
stats_.load_cell_ext_cache_hits.inc();
|
|
}
|
|
return cell_info->cell->load_cell().move_as_ok().data_cell;
|
|
}
|
|
|
|
CellInfo &cell_info(Ref<Cell> cell) {
|
|
// thread safe function, but called only by DB
|
|
CHECK(internal_storage_)
|
|
return internal_storage_->create_cell_info(std::move(cell), this, stats_);
|
|
}
|
|
|
|
std::pair<CellInfo::State, bool> sync_with_db(CellInfo &info, bool need_data) {
|
|
// thread safe function, but called only by DB
|
|
auto effective_need_data = need_data;
|
|
if (info.cell->is_loaded()) {
|
|
effective_need_data = false;
|
|
}
|
|
return info.state.update([&](CellInfo::State state) -> std::optional<CellInfo::State> {
|
|
if (state.sync_with_db) {
|
|
return {};
|
|
}
|
|
stats_.sync_with_db.inc();
|
|
if (!effective_need_data) {
|
|
stats_.sync_with_db_only_ref.inc();
|
|
}
|
|
auto load_result =
|
|
cell_loader_->load(info.cell->get_hash().as_slice(), effective_need_data, *this).move_as_ok();
|
|
|
|
state.sync_with_db = true;
|
|
if (load_result.status == CellLoader::LoadResult::NotFound) {
|
|
CHECK(state.in_db == false);
|
|
CHECK(state.db_ref_cnt == 0);
|
|
stats_.kv_read_not_found.inc();
|
|
return state;
|
|
}
|
|
stats_.kv_read_found.inc();
|
|
|
|
state.in_db = true;
|
|
state.db_ref_cnt = load_result.refcnt() + state.db_refcnt_fixup;
|
|
if (load_result.cell().not_null()) {
|
|
info.cell->set_data_cell(std::move(load_result.cell()));
|
|
}
|
|
CHECK(!need_data || info.cell->is_loaded());
|
|
return state;
|
|
});
|
|
}
|
|
|
|
void dump() {
|
|
internal_storage_->dump();
|
|
}
|
|
|
|
td::NamedStats get_stats() const {
|
|
return stats_.nc.get_stats();
|
|
}
|
|
td::KeyValueReader &key_value_reader() {
|
|
return cell_loader_->key_value_reader();
|
|
}
|
|
|
|
private:
|
|
static td::NamedThreadSafeCounter::CounterRef get_thread_safe_counter() {
|
|
static auto res = td::NamedThreadSafeCounter::get_default().get_counter("DynamicBagOfCellsDbLoader");
|
|
return res;
|
|
}
|
|
std::shared_ptr<CellInfoStorage> internal_storage_{std::make_shared<CellInfoStorage>()};
|
|
std::weak_ptr<CellInfoStorage> weak_storage_{internal_storage_};
|
|
std::unique_ptr<CellLoader> cell_loader_;
|
|
CacheStats stats_;
|
|
|
|
Ref<DataCell> load_cell_fast_path(td::Slice hash, bool may_block, bool *loaded) {
|
|
auto storage = weak_storage_.lock();
|
|
if (!storage) {
|
|
return {};
|
|
}
|
|
auto cell_info = storage->get_cell_info(hash);
|
|
if (cell_info != nullptr) {
|
|
if (!cell_info->cell->is_loaded()) {
|
|
if (may_block) {
|
|
if (loaded) {
|
|
*loaded = true;
|
|
}
|
|
CHECK(cell_info->state.load().in_db);
|
|
sync_with_db(*cell_info, true);
|
|
CHECK(cell_info->cell->is_loaded());
|
|
} else {
|
|
return {};
|
|
}
|
|
}
|
|
return cell_info->cell->load_cell().move_as_ok().data_cell;
|
|
}
|
|
return {};
|
|
}
|
|
td::Result<CellLoader::LoadResult> load_cell_no_cache(td::Slice hash) {
|
|
stats_.load_cell_no_cache.inc();
|
|
TRY_RESULT(load_result, cell_loader_->load(hash, true, *this));
|
|
if (load_result.status == CellLoader::LoadResult::NotFound) {
|
|
stats_.kv_read_not_found.inc();
|
|
return td::Status::Error("Cell load failed: not in db");
|
|
}
|
|
stats_.kv_read_found.inc();
|
|
return load_result;
|
|
}
|
|
td::Result<Ref<DataCell>> load_cell_slow_path(td::Slice hash) {
|
|
TRY_RESULT(load_result, load_cell_no_cache(hash));
|
|
auto storage = weak_storage_.lock();
|
|
if (!storage) {
|
|
return load_result.cell_;
|
|
}
|
|
auto &cell_info = storage->create_cell_info_from_db(std::move(load_result.cell()), load_result.refcnt());
|
|
return cell_info.cell->load_cell().move_as_ok().data_cell;
|
|
}
|
|
};
|
|
|
|
CreateV2Options options_;
|
|
std::vector<Ref<Cell>> to_inc_;
|
|
std::vector<Ref<Cell>> to_dec_;
|
|
std::vector<std::vector<CellStorer::Diff>> diff_chunks_;
|
|
std::vector<CellStorer::MetaDiff> meta_diffs_;
|
|
std::map<std::string, std::string, std::less<>> meta_db_fixup_;
|
|
|
|
mutable std::mutex atomic_cell_db_reader_mutex_;
|
|
std::shared_ptr<CellDbReaderImpl> atomic_cell_db_reader_;
|
|
|
|
std::shared_ptr<CellDbReaderImpl> cell_db_reader_;
|
|
size_t cell_db_reader_ttl_{0};
|
|
td::NamedStats cache_stats_;
|
|
CommitStats stats_;
|
|
bool dbg{false};
|
|
|
|
template <class WorkerT>
|
|
void gather_new_cells(CellInfo *info, WorkerT &worker) {
|
|
stats_.gather_new_cells_calls.inc();
|
|
do {
|
|
// invariant: info is not in DB; with created in_db_info
|
|
// we enter into each root only once
|
|
stats_.gather_new_cells_calls_it.inc();
|
|
stats_.new_cells.inc();
|
|
auto &in_db_info = info->in_db_info();
|
|
|
|
CellSlice cs(vm::NoVm{}, info->cell); // ensure cell is loaded
|
|
CellInfo *prev_child_info = nullptr;
|
|
while (cs.have_refs()) {
|
|
auto *child_info = &cell_db_reader_->cell_info(cs.fetch_ref());
|
|
auto child_state = child_info->state.load();
|
|
|
|
if (child_state.in_db) {
|
|
LOG_IF(INFO, dbg) << "gather_new_cells: IN DB\n\tchld: " << *child_info;
|
|
continue;
|
|
}
|
|
|
|
auto &child_in_db_info = child_info->in_db_info_create(info);
|
|
in_db_info.pending_children.fetch_add(1, std::memory_order_relaxed);
|
|
|
|
if (child_in_db_info.visited_in_gather_new_cells.exchange(true)) {
|
|
continue;
|
|
}
|
|
|
|
if (prev_child_info != nullptr) {
|
|
worker.add_task(prev_child_info);
|
|
}
|
|
prev_child_info = child_info;
|
|
}
|
|
LOG_IF(INFO, dbg) << "gather_new_cells: NOT IN DB\n\t" << *info;
|
|
if (in_db_info.pending_children.load(std::memory_order_relaxed) == 0) {
|
|
worker.add_result(info);
|
|
stats_.new_cells_leaves.inc();
|
|
LOG_IF(WARNING, dbg) << "gather_new_cells: ADD LEAVE\n\t" << *info;
|
|
}
|
|
info = prev_child_info;
|
|
} while (info != nullptr);
|
|
}
|
|
|
|
template <class WorkerT>
|
|
void update_parents(CellInfo *info, const WorkerT &worker) {
|
|
stats_.update_parents_calls.inc();
|
|
size_t it = 0;
|
|
do {
|
|
stats_.update_parents_calls_it.inc();
|
|
it++;
|
|
//LOG(INFO) << "update_parents: it=" << it << "\n\t";
|
|
auto &in_db_info = info->in_db_info();
|
|
bool in_db = false;
|
|
if (in_db_info.maybe_in_db.load(std::memory_order_relaxed)) {
|
|
auto [state, loaded] = cell_db_reader_->sync_with_db(*info, false);
|
|
in_db = state.in_db;
|
|
if (in_db) {
|
|
stats_.new_cells_loaded_in_db.inc();
|
|
} else {
|
|
stats_.new_cells_loaded_not_in_db.inc();
|
|
}
|
|
} else {
|
|
stats_.new_cells_not_in_db_fast.inc();
|
|
info->set_not_in_db();
|
|
}
|
|
LOG_IF(INFO, dbg) << "update_parents: it=" << it << "\n\t" << *info;
|
|
|
|
CellInfo *prev_parent{nullptr};
|
|
for (auto &parent : in_db_info.parents) {
|
|
auto &parent_in_db_info = parent->in_db_info();
|
|
if (!in_db) {
|
|
parent_in_db_info.maybe_in_db.store(false, std::memory_order_relaxed);
|
|
}
|
|
if (parent_in_db_info.pending_children.fetch_sub(1, std::memory_order_release) == 1) {
|
|
if (prev_parent) {
|
|
worker.add_task(prev_parent);
|
|
}
|
|
prev_parent = parent;
|
|
}
|
|
}
|
|
if (!in_db) {
|
|
CellSlice cs(vm::NoVm{}, info->cell);
|
|
while (cs.have_refs()) {
|
|
auto child = cs.fetch_ref();
|
|
auto &child_info = cell_db_reader_->cell_info(std::move(child));
|
|
if (child_info.inc_ref_cnt() == 1 && child_info.visit()) {
|
|
worker.add_result(&child_info);
|
|
}
|
|
}
|
|
}
|
|
info->in_db_info_destroy();
|
|
info = prev_parent;
|
|
} while (info);
|
|
}
|
|
|
|
template <class WorkerT>
|
|
void dec_cell(CellInfo *info, WorkerT &worker) {
|
|
stats_.dec_calls.inc();
|
|
|
|
while (true) {
|
|
stats_.dec_calls_it.inc();
|
|
if (info->visit()) {
|
|
worker.add_result(info);
|
|
}
|
|
auto ref_cnt_diff = info->dec_ref_cnt();
|
|
if (ref_cnt_diff > 0) {
|
|
LOG_IF(INFO, dbg) << "NOT DEC"
|
|
<< "\n\t" << info;
|
|
break;
|
|
}
|
|
auto state = info->state.load();
|
|
if (ref_cnt_diff == 0 && state.in_db) {
|
|
LOG_IF(INFO, dbg) << "NOT DEC (in_db) "
|
|
<< "\n\t" << info;
|
|
break;
|
|
}
|
|
if (!state.sync_with_db) {
|
|
state = cell_db_reader_->sync_with_db(*info, true).first;
|
|
stats_.dec_loaded.inc();
|
|
CHECK(ref_cnt_diff == 0 || state.in_db);
|
|
}
|
|
auto ref_cnt = state.db_ref_cnt + ref_cnt_diff;
|
|
if (ref_cnt > 0) {
|
|
LOG_IF(INFO, dbg) << "DEC " << ref_cnt << "\n\t" << info;
|
|
} else {
|
|
LOG_IF(ERROR, dbg) << "DEC " << ref_cnt << "\n\t" << info;
|
|
}
|
|
CHECK(ref_cnt >= 0);
|
|
if (ref_cnt > 0) {
|
|
break;
|
|
}
|
|
stats_.dec_to_zero.inc();
|
|
CellSlice cs(vm::NoVm{}, info->cell);
|
|
if (!cs.have_refs()) {
|
|
break;
|
|
}
|
|
while (cs.size_refs() > 1) {
|
|
worker.add_task(&cell_db_reader_->cell_info(cs.fetch_ref()));
|
|
}
|
|
info = &cell_db_reader_->cell_info(cs.fetch_ref());
|
|
}
|
|
}
|
|
|
|
template <class Worker>
|
|
void serialize_diff(CellInfo *info, Worker &worker) {
|
|
info->visited.store(false, std::memory_order_relaxed);
|
|
auto ref_cnt_diff = info->get_ref_cnt_diff();
|
|
if (ref_cnt_diff == 0) {
|
|
stats_.diff_zero.inc();
|
|
return;
|
|
}
|
|
|
|
bool merge_supported = true;
|
|
if (merge_supported) {
|
|
auto state = info->state.load();
|
|
if (ref_cnt_diff < 0) {
|
|
CHECK(state.sync_with_db);
|
|
/*
|
|
if (state.db_ref_cnt + ref_cnt_diff == 0) {
|
|
LOG(ERROR) << "DEC ERASE " << info->cell->get_hash().to_hex();
|
|
} else {
|
|
LOG(ERROR) << "DEC MERGE " << info->cell->get_hash().to_hex() << *info;
|
|
}
|
|
*/
|
|
}
|
|
if (ref_cnt_diff < 0 && state.sync_with_db && state.db_ref_cnt + ref_cnt_diff == 0) {
|
|
// Erase is better than Merge+CompactionFilter
|
|
// So I see no reason for CompactionFilter at all
|
|
worker.add_result({.type = CellStorer::Diff::Erase, .key = info->cell->get_hash()});
|
|
stats_.diff_erase.inc();
|
|
} else {
|
|
bool with_data = ref_cnt_diff > 0 && !state.in_db;
|
|
if (with_data) {
|
|
CHECK(state.sync_with_db);
|
|
auto data_cell = info->cell->load_cell().move_as_ok().data_cell;
|
|
stats_.diff_full.inc();
|
|
worker.add_result({.type = CellStorer::Diff::Set,
|
|
.key = info->cell->get_hash(),
|
|
.value = CellStorer::serialize_value(ref_cnt_diff + state.db_ref_cnt, data_cell, false)});
|
|
} else {
|
|
stats_.diff_ref_cnt.inc();
|
|
worker.add_result({.type = CellStorer::Diff::Merge,
|
|
.key = info->cell->get_hash(),
|
|
.value = CellStorer::serialize_refcnt_diffs(ref_cnt_diff)});
|
|
}
|
|
}
|
|
info->on_written_to_db();
|
|
return;
|
|
}
|
|
|
|
auto state = info->state.load();
|
|
if (!state.sync_with_db) {
|
|
stats_.changes_loaded.inc();
|
|
state = cell_db_reader_->sync_with_db(*info, true).first;
|
|
}
|
|
CHECK(state.sync_with_db);
|
|
auto new_ref_cnt = ref_cnt_diff + state.db_ref_cnt;
|
|
|
|
if (ref_cnt_diff < 0) {
|
|
stats_.dec_save.inc();
|
|
if (new_ref_cnt == 0) {
|
|
stats_.dec_erase_cell.inc();
|
|
|
|
LOG_IF(ERROR, dbg) << "DEC ERASE " << *info;
|
|
worker.add_result({.type = CellStorer::Diff::Erase, .key = info->cell->get_hash()});
|
|
stats_.dec_save_erase.inc();
|
|
} else {
|
|
stats_.dec_just_ref_cnt.inc();
|
|
|
|
LOG_IF(ERROR, dbg) << "DEC REFCNT " << *info;
|
|
CHECK(info->cell->is_loaded());
|
|
worker.add_result(
|
|
{.type = CellStorer::Diff::Set,
|
|
.key = info->cell->get_hash(),
|
|
.value = CellStorer::serialize_value(new_ref_cnt, info->cell->load_cell().move_as_ok().data_cell, false)});
|
|
stats_.dec_save_full.inc();
|
|
}
|
|
} else {
|
|
stats_.inc_save.inc();
|
|
CHECK(info->cell->is_loaded());
|
|
if (state.db_ref_cnt == 0) {
|
|
stats_.inc_new_cell.inc();
|
|
LOG_IF(ERROR, dbg) << "INC CREATE " << *info;
|
|
} else {
|
|
stats_.inc_just_ref_cnt.inc();
|
|
LOG_IF(ERROR, dbg) << "INC REFCNT " << *info;
|
|
}
|
|
|
|
worker.add_result(
|
|
{.type = CellStorer::Diff::Set,
|
|
.key = info->cell->get_hash(),
|
|
.value = CellStorer::serialize_value(new_ref_cnt, info->cell->load_cell().move_as_ok().data_cell, false)});
|
|
stats_.inc_save_full.inc();
|
|
}
|
|
}
|
|
|
|
// Applies all pending cell diffs and meta diffs to `storer`, then clears the
// pending lists. Each meta diff is also recorded in meta_db_fixup_. Any storer
// error aborts via ensure().
void save_diff(CellStorer &storer) {
  td::PerfWarningTimer timer("celldb_v2: save_diff");

  td::PerfWarningTimer timer_store_to_db("celldb_v2: save_diff_store_to_db", 0.01);
  // Have no idea how to parallelize this in case of rocksdb
  for (auto &chunk : diff_chunks_) {
    for (auto &cell_diff : chunk) {
      storer.apply_diff(cell_diff).ensure();
    }
  }
  for (auto &meta : meta_diffs_) {
    meta_db_fixup_[meta.key] = meta.value;
    storer.apply_meta_diff(meta).ensure();
  }
  timer_store_to_db.reset();

  td::PerfWarningTimer timer_clear("celldb_v2: save_diff_clear");
  diff_chunks_.clear();
  meta_diffs_.clear();
  timer_clear.reset();
}
|
|
};
|
|
} // namespace
|
|
|
|
// Factory for the V2 implementation of DynamicBagOfCellsDb.
std::unique_ptr<DynamicBagOfCellsDb> DynamicBagOfCellsDb::create_v2(CreateV2Options options) {
  auto db = std::make_unique<DynamicBagOfCellsDbImplV2>(options);
  return db;
}
|
|
} // namespace vm
|