mirror of
https://github.com/ton-blockchain/ton
synced 2025-03-09 15:40:10 +00:00
celldb: version 2
- thread safe cache - parallel commit - multiple optimizations - support of key-value merge operations - improved tests and benchmarks - in-memory version won't read from key value after start - uses vector in-memory table now - use rocksdb::WALRecoveryMode::kTolerateCorruptedTailRecords - do not silently ignore errors during recovery
This commit is contained in:
parent
1b70e48327
commit
c863c42ed1
32 changed files with 3276 additions and 318 deletions
|
@ -24,10 +24,13 @@
|
|||
#include "rocksdb/write_batch.h"
|
||||
#include "rocksdb/utilities/optimistic_transaction_db.h"
|
||||
#include "rocksdb/utilities/transaction.h"
|
||||
#include "td/utils/misc.h"
|
||||
|
||||
#include <rocksdb/filter_policy.h>
|
||||
|
||||
namespace td {
|
||||
namespace {
|
||||
static Status from_rocksdb(rocksdb::Status status) {
|
||||
static Status from_rocksdb(const rocksdb::Status &status) {
|
||||
if (status.ok()) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
@ -56,62 +59,83 @@ RocksDb::~RocksDb() {
|
|||
}
|
||||
|
||||
RocksDb RocksDb::clone() const {
|
||||
if (transaction_db_) {
|
||||
return RocksDb{transaction_db_, options_};
|
||||
}
|
||||
return RocksDb{db_, options_};
|
||||
}
|
||||
|
||||
Result<RocksDb> RocksDb::open(std::string path, RocksDbOptions options) {
|
||||
rocksdb::OptimisticTransactionDB *db;
|
||||
{
|
||||
rocksdb::Options db_options;
|
||||
rocksdb::Options db_options;
|
||||
db_options.merge_operator = options.merge_operator;
|
||||
db_options.compaction_filter = options.compaction_filter;
|
||||
|
||||
static auto default_cache = rocksdb::NewLRUCache(1 << 30);
|
||||
if (!options.no_block_cache && options.block_cache == nullptr) {
|
||||
options.block_cache = default_cache;
|
||||
}
|
||||
static auto default_cache = rocksdb::NewLRUCache(1 << 30);
|
||||
if (!options.no_block_cache && options.block_cache == nullptr) {
|
||||
options.block_cache = default_cache;
|
||||
}
|
||||
|
||||
rocksdb::BlockBasedTableOptions table_options;
|
||||
if (options.no_block_cache) {
|
||||
table_options.no_block_cache = true;
|
||||
} else {
|
||||
table_options.block_cache = options.block_cache;
|
||||
}
|
||||
db_options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_options));
|
||||
rocksdb::BlockBasedTableOptions table_options;
|
||||
if (options.no_block_cache) {
|
||||
table_options.no_block_cache = true;
|
||||
} else {
|
||||
table_options.block_cache = options.block_cache;
|
||||
}
|
||||
db_options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_options));
|
||||
|
||||
db_options.use_direct_reads = options.use_direct_reads;
|
||||
db_options.manual_wal_flush = true;
|
||||
db_options.create_if_missing = true;
|
||||
db_options.max_background_compactions = 4;
|
||||
db_options.max_background_flushes = 2;
|
||||
db_options.bytes_per_sync = 1 << 20;
|
||||
db_options.writable_file_max_buffer_size = 2 << 14;
|
||||
db_options.statistics = options.statistics;
|
||||
db_options.max_log_file_size = 100 << 20;
|
||||
db_options.keep_log_file_num = 1;
|
||||
rocksdb::OptimisticTransactionDBOptions occ_options;
|
||||
occ_options.validate_policy = rocksdb::OccValidationPolicy::kValidateSerial;
|
||||
// table_options.block_align = true;
|
||||
if (options.no_reads) {
|
||||
db_options.memtable_factory.reset(new rocksdb::VectorRepFactory());
|
||||
db_options.allow_concurrent_memtable_write = false;
|
||||
}
|
||||
|
||||
db_options.wal_recovery_mode = rocksdb::WALRecoveryMode::kTolerateCorruptedTailRecords;
|
||||
db_options.use_direct_reads = options.use_direct_reads;
|
||||
db_options.manual_wal_flush = true;
|
||||
db_options.create_if_missing = true;
|
||||
db_options.max_background_compactions = 4;
|
||||
db_options.max_background_flushes = 2;
|
||||
db_options.bytes_per_sync = 1 << 20;
|
||||
db_options.writable_file_max_buffer_size = 2 << 14;
|
||||
db_options.statistics = options.statistics;
|
||||
db_options.max_log_file_size = 100 << 20;
|
||||
db_options.keep_log_file_num = 1;
|
||||
|
||||
if (options.experimental) {
|
||||
// Place your experimental options here
|
||||
}
|
||||
|
||||
if (options.no_transactions) {
|
||||
rocksdb::DB *db{nullptr};
|
||||
TRY_STATUS(from_rocksdb(rocksdb::DB::Open(db_options, std::move(path), &db)));
|
||||
return RocksDb(std::shared_ptr<rocksdb::DB>(db), std::move(options));
|
||||
} else {
|
||||
rocksdb::OptimisticTransactionDB *db{nullptr};
|
||||
rocksdb::ColumnFamilyOptions cf_options(db_options);
|
||||
std::vector<rocksdb::ColumnFamilyDescriptor> column_families;
|
||||
column_families.push_back(rocksdb::ColumnFamilyDescriptor(rocksdb::kDefaultColumnFamilyName, cf_options));
|
||||
std::vector<rocksdb::ColumnFamilyHandle *> handles;
|
||||
rocksdb::OptimisticTransactionDBOptions occ_options;
|
||||
occ_options.validate_policy = rocksdb::OccValidationPolicy::kValidateSerial;
|
||||
TRY_STATUS(from_rocksdb(rocksdb::OptimisticTransactionDB::Open(db_options, occ_options, std::move(path),
|
||||
column_families, &handles, &db)));
|
||||
CHECK(handles.size() == 1);
|
||||
// i can delete the handle since DBImpl is always holding a reference to
|
||||
// default column family
|
||||
delete handles[0];
|
||||
return RocksDb(std::shared_ptr<rocksdb::OptimisticTransactionDB>(db), std::move(options));
|
||||
}
|
||||
return RocksDb(std::shared_ptr<rocksdb::OptimisticTransactionDB>(db), std::move(options));
|
||||
}
|
||||
|
||||
std::shared_ptr<rocksdb::Statistics> RocksDb::create_statistics() {
|
||||
return rocksdb::CreateDBStatistics();
|
||||
}
|
||||
|
||||
std::string RocksDb::statistics_to_string(const std::shared_ptr<rocksdb::Statistics> statistics) {
|
||||
std::string RocksDb::statistics_to_string(const std::shared_ptr<rocksdb::Statistics> &statistics) {
|
||||
return statistics->ToString();
|
||||
}
|
||||
|
||||
void RocksDb::reset_statistics(const std::shared_ptr<rocksdb::Statistics> statistics) {
|
||||
void RocksDb::reset_statistics(const std::shared_ptr<rocksdb::Statistics> &statistics) {
|
||||
statistics->Reset();
|
||||
}
|
||||
|
||||
|
@ -133,7 +157,9 @@ std::string RocksDb::stats() const {
|
|||
}
|
||||
|
||||
Result<RocksDb::GetStatus> RocksDb::get(Slice key, std::string &value) {
|
||||
//LOG(ERROR) << "GET";
|
||||
if (options_.no_reads) {
|
||||
return td::Status::Error("trying to read from write-only database");
|
||||
}
|
||||
rocksdb::Status status;
|
||||
if (snapshot_) {
|
||||
rocksdb::ReadOptions options;
|
||||
|
@ -162,6 +188,18 @@ Status RocksDb::set(Slice key, Slice value) {
|
|||
}
|
||||
return from_rocksdb(db_->Put({}, to_rocksdb(key), to_rocksdb(value)));
|
||||
}
|
||||
Status RocksDb::merge(Slice key, Slice value) {
|
||||
if (write_batch_) {
|
||||
return from_rocksdb(write_batch_->Merge(to_rocksdb(key), to_rocksdb(value)));
|
||||
}
|
||||
if (transaction_) {
|
||||
return from_rocksdb(transaction_->Merge(to_rocksdb(key), to_rocksdb(value)));
|
||||
}
|
||||
return from_rocksdb(db_->Merge({}, to_rocksdb(key), to_rocksdb(value)));
|
||||
}
|
||||
Status RocksDb::run_gc() {
|
||||
return from_rocksdb(db_->CompactRange({}, nullptr, nullptr));
|
||||
}
|
||||
|
||||
Status RocksDb::erase(Slice key) {
|
||||
if (write_batch_) {
|
||||
|
@ -174,7 +212,11 @@ Status RocksDb::erase(Slice key) {
|
|||
}
|
||||
|
||||
Result<size_t> RocksDb::count(Slice prefix) {
|
||||
if (options_.no_reads) {
|
||||
return td::Status::Error("trying to read from write-only database");
|
||||
}
|
||||
rocksdb::ReadOptions options;
|
||||
options.auto_prefix_mode = true;
|
||||
options.snapshot = snapshot_.get();
|
||||
std::unique_ptr<rocksdb::Iterator> iterator;
|
||||
if (snapshot_ || !transaction_) {
|
||||
|
@ -197,7 +239,11 @@ Result<size_t> RocksDb::count(Slice prefix) {
|
|||
}
|
||||
|
||||
Status RocksDb::for_each(std::function<Status(Slice, Slice)> f) {
|
||||
if (options_.no_reads) {
|
||||
return td::Status::Error("trying to read from write-only database");
|
||||
}
|
||||
rocksdb::ReadOptions options;
|
||||
options.auto_prefix_mode = true;
|
||||
options.snapshot = snapshot_.get();
|
||||
std::unique_ptr<rocksdb::Iterator> iterator;
|
||||
if (snapshot_ || !transaction_) {
|
||||
|
@ -219,7 +265,11 @@ Status RocksDb::for_each(std::function<Status(Slice, Slice)> f) {
|
|||
}
|
||||
|
||||
Status RocksDb::for_each_in_range(Slice begin, Slice end, std::function<Status(Slice, Slice)> f) {
|
||||
if (options_.no_reads) {
|
||||
return td::Status::Error("trying to read from write-only database");
|
||||
}
|
||||
rocksdb::ReadOptions options;
|
||||
options.auto_prefix_mode = true;
|
||||
options.snapshot = snapshot_.get();
|
||||
std::unique_ptr<rocksdb::Iterator> iterator;
|
||||
if (snapshot_ || !transaction_) {
|
||||
|
@ -252,9 +302,10 @@ Status RocksDb::begin_write_batch() {
|
|||
|
||||
Status RocksDb::begin_transaction() {
|
||||
CHECK(!write_batch_);
|
||||
CHECK(transaction_db_);
|
||||
rocksdb::WriteOptions options;
|
||||
options.sync = true;
|
||||
transaction_.reset(db_->BeginTransaction(options, {}));
|
||||
transaction_.reset(transaction_db_->BeginTransaction(options, {}));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
@ -307,7 +358,11 @@ Status RocksDb::end_snapshot() {
|
|||
}
|
||||
|
||||
RocksDb::RocksDb(std::shared_ptr<rocksdb::OptimisticTransactionDB> db, RocksDbOptions options)
|
||||
: db_(std::move(db)), options_(options) {
|
||||
: transaction_db_{db}, db_(std::move(db)), options_(std::move(options)) {
|
||||
}
|
||||
|
||||
RocksDb::RocksDb(std::shared_ptr<rocksdb::DB> db, RocksDbOptions options)
|
||||
: db_(std::move(db)), options_(std::move(options)) {
|
||||
}
|
||||
|
||||
void RocksDbSnapshotStatistics::begin_snapshot(const rocksdb::Snapshot *snapshot) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue