1
0
Fork 0
mirror of https://github.com/ton-blockchain/ton synced 2025-03-09 15:40:10 +00:00

Merge pull request #1184 from birydrad/testnet

Merge actor stats command
This commit is contained in:
EmelyanenkoK 2024-09-19 16:35:34 +03:00 committed by GitHub
commit e93f1a988c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
100 changed files with 3408 additions and 360 deletions

View file

@ -151,6 +151,7 @@ set(TON_DB_SOURCE
vm/db/CellHashTable.h
vm/db/CellStorage.h
vm/db/TonDb.h
vm/db/InMemoryBagOfCellsDb.cpp
)
set(FIFT_SOURCE
@ -541,7 +542,7 @@ target_include_directories(create-state PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT
if (INTERNAL_COMPILE)
target_link_libraries(create-state PUBLIC ton_crypto fift-lib ton_block tonlib git)
else()
if (TONLIB_COMPILE)
if (TONLIB_COMPILE)
target_link_libraries(create-state PUBLIC ton_crypto fift-lib ton_block tonlib git)
else()
target_link_libraries(create-state PUBLIC ton_crypto fift-lib ton_block git)

View file

@ -320,7 +320,7 @@ ton::ValidatorSessionConfig Config::get_consensus_config() const {
c.max_block_size = r.max_block_bytes;
c.max_collated_data_size = r.max_collated_bytes;
};
auto set_v2 = [&] (auto& r) {
auto set_v2 = [&](auto& r) {
set_v1(r);
c.new_catchain_ids = r.new_catchain_ids;
};
@ -1940,7 +1940,7 @@ td::Result<SizeLimitsConfig> Config::get_size_limits_config() const {
td::Result<SizeLimitsConfig> Config::do_get_size_limits_config(td::Ref<vm::CellSlice> cs) {
SizeLimitsConfig limits;
if (cs.is_null()) {
return limits; // default values
return limits; // default values
}
auto unpack_v1 = [&](auto& rec) {
limits.max_msg_bits = rec.max_msg_bits;
@ -2299,17 +2299,14 @@ td::Result<Ref<vm::Tuple>> ConfigInfo::get_prev_blocks_info() const {
if (shard->sgn() < 0) {
shard &= ((td::make_refint(1) << 64) - 1);
}
return vm::make_tuple_ref(
td::make_refint(block_id.id.workchain),
std::move(shard),
td::make_refint(block_id.id.seqno),
td::bits_to_refint(block_id.root_hash.bits(), 256),
td::bits_to_refint(block_id.file_hash.bits(), 256));
return vm::make_tuple_ref(td::make_refint(block_id.id.workchain), std::move(shard),
td::make_refint(block_id.id.seqno), td::bits_to_refint(block_id.root_hash.bits(), 256),
td::bits_to_refint(block_id.file_hash.bits(), 256));
};
std::vector<vm::StackEntry> last_mc_blocks;
last_mc_blocks.push_back(block_id_to_tuple(block_id));
for (ton::BlockSeqno seqno = block_id.id.seqno; seqno > 0 && last_mc_blocks.size() < 16; ) {
for (ton::BlockSeqno seqno = block_id.id.seqno; seqno > 0 && last_mc_blocks.size() < 16;) {
--seqno;
ton::BlockIdExt block_id;
if (!get_old_mc_block_id(seqno, block_id)) {
@ -2323,9 +2320,8 @@ td::Result<Ref<vm::Tuple>> ConfigInfo::get_prev_blocks_info() const {
if (!get_last_key_block(last_key_block, last_key_block_lt)) {
return td::Status::Error("cannot fetch last key block");
}
return vm::make_tuple_ref(
td::make_cnt_ref<std::vector<vm::StackEntry>>(std::move(last_mc_blocks)),
block_id_to_tuple(last_key_block));
return vm::make_tuple_ref(td::make_cnt_ref<std::vector<vm::StackEntry>>(std::move(last_mc_blocks)),
block_id_to_tuple(last_key_block));
}
td::optional<PrecompiledContractsConfig::Contract> PrecompiledContractsConfig::get_contract(

View file

@ -197,6 +197,7 @@ struct McShardHash : public McShardHashI {
: blk_(blk), start_lt_(start_lt), end_lt_(end_lt) {
}
McShardHash(const McShardHash&) = default;
McShardHash& operator=(const McShardHash&) = default;
bool is_valid() const {
return blk_.is_valid();
}
@ -545,7 +546,10 @@ class Config {
};
public:
enum { needValidatorSet = 16, needSpecialSmc = 32, needWorkchainInfo = 256, needCapabilities = 512 };
static constexpr int needValidatorSet = 16;
static constexpr int needSpecialSmc = 32;
static constexpr int needWorkchainInfo = 256;
static constexpr int needCapabilities = 512;
int mode{0};
ton::BlockIdExt block_id;
@ -682,14 +686,12 @@ class Config {
class ConfigInfo : public Config, public ShardConfig {
public:
enum {
needStateRoot = 1,
needLibraries = 2,
needStateExtraRoot = 4,
needShardHashes = 8,
needAccountsRoot = 64,
needPrevBlocks = 128
};
static constexpr int needStateRoot = 1;
static constexpr int needLibraries = 2;
static constexpr int needStateExtraRoot = 4;
static constexpr int needShardHashes = 8;
static constexpr int needAccountsRoot = 64;
static constexpr int needPrevBlocks = 128;
ton::BlockSeqno vert_seqno{~0U};
int global_id_{0};
ton::UnixTime utime{0};

View file

@ -2860,22 +2860,26 @@ td::Status Transaction::check_state_limits(const SizeLimitsConfig& size_limits,
vm::CellStorageStat storage_stat;
storage_stat.limit_cells = size_limits.max_acc_state_cells;
storage_stat.limit_bits = size_limits.max_acc_state_bits;
td::Timer timer;
auto add_used_storage = [&](const td::Ref<vm::Cell>& cell) -> td::Status {
if (cell.not_null()) {
TRY_RESULT(res, storage_stat.add_used_storage(cell));
if (res.max_merkle_depth > max_allowed_merkle_depth) {
return td::Status::Error("too big merkle depth");
{
TD_PERF_COUNTER(transaction_storage_stat_a);
td::Timer timer;
auto add_used_storage = [&](const td::Ref<vm::Cell>& cell) -> td::Status {
if (cell.not_null()) {
TRY_RESULT(res, storage_stat.add_used_storage(cell));
if (res.max_merkle_depth > max_allowed_merkle_depth) {
return td::Status::Error("too big merkle depth");
}
}
return td::Status::OK();
};
TRY_STATUS(add_used_storage(new_code));
TRY_STATUS(add_used_storage(new_data));
TRY_STATUS(add_used_storage(new_library));
if (timer.elapsed() > 0.1) {
LOG(INFO) << "Compute used storage took " << timer.elapsed() << "s";
}
return td::Status::OK();
};
TRY_STATUS(add_used_storage(new_code));
TRY_STATUS(add_used_storage(new_data));
TRY_STATUS(add_used_storage(new_library));
if (timer.elapsed() > 0.1) {
LOG(INFO) << "Compute used storage took " << timer.elapsed() << "s";
}
if (acc_status == Account::acc_active) {
storage_stat.clear_limit();
} else {
@ -3156,6 +3160,7 @@ bool Transaction::compute_state() {
if (new_stats) {
stats = new_stats.unwrap();
} else {
TD_PERF_COUNTER(transaction_storage_stat_b);
td::Timer timer;
stats.add_used_storage(Ref<vm::Cell>(storage)).ensure();
if (timer.elapsed() > 0.1) {

View file

@ -2294,11 +2294,11 @@ std::string AnyIntView<Tr>::to_dec_string_destroy_any() {
stack.push_back(divmod_short_any(Tr::max_pow10));
} while (sgn());
char slice[word_bits * 97879 / 325147 + 2];
std::sprintf(slice, "%lld", stack.back());
std::snprintf(slice, sizeof(slice), "%lld", stack.back());
s += slice;
stack.pop_back();
while (stack.size()) {
std::sprintf(slice, "%018lld", stack.back());
std::snprintf(slice, sizeof(slice), "%018lld", stack.back());
s += slice;
stack.pop_back();
}

View file

@ -29,6 +29,7 @@ Ref<CntObject> CntObject::clone() const {
namespace detail {
struct SafeDeleter {
public:
thread_local static td::int64 delete_count;
void retire(const CntObject *ptr) {
if (is_active_) {
to_delete_.push_back(ptr);
@ -39,9 +40,11 @@ struct SafeDeleter {
is_active_ = false;
};
delete ptr;
delete_count++;
while (!to_delete_.empty()) {
auto *ptr = to_delete_.back();
to_delete_.pop_back();
delete_count++;
delete ptr;
}
}
@ -50,6 +53,7 @@ struct SafeDeleter {
std::vector<const CntObject *> to_delete_;
bool is_active_{false};
};
thread_local td::int64 SafeDeleter::delete_count{0};
TD_THREAD_LOCAL SafeDeleter *deleter;
void safe_delete(const CntObject *ptr) {
@ -57,4 +61,7 @@ void safe_delete(const CntObject *ptr) {
deleter->retire(ptr);
}
} // namespace detail
// Returns the calling thread's value of detail::SafeDeleter::delete_count, a
// thread_local counter that SafeDeleter increments next to each `delete` it performs.
int64 ref_get_delete_count() {
return detail::SafeDeleter::delete_count;
}
} // namespace td

View file

@ -472,5 +472,6 @@ template <class T>
void swap(Ref<T>& r1, Ref<T>& r2) {
r1.swap(r2);
}
int64 ref_get_delete_count();
} // namespace td

View file

@ -3425,7 +3425,7 @@ void import_cmdline_args(Dictionary& d, std::string arg0, int n, const char* con
cmdline_args->set(std::move(list));
for (int i = 1; i <= n; i++) {
char buffer[14];
sprintf(buffer, "$%d ", i);
snprintf(buffer, sizeof(buffer), "$%d ", i);
d.def_stack_word(buffer, std::bind(interpret_get_fixed_cmdline_arg, _1, i));
}
}

View file

@ -81,7 +81,7 @@ bool CodeBlob::compute_used_code_vars(std::unique_ptr<Op>& ops_ptr, const VarDes
func_assert(ops_ptr->cl == Op::_Nop);
return ops_ptr->set_var_info(var_info);
}
return compute_used_code_vars(ops_ptr->next, var_info, edit) | ops_ptr->compute_used_vars(*this, edit);
return int(compute_used_code_vars(ops_ptr->next, var_info, edit)) | int(ops_ptr->compute_used_vars(*this, edit));
}
bool operator==(const VarDescrList& x, const VarDescrList& y) {
@ -584,7 +584,7 @@ bool prune_unreachable(std::unique_ptr<Op>& ops) {
ops = std::move(op.block1);
return prune_unreachable(ops);
} else {
reach = prune_unreachable(op.block0) | prune_unreachable(op.block1);
reach = int(prune_unreachable(op.block0)) | int(prune_unreachable(op.block1));
}
break;
}
@ -660,7 +660,7 @@ bool prune_unreachable(std::unique_ptr<Op>& ops) {
break;
}
case Op::_TryCatch: {
reach = prune_unreachable(op.block0) | prune_unreachable(op.block1);
reach = int(prune_unreachable(op.block0)) | int(prune_unreachable(op.block1));
break;
}
default:
@ -892,15 +892,15 @@ bool Op::mark_noreturn() {
return set_noreturn(true);
case _If:
case _TryCatch:
return set_noreturn((block0->mark_noreturn() & (block1 && block1->mark_noreturn())) | next->mark_noreturn());
return set_noreturn((int(block0->mark_noreturn()) & int(block1 && block1->mark_noreturn())) | int(next->mark_noreturn()));
case _Again:
block0->mark_noreturn();
return set_noreturn(true);
case _Until:
return set_noreturn(block0->mark_noreturn() | next->mark_noreturn());
return set_noreturn(int(block0->mark_noreturn()) | int(next->mark_noreturn()));
case _While:
block1->mark_noreturn();
return set_noreturn(block0->mark_noreturn() | next->mark_noreturn());
return set_noreturn(int(block0->mark_noreturn()) | int(next->mark_noreturn()));
case _Repeat:
block0->mark_noreturn();
return set_noreturn(next->mark_noreturn());

View file

@ -48,6 +48,7 @@ struct OpensslEVP_SHA512 {
template <typename H>
class HashCtx {
EVP_MD_CTX *base_ctx{nullptr};
EVP_MD_CTX *ctx{nullptr};
void init();
void clear();
@ -77,16 +78,20 @@ class HashCtx {
// Allocates both digest contexts (ctx and base_ctx), initialises base_ctx with
// the digest algorithm returned by H::get_evp(), and then calls reset().
template <typename H>
void HashCtx<H>::init() {
ctx = EVP_MD_CTX_create();
base_ctx = EVP_MD_CTX_create();
EVP_DigestInit_ex(base_ctx, H::get_evp(), 0);
reset();
}
template <typename H>
void HashCtx<H>::reset() {
EVP_DigestInit_ex(ctx, H::get_evp(), 0);
EVP_MD_CTX_copy_ex(ctx, base_ctx);
}
// Destroys both digest contexts and resets the pointers to nullptr.
template <typename H>
void HashCtx<H>::clear() {
EVP_MD_CTX_destroy(base_ctx);
base_ctx = nullptr;
EVP_MD_CTX_destroy(ctx);
ctx = nullptr;
}

View file

@ -17,6 +17,8 @@
Copyright 2017-2020 Telegram Systems LLP
*/
#include "crypto/Ed25519.h"
#include "ellcurve/Ed25519.h"
#include "td/utils/logging.h"
#include "td/utils/misc.h"
#include "td/utils/Slice.h"
@ -24,6 +26,8 @@
#include "td/utils/JsonBuilder.h"
#include "wycheproof.h"
#include "keys/keys.hpp"
#include "td/utils/benchmark.h"
#include <string>
#include <utility>
@ -217,3 +221,36 @@ TEST(Crypto, almost_zero) {
}
}
}
// Benchmark: sign the same 32-byte message `n` times with one freshly
// generated Ed25519 private key; every signing result is checked with ensure().
BENCH(ed25519_sign, "ed25519_sign") {
  std::string message(32, 'a');
  auto signing_key = td::Ed25519::generate_private_key().move_as_ok();
  for (int round = 0; round < n; ++round) {
    signing_key.sign(message).ensure();
  }
}
// Benchmark: compute an Ed25519 shared secret `n` times between two distinct
// key pairs, using party B's public key together with party A's private key.
BENCH(ed25519_shared_secret, "ed25519_shared_secret") {
  auto private_key_a = td::Ed25519::generate_private_key().move_as_ok();
  auto private_key_b = td::Ed25519::generate_private_key().move_as_ok();
  // B's public key must come from B's private key. It used to be derived from
  // private_key_a, which left private_key_b unused and paired a key with itself.
  auto public_key_b = private_key_b.get_public_key().move_as_ok();
  for (int i = 0; i < n; i++) {
    td::Ed25519::compute_shared_secret(public_key_b, private_key_a).ensure();
  }
}
// Benchmark: verify one precomputed Ed25519 signature over a fixed 32-byte
// message `n` times; every verification result is checked with ensure().
BENCH(ed25519_verify, "ed25519_verify") {
  std::string message(32, 'a');
  auto signer = td::Ed25519::generate_private_key().move_as_ok();
  auto verifier = signer.get_public_key().move_as_ok();
  auto signature = signer.sign(message).move_as_ok();
  for (int round = 0; round < n; ++round) {
    verifier.verify_signature(message, signature).ensure();
  }
}
// Runs the ed25519_sign, ed25519_shared_secret and ed25519_verify benchmarks
// through bench().
TEST(Crypto, ed25519_benchmark) {
bench(ed25519_signBench());
bench(ed25519_shared_secretBench());
bench(ed25519_verifyBench());
}

View file

@ -54,10 +54,15 @@
#include <set>
#include <map>
#include <thread>
#include <openssl/sha.h>
#include "openssl/digest.hpp"
#include "vm/dict.h"
#include <numeric>
#include <optional>
namespace vm {
@ -82,9 +87,23 @@ int get_random_serialization_mode(T &rnd) {
return modes[rnd.fast(0, (int)modes.size() - 1)];
}
class BenchSha256 : public td::Benchmark {
// Common base for the SHA benchmarks: owns the input string and builds the
// benchmark description from the subclass-provided name plus the input length.
class BenchSha : public td::Benchmark {
public:
// Creates the input to hash: a string of n 'a' characters.
explicit BenchSha(size_t n) : str_(n, 'a') {
}
// Description has the form "<name> length=<input size>".
std::string get_description() const override {
return PSTRING() << get_name() << " length=" << str_.size();
}
// Name of the concrete benchmark; supplied by each subclass.
virtual std::string get_name() const = 0;
protected:
// Input data fed to the hasher by subclasses.
std::string str_;
};
class BenchSha256 : public BenchSha {
public:
using BenchSha::BenchSha;
std::string get_name() const override {
return "SHA256";
}
@ -92,7 +111,7 @@ class BenchSha256 : public td::Benchmark {
int res = 0;
for (int i = 0; i < n; i++) {
digest::SHA256 hasher;
hasher.feed("abcd", 4);
hasher.feed(str_);
unsigned char buf[32];
hasher.extract(buf);
res += buf[0];
@ -100,10 +119,12 @@ class BenchSha256 : public td::Benchmark {
td::do_not_optimize_away(res);
}
};
class BenchSha256Reuse : public td::Benchmark {
class BenchSha256Reuse : public BenchSha {
public:
std::string get_description() const override {
return "SHA256 reuse";
using BenchSha::BenchSha;
std::string get_name() const override {
return "SHA256 reuse (used in DataCell)";
}
void run(int n) override {
@ -111,7 +132,7 @@ class BenchSha256Reuse : public td::Benchmark {
digest::SHA256 hasher;
for (int i = 0; i < n; i++) {
hasher.reset();
hasher.feed("abcd", 4);
hasher.feed(str_);
unsigned char buf[32];
hasher.extract(buf);
res += buf[0];
@ -119,28 +140,46 @@ class BenchSha256Reuse : public td::Benchmark {
td::do_not_optimize_away(res);
}
};
class BenchSha256Low : public td::Benchmark {
class BenchSha256Low : public BenchSha {
public:
std::string get_description() const override {
using BenchSha::BenchSha;
std::string get_name() const override {
return "SHA256 low level";
}
// Use the old method to check for performance degradation
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#elif defined(_MSC_VER)
#pragma warning(push)
#pragma warning(disable : 4996) // Disable deprecated warning for MSVC
#endif
void run(int n) override {
int res = 0;
td::Sha256State ctx;
SHA256_CTX ctx;
for (int i = 0; i < n; i++) {
ctx.init();
ctx.feed("abcd");
SHA256_Init(&ctx);
SHA256_Update(&ctx, str_.data(), str_.size());
unsigned char buf[32];
ctx.extract(td::MutableSlice{buf, 32});
SHA256_Final(buf, &ctx);
res += buf[0];
}
td::do_not_optimize_away(res);
}
};
class BenchSha256Tdlib : public td::Benchmark {
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#elif defined(_MSC_VER)
#pragma warning(pop)
#endif
class BenchSha256Tdlib : public BenchSha {
public:
std::string get_description() const override {
using BenchSha::BenchSha;
std::string get_name() const override {
return "SHA256 TDLib";
}
@ -150,7 +189,7 @@ class BenchSha256Tdlib : public td::Benchmark {
for (int i = 0; i < n; i++) {
td::init_thread_local<td::Sha256State>(ctx);
ctx->init();
ctx->feed("abcd");
ctx->feed(str_);
unsigned char buf[32];
ctx->extract(td::MutableSlice(buf, 32), false);
res += buf[0];
@ -158,11 +197,61 @@ class BenchSha256Tdlib : public td::Benchmark {
td::do_not_optimize_away(res);
}
};
// Runs the benchmark produced by the factory `f` on several threads at once.
//
// `f` is a callable that returns a fresh td::Benchmark-like object. One
// instance is created up front (only to obtain the description), and each
// worker thread creates its own instance, so workers share no benchmark state.
// The total amount of work is `n * num_threads` iterations, handed out to the
// workers in chunks through a shared atomic cursor.
template <class F>
void bench_threaded(F &&f) {
  class Threaded : public td::Benchmark {
   public:
    explicit Threaded(F &&f) : f_(std::move(f)), base(f_()) {
    }
    F f_;
    std::decay_t<decltype(f_())> base;

    std::string get_description() const override {
      return base.get_description() + " threaded";
    }

    void run(int n) override {
      std::atomic<int> task_i{0};
      const int chunk_size = 1024;
      const int num_threads = 16;
      n *= num_threads;
      std::vector<td::thread> threads;
      threads.reserve(num_threads);
      for (int thread_i = 0; thread_i < num_threads; thread_i++) {
        // The chunk bounds are locals of the worker. They must not alias the
        // spawning loop's counter: writing to it from the workers is a data
        // race, can end this loop early, and dangles once the loop exits.
        threads.emplace_back([&] {
          auto bench = f_();
          while (true) {
            const int begin = task_i.fetch_add(chunk_size, std::memory_order_relaxed);
            if (begin >= n) {
              break;  // all work has been handed out
            }
            const int end = std::min(n, begin + chunk_size);
            bench.run(end - begin);
          }
        });
      }
      // n and task_i are captured by reference, so join before returning.
      for (auto &thread : threads) {
        thread.join();
      }
    }
  };
  bench(Threaded(std::forward<F>(f)));
}
TEST(Cell, sha_benchmark) {
bench(BenchSha256Tdlib());
bench(BenchSha256Low());
bench(BenchSha256Reuse());
bench(BenchSha256());
for (size_t n : {4, 64, 128}) {
bench(BenchSha256Tdlib(n));
bench(BenchSha256Low(n));
bench(BenchSha256Reuse(n));
bench(BenchSha256(n));
}
}
// Runs every SHA256 benchmark variant through bench_threaded for input
// lengths 4, 64 and 128.
TEST(Cell, sha_benchmark_threaded) {
  for (size_t length : {4, 64, 128}) {
    bench_threaded([length] { return BenchSha256Tdlib(length); });
    bench_threaded([length] { return BenchSha256Low(length); });
    bench_threaded([length] { return BenchSha256Reuse(length); });
    bench_threaded([length] { return BenchSha256(length); });
  }
}
std::string serialize_boc(Ref<Cell> cell, int mode = 31) {
@ -762,16 +851,70 @@ TEST(TonDb, BocMultipleRoots) {
}
};
TEST(TonDb, DynamicBoc) {
// Smoke test for the in-memory DynamicBagOfCellsDb: cells are added, committed
// into a MemoryKeyValue through a CellStorer, and then loaded back by hash. The
// db is also recreated from the same key-value store between steps.
TEST(TonDb, InMemoryDynamicBocSimple) {
// Reads the "DataCell" named counter; the value taken here is compared with
// the value at scope exit.
// NOTE(review): presumably this counts live DataCell objects, making the
// SCOPE_EXIT check a leak check — confirm what the counter tracks.
auto counter = [] { return td::NamedThreadSafeCounter::get_default().get_counter("DataCell").sum(); };
auto before = counter();
SCOPE_EXIT {
LOG_CHECK(before == counter()) << before << " vs " << counter();
;
};
td::Random::Xorshift128plus rnd{123};
auto kv = std::make_shared<td::MemoryKeyValue>();
CellStorer storer(*kv);
auto boc = DynamicBagOfCellsDb::create_in_memory(kv.get(), {});
// Step 1: store an empty cell and read it back by hash.
auto empty_cell = vm::CellBuilder().finalize();
boc->inc(empty_cell);
boc->prepare_commit().ensure();
boc->commit(storer).ensure();
auto got_empty_cell = boc->load_cell(empty_cell->get_hash().as_slice()).move_as_ok();
ASSERT_EQ(empty_cell->get_hash(), got_empty_cell->get_hash());
// Step 2: drop the empty cell as a root and add a cell that references it,
// both within the same commit.
boc->dec(empty_cell);
auto one_ref_cell = vm::CellBuilder().store_ref(empty_cell).finalize();
boc->inc(one_ref_cell);
boc->prepare_commit().ensure();
boc->commit(storer).ensure();
auto got_one_ref_cell = boc->load_cell(one_ref_cell->get_hash().as_slice()).move_as_ok();
ASSERT_EQ(one_ref_cell->get_hash(), got_one_ref_cell->get_hash());
// Step 3: recreate the db from the same key-value store and add a random cell.
boc = DynamicBagOfCellsDb::create_in_memory(kv.get(), {});
auto random_ref_cell = gen_random_cell(3, rnd);
boc->inc(random_ref_cell);
boc->prepare_commit().ensure();
boc->commit(storer).ensure();
auto got_random_ref_cell = boc->load_cell(random_ref_cell->get_hash().as_slice()).move_as_ok();
ASSERT_EQ(random_ref_cell->get_hash(), got_random_ref_cell->get_hash());
// Recreate the db once more from the key-value store after the last commit.
boc = DynamicBagOfCellsDb::create_in_memory(kv.get(), {});
}
void test_dynamic_boc(std::optional<DynamicBagOfCellsDb::CreateInMemoryOptions> o_in_memory) {
auto counter = [] { return td::NamedThreadSafeCounter::get_default().get_counter("DataCell").sum(); };
auto before = counter();
SCOPE_EXIT {
LOG_CHECK((o_in_memory && o_in_memory->use_arena) || before == counter()) << before << " vs " << counter();
;
};
td::Random::Xorshift128plus rnd{123};
std::string old_root_hash;
std::string old_root_serialization;
auto kv = std::make_shared<td::MemoryKeyValue>();
auto dboc = DynamicBagOfCellsDb::create();
auto create_dboc = [&]() {
if (o_in_memory) {
auto res = DynamicBagOfCellsDb::create_in_memory(kv.get(), *o_in_memory);
auto roots_n = old_root_hash.empty() ? 0 : 1;
ASSERT_EQ(roots_n, res->get_stats().ok().roots_total_count);
return res;
}
return DynamicBagOfCellsDb::create();
};
auto dboc = create_dboc();
dboc->set_loader(std::make_unique<CellLoader>(kv));
for (int t = 1000; t >= 0; t--) {
if (rnd() % 10 == 0) {
dboc = DynamicBagOfCellsDb::create();
dboc = create_dboc();
}
dboc->set_loader(std::make_unique<CellLoader>(kv));
Ref<Cell> old_root;
@ -795,29 +938,64 @@ TEST(TonDb, DynamicBoc) {
if (t != 0) {
dboc->inc(cell);
}
dboc->prepare_commit();
dboc->prepare_commit().ensure();
{
CellStorer cell_storer(*kv);
dboc->commit(cell_storer);
dboc->commit(cell_storer).ensure();
}
}
ASSERT_EQ(0u, kv->count("").ok());
}
template <class F>
void with_all_boc_options(F &&f) {
LOG(INFO) << "Test dynamic boc";
LOG(INFO) << "\ton disk";
f({});
for (auto use_arena : {false, true}) {
for (auto less_memory : {false, true}) {
LOG(INFO) << "\tuse_arena=" << use_arena << " less_memory=" << less_memory;
f(DynamicBagOfCellsDb::CreateInMemoryOptions{.extra_threads = std::thread::hardware_concurrency(),
.verbose = false,
.use_arena = use_arena,
.use_less_memory_during_creation = less_memory});
}
}
}
// Runs test_dynamic_boc once for every configuration that
// with_all_boc_options supplies.
TEST(TonDb, DynamicBoc) {
with_all_boc_options(test_dynamic_boc);
};
TEST(TonDb, DynamicBoc2) {
void test_dynamic_boc2(std::optional<DynamicBagOfCellsDb::CreateInMemoryOptions> o_in_memory) {
int VERBOSITY_NAME(boc) = VERBOSITY_NAME(DEBUG) + 10;
td::Random::Xorshift128plus rnd{123};
int total_roots = 10000;
int max_roots = 20;
std::vector<std::string> root_hashes(max_roots);
std::vector<Ref<Cell>> roots(max_roots);
int last_commit_at = 0;
int first_root_id = 0;
int last_root_id = 0;
auto kv = std::make_shared<td::MemoryKeyValue>();
auto dboc = DynamicBagOfCellsDb::create();
auto create_dboc = [&](td::int64 root_n) {
if (o_in_memory) {
auto res = DynamicBagOfCellsDb::create_in_memory(kv.get(), *o_in_memory);
auto stats = res->get_stats().move_as_ok();
ASSERT_EQ(root_n, stats.roots_total_count);
VLOG(boc) << "reset roots_n=" << stats.roots_total_count << " cells_n=" << stats.cells_total_count;
return res;
}
return DynamicBagOfCellsDb::create();
};
auto dboc = create_dboc(0);
dboc->set_loader(std::make_unique<CellLoader>(kv));
auto counter = [] { return td::NamedThreadSafeCounter::get_default().get_counter("DataCell").sum(); };
auto before = counter();
SCOPE_EXIT {
LOG_CHECK((o_in_memory && o_in_memory->use_arena) || before == counter()) << before << " vs " << counter();
};
std::vector<Ref<Cell>> roots(max_roots);
std::vector<std::string> root_hashes(max_roots);
auto add_root = [&](Ref<Cell> root) {
dboc->inc(root);
root_hashes[last_root_id % max_roots] = (root->get_hash().as_slice().str());
@ -825,18 +1003,23 @@ TEST(TonDb, DynamicBoc2) {
last_root_id++;
};
auto get_root = [&](int root_id) {
auto get_root = [&](int root_id) -> Ref<Cell> {
VLOG(boc) << " from older root #" << root_id;
auto from_root = roots[root_id % max_roots];
if (from_root.is_null()) {
VLOG(boc) << " from db";
auto from_root_hash = root_hashes[root_id % max_roots];
from_root = dboc->load_cell(from_root_hash).move_as_ok();
if (o_in_memory && (rnd() % 2 == 0)) {
from_root = dboc->load_root(from_root_hash).move_as_ok();
} else {
from_root = dboc->load_cell(from_root_hash).move_as_ok();
}
} else {
VLOG(boc) << "FROM MEMORY";
}
return from_root;
};
std::map<CellHash, int> root_cnt;
auto new_root = [&] {
if (last_root_id == total_roots) {
return;
@ -850,7 +1033,9 @@ TEST(TonDb, DynamicBoc2) {
from_root = get_root(rnd.fast(first_root_id, last_root_id - 1));
}
VLOG(boc) << " ...";
add_root(gen_random_cell(rnd.fast(1, 20), from_root, rnd));
auto new_root = gen_random_cell(rnd.fast(1, 20), from_root, rnd);
root_cnt[new_root->get_hash()]++;
add_root(std::move(new_root));
VLOG(boc) << " OK";
};
@ -870,7 +1055,7 @@ TEST(TonDb, DynamicBoc2) {
auto reset = [&] {
VLOG(boc) << "reset";
commit();
dboc = DynamicBagOfCellsDb::create();
dboc = create_dboc(td::int64(root_cnt.size()));
dboc->set_loader(std::make_unique<CellLoader>(kv));
};
@ -879,7 +1064,15 @@ TEST(TonDb, DynamicBoc2) {
if (first_root_id == last_root_id) {
return;
}
dboc->dec(get_root(first_root_id));
auto old_root = get_root(first_root_id);
auto it = root_cnt.find(old_root->get_hash());
it->second--;
CHECK(it->second >= 0);
if (it->second == 0) {
root_cnt.erase(it);
}
dboc->dec(std::move(old_root));
first_root_id++;
VLOG(boc) << " OK";
};
@ -893,6 +1086,10 @@ TEST(TonDb, DynamicBoc2) {
ASSERT_EQ(0u, kv->count("").ok());
}
// Runs test_dynamic_boc2 once for every configuration that
// with_all_boc_options supplies.
TEST(TonDb, DynamicBoc2) {
with_all_boc_options(test_dynamic_boc2);
}
template <class BocDeserializerT>
td::Status test_boc_deserializer(std::vector<Ref<Cell>> cells, int mode) {
auto total_data_cells_before = vm::DataCell::get_total_data_cells();
@ -1848,7 +2045,7 @@ TEST(TonDb, CompactArrayOld) {
SCOPE_EXIT {
ton_db->commit_transaction(std::move(txn));
};
auto smart = txn->begin_smartcontract("");
auto smart = txn->begin_smartcontract();
SCOPE_EXIT {
txn->commit_smartcontract(std::move(smart));
};
@ -1875,7 +2072,7 @@ TEST(TonDb, CompactArrayOld) {
SCOPE_EXIT {
ton_db->commit_transaction(std::move(txn));
};
auto smart = txn->begin_smartcontract("");
auto smart = txn->begin_smartcontract();
//smart->validate_meta();
SCOPE_EXIT {
txn->commit_smartcontract(std::move(smart));
@ -1896,7 +2093,7 @@ TEST(TonDb, CompactArrayOld) {
SCOPE_EXIT {
ton_db->abort_transaction(std::move(txn));
};
auto smart = txn->begin_smartcontract("");
auto smart = txn->begin_smartcontract();
SCOPE_EXIT {
txn->abort_smartcontract(std::move(smart));
};
@ -1950,14 +2147,15 @@ TEST(TonDb, BocRespectsUsageCell) {
ASSERT_STREQ(serialization, serialization_of_virtualized_cell);
}
TEST(TonDb, DynamicBocRespectsUsageCell) {
void test_dynamic_boc_respectes_usage_cell(std::optional<vm::DynamicBagOfCellsDb::CreateInMemoryOptions> o_in_memory) {
td::Random::Xorshift128plus rnd(123);
auto cell = vm::gen_random_cell(20, rnd, true);
auto usage_tree = std::make_shared<vm::CellUsageTree>();
auto usage_cell = vm::UsageCell::create(cell, usage_tree->root_ptr());
auto kv = std::make_shared<td::MemoryKeyValue>();
auto dboc = vm::DynamicBagOfCellsDb::create();
auto dboc = o_in_memory ? vm::DynamicBagOfCellsDb::create_in_memory(kv.get(), *o_in_memory)
: vm::DynamicBagOfCellsDb::create();
dboc->set_loader(std::make_unique<vm::CellLoader>(kv));
dboc->inc(usage_cell);
{
@ -1972,6 +2170,42 @@ TEST(TonDb, DynamicBocRespectsUsageCell) {
ASSERT_STREQ(serialization, serialization_of_virtualized_cell);
}
// Runs test_dynamic_boc_respectes_usage_cell once for every configuration that
// vm::with_all_boc_options supplies.
TEST(TonDb, DynamicBocRespectsUsageCell) {
vm::with_all_boc_options(test_dynamic_boc_respectes_usage_cell);
}
// Serializes the same large cell tree twice and checks that both outputs are
// byte-identical: once directly from the in-memory root with
// std_boc_serialize_to_file, and once with std_boc_serialize_to_file_large
// reading the cells back from a DynamicBagOfCellsDb via its cell db reader.
TEST(TonDb, LargeBocSerializer) {
  // Build the tree: a CompactArray over the values 0..n-1.
  size_t n = 1000000;
  std::vector<td::uint64> data(n);
  std::iota(data.begin(), data.end(), 0);
  vm::CompactArray arr(data);
  auto root = arr.root();

  // Reference serialization straight from the in-memory tree.
  std::string path = "serialization";
  td::unlink(path).ignore();
  auto fd = td::FileFd::open(path, td::FileFd::Flags::Create | td::FileFd::Flags::Truncate | td::FileFd::Flags::Write)
                .move_as_ok();
  // NOTE(review): the result of std_boc_serialize_to_file is not checked;
  // if it returns a status, consider failing the test on error — confirm.
  std_boc_serialize_to_file(root, fd, 31);
  fd.close();
  auto a = td::read_file_str(path).move_as_ok();

  // Store the same tree in a dynamic bag of cells backed by an in-memory KV.
  auto kv = std::make_shared<td::MemoryKeyValue>();
  auto dboc = vm::DynamicBagOfCellsDb::create();
  dboc->set_loader(std::make_unique<vm::CellLoader>(kv));
  dboc->inc(root);
  // Fail loudly if the commit does not succeed; otherwise the comparison below
  // would run against an incomplete database.
  dboc->prepare_commit().ensure();
  vm::CellStorer cell_storer(*kv);
  dboc->commit(cell_storer).ensure();
  dboc->set_loader(std::make_unique<vm::CellLoader>(kv));

  // Serialization through the large-BoC path, reading cells from the db.
  td::unlink(path).ignore();
  fd = td::FileFd::open(path, td::FileFd::Flags::Create | td::FileFd::Flags::Truncate | td::FileFd::Flags::Write)
           .move_as_ok();
  // NOTE(review): same as above — the result is not checked; confirm.
  std_boc_serialize_to_file_large(dboc->get_cell_db_reader(), root->get_hash(), fd, 31);
  fd.close();
  auto b = td::read_file_str(path).move_as_ok();

  // Do not leave the temporary file behind in the working directory.
  td::unlink(path).ignore();

  CHECK(a == b);
}
TEST(TonDb, DoNotMakeListsPrunned) {
auto cell = vm::CellBuilder().store_bytes("abc").finalize();
auto is_prunned = [&](const td::Ref<vm::Cell> &cell) { return true; };
@ -2020,7 +2254,7 @@ TEST(TonDb, CellStat) {
ASSERT_EQ(stat.cells, new_stat.get_stat().cells);
ASSERT_EQ(stat.bits, new_stat.get_stat().bits);
CHECK(usage_tree.unique());
CHECK(usage_tree.use_count() == 1);
usage_tree.reset();
td::Ref<vm::Cell> C, BC, C_proof;
std::shared_ptr<vm::CellUsageTree> usage_tree_B;
@ -2057,7 +2291,6 @@ TEST(Ref, AtomicRef) {
int threads_n = 10;
std::vector<Node> nodes(threads_n);
std::vector<td::thread> threads(threads_n);
int thread_id = 0;
for (auto &thread : threads) {
thread = td::thread([&] {
for (int i = 0; i < 1000000; i++) {
@ -2072,7 +2305,6 @@ TEST(Ref, AtomicRef) {
}
}
});
thread_id++;
}
for (auto &thread : threads) {
thread.join();

View file

@ -1316,7 +1316,7 @@ void CppTypeCode::clear_context() {
std::string CppTypeCode::new_tmp_var() {
char buffer[16];
while (true) {
sprintf(buffer, "t%d", ++tmp_ints);
snprintf(buffer, sizeof(buffer), "t%d", ++tmp_ints);
if (tmp_cpp_ids.is_good_ident(buffer) && local_cpp_ids.is_good_ident(buffer)) {
break;
}

View file

@ -420,7 +420,7 @@ void AdmissibilityInfo::operator|=(const AdmissibilityInfo& other) {
std::size_t i, j, n = info.size(), n1 = other.info.size();
assert(n1 && !(n1 & (n1 - 1)));
for (i = j = 0; i < n; i++) {
info[i] = info[i] | other.info[j];
info[i] = info[i] || other.info[j];
j = (j + 1) & (n1 - 1);
}
}
@ -2511,7 +2511,7 @@ void define_builtins() {
Bits_type = define_builtin_type("bits", "#", false, 1023, 0, true, 0);
for (int i = 1; i <= 257; i++) {
char buff[8];
sprintf(buff, "uint%d", i);
snprintf(buff, sizeof(buff), "uint%d", i);
define_builtin_type(buff + 1, "", false, i, i, true, -1);
if (i < 257) {
define_builtin_type(buff, "", false, i, i, true, 1);
@ -2519,7 +2519,7 @@ void define_builtins() {
}
for (int i = 1; i <= 1023; i++) {
char buff[12];
sprintf(buff, "bits%d", i);
snprintf(buff, sizeof(buff), "bits%d", i);
define_builtin_type(buff, "", false, i, i, true, 0);
}
Eq_type = define_builtin_type("=", "##", false, 0, 0, true);

View file

@ -35,7 +35,7 @@ void Atom::print_to(std::ostream& os) const {
std::string Atom::make_name() const {
char buffer[16];
sprintf(buffer, "atom#%d", index_);
snprintf(buffer, sizeof(buffer), "atom#%d", index_);
return buffer;
}

View file

@ -183,6 +183,9 @@ int BagOfCells::add_root(td::Ref<vm::Cell> add_root) {
// Changes in this function may require corresponding changes in crypto/vm/large-boc-serializer.cpp
td::Status BagOfCells::import_cells() {
if (logger_ptr_) {
logger_ptr_->start_stage("import_cells");
}
cells_clear();
for (auto& root : roots) {
auto res = import_cell(root.cell, 0);
@ -196,6 +199,9 @@ td::Status BagOfCells::import_cells() {
//LOG(INFO) << "[cells: " << cell_count << ", refs: " << int_refs << ", bytes: " << data_bytes
//<< ", internal hashes: " << int_hashes << ", top hashes: " << top_hashes << "]";
CHECK(cell_count != 0);
if (logger_ptr_) {
logger_ptr_->finish_stage(PSLICE() << cell_count << " cells");
}
return td::Status::OK();
}
@ -207,6 +213,9 @@ td::Result<int> BagOfCells::import_cell(td::Ref<vm::Cell> cell, int depth) {
if (cell.is_null()) {
return td::Status::Error("error while importing a cell into a bag of cells: cell is null");
}
if (logger_ptr_) {
TRY_STATUS(logger_ptr_->on_cell_processed());
}
auto it = cells.find(cell->get_hash());
if (it != cells.end()) {
auto pos = it->second;
@ -436,17 +445,19 @@ std::size_t BagOfCells::estimate_serialized_size(int mode) {
return res.ok();
}
BagOfCells& BagOfCells::serialize(int mode) {
td::Status BagOfCells::serialize(int mode) {
std::size_t size_est = estimate_serialized_size(mode);
if (!size_est) {
serialized.clear();
return *this;
return td::Status::OK();
}
serialized.resize(size_est);
if (serialize_to(const_cast<unsigned char*>(serialized.data()), serialized.size(), mode) != size_est) {
TRY_RESULT(size, serialize_to(const_cast<unsigned char*>(serialized.data()), serialized.size(), mode));
if (size != size_est) {
serialized.clear();
return td::Status::Error("serialization failed");
}
return *this;
return td::Status::OK();
}
std::string BagOfCells::serialize_to_string(int mode) {
@ -456,8 +467,8 @@ std::string BagOfCells::serialize_to_string(int mode) {
}
std::string res;
res.resize(size_est, 0);
if (serialize_to(const_cast<unsigned char*>(reinterpret_cast<const unsigned char*>(res.data())), res.size(), mode) ==
res.size()) {
if (serialize_to(const_cast<unsigned char*>(reinterpret_cast<const unsigned char*>(res.data())), res.size(), mode)
.move_as_ok() == res.size()) {
return res;
} else {
return {};
@ -470,8 +481,9 @@ td::Result<td::BufferSlice> BagOfCells::serialize_to_slice(int mode) {
return td::Status::Error("no cells to serialize to this bag of cells");
}
td::BufferSlice res(size_est);
if (serialize_to(const_cast<unsigned char*>(reinterpret_cast<const unsigned char*>(res.data())), res.size(), mode) ==
res.size()) {
TRY_RESULT(size, serialize_to(const_cast<unsigned char*>(reinterpret_cast<const unsigned char*>(res.data())),
res.size(), mode));
if (size == res.size()) {
return std::move(res);
} else {
return td::Status::Error("error while serializing a bag of cells: actual serialized size differs from estimated");
@ -494,14 +506,10 @@ std::string BagOfCells::extract_string() const {
// cell_data:(tot_cells_size * [ uint8 ])
// = BagOfCells;
// Changes in this function may require corresponding changes in crypto/vm/large-boc-serializer.cpp
template<typename WriterT>
std::size_t BagOfCells::serialize_to_impl(WriterT& writer, int mode) {
auto store_ref = [&](unsigned long long value) {
writer.store_uint(value, info.ref_byte_size);
};
auto store_offset = [&](unsigned long long value) {
writer.store_uint(value, info.offset_byte_size);
};
template <typename WriterT>
td::Result<std::size_t> BagOfCells::serialize_to_impl(WriterT& writer, int mode) {
auto store_ref = [&](unsigned long long value) { writer.store_uint(value, info.ref_byte_size); };
auto store_offset = [&](unsigned long long value) { writer.store_uint(value, info.offset_byte_size); };
writer.store_uint(info.magic, 4);
@ -536,6 +544,9 @@ std::size_t BagOfCells::serialize_to_impl(WriterT& writer, int mode) {
DCHECK((unsigned)cell_count == cell_list_.size());
if (info.has_index) {
std::size_t offs = 0;
if (logger_ptr_) {
logger_ptr_->start_stage("generate_index");
}
for (int i = cell_count - 1; i >= 0; --i) {
const Ref<DataCell>& dc = cell_list_[i].dc_ref;
bool with_hash = (mode & Mode::WithIntHashes) && !cell_list_[i].wt;
@ -548,11 +559,20 @@ std::size_t BagOfCells::serialize_to_impl(WriterT& writer, int mode) {
fixed_offset = offs * 2 + cell_list_[i].should_cache;
}
store_offset(fixed_offset);
if (logger_ptr_) {
TRY_STATUS(logger_ptr_->on_cell_processed());
}
}
if (logger_ptr_) {
logger_ptr_->finish_stage("");
}
DCHECK(offs == info.data_size);
}
DCHECK(writer.position() == info.data_offset);
size_t keep_position = writer.position();
if (logger_ptr_) {
logger_ptr_->start_stage("serialize");
}
for (int i = 0; i < cell_count; ++i) {
const auto& dc_info = cell_list_[cell_count - 1 - i];
const Ref<DataCell>& dc = dc_info.dc_ref;
@ -572,6 +592,9 @@ std::size_t BagOfCells::serialize_to_impl(WriterT& writer, int mode) {
// std::cerr << ' ' << k;
}
// std::cerr << std::endl;
if (logger_ptr_) {
TRY_STATUS(logger_ptr_->on_cell_processed());
}
}
writer.chk();
DCHECK(writer.position() - keep_position == info.data_size);
@ -580,11 +603,14 @@ std::size_t BagOfCells::serialize_to_impl(WriterT& writer, int mode) {
unsigned crc = writer.get_crc32();
writer.store_uint(td::bswap32(crc), 4);
}
if (logger_ptr_) {
logger_ptr_->finish_stage(PSLICE() << cell_count << " cells, " << writer.position() << " bytes");
}
DCHECK(writer.empty());
return writer.position();
}
std::size_t BagOfCells::serialize_to(unsigned char* buffer, std::size_t buff_size, int mode) {
td::Result<std::size_t> BagOfCells::serialize_to(unsigned char* buffer, std::size_t buff_size, int mode) {
std::size_t size_est = estimate_serialized_size(mode);
if (!size_est || size_est > buff_size) {
return 0;
@ -599,7 +625,7 @@ td::Status BagOfCells::serialize_to_file(td::FileFd& fd, int mode) {
return td::Status::Error("no cells to serialize to this bag of cells");
}
boc_writers::FileWriter writer{fd, size_est};
size_t s = serialize_to_impl(writer, mode);
TRY_RESULT(s, serialize_to_impl(writer, mode));
TRY_STATUS(writer.finalize());
if (s != size_est) {
return td::Status::Error("error while serializing a bag of cells: actual serialized size differs from estimated");
@ -1001,6 +1027,21 @@ td::Result<td::BufferSlice> std_boc_serialize_multi(std::vector<Ref<Cell>> roots
}
return boc.serialize_to_slice(mode);
}
// Serializes the tree of cells rooted at `root` as a bag of cells and writes it to `fd`.
//
// `mode` is forwarded unchanged to BagOfCells::serialize_to_file().
// `cancellation_token` is handed to a BagOfCellsLogger attached to the bag of cells.
//
// Returns an error if `root` is null, or whatever error import_cells() / serialize_to_file()
// report; otherwise td::Status::OK().
td::Status std_boc_serialize_to_file(Ref<Cell> root, td::FileFd& fd, int mode,
td::CancellationToken cancellation_token) {
if (root.is_null()) {
return td::Status::Error("cannot serialize a null cell reference into a bag of cells");
}
// Measures the whole operation, including the import of cells.
td::Timer timer;
// The logger lives on the stack for the duration of the call; `boc` only keeps a raw pointer to it.
BagOfCellsLogger logger(std::move(cancellation_token));
BagOfCells boc;
boc.set_logger(&logger);
boc.add_root(std::move(root));
TRY_STATUS(boc.import_cells());
TRY_STATUS(boc.serialize_to_file(fd, mode));
// NOTE(review): informational timing is emitted at ERROR severity — presumably for visibility; confirm.
LOG(ERROR) << "serialization took " << timer.elapsed() << "s";
return td::Status::OK();
}
/*
*

View file

@ -27,6 +27,8 @@
#include "td/utils/buffer.h"
#include "td/utils/HashMap.h"
#include "td/utils/HashSet.h"
#include "td/utils/Time.h"
#include "td/utils/Timer.h"
#include "td/utils/port/FileFd.h"
namespace vm {
@ -199,6 +201,43 @@ struct CellSerializationInfo {
td::Result<Ref<DataCell>> create_data_cell(td::Slice data, td::Span<Ref<Cell>> refs) const;
};
// Progress reporter used while a bag of cells is being built and serialized.
// It times named stages, periodically logs a cells-per-second figure and polls a
// cancellation token so that a long-running serialization can be aborted.
class BagOfCellsLogger {
 public:
  BagOfCellsLogger() = default;
  explicit BagOfCellsLogger(td::CancellationToken cancellation_token)
      : cancellation_token_(std::move(cancellation_token)) {
  }

  // Starts a new stage: schedules the next speed report, clears the per-period
  // cell counter, restarts the stage timer and remembers the stage name.
  void start_stage(std::string stage) {
    log_speed_at_ = td::Timestamp::in(LOG_SPEED_PERIOD);
    processed_cells_ = 0;
    timer_ = {};
    stage_ = std::move(stage);
  }

  // Logs how long the current stage took, followed by the caller-supplied description.
  void finish_stage(td::Slice desc) {
    LOG(ERROR) << "serializer: " << stage_ << " took " << timer_.elapsed() << "s, " << desc;
  }

  // Must be called once per processed cell.
  // Every 1000th cell the cancellation token is checked and its error (if any) is returned.
  // Once the report deadline has passed, the speed for the elapsed period is logged
  // and the per-period counter starts over.
  td::Status on_cell_processed() {
    ++processed_cells_;
    const bool time_to_poll_cancellation = processed_cells_ % 1000 == 0;
    if (time_to_poll_cancellation) {
      TRY_STATUS(cancellation_token_.check());
    }
    if (!log_speed_at_.is_in_past()) {
      return td::Status::OK();
    }
    log_speed_at_ += LOG_SPEED_PERIOD;
    const double cells_per_second = static_cast<double>(processed_cells_) / LOG_SPEED_PERIOD;
    LOG(WARNING) << "serializer: " << stage_ << " " << cells_per_second << " cells/s";
    processed_cells_ = 0;
    return td::Status::OK();
  }

 private:
  std::string stage_;
  td::Timer timer_;
  td::CancellationToken cancellation_token_;
  td::Timestamp log_speed_at_;
  size_t processed_cells_ = 0;
  // Interval between two speed reports, in seconds.
  static constexpr double LOG_SPEED_PERIOD = 120.0;
};
class BagOfCells {
public:
enum { hash_bytes = vm::Cell::hash_bytes, default_max_roots = 16384 };
@ -283,6 +322,7 @@ class BagOfCells {
const unsigned char* index_ptr{nullptr};
const unsigned char* data_ptr{nullptr};
std::vector<unsigned long long> custom_index;
BagOfCellsLogger* logger_ptr_{nullptr};
public:
void clear();
@ -292,14 +332,17 @@ class BagOfCells {
int add_root(td::Ref<vm::Cell> add_root);
td::Status import_cells() TD_WARN_UNUSED_RESULT;
BagOfCells() = default;
void set_logger(BagOfCellsLogger* logger_ptr) {
logger_ptr_ = logger_ptr;
}
std::size_t estimate_serialized_size(int mode = 0);
BagOfCells& serialize(int mode = 0);
std::string serialize_to_string(int mode = 0);
td::Status serialize(int mode = 0);
td::string serialize_to_string(int mode = 0);
td::Result<td::BufferSlice> serialize_to_slice(int mode = 0);
std::size_t serialize_to(unsigned char* buffer, std::size_t buff_size, int mode = 0);
td::Result<std::size_t> serialize_to(unsigned char* buffer, std::size_t buff_size, int mode = 0);
td::Status serialize_to_file(td::FileFd& fd, int mode = 0);
template<typename WriterT>
std::size_t serialize_to_impl(WriterT& writer, int mode = 0);
template <typename WriterT>
td::Result<std::size_t> serialize_to_impl(WriterT& writer, int mode = 0);
std::string extract_string() const;
td::Result<long long> deserialize(const td::Slice& data, int max_roots = default_max_roots);
@ -345,6 +388,8 @@ td::Result<std::vector<Ref<Cell>>> std_boc_deserialize_multi(td::Slice data,
int max_roots = BagOfCells::default_max_roots);
td::Result<td::BufferSlice> std_boc_serialize_multi(std::vector<Ref<Cell>> root, int mode = 0);
td::Status std_boc_serialize_to_file(Ref<Cell> root, td::FileFd& fd, int mode = 0,
td::CancellationToken cancellation_token = {});
td::Status std_boc_serialize_to_file_large(std::shared_ptr<CellDbReader> reader, Cell::Hash root_hash, td::FileFd& fd,
int mode = 0, td::CancellationToken cancellation_token = {});

View file

@ -19,6 +19,7 @@
#pragma once
#include "common/refcnt.hpp"
#include "common/bitstring.h"
#include "td/utils/HashSet.h"
#include "vm/cells/CellHash.h"
#include "vm/cells/CellTraits.h"
@ -86,4 +87,31 @@ class Cell : public CellTraits {
};
std::ostream& operator<<(std::ostream& os, const Cell& c);
using is_transparent = void; // Pred to use
// Overload set that maps different key representations onto a vm::CellHash.
// CellEqF and CellHashF call as_cell_hash() on their arguments, so every type
// with an overload here can be used as a lookup key.

// A cell reference is identified by the hash of the referenced cell.
// The reference is dereferenced unconditionally, so it must not be null.
inline vm::CellHash as_cell_hash(const Ref<Cell>& cell) {
return cell->get_hash();
}
// Raw hash bytes are converted with CellHash::from_slice().
inline vm::CellHash as_cell_hash(td::Slice hash) {
return vm::CellHash::from_slice(hash);
}
// A CellHash is already in the target form and is returned as is.
inline vm::CellHash as_cell_hash(vm::CellHash hash) {
return hash;
}
// Transparent equality predicate: two keys compare equal when as_cell_hash()
// yields equal hashes for them. The two operands may be of different types.
struct CellEqF {
  using is_transparent = void;  // Pred to use

  template <class LhsT, class RhsT>
  bool operator()(const LhsT& lhs, const RhsT& rhs) const {
    auto lhs_hash = as_cell_hash(lhs);
    auto rhs_hash = as_cell_hash(rhs);
    return lhs_hash == rhs_hash;
  }
};
// Transparent hasher paired with CellEqF: any key accepted by as_cell_hash()
// is reduced to a CellHash and then hashed with cell_hash_slice_hash().
struct CellHashF {
  using is_transparent = void;  // Pred to use
  using transparent_key_equal = CellEqF;

  template <class KeyT>
  size_t operator()(const KeyT& key) const {
    auto cell_hash = as_cell_hash(key);
    return cell_hash_slice_hash(cell_hash.as_slice());
  }
};
using CellHashSet = td::HashSet<td::Ref<Cell>, CellHashF, CellEqF>;
} // namespace vm

View file

@ -617,7 +617,7 @@ std::string CellBuilder::to_hex() const {
int len = serialize(buff, sizeof(buff));
char hex_buff[Cell::max_serialized_bytes * 2 + 1];
for (int i = 0; i < len; i++) {
sprintf(hex_buff + 2 * i, "%02x", buff[i]);
snprintf(hex_buff + 2 * i, sizeof(hex_buff) - 2 * i, "%02x", buff[i]);
}
return hex_buff;
}

View file

@ -74,13 +74,17 @@ struct CellHash {
};
} // namespace vm
// Derives a hash-table hash from the raw bytes of a cell hash.
inline size_t cell_hash_slice_hash(td::Slice hash) {
  // use offset 8, because in db keys are grouped by first bytes.
  const td::Slice window = hash.substr(8, 8);
  return td::as<size_t>(window.ubegin());
}
namespace std {
template <>
struct hash<vm::CellHash> {
typedef vm::CellHash argument_type;
typedef std::size_t result_type;
result_type operator()(argument_type const& s) const noexcept {
return td::as<size_t>(s.as_slice().ubegin());
return cell_hash_slice_hash(s.as_slice());
}
};
} // namespace std

View file

@ -976,7 +976,7 @@ void CellSlice::dump(std::ostream& os, int level, bool endl) const {
os << "; refs: " << refs_st << ".." << refs_en;
if (level > 2) {
char tmp[64];
std::sprintf(tmp, "; ptr=data+%ld; z=%016llx",
std::snprintf(tmp, sizeof(tmp), "; ptr=data+%ld; z=%016llx",
static_cast<long>(ptr && cell.not_null() ? ptr - cell->get_data() : -1), static_cast<long long>(z));
os << tmp << " (have " << size() << " bits; " << zd << " preloaded)";
}

View file

@ -20,6 +20,15 @@
namespace vm {
namespace detail {
// Allocation policy that places objects on the regular heap.
// make_unique<T>(args...) constructs a T and returns it as an owning pointer to CellT,
// so T has to be CellT itself or a type whose pointer converts to CellT*.
template <typename CellT>
struct DefaultAllocator {
  template <typename T, typename... ArgsT>
  std::unique_ptr<CellT> make_unique(ArgsT&&... args) {
    auto concrete = std::make_unique<T>(std::forward<ArgsT>(args)...);
    return std::unique_ptr<CellT>(std::move(concrete));
  }
};
template <class CellT, size_t Size = 0>
class CellWithArrayStorage : public CellT {
public:
@ -29,14 +38,14 @@ class CellWithArrayStorage : public CellT {
~CellWithArrayStorage() {
CellT::destroy_storage(get_storage());
}
template <class... ArgsT>
static std::unique_ptr<CellT> create(size_t storage_size, ArgsT&&... args) {
template <class Allocator, class... ArgsT>
static auto create(Allocator allocator, size_t storage_size, ArgsT&&... args) {
static_assert(CellT::max_storage_size <= 40 * 8, "");
//size = 128 + 32 + 8;
auto size = (storage_size + 7) / 8;
#define CASE(size) \
case (size): \
return std::make_unique<CellWithArrayStorage<CellT, (size)*8>>(std::forward<ArgsT>(args)...);
return allocator. template make_unique<CellWithArrayStorage<CellT, (size) * 8>>(std::forward<ArgsT>(args)...);
#define CASE2(offset) CASE(offset) CASE(offset + 1)
#define CASE8(offset) CASE2(offset) CASE2(offset + 2) CASE2(offset + 4) CASE2(offset + 6)
#define CASE32(offset) CASE8(offset) CASE8(offset + 8) CASE8(offset + 16) CASE8(offset + 24)
@ -48,6 +57,10 @@ class CellWithArrayStorage : public CellT {
LOG(FATAL) << "TOO BIG " << storage_size;
UNREACHABLE();
}
template <class... ArgsT>
static std::unique_ptr<CellT> create(size_t storage_size, ArgsT&&... args) {
return create(DefaultAllocator<CellT>{}, storage_size, std::forward<ArgsT>(args)...);
}
private:
alignas(alignof(void*)) char storage_[Size];

View file

@ -25,7 +25,44 @@
#include "vm/cells/CellWithStorage.h"
namespace vm {
thread_local bool DataCell::use_arena = false;
namespace {
// Allocation policy that carves objects out of large per-thread batches.
// Batches are obtained with make_unique<char[]> and immediately released from their
// owner, so nothing in this struct ever gives the memory back.
template <class CellT>
struct ArenaAllocator {
  // Constructs a T inside arena memory and returns it wrapped in a unique_ptr.
  // NOTE(review): the returned pointer uses the default deleter although the storage was
  // not obtained with `new T` — callers apparently must make sure it is never deleted; confirm.
  template <class T, class... ArgsT>
  std::unique_ptr<CellT> make_unique(ArgsT&&... args) {
    char* raw = fast_alloc(sizeof(T));
    return std::unique_ptr<CellT>(std::unique_ptr<T>(new (raw) T(std::forward<ArgsT>(args)...)));
  }

 private:
  // Allocates a fresh 1 MiB batch and detaches it from its owner.
  td::MutableSlice alloc_batch() {
    const size_t batch_size = 1 << 20;
    char* storage = std::make_unique<char[]>(batch_size).release();
    return td::MutableSlice(storage, batch_size);
  }

  // Returns `size` bytes from the current thread's batch, starting a new batch when the
  // remainder is too small. The batch cursor advances by `size` rounded up to a multiple of 8.
  char* fast_alloc(size_t size) {
    thread_local td::MutableSlice batch;
    const size_t aligned_size = (size + 7) & ~static_cast<size_t>(7);
    if (batch.size() < size) {
      batch = alloc_batch();
    }
    char* chunk = batch.begin();
    batch.remove_prefix(aligned_size);
    return chunk;
  }
};
}
// Creates an uninitialized DataCell with storage sized for `info`.
// When the thread-local flag `use_arena` is set, the cell is placed in arena memory via
// ArenaAllocator and CellWithArrayStorage; otherwise it is created with CellWithUniquePtrStorage.
std::unique_ptr<DataCell> DataCell::create_empty_data_cell(Info info) {
if (use_arena) {
ArenaAllocator<DataCell> allocator;
auto res = detail::CellWithArrayStorage<DataCell>::create(allocator, info.get_storage_size(), info);
// this is dangerous
// A temporary Ref is built from the raw pointer and released right away.
// NOTE(review): presumably this leaves one reference that is never dropped, so an
// arena-backed cell is never destroyed through the regular path — confirm.
Ref<DataCell>(res.get()).release();
return res;
}
return detail::CellWithUniquePtrStorage<DataCell>::create(info.get_storage_size(), info);
}
@ -359,7 +396,7 @@ std::string DataCell::to_hex() const {
int len = serialize(buff, sizeof(buff));
char hex_buff[max_serialized_bytes * 2 + 1];
for (int i = 0; i < len; i++) {
sprintf(hex_buff + 2 * i, "%02x", buff[i]);
snprintf(hex_buff + 2 * i, sizeof(hex_buff) - 2 * i, "%02x", buff[i]);
}
return hex_buff;
}

View file

@ -27,6 +27,9 @@ namespace vm {
class DataCell : public Cell {
public:
// NB: cells created with use_arena=true are never freed
static thread_local bool use_arena;
DataCell(const DataCell& other) = delete;
~DataCell() override;
@ -121,10 +124,6 @@ class DataCell : public Cell {
void destroy_storage(char* storage);
explicit DataCell(Info info);
Cell* get_ref_raw_ptr(unsigned idx) const {
DCHECK(idx < get_refs_cnt());
return info_.get_refs(get_storage())[idx];
}
public:
td::Result<LoadedCell> load_cell() const override {
@ -152,6 +151,20 @@ class DataCell : public Cell {
return Ref<Cell>(get_ref_raw_ptr(idx));
}
Cell* get_ref_raw_ptr(unsigned idx) const {
DCHECK(idx < get_refs_cnt());
return info_.get_refs(get_storage())[idx];
}
Ref<Cell> reset_ref_unsafe(unsigned idx, Ref<Cell> ref, bool check_hash = true) {
CHECK(idx < get_refs_cnt());
auto refs = info_.get_refs(get_storage());
CHECK(!check_hash || refs[idx]->get_hash() == ref->get_hash());
auto res = Ref<Cell>(refs[idx], Ref<Cell>::acquire_t{}); // call destructor
refs[idx] = ref.release();
return res;
}
td::uint32 get_virtualization() const override {
return info_.virtualization_;
}
@ -173,6 +186,9 @@ class DataCell : public Cell {
return ((get_bits() + 23) >> 3) +
(with_hashes ? get_level_mask().get_hashes_count() * (hash_bytes + depth_bytes) : 0);
}
size_t get_storage_size() const {
return info_.get_storage_size();
}
int serialize(unsigned char* buff, int buff_size, bool with_hashes = false) const;
std::string serialize() const;
std::string to_hex() const;
@ -207,6 +223,9 @@ class DataCell : public Cell {
};
std::ostream& operator<<(std::ostream& os, const DataCell& c);
inline CellHash as_cell_hash(const Ref<DataCell>& cell) {
return cell->get_hash();
}
} // namespace vm

View file

@ -30,18 +30,27 @@ struct PrunnedCellInfo {
template <class ExtraT>
class PrunnedCell : public Cell {
public:
ExtraT& get_extra() {
return extra_;
}
const ExtraT& get_extra() const {
return extra_;
}
static td::Result<Ref<PrunnedCell<ExtraT>>> create(const PrunnedCellInfo& prunned_cell_info, ExtraT&& extra) {
return create(detail::DefaultAllocator<PrunnedCell<ExtraT>>(), prunned_cell_info, std::forward<ExtraT>(extra));
}
template <class AllocatorT>
static td::Result<Ref<PrunnedCell<ExtraT>>> create(AllocatorT allocator, const PrunnedCellInfo& prunned_cell_info,
ExtraT&& extra) {
auto level_mask = prunned_cell_info.level_mask;
if (level_mask.get_level() > max_level) {
return td::Status::Error("Level is too big");
}
Info info(level_mask);
auto prunned_cell =
detail::CellWithUniquePtrStorage<PrunnedCell<ExtraT>>::create(info.get_storage_size(), info, std::move(extra));
detail::CellWithArrayStorage<PrunnedCell<ExtraT>>::create(allocator, info.get_storage_size(), info, std::move(extra));
TRY_STATUS(prunned_cell->init(prunned_cell_info));
return Ref<PrunnedCell<ExtraT>>(prunned_cell.release(), typename Ref<PrunnedCell<ExtraT>>::acquire_t{});
}
@ -51,6 +60,7 @@ class PrunnedCell : public Cell {
}
protected:
static constexpr auto max_storage_size = (max_level + 1) * (hash_bytes + sizeof(td::uint16));
struct Info {
Info(LevelMask level_mask) {
level_mask_ = level_mask.get_mask() & 7;

View file

@ -19,7 +19,7 @@
#pragma once
#include "td/utils/Slice.h"
#include "td/utils/HashSet.h"
#include <set>
namespace vm {
@ -73,6 +73,6 @@ class CellHashTable {
}
private:
std::set<InfoT, std::less<>> set_;
td::NodeHashSet<InfoT, typename InfoT::Hash, typename InfoT::Eq> set_;
};
} // namespace vm

View file

@ -33,6 +33,7 @@ class RefcntCellStorer {
template <class StorerT>
void store(StorerT &storer) const {
TD_PERF_COUNTER(cell_store);
using td::store;
if (as_boc_) {
td::int32 tag = -1;
@ -151,18 +152,27 @@ CellLoader::CellLoader(std::shared_ptr<KeyValueReader> reader, std::function<voi
td::Result<CellLoader::LoadResult> CellLoader::load(td::Slice hash, bool need_data, ExtCellCreator &ext_cell_creator) {
//LOG(ERROR) << "Storage: load cell " << hash.size() << " " << td::base64_encode(hash);
LoadResult res;
TD_PERF_COUNTER(cell_load);
std::string serialized;
TRY_RESULT(get_status, reader_->get(hash, serialized));
if (get_status != KeyValue::GetStatus::Ok) {
DCHECK(get_status == KeyValue::GetStatus::NotFound);
return res;
return LoadResult{};
}
TRY_RESULT(res, load(hash, serialized, need_data, ext_cell_creator));
if (on_load_callback_) {
on_load_callback_(res);
}
return res;
}
td::Result<CellLoader::LoadResult> CellLoader::load(td::Slice hash, td::Slice value, bool need_data,
ExtCellCreator &ext_cell_creator) {
LoadResult res;
res.status = LoadResult::Ok;
RefcntCellParser refcnt_cell(need_data);
td::TlParser parser(serialized);
td::TlParser parser(value);
refcnt_cell.parse(parser, ext_cell_creator);
TRY_STATUS(parser.get_status());
@ -170,9 +180,6 @@ td::Result<CellLoader::LoadResult> CellLoader::load(td::Slice hash, bool need_da
res.cell_ = std::move(refcnt_cell.cell);
res.stored_boc_ = refcnt_cell.stored_boc_;
//CHECK(res.cell_->get_hash() == hash);
if (on_load_callback_) {
on_load_callback_(res);
}
return res;
}
@ -184,7 +191,11 @@ td::Status CellStorer::erase(td::Slice hash) {
return kv_.erase(hash);
}
std::string CellStorer::serialize_value(td::int32 refcnt, const td::Ref<DataCell> &cell, bool as_boc) {
return td::serialize(RefcntCellStorer(refcnt, cell, as_boc));
}
td::Status CellStorer::set(td::int32 refcnt, const td::Ref<DataCell> &cell, bool as_boc) {
return kv_.set(cell->get_hash().as_slice(), td::serialize(RefcntCellStorer(refcnt, cell, as_boc)));
return kv_.set(cell->get_hash().as_slice(), serialize_value(refcnt, cell, as_boc));
}
} // namespace vm

View file

@ -49,6 +49,7 @@ class CellLoader {
};
CellLoader(std::shared_ptr<KeyValueReader> reader, std::function<void(const LoadResult &)> on_load_callback = {});
td::Result<LoadResult> load(td::Slice hash, bool need_data, ExtCellCreator &ext_cell_creator);
static td::Result<LoadResult> load(td::Slice hash, td::Slice value, bool need_data, ExtCellCreator &ext_cell_creator);
private:
std::shared_ptr<KeyValueReader> reader_;
@ -60,6 +61,7 @@ class CellStorer {
CellStorer(KeyValue &kv);
td::Status erase(td::Slice hash);
td::Status set(td::int32 refcnt, const td::Ref<DataCell> &cell, bool as_boc);
static std::string serialize_value(td::int32 refcnt, const td::Ref<DataCell> &cell, bool as_boc);
private:
KeyValue &kv_;

View file

@ -60,6 +60,20 @@ struct CellInfo {
bool operator<(const CellInfo &other) const {
return key() < other.key();
}
// Transparent equality for hash-table lookups: a CellInfo can be compared with another
// CellInfo or directly with the raw bytes of a hash, in either argument order.
struct Eq {
  using is_transparent = void;  // Pred to use

  bool operator()(const CellInfo &info, const CellInfo &other_info) const {
    return info.key() == other_info.key();
  }
  bool operator()(const CellInfo &info, td::Slice hash) const {
    return info.key().as_slice() == hash;
  }
  // Mirror of the overload above for the swapped argument order.
  bool operator()(td::Slice hash, const CellInfo &info) const {
    return (*this)(info, hash);
  }
};
// Transparent hasher matching Eq: raw hash bytes and CellInfo objects holding the same
// key produce the same value, because both go through cell_hash_slice_hash().
struct Hash {
  using is_transparent = void;  // Pred to use
  using transparent_key_equal = Eq;

  size_t operator()(td::Slice hash) const {
    return cell_hash_slice_hash(hash);
  }
  size_t operator()(const CellInfo &info) const {
    const auto &key = info.key();
    return cell_hash_slice_hash(key.as_slice());
  }
};
};
bool operator<(const CellInfo &a, td::Slice b) {
@ -86,6 +100,12 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat
TRY_RESULT(loaded_cell, get_cell_info_force(hash).cell->load_cell());
return std::move(loaded_cell.data_cell);
}
td::Result<Ref<DataCell>> load_root(td::Slice hash) override {
return load_cell(hash);
}
td::Result<Ref<DataCell>> load_root_thread_safe(td::Slice hash) const override {
return td::Status::Error("Not implemented");
}
void load_cell_async(td::Slice hash, std::shared_ptr<AsyncExecutor> executor,
td::Promise<Ref<DataCell>> promise) override {
auto info = hash_table_.get_if_exists(hash);

View file

@ -23,6 +23,11 @@
#include "td/utils/Status.h"
#include "td/actor/PromiseFuture.h"
#include <thread>
namespace td {
class KeyValueReader;
}
namespace vm {
class CellLoader;
class CellStorer;
@ -45,12 +50,20 @@ class DynamicBagOfCellsDb {
public:
virtual ~DynamicBagOfCellsDb() = default;
virtual td::Result<Ref<DataCell>> load_cell(td::Slice hash) = 0;
virtual td::Result<Ref<DataCell>> load_root(td::Slice hash) = 0;
virtual td::Result<Ref<DataCell>> load_root_thread_safe(td::Slice hash) const = 0;
struct Stats {
td::int64 roots_total_count{0};
td::int64 cells_total_count{0};
td::int64 cells_total_size{0};
void apply_diff(Stats diff) {
std::vector<std::pair<std::string, std::string>> custom_stats;
void apply_diff(const Stats &diff) {
roots_total_count += diff.roots_total_count;
cells_total_count += diff.cells_total_count;
cells_total_size += diff.cells_total_size;
CHECK(roots_total_count >= 0);
CHECK(cells_total_count >= 0);
CHECK(cells_total_size >= 0);
}
};
virtual void inc(const Ref<Cell> &old_root) = 0;
@ -58,6 +71,9 @@ class DynamicBagOfCellsDb {
virtual td::Status prepare_commit() = 0;
virtual Stats get_stats_diff() = 0;
virtual td::Result<Stats> get_stats() {
return td::Status::Error("Not implemented");
}
virtual td::Status commit(CellStorer &) = 0;
virtual std::shared_ptr<CellDbReader> get_cell_db_reader() = 0;
@ -65,13 +81,24 @@ class DynamicBagOfCellsDb {
virtual td::Status set_loader(std::unique_ptr<CellLoader> loader) = 0;
virtual void set_celldb_compress_depth(td::uint32 value) = 0;
virtual vm::ExtCellCreator& as_ext_cell_creator() = 0;
virtual vm::ExtCellCreator &as_ext_cell_creator() = 0;
static std::unique_ptr<DynamicBagOfCellsDb> create();
// Options accepted by create_in_memory().
struct CreateInMemoryOptions {
// Defaults to the value reported by std::thread::hardware_concurrency().
size_t extra_threads{std::thread::hardware_concurrency()};
bool verbose{true};
// Allocated DataCells will never be deleted
bool use_arena{false};
// Almost no overhead in memory during creation, but will scan database twice
bool use_less_memory_during_creation{true};
};
static std::unique_ptr<DynamicBagOfCellsDb> create_in_memory(td::KeyValueReader *kv, CreateInMemoryOptions options);
class AsyncExecutor {
public:
virtual ~AsyncExecutor() {}
virtual ~AsyncExecutor() {
}
virtual void execute_async(std::function<void()> f) = 0;
virtual void execute_sync(std::function<void()> f) = 0;
};

View file

@ -0,0 +1,984 @@
#include "CellStorage.h"
#include "DynamicBagOfCellsDb.h"
#include "td/utils/Timer.h"
#include "td/utils/base64.h"
#include "td/utils/format.h"
#include "td/utils/int_types.h"
#include "td/utils/misc.h"
#include "td/utils/port/Stat.h"
#include "vm/cells/CellHash.h"
#include "vm/cells/CellSlice.h"
#include "vm/cells/DataCell.h"
#include "vm/cells/ExtCell.h"
#include "td/utils/HashMap.h"
#include "td/utils/HashSet.h"
#include <optional>
#if TD_PORT_POSIX
#include <sys/mman.h>
#include <unistd.h>
#endif
namespace vm {
namespace {
constexpr bool use_dense_hash_map = true;
template <class F>
void parallel_run(size_t n, F &&run_task, size_t extra_threads_n) {
std::atomic<size_t> next_task_id{0};
auto loop = [&] {
while (true) {
auto task_id = next_task_id++;
if (task_id >= n) {
break;
}
run_task(task_id);
}
};
// NB: it could be important that td::thread is used, not std::thread
std::vector<td::thread> threads;
for (size_t i = 0; i < extra_threads_n; i++) {
threads.emplace_back(loop);
}
loop();
for (auto &thread : threads) {
thread.join();
}
threads.clear();
}
// Debug guard asserting exclusive use: lock() fails a CHECK if the guard is already held,
// instead of waiting. The returned Lock clears the flag when it is destroyed.
struct UniqueAccess {
  // Deleter of Lock; gives the access back unless the pointer is null.
  struct Release {
    void operator()(UniqueAccess *access) const {
      if (access == nullptr) {
        return;
      }
      access->release();
    }
  };
  using Lock = std::unique_ptr<UniqueAccess, Release>;

  // Marks the guard as held; it is a fatal error if somebody else holds it already.
  Lock lock() {
    CHECK(!locked_.exchange(true));
    return Lock{this};
  }

 private:
  std::atomic<bool> locked_{false};

  void release() {
    locked_.store(false);
  }
};
// ExtCellCreator that represents every external cell as a PrunnedCell<td::Unit>
// created through the default PrunnedCell::create() overload.
class DefaultPrunnedCellCreator : public ExtCellCreator {
 public:
  td::Result<Ref<Cell>> ext_cell(Cell::LevelMask level_mask, td::Slice hash, td::Slice depth) override {
    PrunnedCellInfo prunned_info{level_mask, hash, depth};
    TRY_RESULT(cell, PrunnedCell<td::Unit>::create(prunned_info, td::Unit{}));
    return cell;
  }
};
// ExtCellCreator that places pruned cells into a shared arena made of large memory batches
// instead of allocating each cell separately. Live cells are counted through the Counter
// extra stored in each cell; clear_arena() drops all batches at once and requires the
// count to be zero.
class ArenaPrunnedCellCreator : public ExtCellCreator {
  struct ArenaAllocator {
    ArenaAllocator() {
      // only one instance ever: the lock is taken and its owner is released without
      // unlocking, so constructing a second ArenaAllocator fails the CHECK in lock().
      static UniqueAccess unique_access;
      [[maybe_unused]] auto ptr = unique_access.lock().release();
    }

    std::mutex mutex;

    // Allocates and frees one arena batch of `batch_size` bytes.
    struct Deleter {
      static constexpr size_t batch_size = 1 << 24;
#if TD_PORT_POSIX
      static std::unique_ptr<char, Deleter> alloc() {
        void *mapping = mmap(nullptr, batch_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        // mmap reports failure with MAP_FAILED, not with a null pointer.
        CHECK(mapping != MAP_FAILED);
        return std::unique_ptr<char, Deleter>(static_cast<char *>(mapping));
      }
      void operator()(char *ptr) const {
        munmap(ptr, batch_size);
      }
#else
      static std::unique_ptr<char, Deleter> alloc() {
        auto ptr = reinterpret_cast<char *>(malloc(batch_size));
        CHECK(ptr != nullptr);
        return std::unique_ptr<char, Deleter>(ptr);
      }
      void operator()(char *ptr) const {
        free(ptr);
      }
#endif
    };

    // All batches handed out since the last clear(); guarded by `mutex`.
    std::vector<std::unique_ptr<char, Deleter>> arena;
    // Incremented by clear() so that threads stop using a batch that was just freed.
    td::uint64 arena_generation{0};

    // Allocates a new batch, registers it in `arena` and returns its memory.
    td::MutableSlice alloc_batch() {
      auto batch = Deleter::alloc();
      auto res = td::MutableSlice(batch.get(), Deleter::batch_size);
      std::lock_guard<std::mutex> guard(mutex);
      arena.emplace_back(std::move(batch));
      return res;
    }

    // Returns `size` bytes from the calling thread's current batch. A new batch is taken when
    // the remainder is too small or when the arena was cleared since the batch was obtained.
    // The batch cursor advances by `size` rounded up to a multiple of 8.
    // NOTE(review): arena_generation is read here without holding `mutex`, while clear()
    // updates it under the lock — confirm clear() never runs concurrently with allocations.
    char *alloc(size_t size) {
      thread_local td::MutableSlice batch;
      thread_local td::uint64 batch_generation{0};
      auto aligned_size = (size + 7) / 8 * 8;
      if (batch.size() < size || batch_generation != arena_generation) {
        batch = alloc_batch();
        batch_generation = arena_generation;
      }
      auto res = batch.begin();
      batch.remove_prefix(aligned_size);
      return res;
    }

    // Frees every batch and starts a new generation.
    void clear() {
      std::lock_guard<std::mutex> guard(mutex);
      arena_generation++;
      td::reset_to_empty(arena);
    }
  };

  static ArenaAllocator arena_;
  static td::ThreadSafeCounter cells_count_;

 public:
  // Extra payload of arena cells: every constructed instance (including copies and moves)
  // adds one to cells_count_, every destroyed instance subtracts one.
  struct Counter {
    Counter() {
      cells_count_.add(1);
    }
    Counter(Counter &&other) {
      cells_count_.add(1);
    }
    Counter(const Counter &other) {
      cells_count_.add(1);
    }
    ~Counter() {
      cells_count_.add(-1);
    }
  };

  // Allocation policy passed to PrunnedCell::create(); objects are constructed inside the arena.
  // NOTE(review): the returned unique_ptr uses the default deleter although the storage comes
  // from the arena — callers apparently release() it before it could delete; confirm.
  struct Allocator {
    template <class T, class... ArgsT>
    std::unique_ptr<PrunnedCell<Counter>> make_unique(ArgsT &&...args) {
      auto *ptr = arena_.alloc(sizeof(T));
      T *obj = new (ptr) T(std::forward<ArgsT>(args)...);
      return std::unique_ptr<T>(obj);
    }
  };

  td::Result<Ref<Cell>> ext_cell(Cell::LevelMask level_mask, td::Slice hash, td::Slice depth) override {
    Allocator allocator;
    TRY_RESULT(cell, PrunnedCell<Counter>::create(allocator, PrunnedCellInfo{level_mask, hash, depth}, Counter()));
    return cell;
  }

  // Current value of the Counter-instance counter.
  static td::int64 count() {
    return cells_count_.sum();
  }

  // Releases all arena memory. It is a fatal error to call this while the counter is non-zero.
  static void clear_arena() {
    LOG_CHECK(cells_count_.sum() == 0) << cells_count_.sum();
    arena_.clear();
  }
};
td::ThreadSafeCounter ArenaPrunnedCellCreator::cells_count_;
ArenaPrunnedCellCreator::ArenaAllocator ArenaPrunnedCellCreator::arena_;
// Hash-table entry of the in-memory cell storage: a data cell together with its
// database reference count.
struct CellInfo {
// `mutable` so that the count can be changed through the const pointers/references
// returned by the hash tables.
mutable td::int32 db_refcnt{0};
Ref<DataCell> cell;
};
// The tables store CellInfo by value; keep the entry at 16 bytes.
static_assert(sizeof(CellInfo) == 16);
// Lets CellHashF / CellEqF use a CellInfo as a key. `info.cell` is dereferenced
// unconditionally, so it must not be null.
CellHash as_cell_hash(const CellInfo &info) {
return info.cell->get_hash();
}
struct CellInfoHashTableBaseline {
td::HashSet<CellInfo, CellHashF, CellEqF> ht_;
const CellInfo *find(CellHash hash) const {
if (auto it = ht_.find(hash); it != ht_.end()) {
return &*it;
}
return nullptr;
}
void erase(CellHash hash) {
auto it = ht_.find(hash);
CHECK(it != ht_.end());
ht_.erase(it);
}
void insert(CellInfo info) {
ht_.insert(std::move(info));
}
template <class Iterator>
void init_from(Iterator begin, Iterator end) {
ht_ = td::HashSet<CellInfo, CellHashF, CellEqF>(begin, end);
}
size_t size() const {
return ht_.size();
}
auto begin() const {
return ht_.begin();
}
auto end() const {
return ht_.end();
}
size_t bucket_count() const {
return ht_.bucket_count();
}
template <class F>
auto for_each(F &&f) {
for (auto &it : ht_) {
f(it);
}
}
};
// CellInfo table made of two parts:
//  * a dense part built once by init_from(): entries are stored contiguously in
//    dense_ht_values_, grouped by bucket, with dense_ht_offsets_[b] .. dense_ht_offsets_[b + 1]
//    delimiting bucket b;
//  * new_ht_, a regular hash set that takes entries for which the dense bucket has no free slot.
// Erasing from the dense part leaves an empty slot (null cell) that a later insert may reuse.
struct CellInfoHashTableDense {
  size_t dense_ht_size_{0};
  size_t dense_ht_buckets_{1};
  // One (empty) bucket needs two offsets. Note that `{1}` would list-initialize the vector
  // to the single element 1 and make dense_find() read past the end of this vector on a
  // default-constructed or reset table.
  std::vector<size_t> dense_ht_offsets_ = std::vector<size_t>(2, 0);
  std::vector<CellInfo> dense_ht_values_;
  td::HashSet<CellInfo, CellHashF, CellEqF> new_ht_;

  size_t dense_choose_bucket(const CellHash &hash) const {
    return cell_hash_slice_hash(hash.as_slice()) % dense_ht_buckets_;
  }

  // Looks the hash up in the dense part only; empty slots are skipped.
  const CellInfo *dense_find(CellHash hash) const {
    auto bucket_i = dense_choose_bucket(hash);
    auto begin = dense_ht_values_.begin() + dense_ht_offsets_[bucket_i];
    auto end = dense_ht_values_.begin() + dense_ht_offsets_[bucket_i + 1];
    for (auto it = begin; it != end; ++it) {
      if (it->cell.not_null() && it->cell->get_hash() == hash) {
        return &*it;
      }
    }
    return nullptr;
  }

  // Returns a free slot in the dense bucket of `hash`, or nullptr if the bucket is full.
  CellInfo *dense_find_empty(CellHash hash) {
    auto bucket_i = dense_choose_bucket(hash);
    auto begin = dense_ht_values_.begin() + dense_ht_offsets_[bucket_i];
    auto end = dense_ht_values_.begin() + dense_ht_offsets_[bucket_i + 1];
    for (auto it = begin; it != end; ++it) {
      if (it->cell.is_null()) {
        return &*it;
      }
    }
    return nullptr;
  }

  // Returns the entry with the given hash from either part, or nullptr when there is none.
  const CellInfo *find(CellHash hash) const {
    if (auto it = new_ht_.find(hash); it != new_ht_.end()) {
      return &*it;
    }
    if (auto it = dense_find(hash)) {
      return it;
    }
    return nullptr;
  }

  // Removes the entry with the given hash. An entry living in the dense part must exist
  // and have a positive db_refcnt; its slot is cleared and becomes reusable.
  void erase(CellHash hash) {
    if (auto it = new_ht_.find(hash); it != new_ht_.end()) {
      new_ht_.erase(it);
      return;
    }
    auto info = dense_find(hash);
    CHECK(info && info->db_refcnt > 0);
    info->db_refcnt = 0;  // allowed through a const pointer: db_refcnt is mutable
    const_cast<CellInfo *>(info)->cell = {};
    CHECK(dense_ht_size_ > 0);
    dense_ht_size_--;
  }

  // Stores `info`, preferring a free slot of the dense bucket over new_ht_.
  void insert(CellInfo info) {
    if (auto dest = dense_find_empty(info.cell->get_hash())) {
      *dest = std::move(info);
      dense_ht_size_++;
      return;
    }
    new_ht_.insert(std::move(info));
  }

  // Rebuilds the dense part from [begin, end) with a counting sort by bucket.
  // Elements of the input range are moved from.
  template <class Iterator>
  void init_from(Iterator begin, Iterator end) {
    auto size = td::narrow_cast<size_t>(std::distance(begin, end));
    dense_ht_buckets_ = std::max(size_t(1), size_t(size / 8));

    // offsets is shifted by two while counting, so that after the prefix sum offsets[b + 1]
    // is the start of bucket b, and after the placement pass (which advances offsets[b + 1]
    // to the end of bucket b) offsets[b] is the start of bucket b.
    std::vector<size_t> offsets(dense_ht_buckets_ + 2);
    for (auto it = begin; it != end; ++it) {
      auto bucket_i = dense_choose_bucket(it->cell->get_hash());
      offsets[bucket_i + 2]++;
    }
    for (size_t i = 1; i < offsets.size(); i++) {
      offsets[i] += offsets[i - 1];
    }
    dense_ht_values_.resize(size);
    for (auto it = begin; it != end; ++it) {
      auto bucket_i = dense_choose_bucket(it->cell->get_hash());
      dense_ht_values_[offsets[bucket_i + 1]++] = std::move(*it);
    }
    CHECK(offsets[0] == 0);
    CHECK(offsets[offsets.size() - 1] == size);
    CHECK(offsets[offsets.size() - 2] == size);
    dense_ht_offsets_ = std::move(offsets);
    dense_ht_size_ = size;
  }

  size_t size() const {
    return dense_ht_size_ + new_ht_.size();
  }

  // Invokes `f` on every stored entry: occupied dense slots first, then new_ht_.
  template <class F>
  auto for_each(F &&f) {
    for (auto &it : dense_ht_values_) {
      if (it.cell.not_null()) {
        f(it);
      }
    }
    for (auto &it : new_ht_) {
      f(it);
    }
  }

  size_t bucket_count() const {
    return new_ht_.bucket_count() + dense_ht_values_.size();
  }
};
using CellInfoHashTable = std::conditional_t<use_dense_hash_map, CellInfoHashTableDense, CellInfoHashTableBaseline>;
class CellStorage {
struct PrivateTag {};
struct CellBucket;
struct None {
void operator()(CellBucket *bucket) {
}
};
struct CellBucketRef {
UniqueAccess::Lock lock;
std::unique_ptr<CellBucket, None> bucket;
CellBucket &operator*() {
return *bucket;
}
CellBucket *operator->() {
return bucket.get();
}
};
// One shard of the storage. get_bucket(const CellHash &) selects the bucket by the first
// byte of the cell hash.
struct CellBucket {
// Guards the bucket; taken by unique_access().
mutable UniqueAccess access_;
CellInfoHashTable infos_;
std::vector<CellInfo> cells_;
std::vector<Ref<DataCell>> roots_;
size_t boc_count_{0};
// NOTE(review): presumably padding that keeps neighbouring buckets on separate cache lines — confirm.
[[maybe_unused]] char pad3[TD_CONCURRENCY_PAD];
// Resets the containers to their default-constructed state.
// NOTE(review): boc_count_ is not reset here — confirm this is intended.
void clear() {
td::reset_to_empty(infos_);
td::reset_to_empty(cells_);
td::reset_to_empty(roots_);
}
// Takes the bucket's access guard (a fatal error if it is already held) and returns a
// handle through which the bucket can be modified even when reached via a const reference.
// The `None` deleter does nothing, so the handle does not own the bucket.
CellBucketRef unique_access() const {
auto lock = access_.lock();
return CellBucketRef{.lock = std::move(lock),
.bucket = std::unique_ptr<CellBucket, None>(const_cast<CellBucket *>(this))};
}
};
std::array<CellBucket, 256> buckets_{};
bool inited_{false};
const CellBucket &get_bucket(size_t i) const {
return buckets_.at(i);
}
const CellBucket &get_bucket(const CellHash &hash) const {
return get_bucket(hash.as_array()[0]);
}
mutable UniqueAccess local_access_;
td::HashSet<Ref<DataCell>, CellHashF, CellEqF> local_roots_;
DynamicBagOfCellsDb::Stats stats_;
mutable std::mutex root_mutex_;
td::HashSet<Ref<DataCell>, CellHashF, CellEqF> roots_;
public:
std::optional<CellInfo> get_info(const CellHash &hash) const {
auto lock = local_access_.lock();
auto &bucket = get_bucket(hash);
if (auto info_ptr = bucket.infos_.find(hash)) {
return *info_ptr;
}
return {};
}
DynamicBagOfCellsDb::Stats get_stats() {
auto unique_access = local_access_.lock();
auto stats = stats_;
auto add_stat = [&stats](auto key, auto value) {
stats.custom_stats.emplace_back(std::move(key), PSTRING() << value);
};
if constexpr (use_dense_hash_map) {
size_t dense_ht_capacity = 0;
size_t new_ht_capacity = 0;
size_t dense_ht_size = 0;
size_t new_ht_size = 0;
for_each_bucket(0, [&](auto bucket_id, CellBucket &bucket) {
dense_ht_capacity += bucket.infos_.dense_ht_values_.size();
dense_ht_size += bucket.infos_.dense_ht_size_;
new_ht_capacity += bucket.infos_.new_ht_.bucket_count();
new_ht_size += bucket.infos_.new_ht_.size();
});
auto size = new_ht_size + dense_ht_size;
auto capacity = new_ht_capacity + dense_ht_capacity;
add_stat("ht.capacity", capacity);
add_stat("ht.size", size);
add_stat("ht.load", double(size) / std::max(1.0, double(capacity)));
add_stat("ht.dense_ht_capacity", dense_ht_capacity);
add_stat("ht.dense_ht_size", dense_ht_size);
add_stat("ht.dense_ht_load", double(dense_ht_size) / std::max(1.0, double(dense_ht_capacity)));
add_stat("ht.new_ht_capacity", new_ht_capacity);
add_stat("ht.new_ht_size", new_ht_size);
add_stat("ht.new_ht_load", double(new_ht_size) / std::max(1.0, double(new_ht_capacity)));
} else {
size_t capacity = 0;
size_t size = 0;
for_each_bucket(0, [&](auto bucket_id, CellBucket &bucket) {
capacity += bucket.infos_.bucket_count();
size += bucket.infos_.size();
});
add_stat("ht.capacity", capacity);
add_stat("ht.size", size);
add_stat("ht.load", double(size) / std::max(1.0, double(capacity)));
}
CHECK(td::narrow_cast<size_t>(stats.roots_total_count) == local_roots_.size());
return stats;
}
void apply_stats_diff(DynamicBagOfCellsDb::Stats diff) {
auto unique_access = local_access_.lock();
stats_.apply_diff(diff);
CHECK(td::narrow_cast<size_t>(stats_.roots_total_count) == local_roots_.size());
size_t cells_count{0};
for_each_bucket(0, [&](size_t bucket_id, auto &bucket) { cells_count += bucket.infos_.size(); });
CHECK(td::narrow_cast<size_t>(stats_.cells_total_count) == cells_count);
}
td::Result<Ref<DataCell>> load_cell(const CellHash &hash) const {
auto lock = local_access_.lock();
auto &bucket = get_bucket(hash);
if (auto info_ptr = bucket.infos_.find(hash)) {
return info_ptr->cell;
}
return td::Status::Error("not found");
}
td::Result<Ref<DataCell>> load_root_local(const CellHash &hash) const {
auto lock = local_access_.lock();
if (auto it = local_roots_.find(hash); it != local_roots_.end()) {
return *it;
}
return td::Status::Error("not found");
}
td::Result<Ref<DataCell>> load_root_shared(const CellHash &hash) const {
std::lock_guard<std::mutex> lock(root_mutex_);
if (auto it = roots_.find(hash); it != roots_.end()) {
return *it;
}
return td::Status::Error("not found");
}
void erase(const CellHash &hash) {
auto lock = local_access_.lock();
auto bucket = get_bucket(hash).unique_access();
bucket->infos_.erase(hash);
if (auto local_it = local_roots_.find(hash); local_it != local_roots_.end()) {
local_roots_.erase(local_it);
std::lock_guard<std::mutex> root_lock(root_mutex_);
auto shared_it = roots_.find(hash);
CHECK(shared_it != roots_.end());
roots_.erase(shared_it);
CHECK(stats_.roots_total_count > 0);
stats_.roots_total_count--;
}
}
void add_new_root(Ref<DataCell> cell) {
auto lock = local_access_.lock();
if (local_roots_.insert(cell).second) {
std::lock_guard<std::mutex> lock(root_mutex_);
roots_.insert(std::move(cell));
stats_.roots_total_count++;
}
}
void set(td::int32 refcnt, Ref<DataCell> cell) {
auto lock = local_access_.lock();
//LOG(ERROR) << "setting refcnt to " << refcnt << ", cell " << td::base64_encode(cell->get_hash().as_slice());
auto hash = cell->get_hash();
auto bucket = get_bucket(hash).unique_access();
if (auto info_ptr = bucket->infos_.find(hash)) {
CHECK(info_ptr->cell.get() == cell.get());
info_ptr->db_refcnt = refcnt;
} else {
bucket->infos_.insert({.db_refcnt = refcnt, .cell = std::move(cell)});
}
}
template <class F>
static td::unique_ptr<CellStorage> build(DynamicBagOfCellsDb::CreateInMemoryOptions options,
F &&parallel_scan_cells) {
auto storage = td::make_unique<CellStorage>(PrivateTag{});
storage->do_build(options, parallel_scan_cells);
return storage;
}
~CellStorage() {
clear();
}
CellStorage() = delete;
explicit CellStorage(PrivateTag) {
}
private:
template <class F>
void do_build(DynamicBagOfCellsDb::CreateInMemoryOptions options, F &&parallel_scan_cells) {
auto verbose = options.verbose;
td::Slice P = "loading in-memory cell database: ";
LOG_IF(WARNING, verbose) << P << "start with options use_arena=" << options.use_arena
<< " use_less_memory_during_creation=" << options.use_less_memory_during_creation
<< " use_dense_hash_map=" << use_dense_hash_map;
auto full_timer = td::Timer();
auto lock = local_access_.lock();
CHECK(ArenaPrunnedCellCreator::count() == 0);
ArenaPrunnedCellCreator arena_pc_creator;
DefaultPrunnedCellCreator default_pc_creator;
auto timer = td::Timer();
td::int64 cell_count{0};
td::int64 desc_count{0};
if (options.use_less_memory_during_creation) {
auto [new_cell_count, new_desc_count] = parallel_scan_cells(
default_pc_creator, options.use_arena,
[&](td::int32 refcnt, Ref<DataCell> cell) { initial_set_without_refs(refcnt, std::move(cell)); });
cell_count = new_cell_count;
desc_count = new_desc_count;
} else {
auto [new_cell_count, new_desc_count] =
parallel_scan_cells(arena_pc_creator, options.use_arena,
[&](td::int32 refcnt, Ref<DataCell> cell) { initial_set(refcnt, std::move(cell)); });
cell_count = new_cell_count;
desc_count = new_desc_count;
}
LOG_IF(WARNING, verbose) << P << "cells loaded in " << timer.elapsed() << "s, cells_count= " << cell_count
<< " prunned_cells_count=" << ArenaPrunnedCellCreator::count();
timer = td::Timer();
for_each_bucket(options.extra_threads, [&](size_t bucket_id, auto &bucket) { build_hashtable(bucket); });
size_t ht_capacity = 0;
size_t ht_size = 0;
for_each_bucket(0, [&](size_t bucket_id, auto &bucket) {
ht_size += bucket.infos_.size();
ht_capacity += bucket.infos_.bucket_count();
});
double load_factor = double(ht_size) / std::max(double(ht_capacity), 1.0);
LOG_IF(WARNING, verbose) << P << "hashtable created in " << timer.elapsed()
<< "s, hashtables_expected_size=" << td::format::as_size(ht_capacity * sizeof(CellInfo))
<< " load_factor=" << load_factor;
timer = td::Timer();
if (options.use_less_memory_during_creation) {
auto [new_cell_count, new_desc_count] =
parallel_scan_cells(default_pc_creator, false,
[&](td::int32 refcnt, Ref<DataCell> cell) { secondary_set(refcnt, std::move(cell)); });
CHECK(new_cell_count == cell_count);
CHECK(new_desc_count == desc_count);
} else {
for_each_bucket(options.extra_threads, [&](size_t bucket_id, auto &bucket) { reset_refs(bucket); });
}
LOG_IF(WARNING, verbose) << P << "refs rearranged in " << timer.elapsed() << "s";
timer = td::Timer();
using Stats = DynamicBagOfCellsDb::Stats;
std::vector<Stats> bucket_stats(buckets_.size());
std::atomic<size_t> boc_count{0};
for_each_bucket(options.extra_threads, [&](size_t bucket_id, auto &bucket) {
bucket_stats[bucket_id] = validate_bucket_a(bucket, options.use_arena);
boc_count += bucket.boc_count_;
});
for_each_bucket(options.extra_threads, [&](size_t bucket_id, auto &bucket) { validate_bucket_b(bucket); });
stats_ = {};
for (auto &bucket_stat : bucket_stats) {
stats_.apply_diff(bucket_stat);
}
LOG_IF(WARNING, verbose) << P << "refcnt validated in " << timer.elapsed() << "s";
timer = td::Timer();
build_roots();
LOG_IF(WARNING, verbose) << P << "roots hashtable built in " << timer.elapsed() << "s";
ArenaPrunnedCellCreator::clear_arena();
LOG_IF(WARNING, verbose) << P << "arena cleared in " << timer.elapsed();
lock.reset();
auto r_mem_stat = td::mem_stat();
td::MemStat mem_stat;
if (r_mem_stat.is_ok()) {
mem_stat = r_mem_stat.move_as_ok();
}
auto stats = get_stats();
td::StringBuilder sb;
for (auto &[key, value] : stats.custom_stats) {
sb << "\n\t" << key << "=" << value;
}
LOG_IF(ERROR, desc_count != 0 && desc_count != stats.roots_total_count + 1)
<< "desc<> keys count is " << desc_count << " wich is different from roots count " << stats.roots_total_count;
LOG_IF(WARNING, verbose)
<< P << "done in " << full_timer.elapsed() << "\n\troots_count=" << stats.roots_total_count << "\n\t"
<< desc_count << "\n\tcells_count=" << stats.cells_total_count
<< "\n\tcells_size=" << td::format::as_size(stats.cells_total_size) << "\n\tboc_count=" << boc_count.load()
<< sb.as_cslice() << "\n\tdata_cells_size=" << td::format::as_size(sizeof(DataCell) * stats.cells_total_count)
<< "\n\tdata_cell_size=" << sizeof(DataCell) << "\n\texpected_memory_used="
<< td::format::as_size(stats.cells_total_count * (sizeof(DataCell) + sizeof(CellInfo) * 3 / 2) +
stats.cells_total_size)
<< "\n\tbest_possible_memory_used"
<< td::format::as_size(stats.cells_total_count * (sizeof(DataCell) + sizeof(CellInfo)) + stats.cells_total_size)
<< "\n\tmemory_used=" << td::format::as_size(mem_stat.resident_size_)
<< "\n\tpeak_memory_used=" << td::format::as_size(mem_stat.resident_size_peak_);
inited_ = true;
}
template <class F>
void for_each_bucket(size_t extra_threads, F &&f) {
parallel_run(
buckets_.size(), [&](auto task_id) { f(task_id, *get_bucket(task_id).unique_access()); }, extra_threads);
}
void clear() {
auto unique_access = local_access_.lock();
for_each_bucket(td::thread::hardware_concurrency(), [&](size_t bucket_id, auto &bucket) { bucket.clear(); });
local_roots_.clear();
{
auto lock = std::lock_guard<std::mutex>(root_mutex_);
roots_.clear();
}
}
void initial_set(td::int32 refcnt, Ref<DataCell> cell) {
CHECK(!inited_);
auto bucket = get_bucket(cell->get_hash()).unique_access();
bucket->cells_.push_back({.db_refcnt = refcnt, .cell = std::move(cell)});
}
void initial_set_without_refs(td::int32 refcnt, Ref<DataCell> cell_ref) {
CHECK(!inited_);
auto bucket = get_bucket(cell_ref->get_hash()).unique_access();
auto &cell = const_cast<DataCell &>(*cell_ref);
for (unsigned i = 0; i < cell.size_refs(); i++) {
auto to_destroy = cell.reset_ref_unsafe(i, Ref<Cell>(), false);
if (to_destroy->is_loaded()) {
bucket->boc_count_++;
}
}
bucket->cells_.push_back({.db_refcnt = refcnt, .cell = std::move(cell_ref)});
}
void secondary_set(td::int32 refcnt, Ref<DataCell> cell_copy) {
CHECK(!inited_);
auto bucket = get_bucket(cell_copy->get_hash()).unique_access();
auto info = bucket->infos_.find(cell_copy->get_hash());
CHECK(info);
CellSlice cs(NoVm{}, std::move(cell_copy));
auto &cell = const_cast<DataCell &>(*info->cell);
CHECK(cs.size_refs() == cell.size_refs());
for (unsigned i = 0; i < cell.size_refs(); i++) {
auto prunned_cell_hash = cs.fetch_ref()->get_hash();
auto &prunned_cell_bucket = get_bucket(prunned_cell_hash);
auto full_cell_ptr = prunned_cell_bucket.infos_.find(prunned_cell_hash);
CHECK(full_cell_ptr);
auto full_cell = full_cell_ptr->cell;
auto to_destroy = cell.reset_ref_unsafe(i, std::move(full_cell), false);
CHECK(to_destroy.is_null());
}
}
void build_hashtable(CellBucket &bucket) {
bucket.infos_.init_from(bucket.cells_.begin(), bucket.cells_.end());
LOG_CHECK(bucket.infos_.size() == bucket.cells_.size()) << bucket.infos_.size() << " vs " << bucket.cells_.size();
td::reset_to_empty(bucket.cells_);
LOG_CHECK(bucket.cells_.capacity() == 0) << bucket.cells_.capacity();
}
void reset_refs(CellBucket &bucket) {
bucket.infos_.for_each([&](auto &it) {
// This is generally very dangerous, but should be safe here
auto &cell = const_cast<DataCell &>(*it.cell);
for (unsigned i = 0; i < cell.size_refs(); i++) {
auto prunned_cell = cell.get_ref_raw_ptr(i);
auto prunned_cell_hash = prunned_cell->get_hash();
auto &prunned_cell_bucket = get_bucket(prunned_cell_hash);
auto full_cell_ptr = prunned_cell_bucket.infos_.find(prunned_cell_hash);
CHECK(full_cell_ptr);
auto full_cell = full_cell_ptr->cell;
auto to_destroy = cell.reset_ref_unsafe(i, std::move(full_cell));
if (!to_destroy->is_loaded()) {
Ref<PrunnedCell<ArenaPrunnedCellCreator::Counter>> x(std::move(to_destroy));
x->~PrunnedCell<ArenaPrunnedCellCreator::Counter>();
x.release();
} else {
bucket.boc_count_++;
}
}
});
}
DynamicBagOfCellsDb::Stats validate_bucket_a(CellBucket &bucket, bool use_arena) {
DynamicBagOfCellsDb::Stats stats;
bucket.infos_.for_each([&](auto &it) {
int cell_ref_cnt = it.cell->get_refcnt();
CHECK(it.db_refcnt + 1 + use_arena >= cell_ref_cnt);
auto extra_refcnt = it.db_refcnt + 1 + use_arena - cell_ref_cnt;
if (extra_refcnt != 0) {
bucket.roots_.push_back(it.cell);
stats.roots_total_count++;
}
stats.cells_total_count++;
stats.cells_total_size += static_cast<td::int64>(it.cell->get_storage_size());
});
return stats;
}
void validate_bucket_b(CellBucket &bucket) {
// sanity check
bucket.infos_.for_each([&](auto &it) {
CellSlice cs(NoVm{}, it.cell);
while (cs.have_refs()) {
CHECK(cs.fetch_ref().not_null());
}
});
}
void build_roots() {
for (auto &it : buckets_) {
for (auto &root : it.roots_) {
local_roots_.insert(std::move(root));
}
td::reset_to_empty(it.roots_);
}
auto lock = std::lock_guard<std::mutex>(root_mutex_);
roots_ = local_roots_;
}
};
class InMemoryBagOfCellsDb : public DynamicBagOfCellsDb {
public:
explicit InMemoryBagOfCellsDb(td::unique_ptr<CellStorage> storage) : storage_(std::move(storage)) {
}
td::Result<Ref<DataCell>> load_cell(td::Slice hash) override {
return storage_->load_cell(CellHash::from_slice(hash));
}
td::Result<Ref<DataCell>> load_root(td::Slice hash) override {
return storage_->load_root_local(CellHash::from_slice(hash));
}
td::Result<Ref<DataCell>> load_root_thread_safe(td::Slice hash) const override {
return storage_->load_root_shared(CellHash::from_slice(hash));
}
void inc(const Ref<Cell> &cell) override {
if (cell.is_null()) {
return;
}
if (cell->get_virtualization() != 0) {
return;
}
to_inc_.push_back(cell);
}
void dec(const Ref<Cell> &cell) override {
if (cell.is_null()) {
return;
}
if (cell->get_virtualization() != 0) {
return;
}
to_dec_.push_back(cell);
}
td::Status commit(CellStorer &cell_storer) override {
if (!to_inc_.empty() || !to_dec_.empty()) {
TRY_STATUS(prepare_commit());
}
Stats diff;
CHECK(to_dec_.empty());
for (auto &it : info_) {
auto &info = it.second;
if (info.diff_refcnt == 0) {
continue;
}
auto refcnt = td::narrow_cast<td::int32>(static_cast<td::int64>(info.db_refcnt) + info.diff_refcnt);
CHECK(refcnt >= 0);
if (refcnt > 0) {
cell_storer.set(refcnt, info.cell, false);
storage_->set(refcnt, info.cell);
if (info.db_refcnt == 0) {
diff.cells_total_count++;
diff.cells_total_size += static_cast<td::int64>(info.cell->get_storage_size());
}
} else {
cell_storer.erase(info.cell->get_hash().as_slice());
storage_->erase(info.cell->get_hash());
diff.cells_total_count--;
diff.cells_total_size -= static_cast<td::int64>(info.cell->get_storage_size());
}
}
storage_->apply_stats_diff(diff);
info_ = {};
return td::Status::OK();
}
td::Result<Stats> get_stats() override {
return storage_->get_stats();
}
// Not implemented or trivial or deprecated methods
td::Status set_loader(std::unique_ptr<CellLoader> loader) override {
return td::Status::OK();
}
td::Status prepare_commit() override {
CHECK(info_.empty());
for (auto &to_inc : to_inc_) {
auto new_root = do_inc(to_inc);
storage_->add_new_root(std::move(new_root));
}
for (auto &to_dec : to_dec_) {
do_dec(to_dec);
}
to_dec_ = {};
to_inc_ = {};
return td::Status::OK();
}
Stats get_stats_diff() override {
LOG(FATAL) << "Not implemented";
return {};
}
std::shared_ptr<CellDbReader> get_cell_db_reader() override {
return {};
}
void set_celldb_compress_depth(td::uint32 value) override {
LOG(FATAL) << "Not implemented";
}
ExtCellCreator &as_ext_cell_creator() override {
UNREACHABLE();
}
void load_cell_async(td::Slice hash, std::shared_ptr<AsyncExecutor> executor,
td::Promise<Ref<DataCell>> promise) override {
LOG(FATAL) << "Not implemented";
}
private:
td::unique_ptr<CellStorage> storage_;
struct Info {
td::int32 db_refcnt{0};
td::int32 diff_refcnt{0};
Ref<DataCell> cell;
};
td::HashMap<CellHash, Info> info_;
std::unique_ptr<CellLoader> loader_;
std::vector<Ref<Cell>> to_inc_;
std::vector<Ref<Cell>> to_dec_;
Ref<DataCell> do_inc(Ref<Cell> cell) {
auto cell_hash = cell->get_hash();
if (auto it = info_.find(cell_hash); it != info_.end()) {
CHECK(it->second.diff_refcnt != std::numeric_limits<td::int32>::max());
it->second.diff_refcnt++;
return it->second.cell;
}
if (auto o_info = storage_->get_info(cell_hash)) {
info_.emplace(cell_hash, Info{.db_refcnt = o_info->db_refcnt, .diff_refcnt = 1, .cell = o_info->cell});
return std::move(o_info->cell);
}
CellSlice cs(NoVm{}, std::move(cell));
CellBuilder cb;
cb.store_bits(cs.data(), cs.size());
while (cs.have_refs()) {
auto ref = do_inc(cs.fetch_ref());
cb.store_ref(std::move(ref));
}
auto res = cb.finalize(cs.is_special());
CHECK(res->get_hash() == cell_hash);
info_.emplace(cell_hash, Info{.db_refcnt = 0, .diff_refcnt = 1, .cell = res});
return res;
}
void do_dec(Ref<Cell> cell) {
auto cell_hash = cell->get_hash();
auto it = info_.find(cell_hash);
if (it != info_.end()) {
CHECK(it->second.diff_refcnt != std::numeric_limits<td::int32>::min());
--it->second.diff_refcnt;
} else {
auto info = *storage_->get_info(cell_hash);
it = info_.emplace(cell_hash, Info{.db_refcnt = info.db_refcnt, .diff_refcnt = -1, .cell = info.cell}).first;
}
if (it->second.diff_refcnt + it->second.db_refcnt != 0) {
return;
}
CellSlice cs(NoVm{}, std::move(cell));
while (cs.have_refs()) {
do_dec(cs.fetch_ref());
}
}
};
} // namespace
// Creates an in-memory cell database.
//
// With kv == nullptr an empty database is returned. Otherwise the key space of `kv` is split into
// 256 ranges by the first key byte and scanned through parallel_run with options.extra_threads;
// every entry that loads as a cell is handed to CellStorage.
std::unique_ptr<DynamicBagOfCellsDb> DynamicBagOfCellsDb::create_in_memory(td::KeyValueReader *kv,
                                                                          CreateInMemoryOptions options) {
  if (kv == nullptr) {
    LOG_IF(WARNING, options.verbose) << "Create empty in-memory cells database (no key value is given)";
    auto empty_storage = CellStorage::build(options, [](auto, auto, auto) { return std::make_pair(0, 0); });
    return std::make_unique<InMemoryBagOfCellsDb>(std::move(empty_storage));
  }

  // Range boundaries: "", "\x01", ..., "\xff", followed by 33 bytes of 0xff, which sorts after
  // every 32-byte key. Task i scans [range_bounds[i], range_bounds[i + 1]).
  std::vector<std::string> range_bounds;
  range_bounds.emplace_back("");
  for (td::uint32 first_byte = 1; first_byte <= 0xff; first_byte++) {
    range_bounds.emplace_back(1, static_cast<char>(first_byte));
  }
  range_bounds.emplace_back(33, static_cast<char>(0xff));

  auto scan_all_cells = [&](ExtCellCreator &pc_creator, bool use_arena,
                            auto &&f) -> std::pair<td::int64, td::int64> {
    std::atomic<td::int64> total_cells{0};
    std::atomic<td::int64> total_descs{0};
    parallel_run(
        range_bounds.size() - 1,
        [&](auto task_id) {
          // Counted locally and added to the shared atomics once per task.
          td::int64 cells_seen = 0;
          td::int64 descs_seen = 0;

          auto on_entry = [&](td::Slice key, td::Slice value) {
            // Keys that start with "desc" and are not 32 bytes long are only counted.
            if (td::begins_with(key, "desc") && key.size() != 32) {
              descs_seen++;
              return td::Status::OK();
            }
            auto r_loaded = CellLoader::load(key, value.str(), true, pc_creator);
            if (r_loaded.is_error()) {
              // Entries that fail to load are logged and skipped.
              LOG(ERROR) << r_loaded.error() << " at " << td::format::escaped(key);
              return td::Status::OK();
            }
            CHECK(key.size() == 32);
            CHECK(key.ubegin()[0] == task_id);
            auto loaded = r_loaded.move_as_ok();
            f(loaded.refcnt(), loaded.cell());
            cells_seen++;
            return td::Status::OK();
          };

          // DataCell::use_arena is switched on only for the duration of this task's scan.
          CHECK(!DataCell::use_arena);
          DataCell::use_arena = use_arena;
          kv->for_each_in_range(range_bounds.at(task_id), range_bounds.at(task_id + 1), on_entry).ensure();
          DataCell::use_arena = false;

          total_cells += cells_seen;
          total_descs += descs_seen;
        },
        options.extra_threads);
    return std::make_pair(total_cells.load(), total_descs.load());
  };

  auto storage = CellStorage::build(options, scan_all_cells);
  return std::make_unique<InMemoryBagOfCellsDb>(std::move(storage));
}
} // namespace vm

View file

@ -113,7 +113,8 @@ class TonDbTransactionImpl;
using TonDbTransaction = std::unique_ptr<TonDbTransactionImpl>;
class TonDbTransactionImpl {
public:
SmartContractDb begin_smartcontract(td::Slice hash = {});
SmartContractDb begin_smartcontract(td::Slice hash = std::string(32, '\0'));
void commit_smartcontract(SmartContractDb txn);
void commit_smartcontract(SmartContractDiff txn);
@ -142,6 +143,20 @@ class TonDbTransactionImpl {
friend bool operator<(td::Slice hash, const SmartContractInfo &info) {
return hash < info.hash;
}
struct Eq {
using is_transparent = void; // Pred to use
bool operator()(const SmartContractInfo &info, const SmartContractInfo &other_info) const { return info.hash == other_info.hash;}
bool operator()(const SmartContractInfo &info, td::Slice hash) const { return info.hash == hash;}
bool operator()(td::Slice hash, const SmartContractInfo &info) const { return info.hash == hash;}
};
struct Hash {
using is_transparent = void; // Pred to use
using transparent_key_equal = Eq;
size_t operator()(td::Slice hash) const { return cell_hash_slice_hash(hash); }
size_t operator()(const SmartContractInfo &info) const { return cell_hash_slice_hash(info.hash);}
};
};
CellHashTable<SmartContractInfo> contracts_;

View file

@ -33,10 +33,12 @@ class LargeBocSerializer {
public:
using Hash = Cell::Hash;
explicit LargeBocSerializer(std::shared_ptr<CellDbReader> reader, td::CancellationToken cancellation_token = {})
: reader(std::move(reader)), cancellation_token(std::move(cancellation_token)) {
explicit LargeBocSerializer(std::shared_ptr<CellDbReader> reader) : reader(std::move(reader)) {
}
void set_logger(BagOfCellsLogger* logger_ptr) {
logger_ptr_ = logger_ptr;
}
void add_root(Hash root);
td::Status import_cells();
td::Status serialize(td::FileFd& fd, int mode);
@ -44,6 +46,7 @@ class LargeBocSerializer {
private:
std::shared_ptr<CellDbReader> reader;
struct CellInfo {
Cell::Hash hash;
std::array<int, 4> ref_idx;
int idx;
unsigned short serialized_size;
@ -67,7 +70,7 @@ class LargeBocSerializer {
return 4;
}
};
std::map<Hash, CellInfo> cells;
td::NodeHashMap<Hash, CellInfo> cells;
std::vector<std::pair<const Hash, CellInfo>*> cell_list;
struct RootInfo {
RootInfo(Hash hash, int idx) : hash(hash), idx(idx) {
@ -85,10 +88,7 @@ class LargeBocSerializer {
int revisit(int cell_idx, int force = 0);
td::uint64 compute_sizes(int mode, int& r_size, int& o_size);
td::CancellationToken cancellation_token;
td::Timestamp log_speed_at_;
size_t processed_cells_ = 0;
static constexpr double LOG_SPEED_PERIOD = 120.0;
BagOfCellsLogger* logger_ptr_{};
};
void LargeBocSerializer::add_root(Hash root) {
@ -96,16 +96,18 @@ void LargeBocSerializer::add_root(Hash root) {
}
td::Status LargeBocSerializer::import_cells() {
td::Timer timer;
log_speed_at_ = td::Timestamp::in(LOG_SPEED_PERIOD);
processed_cells_ = 0;
if (logger_ptr_) {
logger_ptr_->start_stage("import_cells");
}
for (auto& root : roots) {
TRY_RESULT(idx, import_cell(root.hash));
root.idx = idx;
}
reorder_cells();
CHECK(!cell_list.empty());
LOG(ERROR) << "serializer: import_cells took " << timer.elapsed() << "s, " << cell_count << " cells";
if (logger_ptr_) {
logger_ptr_->finish_stage(PSLICE() << cell_count << " cells");
}
return td::Status::OK();
}
@ -113,14 +115,8 @@ td::Result<int> LargeBocSerializer::import_cell(Hash hash, int depth) {
if (depth > Cell::max_depth) {
return td::Status::Error("error while importing a cell into a bag of cells: cell depth too large");
}
++processed_cells_;
if (processed_cells_ % 1000 == 0) {
TRY_STATUS(cancellation_token.check());
}
if (log_speed_at_.is_in_past()) {
log_speed_at_ += LOG_SPEED_PERIOD;
LOG(WARNING) << "serializer: import_cells " << (double)processed_cells_ / LOG_SPEED_PERIOD << " cells/s";
processed_cells_ = 0;
if (logger_ptr_) {
TRY_STATUS(logger_ptr_->on_cell_processed());
}
auto it = cells.find(hash);
if (it != cells.end()) {
@ -306,7 +302,6 @@ td::uint64 LargeBocSerializer::compute_sizes(int mode, int& r_size, int& o_size)
}
td::Status LargeBocSerializer::serialize(td::FileFd& fd, int mode) {
td::Timer timer;
using Mode = BagOfCells::Mode;
BagOfCells::Info info;
if ((mode & Mode::WithCacheBits) && !(mode & Mode::WithIndex)) {
@ -370,6 +365,9 @@ td::Status LargeBocSerializer::serialize(td::FileFd& fd, int mode) {
DCHECK(writer.position() == info.index_offset);
DCHECK((unsigned)cell_count == cell_list.size());
if (info.has_index) {
if (logger_ptr_) {
logger_ptr_->start_stage("generate_index");
}
std::size_t offs = 0;
for (int i = cell_count - 1; i >= 0; --i) {
const auto& dc_info = cell_list[i]->second;
@ -387,13 +385,20 @@ td::Status LargeBocSerializer::serialize(td::FileFd& fd, int mode) {
fixed_offset = offs * 2 + dc_info.should_cache;
}
store_offset(fixed_offset);
if (logger_ptr_) {
TRY_STATUS(logger_ptr_->on_cell_processed());
}
}
DCHECK(offs == info.data_size);
if (logger_ptr_) {
logger_ptr_->finish_stage("");
}
}
DCHECK(writer.position() == info.data_offset);
size_t keep_position = writer.position();
log_speed_at_ = td::Timestamp::in(LOG_SPEED_PERIOD);
processed_cells_ = 0;
if (logger_ptr_) {
logger_ptr_->start_stage("serialize");
}
for (int i = 0; i < cell_count; ++i) {
auto hash = cell_list[cell_count - 1 - i]->first;
const auto& dc_info = cell_list[cell_count - 1 - i]->second;
@ -412,14 +417,8 @@ td::Status LargeBocSerializer::serialize(td::FileFd& fd, int mode) {
DCHECK(k > i && k < cell_count);
store_ref(k);
}
++processed_cells_;
if (processed_cells_ % 1000 == 0) {
TRY_STATUS(cancellation_token.check());
}
if (log_speed_at_.is_in_past()) {
log_speed_at_ += LOG_SPEED_PERIOD;
LOG(WARNING) << "serializer: serialize " << (double)processed_cells_ / LOG_SPEED_PERIOD << " cells/s";
processed_cells_ = 0;
if (logger_ptr_) {
TRY_STATUS(logger_ptr_->on_cell_processed());
}
}
DCHECK(writer.position() - keep_position == info.data_size);
@ -429,8 +428,9 @@ td::Status LargeBocSerializer::serialize(td::FileFd& fd, int mode) {
}
DCHECK(writer.empty());
TRY_STATUS(writer.finalize());
LOG(ERROR) << "serializer: serialize took " << timer.elapsed() << "s, " << cell_count << " cells, "
<< writer.position() << " bytes";
if (logger_ptr_) {
logger_ptr_->finish_stage(PSLICE() << cell_count << " cells, " << writer.position() << " bytes");
}
return td::Status::OK();
}
} // namespace
@ -439,7 +439,9 @@ td::Status std_boc_serialize_to_file_large(std::shared_ptr<CellDbReader> reader,
int mode, td::CancellationToken cancellation_token) {
td::Timer timer;
CHECK(reader != nullptr)
LargeBocSerializer serializer(reader, std::move(cancellation_token));
LargeBocSerializer serializer(reader);
BagOfCellsLogger logger(std::move(cancellation_token));
serializer.set_logger(&logger);
serializer.add_root(root_hash);
TRY_STATUS(serializer.import_cells());
TRY_STATUS(serializer.serialize(fd, mode));