1
0
Fork 0
mirror of https://github.com/ton-blockchain/ton synced 2025-03-09 15:40:10 +00:00

Rework session stats

This commit is contained in:
SpyCheese 2024-12-26 14:03:12 +03:00
parent 392cf64758
commit 71342bdcd4
24 changed files with 436 additions and 334 deletions

View file

@ -79,6 +79,7 @@ class Collator final : public td::actor::Actor {
td::Timestamp timeout;
td::Timestamp queue_cleanup_timeout_, soft_timeout_, medium_timeout_;
td::Promise<BlockCandidate> main_promise;
adnl::AdnlNodeIdShort collator_node_id_ = adnl::AdnlNodeIdShort::zero();
unsigned mode_ = 0;
int attempt_idx_;
bool allow_repeat_collation_ = false;
@ -97,7 +98,8 @@ class Collator final : public td::actor::Actor {
Collator(ShardIdFull shard, bool is_hardfork, BlockIdExt min_masterchain_block_id, std::vector<BlockIdExt> prev,
Ref<ValidatorSet> validator_set, Ed25519_PublicKey collator_id, Ref<CollatorOptions> collator_opts,
td::actor::ActorId<ValidatorManager> manager, td::Timestamp timeout, td::Promise<BlockCandidate> promise,
td::CancellationToken cancellation_token, unsigned mode, int attempt_idx);
adnl::AdnlNodeIdShort collator_node_id, td::CancellationToken cancellation_token, unsigned mode,
int attempt_idx);
~Collator() override = default;
bool is_busy() const {
return busy_;

View file

@ -77,6 +77,7 @@ static inline bool dbg(int c) {
* @param manager The ActorId of the ValidatorManager.
* @param timeout The timeout for the collator.
* @param promise The promise to return the result.
* @param collator_node_id ADNL id of the collator node that generates the block (zero if it's not a collator node)
* @param cancellation_token Token to cancel collation.
* @param mode +1 - skip storing candidate to disk, +2 - called from CollatorNode.
* @param attempt_idx The index of the attempt, starting from 0. On later attempts collator decreases block limits and skips some steps.
@ -84,8 +85,8 @@ static inline bool dbg(int c) {
Collator::Collator(ShardIdFull shard, bool is_hardfork, BlockIdExt min_masterchain_block_id,
std::vector<BlockIdExt> prev, td::Ref<ValidatorSet> validator_set, Ed25519_PublicKey collator_id,
Ref<CollatorOptions> collator_opts, td::actor::ActorId<ValidatorManager> manager,
td::Timestamp timeout, td::Promise<BlockCandidate> promise, td::CancellationToken cancellation_token,
unsigned mode, int attempt_idx)
td::Timestamp timeout, td::Promise<BlockCandidate> promise, adnl::AdnlNodeIdShort collator_node_id,
td::CancellationToken cancellation_token, unsigned mode, int attempt_idx)
: shard_(shard)
, is_hardfork_(is_hardfork)
, min_mc_block_id{min_masterchain_block_id}
@ -100,6 +101,7 @@ Collator::Collator(ShardIdFull shard, bool is_hardfork, BlockIdExt min_mastercha
, soft_timeout_(td::Timestamp::at(timeout.at() - 3.0))
, medium_timeout_(td::Timestamp::at(timeout.at() - 1.5))
, main_promise(std::move(promise))
, collator_node_id_(collator_node_id)
, mode_(mode)
, attempt_idx_(attempt_idx)
, perf_timer_("collate", 0.1,
@ -374,8 +376,8 @@ bool Collator::fatal_error(td::Status error) {
!is_hardfork_ && !timeout.is_in_past()) {
LOG(WARNING) << "Repeating collation (attempt #" << attempt_idx_ + 1 << ")";
run_collate_query(shard_, min_mc_block_id, prev_blocks, created_by_, validator_set_, collator_opts_, manager,
td::Timestamp::in(10.0), std::move(main_promise), std::move(cancellation_token_), mode_,
attempt_idx_ + 1);
td::Timestamp::in(10.0), std::move(main_promise), collator_node_id_,
std::move(cancellation_token_), mode_, attempt_idx_ + 1);
} else {
LOG(INFO) << "collation failed in " << perf_timer_.elapsed() << " s " << error;
LOG(INFO) << perf_log_;
@ -5927,8 +5929,15 @@ bool Collator::create_block_candidate() {
double work_time = work_timer_.elapsed();
double cpu_work_time = cpu_work_timer_.elapsed();
LOG(WARNING) << "Collate query work time = " << work_time << "s, cpu time = " << cpu_work_time << "s";
stats_.block_id = block_candidate->id;
stats_.collated_data_hash = block_candidate->collated_file_hash;
stats_.cc_seqno = validator_set_->get_catchain_seqno();
stats_.collated_at = td::Clocks::system();
stats_.actual_bytes = block_candidate->data.size();
stats_.actual_collated_data_bytes = block_candidate->collated_data.size();
stats_.attempt = attempt_idx_;
stats_.collator_node_id = collator_node_id_.bits256_value();
stats_.validator_id = created_by_.as_bits256();
stats_.estimated_bytes = block_limit_status_->estimate_block_size();
stats_.gas = block_limit_status_->gas_used;
stats_.lt_delta = block_limit_status_->cur_lt - block_limit_status_->limits.start_lt;
@ -5938,20 +5947,12 @@ bool Collator::create_block_candidate() {
stats_.cat_lt_delta = block_limit_status_->limits.classify_lt(block_limit_status_->cur_lt);
stats_.cat_collated_data_bytes =
block_limit_status_->limits.classify_collated_data_size(stats_.estimated_collated_data_bytes);
stats_.total_time = perf_timer_.elapsed();
stats_.work_time = work_time;
stats_.cpu_work_time = cpu_work_time;
stats_.time_stats = (PSTRING() << perf_log_);
// TODO: remove this later (currently needed to collect stats)
if (mode_ & CollateMode::from_collator_node) {
size_t d;
stats_.serialized_size =
validatorsession::compress_candidate_data(block_candidate->data, block_candidate->collated_data, d).ok().size();
stats_.serialized_size_no_collated_data =
validatorsession::compress_candidate_data(block_candidate->data, td::Slice{}, d).ok().size();
}
td::actor::send_closure(manager, &ValidatorManager::record_collate_query_stats, block_candidate->id,
std::move(stats_));
td::actor::send_closure(manager, &ValidatorManager::log_collate_query_stats, std::move(stats_));
return true;
}

View file

@ -216,7 +216,8 @@ void run_collate_query(ShardIdFull shard, const BlockIdExt& min_masterchain_bloc
Ed25519_PublicKey creator, td::Ref<ValidatorSet> validator_set,
td::Ref<CollatorOptions> collator_opts, td::actor::ActorId<ValidatorManager> manager,
td::Timestamp timeout, td::Promise<BlockCandidate> promise,
td::CancellationToken cancellation_token, unsigned mode, int attempt_idx) {
adnl::AdnlNodeIdShort collator_node_id, td::CancellationToken cancellation_token, unsigned mode,
int attempt_idx) {
BlockSeqno seqno = 0;
for (auto& p : prev) {
if (p.seqno() > seqno) {
@ -227,7 +228,7 @@ void run_collate_query(ShardIdFull shard, const BlockIdExt& min_masterchain_bloc
<< (attempt_idx ? "_" + td::to_string(attempt_idx) : ""),
shard, false, min_masterchain_block_id, std::move(prev), std::move(validator_set),
creator, std::move(collator_opts), std::move(manager), timeout, std::move(promise),
std::move(cancellation_token), mode, attempt_idx)
collator_node_id, std::move(cancellation_token), mode, attempt_idx)
.release();
}
@ -240,10 +241,10 @@ void run_collate_hardfork(ShardIdFull shard, const BlockIdExt& min_masterchain_b
seqno = p.seqno();
}
}
td::actor::create_actor<Collator>(PSTRING() << "collate" << shard.to_str() << ":" << (seqno + 1), shard, true,
min_masterchain_block_id, std::move(prev), td::Ref<ValidatorSet>{},
Ed25519_PublicKey{Bits256::zero()}, td::Ref<CollatorOptions>{true},
std::move(manager), timeout, std::move(promise), td::CancellationToken{}, 0, 0)
td::actor::create_actor<Collator>(
PSTRING() << "collate" << shard.to_str() << ":" << (seqno + 1), shard, true, min_masterchain_block_id,
std::move(prev), td::Ref<ValidatorSet>{}, Ed25519_PublicKey{Bits256::zero()}, td::Ref<CollatorOptions>{true},
std::move(manager), timeout, std::move(promise), adnl::AdnlNodeIdShort::zero(), td::CancellationToken{}, 0, 0)
.release();
}

View file

@ -116,13 +116,12 @@ bool ValidateQuery::reject_query(std::string error, td::BufferSlice reason) {
error = error_ctx() + error;
LOG(ERROR) << "REJECT: aborting validation of block candidate for " << shard_.to_str() << " : " << error;
if (main_promise) {
record_stats();
record_stats(false, error);
errorlog::ErrorLog::log(PSTRING() << "REJECT: aborting validation of block candidate for " << shard_.to_str()
<< " : " << error << ": data=" << block_candidate.id.file_hash.to_hex()
<< " collated_data=" << block_candidate.collated_file_hash.to_hex());
errorlog::ErrorLog::log_file(block_candidate.data.clone());
errorlog::ErrorLog::log_file(block_candidate.collated_data.clone());
LOG(INFO) << "validation took " << perf_timer_.elapsed() << " s";
main_promise.set_result(CandidateReject{std::move(error), std::move(reason)});
}
stop();
@ -155,13 +154,12 @@ bool ValidateQuery::soft_reject_query(std::string error, td::BufferSlice reason)
error = error_ctx() + error;
LOG(ERROR) << "SOFT REJECT: aborting validation of block candidate for " << shard_.to_str() << " : " << error;
if (main_promise) {
record_stats();
record_stats(false, error);
errorlog::ErrorLog::log(PSTRING() << "SOFT REJECT: aborting validation of block candidate for " << shard_.to_str()
<< " : " << error << ": data=" << block_candidate.id.file_hash.to_hex()
<< " collated_data=" << block_candidate.collated_file_hash.to_hex());
errorlog::ErrorLog::log_file(block_candidate.data.clone());
errorlog::ErrorLog::log_file(block_candidate.collated_data.clone());
LOG(INFO) << "validation took " << perf_timer_.elapsed() << " s";
main_promise.set_result(CandidateReject{std::move(error), std::move(reason)});
}
stop();
@ -179,7 +177,7 @@ bool ValidateQuery::fatal_error(td::Status error) {
error.ensure_error();
LOG(ERROR) << "aborting validation of block candidate for " << shard_.to_str() << " : " << error.to_string();
if (main_promise) {
record_stats();
record_stats(false, error.message().str());
auto c = error.code();
if (c <= -667 && c >= -670) {
errorlog::ErrorLog::log(PSTRING() << "FATAL ERROR: aborting validation of block candidate for " << shard_.to_str()
@ -188,7 +186,6 @@ bool ValidateQuery::fatal_error(td::Status error) {
errorlog::ErrorLog::log_file(block_candidate.data.clone());
errorlog::ErrorLog::log_file(block_candidate.collated_data.clone());
}
LOG(INFO) << "validation took " << perf_timer_.elapsed() << " s";
main_promise(std::move(error));
}
stop();
@ -238,9 +235,8 @@ bool ValidateQuery::fatal_error(std::string err_msg, int err_code) {
*/
void ValidateQuery::finish_query() {
if (main_promise) {
record_stats();
record_stats(true);
LOG(WARNING) << "validate query done";
LOG(WARNING) << "validation took " << perf_timer_.elapsed() << " s";
main_promise.set_result(now_);
}
stop();
@ -7061,13 +7057,25 @@ void ValidateQuery::written_candidate() {
/**
* Sends validation work time to manager.
*/
void ValidateQuery::record_stats() {
double work_time = work_timer_.elapsed();
double cpu_work_time = cpu_work_timer_.elapsed();
void ValidateQuery::record_stats(bool valid, std::string error_message) {
ValidationStats stats;
stats.block_id = id_;
stats.collated_data_hash = block_candidate.collated_file_hash;
stats.validated_at = td::Clocks::system();
stats.valid = valid;
if (valid) {
stats.comment = (PSTRING() << "OK ts=" << now_);
} else {
stats.comment = std::move(error_message);
}
stats.actual_bytes = block_candidate.data.size();
stats.actual_collated_data_bytes = block_candidate.collated_data.size();
stats.total_time = perf_timer_.elapsed();
stats.work_time = work_timer_.elapsed();
stats.cpu_work_time = cpu_work_timer_.elapsed();
LOG(WARNING) << "validation took " << perf_timer_.elapsed() << "s";
LOG(WARNING) << "Validate query work time = " << work_time << "s, cpu time = " << cpu_work_time << "s";
td::actor::send_closure(manager, &ValidatorManager::record_validate_query_stats, block_candidate.id, work_time,
cpu_work_time);
LOG(WARNING) << "Validate query work time = " << stats.work_time << "s, cpu time = " << stats.cpu_work_time << "s";
td::actor::send_closure(manager, &ValidatorManager::log_validate_query_stats, std::move(stats));
}
} // namespace validator

View file

@ -407,7 +407,7 @@ class ValidateQuery : public td::actor::Actor {
td::Timer work_timer_{true};
td::ThreadCpuTimer cpu_work_timer_{true};
void record_stats();
void record_stats(bool valid, std::string error_message = "");
};
} // namespace validator