diff --git a/catchain/catchain.h b/catchain/catchain.h index 912957e5..c5c8af28 100644 --- a/catchain/catchain.h +++ b/catchain/catchain.h @@ -96,6 +96,7 @@ class CatChain : public td::actor::Actor { virtual void send_query_via(const PublicKeyHash &dst, std::string name, td::Promise promise, td::Timestamp timeout, td::BufferSlice query, td::uint64 max_answer_size, td::actor::ActorId via) = 0; + virtual void get_source_heights(td::Promise> promise) = 0; virtual void destroy() = 0; static td::actor::ActorOwn create(std::unique_ptr callback, const CatChainOptions &opts, diff --git a/catchain/catchain.hpp b/catchain/catchain.hpp index 8c8bb99a..586cf474 100644 --- a/catchain/catchain.hpp +++ b/catchain/catchain.hpp @@ -115,6 +115,15 @@ class CatChainImpl : public CatChain { td::actor::send_closure(receiver_, &CatChainReceiverInterface::send_custom_query_data_via, dst, name, std::move(promise), timeout, std::move(query), max_answer_size, via); } + void get_source_heights(td::Promise> promise) override { + std::vector heights(top_source_blocks_.size(), 0); + for (size_t i = 0; i < top_source_blocks_.size(); ++i) { + if (top_source_blocks_[i]) { + heights[i] = top_source_blocks_[i]->height(); + } + } + promise.set_result(std::move(heights)); + } void destroy() override; CatChainImpl(std::unique_ptr callback, const CatChainOptions &opts, td::actor::ActorId keyring, td::actor::ActorId adnl, diff --git a/dht-server/dht-server.cpp b/dht-server/dht-server.cpp index 6e7c9b8b..006e7408 100644 --- a/dht-server/dht-server.cpp +++ b/dht-server/dht-server.cpp @@ -1233,15 +1233,19 @@ int main(int argc, char *argv[]) { }); td::uint32 threads = 7; p.add_checked_option( - 't', "threads", PSTRING() << "number of threads (default=" << threads << ")", [&](td::Slice fname) { + 't', "threads", PSTRING() << "number of threads (default=" << threads << ")", [&](td::Slice arg) { td::int32 v; try { - v = std::stoi(fname.str()); + v = std::stoi(arg.str()); } catch (...) { return td::Status::Error(ton::ErrorCode::error, "bad value for --threads: not a number"); } - if (v < 1 || v > 256) { - return td::Status::Error(ton::ErrorCode::error, "bad value for --threads: should be in range [1..256]"); + if (v <= 0) { + return td::Status::Error(ton::ErrorCode::error, "bad value for --threads: should be > 0"); + } + if (v > 127) { + LOG(WARNING) << "`--threads " << v << "` is too big, effective value will be 127"; + v = 127; } threads = v; return td::Status::OK(); diff --git a/tdutils/td/utils/Timer.cpp b/tdutils/td/utils/Timer.cpp index 1f72fba9..24de099a 100644 --- a/tdutils/td/utils/Timer.cpp +++ b/tdutils/td/utils/Timer.cpp @@ -91,4 +91,47 @@ double PerfWarningTimer::elapsed() const { return Time::now() - start_at_; } +static double thread_cpu_clock() { +#if defined(CLOCK_THREAD_CPUTIME_ID) + timespec ts; + int result = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts); + CHECK(result == 0); + return (double)ts.tv_sec + (double)ts.tv_nsec * 1e-9; +#else + return 0.0; // TODO: MacOS and Windows support (currently cpu timer is used only in validators) +#endif +} + +ThreadCpuTimer::ThreadCpuTimer(bool is_paused) : is_paused_(is_paused) { + if (is_paused_) { + start_time_ = 0; + } else { + start_time_ = thread_cpu_clock(); + } +} + +void ThreadCpuTimer::pause() { + if (is_paused_) { + return; + } + elapsed_ += thread_cpu_clock() - start_time_; + is_paused_ = true; +} + +void ThreadCpuTimer::resume() { + if (!is_paused_) { + return; + } + start_time_ = thread_cpu_clock(); + is_paused_ = false; +} + +double ThreadCpuTimer::elapsed() const { + double res = elapsed_; + if (!is_paused_) { + res += thread_cpu_clock() - start_time_; + } + return res; +} + } // namespace td diff --git a/tdutils/td/utils/Timer.h b/tdutils/td/utils/Timer.h index 3e0cafbf..a27cac8a 100644 --- a/tdutils/td/utils/Timer.h +++ b/tdutils/td/utils/Timer.h @@ -62,4 +62,22 @@ class PerfWarningTimer { std::function callback_; }; +class ThreadCpuTimer { + public: + ThreadCpuTimer() : ThreadCpuTimer(false) { + } + explicit ThreadCpuTimer(bool is_paused); + ThreadCpuTimer(const ThreadCpuTimer &other) = default; + ThreadCpuTimer &operator=(const ThreadCpuTimer &other) = default; + + double elapsed() const; + void pause(); + void resume(); + + private: + double elapsed_{0}; + double start_time_; + bool is_paused_{false}; +}; + } // namespace td diff --git a/tl/generate/scheme/ton_api.tl b/tl/generate/scheme/ton_api.tl index cc2cc67e..57ee17a0 100644 --- a/tl/generate/scheme/ton_api.tl +++ b/tl/generate/scheme/ton_api.tl @@ -400,8 +400,8 @@ tonNode.newShardBlock block:tonNode.blockIdExt cc_seqno:int data:bytes = tonNode tonNode.blockBroadcastCompressed.data signatures:(vector tonNode.blockSignature) proof_data:bytes = tonNode.blockBroadcaseCompressed.Data; -tonNode.blockBroadcast id:tonNode.blockIdExt catchain_seqno:int validator_set_hash:int - signatures:(vector tonNode.blockSignature) +tonNode.blockBroadcast id:tonNode.blockIdExt catchain_seqno:int validator_set_hash:int + signatures:(vector tonNode.blockSignature) proof:bytes data:bytes = tonNode.Broadcast; tonNode.blockBroadcastCompressed id:tonNode.blockIdExt catchain_seqno:int validator_set_hash:int flags:# compressed:bytes = tonNode.Broadcast; @@ -832,13 +832,19 @@ http.server.config dhs:(vector http.server.dnsEntry) local_hosts:(vector http.se ---types--- -validatorSession.statsProducer id:int256 candidate_id:int256 block_status:int comment:string - block_timestamp:double is_accepted:Bool is_ours:Bool got_submit_at:double +validatorSession.collationStats bytes:int gas:int lt_delta:int cat_bytes:int cat_gas:int cat_lt_delta:int + limits_log:string ext_msgs_total:int ext_msgs_filtered:int ext_msgs_accepted:int ext_msgs_rejected:int = validadorSession.CollationStats; + +validatorSession.statsProducer id:int256 candidate_id:int256 block_status:int root_hash:int256 file_hash:int256 + comment:string block_timestamp:double is_accepted:Bool is_ours:Bool got_submit_at:double collation_time:double collated_at:double collation_cached:Bool + collation_work_time:double collation_cpu_work_time:double + collation_stats:validatorSession.collationStats validation_time:double validated_at:double validation_cached:Bool + validation_work_time:double validation_cpu_work_time:double gen_utime:double - approved_weight:long approved_33pct_at:double approved_66pct_at:double - signed_weight:long signed_33pct_at:double signed_66pct_at:double + approved_weight:long approved_33pct_at:double approved_66pct_at:double approvers:string + signed_weight:long signed_33pct_at:double signed_66pct_at:double signers:string serialize_time:double deserialize_time:double serialized_size:int = validatorSession.StatsProducer; validatorSession.statsRound timestamp:double producers:(vector validatorSession.statsProducer) = validatorSession.StatsRound; @@ -854,9 +860,14 @@ collatorNode.generateBlockSuccess candidate:collatorNode.Candidate = collatorNod collatorNode.generateBlockError code:int message:string = collatorNode.GenerateBlockResult; validatorSession.newValidatorGroupStats.node id:int256 weight:long = validatorSession.newValidatorGroupStats.Node; -validatorSession.newValidatorGroupStats session_id:int256 workchain:int shard:long cc_seqno:int timestamp:double +validatorSession.newValidatorGroupStats session_id:int256 workchain:int shard:long cc_seqno:int + last_key_block_seqno:int timestamp:double self_idx:int nodes:(vector validatorSession.newValidatorGroupStats.node) = validatorSession.NewValidatorGroupStats; +validatorSession.endValidatorGroupStats.node id:int256 catchain_blocks:int = validatorSession.endValidatorGroupStats.Node; +validatorSession.endValidatorGroupStats session_id:int256 timestamp:double + nodes:(vector validatorSession.endValidatorGroupStats.node) = validatorSession.EndValidatorGroupStats; + ---functions--- collatorNode.generateBlock shard:tonNode.shardId cc_seqno:int prev_blocks:(vector tonNode.blockIdExt) creator:int256 = collatorNode.GenerateBlockResult; diff --git a/tl/generate/scheme/ton_api.tlo b/tl/generate/scheme/ton_api.tlo index b788ae4f..466783d7 100644 Binary files a/tl/generate/scheme/ton_api.tlo and b/tl/generate/scheme/ton_api.tlo differ diff --git a/validator-engine/validator-engine.cpp b/validator-engine/validator-engine.cpp index 705439bc..00e98e68 100644 --- a/validator-engine/validator-engine.cpp +++ b/validator-engine/validator-engine.cpp @@ -4639,15 +4639,19 @@ int main(int argc, char *argv[]) { }); td::uint32 threads = 7; p.add_checked_option( - 't', "threads", PSTRING() << "number of threads (default=" << threads << ")", [&](td::Slice fname) { + 't', "threads", PSTRING() << "number of threads (default=" << threads << ")", [&](td::Slice arg) { td::int32 v; try { - v = std::stoi(fname.str()); + v = std::stoi(arg.str()); } catch (...) { return td::Status::Error(ton::ErrorCode::error, "bad value for --threads: not a number"); } - if (v < 1 || v > 256) { - return td::Status::Error(ton::ErrorCode::error, "bad value for --threads: should be in range [1..256]"); + if (v <= 0) { + return td::Status::Error(ton::ErrorCode::error, "bad value for --threads: should be > 0"); + } + if (v > 127) { + LOG(WARNING) << "`--threads " << v << "` is too big, effective value will be 127"; + v = 127; } threads = v; return td::Status::OK(); diff --git a/validator-session/validator-session-types.h b/validator-session/validator-session-types.h index e13c36d2..78a9b246 100644 --- a/validator-session/validator-session-types.h +++ b/validator-session/validator-session-types.h @@ -77,6 +77,8 @@ struct ValidatorSessionStats { ValidatorSessionCandidateId candidate_id = ValidatorSessionCandidateId::zero(); int block_status = status_none; double block_timestamp = -1.0; + td::Bits256 root_hash = td::Bits256::zero(); + td::Bits256 file_hash = td::Bits256::zero(); std::string comment; bool is_accepted = false; @@ -159,11 +161,23 @@ struct NewValidatorGroupStats { ValidatorSessionId session_id = ValidatorSessionId::zero(); ShardIdFull shard{masterchainId}; CatchainSeqno cc_seqno = 0; + BlockSeqno last_key_block_seqno = 0; double timestamp = -1.0; td::uint32 self_idx = 0; std::vector nodes; }; +struct EndValidatorGroupStats { + struct Node { + PublicKeyHash id = PublicKeyHash::zero(); + td::uint32 catchain_blocks = 0; + }; + + ValidatorSessionId session_id = ValidatorSessionId::zero(); + double timestamp = -1.0; + std::vector nodes; +}; + } // namespace validatorsession } // namespace ton diff --git a/validator-session/validator-session.cpp b/validator-session/validator-session.cpp index 186d6600..07fb5994 100644 --- a/validator-session/validator-session.cpp +++ b/validator-session/validator-session.cpp @@ -271,6 +271,8 @@ void ValidatorSessionImpl::process_broadcast(PublicKeyHash src, td::BufferSlice } stat->deserialize_time = deserialize_time; stat->serialized_size = data.size(); + stat->root_hash = candidate->root_hash_; + stat->file_hash = file_hash; } if ((td::int32)block_round < (td::int32)cur_round_ - MAX_PAST_ROUND_BLOCK || @@ -469,6 +471,8 @@ void ValidatorSessionImpl::generated_block(td::uint32 round, ValidatorSessionCan stat->collated_at = td::Clocks::system(); stat->block_timestamp = td::Clocks::system(); stat->collation_cached = collation_cached; + stat->root_hash = root_hash; + stat->file_hash = file_hash; } if (round != cur_round_) { return; @@ -603,6 +607,8 @@ void ValidatorSessionImpl::try_approve_block(const SentBlock *block) { if (stat->block_timestamp <= 0.0) { stat->block_timestamp = td::Clocks::system(); } + stat->root_hash = B->root_hash_; + stat->file_hash = td::sha256_bits256(B->data_); } auto P = td::PromiseCreator::lambda([round = cur_round_, hash = block_id, root_hash = block->get_root_hash(), @@ -998,6 +1004,29 @@ void ValidatorSessionImpl::get_current_stats(td::Promise promise.set_result(cur_stats_); } +void ValidatorSessionImpl::get_end_stats(td::Promise promise) { + if (!started_) { + promise.set_error(td::Status::Error(ErrorCode::notready, "not started")); + return; + } + EndValidatorGroupStats stats; + stats.session_id = unique_hash_; + stats.timestamp = td::Clocks::system(); + stats.nodes.resize(description().get_total_nodes()); + for (size_t i = 0; i < stats.nodes.size(); ++i) { + stats.nodes[i].id = description().get_source_id(i); + } + td::actor::send_closure(catchain_, &catchain::CatChain::get_source_heights, + [promise = std::move(promise), + stats = std::move(stats)](td::Result> R) mutable { + TRY_RESULT_PROMISE(promise, heights, std::move(R)); + for (size_t i = 0; i < std::min(heights.size(), stats.nodes.size()); ++i) { + stats.nodes[i].catchain_blocks = heights[i]; + } + promise.set_result(std::move(stats)); + }); +} + void ValidatorSessionImpl::get_validator_group_info_for_litequery( td::uint32 cur_round, td::Promise>> promise) { diff --git a/validator-session/validator-session.h b/validator-session/validator-session.h index 0870f671..2e1ed9b1 100644 --- a/validator-session/validator-session.h +++ b/validator-session/validator-session.h @@ -105,6 +105,7 @@ class ValidatorSession : public td::actor::Actor { virtual void start() = 0; virtual void destroy() = 0; virtual void get_current_stats(td::Promise promise) = 0; + virtual void get_end_stats(td::Promise promise) = 0; virtual void get_validator_group_info_for_litequery( td::uint32 cur_round, td::Promise>> promise) = 0; diff --git a/validator-session/validator-session.hpp b/validator-session/validator-session.hpp index 58058282..2ee4885b 100644 --- a/validator-session/validator-session.hpp +++ b/validator-session/validator-session.hpp @@ -187,6 +187,7 @@ class ValidatorSessionImpl : public ValidatorSession { void start() override; void destroy() override; void get_current_stats(td::Promise promise) override; + void get_end_stats(td::Promise promise) override; void get_validator_group_info_for_litequery( td::uint32 cur_round, td::Promise>> promise) override; diff --git a/validator/impl/collator-impl.h b/validator/impl/collator-impl.h index e0e56a50..7754430f 100644 --- a/validator/impl/collator-impl.h +++ b/validator/impl/collator-impl.h @@ -372,6 +372,11 @@ class Collator final : public td::actor::Actor { public: static td::uint32 get_skip_externals_queue_size(); + + private: + td::Timer work_timer_{true}; + td::ThreadCpuTimer cpu_work_timer_{true}; + CollationStats stats_; }; } // namespace validator diff --git a/validator/impl/collator.cpp b/validator/impl/collator.cpp index aa30025b..f4b990e1 100644 --- a/validator/impl/collator.cpp +++ b/validator/impl/collator.cpp @@ -1855,6 +1855,12 @@ bool Collator::register_shard_block_creators(std::vector creator_li * @returns True if collation is successful, false otherwise. */ bool Collator::try_collate() { + work_timer_.resume(); + cpu_work_timer_.resume(); + SCOPE_EXIT { + work_timer_.pause(); + cpu_work_timer_.pause(); + }; if (!preinit_complete) { LOG(WARNING) << "running do_preinit()"; if (!do_preinit()) { @@ -3611,6 +3617,29 @@ bool Collator::process_inbound_message(Ref enq_msg, ton::LogicalT return true; } +/** + * Creates a string that explains which limit is exceeded. Used for collator stats. + * + * @param block_limit_status Status of block limits. + * @param cls Which limit class is exceeded. + * + * @returns String for collator stats. + */ +static std::string block_full_comment(const block::BlockLimitStatus& block_limit_status, unsigned cls) { + auto bytes = block_limit_status.estimate_block_size(); + if (!block_limit_status.limits.bytes.fits(cls, bytes)) { + return PSTRING() << "block_full bytes " << bytes; + } + if (!block_limit_status.limits.gas.fits(cls, block_limit_status.gas_used)) { + return PSTRING() << "block_full gas " << block_limit_status.gas_used; + } + auto lt_delta = block_limit_status.cur_lt - block_limit_status.limits.start_lt; + if (!block_limit_status.limits.lt_delta.fits(cls, lt_delta)) { + return PSTRING() << "block_full lt_delta " << lt_delta; + } + return ""; +} + /** * Processes inbound internal messages from message queues of the neighbors. * Messages are processed until the normal limit is reached, soft timeout is reached or there are no more messages. @@ -3642,11 +3671,14 @@ bool Collator::process_inbound_internal_messages() { } if (block_full_) { LOG(INFO) << "BLOCK FULL, stop processing inbound internal messages"; + stats_.limits_log += PSTRING() << "INBOUND_INT_MESSAGES: " + << block_full_comment(*block_limit_status_, block::ParamLimits::cl_normal) << "\n"; break; } if (soft_timeout_.is_in_past(td::Timestamp::now())) { block_full_ = true; LOG(WARNING) << "soft timeout reached, stop processing inbound internal messages"; + stats_.limits_log += PSTRING() << "INBOUND_INT_MESSAGES: timeout\n"; break; } if (!check_cancelled()) { @@ -3695,10 +3727,13 @@ bool Collator::process_inbound_external_messages() { } if (full) { LOG(INFO) << "BLOCK FULL, stop processing external messages"; + stats_.limits_log += PSTRING() << "INBOUND_EXT_MESSAGES: " + << block_full_comment(*block_limit_status_, block::ParamLimits::cl_soft) << "\n"; break; } if (medium_timeout_.is_in_past(td::Timestamp::now())) { LOG(WARNING) << "medium timeout reached, stop processing inbound external messages"; + stats_.limits_log += PSTRING() << "INBOUND_EXT_MESSAGES: timeout\n"; break; } if (!check_cancelled()) { @@ -3707,6 +3742,11 @@ bool Collator::process_inbound_external_messages() { auto ext_msg = ext_msg_struct.cell; ton::Bits256 hash{ext_msg->get_hash().bits()}; int r = process_external_message(std::move(ext_msg)); + if (r > 0) { + ++stats_.ext_msgs_accepted; + } else { + ++stats_.ext_msgs_rejected; + } if (r < 0) { bad_ext_msgs_.emplace_back(ext_msg_struct.hash); return false; @@ -3812,11 +3852,15 @@ bool Collator::process_dispatch_queue() { block_full_ = !block_limit_status_->fits(block::ParamLimits::cl_normal); if (block_full_) { LOG(INFO) << "BLOCK FULL, stop processing dispatch queue"; + stats_.limits_log += PSTRING() << "DISPATCH_QUEUE_STAGE_" << iter << ": " + << block_full_comment(*block_limit_status_, block::ParamLimits::cl_normal) + << "\n"; return true; } if (soft_timeout_.is_in_past(td::Timestamp::now())) { block_full_ = true; LOG(WARNING) << "soft timeout reached, stop processing dispatch queue"; + stats_.limits_log += PSTRING() << "DISPATCH_QUEUE_STAGE_" << iter << ": timeout\n"; return true; } StdSmcAddress src_addr; @@ -3866,6 +3910,7 @@ bool Collator::process_dispatch_queue() { ++total_count; if (total_count >= max_total_count[iter]) { dispatch_queue_total_limit_reached_ = true; + stats_.limits_log += PSTRING() << "DISPATCH_QUEUE_STAGE_" << iter << ": total limit reached\n"; break; } } @@ -4215,6 +4260,8 @@ bool Collator::process_new_messages(bool enqueue_only) { if ((block_full_ || have_unprocessed_account_dispatch_queue_) && !enqueue_only) { LOG(INFO) << "BLOCK FULL, enqueue all remaining new messages"; enqueue_only = true; + stats_.limits_log += PSTRING() << "NEW_MESSAGES: " + << block_full_comment(*block_limit_status_, block::ParamLimits::cl_normal) << "\n"; } if (!check_cancelled()) { return false; @@ -4226,6 +4273,8 @@ bool Collator::process_new_messages(bool enqueue_only) { } else if (res == 3) { LOG(INFO) << "All remaining new messages must be enqueued (BLOCK FULL)"; enqueue_only = true; + stats_.limits_log += PSTRING() << "NEW_MESSAGES: " + << block_full_comment(*block_limit_status_, block::ParamLimits::cl_normal) << "\n"; } } return true; @@ -5665,6 +5714,18 @@ bool Collator::create_block_candidate() { td::actor::send_closure_later(manager, &ValidatorManager::complete_external_messages, std::move(delay_ext_msgs_), std::move(bad_ext_msgs_)); } + + double work_time = work_timer_.elapsed(); + double cpu_work_time = cpu_work_timer_.elapsed(); + LOG(WARNING) << "Collate query work time = " << work_time << "s, cpu time = " << cpu_work_time << "s"; + stats_.bytes = block_limit_status_->estimate_block_size(); + stats_.gas = block_limit_status_->gas_used; + stats_.lt_delta = block_limit_status_->cur_lt - block_limit_status_->limits.start_lt; + stats_.cat_bytes = block_limit_status_->limits.classify_size(stats_.bytes); + stats_.cat_gas = block_limit_status_->limits.classify_gas(stats_.gas); + stats_.cat_lt_delta = block_limit_status_->limits.classify_lt(block_limit_status_->cur_lt); + td::actor::send_closure(manager, &ValidatorManager::record_collate_query_stats, block_candidate->id, work_time, + cpu_work_time, std::move(stats_)); return true; } @@ -5770,6 +5831,7 @@ void Collator::after_get_external_messages(td::Result ext_msg_cell = ext_msg->root_cell(); @@ -5781,6 +5843,7 @@ void Collator::after_get_external_messages(td::Resulthash()); } } diff --git a/validator/impl/validate-query.cpp b/validator/impl/validate-query.cpp index 469708e0..2e575806 100644 --- a/validator/impl/validate-query.cpp +++ b/validator/impl/validate-query.cpp @@ -117,6 +117,7 @@ bool ValidateQuery::reject_query(std::string error, td::BufferSlice reason) { error = error_ctx() + error; LOG(ERROR) << "REJECT: aborting validation of block candidate for " << shard_.to_str() << " : " << error; if (main_promise) { + record_stats(); errorlog::ErrorLog::log(PSTRING() << "REJECT: aborting validation of block candidate for " << shard_.to_str() << " : " << error << ": data=" << block_candidate.id.file_hash.to_hex() << " collated_data=" << block_candidate.collated_file_hash.to_hex()); @@ -155,6 +156,7 @@ bool ValidateQuery::soft_reject_query(std::string error, td::BufferSlice reason) error = error_ctx() + error; LOG(ERROR) << "SOFT REJECT: aborting validation of block candidate for " << shard_.to_str() << " : " << error; if (main_promise) { + record_stats(); errorlog::ErrorLog::log(PSTRING() << "SOFT REJECT: aborting validation of block candidate for " << shard_.to_str() << " : " << error << ": data=" << block_candidate.id.file_hash.to_hex() << " collated_data=" << block_candidate.collated_file_hash.to_hex()); @@ -178,6 +180,7 @@ bool ValidateQuery::fatal_error(td::Status error) { error.ensure_error(); LOG(ERROR) << "aborting validation of block candidate for " << shard_.to_str() << " : " << error.to_string(); if (main_promise) { + record_stats(); auto c = error.code(); if (c <= -667 && c >= -670) { errorlog::ErrorLog::log(PSTRING() << "FATAL ERROR: aborting validation of block candidate for " << shard_.to_str() @@ -236,6 +239,7 @@ bool ValidateQuery::fatal_error(std::string err_msg, int err_code) { */ void ValidateQuery::finish_query() { if (main_promise) { + record_stats(); LOG(WARNING) << "validate query done"; LOG(WARNING) << "validation took " << perf_timer_.elapsed() << " s"; main_promise.set_result(now_); @@ -6901,6 +6905,12 @@ bool ValidateQuery::try_validate() { if (pending) { return true; } + work_timer_.resume(); + cpu_work_timer_.resume(); + SCOPE_EXIT { + work_timer_.pause(); + cpu_work_timer_.pause(); + }; try { if (!stage_) { LOG(WARNING) << "try_validate stage 0"; @@ -7040,6 +7050,17 @@ void ValidateQuery::written_candidate() { finish_query(); } +/** + * Sends validation work time to manager. + */ +void ValidateQuery::record_stats() { + double work_time = work_timer_.elapsed(); + double cpu_work_time = cpu_work_timer_.elapsed(); + LOG(WARNING) << "Validate query work time = " << work_time << "s, cpu time = " << cpu_work_time << "s"; + td::actor::send_closure(manager, &ValidatorManager::record_validate_query_stats, block_candidate.id, work_time, + cpu_work_time); +} + } // namespace validator } // namespace ton diff --git a/validator/impl/validate-query.hpp b/validator/impl/validate-query.hpp index cbabdf07..a21a56e9 100644 --- a/validator/impl/validate-query.hpp +++ b/validator/impl/validate-query.hpp @@ -404,6 +404,10 @@ class ValidateQuery : public td::actor::Actor { } return true; } + + td::Timer work_timer_{true}; + td::ThreadCpuTimer cpu_work_timer_{true}; + void record_stats(); }; } // namespace validator diff --git a/validator/interfaces/validator-manager.h b/validator/interfaces/validator-manager.h index 1ee02c48..15965fc7 100644 --- a/validator/interfaces/validator-manager.h +++ b/validator/interfaces/validator-manager.h @@ -53,6 +53,16 @@ struct AsyncSerializerState { UnixTime last_written_block_ts; }; +struct CollationStats { + td::uint32 bytes, gas, lt_delta; + int cat_bytes, cat_gas, cat_lt_delta; + std::string limits_log; + td::uint32 ext_msgs_total = 0; + td::uint32 ext_msgs_filtered = 0; + td::uint32 ext_msgs_accepted = 0; + td::uint32 ext_msgs_rejected = 0; +}; + using ValidateCandidateResult = td::Variant; class ValidatorManager : public ValidatorManagerInterface { @@ -176,6 +186,7 @@ class ValidatorManager : public ValidatorManagerInterface { virtual void log_validator_session_stats(BlockIdExt block_id, validatorsession::ValidatorSessionStats stats) = 0; virtual void log_new_validator_group_stats(validatorsession::NewValidatorGroupStats stats) = 0; + virtual void log_end_validator_group_stats(validatorsession::EndValidatorGroupStats stats) = 0; virtual void get_block_handle_for_litequery(BlockIdExt block_id, td::Promise promise) = 0; virtual void get_block_data_for_litequery(BlockIdExt block_id, td::Promise> promise) = 0; @@ -195,6 +206,12 @@ class ValidatorManager : public ValidatorManagerInterface { virtual void add_lite_query_stats(int lite_query_id) { } + virtual void record_collate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time, + CollationStats stats) { + } + virtual void record_validate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time) { + } + virtual void add_persistent_state_description(td::Ref desc) = 0; static bool is_persistent_state(UnixTime ts, UnixTime prev_ts) { diff --git a/validator/manager-disk.hpp b/validator/manager-disk.hpp index f4f68468..8b5e2bd7 100644 --- a/validator/manager-disk.hpp +++ b/validator/manager-disk.hpp @@ -401,6 +401,9 @@ class ValidatorManagerImpl : public ValidatorManager { void log_new_validator_group_stats(validatorsession::NewValidatorGroupStats stats) override { UNREACHABLE(); } + void log_end_validator_group_stats(validatorsession::EndValidatorGroupStats stats) override { + UNREACHABLE(); + } void get_out_msg_queue_size(BlockIdExt block_id, td::Promise promise) override { if (queue_size_counter_.empty()) { queue_size_counter_ = td::actor::create_actor("queuesizecounter", td::Ref{}, diff --git a/validator/manager-hardfork.hpp b/validator/manager-hardfork.hpp index 371c7570..e1c8c459 100644 --- a/validator/manager-hardfork.hpp +++ b/validator/manager-hardfork.hpp @@ -464,6 +464,9 @@ class ValidatorManagerImpl : public ValidatorManager { void log_new_validator_group_stats(validatorsession::NewValidatorGroupStats stats) override { UNREACHABLE(); } + void log_end_validator_group_stats(validatorsession::EndValidatorGroupStats stats) override { + UNREACHABLE(); + } void get_out_msg_queue_size(BlockIdExt block_id, td::Promise promise) override { if (queue_size_counter_.empty()) { queue_size_counter_ = td::actor::create_actor("queuesizecounter", td::Ref{}, diff --git a/validator/manager.cpp b/validator/manager.cpp index d3003a83..0eeadd8a 100644 --- a/validator/manager.cpp +++ b/validator/manager.cpp @@ -42,6 +42,7 @@ #include "td/utils/JsonBuilder.h" #include "common/delay.h" +#include "td/utils/filesystem.h" #include "validator/stats-merger.h" @@ -2248,7 +2249,7 @@ void ValidatorManagerImpl::update_shards() { } new_validator_groups_.emplace(val_group_id, std::move(it2->second)); } else { - auto G = create_validator_group(val_group_id, shard, val_set, opts, started_); + auto G = create_validator_group(val_group_id, shard, val_set, key_seqno, opts, started_); if (!G.empty()) { td::actor::send_closure(G, &ValidatorGroup::start, prev, last_masterchain_block_id_); } @@ -2302,7 +2303,7 @@ void ValidatorManagerImpl::update_shards() { } new_validator_groups_.emplace(val_group_id, std::move(it2->second)); } else { - auto G = create_validator_group(val_group_id, shard, val_set, opts, started_); + auto G = create_validator_group(val_group_id, shard, val_set, key_seqno, opts, started_); if (!G.empty()) { td::actor::send_closure(G, &ValidatorGroup::start, prev, last_masterchain_block_id_); } @@ -2328,7 +2329,7 @@ void ValidatorManagerImpl::update_shards() { } else { new_next_validator_groups_.emplace( val_group_id, - ValidatorGroupEntry{create_validator_group(val_group_id, shard, val_set, opts, started_), shard}); + ValidatorGroupEntry{create_validator_group(val_group_id, shard, val_set, key_seqno, opts, started_), shard}); } } } @@ -2438,7 +2439,7 @@ ValidatorSessionId ValidatorManagerImpl::get_validator_set_id(ShardIdFull shard, } td::actor::ActorOwn ValidatorManagerImpl::create_validator_group( - ValidatorSessionId session_id, ShardIdFull shard, td::Ref validator_set, + ValidatorSessionId session_id, ShardIdFull shard, td::Ref validator_set, BlockSeqno key_seqno, validatorsession::ValidatorSessionOptions opts, bool init_session) { if (check_gc_list_.count(session_id) == 1) { return td::actor::ActorOwn{}; @@ -2449,7 +2450,7 @@ td::actor::ActorOwn ValidatorManagerImpl::create_validator_group auto validator_id = get_validator(shard, validator_set); CHECK(!validator_id.is_zero()); auto G = td::actor::create_actor( - PSTRING() << "valgroup" << shard.to_str(), shard, validator_id, session_id, validator_set, + PSTRING() << "valgroup" << shard.to_str(), shard, validator_id, session_id, validator_set, key_seqno, last_masterchain_state_->get_collator_config(true), opts, keyring_, adnl_, rldp_, overlays_, db_root_, actor_id(this), init_session, opts_->check_unsafe_resync_allowed(validator_set->get_catchain_seqno()), opts_, opts_->need_monitor(shard, last_masterchain_state_)); @@ -3055,13 +3056,35 @@ void ValidatorManagerImpl::log_validator_session_stats(BlockIdExt block_id, for (const auto &round : stats.rounds) { std::vector> producers; for (const auto &producer : round.producers) { + BlockIdExt cur_block_id{block_id.id, producer.root_hash, producer.file_hash}; + auto it = recorded_block_stats_.find(cur_block_id); + tl_object_ptr collation_stats; + if (it != recorded_block_stats_.end() && it->second.collator_stats_) { + auto &stats = it->second.collator_stats_.value(); + collation_stats = create_tl_object( + stats.bytes, stats.gas, stats.lt_delta, stats.cat_bytes, stats.cat_gas, stats.cat_lt_delta, + stats.limits_log, stats.ext_msgs_total, stats.ext_msgs_filtered, stats.ext_msgs_accepted, + stats.ext_msgs_rejected); + } + std::string approvers, signers; + for (bool x : producer.approvers) { + approvers += (x ? '1' : '0'); + } + for (bool x : producer.signers) { + signers += (x ? '1' : '0'); + } producers.push_back(create_tl_object( - producer.id.bits256_value(), producer.candidate_id, producer.block_status, producer.comment, - producer.block_timestamp, producer.is_accepted, producer.is_ours, producer.got_submit_at, - producer.collation_time, producer.collated_at, producer.collation_cached, producer.validation_time, - producer.validated_at, producer.validation_cached, producer.gen_utime, producer.approved_weight, - producer.approved_33pct_at, producer.approved_66pct_at, producer.signed_weight, producer.signed_33pct_at, - producer.signed_66pct_at, producer.serialize_time, producer.deserialize_time, producer.serialized_size)); + producer.id.bits256_value(), producer.candidate_id, producer.block_status, producer.root_hash, + producer.file_hash, producer.comment, producer.block_timestamp, producer.is_accepted, producer.is_ours, + producer.got_submit_at, producer.collation_time, producer.collated_at, producer.collation_cached, + it == recorded_block_stats_.end() ? -1.0 : it->second.collator_work_time_, + it == recorded_block_stats_.end() ? -1.0 : it->second.collator_cpu_work_time_, std::move(collation_stats), + producer.validation_time, producer.validated_at, producer.validation_cached, + it == recorded_block_stats_.end() ? -1.0 : it->second.validator_work_time_, + it == recorded_block_stats_.end() ? -1.0 : it->second.validator_cpu_work_time_, producer.gen_utime, + producer.approved_weight, producer.approved_33pct_at, producer.approved_66pct_at, std::move(approvers), + producer.signed_weight, producer.signed_33pct_at, producer.signed_66pct_at, std::move(signers), + producer.serialize_time, producer.deserialize_time, producer.serialized_size)); } rounds.push_back(create_tl_object(round.timestamp, std::move(producers))); } @@ -3093,8 +3116,8 @@ void ValidatorManagerImpl::log_new_validator_group_stats(validatorsession::NewVa create_tl_object(node.id.bits256_value(), node.weight)); } auto obj = create_tl_object( - stats.session_id, stats.shard.workchain, stats.shard.shard, stats.cc_seqno, stats.timestamp, stats.self_idx, - std::move(nodes)); + stats.session_id, stats.shard.workchain, stats.shard.shard, stats.cc_seqno, stats.last_key_block_seqno, + stats.timestamp, stats.self_idx, std::move(nodes)); auto s = td::json_encode(td::ToJson(*obj.get()), false); s.erase(std::remove_if(s.begin(), s.end(), [](char c) { return c == '\n' || c == '\r'; }), s.end()); @@ -3103,7 +3126,31 @@ void ValidatorManagerImpl::log_new_validator_group_stats(validatorsession::NewVa file << s << "\n"; file.close(); - LOG(INFO) << "Writing new validator group stats for " << stats.shard.to_str(); + LOG(INFO) << "Writing new validator group stats for " << stats.session_id << " shard=" << stats.shard.to_str() + << " cc_seqno=" << stats.cc_seqno; +} + +void ValidatorManagerImpl::log_end_validator_group_stats(validatorsession::EndValidatorGroupStats stats) { + std::string fname = opts_->get_session_logs_file(); + if (fname.empty()) { + return; + } + std::vector> nodes; + for (const auto &node : stats.nodes) { + nodes.push_back(create_tl_object(node.id.bits256_value(), + node.catchain_blocks)); + } + auto obj = create_tl_object(stats.session_id, stats.timestamp, + std::move(nodes)); + auto s = td::json_encode(td::ToJson(*obj.get()), false); + s.erase(std::remove_if(s.begin(), s.end(), [](char c) { return c == '\n' || c == '\r'; }), s.end()); + + std::ofstream file; + file.open(fname, std::ios_base::app); + file << s << "\n"; + file.close(); + + LOG(INFO) << "Writing end validator group stats for " << stats.session_id; } void ValidatorManagerImpl::get_block_handle_for_litequery(BlockIdExt block_id, td::Promise promise) { @@ -3480,6 +3527,31 @@ td::actor::ActorOwn ValidatorManagerFactory::create( rldp, overlays); } +void ValidatorManagerImpl::record_collate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time, + CollationStats stats) { + auto &record = new_block_stats_record(block_id); + record.collator_work_time_ = work_time; + record.collator_cpu_work_time_ = cpu_work_time; + record.collator_stats_ = std::move(stats); +} + +void ValidatorManagerImpl::record_validate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time) { + auto &record = new_block_stats_record(block_id); + record.validator_work_time_ = work_time; + record.validator_cpu_work_time_ = cpu_work_time; +} + +ValidatorManagerImpl::RecordedBlockStats &ValidatorManagerImpl::new_block_stats_record(BlockIdExt block_id) { + if (!recorded_block_stats_.count(block_id)) { + recorded_block_stats_lru_.push(block_id); + if (recorded_block_stats_lru_.size() > 4096) { + recorded_block_stats_.erase(recorded_block_stats_lru_.front()); + recorded_block_stats_lru_.pop(); + } + } + return recorded_block_stats_[block_id]; +} + size_t ValidatorManagerImpl::CheckedExtMsgCounter::get_msg_count(WorkchainId wc, StdSmcAddress addr) { before_query(); auto it1 = counter_cur_.find({wc, addr}); diff --git a/validator/manager.hpp b/validator/manager.hpp index 8c881f60..43524597 100644 --- a/validator/manager.hpp +++ b/validator/manager.hpp @@ -39,6 +39,7 @@ #include #include #include +#include namespace ton { @@ -278,7 +279,7 @@ class ValidatorManagerImpl : public ValidatorManager { BlockSeqno last_key_block_seqno, const validatorsession::ValidatorSessionOptions &opts); td::actor::ActorOwn create_validator_group(ValidatorSessionId session_id, ShardIdFull shard, - td::Ref validator_set, + td::Ref validator_set, BlockSeqno key_seqno, validatorsession::ValidatorSessionOptions opts, bool create_catchain); struct ValidatorGroupEntry { @@ -619,6 +620,7 @@ class ValidatorManagerImpl : public ValidatorManager { void log_validator_session_stats(BlockIdExt block_id, validatorsession::ValidatorSessionStats stats) override; void log_new_validator_group_stats(validatorsession::NewValidatorGroupStats stats) override; + void log_end_validator_group_stats(validatorsession::EndValidatorGroupStats stats) override; void update_options(td::Ref opts) override; @@ -756,6 +758,21 @@ class ValidatorManagerImpl : public ValidatorManager { td::actor::ActorOwn candidates_buffer_; + struct RecordedBlockStats { + double collator_work_time_ = -1.0; + double collator_cpu_work_time_ = -1.0; + td::optional collator_stats_; + double validator_work_time_ = -1.0; + double validator_cpu_work_time_ = -1.0; + }; + std::map recorded_block_stats_; + std::queue recorded_block_stats_lru_; + + void record_collate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time, + CollationStats stats) override; + void record_validate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time) override; + RecordedBlockStats &new_block_stats_record(BlockIdExt block_id); + struct Collator { td::actor::ActorOwn actor; std::set shards; diff --git a/validator/validator-group.cpp b/validator/validator-group.cpp index 83e27358..ed0afea9 100644 --- a/validator/validator-group.cpp +++ b/validator/validator-group.cpp @@ -369,6 +369,7 @@ void ValidatorGroup::start(std::vector prev, BlockIdExt min_masterch stats.session_id = session_id_; stats.shard = shard_; stats.cc_seqno = validator_set_->get_catchain_seqno(); + stats.last_key_block_seqno = last_key_block_seqno_; stats.timestamp = td::Clocks::system(); td::uint32 idx = 0; for (const auto& node : validator_set_->export_vector()) { @@ -400,6 +401,16 @@ void ValidatorGroup::destroy() { td::actor::send_closure(manager, &ValidatorManager::log_validator_session_stats, block_id, std::move(stats)); }); + td::actor::send_closure(session_, &validatorsession::ValidatorSession::get_end_stats, + [manager = manager_](td::Result R) { + if (R.is_error()) { + LOG(DEBUG) << "Failed to get validator session end stats: " << R.move_as_error(); + return; + } + auto stats = R.move_as_ok(); + td::actor::send_closure(manager, &ValidatorManager::log_end_validator_group_stats, + std::move(stats)); + }); auto ses = session_.release(); delay_action([ses]() mutable { td::actor::send_closure(ses, &validatorsession::ValidatorSession::destroy); }, td::Timestamp::in(10.0)); diff --git a/validator/validator-group.hpp b/validator/validator-group.hpp index f44ce868..08171686 100644 --- a/validator/validator-group.hpp +++ b/validator/validator-group.hpp @@ -70,8 +70,8 @@ class ValidatorGroup : public td::actor::Actor { } ValidatorGroup(ShardIdFull shard, PublicKeyHash local_id, ValidatorSessionId session_id, - td::Ref validator_set, block::CollatorConfig collator_config, - validatorsession::ValidatorSessionOptions config, + td::Ref validator_set, BlockSeqno last_key_block_seqno, + block::CollatorConfig collator_config, validatorsession::ValidatorSessionOptions config, td::actor::ActorId keyring, td::actor::ActorId adnl, td::actor::ActorId rldp, td::actor::ActorId overlays, std::string db_root, td::actor::ActorId validator_manager, bool create_session, @@ -80,6 +80,7 @@ class ValidatorGroup : public td::actor::Actor { , local_id_(std::move(local_id)) , session_id_(session_id) , validator_set_(std::move(validator_set)) + , last_key_block_seqno_(last_key_block_seqno) , collator_config_(std::move(collator_config)) , config_(std::move(config)) , keyring_(keyring) @@ -122,6 +123,7 @@ class ValidatorGroup : public td::actor::Actor { BlockIdExt min_masterchain_block_id_; td::Ref validator_set_; + BlockSeqno last_key_block_seqno_; block::CollatorConfig collator_config_; validatorsession::ValidatorSessionOptions config_;