1
0
Fork 0
mirror of https://github.com/ton-blockchain/ton synced 2025-03-09 15:40:10 +00:00

Merge branch 'session-stats-patch' into accelerator

This commit is contained in:
SpyCheese 2024-08-20 12:41:55 +03:00
commit 5080cdffdd
23 changed files with 385 additions and 32 deletions

View file

@ -372,6 +372,11 @@ class Collator final : public td::actor::Actor {
public:
static td::uint32 get_skip_externals_queue_size();
private:
td::Timer work_timer_{true};
td::ThreadCpuTimer cpu_work_timer_{true};
CollationStats stats_;
};
} // namespace validator

View file

@ -1855,6 +1855,12 @@ bool Collator::register_shard_block_creators(std::vector<td::Bits256> creator_li
* @returns True if collation is successful, false otherwise.
*/
bool Collator::try_collate() {
work_timer_.resume();
cpu_work_timer_.resume();
SCOPE_EXIT {
work_timer_.pause();
cpu_work_timer_.pause();
};
if (!preinit_complete) {
LOG(WARNING) << "running do_preinit()";
if (!do_preinit()) {
@ -3611,6 +3617,29 @@ bool Collator::process_inbound_message(Ref<vm::CellSlice> enq_msg, ton::LogicalT
return true;
}
/**
* Creates a string that explains which limit is exceeded. Used for collator stats.
*
* @param block_limit_status Status of block limits.
* @param cls Which limit class is exceeded.
*
* @returns String for collator stats.
*/
static std::string block_full_comment(const block::BlockLimitStatus& block_limit_status, unsigned cls) {
auto bytes = block_limit_status.estimate_block_size();
if (!block_limit_status.limits.bytes.fits(cls, bytes)) {
return PSTRING() << "block_full bytes " << bytes;
}
if (!block_limit_status.limits.gas.fits(cls, block_limit_status.gas_used)) {
return PSTRING() << "block_full gas " << block_limit_status.gas_used;
}
auto lt_delta = block_limit_status.cur_lt - block_limit_status.limits.start_lt;
if (!block_limit_status.limits.lt_delta.fits(cls, lt_delta)) {
return PSTRING() << "block_full lt_delta " << lt_delta;
}
return "";
}
/**
* Processes inbound internal messages from message queues of the neighbors.
* Messages are processed until the normal limit is reached, soft timeout is reached or there are no more messages.
@ -3642,11 +3671,14 @@ bool Collator::process_inbound_internal_messages() {
}
if (block_full_) {
LOG(INFO) << "BLOCK FULL, stop processing inbound internal messages";
stats_.limits_log += PSTRING() << "INBOUND_INT_MESSAGES: "
<< block_full_comment(*block_limit_status_, block::ParamLimits::cl_normal) << "\n";
break;
}
if (soft_timeout_.is_in_past(td::Timestamp::now())) {
block_full_ = true;
LOG(WARNING) << "soft timeout reached, stop processing inbound internal messages";
stats_.limits_log += PSTRING() << "INBOUND_INT_MESSAGES: timeout\n";
break;
}
if (!check_cancelled()) {
@ -3695,10 +3727,13 @@ bool Collator::process_inbound_external_messages() {
}
if (full) {
LOG(INFO) << "BLOCK FULL, stop processing external messages";
stats_.limits_log += PSTRING() << "INBOUND_EXT_MESSAGES: "
<< block_full_comment(*block_limit_status_, block::ParamLimits::cl_soft) << "\n";
break;
}
if (medium_timeout_.is_in_past(td::Timestamp::now())) {
LOG(WARNING) << "medium timeout reached, stop processing inbound external messages";
stats_.limits_log += PSTRING() << "INBOUND_EXT_MESSAGES: timeout\n";
break;
}
if (!check_cancelled()) {
@ -3707,6 +3742,11 @@ bool Collator::process_inbound_external_messages() {
auto ext_msg = ext_msg_struct.cell;
ton::Bits256 hash{ext_msg->get_hash().bits()};
int r = process_external_message(std::move(ext_msg));
if (r > 0) {
++stats_.ext_msgs_accepted;
} else {
++stats_.ext_msgs_rejected;
}
if (r < 0) {
bad_ext_msgs_.emplace_back(ext_msg_struct.hash);
return false;
@ -3812,11 +3852,15 @@ bool Collator::process_dispatch_queue() {
block_full_ = !block_limit_status_->fits(block::ParamLimits::cl_normal);
if (block_full_) {
LOG(INFO) << "BLOCK FULL, stop processing dispatch queue";
stats_.limits_log += PSTRING() << "DISPATCH_QUEUE_STAGE_" << iter << ": "
<< block_full_comment(*block_limit_status_, block::ParamLimits::cl_normal)
<< "\n";
return true;
}
if (soft_timeout_.is_in_past(td::Timestamp::now())) {
block_full_ = true;
LOG(WARNING) << "soft timeout reached, stop processing dispatch queue";
stats_.limits_log += PSTRING() << "DISPATCH_QUEUE_STAGE_" << iter << ": timeout\n";
return true;
}
StdSmcAddress src_addr;
@ -3866,6 +3910,7 @@ bool Collator::process_dispatch_queue() {
++total_count;
if (total_count >= max_total_count[iter]) {
dispatch_queue_total_limit_reached_ = true;
stats_.limits_log += PSTRING() << "DISPATCH_QUEUE_STAGE_" << iter << ": total limit reached\n";
break;
}
}
@ -4215,6 +4260,8 @@ bool Collator::process_new_messages(bool enqueue_only) {
if ((block_full_ || have_unprocessed_account_dispatch_queue_) && !enqueue_only) {
LOG(INFO) << "BLOCK FULL, enqueue all remaining new messages";
enqueue_only = true;
stats_.limits_log += PSTRING() << "NEW_MESSAGES: "
<< block_full_comment(*block_limit_status_, block::ParamLimits::cl_normal) << "\n";
}
if (!check_cancelled()) {
return false;
@ -4226,6 +4273,8 @@ bool Collator::process_new_messages(bool enqueue_only) {
} else if (res == 3) {
LOG(INFO) << "All remaining new messages must be enqueued (BLOCK FULL)";
enqueue_only = true;
stats_.limits_log += PSTRING() << "NEW_MESSAGES: "
<< block_full_comment(*block_limit_status_, block::ParamLimits::cl_normal) << "\n";
}
}
return true;
@ -5665,6 +5714,18 @@ bool Collator::create_block_candidate() {
td::actor::send_closure_later(manager, &ValidatorManager::complete_external_messages, std::move(delay_ext_msgs_),
std::move(bad_ext_msgs_));
}
double work_time = work_timer_.elapsed();
double cpu_work_time = cpu_work_timer_.elapsed();
LOG(WARNING) << "Collate query work time = " << work_time << "s, cpu time = " << cpu_work_time << "s";
stats_.bytes = block_limit_status_->estimate_block_size();
stats_.gas = block_limit_status_->gas_used;
stats_.lt_delta = block_limit_status_->cur_lt - block_limit_status_->limits.start_lt;
stats_.cat_bytes = block_limit_status_->limits.classify_size(stats_.bytes);
stats_.cat_gas = block_limit_status_->limits.classify_gas(stats_.gas);
stats_.cat_lt_delta = block_limit_status_->limits.classify_lt(block_limit_status_->cur_lt);
td::actor::send_closure(manager, &ValidatorManager::record_collate_query_stats, block_candidate->id, work_time,
cpu_work_time, std::move(stats_));
return true;
}
@ -5770,6 +5831,7 @@ void Collator::after_get_external_messages(td::Result<std::vector<std::pair<Ref<
}
auto vect = res.move_as_ok();
for (auto& p : vect) {
++stats_.ext_msgs_total;
auto& ext_msg = p.first;
int priority = p.second;
Ref<vm::Cell> ext_msg_cell = ext_msg->root_cell();
@ -5781,6 +5843,7 @@ void Collator::after_get_external_messages(td::Result<std::vector<std::pair<Ref<
}
}
if (err) {
++stats_.ext_msgs_filtered;
bad_ext_msgs_.emplace_back(ext_msg->hash());
}
}

View file

@ -117,6 +117,7 @@ bool ValidateQuery::reject_query(std::string error, td::BufferSlice reason) {
error = error_ctx() + error;
LOG(ERROR) << "REJECT: aborting validation of block candidate for " << shard_.to_str() << " : " << error;
if (main_promise) {
record_stats();
errorlog::ErrorLog::log(PSTRING() << "REJECT: aborting validation of block candidate for " << shard_.to_str()
<< " : " << error << ": data=" << block_candidate.id.file_hash.to_hex()
<< " collated_data=" << block_candidate.collated_file_hash.to_hex());
@ -155,6 +156,7 @@ bool ValidateQuery::soft_reject_query(std::string error, td::BufferSlice reason)
error = error_ctx() + error;
LOG(ERROR) << "SOFT REJECT: aborting validation of block candidate for " << shard_.to_str() << " : " << error;
if (main_promise) {
record_stats();
errorlog::ErrorLog::log(PSTRING() << "SOFT REJECT: aborting validation of block candidate for " << shard_.to_str()
<< " : " << error << ": data=" << block_candidate.id.file_hash.to_hex()
<< " collated_data=" << block_candidate.collated_file_hash.to_hex());
@ -178,6 +180,7 @@ bool ValidateQuery::fatal_error(td::Status error) {
error.ensure_error();
LOG(ERROR) << "aborting validation of block candidate for " << shard_.to_str() << " : " << error.to_string();
if (main_promise) {
record_stats();
auto c = error.code();
if (c <= -667 && c >= -670) {
errorlog::ErrorLog::log(PSTRING() << "FATAL ERROR: aborting validation of block candidate for " << shard_.to_str()
@ -236,6 +239,7 @@ bool ValidateQuery::fatal_error(std::string err_msg, int err_code) {
*/
void ValidateQuery::finish_query() {
if (main_promise) {
record_stats();
LOG(WARNING) << "validate query done";
LOG(WARNING) << "validation took " << perf_timer_.elapsed() << " s";
main_promise.set_result(now_);
@ -6901,6 +6905,12 @@ bool ValidateQuery::try_validate() {
if (pending) {
return true;
}
work_timer_.resume();
cpu_work_timer_.resume();
SCOPE_EXIT {
work_timer_.pause();
cpu_work_timer_.pause();
};
try {
if (!stage_) {
LOG(WARNING) << "try_validate stage 0";
@ -7040,6 +7050,17 @@ void ValidateQuery::written_candidate() {
finish_query();
}
/**
* Sends validation work time to manager.
*/
void ValidateQuery::record_stats() {
double work_time = work_timer_.elapsed();
double cpu_work_time = cpu_work_timer_.elapsed();
LOG(WARNING) << "Validate query work time = " << work_time << "s, cpu time = " << cpu_work_time << "s";
td::actor::send_closure(manager, &ValidatorManager::record_validate_query_stats, block_candidate.id, work_time,
cpu_work_time);
}
} // namespace validator
} // namespace ton

View file

@ -404,6 +404,10 @@ class ValidateQuery : public td::actor::Actor {
}
return true;
}
td::Timer work_timer_{true};
td::ThreadCpuTimer cpu_work_timer_{true};
void record_stats();
};
} // namespace validator