1
0
Fork 0
mirror of https://github.com/ton-blockchain/ton synced 2025-02-15 04:32:21 +00:00

More logs for collators and validators

This commit is contained in:
SpyCheese 2023-07-18 17:06:06 +03:00
parent 3265e397f2
commit aa4f5769ca
7 changed files with 33 additions and 17 deletions

View file

@ -87,4 +87,8 @@ void PerfWarningTimer::reset() {
start_at_ = 0; start_at_ = 0;
} }
double PerfWarningTimer::elapsed() const {
return Time::now() - start_at_;
}
} // namespace td } // namespace td

View file

@ -53,6 +53,7 @@ class PerfWarningTimer {
PerfWarningTimer &operator=(PerfWarningTimer &&) = delete; PerfWarningTimer &operator=(PerfWarningTimer &&) = delete;
~PerfWarningTimer(); ~PerfWarningTimer();
void reset(); void reset();
double elapsed() const;
private: private:
string name_; string name_;

View file

@ -233,11 +233,13 @@ void CollatorNode::receive_query(adnl::AdnlNodeIdShort src, td::BufferSlice data
cache_entry = cache_[cache_key] = std::make_shared<CacheEntry>(); cache_entry = cache_[cache_key] = std::make_shared<CacheEntry>();
} }
if (cache_entry->result) { if (cache_entry->result) {
LOG(INFO) << "Using cached result";
new_promise.set_result(cache_entry->result.value().clone()); new_promise.set_result(cache_entry->result.value().clone());
return; return;
} }
cache_entry->promises.push_back(std::move(new_promise)); cache_entry->promises.push_back(std::move(new_promise));
if (cache_entry->started) { if (cache_entry->started) {
LOG(INFO) << "Collating of this block is already in progress, waiting";
return; return;
} }
cache_entry->started = true; cache_entry->started = true;

View file

@ -927,7 +927,6 @@ void FullNodeShardImpl::download_archive(BlockSeqno masterchain_seqno, std::stri
void FullNodeShardImpl::download_out_msg_queue_proof(BlockIdExt block_id, ShardIdFull dst_shard, td::Timestamp timeout, void FullNodeShardImpl::download_out_msg_queue_proof(BlockIdExt block_id, ShardIdFull dst_shard, td::Timestamp timeout,
td::Promise<td::Ref<OutMsgQueueProof>> promise) { td::Promise<td::Ref<OutMsgQueueProof>> promise) {
// TODO: maybe more complex download (like other requests here) // TODO: maybe more complex download (like other requests here)
// TODO: estimate max size
auto &b = choose_neighbour(true); auto &b = choose_neighbour(true);
if (b.adnl_id == adnl::AdnlNodeIdShort::zero()) { if (b.adnl_id == adnl::AdnlNodeIdShort::zero()) {
promise.set_error(td::Status::Error(ErrorCode::notready, "no nodes")); promise.set_error(td::Status::Error(ErrorCode::notready, "no nodes"));

View file

@ -279,6 +279,7 @@ bool Collator::fatal_error(td::Status error) {
error.ensure_error(); error.ensure_error();
LOG(ERROR) << "cannot generate block candidate for " << show_shard(shard_) << " : " << error.to_string(); LOG(ERROR) << "cannot generate block candidate for " << show_shard(shard_) << " : " << error.to_string();
if (busy_) { if (busy_) {
LOG(INFO) << "collation took " << perf_timer_.elapsed() << " s";
main_promise(std::move(error)); main_promise(std::move(error));
busy_ = false; busy_ = false;
} }
@ -4226,6 +4227,7 @@ void Collator::return_block_candidate(td::Result<td::Unit> saved) {
} else { } else {
CHECK(block_candidate); CHECK(block_candidate);
LOG(INFO) << "sending new BlockCandidate to Promise"; LOG(INFO) << "sending new BlockCandidate to Promise";
LOG(INFO) << "collation took " << perf_timer_.elapsed() << " s";
main_promise(block_candidate->clone()); main_promise(block_candidate->clone());
busy_ = false; busy_ = false;
stop(); stop();

View file

@ -88,6 +88,7 @@ bool ValidateQuery::reject_query(std::string error, td::BufferSlice reason) {
<< " collated_data=" << block_candidate.collated_file_hash.to_hex()); << " collated_data=" << block_candidate.collated_file_hash.to_hex());
errorlog::ErrorLog::log_file(block_candidate.data.clone()); errorlog::ErrorLog::log_file(block_candidate.data.clone());
errorlog::ErrorLog::log_file(block_candidate.collated_data.clone()); errorlog::ErrorLog::log_file(block_candidate.collated_data.clone());
LOG(INFO) << "validation took " << perf_timer_.elapsed() << " s";
main_promise.set_result(CandidateReject{std::move(error), std::move(reason)}); main_promise.set_result(CandidateReject{std::move(error), std::move(reason)});
} }
stop(); stop();
@ -108,6 +109,7 @@ bool ValidateQuery::soft_reject_query(std::string error, td::BufferSlice reason)
<< " collated_data=" << block_candidate.collated_file_hash.to_hex()); << " collated_data=" << block_candidate.collated_file_hash.to_hex());
errorlog::ErrorLog::log_file(block_candidate.data.clone()); errorlog::ErrorLog::log_file(block_candidate.data.clone());
errorlog::ErrorLog::log_file(block_candidate.collated_data.clone()); errorlog::ErrorLog::log_file(block_candidate.collated_data.clone());
LOG(INFO) << "validation took " << perf_timer_.elapsed() << " s";
main_promise.set_result(CandidateReject{std::move(error), std::move(reason)}); main_promise.set_result(CandidateReject{std::move(error), std::move(reason)});
} }
stop(); stop();
@ -126,6 +128,7 @@ bool ValidateQuery::fatal_error(td::Status error) {
errorlog::ErrorLog::log_file(block_candidate.data.clone()); errorlog::ErrorLog::log_file(block_candidate.data.clone());
errorlog::ErrorLog::log_file(block_candidate.collated_data.clone()); errorlog::ErrorLog::log_file(block_candidate.collated_data.clone());
} }
LOG(INFO) << "validation took " << perf_timer_.elapsed() << " s";
main_promise(std::move(error)); main_promise(std::move(error));
} }
stop(); stop();
@ -147,6 +150,7 @@ bool ValidateQuery::fatal_error(std::string err_msg, int err_code) {
void ValidateQuery::finish_query() { void ValidateQuery::finish_query() {
if (main_promise) { if (main_promise) {
LOG(INFO) << "validation took " << perf_timer_.elapsed() << " s";
main_promise.set_result(now_); main_promise.set_result(now_);
} }
stop(); stop();

View file

@ -133,7 +133,7 @@ void ValidatorGroup::validate_block_candidate(td::uint32 round_id, BlockCandidat
P.set_error(td::Status::Error(ErrorCode::notready, "validator group not started")); P.set_error(td::Status::Error(ErrorCode::notready, "validator group not started"));
return; return;
} }
VLOG(VALIDATOR_DEBUG) << "validating block candidate " << next_block_id; VLOG(VALIDATOR_DEBUG) << "validating block candidate " << next_block_id.to_str();
block.id = next_block_id; block.id = next_block_id;
run_validate_query(shard_, min_masterchain_block_id_, prev_block_ids_, std::move(block), validator_set_, manager_, run_validate_query(shard_, min_masterchain_block_id_, prev_block_ids_, std::move(block), validator_set_, manager_,
td::Timestamp::in(15.0), std::move(P), td::Timestamp::in(15.0), std::move(P),
@ -397,6 +397,7 @@ void ValidatorGroup::send_collate_query(td::uint32 round_id, td::Timestamp timeo
promise.set_error(td::Status::Error("too old")); promise.set_error(td::Status::Error("too old"));
return; return;
} }
BlockId next_block_id = create_next_block_id_simple();
adnl::AdnlNodeIdShort collator = adnl::AdnlNodeIdShort::zero(); adnl::AdnlNodeIdShort collator = adnl::AdnlNodeIdShort::zero();
// TODO: some way to choose node (similar to "unreliability" in full-node) // TODO: some way to choose node (similar to "unreliability" in full-node)
int cnt = 0; int cnt = 0;
@ -413,20 +414,23 @@ void ValidatorGroup::send_collate_query(td::uint32 round_id, td::Timestamp timeo
return; return;
} }
if (max_retries > 0) { promise = td::PromiseCreator::lambda([=, SelfId = actor_id(this), promise = std::move(promise),
promise = td::PromiseCreator::lambda( timer = td::Timer()](td::Result<BlockCandidate> R) mutable {
[=, SelfId = actor_id(this), promise = std::move(promise)](td::Result<BlockCandidate> R) mutable { if (R.is_ok()) {
if (R.is_ok()) { LOG(WARNING) << "collate query for " << next_block_id.to_str() << ": success, time=" << timer.elapsed() << "s";
promise.set_result(R.move_as_ok()); promise.set_result(R.move_as_ok());
} else if (timeout && timeout.is_in_past()) { return;
promise.set_result(R.move_as_error()); }
} else { bool retry = (!timeout || !timeout.is_in_past()) && max_retries > 0;
LOG(WARNING) << "collate query error, retrying: " << R.move_as_error(); LOG(WARNING) << "collate query for " << next_block_id.to_str() << ": " << R.error() << ", time=" << timer.elapsed()
td::actor::send_closure(SelfId, &ValidatorGroup::send_collate_query, round_id, timeout, std::move(promise), << "s, " << (retry ? "retrying" : "giving up");
max_retries - 1); if (retry) {
} td::actor::send_closure(SelfId, &ValidatorGroup::send_collate_query, round_id, timeout, std::move(promise),
}); max_retries - 1);
} } else {
promise.set_result(td::Status::Error(ErrorCode::timeout, "timeout"));
}
});
std::vector<tl_object_ptr<ton_api::tonNode_blockIdExt>> prev_blocks; std::vector<tl_object_ptr<ton_api::tonNode_blockIdExt>> prev_blocks;
for (const BlockIdExt &p : prev_block_ids_) { for (const BlockIdExt &p : prev_block_ids_) {
@ -445,7 +449,7 @@ void ValidatorGroup::send_collate_query(td::uint32 round_id, td::Timestamp timeo
td::actor::send_closure(SelfId, &ValidatorGroup::receive_collate_query_response, round_id, R.move_as_ok(), td::actor::send_closure(SelfId, &ValidatorGroup::receive_collate_query_response, round_id, R.move_as_ok(),
std::move(promise)); std::move(promise));
}); });
LOG(INFO) << "collate query for " << create_next_block_id_simple().to_str() << ": send query to " << collator; LOG(INFO) << "sending collate query for " << next_block_id.to_str() << ": send to " << collator;
size_t max_answer_size = config_.max_block_size + config_.max_collated_data_size + 256; size_t max_answer_size = config_.max_block_size + config_.max_collated_data_size + 256;
td::Timestamp query_timeout = td::Timestamp::in(10.0); td::Timestamp query_timeout = td::Timestamp::in(10.0);
query_timeout.relax(timeout); query_timeout.relax(timeout);