mirror of
https://github.com/ton-blockchain/ton
synced 2025-02-14 20:22:19 +00:00
Improve collator node pings and collation manager stats
This commit is contained in:
parent
5fae8db7a0
commit
923f1cd69b
8 changed files with 66 additions and 7 deletions
|
@ -750,7 +750,7 @@ engine.validator.perfTimerStats stats:(vector engine.validator.PerfTimerStatsByN
|
|||
engine.validator.shardOutQueueSize size:long = engine.validator.ShardOutQueueSize;
|
||||
|
||||
engine.validator.collationManagerStats.shard shard_id:tonNode.shardId self_collate:Bool select_mode:string active:Bool collators:(vector int256) = engine.validator.collationManagerStats.Shard;
|
||||
engine.validator.collationManagerStats.collator adnl_id:int256 active:Bool alive:Bool ping_in:double = engine.validator.collationManagerStats.Collator;
|
||||
engine.validator.collationManagerStats.collator adnl_id:int256 active:Bool alive:Bool ping_in:double last_ping_ago:double last_ping_status:string = engine.validator.collationManagerStats.Collator;
|
||||
engine.validator.collationManagerStats.localId adnl_id:int256 shards:(vector engine.validator.collationManagerStats.shard)
|
||||
collators:(vector engine.validator.collationManagerStats.collator) = engine.validator.collationManagerStats.LocalId;
|
||||
engine.validator.collationManagerStats local_ids:(vector engine.validator.collationManagerStats.localId) = engine.validator.CollationManagerStats;
|
||||
|
|
Binary file not shown.
|
@ -1775,8 +1775,24 @@ td::Status GetCollationManagerStatsQuery::receive(td::BufferSlice data) {
|
|||
if (collator == nullptr) {
|
||||
return td::Status::Error("collator not found");
|
||||
}
|
||||
td::TerminalIO::out() << " " << id << " alive=" << (int)collator->alive_
|
||||
<< " ping_in=" << collator->ping_in_ << "\n";
|
||||
td::StringBuilder sb;
|
||||
sb << " " << id << "\n";
|
||||
sb << " alive=" << (int)collator->alive_;
|
||||
if (collator->active_) {
|
||||
sb << " ping_in=" << td::StringBuilder::FixedDouble(std::max(collator->ping_in_, 0.0), 3);
|
||||
}
|
||||
sb << " last_ping_ago=";
|
||||
if (collator->last_ping_ago_ < 0.0) {
|
||||
sb << "never";
|
||||
} else {
|
||||
std::string status = collator->last_ping_status_;
|
||||
std::erase_if(status, [](char c) { return c < (char)32; });
|
||||
if (status.size() > 128) {
|
||||
status.resize(128);
|
||||
}
|
||||
sb << td::StringBuilder::FixedDouble(collator->last_ping_ago_, 3) << ": " << status;
|
||||
}
|
||||
td::TerminalIO::out() << sb.as_cslice() << "\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -261,6 +261,8 @@ void CollationManager::get_stats(
|
|||
} else {
|
||||
obj->ping_in_ = -1.0;
|
||||
}
|
||||
obj->last_ping_ago_ = collator.last_ping_at ? td::Time::now() - collator.last_ping_at.at() : -1.0;
|
||||
obj->last_ping_status_ = collator.last_ping_status.is_ok() ? "OK" : collator.last_ping_status.message().str();
|
||||
stats->collators_.push_back(std::move(obj));
|
||||
}
|
||||
promise.set_value(std::move(stats));
|
||||
|
@ -323,7 +325,7 @@ void CollationManager::alarm() {
|
|||
td::actor::send_closure(SelfId, &CollationManager::got_pong, id, std::move(R));
|
||||
};
|
||||
LOG(DEBUG) << "sending ping to " << id;
|
||||
td::actor::send_closure(rldp_, &rldp::Rldp::send_query, local_id_, id, "collatorping", std::move(P),
|
||||
td::actor::send_closure(rldp_, &rldp::Rldp::send_query, local_id_, id, "ping", std::move(P),
|
||||
td::Timestamp::in(2.0), std::move(query));
|
||||
} else {
|
||||
alarm_timestamp().relax(collator.ping_at);
|
||||
|
@ -340,7 +342,7 @@ void CollationManager::got_pong(adnl::AdnlNodeIdShort id, td::Result<td::BufferS
|
|||
collator.sent_ping = false;
|
||||
|
||||
auto r_pong = [&]() -> td::Result<tl_object_ptr<ton_api::collatorNode_pong>> {
|
||||
TRY_RESULT_PREFIX(data, std::move(R), "rldp query error: ");
|
||||
TRY_RESULT(data, std::move(R));
|
||||
auto r_error = fetch_tl_object<ton_api::collatorNode_error>(data, true);
|
||||
if (r_error.is_ok()) {
|
||||
auto error = r_error.move_as_ok();
|
||||
|
@ -348,12 +350,15 @@ void CollationManager::got_pong(adnl::AdnlNodeIdShort id, td::Result<td::BufferS
|
|||
}
|
||||
return fetch_tl_object<ton_api::collatorNode_pong>(data, true);
|
||||
}();
|
||||
collator.last_ping_at = td::Timestamp::now();
|
||||
if (r_pong.is_error()) {
|
||||
LOG(DEBUG) << "pong from " << id << " : " << r_pong.move_as_error();
|
||||
LOG(DEBUG) << "pong from " << id << " : " << r_pong.error();
|
||||
collator.alive = false;
|
||||
collator.last_ping_status = r_pong.move_as_error();
|
||||
} else {
|
||||
LOG(DEBUG) << "pong from " << id << " : OK";
|
||||
collator.alive = true;
|
||||
collator.last_ping_status = td::Status::OK();
|
||||
}
|
||||
collator.ping_at = td::Timestamp::in(td::Random::fast(10.0, 20.0));
|
||||
if (collator.active_cnt && !collator.sent_ping) {
|
||||
|
|
|
@ -65,6 +65,8 @@ class CollationManager : public td::actor::Actor {
|
|||
td::Timestamp ping_at = td::Timestamp::now();
|
||||
bool sent_ping = false;
|
||||
size_t active_cnt = 0;
|
||||
td::Timestamp last_ping_at = td::Timestamp::never();
|
||||
td::Status last_ping_status = td::Status::Error("not pinged");
|
||||
};
|
||||
std::map<adnl::AdnlNodeIdShort, CollatorInfo> collators_;
|
||||
|
||||
|
|
|
@ -167,6 +167,10 @@ void CollatorNode::new_masterchain_block_notification(td::Ref<MasterchainState>
|
|||
}
|
||||
}
|
||||
|
||||
// Remembers the most recent shard client block handle passed in by the caller.
// The stored handle is read by check_out_of_sync(), which compares its
// unix_time() against the system clock.
void CollatorNode::update_shard_client_handle(BlockHandle shard_client_handle) {
|
||||
shard_client_handle_ = shard_client_handle;
|
||||
}
|
||||
|
||||
void CollatorNode::update_validator_group_info(ShardIdFull shard, std::vector<BlockIdExt> prev,
|
||||
CatchainSeqno cc_seqno) {
|
||||
if (!can_collate_shard(shard)) {
|
||||
|
@ -225,7 +229,12 @@ void CollatorNode::update_validator_group_info(ShardIdFull shard, std::vector<Bl
|
|||
}
|
||||
++cache_it;
|
||||
}
|
||||
generate_block(shard, cc_seqno, info.prev, {}, td::Timestamp::in(10.0), [](td::Result<BlockCandidate>) {});
|
||||
auto S = check_out_of_sync();
|
||||
if (S.is_ok()) {
|
||||
generate_block(shard, cc_seqno, info.prev, {}, td::Timestamp::in(10.0), [](td::Result<BlockCandidate>) {});
|
||||
} else {
|
||||
LOG(DEBUG) << "not generating block automatically: " << S;
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -535,9 +544,22 @@ void CollatorNode::process_result(std::shared_ptr<CacheEntry> cache_entry, td::R
|
|||
cache_entry->promises.clear();
|
||||
}
|
||||
|
||||
// Reports whether this collator node looks up to date.
//
// Returns:
//   - Error("not inited") when no masterchain state or no shard client handle
//     has been stored yet;
//   - Error("out of sync: ...") when either the last masterchain state or the
//     shard client handle carries a unix time more than 60 seconds behind the
//     system clock (the message includes both lags in seconds);
//   - OK otherwise.
td::Status CollatorNode::check_out_of_sync() {
|
||||
// Both inputs are required before any timing comparison can be made.
if (last_masterchain_state_.is_null() || !shard_client_handle_) {
|
||||
return td::Status::Error("not inited");
|
||||
}
|
||||
// Current wall-clock time, truncated to UnixTime.
auto now = (UnixTime)td::Clocks::system();
|
||||
// Either source lagging by more than 60 seconds counts as out of sync.
if (last_masterchain_state_->get_unix_time() < now - 60 || shard_client_handle_->unix_time() < now - 60) {
|
||||
return td::Status::Error(PSTRING() << "out of sync: mc " << now - last_masterchain_state_->get_unix_time()
|
||||
<< "s ago, shardclient " << now - shard_client_handle_->unix_time() << "s ago");
|
||||
}
|
||||
return td::Status::OK();
|
||||
}
|
||||
|
||||
// Handles a collatorNode.ping query from `src`.
//
// The ping is answered with a serialized collatorNode_pong (constructed with 0)
// only when check_out_of_sync() returns OK. The `ping` argument itself is not
// inspected here.
void CollatorNode::process_ping(adnl::AdnlNodeIdShort src, ton_api::collatorNode_ping& ping,
|
||||
td::Promise<td::BufferSlice> promise) {
|
||||
LOG(DEBUG) << "got ping from " << src;
|
||||
// NOTE(review): TRY_STATUS_PROMISE presumably fails `promise` with the
// out-of-sync status and returns early when the check is not OK — confirm
// against the macro definition.
TRY_STATUS_PROMISE(promise, check_out_of_sync());
|
||||
promise.set_result(create_serialize_tl_object<ton_api::collatorNode_pong>(0));
|
||||
}
|
||||
|
||||
|
|
|
@ -36,6 +36,7 @@ class CollatorNode : public td::actor::Actor {
|
|||
void del_shard(ShardIdFull shard);
|
||||
|
||||
void new_masterchain_block_notification(td::Ref<MasterchainState> state);
|
||||
void update_shard_client_handle(BlockHandle shard_client_handle);
|
||||
void update_validator_group_info(ShardIdFull shard, std::vector<BlockIdExt> prev, CatchainSeqno cc_seqno);
|
||||
|
||||
void update_options(td::Ref<ValidatorManagerOptions> opts) {
|
||||
|
@ -84,6 +85,7 @@ class CollatorNode : public td::actor::Actor {
|
|||
std::map<std::pair<ShardIdFull, CatchainSeqno>, FutureValidatorGroup> future_validator_groups_;
|
||||
|
||||
td::Ref<MasterchainState> last_masterchain_state_;
|
||||
BlockHandle shard_client_handle_;
|
||||
|
||||
td::Result<FutureValidatorGroup*> get_future_validator_group(ShardIdFull shard, CatchainSeqno cc_seqno);
|
||||
|
||||
|
@ -92,6 +94,8 @@ class CollatorNode : public td::actor::Actor {
|
|||
td::Promise<BlockCandidate> promise);
|
||||
void process_result(std::shared_ptr<CacheEntry> cache_entry, td::Result<BlockCandidate> R);
|
||||
|
||||
td::Status check_out_of_sync();
|
||||
|
||||
public:
|
||||
static tl_object_ptr<ton_api::collatorNode_Candidate> serialize_candidate(const BlockCandidate& block, bool compress);
|
||||
static td::Result<BlockCandidate> deserialize_candidate(tl_object_ptr<ton_api::collatorNode_Candidate> f,
|
||||
|
|
|
@ -2754,6 +2754,9 @@ void ValidatorManagerImpl::update_shard_client_block_handle(BlockHandle handle,
|
|||
last_liteserver_state_ = std::move(state);
|
||||
}
|
||||
}
|
||||
for (auto &c : collator_nodes_) {
|
||||
td::actor::send_closure(c.second.actor, &CollatorNode::update_shard_client_handle, shard_client_handle_);
|
||||
}
|
||||
shard_client_update(seqno);
|
||||
promise.set_value(td::Unit());
|
||||
}
|
||||
|
@ -3509,6 +3512,13 @@ void ValidatorManagerImpl::add_collator(adnl::AdnlNodeIdShort id, ShardIdFull sh
|
|||
if (it == collator_nodes_.end()) {
|
||||
it = collator_nodes_.emplace(id, Collator()).first;
|
||||
it->second.actor = td::actor::create_actor<CollatorNode>("collatornode", id, opts_, actor_id(this), adnl_, rldp_);
|
||||
if (last_masterchain_state_.not_null()) {
|
||||
td::actor::send_closure(it->second.actor, &CollatorNode::new_masterchain_block_notification,
|
||||
last_masterchain_state_);
|
||||
}
|
||||
if (shard_client_handle_) {
|
||||
td::actor::send_closure(it->second.actor, &CollatorNode::update_shard_client_handle, shard_client_handle_);
|
||||
}
|
||||
}
|
||||
if (!it->second.shards.insert(shard).second) {
|
||||
return;
|
||||
|
|
Loading…
Reference in a new issue