mirror of
https://github.com/ton-blockchain/ton
synced 2025-02-15 04:32:21 +00:00
Improve collator node pings and collation manager stats
This commit is contained in:
parent
5fae8db7a0
commit
923f1cd69b
8 changed files with 66 additions and 7 deletions
|
@ -750,7 +750,7 @@ engine.validator.perfTimerStats stats:(vector engine.validator.PerfTimerStatsByN
|
||||||
engine.validator.shardOutQueueSize size:long = engine.validator.ShardOutQueueSize;
|
engine.validator.shardOutQueueSize size:long = engine.validator.ShardOutQueueSize;
|
||||||
|
|
||||||
engine.validator.collationManagerStats.shard shard_id:tonNode.shardId self_collate:Bool select_mode:string active:Bool collators:(vector int256) = engine.validator.collationManagerStats.Shard;
|
engine.validator.collationManagerStats.shard shard_id:tonNode.shardId self_collate:Bool select_mode:string active:Bool collators:(vector int256) = engine.validator.collationManagerStats.Shard;
|
||||||
engine.validator.collationManagerStats.collator adnl_id:int256 active:Bool alive:Bool ping_in:double = engine.validator.collationManagerStats.Collator;
|
engine.validator.collationManagerStats.collator adnl_id:int256 active:Bool alive:Bool ping_in:double last_ping_ago:double last_ping_status:string = engine.validator.collationManagerStats.Collator;
|
||||||
engine.validator.collationManagerStats.localId adnl_id:int256 shards:(vector engine.validator.collationManagerStats.shard)
|
engine.validator.collationManagerStats.localId adnl_id:int256 shards:(vector engine.validator.collationManagerStats.shard)
|
||||||
collators:(vector engine.validator.collationManagerStats.collator) = engine.validator.collationManagerStats.LocalId;
|
collators:(vector engine.validator.collationManagerStats.collator) = engine.validator.collationManagerStats.LocalId;
|
||||||
engine.validator.collationManagerStats local_ids:(vector engine.validator.collationManagerStats.localId) = engine.validator.CollationManagerStats;
|
engine.validator.collationManagerStats local_ids:(vector engine.validator.collationManagerStats.localId) = engine.validator.CollationManagerStats;
|
||||||
|
|
Binary file not shown.
|
@ -1775,8 +1775,24 @@ td::Status GetCollationManagerStatsQuery::receive(td::BufferSlice data) {
|
||||||
if (collator == nullptr) {
|
if (collator == nullptr) {
|
||||||
return td::Status::Error("collator not found");
|
return td::Status::Error("collator not found");
|
||||||
}
|
}
|
||||||
td::TerminalIO::out() << " " << id << " alive=" << (int)collator->alive_
|
td::StringBuilder sb;
|
||||||
<< " ping_in=" << collator->ping_in_ << "\n";
|
sb << " " << id << "\n";
|
||||||
|
sb << " alive=" << (int)collator->alive_;
|
||||||
|
if (collator->active_) {
|
||||||
|
sb << " ping_in=" << td::StringBuilder::FixedDouble(std::max(collator->ping_in_, 0.0), 3);
|
||||||
|
}
|
||||||
|
sb << " last_ping_ago=";
|
||||||
|
if (collator->last_ping_ago_ < 0.0) {
|
||||||
|
sb << "never";
|
||||||
|
} else {
|
||||||
|
std::string status = collator->last_ping_status_;
|
||||||
|
std::erase_if(status, [](char c) { return c < (char)32; });
|
||||||
|
if (status.size() > 128) {
|
||||||
|
status.resize(128);
|
||||||
|
}
|
||||||
|
sb << td::StringBuilder::FixedDouble(collator->last_ping_ago_, 3) << ": " << status;
|
||||||
|
}
|
||||||
|
td::TerminalIO::out() << sb.as_cslice() << "\n";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -261,6 +261,8 @@ void CollationManager::get_stats(
|
||||||
} else {
|
} else {
|
||||||
obj->ping_in_ = -1.0;
|
obj->ping_in_ = -1.0;
|
||||||
}
|
}
|
||||||
|
obj->last_ping_ago_ = collator.last_ping_at ? td::Time::now() - collator.last_ping_at.at() : -1.0;
|
||||||
|
obj->last_ping_status_ = collator.last_ping_status.is_ok() ? "OK" : collator.last_ping_status.message().str();
|
||||||
stats->collators_.push_back(std::move(obj));
|
stats->collators_.push_back(std::move(obj));
|
||||||
}
|
}
|
||||||
promise.set_value(std::move(stats));
|
promise.set_value(std::move(stats));
|
||||||
|
@ -323,7 +325,7 @@ void CollationManager::alarm() {
|
||||||
td::actor::send_closure(SelfId, &CollationManager::got_pong, id, std::move(R));
|
td::actor::send_closure(SelfId, &CollationManager::got_pong, id, std::move(R));
|
||||||
};
|
};
|
||||||
LOG(DEBUG) << "sending ping to " << id;
|
LOG(DEBUG) << "sending ping to " << id;
|
||||||
td::actor::send_closure(rldp_, &rldp::Rldp::send_query, local_id_, id, "collatorping", std::move(P),
|
td::actor::send_closure(rldp_, &rldp::Rldp::send_query, local_id_, id, "ping", std::move(P),
|
||||||
td::Timestamp::in(2.0), std::move(query));
|
td::Timestamp::in(2.0), std::move(query));
|
||||||
} else {
|
} else {
|
||||||
alarm_timestamp().relax(collator.ping_at);
|
alarm_timestamp().relax(collator.ping_at);
|
||||||
|
@ -340,7 +342,7 @@ void CollationManager::got_pong(adnl::AdnlNodeIdShort id, td::Result<td::BufferS
|
||||||
collator.sent_ping = false;
|
collator.sent_ping = false;
|
||||||
|
|
||||||
auto r_pong = [&]() -> td::Result<tl_object_ptr<ton_api::collatorNode_pong>> {
|
auto r_pong = [&]() -> td::Result<tl_object_ptr<ton_api::collatorNode_pong>> {
|
||||||
TRY_RESULT_PREFIX(data, std::move(R), "rldp query error: ");
|
TRY_RESULT(data, std::move(R));
|
||||||
auto r_error = fetch_tl_object<ton_api::collatorNode_error>(data, true);
|
auto r_error = fetch_tl_object<ton_api::collatorNode_error>(data, true);
|
||||||
if (r_error.is_ok()) {
|
if (r_error.is_ok()) {
|
||||||
auto error = r_error.move_as_ok();
|
auto error = r_error.move_as_ok();
|
||||||
|
@ -348,12 +350,15 @@ void CollationManager::got_pong(adnl::AdnlNodeIdShort id, td::Result<td::BufferS
|
||||||
}
|
}
|
||||||
return fetch_tl_object<ton_api::collatorNode_pong>(data, true);
|
return fetch_tl_object<ton_api::collatorNode_pong>(data, true);
|
||||||
}();
|
}();
|
||||||
|
collator.last_ping_at = td::Timestamp::now();
|
||||||
if (r_pong.is_error()) {
|
if (r_pong.is_error()) {
|
||||||
LOG(DEBUG) << "pong from " << id << " : " << r_pong.move_as_error();
|
LOG(DEBUG) << "pong from " << id << " : " << r_pong.error();
|
||||||
collator.alive = false;
|
collator.alive = false;
|
||||||
|
collator.last_ping_status = r_pong.move_as_error();
|
||||||
} else {
|
} else {
|
||||||
LOG(DEBUG) << "pong from " << id << " : OK";
|
LOG(DEBUG) << "pong from " << id << " : OK";
|
||||||
collator.alive = true;
|
collator.alive = true;
|
||||||
|
collator.last_ping_status = td::Status::OK();
|
||||||
}
|
}
|
||||||
collator.ping_at = td::Timestamp::in(td::Random::fast(10.0, 20.0));
|
collator.ping_at = td::Timestamp::in(td::Random::fast(10.0, 20.0));
|
||||||
if (collator.active_cnt && !collator.sent_ping) {
|
if (collator.active_cnt && !collator.sent_ping) {
|
||||||
|
|
|
@ -65,6 +65,8 @@ class CollationManager : public td::actor::Actor {
|
||||||
td::Timestamp ping_at = td::Timestamp::now();
|
td::Timestamp ping_at = td::Timestamp::now();
|
||||||
bool sent_ping = false;
|
bool sent_ping = false;
|
||||||
size_t active_cnt = 0;
|
size_t active_cnt = 0;
|
||||||
|
td::Timestamp last_ping_at = td::Timestamp::never();
|
||||||
|
td::Status last_ping_status = td::Status::Error("not pinged");
|
||||||
};
|
};
|
||||||
std::map<adnl::AdnlNodeIdShort, CollatorInfo> collators_;
|
std::map<adnl::AdnlNodeIdShort, CollatorInfo> collators_;
|
||||||
|
|
||||||
|
|
|
@ -167,6 +167,10 @@ void CollatorNode::new_masterchain_block_notification(td::Ref<MasterchainState>
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void CollatorNode::update_shard_client_handle(BlockHandle shard_client_handle) {
|
||||||
|
shard_client_handle_ = shard_client_handle;
|
||||||
|
}
|
||||||
|
|
||||||
void CollatorNode::update_validator_group_info(ShardIdFull shard, std::vector<BlockIdExt> prev,
|
void CollatorNode::update_validator_group_info(ShardIdFull shard, std::vector<BlockIdExt> prev,
|
||||||
CatchainSeqno cc_seqno) {
|
CatchainSeqno cc_seqno) {
|
||||||
if (!can_collate_shard(shard)) {
|
if (!can_collate_shard(shard)) {
|
||||||
|
@ -225,7 +229,12 @@ void CollatorNode::update_validator_group_info(ShardIdFull shard, std::vector<Bl
|
||||||
}
|
}
|
||||||
++cache_it;
|
++cache_it;
|
||||||
}
|
}
|
||||||
generate_block(shard, cc_seqno, info.prev, {}, td::Timestamp::in(10.0), [](td::Result<BlockCandidate>) {});
|
auto S = check_out_of_sync();
|
||||||
|
if (S.is_ok()) {
|
||||||
|
generate_block(shard, cc_seqno, info.prev, {}, td::Timestamp::in(10.0), [](td::Result<BlockCandidate>) {});
|
||||||
|
} else {
|
||||||
|
LOG(DEBUG) << "not generating block automatically: " << S;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -535,9 +544,22 @@ void CollatorNode::process_result(std::shared_ptr<CacheEntry> cache_entry, td::R
|
||||||
cache_entry->promises.clear();
|
cache_entry->promises.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
td::Status CollatorNode::check_out_of_sync() {
|
||||||
|
if (last_masterchain_state_.is_null() || !shard_client_handle_) {
|
||||||
|
return td::Status::Error("not inited");
|
||||||
|
}
|
||||||
|
auto now = (UnixTime)td::Clocks::system();
|
||||||
|
if (last_masterchain_state_->get_unix_time() < now - 60 || shard_client_handle_->unix_time() < now - 60) {
|
||||||
|
return td::Status::Error(PSTRING() << "out of sync: mc " << now - last_masterchain_state_->get_unix_time()
|
||||||
|
<< "s ago, shardclient " << now - shard_client_handle_->unix_time() << "s ago");
|
||||||
|
}
|
||||||
|
return td::Status::OK();
|
||||||
|
}
|
||||||
|
|
||||||
void CollatorNode::process_ping(adnl::AdnlNodeIdShort src, ton_api::collatorNode_ping& ping,
|
void CollatorNode::process_ping(adnl::AdnlNodeIdShort src, ton_api::collatorNode_ping& ping,
|
||||||
td::Promise<td::BufferSlice> promise) {
|
td::Promise<td::BufferSlice> promise) {
|
||||||
LOG(DEBUG) << "got ping from " << src;
|
LOG(DEBUG) << "got ping from " << src;
|
||||||
|
TRY_STATUS_PROMISE(promise, check_out_of_sync());
|
||||||
promise.set_result(create_serialize_tl_object<ton_api::collatorNode_pong>(0));
|
promise.set_result(create_serialize_tl_object<ton_api::collatorNode_pong>(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -36,6 +36,7 @@ class CollatorNode : public td::actor::Actor {
|
||||||
void del_shard(ShardIdFull shard);
|
void del_shard(ShardIdFull shard);
|
||||||
|
|
||||||
void new_masterchain_block_notification(td::Ref<MasterchainState> state);
|
void new_masterchain_block_notification(td::Ref<MasterchainState> state);
|
||||||
|
void update_shard_client_handle(BlockHandle shard_client_handle);
|
||||||
void update_validator_group_info(ShardIdFull shard, std::vector<BlockIdExt> prev, CatchainSeqno cc_seqno);
|
void update_validator_group_info(ShardIdFull shard, std::vector<BlockIdExt> prev, CatchainSeqno cc_seqno);
|
||||||
|
|
||||||
void update_options(td::Ref<ValidatorManagerOptions> opts) {
|
void update_options(td::Ref<ValidatorManagerOptions> opts) {
|
||||||
|
@ -84,6 +85,7 @@ class CollatorNode : public td::actor::Actor {
|
||||||
std::map<std::pair<ShardIdFull, CatchainSeqno>, FutureValidatorGroup> future_validator_groups_;
|
std::map<std::pair<ShardIdFull, CatchainSeqno>, FutureValidatorGroup> future_validator_groups_;
|
||||||
|
|
||||||
td::Ref<MasterchainState> last_masterchain_state_;
|
td::Ref<MasterchainState> last_masterchain_state_;
|
||||||
|
BlockHandle shard_client_handle_;
|
||||||
|
|
||||||
td::Result<FutureValidatorGroup*> get_future_validator_group(ShardIdFull shard, CatchainSeqno cc_seqno);
|
td::Result<FutureValidatorGroup*> get_future_validator_group(ShardIdFull shard, CatchainSeqno cc_seqno);
|
||||||
|
|
||||||
|
@ -92,6 +94,8 @@ class CollatorNode : public td::actor::Actor {
|
||||||
td::Promise<BlockCandidate> promise);
|
td::Promise<BlockCandidate> promise);
|
||||||
void process_result(std::shared_ptr<CacheEntry> cache_entry, td::Result<BlockCandidate> R);
|
void process_result(std::shared_ptr<CacheEntry> cache_entry, td::Result<BlockCandidate> R);
|
||||||
|
|
||||||
|
td::Status check_out_of_sync();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
static tl_object_ptr<ton_api::collatorNode_Candidate> serialize_candidate(const BlockCandidate& block, bool compress);
|
static tl_object_ptr<ton_api::collatorNode_Candidate> serialize_candidate(const BlockCandidate& block, bool compress);
|
||||||
static td::Result<BlockCandidate> deserialize_candidate(tl_object_ptr<ton_api::collatorNode_Candidate> f,
|
static td::Result<BlockCandidate> deserialize_candidate(tl_object_ptr<ton_api::collatorNode_Candidate> f,
|
||||||
|
|
|
@ -2754,6 +2754,9 @@ void ValidatorManagerImpl::update_shard_client_block_handle(BlockHandle handle,
|
||||||
last_liteserver_state_ = std::move(state);
|
last_liteserver_state_ = std::move(state);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for (auto &c : collator_nodes_) {
|
||||||
|
td::actor::send_closure(c.second.actor, &CollatorNode::update_shard_client_handle, shard_client_handle_);
|
||||||
|
}
|
||||||
shard_client_update(seqno);
|
shard_client_update(seqno);
|
||||||
promise.set_value(td::Unit());
|
promise.set_value(td::Unit());
|
||||||
}
|
}
|
||||||
|
@ -3509,6 +3512,13 @@ void ValidatorManagerImpl::add_collator(adnl::AdnlNodeIdShort id, ShardIdFull sh
|
||||||
if (it == collator_nodes_.end()) {
|
if (it == collator_nodes_.end()) {
|
||||||
it = collator_nodes_.emplace(id, Collator()).first;
|
it = collator_nodes_.emplace(id, Collator()).first;
|
||||||
it->second.actor = td::actor::create_actor<CollatorNode>("collatornode", id, opts_, actor_id(this), adnl_, rldp_);
|
it->second.actor = td::actor::create_actor<CollatorNode>("collatornode", id, opts_, actor_id(this), adnl_, rldp_);
|
||||||
|
if (last_masterchain_state_.not_null()) {
|
||||||
|
td::actor::send_closure(it->second.actor, &CollatorNode::new_masterchain_block_notification,
|
||||||
|
last_masterchain_state_);
|
||||||
|
}
|
||||||
|
if (shard_client_handle_) {
|
||||||
|
td::actor::send_closure(it->second.actor, &CollatorNode::update_shard_client_handle, shard_client_handle_);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (!it->second.shards.insert(shard).second) {
|
if (!it->second.shards.insert(shard).second) {
|
||||||
return;
|
return;
|
||||||
|
|
Loading…
Reference in a new issue