diff --git a/tl/generate/scheme/ton_api.tl b/tl/generate/scheme/ton_api.tl index 720f8d49..4bfd5788 100644 --- a/tl/generate/scheme/ton_api.tl +++ b/tl/generate/scheme/ton_api.tl @@ -750,7 +750,7 @@ engine.validator.perfTimerStats stats:(vector engine.validator.PerfTimerStatsByN engine.validator.shardOutQueueSize size:long = engine.validator.ShardOutQueueSize; engine.validator.collationManagerStats.shard shard_id:tonNode.shardId self_collate:Bool select_mode:string active:Bool collators:(vector int256) = engine.validator.collationManagerStats.Shard; -engine.validator.collationManagerStats.collator adnl_id:int256 active:Bool alive:Bool ping_in:double = engine.validator.collationManagerStats.Collator; +engine.validator.collationManagerStats.collator adnl_id:int256 active:Bool alive:Bool ping_in:double last_ping_ago:double last_ping_status:string = engine.validator.collationManagerStats.Collator; engine.validator.collationManagerStats.localId adnl_id:int256 shards:(vector engine.validator.collationManagerStats.shard) collators:(vector engine.validator.collationManagerStats.collator) = engine.validator.collationManagerStats.LocalId; engine.validator.collationManagerStats local_ids:(vector engine.validator.collationManagerStats.localId) = engine.validator.CollationManagerStats; diff --git a/tl/generate/scheme/ton_api.tlo b/tl/generate/scheme/ton_api.tlo index 19a8d131..d5ebffaa 100644 Binary files a/tl/generate/scheme/ton_api.tlo and b/tl/generate/scheme/ton_api.tlo differ diff --git a/validator-engine-console/validator-engine-console-query.cpp b/validator-engine-console/validator-engine-console-query.cpp index 625b6932..71eecf24 100644 --- a/validator-engine-console/validator-engine-console-query.cpp +++ b/validator-engine-console/validator-engine-console-query.cpp @@ -1775,8 +1775,24 @@ td::Status GetCollationManagerStatsQuery::receive(td::BufferSlice data) { if (collator == nullptr) { return td::Status::Error("collator not found"); } - td::TerminalIO::out() << " " << id << " alive=" << (int)collator->alive_ - << " ping_in=" << collator->ping_in_ << "\n"; + td::StringBuilder sb; + sb << " " << id << "\n"; + sb << " alive=" << (int)collator->alive_; + if (collator->active_) { + sb << " ping_in=" << td::StringBuilder::FixedDouble(std::max(collator->ping_in_, 0.0), 3); + } + sb << " last_ping_ago="; + if (collator->last_ping_ago_ < 0.0) { + sb << "never"; + } else { + std::string status = collator->last_ping_status_; + std::erase_if(status, [](char c) { return c < (char)32; }); + if (status.size() > 128) { + status.resize(128); + } + sb << td::StringBuilder::FixedDouble(collator->last_ping_ago_, 3) << ": " << status; + } + td::TerminalIO::out() << sb.as_cslice() << "\n"; } } } diff --git a/validator/collation-manager.cpp b/validator/collation-manager.cpp index e8447059..879b8d89 100644 --- a/validator/collation-manager.cpp +++ b/validator/collation-manager.cpp @@ -261,6 +261,8 @@ void CollationManager::get_stats( } else { obj->ping_in_ = -1.0; } + obj->last_ping_ago_ = collator.last_ping_at ? td::Time::now() - collator.last_ping_at.at() : -1.0; + obj->last_ping_status_ = collator.last_ping_status.is_ok() ? "OK" : collator.last_ping_status.message().str(); stats->collators_.push_back(std::move(obj)); } promise.set_value(std::move(stats)); @@ -323,7 +325,7 @@ void CollationManager::alarm() { td::actor::send_closure(SelfId, &CollationManager::got_pong, id, std::move(R)); }; LOG(DEBUG) << "sending ping to " << id; - td::actor::send_closure(rldp_, &rldp::Rldp::send_query, local_id_, id, "collatorping", std::move(P), + td::actor::send_closure(rldp_, &rldp::Rldp::send_query, local_id_, id, "ping", std::move(P), td::Timestamp::in(2.0), std::move(query)); } else { alarm_timestamp().relax(collator.ping_at); @@ -340,7 +342,7 @@ void CollationManager::got_pong(adnl::AdnlNodeIdShort id, td::Result td::Result> { - TRY_RESULT_PREFIX(data, std::move(R), "rldp query error: "); + TRY_RESULT(data, std::move(R)); auto r_error = fetch_tl_object(data, true); if (r_error.is_ok()) { auto error = r_error.move_as_ok(); @@ -348,12 +350,15 @@ void CollationManager::got_pong(adnl::AdnlNodeIdShort id, td::Result(data, true); }(); + collator.last_ping_at = td::Timestamp::now(); if (r_pong.is_error()) { - LOG(DEBUG) << "pong from " << id << " : " << r_pong.move_as_error(); + LOG(DEBUG) << "pong from " << id << " : " << r_pong.error(); collator.alive = false; + collator.last_ping_status = r_pong.move_as_error(); } else { LOG(DEBUG) << "pong from " << id << " : OK"; collator.alive = true; + collator.last_ping_status = td::Status::OK(); } collator.ping_at = td::Timestamp::in(td::Random::fast(10.0, 20.0)); if (collator.active_cnt && !collator.sent_ping) { diff --git a/validator/collation-manager.hpp b/validator/collation-manager.hpp index 9ca69814..0ca4617d 100644 --- a/validator/collation-manager.hpp +++ b/validator/collation-manager.hpp @@ -65,6 +65,8 @@ class CollationManager : public td::actor::Actor { td::Timestamp ping_at = td::Timestamp::now(); bool sent_ping = false; size_t active_cnt = 0; + td::Timestamp last_ping_at = td::Timestamp::never(); + td::Status last_ping_status = td::Status::Error("not pinged"); }; std::map collators_; diff --git a/validator/collator-node.cpp b/validator/collator-node.cpp index 1b8eb79e..968835c2 100644 --- a/validator/collator-node.cpp +++ b/validator/collator-node.cpp @@ -167,6 +167,10 @@ void CollatorNode::new_masterchain_block_notification(td::Ref } } +void CollatorNode::update_shard_client_handle(BlockHandle shard_client_handle) { + shard_client_handle_ = shard_client_handle; +} + void CollatorNode::update_validator_group_info(ShardIdFull shard, std::vector prev, CatchainSeqno cc_seqno) { if (!can_collate_shard(shard)) { @@ -225,7 +229,12 @@ void CollatorNode::update_validator_group_info(ShardIdFull shard, std::vector) {}); + auto S = check_out_of_sync(); + if (S.is_ok()) { + generate_block(shard, cc_seqno, info.prev, {}, td::Timestamp::in(10.0), [](td::Result) {}); + } else { + LOG(DEBUG) << "not generating block automatically: " << S; + } } return; } @@ -535,9 +544,22 @@ void CollatorNode::process_result(std::shared_ptr cache_entry, td::R cache_entry->promises.clear(); } +td::Status CollatorNode::check_out_of_sync() { + if (last_masterchain_state_.is_null() || !shard_client_handle_) { + return td::Status::Error("not inited"); + } + auto now = (UnixTime)td::Clocks::system(); + if (last_masterchain_state_->get_unix_time() < now - 60 || shard_client_handle_->unix_time() < now - 60) { + return td::Status::Error(PSTRING() << "out of sync: mc " << now - last_masterchain_state_->get_unix_time() + << "s ago, shardclient " << now - shard_client_handle_->unix_time() << "s ago"); + } + return td::Status::OK(); +} + void CollatorNode::process_ping(adnl::AdnlNodeIdShort src, ton_api::collatorNode_ping& ping, td::Promise promise) { LOG(DEBUG) << "got ping from " << src; + TRY_STATUS_PROMISE(promise, check_out_of_sync()); promise.set_result(create_serialize_tl_object(0)); } diff --git a/validator/collator-node.hpp b/validator/collator-node.hpp index 54876c35..cca77d26 100644 --- a/validator/collator-node.hpp +++ b/validator/collator-node.hpp @@ -36,6 +36,7 @@ class CollatorNode : public td::actor::Actor { void del_shard(ShardIdFull shard); void new_masterchain_block_notification(td::Ref state); + void update_shard_client_handle(BlockHandle shard_client_handle); void update_validator_group_info(ShardIdFull shard, std::vector prev, CatchainSeqno cc_seqno); void update_options(td::Ref opts) { @@ -84,6 +85,7 @@ class CollatorNode : public td::actor::Actor { std::map, FutureValidatorGroup> future_validator_groups_; td::Ref last_masterchain_state_; + BlockHandle shard_client_handle_; td::Result get_future_validator_group(ShardIdFull shard, CatchainSeqno cc_seqno); @@ -92,6 +94,8 @@ class CollatorNode : public td::actor::Actor { td::Promise promise); void process_result(std::shared_ptr cache_entry, td::Result R); + td::Status check_out_of_sync(); + public: static tl_object_ptr serialize_candidate(const BlockCandidate& block, bool compress); static td::Result deserialize_candidate(tl_object_ptr f, diff --git a/validator/manager.cpp b/validator/manager.cpp index 903e3155..b8394bdc 100644 --- a/validator/manager.cpp +++ b/validator/manager.cpp @@ -2754,6 +2754,9 @@ void ValidatorManagerImpl::update_shard_client_block_handle(BlockHandle handle, last_liteserver_state_ = std::move(state); } } + for (auto &c : collator_nodes_) { + td::actor::send_closure(c.second.actor, &CollatorNode::update_shard_client_handle, shard_client_handle_); + } shard_client_update(seqno); promise.set_value(td::Unit()); } @@ -3509,6 +3512,13 @@ void ValidatorManagerImpl::add_collator(adnl::AdnlNodeIdShort id, ShardIdFull sh if (it == collator_nodes_.end()) { it = collator_nodes_.emplace(id, Collator()).first; it->second.actor = td::actor::create_actor("collatornode", id, opts_, actor_id(this), adnl_, rldp_); + if (last_masterchain_state_.not_null()) { + td::actor::send_closure(it->second.actor, &CollatorNode::new_masterchain_block_notification, + last_masterchain_state_); + } + if (shard_client_handle_) { + td::actor::send_closure(it->second.actor, &CollatorNode::update_shard_client_handle, shard_client_handle_); + } } if (!it->second.shards.insert(shard).second) { return;