Add prometheus metrics for Central controllers (#1969)

* add header-only prometheus lib to ext

* rename folder

* Undo rename directory

* prometheus simpleapi included on mac & linux

* wip

* wire up some controller stats

* Get windows building with prometheus

* bsd build flags for prometheus

* Fix multiple network join from environment entrypoint.sh.release (#1961)

* _bond_m guards _bond, not _paths_m (#1965)

* Fix: warning: mutex '_aqm_m' is not held on every path through here [-Wthread-safety-analysis] (#1964)

* Serve prom metrics from /metrics endpoint

* Add prom metrics for Central controller specific things

* reorganize metric initialization

* testing out a labled gauge on Networks

* increment error counter on throw

* Consolidate metrics definitions

Put all metric definitions into node/Metrics.hpp.  Accessed as needed
from there.

* Revert "testing out a labled gauge on Networks"

This reverts commit 499ed6d95e11452019cdf48e32ed4cd878c2705b.

* still blows up but adding to the record for completeness right now

* Fix runtime issues with metrics

* Add metrics files to visual studio project

* Missed an "extern"

* add copyright headers to new files

* Add metrics for sent/received bytes (total)

* put /metrics endpoint behind auth

* sendto returns int on Win32

---------

Co-authored-by: Leonardo Amaral <leleobhz@users.noreply.github.com>
Co-authored-by: Brenton Bostick <bostick@gmail.com>
This commit is contained in:
Grant Limberg 2023-04-21 12:12:43 -07:00 committed by GitHub
parent 0b03ad9a21
commit 8e6e4ede6d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
62 changed files with 4023 additions and 25 deletions

View file

@ -19,6 +19,8 @@
#define _DEBUG(x)
#endif
#include "../node/Metrics.hpp"
#include <deque>
#include <set>
#include <memory>
@ -61,6 +63,7 @@ public:
{
while(m_pool.size() < m_minPoolSize){
m_pool.push_back(m_factory->create());
Metrics::pool_avail++;
}
};
@ -91,6 +94,7 @@ public:
while((m_pool.size() + m_borrowed.size()) < m_minPoolSize) {
std::shared_ptr<Connection> conn = m_factory->create();
m_pool.push_back(conn);
Metrics::pool_avail++;
}
if(m_pool.size()==0){
@ -99,8 +103,10 @@ public:
try {
std::shared_ptr<Connection> conn = m_factory->create();
m_borrowed.insert(conn);
Metrics::pool_in_use++;
return std::static_pointer_cast<T>(conn);
} catch (std::exception &e) {
Metrics::pool_errors++;
throw ConnectionUnavailable();
}
} else {
@ -116,11 +122,13 @@ public:
return std::static_pointer_cast<T>(conn);
} catch(std::exception& e) {
// Error creating a replacement connection
Metrics::pool_errors++;
throw ConnectionUnavailable();
}
}
}
// Nothing available
Metrics::pool_errors++;
throw ConnectionUnavailable();
}
}
@ -128,8 +136,10 @@ public:
// Take one off the front
std::shared_ptr<Connection> conn = m_pool.front();
m_pool.pop_front();
Metrics::pool_avail--;
// Add it to the borrowed list
m_borrowed.insert(conn);
Metrics::pool_in_use++;
return std::static_pointer_cast<T>(conn);
};
@ -143,7 +153,9 @@ public:
// Lock
std::unique_lock<std::mutex> lock(m_poolMutex);
m_borrowed.erase(conn);
Metrics::pool_in_use--;
if ((m_pool.size() + m_borrowed.size()) < m_maxPoolSize) {
Metrics::pool_avail++;
m_pool.push_back(conn);
}
};
@ -158,4 +170,4 @@ protected:
}
#endif
#endif

View file

@ -13,6 +13,7 @@
#include "DB.hpp"
#include "EmbeddedNetworkController.hpp"
#include "../node/Metrics.hpp"
#include <chrono>
#include <algorithm>
@ -211,16 +212,19 @@ void DB::_memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool no
{
std::lock_guard<std::mutex> l(_networks_l);
auto nw2 = _networks.find(networkId);
if (nw2 != _networks.end())
if (nw2 != _networks.end()) {
nw = nw2->second;
}
}
if (nw) {
std::lock_guard<std::mutex> l(nw->lock);
if (OSUtils::jsonBool(old["activeBridge"],false))
if (OSUtils::jsonBool(old["activeBridge"],false)) {
nw->activeBridgeMembers.erase(memberId);
}
wasAuth = OSUtils::jsonBool(old["authorized"],false);
if (wasAuth)
if (wasAuth) {
nw->authorizedMembers.erase(memberId);
}
json &ips = old["ipAssignments"];
if (ips.is_array()) {
for(unsigned long i=0;i<ips.size();++i) {
@ -255,11 +259,14 @@ void DB::_memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool no
nw->members[memberId] = memberConfig;
if (OSUtils::jsonBool(memberConfig["activeBridge"],false))
if (OSUtils::jsonBool(memberConfig["activeBridge"],false)) {
nw->activeBridgeMembers.insert(memberId);
}
isAuth = OSUtils::jsonBool(memberConfig["authorized"],false);
if (isAuth)
if (isAuth) {
Metrics::member_auths++;
nw->authorizedMembers.insert(memberId);
}
json &ips = memberConfig["ipAssignments"];
if (ips.is_array()) {
for(unsigned long i=0;i<ips.size();++i) {
@ -303,6 +310,24 @@ void DB::_memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool no
}
}
if (notifyListeners) {
if(networkId != 0 && memberId != 0 && old.is_object() && !memberConfig.is_object()) {
// member delete
Metrics::member_count--;
} else if (networkId != 0 && memberId != 0 && !old.is_object() && memberConfig.is_object()) {
// new member
Metrics::member_count++;
}
if (!wasAuth && isAuth) {
Metrics::member_auths++;
} else if (wasAuth && !isAuth) {
Metrics::member_deauths++;
} else {
Metrics::member_changes++;
}
}
if ((notifyListeners)&&((wasAuth)&&(!isAuth)&&(networkId)&&(memberId))) {
std::lock_guard<std::mutex> ll(_changeListeners_l);
for(auto i=_changeListeners.begin();i!=_changeListeners.end();++i) {
@ -313,6 +338,16 @@ void DB::_memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool no
void DB::_networkChanged(nlohmann::json &old,nlohmann::json &networkConfig,bool notifyListeners)
{
if (notifyListeners) {
if (old.is_object() && old.contains("id") && networkConfig.is_object() && networkConfig.contains("id")) {
Metrics::network_changes++;
} else if (!old.is_object() && networkConfig.is_object() && networkConfig.contains("id")) {
Metrics::network_count++;
} else if (old.is_object() && old.contains("id") && !networkConfig.is_object()) {
Metrics::network_count--;
}
}
if (networkConfig.is_object()) {
const std::string ids = networkConfig["id"];
const uint64_t networkId = Utils::hexStrToU64(ids.c_str());

View file

@ -35,6 +35,8 @@
#include <nlohmann/json.hpp>
#include <prometheus/simpleapi.h>
#define ZT_MEMBER_AUTH_TIMEOUT_NOTIFY_BEFORE 25000
namespace ZeroTier

View file

@ -13,6 +13,8 @@
#include "FileDB.hpp"
#include "../node/Metrics.hpp"
namespace ZeroTier
{
@ -39,6 +41,7 @@ FileDB::FileDB(const char *path) :
if (nwids.length() == 16) {
nlohmann::json nullJson;
_networkChanged(nullJson,network,false);
Metrics::network_count++;
std::string membersPath(_networksPath + ZT_PATH_SEPARATOR_S + nwids + ZT_PATH_SEPARATOR_S "member");
std::vector<std::string> members(OSUtils::listDirectory(membersPath.c_str(),false));
for(auto m=members.begin();m!=members.end();++m) {
@ -50,6 +53,7 @@ FileDB::FileDB(const char *path) :
if (addrs.length() == 10) {
nlohmann::json nullJson2;
_memberChanged(nullJson2,member,false);
Metrics::member_count++;
}
} catch ( ... ) {}
}
@ -88,8 +92,9 @@ bool FileDB::save(nlohmann::json &record,bool notifyListeners)
if ((!old.is_object())||(!_compareRecords(old,record))) {
record["revision"] = OSUtils::jsonInt(record["revision"],0ULL) + 1ULL;
OSUtils::ztsnprintf(p1,sizeof(p1),"%s" ZT_PATH_SEPARATOR_S "%.16llx.json",_networksPath.c_str(),nwid);
if (!OSUtils::writeFile(p1,OSUtils::jsonDump(record,-1)))
if (!OSUtils::writeFile(p1,OSUtils::jsonDump(record,-1))) {
fprintf(stderr,"WARNING: controller unable to write to path: %s" ZT_EOL_S,p1);
}
_networkChanged(old,record,notifyListeners);
modified = true;
}
@ -110,8 +115,9 @@ bool FileDB::save(nlohmann::json &record,bool notifyListeners)
OSUtils::ztsnprintf(p2,sizeof(p2),"%s" ZT_PATH_SEPARATOR_S "%.16llx",_networksPath.c_str(),(unsigned long long)nwid);
OSUtils::mkdir(p2);
OSUtils::mkdir(pb);
if (!OSUtils::writeFile(p1,OSUtils::jsonDump(record,-1)))
if (!OSUtils::writeFile(p1,OSUtils::jsonDump(record,-1))) {
fprintf(stderr,"WARNING: controller unable to write to path: %s" ZT_EOL_S,p1);
}
}
_memberChanged(old,record,notifyListeners);
modified = true;

View file

@ -119,6 +119,7 @@ MemberNotificationReceiver::MemberNotificationReceiver(PostgreSQL *p, pqxx::conn
void MemberNotificationReceiver::operator() (const std::string &payload, int packend_pid) {
fprintf(stderr, "Member Notification received: %s\n", payload.c_str());
Metrics::pgsql_mem_notification++;
json tmp(json::parse(payload));
json &ov = tmp["old_val"];
json &nv = tmp["new_val"];
@ -141,6 +142,7 @@ NetworkNotificationReceiver::NetworkNotificationReceiver(PostgreSQL *p, pqxx::co
void NetworkNotificationReceiver::operator() (const std::string &payload, int packend_pid) {
fprintf(stderr, "Network Notification received: %s\n", payload.c_str());
Metrics::pgsql_net_notification++;
json tmp(json::parse(payload));
json &ov = tmp["old_val"];
json &nv = tmp["new_val"];
@ -705,6 +707,8 @@ void PostgreSQL::initializeNetworks()
}
}
Metrics::network_count++;
_networkChanged(empty, config, false);
auto end = std::chrono::high_resolution_clock::now();
@ -925,6 +929,8 @@ void PostgreSQL::initializeMembers()
}
}
Metrics::member_count++;
_memberChanged(empty, config, false);
memberId = "";
@ -1034,7 +1040,6 @@ void PostgreSQL::heartbeat()
w.commit();
} catch (std::exception &e) {
fprintf(stderr, "%s: Heartbeat update failed: %s\n", controllerId, e.what());
w.abort();
_pool->unborrow(c);
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
continue;
@ -1230,6 +1235,7 @@ void PostgreSQL::_networksWatcher_Redis() {
}
lastID = id;
}
Metrics::redis_net_notification++;
}
}
} catch (sw::redis::Error &e) {
@ -1788,6 +1794,7 @@ uint64_t PostgreSQL::_doRedisUpdate(sw::redis::Transaction &tx, std::string &con
.sadd("network-nodes-all:{"+controllerId+"}:"+networkId, memberId)
.hmset("member:{"+controllerId+"}:"+networkId+":"+memberId, record.begin(), record.end());
++count;
Metrics::redis_mem_notification++;
}
// expire records from all-nodes and network-nodes member list

View file

@ -26,6 +26,8 @@
#include <memory>
#include <redis++/redis++.h>
#include "../node/Metrics.hpp"
extern "C" {
typedef struct pg_conn PGconn;
}
@ -53,6 +55,7 @@ public:
}
virtual std::shared_ptr<Connection> create() {
Metrics::conn_counter++;
auto c = std::shared_ptr<PostgresConnection>(new PostgresConnection());
c->c = std::make_shared<pqxx::connection>(m_connString);
return std::static_pointer_cast<Connection>(c);