Add prometheus metrics for Central controllers (#1969)
* add header-only prometheus lib to ext * rename folder * Undo rename directory * prometheus simpleapi included on mac & linux * wip * wire up some controller stats * Get windows building with prometheus * bsd build flags for prometheus * Fix multiple network join from environment entrypoint.sh.release (#1961) * _bond_m guards _bond, not _paths_m (#1965) * Fix: warning: mutex '_aqm_m' is not held on every path through here [-Wthread-safety-analysis] (#1964) * Serve prom metrics from /metrics endpoint * Add prom metrics for Central controller specific things * reorganize metric initialization * testing out a labled gauge on Networks * increment error counter on throw * Consolidate metrics definitions Put all metric definitions into node/Metrics.hpp. Accessed as needed from there. * Revert "testing out a labled gauge on Networks" This reverts commit 499ed6d95e11452019cdf48e32ed4cd878c2705b. * still blows up but adding to the record for completeness right now * Fix runtime issues with metrics * Add metrics files to visual studio project * Missed an "extern" * add copyright headers to new files * Add metrics for sent/received bytes (total) * put /metrics endpoint behind auth * sendto returns int on Win32 --------- Co-authored-by: Leonardo Amaral <leleobhz@users.noreply.github.com> Co-authored-by: Brenton Bostick <bostick@gmail.com>
This commit is contained in:
parent
0b03ad9a21
commit
8e6e4ede6d
62 changed files with 4023 additions and 25 deletions
|
@ -19,6 +19,8 @@
|
|||
#define _DEBUG(x)
|
||||
#endif
|
||||
|
||||
#include "../node/Metrics.hpp"
|
||||
|
||||
#include <deque>
|
||||
#include <set>
|
||||
#include <memory>
|
||||
|
@ -61,6 +63,7 @@ public:
|
|||
{
|
||||
while(m_pool.size() < m_minPoolSize){
|
||||
m_pool.push_back(m_factory->create());
|
||||
Metrics::pool_avail++;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -91,6 +94,7 @@ public:
|
|||
while((m_pool.size() + m_borrowed.size()) < m_minPoolSize) {
|
||||
std::shared_ptr<Connection> conn = m_factory->create();
|
||||
m_pool.push_back(conn);
|
||||
Metrics::pool_avail++;
|
||||
}
|
||||
|
||||
if(m_pool.size()==0){
|
||||
|
@ -99,8 +103,10 @@ public:
|
|||
try {
|
||||
std::shared_ptr<Connection> conn = m_factory->create();
|
||||
m_borrowed.insert(conn);
|
||||
Metrics::pool_in_use++;
|
||||
return std::static_pointer_cast<T>(conn);
|
||||
} catch (std::exception &e) {
|
||||
Metrics::pool_errors++;
|
||||
throw ConnectionUnavailable();
|
||||
}
|
||||
} else {
|
||||
|
@ -116,11 +122,13 @@ public:
|
|||
return std::static_pointer_cast<T>(conn);
|
||||
} catch(std::exception& e) {
|
||||
// Error creating a replacement connection
|
||||
Metrics::pool_errors++;
|
||||
throw ConnectionUnavailable();
|
||||
}
|
||||
}
|
||||
}
|
||||
// Nothing available
|
||||
Metrics::pool_errors++;
|
||||
throw ConnectionUnavailable();
|
||||
}
|
||||
}
|
||||
|
@ -128,8 +136,10 @@ public:
|
|||
// Take one off the front
|
||||
std::shared_ptr<Connection> conn = m_pool.front();
|
||||
m_pool.pop_front();
|
||||
Metrics::pool_avail--;
|
||||
// Add it to the borrowed list
|
||||
m_borrowed.insert(conn);
|
||||
Metrics::pool_in_use++;
|
||||
return std::static_pointer_cast<T>(conn);
|
||||
};
|
||||
|
||||
|
@ -143,7 +153,9 @@ public:
|
|||
// Lock
|
||||
std::unique_lock<std::mutex> lock(m_poolMutex);
|
||||
m_borrowed.erase(conn);
|
||||
Metrics::pool_in_use--;
|
||||
if ((m_pool.size() + m_borrowed.size()) < m_maxPoolSize) {
|
||||
Metrics::pool_avail++;
|
||||
m_pool.push_back(conn);
|
||||
}
|
||||
};
|
||||
|
@ -158,4 +170,4 @@ protected:
|
|||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
|
||||
#include "DB.hpp"
|
||||
#include "EmbeddedNetworkController.hpp"
|
||||
#include "../node/Metrics.hpp"
|
||||
|
||||
#include <chrono>
|
||||
#include <algorithm>
|
||||
|
@ -211,16 +212,19 @@ void DB::_memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool no
|
|||
{
|
||||
std::lock_guard<std::mutex> l(_networks_l);
|
||||
auto nw2 = _networks.find(networkId);
|
||||
if (nw2 != _networks.end())
|
||||
if (nw2 != _networks.end()) {
|
||||
nw = nw2->second;
|
||||
}
|
||||
}
|
||||
if (nw) {
|
||||
std::lock_guard<std::mutex> l(nw->lock);
|
||||
if (OSUtils::jsonBool(old["activeBridge"],false))
|
||||
if (OSUtils::jsonBool(old["activeBridge"],false)) {
|
||||
nw->activeBridgeMembers.erase(memberId);
|
||||
}
|
||||
wasAuth = OSUtils::jsonBool(old["authorized"],false);
|
||||
if (wasAuth)
|
||||
if (wasAuth) {
|
||||
nw->authorizedMembers.erase(memberId);
|
||||
}
|
||||
json &ips = old["ipAssignments"];
|
||||
if (ips.is_array()) {
|
||||
for(unsigned long i=0;i<ips.size();++i) {
|
||||
|
@ -255,11 +259,14 @@ void DB::_memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool no
|
|||
|
||||
nw->members[memberId] = memberConfig;
|
||||
|
||||
if (OSUtils::jsonBool(memberConfig["activeBridge"],false))
|
||||
if (OSUtils::jsonBool(memberConfig["activeBridge"],false)) {
|
||||
nw->activeBridgeMembers.insert(memberId);
|
||||
}
|
||||
isAuth = OSUtils::jsonBool(memberConfig["authorized"],false);
|
||||
if (isAuth)
|
||||
if (isAuth) {
|
||||
Metrics::member_auths++;
|
||||
nw->authorizedMembers.insert(memberId);
|
||||
}
|
||||
json &ips = memberConfig["ipAssignments"];
|
||||
if (ips.is_array()) {
|
||||
for(unsigned long i=0;i<ips.size();++i) {
|
||||
|
@ -303,6 +310,24 @@ void DB::_memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool no
|
|||
}
|
||||
}
|
||||
|
||||
if (notifyListeners) {
|
||||
if(networkId != 0 && memberId != 0 && old.is_object() && !memberConfig.is_object()) {
|
||||
// member delete
|
||||
Metrics::member_count--;
|
||||
} else if (networkId != 0 && memberId != 0 && !old.is_object() && memberConfig.is_object()) {
|
||||
// new member
|
||||
Metrics::member_count++;
|
||||
}
|
||||
|
||||
if (!wasAuth && isAuth) {
|
||||
Metrics::member_auths++;
|
||||
} else if (wasAuth && !isAuth) {
|
||||
Metrics::member_deauths++;
|
||||
} else {
|
||||
Metrics::member_changes++;
|
||||
}
|
||||
}
|
||||
|
||||
if ((notifyListeners)&&((wasAuth)&&(!isAuth)&&(networkId)&&(memberId))) {
|
||||
std::lock_guard<std::mutex> ll(_changeListeners_l);
|
||||
for(auto i=_changeListeners.begin();i!=_changeListeners.end();++i) {
|
||||
|
@ -313,6 +338,16 @@ void DB::_memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool no
|
|||
|
||||
void DB::_networkChanged(nlohmann::json &old,nlohmann::json &networkConfig,bool notifyListeners)
|
||||
{
|
||||
if (notifyListeners) {
|
||||
if (old.is_object() && old.contains("id") && networkConfig.is_object() && networkConfig.contains("id")) {
|
||||
Metrics::network_changes++;
|
||||
} else if (!old.is_object() && networkConfig.is_object() && networkConfig.contains("id")) {
|
||||
Metrics::network_count++;
|
||||
} else if (old.is_object() && old.contains("id") && !networkConfig.is_object()) {
|
||||
Metrics::network_count--;
|
||||
}
|
||||
}
|
||||
|
||||
if (networkConfig.is_object()) {
|
||||
const std::string ids = networkConfig["id"];
|
||||
const uint64_t networkId = Utils::hexStrToU64(ids.c_str());
|
||||
|
|
|
@ -35,6 +35,8 @@
|
|||
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
#include <prometheus/simpleapi.h>
|
||||
|
||||
#define ZT_MEMBER_AUTH_TIMEOUT_NOTIFY_BEFORE 25000
|
||||
|
||||
namespace ZeroTier
|
||||
|
|
|
@ -13,6 +13,8 @@
|
|||
|
||||
#include "FileDB.hpp"
|
||||
|
||||
#include "../node/Metrics.hpp"
|
||||
|
||||
namespace ZeroTier
|
||||
{
|
||||
|
||||
|
@ -39,6 +41,7 @@ FileDB::FileDB(const char *path) :
|
|||
if (nwids.length() == 16) {
|
||||
nlohmann::json nullJson;
|
||||
_networkChanged(nullJson,network,false);
|
||||
Metrics::network_count++;
|
||||
std::string membersPath(_networksPath + ZT_PATH_SEPARATOR_S + nwids + ZT_PATH_SEPARATOR_S "member");
|
||||
std::vector<std::string> members(OSUtils::listDirectory(membersPath.c_str(),false));
|
||||
for(auto m=members.begin();m!=members.end();++m) {
|
||||
|
@ -50,6 +53,7 @@ FileDB::FileDB(const char *path) :
|
|||
if (addrs.length() == 10) {
|
||||
nlohmann::json nullJson2;
|
||||
_memberChanged(nullJson2,member,false);
|
||||
Metrics::member_count++;
|
||||
}
|
||||
} catch ( ... ) {}
|
||||
}
|
||||
|
@ -88,8 +92,9 @@ bool FileDB::save(nlohmann::json &record,bool notifyListeners)
|
|||
if ((!old.is_object())||(!_compareRecords(old,record))) {
|
||||
record["revision"] = OSUtils::jsonInt(record["revision"],0ULL) + 1ULL;
|
||||
OSUtils::ztsnprintf(p1,sizeof(p1),"%s" ZT_PATH_SEPARATOR_S "%.16llx.json",_networksPath.c_str(),nwid);
|
||||
if (!OSUtils::writeFile(p1,OSUtils::jsonDump(record,-1)))
|
||||
if (!OSUtils::writeFile(p1,OSUtils::jsonDump(record,-1))) {
|
||||
fprintf(stderr,"WARNING: controller unable to write to path: %s" ZT_EOL_S,p1);
|
||||
}
|
||||
_networkChanged(old,record,notifyListeners);
|
||||
modified = true;
|
||||
}
|
||||
|
@ -110,8 +115,9 @@ bool FileDB::save(nlohmann::json &record,bool notifyListeners)
|
|||
OSUtils::ztsnprintf(p2,sizeof(p2),"%s" ZT_PATH_SEPARATOR_S "%.16llx",_networksPath.c_str(),(unsigned long long)nwid);
|
||||
OSUtils::mkdir(p2);
|
||||
OSUtils::mkdir(pb);
|
||||
if (!OSUtils::writeFile(p1,OSUtils::jsonDump(record,-1)))
|
||||
if (!OSUtils::writeFile(p1,OSUtils::jsonDump(record,-1))) {
|
||||
fprintf(stderr,"WARNING: controller unable to write to path: %s" ZT_EOL_S,p1);
|
||||
}
|
||||
}
|
||||
_memberChanged(old,record,notifyListeners);
|
||||
modified = true;
|
||||
|
|
|
@ -119,6 +119,7 @@ MemberNotificationReceiver::MemberNotificationReceiver(PostgreSQL *p, pqxx::conn
|
|||
|
||||
void MemberNotificationReceiver::operator() (const std::string &payload, int packend_pid) {
|
||||
fprintf(stderr, "Member Notification received: %s\n", payload.c_str());
|
||||
Metrics::pgsql_mem_notification++;
|
||||
json tmp(json::parse(payload));
|
||||
json &ov = tmp["old_val"];
|
||||
json &nv = tmp["new_val"];
|
||||
|
@ -141,6 +142,7 @@ NetworkNotificationReceiver::NetworkNotificationReceiver(PostgreSQL *p, pqxx::co
|
|||
|
||||
void NetworkNotificationReceiver::operator() (const std::string &payload, int packend_pid) {
|
||||
fprintf(stderr, "Network Notification received: %s\n", payload.c_str());
|
||||
Metrics::pgsql_net_notification++;
|
||||
json tmp(json::parse(payload));
|
||||
json &ov = tmp["old_val"];
|
||||
json &nv = tmp["new_val"];
|
||||
|
@ -705,6 +707,8 @@ void PostgreSQL::initializeNetworks()
|
|||
}
|
||||
}
|
||||
|
||||
Metrics::network_count++;
|
||||
|
||||
_networkChanged(empty, config, false);
|
||||
|
||||
auto end = std::chrono::high_resolution_clock::now();
|
||||
|
@ -925,6 +929,8 @@ void PostgreSQL::initializeMembers()
|
|||
}
|
||||
}
|
||||
|
||||
Metrics::member_count++;
|
||||
|
||||
_memberChanged(empty, config, false);
|
||||
|
||||
memberId = "";
|
||||
|
@ -1034,7 +1040,6 @@ void PostgreSQL::heartbeat()
|
|||
w.commit();
|
||||
} catch (std::exception &e) {
|
||||
fprintf(stderr, "%s: Heartbeat update failed: %s\n", controllerId, e.what());
|
||||
w.abort();
|
||||
_pool->unborrow(c);
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
|
||||
continue;
|
||||
|
@ -1230,6 +1235,7 @@ void PostgreSQL::_networksWatcher_Redis() {
|
|||
}
|
||||
lastID = id;
|
||||
}
|
||||
Metrics::redis_net_notification++;
|
||||
}
|
||||
}
|
||||
} catch (sw::redis::Error &e) {
|
||||
|
@ -1788,6 +1794,7 @@ uint64_t PostgreSQL::_doRedisUpdate(sw::redis::Transaction &tx, std::string &con
|
|||
.sadd("network-nodes-all:{"+controllerId+"}:"+networkId, memberId)
|
||||
.hmset("member:{"+controllerId+"}:"+networkId+":"+memberId, record.begin(), record.end());
|
||||
++count;
|
||||
Metrics::redis_mem_notification++;
|
||||
}
|
||||
|
||||
// expire records from all-nodes and network-nodes member list
|
||||
|
|
|
@ -26,6 +26,8 @@
|
|||
#include <memory>
|
||||
#include <redis++/redis++.h>
|
||||
|
||||
#include "../node/Metrics.hpp"
|
||||
|
||||
extern "C" {
|
||||
typedef struct pg_conn PGconn;
|
||||
}
|
||||
|
@ -53,6 +55,7 @@ public:
|
|||
}
|
||||
|
||||
virtual std::shared_ptr<Connection> create() {
|
||||
Metrics::conn_counter++;
|
||||
auto c = std::shared_ptr<PostgresConnection>(new PostgresConnection());
|
||||
c->c = std::make_shared<pqxx::connection>(m_connString);
|
||||
return std::static_pointer_cast<Connection>(c);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue