Add prometheus metrics for Central controllers (#1969)
* add header-only prometheus lib to ext * rename folder * Undo rename directory * prometheus simpleapi included on mac & linux * wip * wire up some controller stats * Get windows building with prometheus * bsd build flags for prometheus * Fix multiple network join from environment entrypoint.sh.release (#1961) * _bond_m guards _bond, not _paths_m (#1965) * Fix: warning: mutex '_aqm_m' is not held on every path through here [-Wthread-safety-analysis] (#1964) * Serve prom metrics from /metrics endpoint * Add prom metrics for Central controller specific things * reorganize metric initialization * testing out a labeled gauge on Networks * increment error counter on throw * Consolidate metrics definitions Put all metric definitions into node/Metrics.hpp. Accessed as needed from there. * Revert "testing out a labeled gauge on Networks" This reverts commit 499ed6d95e11452019cdf48e32ed4cd878c2705b. * still blows up but adding to the record for completeness right now * Fix runtime issues with metrics * Add metrics files to visual studio project * Missed an "extern" * add copyright headers to new files * Add metrics for sent/received bytes (total) * put /metrics endpoint behind auth * sendto returns int on Win32 --------- Co-authored-by: Leonardo Amaral <leleobhz@users.noreply.github.com> Co-authored-by: Brenton Bostick <bostick@gmail.com>
This commit is contained in:
parent
0b03ad9a21
commit
8e6e4ede6d
62 changed files with 4023 additions and 25 deletions
|
@ -87,6 +87,8 @@
|
|||
#include "../ext/http-parser/http_parser.h"
|
||||
#endif
|
||||
|
||||
#include "../node/Metrics.hpp"
|
||||
|
||||
#if ZT_VAULT_SUPPORT
|
||||
extern "C" {
|
||||
#include <curl/curl.h>
|
||||
|
@ -846,6 +848,10 @@ public:
|
|||
_ports[1] = 0;
|
||||
_ports[2] = 0;
|
||||
|
||||
prometheus::simpleapi::saver.set_registry(prometheus::simpleapi::registry_ptr);
|
||||
prometheus::simpleapi::saver.set_delay(std::chrono::seconds(5));
|
||||
prometheus::simpleapi::saver.set_out_file(_homePath + ZT_PATH_SEPARATOR + "metrics.prom");
|
||||
|
||||
#if ZT_VAULT_SUPPORT
|
||||
curl_global_init(CURL_GLOBAL_DEFAULT);
|
||||
#endif
|
||||
|
@ -1667,7 +1673,15 @@ public:
|
|||
|
||||
} else scode = 404;
|
||||
_node->freeQueryResult((void *)pl);
|
||||
} else scode = 500;
|
||||
} else scode = 500;\
|
||||
} else if (ps[0] == "metrics") {
|
||||
std::string statspath = _homePath + ZT_PATH_SEPARATOR + "metrics.prom";
|
||||
if (!OSUtils::readFile(statspath.c_str(), responseBody)) {
|
||||
scode = 500;
|
||||
} else {
|
||||
scode = 200;
|
||||
responseContentType = "text/plain";
|
||||
}
|
||||
} else {
|
||||
if (_controller) {
|
||||
scode = _controller->handleControlPlaneHttpGET(std::vector<std::string>(ps.begin()+1,ps.end()),urlArgs,headers,body,responseBody,responseContentType);
|
||||
|
@ -2456,9 +2470,11 @@ public:
|
|||
if (_forceTcpRelay) {
|
||||
return;
|
||||
}
|
||||
Metrics::udp_recv += len;
|
||||
const uint64_t now = OSUtils::now();
|
||||
if ((len >= 16)&&(reinterpret_cast<const InetAddress *>(from)->ipScope() == InetAddress::IP_SCOPE_GLOBAL))
|
||||
if ((len >= 16)&&(reinterpret_cast<const InetAddress *>(from)->ipScope() == InetAddress::IP_SCOPE_GLOBAL)) {
|
||||
_lastDirectReceiveFromGlobal = now;
|
||||
}
|
||||
const ZT_ResultCode rc = _node->processWirePacket(nullptr,now,reinterpret_cast<int64_t>(sock),reinterpret_cast<const struct sockaddr_storage *>(from),data,len,&_nextBackgroundTaskDeadline);
|
||||
if (ZT_ResultCode_isFatal(rc)) {
|
||||
char tmp[256];
|
||||
|
@ -2545,6 +2561,7 @@ public:
|
|||
{
|
||||
try {
|
||||
if (!len) return; // sanity check, should never happen
|
||||
Metrics::tcp_recv += len;
|
||||
TcpConnection *tc = reinterpret_cast<TcpConnection *>(*uptr);
|
||||
tc->lastReceive = OSUtils::now();
|
||||
switch(tc->type) {
|
||||
|
@ -2683,6 +2700,7 @@ public:
|
|||
Mutex::Lock _l(tc->writeq_m);
|
||||
if (tc->writeq.length() > 0) {
|
||||
long sent = (long)_phy.streamSend(sock,tc->writeq.data(),(unsigned long)tc->writeq.length(),true);
|
||||
Metrics::tcp_send += sent;
|
||||
if (sent > 0) {
|
||||
if ((unsigned long)sent >= (unsigned long)tc->writeq.length()) {
|
||||
tc->writeq.clear();
|
||||
|
@ -3221,9 +3239,13 @@ public:
|
|||
// working we can instantly "fail forward" to it and stop using TCP
|
||||
// proxy fallback, which is slow.
|
||||
if ((localSocket != -1)&&(localSocket != 0)&&(_binder.isUdpSocketValid((PhySocket *)((uintptr_t)localSocket)))) {
|
||||
if ((ttl)&&(addr->ss_family == AF_INET)) _phy.setIp4UdpTtl((PhySocket *)((uintptr_t)localSocket),ttl);
|
||||
if ((ttl)&&(addr->ss_family == AF_INET)) {
|
||||
_phy.setIp4UdpTtl((PhySocket *)((uintptr_t)localSocket),ttl);
|
||||
}
|
||||
const bool r = _phy.udpSend((PhySocket *)((uintptr_t)localSocket),(const struct sockaddr *)addr,data,len);
|
||||
if ((ttl)&&(addr->ss_family == AF_INET)) _phy.setIp4UdpTtl((PhySocket *)((uintptr_t)localSocket),255);
|
||||
if ((ttl)&&(addr->ss_family == AF_INET)) {
|
||||
_phy.setIp4UdpTtl((PhySocket *)((uintptr_t)localSocket),255);
|
||||
}
|
||||
return ((r) ? 0 : -1);
|
||||
} else {
|
||||
return ((_binder.udpSendAll(_phy,addr,data,len,ttl)) ? 0 : -1);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue