Merge branch 'dev' into dns

This commit is contained in:
Grant Limberg 2020-07-30 13:15:43 -07:00
commit 6b197e067a
No known key found for this signature in database
GPG key ID: 2BA62CCABBB4095A
33 changed files with 5419 additions and 1436 deletions

1772
node/Bond.cpp Normal file

File diff suppressed because it is too large Load diff

707
node/Bond.hpp Normal file
View file

@ -0,0 +1,707 @@
/*
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
*/
/****/
#ifndef ZT_BOND_HPP
#define ZT_BOND_HPP
#include <map>
#include "Path.hpp"
#include "Peer.hpp"
#include "../osdep/Link.hpp"
#include "Flow.hpp"
namespace ZeroTier {
class RuntimeEnvironment;
class Link;
class Bond
{
friend class SharedPtr<Bond>;
friend class Peer;
friend class BondController;
/**
 * Ordering predicate for paths: a path with a higher _failoverScore sorts
 * first. When two scores are equal the SharedPtr handles themselves are
 * compared (a < b) so that the ordering stays strict.
 *
 * The call operator is const so the comparator can be invoked through a
 * const object, as ordered containers and standard algorithms may do.
 */
struct PathQualityComparator
{
	bool operator ()(const SharedPtr<Path> & a, const SharedPtr<Path> & b) const
	{
		if (a->_failoverScore != b->_failoverScore) {
			return a->_failoverScore > b->_failoverScore;
		}
		// Equal scores: fall back to the handle ordering as a tie-breaker
		return a < b;
	}
};
public:
// TODO: Remove
bool _header;
int64_t _lastLogTS;
int64_t _lastPrintTS;
void dumpInfo(const int64_t now);
bool relevant();
SharedPtr<Link> getLink(const SharedPtr<Path>& path);
/**
* Constructor. Creates a bond based off of ZT defaults
*
* @param renv Runtime environment
* @param policy Bonding policy
* @param peer
*/
Bond(const RuntimeEnvironment *renv, int policy, const SharedPtr<Peer>& peer);
/**
* Constructor. For use when user intends to manually specify parameters
*
* @param basePolicy
* @param policyAlias
* @param peer
*/
Bond(const RuntimeEnvironment *renv, std::string& basePolicy, std::string& policyAlias, const SharedPtr<Peer>& peer);
/**
* Constructor. Creates a bond based off of a user-defined bond template
*
* @param renv Runtime environment
* @param original
* @param peer
*/
Bond(const RuntimeEnvironment *renv, SharedPtr<Bond> originalBond, const SharedPtr<Peer>& peer);
/**
* @return A copy of the human-readable name (alias) of the bonding policy
*         stored in _policyAlias
*/
std::string policyAlias() { return _policyAlias; }
/**
* Inform the bond about the path that its peer (owning object) just learned about
*
* @param path Newly-learned Path which should now be handled by the Bond
* @param now Current time
*/
void nominatePath(const SharedPtr<Path>& path, int64_t now);
/**
* Propagate and memoize often-used bonding preferences for each path
*/
void applyUserPrefs();
/**
* Check path states and perform bond rebuilds if needed.
*
* @param now Current time
* @param rebuild Whether or not the bond should be reconstructed.
*/
void curateBond(const int64_t now, bool rebuild);
/**
* Periodically perform statistical summaries of quality metrics for all paths.
*
* @param now Current time
*/
void estimatePathQuality(int64_t now);
/**
* Record an invalid incoming packet. This packet failed
* MAC/compression/cipher checks and will now contribute to a
* Packet Error Ratio (PER).
*
* @param path Path over which packet was received
*/
void recordIncomingInvalidPacket(const SharedPtr<Path>& path);
/**
* Record statistics on an outbound packet.
*
* @param path Path over which packet is being sent
* @param packetId Packet ID
* @param payloadLength Packet data length
* @param verb Packet verb
* @param flowId Flow ID
* @param now Current time
*/
void recordOutgoingPacket(const SharedPtr<Path> &path, uint64_t packetId,
uint16_t payloadLength, Packet::Verb verb, int32_t flowId, int64_t now);
/**
* Process the contents of an inbound VERB_QOS_MEASUREMENT to gather path quality observations.
*
* @param now Current time
* @param count Number of records
* @param rx_id table of packet IDs
* @param rx_ts table of holding times
*/
void receivedQoS(const SharedPtr<Path>& path, int64_t now, int count, uint64_t *rx_id, uint16_t *rx_ts);
/**
* Process the contents of an inbound VERB_ACK to gather path quality observations.
*
* @param path Path over which packet was received
* @param now Current time
* @param ackedBytes Number of bytes ACKed by this VERB_ACK
*/
void receivedAck(const SharedPtr<Path>& path, int64_t now, int32_t ackedBytes);
/**
* Generate the contents of a VERB_QOS_MEASUREMENT packet.
*
* @param now Current time
* @param qosBuffer destination buffer
* @return Size of payload
*/
int32_t generateQoSPacket(const SharedPtr<Path>& path, int64_t now, char *qosBuffer);
/**
* Record statistics for an inbound packet.
*
* @param path Path over which packet was received
* @param packetId Packet ID
* @param payloadLength Packet data length
* @param verb Packet verb
* @param flowId Flow ID
* @param now Current time
*/
void recordIncomingPacket(const SharedPtr<Path>& path, uint64_t packetId, uint16_t payloadLength,
Packet::Verb verb, int32_t flowId, int64_t now);
/**
* Determines the most appropriate path for packet and flow egress. This decision is made by
* the underlying bonding policy as well as QoS-related statistical observations of path quality.
*
* @param now Current time
* @param flowId Flow ID
* @return Pointer to suggested Path
*/
SharedPtr<Path> getAppropriatePath(int64_t now, int32_t flowId);
/**
* Creates a new flow record
*
* @param path Path over which flow shall be handled
* @param flowId Flow ID
* @param entropy A byte of entropy to be used by the bonding algorithm
* @param now Current time
* @return Pointer to newly-created Flow
*/
SharedPtr<Flow> createFlow(const SharedPtr<Path> &path, int32_t flowId, unsigned char entropy, int64_t now);
/**
* Removes flow records that are past a certain age limit.
*
* @param age Age threshold to be forgotten
* @param oldest Whether only the oldest shall be forgotten
* @param now Current time
*/
void forgetFlowsWhenNecessary(uint64_t age, bool oldest, int64_t now);
/**
* Assigns a new flow to a bonded path
*
* @param flow Flow to be assigned
* @param now Current time
*/
bool assignFlowToBondedPath(SharedPtr<Flow> &flow, int64_t now);
/**
* Determine whether a path change should occur given the remote peer's reported utility and our
* local peer's known utility. This has the effect of assigning inbound and outbound traffic to
* the same path.
*
* @param now Current time
* @param path Path over which the negotiation request was received
* @param remoteUtility How much utility the remote peer claims to gain by using the declared path
*/
void processIncomingPathNegotiationRequest(uint64_t now, SharedPtr<Path> &path, int16_t remoteUtility);
/**
* Determine state of path synchronization and whether a negotiation request
* shall be sent to the peer.
*
* @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
* @param now Current time
*/
void pathNegotiationCheck(void *tPtr, const int64_t now);
/**
* Sends a VERB_ACK to the remote peer.
*
* @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
* @param path Path over which packet should be sent
* @param localSocket Local source socket
* @param atAddress
* @param now Current time
*/
void sendACK(void *tPtr,const SharedPtr<Path> &path,int64_t localSocket,
const InetAddress &atAddress,int64_t now);
/**
* Sends a VERB_QOS_MEASUREMENT to the remote peer.
*
* @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
* @param path Path over which packet should be sent
* @param localSocket Local source socket
* @param atAddress
* @param now Current time
*/
void sendQOS_MEASUREMENT(void *tPtr,const SharedPtr<Path> &path,int64_t localSocket,
const InetAddress &atAddress,int64_t now);
/**
* Sends a VERB_PATH_NEGOTIATION_REQUEST to the remote peer.
*
* @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
* @param path Path over which packet should be sent
*/
void sendPATH_NEGOTIATION_REQUEST(void *tPtr, const SharedPtr<Path> &path);
/**
*
* @param now Current time
*/
void processBalanceTasks(int64_t now);
/**
* Perform periodic tasks unique to active-backup
*
* @param now Current time
*/
void processActiveBackupTasks(int64_t now);
/**
* Switches the active link in an active-backup scenario to the next best during
* a failover event.
*
* @param now Current time
*/
void dequeueNextActiveBackupPath(uint64_t now);
/**
* Set bond parameters to reasonable defaults, these may later be overwritten by
* user-specified parameters.
*
* @param policy Bonding policy
* @param templateBond
*/
void setReasonableDefaults(int policy, SharedPtr<Bond> templateBond, bool useTemplate);
/**
* Check and assign user-specified quality weights to this bond.
*
* @param weights Set of user-specified weights
* @param len Length of weight vector
*/
void setUserQualityWeights(float weights[], int len);
/**
* Set the maximum acceptable latency for this bond.
*
* NOTE(review): the parameter is a signed int16_t but is stored in the unsigned
* uint16_t member _maxAcceptableLatency, so a negative argument wraps to a
* large positive value -- confirm callers never pass negatives.
*
* @param latencyInMilliseconds Maximum acceptable latency.
*/
void setMaxAcceptableLatency(int16_t latencyInMilliseconds) {
_maxAcceptableLatency = latencyInMilliseconds;
}
/**
* Set the maximum acceptable mean latency for this bond.
*
* NOTE(review): same signed-to-unsigned conversion as setMaxAcceptableLatency()
* (the member _maxAcceptableMeanLatency is uint16_t).
*
* @param latencyInMilliseconds Maximum acceptable (mean) latency.
*/
void setMaxAcceptableMeanLatency(int16_t latencyInMilliseconds) {
_maxAcceptableMeanLatency = latencyInMilliseconds;
}
/**
* Set the maximum acceptable packet delay variance (jitter) for this bond.
*
* NOTE(review): same signed-to-unsigned conversion as setMaxAcceptableLatency()
* (the member _maxAcceptablePacketDelayVariance is uint16_t).
*
* @param latencyVarianceInMilliseconds Maximum acceptable packet delay variance (jitter).
*/
void setMaxAcceptablePacketDelayVariance(int16_t latencyVarianceInMilliseconds) {
_maxAcceptablePacketDelayVariance = latencyVarianceInMilliseconds;
}
/**
* Set the maximum acceptable packet loss ratio for this bond. The value is
* stored as given; no range check is performed here.
*
* @param lossRatio Maximum acceptable packet loss ratio (PLR).
*/
void setMaxAcceptablePacketLossRatio(float lossRatio) {
_maxAcceptablePacketLossRatio = lossRatio;
}
/**
* Set the maximum acceptable packet error ratio for this bond. The value is
* stored as given; no range check is performed here.
*
* @param errorRatio Maximum acceptable packet error ratio (PER).
*/
void setMaxAcceptablePacketErrorRatio(float errorRatio) {
_maxAcceptablePacketErrorRatio = errorRatio;
}
/**
 * Set the minimum acceptable allocation for this bond.
 *
 * The fraction is scaled by 255 and stored in the 8-bit member
 * _minAcceptableAllocation. The input is clamped to [0.0, 1.0] first because
 * converting an out-of-range (or NaN) floating-point value to uint8_t is
 * undefined behavior. In-range inputs are stored exactly as before.
 *
 * @param minAlloc Minimum acceptable allocation, expressed as a fraction
 */
void setMinAcceptableAllocation(float minAlloc) {
	if (!(minAlloc > 0.0f)) {
		// Also catches NaN, for which every comparison is false
		minAlloc = 0.0f;
	} else if (minAlloc > 1.0f) {
		minAlloc = 1.0f;
	}
	_minAcceptableAllocation = static_cast<uint8_t>(minAlloc * 255);
}
/**
 * @return True if the user has defined links for use on this bond
 */
inline bool userHasSpecifiedLinks()
{
	return _userHasSpecifiedLinks;
}
/**
 * @return True if the user has defined one or more failover links for this bond
 */
inline bool userHasSpecifiedFailoverInstructions()
{
	return _userHasSpecifiedFailoverInstructions;
}
/**
 * @return True if the user has designated a primary link
 */
inline bool userHasSpecifiedPrimaryLink()
{
	return _userHasSpecifiedPrimaryLink;
}
/**
 * @return True if the user has provided link speeds
 */
inline bool userHasSpecifiedLinkSpeeds()
{
	return _userHasSpecifiedLinkSpeeds;
}
/**
* Periodically perform maintenance tasks for each active bond.
*
* @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
* @param now Current time
*/
void processBackgroundTasks(void *tPtr, int64_t now);
/**
 * Rate limit gate for VERB_ACK.
 *
 * Every call counts one packet and then drains the counter by the time
 * elapsed since the previous call divided by ZT_ACK_DRAINAGE_DIVISOR. On the
 * very first call (no previous check recorded) the counter is drained
 * completely.
 *
 * @param now Current time
 * @return True while the counter is below ZT_ACK_CUTOFF_LIMIT, false once the
 *         limit has been reached
 */
inline bool rateGateACK(const int64_t now)
{
	_ackCutoffCount++;
	int64_t numToDrain = _ackCutoffCount;
	if (_lastAckRateCheck) {
		// Keep the arithmetic in signed 64 bits: narrowing the quotient to int
		// could overflow or turn negative and then inflate the counter.
		numToDrain = (now - static_cast<int64_t>(_lastAckRateCheck)) / ZT_ACK_DRAINAGE_DIVISOR;
		if (numToDrain < 0) {
			// Clock went backwards; drain nothing rather than adding to the count
			numToDrain = 0;
		}
	}
	_lastAckRateCheck = now;
	if (static_cast<int64_t>(_ackCutoffCount) > numToDrain) {
		_ackCutoffCount = static_cast<uint16_t>(_ackCutoffCount - numToDrain);
	} else {
		_ackCutoffCount = 0;
	}
	return (_ackCutoffCount < ZT_ACK_CUTOFF_LIMIT);
}
/**
 * Rate limit gate for VERB_QOS_MEASUREMENT.
 *
 * Every call counts one packet and then drains the counter by the time
 * elapsed since the previous call divided by ZT_QOS_DRAINAGE_DIVISOR.
 *
 * @param now Current time
 * @return True while the counter is below ZT_QOS_CUTOFF_LIMIT, false once the
 *         limit has been reached
 */
inline bool rateGateQoS(const int64_t now)
{
	_qosCutoffCount++;
	// Keep the arithmetic in signed 64 bits. On the first call
	// _lastQoSRateCheck is still zero, so the quotient is far larger than an
	// int can hold; narrowing it could yield a negative drain amount.
	int64_t numToDrain = (now - static_cast<int64_t>(_lastQoSRateCheck)) / ZT_QOS_DRAINAGE_DIVISOR;
	if (numToDrain < 0) {
		// Clock went backwards; drain nothing rather than adding to the count
		numToDrain = 0;
	}
	_lastQoSRateCheck = now;
	if (static_cast<int64_t>(_qosCutoffCount) > numToDrain) {
		_qosCutoffCount = static_cast<uint16_t>(_qosCutoffCount - numToDrain);
	} else {
		_qosCutoffCount = 0;
	}
	return (_qosCutoffCount < ZT_QOS_CUTOFF_LIMIT);
}
/**
 * Rate limit gate for VERB_PATH_NEGOTIATION_REQUEST.
 *
 * A request arriving within ZT_PATH_NEGOTIATION_CUTOFF_TIME of the previous
 * one increments the cutoff counter; any later arrival resets it to zero.
 *
 * @param now Current time
 * @return True while the cutoff counter is below ZT_PATH_NEGOTIATION_CUTOFF_LIMIT
 */
inline bool rateGatePathNegotiation(const int64_t now)
{
	const bool arrivedWithinWindow = ((now - _lastPathNegotiationReceived) <= ZT_PATH_NEGOTIATION_CUTOFF_TIME);
	_pathNegotiationCutoffCount = arrivedWithinWindow ? (_pathNegotiationCutoffCount + 1) : 0;
	_lastPathNegotiationReceived = now;
	return (_pathNegotiationCutoffCount < ZT_PATH_NEGOTIATION_CUTOFF_LIMIT);
}
/**
 * @param interval Maximum amount of time the user expects a failover to take on this bond.
 */
inline void setFailoverInterval(uint32_t interval)
{
	_failoverInterval = interval;
}
/**
 * @param strategy Strategy that the bond uses to re-assign protocol flows
 *        (stored in an 8-bit member)
 */
inline void setFlowRebalanceStrategy(uint32_t strategy)
{
	_flowRebalanceStrategy = strategy;
}
/**
 * @param strategy Strategy that the bond uses to probe for path aliveness and quality
 */
inline void setLinkMonitorStrategy(uint8_t strategy)
{
	_linkMonitorStrategy = strategy;
}
/**
 * The getters below return the full 32-bit width of the stored delays. They
 * previously returned uint16_t, which silently truncated any value above
 * 65535 accepted by the corresponding setter.
 *
 * @return the current up delay parameter
 */
inline uint32_t getUpDelay() { return _upDelay; }
/**
 * @param upDelay Length of time before a newly-discovered path is admitted to the bond.
 *        Negative values are ignored.
 */
inline void setUpDelay(int upDelay) { if (upDelay >= 0) { _upDelay = upDelay; } }
/**
 * @return Length of time before a newly-failed path is removed from the bond
 */
inline uint32_t getDownDelay() { return _downDelay; }
/**
 * @param downDelay Length of time before a newly-failed path is removed from the bond.
 *        Negative values are ignored.
 */
inline void setDownDelay(int downDelay) { if (downDelay >= 0) { _downDelay = downDelay; } }
/**
 * @return The bond-wide monitoring interval (can be overridden with intervals
 *         specific to certain links)
 */
inline uint16_t getBondMonitorInterval()
{
	return _bondMonitorInterval;
}
/**
 * Set the bond-wide monitoring interval (can be overridden with intervals
 * specific to certain links).
 *
 * @param interval New monitoring interval for the bond
 */
inline void setBondMonitorInterval(uint16_t interval)
{
	_bondMonitorInterval = interval;
}
/**
 * @param policy Bonding policy for this bond
 */
inline void setPolicy(uint8_t policy)
{
	_bondingPolicy = policy;
}
/**
 * @return The bonding policy currently set on this bond
 */
inline uint8_t getPolicy()
{
	return _bondingPolicy;
}
/**
 * @return The health status of the bond
 */
inline bool isHealthy()
{
	return _isHealthy;
}
/**
 * @return The number of links comprising this bond which are considered alive
 */
inline uint8_t getNumAliveLinks()
{
	return _numAliveLinks;
}
/**
 * @return The number of links comprising this bond
 */
inline uint8_t getNumTotalLinks()
{
	return _numTotalLinks;
}
/**
 * @param allowFlowHashing Whether flow hashing is allowed on this bond
 */
inline void setFlowHashing(bool allowFlowHashing)
{
	_allowFlowHashing = allowFlowHashing;
}
/**
 * @return Whether flow-hashing is currently enabled for this bond.
 */
bool flowHashingEnabled()
{
	return _allowFlowHashing;
}
/**
 * @param packetsPerLink Number of packets sent on a path before moving to the
 *        next path in the round-robin sequence
 */
inline void setPacketsPerLink(int packetsPerLink)
{
	_packetsPerLink = packetsPerLink;
}
/**
 * @param method Link re-selection method used for active-backup
 */
inline void setLinkSelectMethod(uint8_t method)
{
	_abLinkSelectMethod = method;
}
/**
 * @return The link re-selection method used for active-backup
 */
inline uint8_t getLinkSelectMethod()
{
	return _abLinkSelectMethod;
}
/**
 * @param allowPathNegotiation Whether path negotiation is allowed on this bond
 */
inline void setAllowPathNegotiation(bool allowPathNegotiation)
{
	_allowPathNegotiation = allowPathNegotiation;
}
/**
 * @return Whether path negotiation is allowed on this bond
 */
inline bool allowPathNegotiation()
{
	return _allowPathNegotiation;
}
private:
const RuntimeEnvironment *RR;
AtomicCounter __refCount;
/**
* Custom name given by the user to this bond type.
*/
std::string _policyAlias;
/**
* Paths that this bond has been made aware of but that are not necessarily
* part of the bond proper.
*/
SharedPtr<Path> _paths[ZT_MAX_PEER_NETWORK_PATHS];
/**
* Set of indices corresponding to paths currently included in the bond proper. This
* may only be updated during a call to curateBond(). The reason for this is so that
* we can simplify the high frequency packet egress logic.
*/
int _bondedIdx[ZT_MAX_PEER_NETWORK_PATHS];
/**
* Number of paths currently included in the _bondedIdx set.
*/
int _numBondedPaths;
/**
* Flows hashed according to port and protocol
*/
std::map<int32_t,SharedPtr<Flow> > _flows;
float _qualityWeights[ZT_QOS_WEIGHT_SIZE]; // How much each factor contributes to the "quality" score of a path.
uint8_t _bondingPolicy;
uint32_t _upDelay;
uint32_t _downDelay;
// active-backup
SharedPtr<Path> _abPath; // current active path
std::list<SharedPtr<Path> > _abFailoverQueue;
uint8_t _abLinkSelectMethod; // link re-selection policy for the primary link in active-backup
uint64_t _lastActiveBackupPathChange;
// balance-rr
uint8_t _rrIdx; // index to path currently in use during Round Robin operation
uint16_t _rrPacketsSentOnCurrLink; // number of packets sent on this link since the most recent path switch.
/**
* How many packets will be sent on a path before moving to the next path
* in the round-robin sequence. A value of zero will cause a random path
* selection for each outgoing packet.
*/
int _packetsPerLink;
// balance-aware
uint64_t _totalBondUnderload;
uint8_t _flowRebalanceStrategy;
// dynamic link monitoring
uint8_t _linkMonitorStrategy;
uint64_t _lastFrame;
uint32_t _dynamicPathMonitorInterval;
// path negotiation
int16_t _localUtility;
SharedPtr<Path> negotiatedPath;
uint8_t _numSentPathNegotiationRequests;
unsigned int _pathNegotiationCutoffCount;
bool _allowPathNegotiation;
uint64_t _lastPathNegotiationReceived;
uint64_t _lastSentPathNegotiationRequest;
// timers
uint32_t _failoverInterval;
uint32_t _qosSendInterval;
uint32_t _ackSendInterval;
uint16_t _ackCutoffCount;
uint64_t _lastAckRateCheck;
uint16_t _qosCutoffCount;
uint64_t _lastQoSRateCheck;
uint32_t throughputMeasurementInterval;
uint32_t _qualityEstimationInterval;
// timestamps
uint64_t _lastCheckUserPreferences;
uint64_t _lastQualityEstimation;
uint64_t _lastFlowStatReset;
uint64_t _lastFlowExpirationCheck;
uint64_t _lastFlowRebalance;
uint64_t _lastPathNegotiationCheck;
uint64_t _lastBackgroundTaskCheck;
float _maxAcceptablePacketLossRatio;
float _maxAcceptablePacketErrorRatio;
uint16_t _maxAcceptableLatency;
uint16_t _maxAcceptableMeanLatency;
uint16_t _maxAcceptablePacketDelayVariance;
uint8_t _minAcceptableAllocation;
bool _isHealthy;
uint8_t _numAliveLinks;
uint8_t _numTotalLinks;
/**
* Default initial punishment inflicted on misbehaving paths. Punishment slowly
* drains linearly. For each eligibility change the remaining punishment is doubled.
*/
uint32_t _defaultPathRefractoryPeriod;
/**
* Whether the current bonding policy requires computation of path statistics
*/
bool _shouldCollectPathStatistics;
/**
* Free byte of entropy that is updated on every packet egress event.
*/
unsigned char _freeRandomByte;
/**
* Remote peer that this bond services
*/
SharedPtr<Peer> _peer;
Mutex _paths_m;
Mutex _flows_m;
/**
* Whether the user has specified links for this bond.
*/
bool _userHasSpecifiedLinks;
/**
* Whether the user has specified a primary link for this bond.
*/
bool _userHasSpecifiedPrimaryLink;
/**
* Whether the user has specified failover instructions for this bond.
*/
bool _userHasSpecifiedFailoverInstructions;
/**
* Whether the user has specified links speeds for this bond.
*/
bool _userHasSpecifiedLinkSpeeds;
/**
* How frequently (in ms) a VERB_ECHO is sent to a peer to verify that a
* path is still active. A value of zero (0) will disable active path
* monitoring; as result, all monitoring will be a function of traffic.
*/
uint16_t _bondMonitorInterval;
/**
* Whether or not flow hashing is allowed.
*/
bool _allowFlowHashing;
};
} // namespace ZeroTier
#endif

203
node/BondController.cpp Normal file
View file

@ -0,0 +1,203 @@
/*
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
*/
/****/
#include "Constants.hpp"
#include "BondController.hpp"
#include "Peer.hpp"
namespace ZeroTier {
int BondController::_minReqPathMonitorInterval;
uint8_t BondController::_defaultBondingPolicy;
/**
 * Construct the bond controller.
 *
 * Records the construction time and resets the (static) default bonding
 * policy to ZT_BONDING_POLICY_NONE. The Phy pointer and the background-task
 * timestamp are given defined initial values instead of being left
 * indeterminate.
 *
 * @param renv Runtime environment
 */
BondController::BondController(const RuntimeEnvironment *renv) :
	_phy(nullptr),
	RR(renv),
	_lastBackgroundBondControlTaskCheck(0)
{
	bondStartTime = RR->node->now();
	_defaultBondingPolicy = ZT_BONDING_POLICY_NONE;
}
bool BondController::linkAllowed(std::string &policyAlias, SharedPtr<Link> link)
{
bool foundInDefinitions = false;
if (_linkDefinitions.count(policyAlias)) {
auto it = _linkDefinitions[policyAlias].begin();
while (it != _linkDefinitions[policyAlias].end()) {
if (link->ifname() == (*it)->ifname()) {
foundInDefinitions = true;
break;
}
++it;
}
}
return _linkDefinitions[policyAlias].empty() || foundInDefinitions;
}
void BondController::addCustomLink(std::string& policyAlias, SharedPtr<Link> link)
{
Mutex::Lock _l(_links_m);
_linkDefinitions[policyAlias].push_back(link);
auto search = _interfaceToLinkMap[policyAlias].find(link->ifname());
if (search == _interfaceToLinkMap[policyAlias].end()) {
link->setAsUserSpecified(true);
_interfaceToLinkMap[policyAlias].insert(std::pair<std::string, SharedPtr<Link>>(link->ifname(), link));
} else {
//fprintf(stderr, "link already exists=%s\n", link->ifname().c_str());
// Link is already defined, overlay user settings
}
}
bool BondController::addCustomPolicy(const SharedPtr<Bond>& newBond)
{
Mutex::Lock _l(_bonds_m);
if (!_bondPolicyTemplates.count(newBond->policyAlias())) {
_bondPolicyTemplates[newBond->policyAlias()] = newBond;
return true;
}
return false;
}
/**
 * Assign a bonding policy alias to a peer.
 *
 * @param identity Peer identity, used as the map key
 * @param policyAlias Alias of the policy to assign
 * @return True if the peer had no assignment yet and this one was stored
 */
bool BondController::assignBondingPolicyToPeer(int64_t identity, const std::string& policyAlias)
{
	Mutex::Lock _l(_bonds_m);
	// insert() leaves an existing assignment untouched and reports that via .second
	return _policyTemplateAssignments.insert(std::pair<int64_t, std::string>(identity, policyAlias)).second;
}
/**
 * Create and register a bond for a peer that does not have one yet.
 *
 * Selection order:
 *  - no policy assignment for the peer: use the default policy code if set,
 *    otherwise the default custom template if one is named and exists;
 *  - policy assignment present: use the assigned template, or fall back to
 *    the default policy code when that template cannot be found.
 *
 * Templates are looked up with find() so that a missing alias never inserts
 * a null entry into _bondPolicyTemplates, and a missing default template is
 * never handed to the Bond constructor as a null template.
 *
 * NOTE(review): _interfaceToLinkMap is read here while holding only _bonds_m,
 * as in the original code -- confirm the intended locking.
 *
 * @param renv Runtime environment
 * @param peer Remote peer that this bond services
 * @return The newly created Bond, or an empty pointer if a bond already exists
 *         for the peer or no usable policy is configured
 */
SharedPtr<Bond> BondController::createTransportTriggeredBond(const RuntimeEnvironment *renv, const SharedPtr<Peer>& peer)
{
	Mutex::Lock _l(_bonds_m);
	const int64_t identity = peer->identity().address().toInt();
	if (_bonds.count(identity)) {
		// A bond already exists for this peer; nothing new is created
		return SharedPtr<Bond>();
	}
	Bond *bond = nullptr;
	auto assignment = _policyTemplateAssignments.find(identity);
	if (assignment == _policyTemplateAssignments.end()) {
		if (_defaultBondingPolicy) {
			// No assignment, use the default policy code
			bond = new Bond(renv, _defaultBondingPolicy, peer);
		}
		else if (_defaultBondingPolicyStr.length()) {
			// No assignment, use the default custom template if it exists
			auto tmpl = _bondPolicyTemplates.find(_defaultBondingPolicyStr);
			if ((tmpl != _bondPolicyTemplates.end()) && (tmpl->second.ptr() != nullptr)) {
				bond = new Bond(renv, tmpl->second.ptr(), peer);
			}
		}
	}
	else {
		auto tmpl = _bondPolicyTemplates.find(assignment->second);
		if ((tmpl == _bondPolicyTemplates.end()) || (tmpl->second.ptr() == nullptr)) {
			// Unable to locate the assigned template, ignore the assignment and use defaults
			bond = new Bond(renv, _defaultBondingPolicy, peer);
		}
		else {
			bond = new Bond(renv, tmpl->second.ptr(), peer);
		}
	}
	if (!bond) {
		return SharedPtr<Bond>();
	}
	_bonds[identity] = bond;
	/**
	 * Determine if user has specified anything that could affect the bonding policy's decisions
	 */
	auto links = _interfaceToLinkMap.find(bond->policyAlias());
	if (links != _interfaceToLinkMap.end()) {
		for (auto it = links->second.begin(); it != links->second.end(); ++it) {
			if (!it->second->isUserSpecified()) {
				continue; // only user-specified links influence the flags below
			}
			bond->_userHasSpecifiedLinks = true;
			if (it->second->primary()) {
				bond->_userHasSpecifiedPrimaryLink = true;
			}
			if (it->second->userHasSpecifiedFailoverInstructions()) {
				bond->_userHasSpecifiedFailoverInstructions = true;
			}
			if (it->second->speed() > 0) {
				bond->_userHasSpecifiedLinkSpeeds = true;
			}
		}
	}
	return bond;
}
/**
 * Get the link definition for the interface behind a local socket, creating
 * and registering a new Link for that interface name if none exists yet.
 *
 * The name buffer is zero-initialized and explicitly terminated so that a
 * std::string is never built from indeterminate bytes if the name lookup
 * does not fill the buffer.
 *
 * @param policyAlias Policy in use
 * @param localSocket Local source socket
 * @return Physical link definition
 */
SharedPtr<Link> BondController::getLinkBySocket(const std::string& policyAlias, uint64_t localSocket)
{
	Mutex::Lock _l(_links_m);
	char ifname[16] = {};
	_phy->getIfName((PhySocket *) ((uintptr_t)localSocket), ifname, 16);
	ifname[sizeof(ifname) - 1] = '\0';
	std::string ifnameStr(ifname);
	std::map<std::string, SharedPtr<Link> > &linksByInterface = _interfaceToLinkMap[policyAlias];
	auto search = linksByInterface.find(ifnameStr);
	if (search != linksByInterface.end()) {
		return search->second;
	}
	// No link registered for this interface yet: create one and remember it
	SharedPtr<Link> s = new Link(ifnameStr, 0, 0, 0, 0, 0, true, ZT_MULTIPATH_SLAVE_MODE_SPARE, "", 0.0);
	linksByInterface.insert(std::pair<std::string,SharedPtr<Link> >(ifnameStr, s));
	return s;
}
/**
 * Look up a link definition by policy alias and interface name.
 *
 * @param policyAlias Policy in use
 * @param ifname Alphanumeric human-readable name
 * @return The matching link definition, or an empty pointer if none is registered
 */
SharedPtr<Link> BondController::getLinkByName(const std::string& policyAlias, const std::string& ifname)
{
	Mutex::Lock _l(_links_m);
	std::map<std::string, SharedPtr<Link> > &linksByInterface = _interfaceToLinkMap[policyAlias];
	std::map<std::string, SharedPtr<Link> >::iterator match(linksByInterface.find(ifname));
	if (match == linksByInterface.end()) {
		return SharedPtr<Link>();
	}
	return match->second;
}
/**
* Report whether the service may bind to the named interface.
*
* The check is currently unconditional: every interface is allowed. A disabled
* implementation that searches the per-policy link maps for the interface name
* is retained below, commented out.
*
* @param ifname Name of interface that we want to know if we can bind to
*        (unused while the check is disabled)
* @return Always true
*/
bool BondController::allowedToBind(const std::string& ifname)
{
return true;
/*
Disabled implementation, kept for reference:

if (!_defaultBondingPolicy) {
return true; // no restrictions
}
Mutex::Lock _l(_links_m);
if (_interfaceToLinkMap.empty()) {
return true; // no restrictions
}
std::map<std::string, std::map<std::string, SharedPtr<Link> > >::iterator policyItr = _interfaceToLinkMap.begin();
while (policyItr != _interfaceToLinkMap.end()) {
std::map<std::string, SharedPtr<Link> >::iterator linkItr = policyItr->second.begin();
while (linkItr != policyItr->second.end()) {
if (linkItr->first == ifname) {
return true;
}
++linkItr;
}
++policyItr;
}
return false;
*/
}
void BondController::processBackgroundTasks(void *tPtr, const int64_t now)
{
Mutex::Lock _l(_bonds_m);
std::map<int64_t,SharedPtr<Bond> >::iterator bondItr = _bonds.begin();
while (bondItr != _bonds.end()) {
bondItr->second->processBackgroundTasks(tPtr, now);
++bondItr;
}
}
} // namespace ZeroTier

231
node/BondController.hpp Normal file
View file

@ -0,0 +1,231 @@
/*
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
*/
/****/
#ifndef ZT_BONDCONTROLLER_HPP
#define ZT_BONDCONTROLLER_HPP
#include <map>
#include <vector>
#include "SharedPtr.hpp"
#include "../osdep/Phy.hpp"
#include "../osdep/Link.hpp"
namespace ZeroTier {
class RuntimeEnvironment;
class Bond;
class Peer;
class BondController
{
friend class Bond;
public:
BondController(const RuntimeEnvironment *renv);
/**
* @return Whether this link is permitted to become a member of a bond.
*/
bool linkAllowed(std::string &policyAlias, SharedPtr<Link> link);
/**
 * @return The minimum interval required to poll the active bonds to fulfill all active monitoring timing requirements.
 */
int minReqPathMonitorInterval()
{
	return _minReqPathMonitorInterval;
}
/**
 * @param minReqPathMonitorInterval The minimum interval required to poll the active bonds to fulfill all active monitoring timing requirements.
 */
static void setMinReqPathMonitorInterval(int minReqPathMonitorInterval)
{
	_minReqPathMonitorInterval = minReqPathMonitorInterval;
}
/**
 * @return True if a default bonding policy is set or at least one custom
 *         policy template has been registered.
 */
bool inUse()
{
	if (_defaultBondingPolicy) {
		return true;
	}
	return !_bondPolicyTemplates.empty();
}
/**
 * Translate a human-readable bonding policy name into its numeric code.
 *
 * @param basePolicyName Bonding policy name (See ZeroTierOne.h)
 * @return The bonding policy code for the given name, or 0 ("none") if the
 *         name is not recognized
 */
static int getPolicyCodeByStr(const std::string& basePolicyName)
{
	// Position in this table plus one is the policy code
	static const char *const policyNames[] = {
		"active-backup",
		"broadcast",
		"balance-rr",
		"balance-xor",
		"balance-aware"
	};
	const int numPolicies = static_cast<int>(sizeof(policyNames) / sizeof(policyNames[0]));
	for (int idx = 0; idx < numPolicies; ++idx) {
		if (basePolicyName == policyNames[idx]) {
			return idx + 1;
		}
	}
	return 0; // "none"
}
/**
 * Translate a numeric bonding policy code into its human-readable name.
 *
 * @param policy Bonding policy code (See ZeroTierOne.h)
 * @return The name for the given code, or "none" if the code is not recognized
 */
static std::string getPolicyStrByCode(int policy)
{
	switch (policy) {
		case 1:
			return "active-backup";
		case 2:
			return "broadcast";
		case 3:
			return "balance-rr";
		case 4:
			return "balance-xor";
		case 5:
			return "balance-aware";
		default:
			return "none";
	}
}
/**
 * Sets the default bonding policy code for new or undefined bonds.
 *
 * @param bp Bonding policy
 */
void setBondingLayerDefaultPolicy(uint8_t bp)
{
	_defaultBondingPolicy = bp;
}
/**
 * Sets the default (custom) bonding policy alias for new or undefined bonds.
 *
 * @param alias Human-readable string alias for bonding policy
 */
void setBondingLayerDefaultPolicyStr(std::string alias)
{
	_defaultBondingPolicyStr = alias;
}
/**
 * @return The default bonding policy code
 */
static int defaultBondingPolicy()
{
	return _defaultBondingPolicy;
}
/**
* Add a user-defined link to a given bonding policy.
*
* @param policyAlias User-defined custom name for variant of bonding policy
* @param link Pointer to new link definition
*/
void addCustomLink(std::string& policyAlias, SharedPtr<Link> link);
/**
* Add a user-defined bonding policy that is based on one of the standard types.
*
* @param newBond Pointer to custom Bond object
* @return Whether a uniquely-named custom policy was successfully added
*/
bool addCustomPolicy(const SharedPtr<Bond>& newBond);
/**
* Assigns a specific bonding policy
*
* @param identity
* @param policyAlias
* @return
*/
bool assignBondingPolicyToPeer(int64_t identity, const std::string& policyAlias);
/**
* Add a new bond to the bond controller.
*
* @param renv Runtime environment
* @param peer Remote peer that this bond services
* @return A pointer to the newly created Bond
*/
SharedPtr<Bond> createTransportTriggeredBond(const RuntimeEnvironment *renv, const SharedPtr<Peer>& peer);
/**
* Periodically perform maintenance tasks for the bonding layer.
*
* @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
* @param now Current time
*/
void processBackgroundTasks(void *tPtr, int64_t now);
/**
* Gets a reference to a physical link definition given a policy alias and a local socket.
*
* @param policyAlias Policy in use
* @param localSocket Local source socket
* @return Physical link definition
*/
SharedPtr<Link> getLinkBySocket(const std::string& policyAlias, uint64_t localSocket);
/**
* Gets a reference to a physical link definition given its human-readable system name.
*
* @param policyAlias Policy in use
* @param ifname Alphanumeric human-readable name
* @return Physical link definition
*/
SharedPtr<Link> getLinkByName(const std::string& policyAlias, const std::string& ifname);
/**
* @param ifname Name of interface that we want to know if we can bind to
*/
bool allowedToBind(const std::string& ifname);
/**
 * @return The timestamp stored in bondStartTime
 */
uint64_t getBondStartTime()
{
	return bondStartTime;
}
private:
Phy<BondController *> *_phy;
const RuntimeEnvironment *RR;
Mutex _bonds_m;
Mutex _links_m;
/**
* The last time that the bond controller updated the set of bonds.
*/
uint64_t _lastBackgroundBondControlTaskCheck;
/**
* The minimum monitoring interval among all paths in this bond.
*/
static int _minReqPathMonitorInterval;
/**
* The default bonding policy used for new bonds unless otherwise specified.
*/
static uint8_t _defaultBondingPolicy;
/**
* Alias of the custom (user-defined) bonding policy used as the default for new
* bonds when no built-in default policy code is set.
*/
std::string _defaultBondingPolicyStr;
/**
* All currently active bonds.
*/
std::map<int64_t,SharedPtr<Bond> > _bonds;
/**
* Map of peers to custom bonding policies
*/
std::map<int64_t,std::string> _policyTemplateAssignments;
/**
* User-defined bonding policies (can be assigned to a peer)
*/
std::map<std::string,SharedPtr<Bond> > _bondPolicyTemplates;
/**
* Set of links defined for a given bonding policy
*/
std::map<std::string,std::vector<SharedPtr<Link> > > _linkDefinitions;
/**
* Set of link objects mapped to their physical interfaces
*/
std::map<std::string, std::map<std::string, SharedPtr<Link> > > _interfaceToLinkMap;
// TODO: Remove
uint64_t bondStartTime;
};
} // namespace ZeroTier
#endif

View file

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -192,7 +192,7 @@
/**
* Minimum delay between timer task checks to prevent thrashing
*/
#define ZT_CORE_TIMER_TASK_GRANULARITY 500
#define ZT_CORE_TIMER_TASK_GRANULARITY 60
/**
* How often Topology::clean() and Network::clean() and similar are called, in ms
@ -253,184 +253,6 @@
*/
#define ZT_LOCAL_CONF_FILE_CHECK_INTERVAL 10000
/**
* How frequently to check for changes to the system's network interfaces. When
* the service decides to use this constant it's because we want to react more
* quickly to new interfaces that pop up or go down.
*/
#define ZT_MULTIPATH_BINDER_REFRESH_PERIOD 5000
/**
* Packets are only used for QoS/ACK statistical sampling if their packet ID is divisible by
* this integer. This is to provide a mechanism for both peers to agree on which packets need
* special treatment without having to exchange information. Changing this value would be
* a breaking change and would necessitate a protocol version upgrade. Since each incoming and
* outgoing packet ID is checked against this value its evaluation is of the form:
* (id & (divisor - 1)) == 0, thus the divisor must be a power of 2.
*
* This value is set at (16) so that given a normally-distributed RNG output we will sample
* 1/16th (or ~6.25%) of packets.
*/
#define ZT_PATH_QOS_ACK_PROTOCOL_DIVISOR 0x10
/**
* Time horizon for VERB_QOS_MEASUREMENT and VERB_ACK packet processing cutoff
*/
#define ZT_PATH_QOS_ACK_CUTOFF_TIME 30000
/**
* Maximum number of VERB_QOS_MEASUREMENT and VERB_ACK packets allowed to be
* processed within cutoff time. Separate totals are kept for each type but
* the limit is the same for both.
*
* This limits how often this peer will compute statistical estimates
* of various QoS measures from a VERB_QOS_MEASUREMENT or VERB_ACK packets to
* CUTOFF_LIMIT times per CUTOFF_TIME milliseconds per peer to prevent
* this from being useful for DOS amplification attacks.
*/
#define ZT_PATH_QOS_ACK_CUTOFF_LIMIT 128
/**
* Path choice history window size. This is used to keep track of which paths were
* previously selected so that we can maintain a target allocation over time.
*/
#define ZT_MULTIPATH_PROPORTION_WIN_SZ 128
/**
* How often we will sample packet latency. Should be at least greater than ZT_PING_CHECK_INVERVAL
* since we will record a 0 bit/s measurement if no valid latency measurement was made within this
* window of time.
*/
#define ZT_PATH_LATENCY_SAMPLE_INTERVAL (ZT_MULTIPATH_PEER_PING_PERIOD * 2)
/**
* Interval used for rate-limiting the computation of path quality estimates.
*/
#define ZT_PATH_QUALITY_COMPUTE_INTERVAL 1000
/**
* Number of samples to consider when computing real-time path statistics
*/
#define ZT_PATH_QUALITY_METRIC_REALTIME_CONSIDERATION_WIN_SZ 128
/**
* Number of samples to consider when performing long-term path quality analysis.
* By default this value is set to ZT_PATH_QUALITY_METRIC_REALTIME_CONSIDERATION_WIN_SZ but can
* be set to any value greater than that to observe longer-term path quality behavior.
*/
#define ZT_PATH_QUALITY_METRIC_WIN_SZ ZT_PATH_QUALITY_METRIC_REALTIME_CONSIDERATION_WIN_SZ
/**
* Maximum acceptable Packet Delay Variance (PDV) over a path
*/
#define ZT_PATH_MAX_PDV 1000
/**
* Maximum acceptable time interval between expectation and receipt of at least one ACK over a path
*/
#define ZT_PATH_MAX_AGE 30000
/**
* Maximum acceptable mean latency over a path
*/
#define ZT_PATH_MAX_MEAN_LATENCY 1000
/**
* How much each factor contributes to the "stability" score of a path
*/
#define ZT_PATH_CONTRIB_PDV (1.0 / 3.0)
#define ZT_PATH_CONTRIB_LATENCY (1.0 / 3.0)
#define ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE (1.0 / 3.0)
/**
* How much each factor contributes to the "quality" score of a path
*/
#define ZT_PATH_CONTRIB_STABILITY (0.75 / 3.0)
#define ZT_PATH_CONTRIB_THROUGHPUT (1.50 / 3.0)
#define ZT_PATH_CONTRIB_SCOPE (0.75 / 3.0)
/**
* How often a QoS packet is sent
*/
#define ZT_PATH_QOS_INTERVAL 3000
/**
* Min and max acceptable sizes for a VERB_QOS_MEASUREMENT packet
*/
#define ZT_PATH_MIN_QOS_PACKET_SZ 8 + 1
#define ZT_PATH_MAX_QOS_PACKET_SZ 1400
/**
* How many ID:sojourn time pairs in a single QoS packet
*/
#define ZT_PATH_QOS_TABLE_SIZE ((ZT_PATH_MAX_QOS_PACKET_SZ * 8) / (64 + 16))
/**
* Maximum number of outgoing packets we monitor for QoS information
*/
#define ZT_PATH_MAX_OUTSTANDING_QOS_RECORDS 128
/**
* Timeout for QoS records
*/
#define ZT_PATH_QOS_TIMEOUT (ZT_PATH_QOS_INTERVAL * 2)
/**
* How often the service tests the path throughput
*/
#define ZT_PATH_THROUGHPUT_MEASUREMENT_INTERVAL (ZT_PATH_ACK_INTERVAL * 8)
/**
* Minimum amount of time between each ACK packet
*/
#define ZT_PATH_ACK_INTERVAL 1000
/**
* How often an aggregate link statistics report is emitted into this tracing system
*/
#define ZT_PATH_AGGREGATE_STATS_REPORT_INTERVAL 60000
/**
* How much an aggregate link's component paths can vary from their target allocation
* before the link is considered to be in a state of imbalance.
*/
#define ZT_PATH_IMBALANCE_THRESHOLD 0.20
/**
* Max allowable time spent in any queue
*/
#define ZT_QOS_TARGET 5 // ms
/**
* Time period where the time spent in the queue by a packet should fall below
* target at least once
*/
#define ZT_QOS_INTERVAL 100 // ms
/**
* The number of bytes that each queue is allowed to send during each DRR cycle.
* This approximates a single-byte-based fairness queuing scheme
*/
#define ZT_QOS_QUANTUM ZT_DEFAULT_MTU
/**
* The maximum total number of packets that can be queued among all
* active/inactive, old/new queues
*/
#define ZT_QOS_MAX_ENQUEUED_PACKETS 1024
/**
* Number of QoS queues (buckets)
*/
#define ZT_QOS_NUM_BUCKETS 9
/**
* All unspecified traffic is put in this bucket. Anything in a bucket with a smaller
* value is de-prioritized. Anything in a bucket with a higher value is prioritized over
* other traffic.
*/
#define ZT_QOS_DEFAULT_BUCKET 0
/**
* How frequently to send heartbeats over in-use paths
*/
@ -446,16 +268,6 @@
*/
#define ZT_PEER_PING_PERIOD 60000
/**
* Delay between full-fledge pings of directly connected peers.
*
* With multipath bonding enabled ping peers more often to measure
* packet loss and latency. This uses more bandwidth so is disabled
* by default to avoid increasing idle bandwidth use for regular
* links.
*/
#define ZT_MULTIPATH_PEER_PING_PERIOD 5000
/**
* Paths are considered expired if they have not sent us a real packet in this long
*/
@ -466,6 +278,205 @@
*/
#define ZT_PEER_EXPIRED_PATH_TRIAL_PERIOD (ZT_PEER_PING_PERIOD * 10)
/**
* Outgoing packets are only used for QoS/ACK statistical sampling if their
* packet ID is divisible by this integer. This is to provide a mechanism for
* both peers to agree on which packets need special treatment without having
* to exchange information. Changing this value would be a breaking change and
* would necessitate a protocol version upgrade. Since each incoming and
* outgoing packet ID is checked against this value its evaluation is of the
* form:
*
* (id & (divisor - 1)) == 0, thus the divisor must be a power of 2.
*
* This value is set at (2) so that given a uniformly-distributed RNG output
* we will sample 1/2 (or 50%) of packets.
*/
#define ZT_QOS_ACK_DIVISOR 0x2
/**
* Time horizon for VERB_QOS_MEASUREMENT and VERB_ACK packet processing cutoff
*/
#define ZT_QOS_ACK_CUTOFF_TIME 30000
/**
* Maximum number of VERB_QOS_MEASUREMENT and VERB_ACK packets allowed to be
* processed within cutoff time. Separate totals are kept for each type but
* the limit is the same for both.
*
* This limits how often this peer will compute statistical estimates
* of various QoS measures from a VERB_QOS_MEASUREMENT or VERB_ACK packets to
* CUTOFF_LIMIT times per CUTOFF_TIME milliseconds per peer to prevent
* this from being useful for DOS amplification attacks.
*/
#define ZT_QOS_ACK_CUTOFF_LIMIT 128
/**
* Minimum acceptable size for a VERB_QOS_MEASUREMENT packet
*/
#define ZT_QOS_MIN_PACKET_SIZE (8 + 1)
/**
* Maximum acceptable size for a VERB_QOS_MEASUREMENT packet
*/
#define ZT_QOS_MAX_PACKET_SIZE 1400
/**
* How many ID:sojourn time pairs are in a single QoS packet
*/
#define ZT_QOS_TABLE_SIZE ((ZT_QOS_MAX_PACKET_SIZE * 8) / (64 + 16))
/**
* Maximum number of outgoing packets we monitor for QoS information
*/
#define ZT_QOS_MAX_OUTSTANDING_RECORDS (1024*16)
/**
* Interval used for rate-limiting the computation of path quality estimates.
*/
#define ZT_QOS_COMPUTE_INTERVAL 1000
/**
* Number of samples to consider when processing real-time path statistics
*/
#define ZT_QOS_SHORTTERM_SAMPLE_WIN_SIZE 32
/**
* Max allowable time spent in any queue (in ms)
*/
#define ZT_AQM_TARGET 5
/**
* Time period within which the time spent in the queue by a packet should
* fall below target at least once (in ms).
*/
#define ZT_AQM_INTERVAL 100
/**
* The number of bytes that each queue is allowed to send during each DRR cycle.
* This approximates a single-byte-based fairness queuing scheme.
*/
#define ZT_AQM_QUANTUM ZT_DEFAULT_MTU
/**
* The maximum total number of packets that can be queued among all
* active/inactive, old/new queues.
*/
#define ZT_AQM_MAX_ENQUEUED_PACKETS 1024
/**
* Number of QoS queues (buckets)
*/
#define ZT_AQM_NUM_BUCKETS 9
/**
* All unspecified traffic is put in this bucket. Anything in a bucket with a
* smaller value is deprioritized. Anything in a bucket with a higher value is
* prioritized over other traffic.
*/
#define ZT_AQM_DEFAULT_BUCKET 0
/**
* How long before we consider a path to be dead in the general sense. This is
* used while searching for default or alternative paths to try in the absence
* of direct guidance from the user or a selection policy.
*/
#define ZT_MULTIPATH_DEFAULT_FAILOVER_INTERVAL 10000
/**
* How often flows are evaluated
*/
#define ZT_MULTIPATH_FLOW_CHECK_INTERVAL 10000
/**
* How long before we consider a flow to be dead and remove it from the
* policy's list.
*/
#define ZT_MULTIPATH_FLOW_EXPIRATION_INTERVAL 30000
/**
* How often a flow's statistical counters are reset
*/
#define ZT_FLOW_STATS_RESET_INTERVAL ZT_MULTIPATH_FLOW_EXPIRATION_INTERVAL
/**
* Maximum number of flows allowed before we start forcibly forgetting old ones
*/
#define ZT_FLOW_MAX_COUNT (1024*64)
/**
* Minimum amount of time between rebalancings of flows across links (if at all)
*/
#define ZT_FLOW_MIN_REBALANCE_INTERVAL 5000
/**
* How often flows are rebalanced across links (if at all)
*/
#define ZT_FLOW_REBALANCE_INTERVAL 5000
/**
* A defensive timer to prevent path quality metrics from being
* processed too often.
*/
#define ZT_BOND_BACKGROUND_TASK_MIN_INTERVAL ZT_CORE_TIMER_TASK_GRANULARITY
/**
* How often a bonding policy's background tasks are processed;
* some need more frequent attention than others.
*/
#define ZT_MULTIPATH_ACTIVE_BACKUP_CHECK_INTERVAL ZT_CORE_TIMER_TASK_GRANULARITY
/**
* Minimum amount of time (since a previous transition) before the active-backup bonding
* policy is allowed to transition to a different link. Only valid for active-backup.
*/
#define ZT_MULTIPATH_MIN_ACTIVE_BACKUP_AUTOFLOP_INTERVAL 10000
/**
* How often a peer checks that incoming (and outgoing) traffic on a bonded link is
* appropriately paired.
*/
#define ZT_PATH_NEGOTIATION_CHECK_INTERVAL 15000
/**
* Time horizon for path negotiation cutoff
*/
#define ZT_PATH_NEGOTIATION_CUTOFF_TIME 60000
/**
* Maximum number of path negotiations within cutoff time
*
* This limits response to PATH_NEGOTIATION to CUTOFF_LIMIT responses
* per CUTOFF_TIME milliseconds per peer to prevent this from being
* useful for DOS amplification attacks.
*/
#define ZT_PATH_NEGOTIATION_CUTOFF_LIMIT 8
/**
* How many times a peer will attempt to petition another peer to synchronize its
* traffic to the same path before giving up and surrendering to the other peer's preference.
*/
#define ZT_PATH_NEGOTIATION_TRY_COUNT 3
/**
* How much greater the quality of a path should be before an
* optimization procedure triggers a switch.
*/
#define ZT_MULTIPATH_ACTIVE_BACKUP_OPTIMIZE_MIN_THRESHOLD 0.10
/**
* Artificially inflates the failover score for paths which meet
* certain non-performance-related policy ranking criteria.
*/
#define ZT_MULTIPATH_FAILOVER_HANDICAP_PREFERRED 500
#define ZT_MULTIPATH_FAILOVER_HANDICAP_PRIMARY 1000
#define ZT_MULTIPATH_FAILOVER_HANDICAP_NEGOTIATED 5000
/**
* An indicator that no flow is to be associated with the given packet
*/
#define ZT_QOS_NO_FLOW -1
/**
* Timeout for overall peer activity (measured from last receive)
*/
@ -538,6 +549,27 @@
*/
#define ZT_PUSH_DIRECT_PATHS_CUTOFF_TIME 30000
/**
* Drainage constants for VERB_ECHO rate-limiters
*/
#define ZT_ECHO_CUTOFF_LIMIT ((1000 / ZT_CORE_TIMER_TASK_GRANULARITY) * ZT_MAX_PEER_NETWORK_PATHS)
#define ZT_ECHO_DRAINAGE_DIVISOR (1000 / ZT_ECHO_CUTOFF_LIMIT)
/**
* Drainage constants for VERB_QOS rate-limiters
*/
#define ZT_QOS_CUTOFF_LIMIT ((1000 / ZT_CORE_TIMER_TASK_GRANULARITY) * ZT_MAX_PEER_NETWORK_PATHS)
#define ZT_QOS_DRAINAGE_DIVISOR (1000 / ZT_QOS_CUTOFF_LIMIT)
/**
* Drainage constants for VERB_ACK rate-limiters
*/
#define ZT_ACK_CUTOFF_LIMIT 128
#define ZT_ACK_DRAINAGE_DIVISOR (1000 / ZT_ACK_CUTOFF_LIMIT)
#define ZT_MULTIPATH_DEFAULT_REFRCTORY_PERIOD 8000
#define ZT_MULTIPATH_MAX_REFRACTORY_PERIOD 600000
/**
* Maximum number of direct path pushes within cutoff time
*

124
node/Flow.hpp Normal file
View file

@ -0,0 +1,124 @@
/*
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
*/
/****/
#ifndef ZT_FLOW_HPP
#define ZT_FLOW_HPP
#include "Path.hpp"
#include "SharedPtr.hpp"
namespace ZeroTier {
/**
 * A protocol flow that is identified by the origin and destination port.
 *
 * Holds the flow's ID, running incoming/outgoing byte counters, the time of
 * the last recorded activity, and the path currently assigned to the flow.
 * All read-only accessors are const so they can be used through a const Flow.
 */
struct Flow
{
	/**
	 * Creates a flow with zeroed byte counters and no assigned path.
	 *
	 * @param flowId Given flow ID
	 * @param now Current time (recorded as the time of last activity)
	 */
	Flow(int32_t flowId, int64_t now) :
		_flowId(flowId),
		_bytesInPerUnitTime(0),
		_bytesOutPerUnitTime(0),
		_lastActivity(now),
		_lastPathReassignment(0),
		_assignedPath(SharedPtr<Path>())
	{}

	/**
	 * Reset flow statistics (both the incoming and outgoing byte counters)
	 */
	void resetByteCounts()
	{
		_bytesInPerUnitTime = 0;
		_bytesOutPerUnitTime = 0;
	}

	/**
	 * @return The Flow's ID
	 */
	int32_t id() const { return _flowId; }

	/**
	 * @return Number of incoming bytes processed on this flow per unit time
	 *         (the counter is stored unsigned but returned as int64_t)
	 */
	int64_t bytesInPerUnitTime() const { return _bytesInPerUnitTime; }

	/**
	 * Record number of incoming bytes on this flow
	 *
	 * @param bytes Number of incoming bytes
	 */
	void recordIncomingBytes(uint64_t bytes) { _bytesInPerUnitTime += bytes; }

	/**
	 * @return Number of outgoing bytes processed on this flow per unit time
	 *         (the counter is stored unsigned but returned as int64_t)
	 */
	int64_t bytesOutPerUnitTime() const { return _bytesOutPerUnitTime; }

	/**
	 * Record number of outgoing bytes on this flow
	 *
	 * @param bytes Number of outgoing bytes
	 */
	void recordOutgoingBytes(uint64_t bytes) { _bytesOutPerUnitTime += bytes; }

	/**
	 * @return The total number of bytes processed on this flow
	 *         (incoming plus outgoing since the counters were last reset)
	 */
	uint64_t totalBytes() const { return _bytesInPerUnitTime + _bytesOutPerUnitTime; }

	/**
	 * How long since a packet was sent or received in this flow
	 *
	 * @param now Current time
	 * @return The age of the flow in terms of last recorded activity
	 */
	int64_t age(int64_t now) const { return now - _lastActivity; }

	/**
	 * Record that traffic was processed on this flow at the given time.
	 *
	 * @param now Current time
	 */
	void updateActivity(int64_t now) { _lastActivity = now; }

	/**
	 * @return Path assigned to this flow
	 */
	SharedPtr<Path> assignedPath() const { return _assignedPath; }

	/**
	 * @param path Assigned path over which this flow should be handled
	 * @param now Current time (recorded as the time of this reassignment)
	 */
	void assignPath(const SharedPtr<Path> &path, int64_t now)
	{
		_assignedPath = path;
		_lastPathReassignment = now;
	}

	// Reference count member
	AtomicCounter __refCount;

	// Identifier given at construction
	int32_t _flowId;
	// Incoming bytes recorded since construction or the last resetByteCounts()
	uint64_t _bytesInPerUnitTime;
	// Outgoing bytes recorded since construction or the last resetByteCounts()
	uint64_t _bytesOutPerUnitTime;
	// Time passed to the constructor or to the most recent updateActivity()
	int64_t _lastActivity;
	// Time passed to the most recent assignPath(); 0 until a path is assigned
	int64_t _lastPathReassignment;
	// Path set by the most recent assignPath()
	SharedPtr<Path> _assignedPath;
	// Not written by any method of this struct
	SharedPtr<Path> _previouslyAssignedPath;
};
} // namespace ZeroTier
#endif

View file

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -35,10 +35,12 @@
#include "Tag.hpp"
#include "Revocation.hpp"
#include "Trace.hpp"
#include "Path.hpp"
#include "Bond.hpp"
namespace ZeroTier {
bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr)
bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr,int32_t flowId)
{
const Address sourceAddress(source());
@ -67,7 +69,7 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr)
if (!trusted) {
if (!dearmor(peer->key())) {
RR->t->incomingPacketMessageAuthenticationFailure(tPtr,_path,packetId(),sourceAddress,hops(),"invalid MAC");
_path->recordInvalidPacket();
peer->recordIncomingInvalidPacket(_path);
return true;
}
}
@ -78,11 +80,12 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr)
}
const Packet::Verb v = verb();
bool r = true;
switch(v) {
//case Packet::VERB_NOP:
default: // ignore unknown verbs, but if they pass auth check they are "received"
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),v,0,Packet::VERB_NOP,false,0);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),v,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW);
break;
case Packet::VERB_HELLO: r = _doHELLO(RR,tPtr,true); break;
case Packet::VERB_ACK: r = _doACK(RR,tPtr,peer); break;
@ -91,8 +94,8 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr)
case Packet::VERB_OK: r = _doOK(RR,tPtr,peer); break;
case Packet::VERB_WHOIS: r = _doWHOIS(RR,tPtr,peer); break;
case Packet::VERB_RENDEZVOUS: r = _doRENDEZVOUS(RR,tPtr,peer); break;
case Packet::VERB_FRAME: r = _doFRAME(RR,tPtr,peer); break;
case Packet::VERB_EXT_FRAME: r = _doEXT_FRAME(RR,tPtr,peer); break;
case Packet::VERB_FRAME: r = _doFRAME(RR,tPtr,peer,flowId); break;
case Packet::VERB_EXT_FRAME: r = _doEXT_FRAME(RR,tPtr,peer,flowId); break;
case Packet::VERB_ECHO: r = _doECHO(RR,tPtr,peer); break;
case Packet::VERB_MULTICAST_LIKE: r = _doMULTICAST_LIKE(RR,tPtr,peer); break;
case Packet::VERB_NETWORK_CREDENTIALS: r = _doNETWORK_CREDENTIALS(RR,tPtr,peer); break;
@ -103,6 +106,7 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr)
case Packet::VERB_PUSH_DIRECT_PATHS: r = _doPUSH_DIRECT_PATHS(RR,tPtr,peer); break;
case Packet::VERB_USER_MESSAGE: r = _doUSER_MESSAGE(RR,tPtr,peer); break;
case Packet::VERB_REMOTE_TRACE: r = _doREMOTE_TRACE(RR,tPtr,peer); break;
case Packet::VERB_PATH_NEGOTIATION_REQUEST: r = _doPATH_NEGOTIATION_REQUEST(RR,tPtr,peer); break;
}
if (r) {
RR->node->statsLogVerb((unsigned int)v,(unsigned int)size());
@ -113,9 +117,6 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,void *tPtr)
RR->sw->requestWhois(tPtr,RR->node->now(),sourceAddress);
return false;
}
} catch (int ztExcCode) {
RR->t->incomingPacketInvalid(tPtr,_path,packetId(),sourceAddress,hops(),verb(),"unexpected exception in tryDecode()");
return true;
} catch ( ... ) {
RR->t->incomingPacketInvalid(tPtr,_path,packetId(),sourceAddress,hops(),verb(),"unexpected exception in tryDecode()");
return true;
@ -193,59 +194,61 @@ bool IncomingPacket::_doERROR(const RuntimeEnvironment *RR,void *tPtr,const Shar
default: break;
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_ERROR,inRePacketId,inReVerb,false,networkId);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_ERROR,inRePacketId,inReVerb,false,networkId,ZT_QOS_NO_FLOW);
return true;
}
bool IncomingPacket::_doACK(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer)
{
if (!peer->rateGateACK(RR->node->now()))
SharedPtr<Bond> bond = peer->bond();
if (!bond || !bond->rateGateACK(RR->node->now())) {
return true;
}
/* Dissect incoming ACK packet. From this we can estimate current throughput of the path, establish known
* maximums and detect packet loss. */
if (peer->localMultipathSupport()) {
int32_t ackedBytes;
if (payloadLength() != sizeof(ackedBytes)) {
return true; // ignore
}
memcpy(&ackedBytes, payload(), sizeof(ackedBytes));
_path->receivedAck(RR->node->now(), Utils::ntoh(ackedBytes));
peer->inferRemoteMultipathEnabled();
int32_t ackedBytes;
if (payloadLength() != sizeof(ackedBytes)) {
return true; // ignore
}
memcpy(&ackedBytes, payload(), sizeof(ackedBytes));
if (bond) {
bond->receivedAck(_path, RR->node->now(), Utils::ntoh(ackedBytes));
}
return true;
}
bool IncomingPacket::_doQOS_MEASUREMENT(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer)
{
if (!peer->rateGateQoS(RR->node->now()))
SharedPtr<Bond> bond = peer->bond();
/* TODO: Fix rate gate issue
if (!bond || !bond->rateGateQoS(RR->node->now())) {
return true;
}
*/
/* Dissect incoming QoS packet. From this we can compute latency values and their variance.
* The latency variance is used as a measure of "jitter". */
if (peer->localMultipathSupport()) {
if (payloadLength() > ZT_PATH_MAX_QOS_PACKET_SZ || payloadLength() < ZT_PATH_MIN_QOS_PACKET_SZ) {
return true; // ignore
}
const int64_t now = RR->node->now();
uint64_t rx_id[ZT_PATH_QOS_TABLE_SIZE];
uint16_t rx_ts[ZT_PATH_QOS_TABLE_SIZE];
char *begin = (char *)payload();
char *ptr = begin;
int count = 0;
int len = payloadLength();
// Read packet IDs and latency compensation intervals for each packet tracked by this QoS packet
while (ptr < (begin + len) && (count < ZT_PATH_QOS_TABLE_SIZE)) {
memcpy((void*)&rx_id[count], ptr, sizeof(uint64_t));
ptr+=sizeof(uint64_t);
memcpy((void*)&rx_ts[count], ptr, sizeof(uint16_t));
ptr+=sizeof(uint16_t);
count++;
}
_path->receivedQoS(now, count, rx_id, rx_ts);
peer->inferRemoteMultipathEnabled();
if (payloadLength() > ZT_QOS_MAX_PACKET_SIZE || payloadLength() < ZT_QOS_MIN_PACKET_SIZE) {
return true; // ignore
}
const int64_t now = RR->node->now();
uint64_t rx_id[ZT_QOS_TABLE_SIZE];
uint16_t rx_ts[ZT_QOS_TABLE_SIZE];
char *begin = (char *)payload();
char *ptr = begin;
int count = 0;
unsigned int len = payloadLength();
// Read packet IDs and latency compensation intervals for each packet tracked by this QoS packet
while (ptr < (begin + len) && (count < ZT_QOS_TABLE_SIZE)) {
memcpy((void*)&rx_id[count], ptr, sizeof(uint64_t));
ptr+=sizeof(uint64_t);
memcpy((void*)&rx_ts[count], ptr, sizeof(uint16_t));
ptr+=sizeof(uint16_t);
count++;
}
if (bond) {
bond->receivedQoS(_path, now, count, rx_id, rx_ts);
}
return true;
}
@ -441,11 +444,12 @@ bool IncomingPacket::_doHELLO(const RuntimeEnvironment *RR,void *tPtr,const bool
}
outp.setAt<uint16_t>(worldUpdateSizeAt,(uint16_t)(outp.size() - (worldUpdateSizeAt + 2)));
peer->recordOutgoingPacket(_path,outp.packetId(),outp.payloadLength(),outp.verb(),ZT_QOS_NO_FLOW,now);
outp.armor(peer->key(),true);
_path->send(RR,tPtr,outp.data(),outp.size(),now);
peer->setRemoteVersion(protoVersion,vMajor,vMinor,vRevision); // important for this to go first so received() knows the version
peer->received(tPtr,_path,hops(),pid,payloadLength(),Packet::VERB_HELLO,0,Packet::VERB_NOP,false,0);
peer->received(tPtr,_path,hops(),pid,payloadLength(),Packet::VERB_HELLO,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW);
return true;
}
@ -493,7 +497,10 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedP
}
if (!hops()) {
_path->updateLatency((unsigned int)latency,RR->node->now());
SharedPtr<Bond> bond = peer->bond();
if (!bond) {
_path->updateLatency((unsigned int)latency,RR->node->now());
}
}
peer->setRemoteVersion(vProto,vMajor,vMinor,vRevision);
@ -522,8 +529,7 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedP
if (network) {
const MulticastGroup mg(MAC(field(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_MAC,6),6),at<uint32_t>(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_ADI));
const unsigned int count = at<uint16_t>(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_GATHER_RESULTS + 4);
if (((ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_GATHER_RESULTS + 6) + (count * 5)) <= size())
RR->mc->addMultiple(tPtr,RR->node->now(),networkId,mg,field(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_GATHER_RESULTS + 6,count * 5),count,at<uint32_t>(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_GATHER_RESULTS));
RR->mc->addMultiple(tPtr,RR->node->now(),networkId,mg,field(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_GATHER_RESULTS + 6,count * 5),count,at<uint32_t>(ZT_PROTO_VERB_MULTICAST_GATHER__OK__IDX_GATHER_RESULTS));
}
} break;
@ -556,7 +562,7 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedP
default: break;
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_OK,inRePacketId,inReVerb,false,networkId);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_OK,inRePacketId,inReVerb,false,networkId,ZT_QOS_NO_FLOW);
return true;
}
@ -591,7 +597,7 @@ bool IncomingPacket::_doWHOIS(const RuntimeEnvironment *RR,void *tPtr,const Shar
_path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now());
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_WHOIS,0,Packet::VERB_NOP,false,0);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_WHOIS,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW);
return true;
}
@ -615,13 +621,108 @@ bool IncomingPacket::_doRENDEZVOUS(const RuntimeEnvironment *RR,void *tPtr,const
}
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_RENDEZVOUS,0,Packet::VERB_NOP,false,0);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_RENDEZVOUS,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW);
return true;
}
bool IncomingPacket::_doFRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer)
// Walks the IPv6 extension header chain of a frame to locate its payload.
//
// frameData/frameLen describe the frame, starting at the fixed 40-byte IPv6 header.
// On success returns true, with pos set to the offset just past the last skipped
// header and proto set to the first next-header value that is not one of the
// skipped extension types (hop-by-hop, routing, destination options, mobility).
// Returns false if the frame is shorter than 40 bytes (pos and proto are then left
// untouched), if an extension header is truncated, or if the chain runs past the
// end of the frame.
static bool _ipv6GetPayload(const uint8_t *frameData,unsigned int frameLen,unsigned int &pos,unsigned int &proto)
{
	const unsigned int fixedHeaderLen = 40;
	if (frameLen < fixedHeaderLen)
		return false;

	pos = fixedHeaderLen;
	proto = frameData[6]; // "next header" field of the fixed header

	for (; pos <= frameLen; ) {
		// Fragment (44) and IPSec ESP/AH (50/51) headers are deliberately not skipped:
		// fragments are not parsed here and IPSec contents are encrypted, so
		// they fall through as the final protocol just like any other value.
		const bool skippable =
			(proto == 0) ||   // hop-by-hop options
			(proto == 43) ||  // routing
			(proto == 60) ||  // destination options
			(proto == 135);   // mobility options
		if (!skippable)
			return true;

		if ((pos + 8) > frameLen)
			return false; // invalid!

		// The second byte holds the header length in 8-byte units, not counting the first 8 bytes.
		const unsigned int extensionLen = (static_cast<unsigned int>(frameData[pos + 1]) * 8) + 8;
		proto = frameData[pos];
		pos += extensionLen;
	}

	return false; // overflow == invalid
}
bool IncomingPacket::_doFRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer,int32_t flowId)
{
int32_t _flowId = ZT_QOS_NO_FLOW;
SharedPtr<Bond> bond = peer->bond();
if (bond && bond->flowHashingEnabled()) {
if (size() > ZT_PROTO_VERB_EXT_FRAME_IDX_PAYLOAD) {
const unsigned int etherType = at<uint16_t>(ZT_PROTO_VERB_FRAME_IDX_ETHERTYPE);
const unsigned int frameLen = size() - ZT_PROTO_VERB_FRAME_IDX_PAYLOAD;
const uint8_t *const frameData = reinterpret_cast<const uint8_t *>(data()) + ZT_PROTO_VERB_FRAME_IDX_PAYLOAD;
if (etherType == ZT_ETHERTYPE_IPV4 && (frameLen >= 20)) {
uint16_t srcPort = 0;
uint16_t dstPort = 0;
uint8_t proto = (reinterpret_cast<const uint8_t *>(frameData)[9]);
const unsigned int headerLen = 4 * (reinterpret_cast<const uint8_t *>(frameData)[0] & 0xf);
switch(proto) {
case 0x01: // ICMP
//flowId = 0x01;
break;
// All these start with 16-bit source and destination port in that order
case 0x06: // TCP
case 0x11: // UDP
case 0x84: // SCTP
case 0x88: // UDPLite
if (frameLen > (headerLen + 4)) {
unsigned int pos = headerLen + 0;
srcPort = (reinterpret_cast<const uint8_t *>(frameData)[pos++]) << 8;
srcPort |= (reinterpret_cast<const uint8_t *>(frameData)[pos]);
pos++;
dstPort = (reinterpret_cast<const uint8_t *>(frameData)[pos++]) << 8;
dstPort |= (reinterpret_cast<const uint8_t *>(frameData)[pos]);
_flowId = dstPort ^ srcPort ^ proto;
}
break;
}
}
if (etherType == ZT_ETHERTYPE_IPV6 && (frameLen >= 40)) {
uint16_t srcPort = 0;
uint16_t dstPort = 0;
unsigned int pos;
unsigned int proto;
_ipv6GetPayload((const uint8_t *)frameData, frameLen, pos, proto);
switch(proto) {
case 0x3A: // ICMPv6
//flowId = 0x3A;
break;
// All these start with 16-bit source and destination port in that order
case 0x06: // TCP
case 0x11: // UDP
case 0x84: // SCTP
case 0x88: // UDPLite
if (frameLen > (pos + 4)) {
srcPort = (reinterpret_cast<const uint8_t *>(frameData)[pos++]) << 8;
srcPort |= (reinterpret_cast<const uint8_t *>(frameData)[pos]);
pos++;
dstPort = (reinterpret_cast<const uint8_t *>(frameData)[pos++]) << 8;
dstPort |= (reinterpret_cast<const uint8_t *>(frameData)[pos]);
_flowId = dstPort ^ srcPort ^ proto;
}
break;
default:
break;
}
}
}
}
const uint64_t nwid = at<uint64_t>(ZT_PROTO_VERB_FRAME_IDX_NETWORK_ID);
const SharedPtr<Network> network(RR->node->network(nwid));
bool trustEstablished = false;
@ -641,13 +742,12 @@ bool IncomingPacket::_doFRAME(const RuntimeEnvironment *RR,void *tPtr,const Shar
return false;
}
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_FRAME,0,Packet::VERB_NOP,trustEstablished,nwid);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_FRAME,0,Packet::VERB_NOP,trustEstablished,nwid,_flowId);
return true;
}
bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer)
bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer,int32_t flowId)
{
const uint64_t nwid = at<uint64_t>(ZT_PROTO_VERB_EXT_FRAME_IDX_NETWORK_ID);
const SharedPtr<Network> network(RR->node->network(nwid));
@ -676,7 +776,7 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const
const uint8_t *const frameData = (const uint8_t *)field(comLen + ZT_PROTO_VERB_EXT_FRAME_IDX_PAYLOAD,frameLen);
if ((!from)||(from == network->mac())) {
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid,flowId); // trustEstablished because COM is okay
return true;
}
@ -687,19 +787,19 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const
network->learnBridgeRoute(from,peer->address());
} else {
RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_EXT_FRAME,from,to,"bridging not allowed (remote)");
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid,flowId); // trustEstablished because COM is okay
return true;
}
} else if (to != network->mac()) {
if (to.isMulticast()) {
if (network->config().multicastLimit == 0) {
RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_EXT_FRAME,from,to,"multicast disabled");
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid,flowId); // trustEstablished because COM is okay
return true;
}
} else if (!network->config().permitsBridging(RR->identity.address())) {
RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_EXT_FRAME,from,to,"bridging not allowed (local)");
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid,flowId); // trustEstablished because COM is okay
return true;
}
}
@ -715,13 +815,15 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const
outp.append((uint8_t)Packet::VERB_EXT_FRAME);
outp.append((uint64_t)packetId());
outp.append((uint64_t)nwid);
const int64_t now = RR->node->now();
peer->recordOutgoingPacket(_path,outp.packetId(),outp.payloadLength(),outp.verb(),ZT_QOS_NO_FLOW,now);
outp.armor(peer->key(),true);
_path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now());
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,true,nwid,flowId);
} else {
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,false,nwid);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_EXT_FRAME,0,Packet::VERB_NOP,false,nwid,flowId);
}
return true;
@ -729,8 +831,10 @@ bool IncomingPacket::_doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const
bool IncomingPacket::_doECHO(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer)
{
if (!peer->rateGateEchoRequest(RR->node->now()))
uint64_t now = RR->node->now();
if (!peer->rateGateEchoRequest(now)) {
return true;
}
const uint64_t pid = packetId();
Packet outp(peer->address(),RR->identity.address(),Packet::VERB_OK);
@ -738,10 +842,11 @@ bool IncomingPacket::_doECHO(const RuntimeEnvironment *RR,void *tPtr,const Share
outp.append((uint64_t)pid);
if (size() > ZT_PACKET_IDX_PAYLOAD)
outp.append(reinterpret_cast<const unsigned char *>(data()) + ZT_PACKET_IDX_PAYLOAD,size() - ZT_PACKET_IDX_PAYLOAD);
peer->recordOutgoingPacket(_path,outp.packetId(),outp.payloadLength(),outp.verb(),ZT_QOS_NO_FLOW,now);
outp.armor(peer->key(),true);
_path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now());
peer->received(tPtr,_path,hops(),pid,payloadLength(),Packet::VERB_ECHO,0,Packet::VERB_NOP,false,0);
peer->received(tPtr,_path,hops(),pid,payloadLength(),Packet::VERB_ECHO,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW);
return true;
}
@ -767,7 +872,7 @@ bool IncomingPacket::_doMULTICAST_LIKE(const RuntimeEnvironment *RR,void *tPtr,c
RR->mc->add(tPtr,now,nwid,MulticastGroup(MAC(field(ptr + 8,6),6),at<uint32_t>(ptr + 14)),peer->address());
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_LIKE,0,Packet::VERB_NOP,false,0);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_LIKE,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW);
return true;
}
@ -889,7 +994,7 @@ bool IncomingPacket::_doNETWORK_CREDENTIALS(const RuntimeEnvironment *RR,void *t
}
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_NETWORK_CREDENTIALS,0,Packet::VERB_NOP,trustEstablished,(network) ? network->id() : 0);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_NETWORK_CREDENTIALS,0,Packet::VERB_NOP,trustEstablished,(network) ? network->id() : 0,ZT_QOS_NO_FLOW);
return true;
}
@ -915,7 +1020,7 @@ bool IncomingPacket::_doNETWORK_CONFIG_REQUEST(const RuntimeEnvironment *RR,void
_path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now());
}
peer->received(tPtr,_path,hopCount,requestPacketId,payloadLength(),Packet::VERB_NETWORK_CONFIG_REQUEST,0,Packet::VERB_NOP,false,nwid);
peer->received(tPtr,_path,hopCount,requestPacketId,payloadLength(),Packet::VERB_NETWORK_CONFIG_REQUEST,0,Packet::VERB_NOP,false,nwid,ZT_QOS_NO_FLOW);
return true;
}
@ -931,12 +1036,14 @@ bool IncomingPacket::_doNETWORK_CONFIG(const RuntimeEnvironment *RR,void *tPtr,c
outp.append((uint64_t)packetId());
outp.append((uint64_t)network->id());
outp.append((uint64_t)configUpdateId);
const int64_t now = RR->node->now();
peer->recordOutgoingPacket(_path,outp.packetId(),outp.payloadLength(),outp.verb(),ZT_QOS_NO_FLOW,now);
outp.armor(peer->key(),true);
_path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now());
}
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_NETWORK_CONFIG,0,Packet::VERB_NOP,false,(network) ? network->id() : 0);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_NETWORK_CONFIG,0,Packet::VERB_NOP,false,(network) ? network->id() : 0,ZT_QOS_NO_FLOW);
return true;
}
@ -979,12 +1086,13 @@ bool IncomingPacket::_doMULTICAST_GATHER(const RuntimeEnvironment *RR,void *tPtr
outp.append((uint32_t)mg.adi());
const unsigned int gatheredLocally = RR->mc->gather(peer->address(),nwid,mg,outp,gatherLimit);
if (gatheredLocally > 0) {
peer->recordOutgoingPacket(_path,outp.packetId(),outp.payloadLength(),outp.verb(),ZT_QOS_NO_FLOW,now);
outp.armor(peer->key(),true);
_path->send(RR,tPtr,outp.data(),outp.size(),now);
}
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_GATHER,0,Packet::VERB_NOP,trustEstablished,nwid);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_GATHER,0,Packet::VERB_NOP,trustEstablished,nwid,ZT_QOS_NO_FLOW);
return true;
}
@ -1032,19 +1140,19 @@ bool IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,void *tPtr,
if (network->config().multicastLimit == 0) {
RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_MULTICAST_FRAME,from,to.mac(),"multicast disabled");
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,false,nwid);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,false,nwid,ZT_QOS_NO_FLOW);
return true;
}
if ((frameLen > 0)&&(frameLen <= ZT_MAX_MTU)) {
if (!to.mac().isMulticast()) {
RR->t->incomingPacketInvalid(tPtr,_path,packetId(),source(),hops(),Packet::VERB_MULTICAST_FRAME,"destination not multicast");
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid,ZT_QOS_NO_FLOW); // trustEstablished because COM is okay
return true;
}
if ((!from)||(from.isMulticast())||(from == network->mac())) {
RR->t->incomingPacketInvalid(tPtr,_path,packetId(),source(),hops(),Packet::VERB_MULTICAST_FRAME,"invalid source MAC");
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid,ZT_QOS_NO_FLOW); // trustEstablished because COM is okay
return true;
}
@ -1058,7 +1166,7 @@ bool IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,void *tPtr,
network->learnBridgeRoute(from,peer->address());
} else {
RR->t->incomingNetworkFrameDropped(tPtr,network,_path,packetId(),size(),peer->address(),Packet::VERB_MULTICAST_FRAME,from,to.mac(),"bridging not allowed (remote)");
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid); // trustEstablished because COM is okay
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid,ZT_QOS_NO_FLOW); // trustEstablished because COM is okay
return true;
}
}
@ -1076,12 +1184,14 @@ bool IncomingPacket::_doMULTICAST_FRAME(const RuntimeEnvironment *RR,void *tPtr,
outp.append((uint32_t)to.adi());
outp.append((unsigned char)0x02); // flag 0x02 = contains gather results
if (RR->mc->gather(peer->address(),nwid,to,outp,gatherLimit)) {
const int64_t now = RR->node->now();
peer->recordOutgoingPacket(_path,outp.packetId(),outp.payloadLength(),outp.verb(),ZT_QOS_NO_FLOW,now);
outp.armor(peer->key(),true);
_path->send(RR,tPtr,outp.data(),outp.size(),RR->node->now());
}
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_MULTICAST_FRAME,0,Packet::VERB_NOP,true,nwid,ZT_QOS_NO_FLOW);
} else {
_sendErrorNeedCredentials(RR,tPtr,peer,nwid);
return false;
@ -1094,9 +1204,8 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt
{
const int64_t now = RR->node->now();
// First, subject this to a rate limit
if (!peer->rateGatePushDirectPaths(now)) {
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW);
return true;
}
@ -1108,8 +1217,6 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt
unsigned int ptr = ZT_PACKET_IDX_PAYLOAD + 2;
while (count--) { // if ptr overflows Buffer will throw
// TODO: some flags are not yet implemented
unsigned int flags = (*this)[ptr++];
unsigned int extLen = at<uint16_t>(ptr); ptr += 2;
ptr += extLen; // unused right now
@ -1132,6 +1239,7 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt
}
} break;
case 6: {
const InetAddress a(field(ptr,16),16,at<uint16_t>(ptr + 16));
if (
((flags & ZT_PUSH_DIRECT_PATHS_FLAG_FORGET_PATH) == 0) && // not being told to forget
@ -1149,7 +1257,7 @@ bool IncomingPacket::_doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPt
ptr += addrLen;
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_PUSH_DIRECT_PATHS,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW);
return true;
}
@ -1165,7 +1273,7 @@ bool IncomingPacket::_doUSER_MESSAGE(const RuntimeEnvironment *RR,void *tPtr,con
RR->node->postEvent(tPtr,ZT_EVENT_USER_MESSAGE,reinterpret_cast<const void *>(&um));
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_USER_MESSAGE,0,Packet::VERB_NOP,false,0);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_USER_MESSAGE,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW);
return true;
}
@ -1189,11 +1297,29 @@ bool IncomingPacket::_doREMOTE_TRACE(const RuntimeEnvironment *RR,void *tPtr,con
}
}
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_REMOTE_TRACE,0,Packet::VERB_NOP,false,0);
peer->received(tPtr,_path,hops(),packetId(),payloadLength(),Packet::VERB_REMOTE_TRACE,0,Packet::VERB_NOP,false,0,ZT_QOS_NO_FLOW);
return true;
}
/**
 * Handle an incoming VERB_PATH_NEGOTIATION_REQUEST.
 *
 * The request is ignored (but still reported as handled) when the peer has no
 * bond, when the bond's path negotiation rate gate rejects it, or when the
 * payload is not exactly one 16-bit integer. Otherwise the 16-bit value is
 * converted from network byte order and handed to the bond together with the
 * path the packet arrived on.
 *
 * @param RR Runtime environment (used here only to obtain the current time)
 * @param tPtr Thread pointer; not used by this handler
 * @param peer Peer that sent the request
 * @return Always true
 */
bool IncomingPacket::_doPATH_NEGOTIATION_REQUEST(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer)
{
	uint64_t now = RR->node->now();

	// Take a single reference to the bond and use it for every step below, so the
	// object that passed the null/rate-gate check is the same one that is invoked.
	SharedPtr<Bond> bond = peer->bond();
	if (!bond || !bond->rateGatePathNegotiation(now)) {
		return true;
	}

	// The payload must be exactly the 2-byte utility value; anything else is dropped.
	if (payloadLength() != sizeof(int16_t)) {
		return true;
	}

	// memcpy avoids an unaligned read from the packet buffer.
	int16_t remoteUtility = 0;
	memcpy(&remoteUtility, payload(), sizeof(int16_t));

	bond->processIncomingPathNegotiationRequest(now, _path, Utils::ntoh(remoteUtility));
	return true;
}
void IncomingPacket::_sendErrorNeedCredentials(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer,const uint64_t nwid)
{
Packet outp(source(),RR->identity.address(),Packet::VERB_ERROR);

View file

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -100,7 +100,7 @@ public:
* @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
* @return True if decoding and processing is complete, false if caller should try again
*/
bool tryDecode(const RuntimeEnvironment *RR,void *tPtr);
bool tryDecode(const RuntimeEnvironment *RR,void *tPtr,int32_t flowId);
/**
* @return Time of packet receipt / start of decode
@ -117,8 +117,8 @@ private:
bool _doOK(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
bool _doWHOIS(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
bool _doRENDEZVOUS(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
bool _doFRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
bool _doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
bool _doFRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer,int32_t flowId);
bool _doEXT_FRAME(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer,int32_t flowId);
bool _doECHO(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
bool _doMULTICAST_LIKE(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
bool _doNETWORK_CREDENTIALS(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
@ -129,6 +129,7 @@ private:
bool _doPUSH_DIRECT_PATHS(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
bool _doUSER_MESSAGE(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
bool _doREMOTE_TRACE(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
bool _doPATH_NEGOTIATION_REQUEST(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer);
void _sendErrorNeedCredentials(const RuntimeEnvironment *RR,void *tPtr,const SharedPtr<Peer> &peer,const uint64_t nwid);

View file

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -48,6 +48,7 @@ Node::Node(void *uptr,void *tptr,const struct ZT_Node_Callbacks *callbacks,int64
_networks(8),
_now(now),
_lastPingCheck(0),
_lastGratuitousPingCheck(0),
_lastHousekeepingRun(0),
_lastMemoizedTraceSettings(0)
{
@ -102,8 +103,9 @@ Node::Node(void *uptr,void *tptr,const struct ZT_Node_Callbacks *callbacks,int64
const unsigned long mcs = sizeof(Multicaster) + (((sizeof(Multicaster) & 0xf) != 0) ? (16 - (sizeof(Multicaster) & 0xf)) : 0);
const unsigned long topologys = sizeof(Topology) + (((sizeof(Topology) & 0xf) != 0) ? (16 - (sizeof(Topology) & 0xf)) : 0);
const unsigned long sas = sizeof(SelfAwareness) + (((sizeof(SelfAwareness) & 0xf) != 0) ? (16 - (sizeof(SelfAwareness) & 0xf)) : 0);
const unsigned long bc = sizeof(BondController) + (((sizeof(BondController) & 0xf) != 0) ? (16 - (sizeof(BondController) & 0xf)) : 0);
m = reinterpret_cast<char *>(::malloc(16 + ts + sws + mcs + topologys + sas));
m = reinterpret_cast<char *>(::malloc(16 + ts + sws + mcs + topologys + sas + bc));
if (!m)
throw std::bad_alloc();
RR->rtmem = m;
@ -118,12 +120,15 @@ Node::Node(void *uptr,void *tptr,const struct ZT_Node_Callbacks *callbacks,int64
RR->topology = new (m) Topology(RR,tptr);
m += topologys;
RR->sa = new (m) SelfAwareness(RR);
m += sas;
RR->bc = new (m) BondController(RR);
} catch ( ... ) {
if (RR->sa) RR->sa->~SelfAwareness();
if (RR->topology) RR->topology->~Topology();
if (RR->mc) RR->mc->~Multicaster();
if (RR->sw) RR->sw->~Switch();
if (RR->t) RR->t->~Trace();
if (RR->bc) RR->bc->~BondController();
::free(m);
throw;
}
@ -142,6 +147,7 @@ Node::~Node()
if (RR->mc) RR->mc->~Multicaster();
if (RR->sw) RR->sw->~Switch();
if (RR->t) RR->t->~Trace();
if (RR->bc) RR->bc->~BondController();
::free(RR->rtmem);
}
@ -246,9 +252,23 @@ ZT_ResultCode Node::processBackgroundTasks(void *tptr,int64_t now,volatile int64
_now = now;
Mutex::Lock bl(_backgroundTasksLock);
unsigned long bondCheckInterval = ZT_CORE_TIMER_TASK_GRANULARITY;
if (RR->bc->inUse()) {
// Gratuitously ping active peers so that QoS metrics have enough data to work with (if active path monitoring is enabled)
bondCheckInterval = std::min(std::max(RR->bc->minReqPathMonitorInterval(), ZT_CORE_TIMER_TASK_GRANULARITY), ZT_PING_CHECK_INVERVAL);
if ((now - _lastGratuitousPingCheck) >= bondCheckInterval) {
Hashtable< Address,std::vector<InetAddress> > alwaysContact;
_PingPeersThatNeedPing pfunc(RR,tptr,alwaysContact,now);
RR->topology->eachPeer<_PingPeersThatNeedPing &>(pfunc);
_lastGratuitousPingCheck = now;
}
RR->bc->processBackgroundTasks(tptr, now);
}
unsigned long timeUntilNextPingCheck = ZT_PING_CHECK_INVERVAL;
const int64_t timeSinceLastPingCheck = now - _lastPingCheck;
if (timeSinceLastPingCheck >= ZT_PING_CHECK_INVERVAL) {
if (timeSinceLastPingCheck >= timeUntilNextPingCheck) {
try {
_lastPingCheck = now;
@ -354,7 +374,7 @@ ZT_ResultCode Node::processBackgroundTasks(void *tptr,int64_t now,volatile int64
}
try {
*nextBackgroundTaskDeadline = now + (int64_t)std::max(std::min(timeUntilNextPingCheck,RR->sw->doTimerTasks(tptr,now)),(unsigned long)ZT_CORE_TIMER_TASK_GRANULARITY);
*nextBackgroundTaskDeadline = now + (int64_t)std::max(std::min(bondCheckInterval,std::min(timeUntilNextPingCheck,RR->sw->doTimerTasks(tptr,now))),(unsigned long)ZT_CORE_TIMER_TASK_GRANULARITY);
} catch ( ... ) {
return ZT_RESULT_FATAL_ERROR_INTERNAL;
}
@ -461,7 +481,7 @@ ZT_PeerList *Node::peers() const
for(std::vector< std::pair< Address,SharedPtr<Peer> > >::iterator pi(peers.begin());pi!=peers.end();++pi) {
ZT_Peer *p = &(pl->peers[pl->peerCount++]);
p->address = pi->second->address().toInt();
p->hadAggregateLink = 0;
p->isBonded = 0;
if (pi->second->remoteVersionKnown()) {
p->versionMajor = pi->second->remoteVersionMajor();
p->versionMinor = pi->second->remoteVersionMinor();
@ -478,28 +498,25 @@ ZT_PeerList *Node::peers() const
std::vector< SharedPtr<Path> > paths(pi->second->paths(_now));
SharedPtr<Path> bestp(pi->second->getAppropriatePath(_now,false));
p->hadAggregateLink |= pi->second->hasAggregateLink();
p->pathCount = 0;
for(std::vector< SharedPtr<Path> >::iterator path(paths.begin());path!=paths.end();++path) {
memcpy(&(p->paths[p->pathCount].address),&((*path)->address()),sizeof(struct sockaddr_storage));
p->paths[p->pathCount].localSocket = (*path)->localSocket();
p->paths[p->pathCount].lastSend = (*path)->lastOut();
p->paths[p->pathCount].lastReceive = (*path)->lastIn();
p->paths[p->pathCount].trustedPathId = RR->topology->getOutboundPathTrust((*path)->address());
p->paths[p->pathCount].expired = 0;
p->paths[p->pathCount].preferred = ((*path) == bestp) ? 1 : 0;
p->paths[p->pathCount].latency = (float)(*path)->latency();
p->paths[p->pathCount].packetDelayVariance = (*path)->packetDelayVariance();
p->paths[p->pathCount].throughputDisturbCoeff = (*path)->throughputDisturbanceCoefficient();
p->paths[p->pathCount].packetErrorRatio = (*path)->packetErrorRatio();
p->paths[p->pathCount].packetLossRatio = (*path)->packetLossRatio();
p->paths[p->pathCount].stability = (*path)->lastComputedStability();
p->paths[p->pathCount].throughput = (*path)->meanThroughput();
p->paths[p->pathCount].maxThroughput = (*path)->maxLifetimeThroughput();
p->paths[p->pathCount].allocation = (float)(*path)->allocation() / (float)255;
p->paths[p->pathCount].ifname = (*path)->getName();
p->paths[p->pathCount].scope = (*path)->ipScope();
++p->pathCount;
}
if (pi->second->bond()) {
p->isBonded = pi->second->bond();
p->bondingPolicy = pi->second->bond()->getPolicy();
p->isHealthy = pi->second->bond()->isHealthy();
p->numAliveLinks = pi->second->bond()->getNumAliveLinks();
p->numTotalLinks = pi->second->bond()->getNumTotalLinks();
}
}
return pl;

View file

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -34,6 +34,7 @@
#include "Salsa20.hpp"
#include "NetworkController.hpp"
#include "Hashtable.hpp"
#include "BondController.hpp"
// Bit mask for "expecting reply" hash
#define ZT_EXPECTING_REPLIES_BUCKET_MASK1 255
@ -186,6 +187,8 @@ public:
inline const Identity &identity() const { return _RR.identity; }
/**
 * @return Bond controller pointer held in this node's runtime environment (_RR.bc)
 */
inline BondController *bondController() const { return _RR.bc; }
/**
* Register that we are expecting a reply to a packet ID
*
@ -247,9 +250,6 @@ public:
inline const Address &remoteTraceTarget() const { return _remoteTraceTarget; }
inline Trace::Level remoteTraceLevel() const { return _remoteTraceLevel; }
inline void setMultipathMode(uint8_t mode) { _multipathMode = mode; }
inline uint8_t getMultipathMode() { return _multipathMode; }
inline bool localControllerHasAuthorized(const int64_t now,const uint64_t nwid,const Address &addr) const
{
_localControllerAuthorizations_m.lock();
@ -306,10 +306,9 @@ private:
Address _remoteTraceTarget;
enum Trace::Level _remoteTraceLevel;
uint8_t _multipathMode;
volatile int64_t _now;
int64_t _lastPingCheck;
int64_t _lastGratuitousPingCheck;
int64_t _lastHousekeepingRun;
int64_t _lastMemoizedTraceSettings;
volatile int64_t _prngState[2];

View file

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.

View file

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -56,7 +56,7 @@
* + Inline push of CertificateOfMembership deprecated
* 9 - 1.2.0 ... 1.2.14
* 10 - 1.4.0 ... CURRENT
* + Multipath capability and load balancing
* + Multipath capability and load balancing (tentative)
*/
#define ZT_PROTO_VERSION 10
@ -931,13 +931,13 @@ public:
*
* Upon receipt of this packet, the local peer will verify that the correct
* number of bytes were received by the remote peer. If these values do
* not agree that could be an indicator of packet loss.
* not agree that could be an indication of packet loss.
*
* Additionally, the local peer knows the interval of time that has
* elapsed since the last received ACK. With this information it can compute
* a rough estimate of the current throughput.
*
* This is sent at a maximum rate of once per every ZT_PATH_ACK_INTERVAL
* This is sent at a maximum rate of once per every ZT_QOS_ACK_INTERVAL
*/
VERB_ACK = 0x12,
@ -963,7 +963,8 @@ public:
* measure of the amount of time between when a packet was received and the
* egress time of its tracking QoS packet.
*
* This is sent at a maximum rate of once per every ZT_PATH_QOS_INTERVAL
* This is sent at a maximum rate of once per every
* ZT_QOS_MEASUREMENT_INTERVAL
*/
VERB_QOS_MEASUREMENT = 0x13,
@ -996,7 +997,34 @@ public:
* node on startup. This is helpful in identifying traces from different
* members of a cluster.
*/
VERB_REMOTE_TRACE = 0x15
VERB_REMOTE_TRACE = 0x15,
/**
* A request to a peer to use a specific path in a multi-path scenario:
* <[2] 16-bit signed integer that encodes a path choice utility>
*
* This is sent when a node operating in multipath mode observes that
* its inbound and outbound traffic aren't going over the same path. The
* node will compute its perceived utility for using its chosen outbound
* path and send this to a peer in an attempt to petition it to send
* its traffic over this same path.
*
* Scenarios:
*
* (1) Remote peer utility is GREATER than ours:
* - Remote peer will refuse the petition and continue using current path
* (2) Remote peer utility is LESS than ours:
* - Remote peer will accept the petition and switch to our chosen path
* (3) Remote peer utility is EQUAL to our own:
* - To prevent confusion and flapping, both sides will agree to use the
* numerical values of their identities to determine which path to use.
* The peer with the greatest identity will win.
*
* If a node petitions a peer repeatedly with no effect it will regard
* that as a refusal by the remote peer, in this case if the utility is
* negligible it will voluntarily switch to the remote peer's chosen path.
*/
VERB_PATH_NEGOTIATION_REQUEST = 0x16
};
/**

View file

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -26,10 +26,10 @@
#include "SharedPtr.hpp"
#include "AtomicCounter.hpp"
#include "Utils.hpp"
#include "RingBuffer.hpp"
#include "Packet.hpp"
#include "RingBuffer.hpp"
#include "../osdep/Phy.hpp"
#include "../osdep/Link.hpp"
/**
* Maximum return value of preferenceRank()
@ -46,7 +46,7 @@ class RuntimeEnvironment;
class Path
{
friend class SharedPtr<Path>;
Phy<Path *> *_phy;
friend class Bond;
public:
/**
@ -85,77 +85,111 @@ public:
_lastOut(0),
_lastIn(0),
_lastTrustEstablishedPacketReceived(0),
_lastPathQualityComputeTime(0),
_localSocket(-1),
_latency(0xffff),
_addr(),
_ipScope(InetAddress::IP_SCOPE_NONE),
_lastAck(0),
_lastThroughputEstimation(0),
_lastAckReceived(0),
_lastAckSent(0),
_lastQoSMeasurement(0),
_lastQoSRecordPurge(0),
_lastThroughputEstimation(0),
_lastRefractoryUpdate(0),
_lastAliveToggle(0),
_lastEligibilityState(false),
_lastTrialBegin(0),
_refractoryPeriod(0),
_monitorInterval(0),
_upDelay(0),
_downDelay(0),
_ipvPref(0),
_mode(0),
_onlyPathOnLink(false),
_enabled(false),
_bonded(false),
_negotiated(false),
_deprecated(false),
_shouldReallocateFlows(false),
_assignedFlowCount(0),
_latencyMean(0),
_latencyVariance(0),
_packetLossRatio(0),
_packetErrorRatio(0),
_throughputMean(0),
_throughputMax(0),
_throughputVariance(0),
_allocation(0),
_byteLoad(0),
_relativeByteLoad(0),
_affinity(0),
_failoverScore(0),
_unackedBytes(0),
_expectingAckAsOf(0),
_packetsReceivedSinceLastAck(0),
_packetsReceivedSinceLastQoS(0),
_maxLifetimeThroughput(0),
_lastComputedMeanThroughput(0),
_bytesAckedSinceLastThroughputEstimation(0),
_lastComputedMeanLatency(0.0),
_lastComputedPacketDelayVariance(0.0),
_lastComputedPacketErrorRatio(0.0),
_lastComputedPacketLossRatio(0),
_lastComputedStability(0.0),
_lastComputedRelativeQuality(0),
_lastComputedThroughputDistCoeff(0.0),
_lastAllocation(0)
{
memset(_ifname, 0, 16);
memset(_addrString, 0, sizeof(_addrString));
}
_packetsIn(0),
_packetsOut(0)
{}
Path(const int64_t localSocket,const InetAddress &addr) :
_lastOut(0),
_lastIn(0),
_lastTrustEstablishedPacketReceived(0),
_lastPathQualityComputeTime(0),
_localSocket(localSocket),
_latency(0xffff),
_addr(addr),
_ipScope(addr.ipScope()),
_lastAck(0),
_lastThroughputEstimation(0),
_lastAckReceived(0),
_lastAckSent(0),
_lastQoSMeasurement(0),
_lastQoSRecordPurge(0),
_lastThroughputEstimation(0),
_lastRefractoryUpdate(0),
_lastAliveToggle(0),
_lastEligibilityState(false),
_lastTrialBegin(0),
_refractoryPeriod(0),
_monitorInterval(0),
_upDelay(0),
_downDelay(0),
_ipvPref(0),
_mode(0),
_onlyPathOnLink(false),
_enabled(false),
_bonded(false),
_negotiated(false),
_deprecated(false),
_shouldReallocateFlows(false),
_assignedFlowCount(0),
_latencyMean(0),
_latencyVariance(0),
_packetLossRatio(0),
_packetErrorRatio(0),
_throughputMean(0),
_throughputMax(0),
_throughputVariance(0),
_allocation(0),
_byteLoad(0),
_relativeByteLoad(0),
_affinity(0),
_failoverScore(0),
_unackedBytes(0),
_expectingAckAsOf(0),
_packetsReceivedSinceLastAck(0),
_packetsReceivedSinceLastQoS(0),
_maxLifetimeThroughput(0),
_lastComputedMeanThroughput(0),
_bytesAckedSinceLastThroughputEstimation(0),
_lastComputedMeanLatency(0.0),
_lastComputedPacketDelayVariance(0.0),
_lastComputedPacketErrorRatio(0.0),
_lastComputedPacketLossRatio(0),
_lastComputedStability(0.0),
_lastComputedRelativeQuality(0),
_lastComputedThroughputDistCoeff(0.0),
_lastAllocation(0)
{
memset(_ifname, 0, 16);
memset(_addrString, 0, sizeof(_addrString));
if (_localSocket != -1) {
_phy->getIfName((PhySocket *) ((uintptr_t) _localSocket), _ifname, 16);
}
}
_packetsIn(0),
_packetsOut(0)
{}
/**
* Called when a packet is received from this remote path, regardless of content
*
* @param t Time of receive
*/
inline void received(const uint64_t t) { _lastIn = t; }
inline void received(const uint64_t t) {
// alive() is evaluated before _lastIn is overwritten below, so this check
// runs against the receive time of the previous packet.
if (!alive(t,_bonded)) {
// NOTE(review): this stores the previous receive time (_lastIn), not t,
// as the alive-toggle timestamp -- confirm that is the intended value.
_lastAliveToggle = _lastIn;
}
// Record the time of this receive.
_lastIn = t;
}
/**
* Set time last trusted packet was received (done in Peer::received())
@ -195,7 +229,6 @@ public:
else {
_latency = l;
}
_latencySamples.push(l);
}
/**
@ -284,341 +317,32 @@ public:
}
/**
* Record statistics on outgoing packets. Used later to estimate QoS metrics.
*
* @param now Current time
* @param packetId ID of packet
* @param payloadLength Length of payload
* @param verb Packet verb
* @param bonded Whether this path is part of a bond.
*/
inline void recordOutgoingPacket(int64_t now, int64_t packetId, uint16_t payloadLength, Packet::Verb verb)
{
Mutex::Lock _l(_statistics_m);
if (verb != Packet::VERB_ACK && verb != Packet::VERB_QOS_MEASUREMENT) {
if ((packetId & (ZT_PATH_QOS_ACK_PROTOCOL_DIVISOR - 1)) == 0) {
_unackedBytes += payloadLength;
// Take note that we're expecting a VERB_ACK on this path as of a specific time
_expectingAckAsOf = ackAge(now) > ZT_PATH_ACK_INTERVAL ? _expectingAckAsOf : now;
if (_outQoSRecords.size() < ZT_PATH_MAX_OUTSTANDING_QOS_RECORDS) {
_outQoSRecords[packetId] = now;
}
}
}
}
inline void setBonded(bool bonded) { _bonded = bonded; }
/**
* Record statistics on incoming packets. Used later to estimate QoS metrics.
*
* @param now Current time
* @param packetId ID of packet
* @param payloadLength Length of payload
* @param verb Packet verb
* @return True if this path is currently part of a bond.
*/
inline void recordIncomingPacket(int64_t now, int64_t packetId, uint16_t payloadLength, Packet::Verb verb)
{
Mutex::Lock _l(_statistics_m);
if (verb != Packet::VERB_ACK && verb != Packet::VERB_QOS_MEASUREMENT) {
if ((packetId & (ZT_PATH_QOS_ACK_PROTOCOL_DIVISOR - 1)) == 0) {
_inACKRecords[packetId] = payloadLength;
_packetsReceivedSinceLastAck++;
_inQoSRecords[packetId] = now;
_packetsReceivedSinceLastQoS++;
}
_packetValiditySamples.push(true);
}
}
/**
* Record that we've received a VERB_ACK on this path, also compute throughput if required.
*
* @param now Current time
* @param ackedBytes Number of bytes acknowledged by other peer
*/
inline void receivedAck(int64_t now, int32_t ackedBytes)
{
_expectingAckAsOf = 0;
_unackedBytes = (ackedBytes > _unackedBytes) ? 0 : _unackedBytes - ackedBytes;
int64_t timeSinceThroughputEstimate = (now - _lastThroughputEstimation);
if (timeSinceThroughputEstimate >= ZT_PATH_THROUGHPUT_MEASUREMENT_INTERVAL) {
uint64_t throughput = (uint64_t)((float)(_bytesAckedSinceLastThroughputEstimation * 8) / ((float)timeSinceThroughputEstimate / (float)1000));
_throughputSamples.push(throughput);
_maxLifetimeThroughput = throughput > _maxLifetimeThroughput ? throughput : _maxLifetimeThroughput;
_lastThroughputEstimation = now;
_bytesAckedSinceLastThroughputEstimation = 0;
} else {
_bytesAckedSinceLastThroughputEstimation += ackedBytes;
}
}
/**
* @return Number of bytes this peer is responsible for ACKing since last ACK
*/
inline int32_t bytesToAck()
{
Mutex::Lock _l(_statistics_m);
int32_t bytesToAck = 0;
std::map<uint64_t,uint16_t>::iterator it = _inACKRecords.begin();
while (it != _inACKRecords.end()) {
bytesToAck += it->second;
it++;
}
return bytesToAck;
}
/**
* @return Number of bytes thus far sent that have not been acknowledged by the remote peer
*/
inline int64_t unackedSentBytes()
{
return _unackedBytes;
}
/**
* Account for the fact that an ACK was just sent. Reset counters, timers, and clear statistics buffers
*
* @param Current time
*/
inline void sentAck(int64_t now)
{
Mutex::Lock _l(_statistics_m);
_inACKRecords.clear();
_packetsReceivedSinceLastAck = 0;
_lastAck = now;
}
/**
 * Receive QoS data, match it with the egress times recorded in _outQoSRecords and feed a
 * latency estimate to updateLatency() for every record that matches. Matched records are
 * removed; unknown packet IDs are ignored. Runs under the statistics lock.
 *
 * Latency is estimated as (round trip - remote holding time) / 2. Both quantities are
 * saturated rather than allowed to wrap: the holding time is reported by the remote peer
 * and may exceed the round trip we measured, which previously underflowed the unsigned
 * 16-bit subtraction and produced a latency sample of roughly 32 seconds.
 *
 * @param now Current time
 * @param count Number of records
 * @param rx_id table of packet IDs
 * @param rx_ts table of holding times
 */
inline void receivedQoS(int64_t now, int count, uint64_t *rx_id, uint16_t *rx_ts)
{
	Mutex::Lock _l(_statistics_m);
	for (int j=0; j<count; ++j) {
		std::map<uint64_t,uint64_t>::iterator it(_outQoSRecords.find(rx_id[j]));
		if (it == _outQoSRecords.end()) {
			continue; // we have no egress record for this packet ID
		}
		// Time since egress, clamped into the 16-bit range used for latency samples
		const int64_t elapsed = now - (int64_t)it->second;
		uint16_t rtt = 0;
		if (elapsed > 0) {
			rtt = (elapsed > 0xFFFF) ? (uint16_t)0xFFFF : (uint16_t)elapsed;
		}
		// Subtract the time the remote held the packet, never going below zero
		const uint16_t rtt_compensated = (rx_ts[j] < rtt) ? (uint16_t)(rtt - rx_ts[j]) : (uint16_t)0;
		const uint16_t latency = rtt_compensated / 2;
		updateLatency(latency, now);
		_outQoSRecords.erase(it);
	}
}
/**
 * Build the payload of a VERB_QOS_MEASUREMENT packet from _inQoSRecords.
 *
 * Each emitted record is a 64-bit packet ID followed by a 16-bit holding time
 * (now minus the stored receipt time), copied with memcpy in host byte order. At most
 * _packetsReceivedSinceLastQoS records are emitted and every emitted record is removed
 * from the map. The destination size is not checked here; the caller must supply a
 * buffer large enough for all emitted records.
 *
 * @param now Current time
 * @param qosBuffer destination buffer
 * @return Size of payload
 */
inline int32_t generateQoSPacket(int64_t now, char *qosBuffer)
{
	Mutex::Lock _l(_statistics_m);
	int32_t written = 0;
	char *out = qosBuffer;
	std::map<uint64_t,uint64_t>::iterator rec(_inQoSRecords.begin());
	for (int emitted=0; (emitted < _packetsReceivedSinceLastQoS) && (rec != _inQoSRecords.end()); ++emitted) {
		const uint64_t packetId = rec->first;
		const uint16_t heldFor = (uint16_t)(now - rec->second);
		memcpy(out, &packetId, sizeof(uint64_t));
		memcpy(out + sizeof(uint64_t), &heldFor, sizeof(uint16_t));
		out += sizeof(uint64_t) + sizeof(uint16_t);
		written += sizeof(uint64_t) + sizeof(uint16_t);
		// post-increment so the iterator moves on before its element is erased
		_inQoSRecords.erase(rec++);
	}
	return written;
}
/**
 * Account for the fact that a VERB_QOS_MEASUREMENT was just sent: zero the
 * received-packet counter and record the send time. Unlike sentAck(), this does not take
 * the statistics lock and does not touch the QoS record map (generateQoSPacket() removes
 * the records it emits).
 *
 * @param now Current time
 */
inline void sentQoS(int64_t now) {
_packetsReceivedSinceLastQoS = 0;
_lastQoSMeasurement = now;
}
/**
 * An ACK is due when at least one packet has been received since the last ACK and either
 * ZT_PATH_ACK_INTERVAL has elapsed since that ACK or the received-packet counter equals
 * ZT_PATH_QOS_TABLE_SIZE.
 *
 * @param now Current time
 * @return Whether an ACK (VERB_ACK) packet needs to be emitted at this time
 */
inline bool needsToSendAck(int64_t now) {
	if (!_packetsReceivedSinceLastAck) {
		return false; // nothing to acknowledge
	}
	const bool intervalElapsed = ((now - _lastAck) >= ZT_PATH_ACK_INTERVAL);
	const bool tableFull = (_packetsReceivedSinceLastAck == ZT_PATH_QOS_TABLE_SIZE);
	return intervalElapsed || tableFull;
}
/**
 * A QoS report is due when at least one packet has been received since the last report
 * and either the received-packet counter has reached ZT_PATH_QOS_TABLE_SIZE or more than
 * ZT_PATH_QOS_INTERVAL has elapsed since the last report.
 *
 * @param now Current time
 * @return Whether a QoS (VERB_QOS_MEASUREMENT) packet needs to be emitted at this time
 */
inline bool needsToSendQoS(int64_t now) {
	if (!_packetsReceivedSinceLastQoS) {
		return false; // nothing to report on
	}
	const bool tableFull = (_packetsReceivedSinceLastQoS >= ZT_PATH_QOS_TABLE_SIZE);
	const bool intervalElapsed = ((now - _lastQoSMeasurement) > ZT_PATH_QOS_INTERVAL);
	return tableFull || intervalElapsed;
}
/**
 * How long we have been waiting for a VERB_ACK on this path. The result is 0 while no
 * ACK is outstanding (_expectingAckAsOf is zero), so a path that is simply idle is not
 * penalized, while one that has stopped ACKing ages.
 *
 * @param now Current time
 * @return Time elapsed since an ACK became expected, or 0 if none is expected
 */
inline int64_t ackAge(int64_t now)
{
	if (!_expectingAckAsOf) {
		return 0;
	}
	return now - _expectingAckAsOf;
}
/**
 * The maximum observed throughput (in bits/s) for this path. receivedAck() raises the
 * stored value whenever a new throughput sample exceeds it.
 *
 * @return Largest throughput sample recorded so far
 */
inline uint64_t maxLifetimeThroughput() { return _maxLifetimeThroughput; }
/**
 * Cached value; refreshed from the mean of _throughputSamples each time
 * processBackgroundPathMeasurements() recomputes the path metrics.
 *
 * @return The mean throughput (in bits/s) of this link
 */
inline uint64_t meanThroughput() { return _lastComputedMeanThroughput; }
/**
 * Assign a new relative quality value for this path in the aggregate link. The value is
 * stored as-is and handed back by relativeQuality().
 *
 * @param rq Quality of this path in comparison to other paths available to this peer
 */
inline void updateRelativeQuality(float rq) { _lastComputedRelativeQuality = rq; }
/**
 * @return Quality of this path compared to others in the aggregate link, as last stored
 *         by updateRelativeQuality()
 */
inline float relativeQuality() { return _lastComputedRelativeQuality; }
/**
 * Assign a new allocation value for this path in the aggregate link. The value is stored
 * as-is and handed back by allocation().
 *
 * @param allocation Share of traffic to be sent over this path to a peer. The caller in
 *                   Peer::computeAggregateProportionalAllocation() scales it to 0-255
 *                   rather than 0-100.
 */
inline void updateComponentAllocationOfAggregateLink(unsigned char allocation) { _lastAllocation = allocation; }
/**
 * @return Share of traffic allocated to this path in the aggregate link, as last stored by
 *         updateComponentAllocationOfAggregateLink() (a 0-255 scale as written by Peer)
 */
inline unsigned char allocation() { return _lastAllocation; }
/**
 * Stability estimates can become expensive to compute, so the most recent result is
 * cached by processBackgroundPathMeasurements() and returned here.
 *
 * @return Most recently computed stability value for this path
 */
inline float lastComputedStability() { return _lastComputedStability; }
/**
 * @return A pointer to a cached copy of the human-readable name of the interface this Path's localSocket is bound to.
 *         The pointer refers to the member buffer _ifname (16 bytes), so it is only valid
 *         while this Path object exists.
 */
inline char *getName() { return _ifname; }
/**
 * Cached value; processBackgroundPathMeasurements() sets it to the standard deviation of
 * _latencySamples.
 *
 * @return Packet delay variance
 */
inline float packetDelayVariance() { return _lastComputedPacketDelayVariance; }
/**
 * Cached value; processBackgroundPathMeasurements() sets it to the mean of
 * _latencySamples.
 *
 * @return Previously-computed mean latency
 */
inline float meanLatency() { return _lastComputedMeanLatency; }
/**
 * NOTE(review): no assignment to _lastComputedPacketLossRatio is visible in this part of
 * the file; confirm where (and whether) it is updated before relying on this value.
 *
 * @return Packet loss ratio (PLR)
 */
inline float packetLossRatio() { return _lastComputedPacketLossRatio; }
/**
 * Cached value; processBackgroundPathMeasurements() sets it to one minus the mean of
 * _packetValiditySamples, or to 0 when there are no samples.
 *
 * @return Packet error ratio (PER)
 */
inline float packetErrorRatio() { return _lastComputedPacketErrorRatio; }
/**
 * Record an invalid incoming packet. This packet failed MAC/compression/cipher checks and will now
 * contribute to a Packet Error Ratio (PER). Implemented by pushing `false` into
 * _packetValiditySamples; the statistics lock is not taken here.
 */
inline void recordInvalidPacket() { _packetValiditySamples.push(false); }
/**
 * @return A pointer to a cached copy of the address string for this Path (For debugging only).
 *         The buffer (_addrString) is rewritten whenever
 *         processBackgroundPathMeasurements() recomputes the path metrics.
 */
inline char *getAddressString() { return _addrString; }
/**
 * Cached value; processBackgroundPathMeasurements() sets it to the mean of
 * _throughputDisturbanceSamples, which holds one throughput coefficient of variation
 * (stddev / mean) per recomputation.
 *
 * @return The current throughput disturbance coefficient
 */
inline float throughputDisturbanceCoefficient() { return _lastComputedThroughputDistCoeff; }
/**
 * Recompute and cache this path's stability and performance metrics. The resulting
 * stability coefficient measures how "well behaved" the path is; it is distinct from, but
 * required for estimating, the path's overall "quality".
 *
 * Nothing happens unless more than ZT_PATH_QUALITY_COMPUTE_INTERVAL has passed since the
 * previous recomputation. Otherwise, under the statistics lock, the cached address
 * string, mean latency, packet delay variance, mean throughput, packet error ratio,
 * throughput disturbance coefficient and stability are refreshed, and outgoing QoS
 * records older than ZT_PATH_QOS_TIMEOUT are dropped.
 *
 * @param now Current time
 */
inline void processBackgroundPathMeasurements(const int64_t now)
{
	if (!((now - _lastPathQualityComputeTime) > ZT_PATH_QUALITY_COMPUTE_INTERVAL)) {
		return; // computed recently enough
	}
	Mutex::Lock _l(_statistics_m);
	_lastPathQualityComputeTime = now;
	address().toString(_addrString);

	// Raw statistics taken from the sample windows
	_lastComputedMeanLatency = _latencySamples.mean();
	_lastComputedPacketDelayVariance = _latencySamples.stddev(); // Similar to "jitter" (SEE: RFC 3393, RFC 4689)
	_lastComputedMeanThroughput = (uint64_t)_throughputSamples.mean();
	// Without any validity samples the PER is taken to be 0
	_lastComputedPacketErrorRatio = 1 - (_packetValiditySamples.count() ? _packetValiditySamples.mean() : 1);

	// Bring measurements with very different ranges onto a common 0..10 scale
	const float scaledPdv = Utils::normalize(_lastComputedPacketDelayVariance, 0, ZT_PATH_MAX_PDV, 0, 10);
	const float scaledLatency = Utils::normalize(_lastComputedMeanLatency, 0, ZT_PATH_MAX_MEAN_LATENCY, 0, 10);

	// Coefficient of variation of the throughput; 1 when the mean is not positive
	float throughputCv = 1;
	if (_throughputSamples.mean() > 0) {
		throughputCv = _throughputSamples.stddev() / _throughputSamples.mean();
	}

	// Exponential cutoff, then weighting of each contribution
	float pdvTerm = expf((-1.0f)*scaledPdv) * (float)ZT_PATH_CONTRIB_PDV;
	float latencyTerm = expf((-1.0f)*scaledLatency) * (float)ZT_PATH_CONTRIB_LATENCY;
	float disturbanceTerm = expf((-1.0f)*throughputCv) * (float)ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE;

	// Throughput Disturbance Coefficient
	_throughputDisturbanceSamples.push(throughputCv);
	_lastComputedThroughputDistCoeff = _throughputDisturbanceSamples.mean();

	// A contribution whose weight is not positive is ignored by pinning it to 1
	if (!(ZT_PATH_CONTRIB_PDV > 0.0)) {
		pdvTerm = 1;
	}
	if (!(ZT_PATH_CONTRIB_LATENCY > 0.0)) {
		latencyTerm = 1;
	}
	if (!(ZT_PATH_CONTRIB_THROUGHPUT_DISTURBANCE > 0.0)) {
		disturbanceTerm = 1;
	}

	// Stability, discounted by the packet error ratio
	_lastComputedStability = pdvTerm + latencyTerm + disturbanceTerm;
	_lastComputedStability *= 1 - _lastComputedPacketErrorRatio;

	// Prevent QoS records from sticking around for too long
	for (std::map<uint64_t,uint64_t>::iterator rec(_outQoSRecords.begin()); rec != _outQoSRecords.end(); ) {
		// Time since egress of tracked packet
		if ((now - rec->second) >= ZT_PATH_QOS_TIMEOUT) {
			_outQoSRecords.erase(rec++);
		} else {
			++rec;
		}
	}
}
/**
 * @return The current value of the _bonded flag
 */
inline bool bonded() { return _bonded; }
/**
 * A path counts as alive while less than ZT_PATH_HEARTBEAT_PERIOD + 5000 has elapsed
 * since _lastIn.
 *
 * @param now Current time
 * @return True if this path is alive (receiving heartbeats)
 */
inline bool alive(const int64_t now) const { return ((now - _lastIn) < (ZT_PATH_HEARTBEAT_PERIOD + 5000)); }
/**
 * Liveness check that can follow the bond's monitoring interval.
 *
 * With bonding enabled and a non-zero _monitorInterval, the path is alive while less than
 * three monitor intervals have passed since _lastIn. Otherwise the window is
 * ZT_PATH_HEARTBEAT_PERIOD + 5000.
 *
 * @param now Current time
 * @param bondingEnabled Whether the bonding-specific window should be considered
 * @return True if this path is alive
 */
inline bool alive(const int64_t now, bool bondingEnabled = false) const {
	const int64_t sinceLastRx = now - _lastIn;
	if (bondingEnabled && _monitorInterval) {
		return sinceLastRx < (_monitorInterval * 3);
	}
	return sinceLastRx < (ZT_PATH_HEARTBEAT_PERIOD + 5000);
}
/**
 * A heartbeat is due once at least ZT_PATH_HEARTBEAT_PERIOD has elapsed since _lastOut.
 *
 * @param now Current time
 * @return True if this path needs a heartbeat
 */
inline bool needsHeartbeat(const int64_t now) const { return ((now - _lastOut) >= ZT_PATH_HEARTBEAT_PERIOD); }
/**
 * True only when the path is allowed(), a positive _monitorInterval is configured, and at
 * least that interval has elapsed since _lastOut.
 *
 * @param now Current time
 * @return True if this path needs a heartbeat in accordance to the user-specified path monitor frequency
 */
inline bool needsGratuitousHeartbeat(const int64_t now) { return allowed() && (_monitorInterval > 0) && ((now - _lastOut) >= _monitorInterval); }
/**
* @return Last time we sent something
*/
@ -629,62 +353,335 @@ public:
*/
// Returns the stored _lastIn timestamp (the value alive() and age() compare against `now`).
inline int64_t lastIn() const { return _lastIn; }
/**
 * @param now Current time
 * @return the age of the path in terms of receiving packets, i.e. now minus _lastIn
 */
inline int64_t age(int64_t now) { return (now - _lastIn); }
/**
 * @return Time last trust-established packet was received (the stored
 *         _lastTrustEstablishedPacketReceived timestamp)
 */
inline int64_t lastTrustEstablishedPacketReceived() const { return _lastTrustEstablishedPacketReceived; }
/**
 * @param now Current time
 * @return Time since the last VERB_ACK was received, or 0 if _lastAckReceived is still zero
 */
inline int64_t ackAge(int64_t now)
{
	if (!_lastAckReceived) {
		return 0;
	}
	return now - _lastAckReceived;
}
/**
 * Set or update a refractory period for the path.
 *
 * On a path failure the period becomes `punishment`, plus twice the current period when
 * one is already in effect, capped at ZT_MULTIPATH_MAX_REFRACTORY_PERIOD; the
 * last-update timestamp is cleared. Otherwise the period is drained by the time elapsed
 * since the last update (or since _lastAliveToggle when no update is on record). Once it
 * is fully drained both the period and the last-update timestamp are reset to zero.
 *
 * @param now Current time
 * @param punishment How much a path should be punished
 * @param pathFailure Whether this call is the result of a recent path failure
 */
inline void adjustRefractoryPeriod(int64_t now, uint32_t punishment, bool pathFailure) {
	if (pathFailure) {
		unsigned int proposed = punishment;
		if (_refractoryPeriod) {
			proposed = punishment + (_refractoryPeriod * 2);
		}
		_refractoryPeriod = std::min(proposed, (unsigned int)ZT_MULTIPATH_MAX_REFRACTORY_PERIOD);
		_lastRefractoryUpdate = 0;
		return;
	}

	// Time to drain: measured from the previous update if there was one, otherwise from
	// the last alive toggle.
	uint32_t drained = 0;
	if (_lastRefractoryUpdate) {
		drained = (now - _lastRefractoryUpdate);
	} else {
		drained = (now - _lastAliveToggle);
	}

	if (_refractoryPeriod > drained) {
		_refractoryPeriod -= drained;
		_lastRefractoryUpdate = now;
	} else {
		_refractoryPeriod = 0;
		_lastRefractoryUpdate = 0;
	}
}
/**
 * Determine the current state of eligibility of the path.
 *
 * A path is never eligible while it is not allowed(), nor (when requested) while a
 * refractory period is in effect. An allowed path is eligible during its trial period
 * (less than _upDelay since startTrial()). Outside the trial it must have been up for at
 * least _upDelay since the last alive toggle and must show either recent inbound traffic
 * (age below four monitor intervals plus _downDelay) or a recent ACK (ACK age below
 * ackSendInterval).
 *
 * @param now Current time
 * @param ackSendInterval Interval against which the ACK age is judged
 * @param includeRefractoryPeriod Whether current punishment should be taken into consideration
 * @return True if this path can be used in a bond at the current time
 */
inline bool eligible(uint64_t now, int ackSendInterval, bool includeRefractoryPeriod = false) {
	if (includeRefractoryPeriod && _refractoryPeriod) {
		return false;
	}
	if (!allowed()) {
		return false;
	}
	// Still within the trial period: admitted without further evidence
	if ((now - _lastTrialBegin) < _upDelay) {
		return true;
	}
	// Not long enough since the link last came online
	if ((now - _lastAliveToggle) < _upDelay) {
		return false;
	}
	// Simple RX age (driven by packets of any type and gratuitous VERB_HELLOs)
	if (age(now) < ((_monitorInterval * 4) + _downDelay)) {
		return true;
	}
	// Whether the remote peer is actually responding to our outgoing traffic
	return ackAge(now) < (ackSendInterval);
}
/**
 * Record when this path first entered the bond. Each path is given a trial period where it is admitted
 * to the bond without requiring observations to prove its performance or reliability.
 * eligible() treats the path as in trial while less than _upDelay has elapsed since the
 * timestamp stored here.
 *
 * @param now Current time
 */
inline void startTrial(uint64_t now) { _lastTrialBegin = now; }
/**
 * A disabled path is never allowed. An enabled path is allowed when no IP version
 * preference is set (_ipvPref == 0), when it is IPv4 and the preference is 4, 46 or 64,
 * or when it is IPv6 and the preference is 6, 46 or 64.
 *
 * @return True if a path is permitted to be used in a bond (according to user pref.)
 */
inline bool allowed() {
	if (!_enabled) {
		return false;
	}
	if (!_ipvPref) {
		return true; // no preference configured
	}
	if (_addr.isV4() && (_ipvPref == 4 || _ipvPref == 46 || _ipvPref == 64)) {
		return true;
	}
	return (_addr.isV6() && (_ipvPref == 6 || _ipvPref == 46 || _ipvPref == 64));
}
/**
 * True when _onlyPathOnLink is set, when the path is IPv4 and the preference is 4 or 46,
 * or when the path is IPv6 and the preference is 6 or 64.
 *
 * @return True if a path is preferred over another on the same physical link (according to user pref.)
 */
inline bool preferred() {
	if (_onlyPathOnLink) {
		return true;
	}
	if (_addr.isV4() && (_ipvPref == 4 || _ipvPref == 46)) {
		return true;
	}
	return (_addr.isV6() && (_ipvPref == 6 || _ipvPref == 64));
}
/**
 * An ACK is due when at least one packet has been received since the last ACK and either
 * ackSendInterval has elapsed since _lastAckSent or the received-packet counter equals
 * ZT_QOS_TABLE_SIZE.
 *
 * @param now Current time
 * @param ackSendInterval Minimum time between ACKs
 * @return Whether an ACK (VERB_ACK) packet needs to be emitted at this time
 */
inline bool needsToSendAck(int64_t now, int ackSendInterval) {
	if (!_packetsReceivedSinceLastAck) {
		return false; // nothing to acknowledge
	}
	const bool intervalElapsed = ((now - _lastAckSent) >= ackSendInterval);
	const bool tableFull = (_packetsReceivedSinceLastAck == ZT_QOS_TABLE_SIZE);
	return intervalElapsed || tableFull;
}
/**
 * A QoS report is due when at least one packet has been received since the last report
 * and either the received-packet counter has reached ZT_QOS_TABLE_SIZE or more than
 * qosSendInterval has elapsed since _lastQoSMeasurement.
 *
 * @param now Current time
 * @param qosSendInterval Maximum time between QoS reports
 * @return Whether a QoS (VERB_QOS_MEASUREMENT) packet needs to be emitted at this time
 */
inline bool needsToSendQoS(int64_t now, int qosSendInterval) {
	if (!_packetsReceivedSinceLastQoS) {
		return false; // nothing to report on
	}
	const bool tableFull = (_packetsReceivedSinceLastQoS >= ZT_QOS_TABLE_SIZE);
	const bool intervalElapsed = ((now - _lastQoSMeasurement) > qosSendInterval);
	return tableFull || intervalElapsed;
}
/**
 * Reset packet counters: both _packetsIn and _packetsOut (the counters used for tracking
 * path load) are set to zero.
 */
inline void resetPacketCounts()
{
_packetsIn = 0;
_packetsOut = 0;
}
private:
// Taken by bytesToAck(), sentAck(), receivedQoS(), generateQoSPacket() and
// processBackgroundPathMeasurements() while they touch the record maps and sample buffers.
Mutex _statistics_m;
// Compared against `now` by needsHeartbeat() and needsGratuitousHeartbeat().
volatile int64_t _lastOut;
// Compared against `now` by alive() and age(); returned by lastIn().
volatile int64_t _lastIn;
// Returned by lastTrustEstablishedPacketReceived().
volatile int64_t _lastTrustEstablishedPacketReceived;
// Time at which processBackgroundPathMeasurements() last recomputed the cached metrics.
volatile int64_t _lastPathQualityComputeTime;
int64_t _localSocket;
volatile unsigned int _latency;
// Remote address; its IP version is consulted by allowed() and preferred().
InetAddress _addr;
InetAddress::IpScope _ipScope; // memoize this since it's a computed value checked often
AtomicCounter __refCount;
// Consumed by receivedQoS(); stale entries are expired by processBackgroundPathMeasurements().
std::map<uint64_t,uint64_t> _outQoSRecords; // id:egress_time
// Drained by generateQoSPacket().
std::map<uint64_t,uint64_t> _inQoSRecords; // id:now
// Summed by bytesToAck(); cleared by sentAck().
std::map<uint64_t,uint16_t> _inACKRecords; // id:len
// NOTE(review): the three maps below are not referenced by any method visible in this
// part of the file; confirm which set of record maps is the live one.
std::map<uint64_t,uint64_t> qosStatsOut; // id:egress_time
std::map<uint64_t,uint64_t> qosStatsIn; // id:now
std::map<uint64_t,uint16_t> ackStatsIn; // id:len
// Set by sentAck(); read by needsToSendAck(now).
int64_t _lastAck;
// Start of the current throughput measurement interval; maintained by receivedAck().
int64_t _lastThroughputEstimation;
// Set by sentQoS(); read by needsToSendQoS().
int64_t _lastQoSMeasurement;
int64_t _lastQoSRecordPurge;
// NOTE(review): the sample buffers below are not referenced by any method visible in this
// part of the file.
RingBuffer<int,ZT_QOS_SHORTTERM_SAMPLE_WIN_SIZE> qosRecordSize;
RingBuffer<float,ZT_QOS_SHORTTERM_SAMPLE_WIN_SIZE> qosRecordLossSamples;
RingBuffer<uint64_t,ZT_QOS_SHORTTERM_SAMPLE_WIN_SIZE> throughputSamples;
RingBuffer<bool,ZT_QOS_SHORTTERM_SAMPLE_WIN_SIZE> packetValiditySamples;
RingBuffer<float,ZT_QOS_SHORTTERM_SAMPLE_WIN_SIZE> _throughputVarianceSamples;
RingBuffer<uint16_t,ZT_QOS_SHORTTERM_SAMPLE_WIN_SIZE> latencySamples;
/**
* Last time that a VERB_ACK was received on this path.
*/
uint64_t _lastAckReceived;
/**
* Last time that a VERB_ACK was sent out on this path.
*/
uint64_t _lastAckSent;
/**
* Last time that a VERB_QOS_MEASUREMENT was sent out on this path.
*/
uint64_t _lastQoSMeasurement;
/**
* Last time that the path's throughput was estimated.
*/
uint64_t _lastThroughputEstimation;
/**
* The last time that the refractory period was updated.
*/
uint64_t _lastRefractoryUpdate;
/**
* The last time that the path was marked as "alive".
*/
uint64_t _lastAliveToggle;
/**
* State of eligibility at last check. Used for determining state changes.
*/
bool _lastEligibilityState;
/**
* Timestamp indicating when this path's trial period began.
*/
uint64_t _lastTrialBegin;
/**
* Amount of time that this path will be prevented from becoming a member of a bond.
*/
uint32_t _refractoryPeriod;
/**
* Monitor interval specific to this path or that was inherited from the bond controller.
*/
int32_t _monitorInterval;
/**
* Up delay interval specific to this path or that was inherited from the bond controller.
*/
uint32_t _upDelay;
/**
* Down delay interval specific to this path or that was inherited from the bond controller.
*/
uint32_t _downDelay;
/**
* IP version preference inherited from the physical link.
*/
uint8_t _ipvPref;
/**
* Mode inherited from the physical link.
*/
uint8_t _mode;
/**
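* Flag that makes preferred() return true unconditionally. Judging by its name it marks
* this path as the only one on its physical link (NOTE(review): confirm where it is set).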
*/
bool _onlyPathOnLink;
/**
* Enabled state inherited from the physical link.
*/
bool _enabled;
/**
* Whether this path is currently part of a bond.
*/
bool _bonded;
/**
* Whether this path was intentionally negotiated by either peer.
*/
bool _negotiated;
/**
* Whether this path has been deprecated due to performance issues. Current traffic flows
* will be re-allocated to other paths in the most non-disruptive manner (if possible),
* and new traffic will not be allocated to this path.
*/
bool _deprecated;
/**
* Whether flows should be moved from this path. Current traffic flows will be re-allocated
* immediately.
*/
bool _shouldReallocateFlows;
/**
* The number of flows currently assigned to this path.
*/
uint16_t _assignedFlowCount;
/**
* The mean latency (computed from a sliding window.)
*/
float _latencyMean;
/**
* Packet delay variance (computed from a sliding window.)
*/
float _latencyVariance;
/**
* The ratio of lost packets to received packets.
*/
float _packetLossRatio;
/**
* The ratio of packets that failed their MAC/CRC checks to those that did not.
*/
float _packetErrorRatio;
/**
* The estimated mean throughput of this path.
*/
uint64_t _throughputMean;
/**
* The maximum observed throughput of this path.
*/
uint64_t _throughputMax;
/**
* The variance in the estimated throughput of this path.
*/
float _throughputVariance;
/**
* The relative quality of this path to all others in the bond, [0-255].
*/
uint8_t _allocation;
/**
* How much load this path is under.
*/
uint64_t _byteLoad;
/**
* How much load this path is under (relative to other paths in the bond.)
*/
uint8_t _relativeByteLoad;
/**
* Relative value expressing how "deserving" this path is of new traffic.
*/
uint8_t _affinity;
/**
* Score that indicates to what degree this path is preferred over others that
* are available to the bonding policy. (specifically for active-backup)
*/
uint32_t _failoverScore;
/**
* Number of bytes thus far sent that have not been acknowledged by the remote peer.
*/
int64_t _unackedBytes;
// Non-zero while an ACK is outstanding; reset to 0 by receivedAck() and read by ackAge(now).
int64_t _expectingAckAsOf;
// NOTE(review): the two counters below are declared again further down as int32_t
// (with documentation); only one pair of declarations can remain.
int16_t _packetsReceivedSinceLastAck;
int16_t _packetsReceivedSinceLastQoS;
// Largest throughput sample seen so far; raised by receivedAck().
uint64_t _maxLifetimeThroughput;
// Mean of _throughputSamples as of the last processBackgroundPathMeasurements() run.
uint64_t _lastComputedMeanThroughput;
/**
* Number of packets received since the last VERB_ACK was sent to the remote peer.
*/
int32_t _packetsReceivedSinceLastAck;
/**
* Number of packets received since the last VERB_QOS_MEASUREMENT was sent to the remote peer.
*/
int32_t _packetsReceivedSinceLastQoS;
/**
* Bytes acknowledged via incoming VERB_ACK since the last estimation of throughput.
*/
uint64_t _bytesAckedSinceLastThroughputEstimation;
// The four values below are the ones handed out by meanLatency(), packetDelayVariance(),
// packetErrorRatio() and packetLossRatio(). The first three are refreshed by
// processBackgroundPathMeasurements(); no writer of the loss ratio is visible in this
// part of the file.
float _lastComputedMeanLatency;
float _lastComputedPacketDelayVariance;
float _lastComputedPacketErrorRatio;
float _lastComputedPacketLossRatio;
// cached estimates
float _lastComputedStability;
// Written by updateRelativeQuality(), read by relativeQuality().
float _lastComputedRelativeQuality;
float _lastComputedThroughputDistCoeff;
// Written by updateComponentAllocationOfAggregateLink(), read by allocation().
unsigned char _lastAllocation;
// cached human-readable strings for tracing purposes
char _ifname[16];
char _addrString[256];
// Sample windows: throughput samples are pushed by receivedAck(), invalid-packet markers
// by recordInvalidPacket(), and disturbance samples by processBackgroundPathMeasurements().
RingBuffer<uint64_t,ZT_PATH_QUALITY_METRIC_WIN_SZ> _throughputSamples;
RingBuffer<uint32_t,ZT_PATH_QUALITY_METRIC_WIN_SZ> _latencySamples;
RingBuffer<bool,ZT_PATH_QUALITY_METRIC_WIN_SZ> _packetValiditySamples;
RingBuffer<float,ZT_PATH_QUALITY_METRIC_WIN_SZ> _throughputDisturbanceSamples;
/**
* Counters used for tracking path load.
*/
int _packetsIn;
int _packetsOut;
};
} // namespace ZeroTier

View file

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -14,7 +14,6 @@
#include "../version.h"
#include "Constants.hpp"
#include "Peer.hpp"
#include "Node.hpp"
#include "Switch.hpp"
#include "Network.hpp"
#include "SelfAwareness.hpp"
@ -35,20 +34,14 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident
_lastTriedMemorizedPath(0),
_lastDirectPathPushSent(0),
_lastDirectPathPushReceive(0),
_lastEchoRequestReceived(0),
_lastCredentialRequestSent(0),
_lastWhoisRequestReceived(0),
_lastEchoRequestReceived(0),
_lastCredentialsReceived(0),
_lastTrustEstablishedPacketReceived(0),
_lastSentFullHello(0),
_lastACKWindowReset(0),
_lastQoSWindowReset(0),
_lastMultipathCompatibilityCheck(0),
_lastEchoCheck(0),
_freeRandomByte((unsigned char)((uintptr_t)this >> 4) ^ ++s_freeRandomByteCounter),
_uniqueAlivePathCount(0),
_localMultipathSupported(false),
_remoteMultipathSupported(false),
_canUseMultipath(false),
_vProto(0),
_vMajor(0),
_vMinor(0),
@ -56,14 +49,18 @@ Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Ident
_id(peerIdentity),
_directPathPushCutoffCount(0),
_credentialsCutoffCount(0),
_linkIsBalanced(false),
_linkIsRedundant(false),
_remotePeerMultipathEnabled(false),
_lastAggregateStatsReport(0),
_lastAggregateAllocation(0)
_echoRequestCutoffCount(0),
_uniqueAlivePathCount(0),
_localMultipathSupported(false),
_remoteMultipathSupported(false),
_canUseMultipath(false),
_shouldCollectPathStatistics(0),
_bondingPolicy(0),
_lastComputedAggregateMeanLatency(0)
{
if (!myIdentity.agree(peerIdentity,_key,ZT_PEER_SECRET_KEY_LENGTH))
if (!myIdentity.agree(peerIdentity,_key,ZT_PEER_SECRET_KEY_LENGTH)) {
throw ZT_EXCEPTION_INVALID_ARGUMENT;
}
}
void Peer::received(
@ -76,7 +73,8 @@ void Peer::received(
const uint64_t inRePacketId,
const Packet::Verb inReVerb,
const bool trustEstablished,
const uint64_t networkId)
const uint64_t networkId,
const int32_t flowId)
{
const int64_t now = RR->node->now();
@ -93,7 +91,9 @@ void Peer::received(
break;
}
if (trustEstablished) {
recordIncomingPacket(tPtr, path, packetId, payloadLength, verb, flowId, now);
if (trustEstablished) {
_lastTrustEstablishedPacketReceived = now;
path->trustedPacketReceived(now);
}
@ -139,6 +139,9 @@ void Peer::received(
if (q > replacePathQuality) {
replacePathQuality = q;
replacePath = i;
if (!_paths[i].p->alive(now)) {
break; // Stop searching, we found an identical dead path, replace the object
}
}
} else {
replacePath = i;
@ -216,136 +219,15 @@ void Peer::received(
}
}
// Record an outgoing packet for this peer. The packet ID is always folded into
// _freeRandomByte; the per-path record (Path::recordOutgoingPacket) is only made while
// _canUseMultipath is set.
void Peer::recordOutgoingPacket(const SharedPtr<Path> &path, const uint64_t packetId,
uint16_t payloadLength, const Packet::Verb verb, int64_t now)
SharedPtr<Path> Peer::getAppropriatePath(int64_t now, bool includeExpired, int32_t flowId)
{
_freeRandomByte += (unsigned char)(packetId >> 8); // grab entropy to use in path selection logic for multipath
if (_canUseMultipath) {
path->recordOutgoingPacket(now, packetId, payloadLength, verb);
}
}
// Record an incoming packet on `path`. Nothing is done unless multipath is in use. When it
// is, an ACK that has come due is sent first, and only then is the packet added to the
// path's records.
void Peer::recordIncomingPacket(void *tPtr, const SharedPtr<Path> &path, const uint64_t packetId,
	uint16_t payloadLength, const Packet::Verb verb, int64_t now)
{
	if (!_canUseMultipath) {
		return;
	}
	if (path->needsToSendAck(now)) {
		sendACK(tPtr, path, path->localSocket(), path->address(), now);
	}
	path->recordIncomingPacket(now, packetId, payloadLength, verb);
}
// Compute, for every known path, a relative quality and from it an allocation on a 0-255
// scale (stored via Path::updateComponentAllocationOfAggregateLink).
//
// Relative quality is a weighted sum of the path's stability, throughput and IP scope,
// each taken relative to the best path, and is then scaled down by how long the path has
// been waiting for an ACK.
//
// Two defects of the previous version are fixed here:
//  - When every path's relative quality was cut off to 0, the final division was 0/0 and
//    the NaN was cast to unsigned char (undefined behavior). The allocation now falls back
//    to an even split across the known paths.
//  - The throughput term used fmaxf(1.0f, rel). Since rel is at most 1 that was always 1,
//    so measured throughput never influenced the result. It is now clamped with fminf.
void Peer::computeAggregateProportionalAllocation(int64_t now)
{
	float maxStability = 0;
	float totalRelativeQuality = 0;
	float maxThroughput = 1;
	float maxScope = 0;
	unsigned int knownPaths = 0;
	float relStability[ZT_MAX_PEER_NETWORK_PATHS];
	float relThroughput[ZT_MAX_PEER_NETWORK_PATHS];
	memset(&relStability, 0, sizeof(relStability));
	memset(&relThroughput, 0, sizeof(relThroughput));
	// Survey all paths
	for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
		if (_paths[i].p) {
			++knownPaths;
			relStability[i] = _paths[i].p->lastComputedStability();
			relThroughput[i] = (float)_paths[i].p->maxLifetimeThroughput();
			maxStability = relStability[i] > maxStability ? relStability[i] : maxStability;
			maxThroughput = relThroughput[i] > maxThroughput ? relThroughput[i] : maxThroughput;
			maxScope = _paths[i].p->ipScope() > maxScope ? _paths[i].p->ipScope() : maxScope;
		}
	}
	// Convert to relative values
	for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
		if (_paths[i].p) {
			relStability[i] /= maxStability ? maxStability : 1;
			relThroughput[i] /= maxThroughput ? maxThroughput : 1;
			float normalized_ma = Utils::normalize((float)_paths[i].p->ackAge(now), 0, ZT_PATH_MAX_AGE, 0, 10);
			float age_contrib = exp((-1)*normalized_ma);
			float relScope = ((float)(_paths[i].p->ipScope()+1) / (maxScope + 1));
			float relQuality =
				(relStability[i] * (float)ZT_PATH_CONTRIB_STABILITY)
				+ (fminf(1.0f, relThroughput[i]) * (float)ZT_PATH_CONTRIB_THROUGHPUT)
				+ relScope * (float)ZT_PATH_CONTRIB_SCOPE;
			relQuality *= age_contrib;
			// Arbitrary cutoffs
			relQuality = relQuality > (1.00f / 100.0f) ? relQuality : 0.0f;
			relQuality = relQuality < (99.0f / 100.0f) ? relQuality : 1.0f;
			totalRelativeQuality += relQuality;
			_paths[i].p->updateRelativeQuality(relQuality);
		}
	}
	// Convert set of relative performances into an allocation set
	for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
		if (!_paths[i].p) {
			continue;
		}
		unsigned char share;
		if (totalRelativeQuality > 0.0f) {
			share = (unsigned char)((_paths[i].p->relativeQuality() / totalRelativeQuality) * 255);
		} else {
			// No path has a usable quality; knownPaths is at least 1 here
			share = (unsigned char)(255 / knownPaths);
		}
		_paths[i].p->updateComponentAllocationOfAggregateLink(share);
	}
}
// Sum of each known path's packet delay variance weighted by its relative quality,
// truncated to an integer.
int Peer::computeAggregateLinkPacketDelayVariance()
{
	float weightedPdv = 0.0;
	for (unsigned int slot=0; slot<ZT_MAX_PEER_NETWORK_PATHS; ++slot) {
		if (!_paths[slot].p) {
			continue;
		}
		weightedPdv += _paths[slot].p->relativeQuality() * _paths[slot].p->packetDelayVariance();
	}
	return (int)weightedPdv;
}
// Average, over all known paths, of each path's mean latency weighted by its relative
// quality (each term is truncated to an integer before summing).
//
// Returns 0 when the peer has no paths. The previous version divided by the path count
// unconditionally, which is an integer division by zero in that case.
int Peer::computeAggregateLinkMeanLatency()
{
	int ml = 0;
	int pathCount = 0;
	for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
		if (_paths[i].p) {
			pathCount++;
			ml += (int)(_paths[i].p->relativeQuality() * _paths[i].p->meanLatency());
		}
	}
	if (pathCount == 0) {
		return 0; // nothing to average
	}
	return ml / pathCount;
}
// Number of distinct interface names (Path::getName) among the paths that are currently
// alive.
int Peer::aggregateLinkPhysicalPathCount()
{
	std::map<std::string, bool> seenInterfaces;
	int distinct = 0;
	int64_t now = RR->node->now();
	for (unsigned int slot=0; slot<ZT_MAX_PEER_NETWORK_PATHS; ++slot) {
		if (!_paths[slot].p || !_paths[slot].p->alive(now)) {
			continue;
		}
		// insert() reports whether the name was new; only new names are counted
		if (seenInterfaces.insert(std::make_pair(std::string(_paths[slot].p->getName()), true)).second) {
			++distinct;
		}
	}
	return distinct;
}
// Number of paths that are currently alive, counting every path individually.
int Peer::aggregateLinkLogicalPathCount()
{
	int64_t now = RR->node->now();
	int live = 0;
	for (unsigned int slot=0; slot<ZT_MAX_PEER_NETWORK_PATHS; ++slot) {
		if (!_paths[slot].p) {
			continue;
		}
		if (_paths[slot].p->alive(now)) {
			++live;
		}
	}
	return live;
}
SharedPtr<Path> Peer::getAppropriatePath(int64_t now, bool includeExpired)
{
Mutex::Lock _l(_paths_m);
unsigned int bestPath = ZT_MAX_PEER_NETWORK_PATHS;
/**
* Send traffic across the highest quality path only. This algorithm will still
* use the old path quality metric from protocol version 9.
*/
if (!_canUseMultipath) {
if (!_bondToPeer) {
Mutex::Lock _l(_paths_m);
unsigned int bestPath = ZT_MAX_PEER_NETWORK_PATHS;
/**
* Send traffic across the highest quality path only. This algorithm will still
* use the old path quality metric from protocol version 9.
*/
long bestPathQuality = 2147483647;
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
@ -363,115 +245,7 @@ SharedPtr<Path> Peer::getAppropriatePath(int64_t now, bool includeExpired)
}
return SharedPtr<Path>();
}
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
_paths[i].p->processBackgroundPathMeasurements(now);
}
}
/**
* Randomly distribute traffic across all paths
*/
int numAlivePaths = 0;
int numStalePaths = 0;
if (RR->node->getMultipathMode() == ZT_MULTIPATH_RANDOM) {
int alivePaths[ZT_MAX_PEER_NETWORK_PATHS];
int stalePaths[ZT_MAX_PEER_NETWORK_PATHS];
memset(&alivePaths, -1, sizeof(alivePaths));
memset(&stalePaths, -1, sizeof(stalePaths));
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
if (_paths[i].p->alive(now)) {
alivePaths[numAlivePaths] = i;
numAlivePaths++;
}
else {
stalePaths[numStalePaths] = i;
numStalePaths++;
}
}
}
unsigned int r = _freeRandomByte;
if (numAlivePaths > 0) {
int rf = r % numAlivePaths;
return _paths[alivePaths[rf]].p;
}
else if(numStalePaths > 0) {
// Resort to trying any non-expired path
int rf = r % numStalePaths;
return _paths[stalePaths[rf]].p;
}
}
/**
* Proportionally allocate traffic according to dynamic path quality measurements
*/
if (RR->node->getMultipathMode() == ZT_MULTIPATH_PROPORTIONALLY_BALANCED) {
if ((now - _lastAggregateAllocation) >= ZT_PATH_QUALITY_COMPUTE_INTERVAL) {
_lastAggregateAllocation = now;
computeAggregateProportionalAllocation(now);
}
// Randomly choose path according to their allocations
float rf = _freeRandomByte;
for(int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
if (rf < _paths[i].p->allocation()) {
bestPath = i;
_pathChoiceHist.push(bestPath); // Record which path we chose
break;
}
rf -= _paths[i].p->allocation();
}
}
if (bestPath < ZT_MAX_PEER_NETWORK_PATHS) {
return _paths[bestPath].p;
}
}
return SharedPtr<Path>();
}
// Build, in the member buffer _interfaceListStr, a space-separated list of
// "(<interface name>, <ipv4|ipv6>, <share>)" entries for the paths that are currently
// alive, optionally followed by ", is asymmetrical" when any path's observed share of
// choices differs from an even split by more than ZT_PATH_IMBALANCE_THRESHOLD.
// Returns a pointer to that member buffer.
char *Peer::interfaceListStr()
{
std::map<std::string, int> ifnamemap;
// NOTE(review): tmp is filled with sprintf and no length limit.
char tmp[32];
const int64_t now = RR->node->now();
char *ptr = _interfaceListStr;
bool imbalanced = false;
memset(_interfaceListStr, 0, sizeof(_interfaceListStr));
int alivePathCount = aggregateLinkLogicalPathCount();
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p && _paths[i].p->alive(now)) {
// 1 for IPv4, 0 otherwise
int ipv = _paths[i].p->address().isV4();
// If this is acting as an aggregate link, check allocations
float targetAllocation = 1.0f / (float)alivePathCount;
float currentAllocation = 1.0f;
if (alivePathCount > 1) {
// Fraction of recorded path choices that picked slot i
currentAllocation = (float)_pathChoiceHist.countValue(i) / (float)_pathChoiceHist.count();
if (fabs(targetAllocation - currentAllocation) > ZT_PATH_IMBALANCE_THRESHOLD) {
imbalanced = true;
}
}
char *ipvStr = ipv ? (char*)"ipv4" : (char*)"ipv6";
sprintf(tmp, "(%s, %s, %.3f)", _paths[i].p->getName(), ipvStr, currentAllocation);
// Prevent duplicates
// NOTE(review): a name that is not yet in the map reads as 0, which equals ipv for a
// non-IPv4 path, so such a path is never appended on first sight.
if(ifnamemap[_paths[i].p->getName()] != ipv) {
// NOTE(review): nothing here checks the remaining space in _interfaceListStr.
memcpy(ptr, tmp, strlen(tmp));
ptr += strlen(tmp);
*ptr = ' ';
ptr++;
ifnamemap[_paths[i].p->getName()] = ipv;
}
}
}
// NOTE(review): if no entry was appended, ptr still equals _interfaceListStr and this
// decrement moves it before the start of the buffer.
ptr--; // Overwrite trailing space
if (imbalanced) {
sprintf(tmp, ", is asymmetrical");
// NOTE(review): copies all sizeof(tmp) bytes, not just the formatted string.
memcpy(ptr, tmp, sizeof(tmp));
} else {
*ptr = '\0';
}
return _interfaceListStr;
return _bondToPeer->getAppropriatePath(now, flowId);
}
void Peer::introduce(void *const tPtr,const int64_t now,const SharedPtr<Peer> &other) const
@ -595,73 +369,6 @@ void Peer::introduce(void *const tPtr,const int64_t now,const SharedPtr<Peer> &o
}
}
inline void Peer::processBackgroundPeerTasks(const int64_t now)
{
// Determine current multipath compatibility with other peer
if ((now - _lastMultipathCompatibilityCheck) >= ZT_PATH_QUALITY_COMPUTE_INTERVAL) {
//
// Cache number of available paths so that we can short-circuit multipath logic elsewhere
//
// We also take notice of duplicate paths (same IP only) because we may have
// recently received a direct path push from a peer and our list might contain
// a dead path which hasn't been fully recognized as such. In this case we
// don't want the duplicate to trigger execution of multipath code prematurely.
//
// This is done to support the behavior of auto multipath enable/disable
// without user intervention.
//
int currAlivePathCount = 0;
int duplicatePathsFound = 0;
for (unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (_paths[i].p) {
currAlivePathCount++;
for (unsigned int j=0;j<ZT_MAX_PEER_NETWORK_PATHS;++j) {
if (_paths[i].p && _paths[j].p && _paths[i].p->address().ipsEqual2(_paths[j].p->address()) && i != j) {
duplicatePathsFound+=1;
break;
}
}
}
}
_uniqueAlivePathCount = (currAlivePathCount - (duplicatePathsFound / 2));
_lastMultipathCompatibilityCheck = now;
_localMultipathSupported = ((RR->node->getMultipathMode() != ZT_MULTIPATH_NONE) && (ZT_PROTO_VERSION > 9));
_remoteMultipathSupported = _vProto > 9;
// If both peers support multipath and more than one path exist, we can use multipath logic
_canUseMultipath = _localMultipathSupported && _remoteMultipathSupported && (_uniqueAlivePathCount > 1);
}
}
void Peer::sendACK(void *tPtr,const SharedPtr<Path> &path,const int64_t localSocket,const InetAddress &atAddress,int64_t now)
{
Packet outp(_id.address(),RR->identity.address(),Packet::VERB_ACK);
uint32_t bytesToAck = path->bytesToAck();
outp.append<uint32_t>(bytesToAck);
if (atAddress) {
outp.armor(_key,false);
RR->node->putPacket(tPtr,localSocket,atAddress,outp.data(),outp.size());
} else {
RR->sw->send(tPtr,outp,false);
}
path->sentAck(now);
}
void Peer::sendQOS_MEASUREMENT(void *tPtr,const SharedPtr<Path> &path,const int64_t localSocket,const InetAddress &atAddress,int64_t now)
{
const int64_t _now = RR->node->now();
Packet outp(_id.address(),RR->identity.address(),Packet::VERB_QOS_MEASUREMENT);
char qosData[ZT_PATH_MAX_QOS_PACKET_SZ];
int16_t len = path->generateQoSPacket(_now,qosData);
outp.append(qosData,len);
if (atAddress) {
outp.armor(_key,false);
RR->node->putPacket(tPtr,localSocket,atAddress,outp.data(),outp.size());
} else {
RR->sw->send(tPtr,outp,false);
}
path->sentQoS(now);
}
void Peer::sendHELLO(void *tPtr,const int64_t localSocket,const InetAddress &atAddress,int64_t now)
{
Packet outp(_id.address(),RR->identity.address(),Packet::VERB_HELLO);
@ -727,33 +434,58 @@ void Peer::tryMemorizedPath(void *tPtr,int64_t now)
}
}
/**
 * Re-evaluate whether multipath can be used with this peer and, if it can and
 * no bond exists yet, ask the bond controller to create one.
 *
 * Updates _localMultipathSupported on every call. _uniqueAlivePathCount,
 * _remoteMultipathSupported and _canUseMultipath are only recomputed while
 * local multipath support is active.
 *
 * @param now Current time, passed to Bond::nominatePath() for each known path
 */
void Peer::performMultipathStateCheck(int64_t now)
{
	/**
	 * Check for conditions required for multipath bonding and create a bond
	 * if allowed.
	 */
	// RR->bc is tested for NULL before it is used to create a bond below, so it
	// must not be dereferenced unconditionally here either.
	_localMultipathSupported = ((RR->bc) && (RR->bc->inUse()) && (ZT_PROTO_VERSION > 9));
	if (_localMultipathSupported) {
		int currAlivePathCount = 0;
		int duplicatePathsFound = 0;
		for (unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
			if (_paths[i].p) {
				currAlivePathCount++;
				// Count this path once if any *other* slot holds a path with an equal IP
				for (unsigned int j=0;j<ZT_MAX_PEER_NETWORK_PATHS;++j) {
					if ((i != j) && _paths[j].p && _paths[i].p->address().ipsEqual2(_paths[j].p->address())) {
						duplicatePathsFound+=1;
						break;
					}
				}
			}
		}
		// Each path that shares an IP with another one is counted above, so halve the tally
		_uniqueAlivePathCount = (currAlivePathCount - (duplicatePathsFound / 2));
		_remoteMultipathSupported = _vProto > 9;
		// Multipath needs support on both ends and more than one unique path
		_canUseMultipath = _localMultipathSupported && _remoteMultipathSupported && (_uniqueAlivePathCount > 1);
	}
	// NOTE(review): _canUseMultipath keeps its previous value when local support is off -- confirm this is intended
	if (_canUseMultipath && !_bondToPeer) {
		if (RR->bc) {
			_bondToPeer = RR->bc->createTransportTriggeredBond(RR, this);
			/**
			 * Allow new bond to retroactively learn all paths known to this peer
			 */
			if (_bondToPeer) {
				for (unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
					if (_paths[i].p) {
						_bondToPeer->nominatePath(_paths[i].p, now);
					}
				}
			}
		}
	}
}
unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now)
{
unsigned int sent = 0;
Mutex::Lock _l(_paths_m);
performMultipathStateCheck(now);
const bool sendFullHello = ((now - _lastSentFullHello) >= ZT_PEER_PING_PERIOD);
_lastSentFullHello = now;
processBackgroundPeerTasks(now);
// Emit traces regarding aggregate link status
if (_canUseMultipath) {
int alivePathCount = aggregateLinkPhysicalPathCount();
if ((now - _lastAggregateStatsReport) > ZT_PATH_AGGREGATE_STATS_REPORT_INTERVAL) {
_lastAggregateStatsReport = now;
if (alivePathCount) {
RR->t->peerLinkAggregateStatistics(NULL,*this);
}
} if (alivePathCount < 2 && _linkIsRedundant) {
_linkIsRedundant = !_linkIsRedundant;
RR->t->peerLinkNoLongerRedundant(NULL,*this);
} if (alivePathCount > 1 && !_linkIsRedundant) {
_linkIsRedundant = !_linkIsRedundant;
RR->t->peerLinkNowRedundant(NULL,*this);
}
}
// Right now we only keep pinging links that have the maximum priority. The
// priority is used to track cluster redirections, meaning that when a cluster
// redirects us its redirect target links override all other links and we
@ -770,7 +502,8 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now)
if (_paths[i].p) {
// Clean expired and reduced priority paths
if ( ((now - _paths[i].lr) < ZT_PEER_PATH_EXPIRATION) && (_paths[i].priority == maxPriority) ) {
if ((sendFullHello)||(_paths[i].p->needsHeartbeat(now))) {
if ((sendFullHello)||(_paths[i].p->needsHeartbeat(now))
|| (_canUseMultipath && _paths[i].p->needsGratuitousHeartbeat(now))) {
attemptToContactAt(tPtr,_paths[i].p->localSocket(),_paths[i].p->address(),now,sendFullHello);
_paths[i].p->sent(now);
sent |= (_paths[i].p->address().ss_family == AF_INET) ? 0x1 : 0x2;
@ -781,14 +514,6 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now)
}
} else break;
}
if (canUseMultipath()) {
while(j < ZT_MAX_PEER_NETWORK_PATHS) {
_paths[j].lr = 0;
_paths[j].p.zero();
_paths[j].priority = 1;
++j;
}
}
return sent;
}
@ -855,4 +580,30 @@ void Peer::resetWithinScope(void *tPtr,InetAddress::IpScope scope,int inetAddres
}
}
/**
 * Forward an outgoing-packet record to the bond for this peer.
 *
 * Does nothing unless path statistics collection is enabled and a bond exists.
 *
 * @param path Path over which the packet is being sent
 * @param packetId Packet ID
 * @param payloadLength Length of packet data payload
 * @param verb Packet verb
 * @param flowId Flow ID
 * @param now Current time
 */
void Peer::recordOutgoingPacket(const SharedPtr<Path> &path, const uint64_t packetId,
	uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now)
{
	if (_shouldCollectPathStatistics && _bondToPeer) {
		_bondToPeer->recordOutgoingPacket(path, packetId, payloadLength, verb, flowId, now);
	}
}
/**
 * Forward an invalid-incoming-packet record to the bond for this peer.
 *
 * Does nothing unless path statistics collection is enabled and a bond exists.
 *
 * @param path Path over which the invalid packet was received
 */
void Peer::recordIncomingInvalidPacket(const SharedPtr<Path>& path)
{
	if (_shouldCollectPathStatistics && _bondToPeer) {
		_bondToPeer->recordIncomingInvalidPacket(path);
	}
}
/**
 * Forward an incoming-packet record to the bond for this peer.
 *
 * Does nothing unless path statistics collection is enabled and a bond exists.
 *
 * @param tPtr Thread pointer (not used by this function)
 * @param path Path over which the packet was received
 * @param packetId Packet ID
 * @param payloadLength Length of packet data payload
 * @param verb Packet verb
 * @param flowId Flow ID
 * @param now Current time
 */
void Peer::recordIncomingPacket(void *tPtr, const SharedPtr<Path> &path, const uint64_t packetId,
	uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now)
{
	if (_shouldCollectPathStatistics && _bondToPeer) {
		_bondToPeer->recordIncomingPacket(path, packetId, payloadLength, verb, flowId, now);
	}
}
} // namespace ZeroTier

View file

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -31,6 +31,8 @@
#include "AtomicCounter.hpp"
#include "Hashtable.hpp"
#include "Mutex.hpp"
#include "Bond.hpp"
#include "BondController.hpp"
#define ZT_PEER_MAX_SERIALIZED_STATE_SIZE (sizeof(Peer) + 32 + (sizeof(Path) * 2))
@ -42,6 +44,9 @@ namespace ZeroTier {
class Peer
{
friend class SharedPtr<Peer>;
friend class SharedPtr<Bond>;
friend class Switch;
friend class Bond;
private:
Peer() {} // disabled to prevent bugs -- should not be constructed uninitialized
@ -95,7 +100,8 @@ public:
const uint64_t inRePacketId,
const Packet::Verb inReVerb,
const bool trustEstablished,
const uint64_t networkId);
const uint64_t networkId,
const int32_t flowId);
/**
* Check whether we have an active path to this peer via the given address
@ -135,73 +141,39 @@ public:
}
/**
* Record statistics on outgoing packets
*
* @param path Path over which packet was sent
* @param id Packet ID
* @param len Length of packet payload
* @param verb Packet verb
* @param now Current time
*/
void recordOutgoingPacket(const SharedPtr<Path> &path, const uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, int64_t now);
/**
* Record statistics on incoming packets
*
* @param path Path over which packet was sent
* @param id Packet ID
* @param len Length of packet payload
* @param verb Packet verb
* @param now Current time
*/
void recordIncomingPacket(void *tPtr, const SharedPtr<Path> &path, const uint64_t packetId, uint16_t payloadLength, const Packet::Verb verb, int64_t now);
/**
* Send an ACK to peer for the most recent packets received
* Record an incoming packet with the bond to this peer (if one exists)
*
* @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
* @param localSocket Raw socket the ACK packet will be sent over
* @param atAddress Destination for the ACK packet
* @param path Path over which packet was received
* @param packetId Packet ID
* @param payloadLength Length of packet data payload
* @param verb Packet verb
* @param flowId Flow ID
* @param now Current time
*/
void sendACK(void *tPtr, const SharedPtr<Path> &path, const int64_t localSocket,const InetAddress &atAddress,int64_t now);
void recordIncomingPacket(void *tPtr, const SharedPtr<Path> &path, const uint64_t packetId,
uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now);
/**
* Send a QoS packet to peer so that it can evaluate the quality of this link
*
* @param tPtr Thread pointer to be handed through to any callbacks called as a result of this call
* @param localSocket Raw socket the QoS packet will be sent over
* @param atAddress Destination for the QoS packet
* @param path Path over which packet is being sent
* @param packetId Packet ID
* @param payloadLength Length of packet data payload
* @param verb Packet verb
* @param flowId Flow ID
* @param now Current time
*/
void sendQOS_MEASUREMENT(void *tPtr, const SharedPtr<Path> &path, const int64_t localSocket,const InetAddress &atAddress,int64_t now);
void recordOutgoingPacket(const SharedPtr<Path> &path, const uint64_t packetId,
uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now);
/**
* Compute relative quality values and allocations for the components of the aggregate link
* Record an invalid incoming packet. This packet failed
* MAC/compression/cipher checks and will now contribute to a
* Packet Error Ratio (PER).
*
* @param now Current time
* @param path Path over which packet was received
*/
void computeAggregateProportionalAllocation(int64_t now);
/**
* @return The aggregate link Packet Delay Variance (PDV)
*/
int computeAggregateLinkPacketDelayVariance();
/**
* @return The aggregate link mean latency
*/
int computeAggregateLinkMeanLatency();
/**
* @return The number of currently alive "physical" paths in the aggregate link
*/
int aggregateLinkPhysicalPathCount();
/**
* @return The number of currently alive "logical" paths in the aggregate link
*/
int aggregateLinkLogicalPathCount();
void recordIncomingInvalidPacket(const SharedPtr<Path>& path);
/**
* Get the most appropriate direct path based on current multipath and QoS configuration
@ -210,13 +182,7 @@ public:
* @param includeExpired If true, include even expired paths
* @return Best current path or NULL if none
*/
SharedPtr<Path> getAppropriatePath(int64_t now, bool includeExpired);
/**
* Generate a human-readable string of interface names making up the aggregate link, also include
* moving allocation and IP version number for each (for tracing)
*/
char *interfaceListStr();
SharedPtr<Path> getAppropriatePath(int64_t now, bool includeExpired, int32_t flowId = -1);
/**
* Send VERB_RENDEZVOUS to this and another peer via the best common IP scope and path
@ -258,6 +224,13 @@ public:
*/
void tryMemorizedPath(void *tPtr,int64_t now);
/**
* A check to be performed periodically which determines whether multipath communication is
* possible with this peer. This check should be performed early in the life-cycle of the peer
* as well as during the process of learning new paths.
*/
void performMultipathStateCheck(int64_t now);
/**
* Send pings or keepalives depending on configured timeouts
*
@ -270,16 +243,6 @@ public:
*/
unsigned int doPingAndKeepalive(void *tPtr,int64_t now);
/**
* Clear paths whose localSocket(s) are in a CLOSED state or have an otherwise INVALID state.
* This should be called frequently so that we can detect and remove unproductive or invalid paths.
*
* Under the hood this is done periodically based on ZT_CLOSED_PATH_PRUNING_INTERVAL.
*
* @return Number of paths that were pruned this round
*/
unsigned int prunePaths();
/**
* Process a cluster redirect sent by this peer
*
@ -341,7 +304,7 @@ public:
inline unsigned int latency(const int64_t now)
{
if (_canUseMultipath) {
return (int)computeAggregateLinkMeanLatency();
return (int)_lastComputedAggregateMeanLatency;
} else {
SharedPtr<Path> bp(getAppropriatePath(now,false));
if (bp)
@ -400,37 +363,6 @@ public:
inline bool remoteVersionKnown() const { return ((_vMajor > 0)||(_vMinor > 0)||(_vRevision > 0)); }
/**
* Periodically update known multipath activation constraints. This is done so that we know when and when
* not to use multipath logic. Doing this once every few seconds is sufficient.
*
* @param now Current time
*/
inline void processBackgroundPeerTasks(const int64_t now);
/**
* Record that the remote peer does have multipath enabled. As is evident by the receipt of a VERB_ACK
* or a VERB_QOS_MEASUREMENT packet at some point in the past. Until this flag is set, the local client
* shall assume that multipath is not enabled and should only use classical Protocol 9 logic.
*/
inline void inferRemoteMultipathEnabled() { _remotePeerMultipathEnabled = true; }
/**
* @return Whether the local client supports and is configured to use multipath
*/
inline bool localMultipathSupport() { return _localMultipathSupported; }
/**
* @return Whether the remote peer supports and is configured to use multipath
*/
inline bool remoteMultipathSupport() { return _remoteMultipathSupported; }
/**
* @return Whether this client can use multipath to communicate with this peer. True if both peers are using
* the correct protocol and if both peers have multipath enabled. False if otherwise.
*/
inline bool canUseMultipath() { return _canUseMultipath; }
/**
* @return True if peer has received a trust established packet (e.g. common network membership) in the past ZT_TRUST_EXPIRATION ms
*/
@ -485,50 +417,35 @@ public:
}
/**
* Rate limit gate for inbound ECHO requests
* Rate limit gate for inbound ECHO requests. This rate limiter works
* by draining a certain number of requests per unit time. Each peer may
* theoretically receive up to ZT_ECHO_CUTOFF_LIMIT requests per second.
*/
inline bool rateGateEchoRequest(const int64_t now)
{
if ((now - _lastEchoRequestReceived) >= ZT_PEER_GENERAL_RATE_LIMIT) {
_lastEchoRequestReceived = now;
return true;
}
return false;
}
/**
* Rate limit gate for VERB_ACK
*/
inline bool rateGateACK(const int64_t now)
{
if ((now - _lastACKWindowReset) >= ZT_PATH_QOS_ACK_CUTOFF_TIME) {
_lastACKWindowReset = now;
_ACKCutoffCount = 0;
/*
// TODO: Rethink this
if (_canUseMultipath) {
_echoRequestCutoffCount++;
int numToDrain = (now - _lastEchoCheck) / ZT_ECHO_DRAINAGE_DIVISOR;
_lastEchoCheck = now;
fprintf(stderr, "ZT_ECHO_CUTOFF_LIMIT=%d, (now - _lastEchoCheck)=%d, numToDrain=%d, ZT_ECHO_DRAINAGE_DIVISOR=%d\n", ZT_ECHO_CUTOFF_LIMIT, (now - _lastEchoCheck), numToDrain, ZT_ECHO_DRAINAGE_DIVISOR);
if (_echoRequestCutoffCount > numToDrain) {
_echoRequestCutoffCount-=numToDrain;
}
else {
_echoRequestCutoffCount = 0;
}
return (_echoRequestCutoffCount < ZT_ECHO_CUTOFF_LIMIT);
} else {
++_ACKCutoffCount;
if ((now - _lastEchoRequestReceived) >= (ZT_PEER_GENERAL_RATE_LIMIT)) {
_lastEchoRequestReceived = now;
return true;
}
return false;
}
return (_ACKCutoffCount < ZT_PATH_QOS_ACK_CUTOFF_LIMIT);
}
/**
* Rate limit gate for VERB_QOS_MEASUREMENT
*/
inline bool rateGateQoS(const int64_t now)
{
if ((now - _lastQoSWindowReset) >= ZT_PATH_QOS_ACK_CUTOFF_TIME) {
_lastQoSWindowReset = now;
_QoSCutoffCount = 0;
} else {
++_QoSCutoffCount;
}
return (_QoSCutoffCount < ZT_PATH_QOS_ACK_CUTOFF_LIMIT);
}
/**
* @return Whether this peer is reachable via an aggregate link
*/
inline bool hasAggregateLink() {
return _localMultipathSupported && _remoteMultipathSupported && _remotePeerMultipathEnabled;
*/
return true;
}
/**
@ -603,6 +520,18 @@ public:
}
}
/**
 * @return Shared pointer to the bond associated with this peer (empty if no bond has been created)
 */
SharedPtr<Bond> bond() { return _bondToPeer; }
/**
 * @return The bonding policy value currently stored for this peer (_bondingPolicy)
 */
inline int8_t bondingPolicy() { return _bondingPolicy; }
private:
struct _PeerPath
{
@ -621,25 +550,16 @@ private:
int64_t _lastTriedMemorizedPath;
int64_t _lastDirectPathPushSent;
int64_t _lastDirectPathPushReceive;
int64_t _lastEchoRequestReceived;
int64_t _lastCredentialRequestSent;
int64_t _lastWhoisRequestReceived;
int64_t _lastEchoRequestReceived;
int64_t _lastCredentialsReceived;
int64_t _lastTrustEstablishedPacketReceived;
int64_t _lastSentFullHello;
int64_t _lastPathPrune;
int64_t _lastACKWindowReset;
int64_t _lastQoSWindowReset;
int64_t _lastMultipathCompatibilityCheck;
int64_t _lastEchoCheck;
unsigned char _freeRandomByte;
int _uniqueAlivePathCount;
bool _localMultipathSupported;
bool _remoteMultipathSupported;
bool _canUseMultipath;
uint16_t _vProto;
uint16_t _vMajor;
uint16_t _vMinor;
@ -652,21 +572,22 @@ private:
unsigned int _directPathPushCutoffCount;
unsigned int _credentialsCutoffCount;
unsigned int _QoSCutoffCount;
unsigned int _ACKCutoffCount;
unsigned int _echoRequestCutoffCount;
AtomicCounter __refCount;
RingBuffer<int,ZT_MULTIPATH_PROPORTION_WIN_SZ> _pathChoiceHist;
bool _linkIsBalanced;
bool _linkIsRedundant;
bool _remotePeerMultipathEnabled;
int _uniqueAlivePathCount;
bool _localMultipathSupported;
bool _remoteMultipathSupported;
bool _canUseMultipath;
int64_t _lastAggregateStatsReport;
int64_t _lastAggregateAllocation;
volatile bool _shouldCollectPathStatistics;
volatile int8_t _bondingPolicy;
char _interfaceListStr[256]; // 16 characters * 16 paths in a link
int32_t _lastComputedAggregateMeanLatency;
SharedPtr<Bond> _bondToPeer;
};
} // namespace ZeroTier

View file

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -238,6 +238,21 @@ public:
return curr_cnt ? subtotal / (float)curr_cnt : 0;
}
/**
 * Add up every element currently stored in the buffer.
 *
 * Walks count() slots starting at 'begin' and wrapping modulo the capacity S,
 * so each stored element is read exactly once whether or not the buffer is
 * full. (Stepping the index modulo count() instead of S revisits some slots
 * and skips others whenever the buffer is only partially filled.)
 *
 * Assumes the stored elements occupy count() consecutive slots beginning at
 * 'begin' -- TODO confirm against the push/consume logic.
 *
 * @return The sum of the contents of the buffer (0 if empty)
 */
inline float sum()
{
	const size_t curr_cnt = count();
	float total = 0;
	for (size_t i=0; i<curr_cnt; i++) {
		total += (float)*(buf + ((begin + i) % S));
	}
	return total;
}
/**
* @return The sample standard deviation of element values
*/
@ -306,10 +321,10 @@ public:
for (size_t i=0; i<S; i++) {
iterator = (iterator + S - 1) % S;
if (typeid(T) == typeid(int)) {
//DEBUG_INFO("buf[%2zu]=%2d", iterator, (int)*(buf + iterator));
fprintf(stderr, "buf[%2zu]=%2d\n", iterator, (int)*(buf + iterator));
}
else {
//DEBUG_INFO("buf[%2zu]=%2f", iterator, (float)*(buf + iterator));
fprintf(stderr, "buf[%2zu]=%2f\n", iterator, (float)*(buf + iterator));
}
}
}

View file

@ -30,6 +30,7 @@ class Multicaster;
class NetworkController;
class SelfAwareness;
class Trace;
class BondController;
/**
* Holds global state for an instance of ZeroTier::Node
@ -75,6 +76,7 @@ public:
Multicaster *mc;
Topology *topology;
SelfAwareness *sa;
BondController *bc;
// This node's identity and string representations thereof
Identity identity;

View file

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -42,8 +42,38 @@ Switch::Switch(const RuntimeEnvironment *renv) :
{
}
// Returns true if packet appears valid; pos and proto will be set
/**
 * Locate the upper-layer payload of an IPv6 frame by skipping extension headers.
 *
 * Starts after the 40-byte fixed header and steps over hop-by-hop (0),
 * routing (43), destination options (60) and mobility (135) headers. Any other
 * next-header value ends the walk successfully.
 *
 * @param frameData IPv6 frame, starting at the fixed header
 * @param frameLen Length of frameData in bytes
 * @param pos Set to the offset reached by the walk (untouched if frameLen < 40)
 * @param proto Set to the last next-header value read (untouched if frameLen < 40)
 * @return True if the packet appears valid; false if it is too short, an
 *         extension header is truncated, or the walk runs past frameLen
 */
static bool _ipv6GetPayload(const uint8_t *frameData,unsigned int frameLen,unsigned int &pos,unsigned int &proto)
{
	if (frameLen < 40)
		return false;

	unsigned int offset = 40;
	unsigned int nextHeader = frameData[6];
	bool payloadFound = false;

	for (;offset <= frameLen;) {
		const bool skippable =
			(nextHeader == 0) ||   // hop-by-hop options
			(nextHeader == 43) ||  // routing
			(nextHeader == 60) ||  // destination options
			(nextHeader == 135);   // mobility options
		// Fragment (44) and IPSec ESP/AH (50, 51) are not parsed; like every
		// other value they stop the walk here.
		if (!skippable) {
			payloadFound = true;
			break;
		}
		if ((offset + 8) > frameLen)
			break; // invalid: extension header does not fit

		const uint8_t *const ext = frameData + offset;
		nextHeader = ext[0];
		offset += ((unsigned int)ext[1] * 8) + 8;
	}

	// Falling out of the loop without a payload means overflow == invalid
	pos = offset;
	proto = nextHeader;
	return payloadFound;
}
void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddress &fromAddr,const void *data,unsigned int len)
{
int32_t flowId = ZT_QOS_NO_FLOW;
try {
const int64_t now = RR->node->now();
@ -112,6 +142,7 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre
if (rq->packetId != fragmentPacketId) {
// No packet found, so we received a fragment without its head.
rq->flowId = flowId;
rq->timestamp = now;
rq->packetId = fragmentPacketId;
rq->frags[fragmentNumber - 1] = fragment;
@ -130,7 +161,7 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre
for(unsigned int f=1;f<totalFragments;++f)
rq->frag0.append(rq->frags[f - 1].payload(),rq->frags[f - 1].payloadLength());
if (rq->frag0.tryDecode(RR,tPtr)) {
if (rq->frag0.tryDecode(RR,tPtr,flowId)) {
rq->timestamp = 0; // packet decoded, free entry
} else {
rq->complete = true; // set complete flag but leave entry since it probably needs WHOIS or something
@ -195,6 +226,7 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre
if (rq->packetId != packetId) {
// If we have no other fragments yet, create an entry and save the head
rq->flowId = flowId;
rq->timestamp = now;
rq->packetId = packetId;
rq->frag0.init(data,len,path,now);
@ -211,7 +243,7 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre
for(unsigned int f=1;f<rq->totalFragments;++f)
rq->frag0.append(rq->frags[f - 1].payload(),rq->frags[f - 1].payloadLength());
if (rq->frag0.tryDecode(RR,tPtr)) {
if (rq->frag0.tryDecode(RR,tPtr,flowId)) {
rq->timestamp = 0; // packet decoded, free entry
} else {
rq->complete = true; // set complete flag but leave entry since it probably needs WHOIS or something
@ -224,9 +256,10 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre
} else {
// Packet is unfragmented, so just process it
IncomingPacket packet(data,len,path,now);
if (!packet.tryDecode(RR,tPtr)) {
if (!packet.tryDecode(RR,tPtr,flowId)) {
RXQueueEntry *const rq = _nextRXQueueEntry();
Mutex::Lock rql(rq->lock);
rq->flowId = flowId;
rq->timestamp = now;
rq->packetId = packet.packetId();
rq->frag0 = packet;
@ -256,7 +289,75 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr<Network> &network,const
}
}
uint8_t qosBucket = ZT_QOS_DEFAULT_BUCKET;
uint8_t qosBucket = ZT_AQM_DEFAULT_BUCKET;
/**
* A pseudo-unique identifier used by balancing and bonding policies to
* categorize individual flows/conversations for assignment to a specific
* physical path. This identifier consists of the source port and
* destination port of the encapsulated frame.
*
* A flowId of -1 will indicate that there is no preference for how this
* packet shall be sent. An example of this would be an ICMP packet.
*/
int32_t flowId = ZT_QOS_NO_FLOW;
if (etherType == ZT_ETHERTYPE_IPV4 && (len >= 20)) {
uint16_t srcPort = 0;
uint16_t dstPort = 0;
uint8_t proto = (reinterpret_cast<const uint8_t *>(data)[9]);
const unsigned int headerLen = 4 * (reinterpret_cast<const uint8_t *>(data)[0] & 0xf);
switch(proto) {
case 0x01: // ICMP
//flowId = 0x01;
break;
// All these start with 16-bit source and destination port in that order
case 0x06: // TCP
case 0x11: // UDP
case 0x84: // SCTP
case 0x88: // UDPLite
if (len > (headerLen + 4)) {
unsigned int pos = headerLen + 0;
srcPort = (reinterpret_cast<const uint8_t *>(data)[pos++]) << 8;
srcPort |= (reinterpret_cast<const uint8_t *>(data)[pos]);
pos++;
dstPort = (reinterpret_cast<const uint8_t *>(data)[pos++]) << 8;
dstPort |= (reinterpret_cast<const uint8_t *>(data)[pos]);
flowId = dstPort ^ srcPort ^ proto;
}
break;
}
}
if (etherType == ZT_ETHERTYPE_IPV6 && (len >= 40)) {
uint16_t srcPort = 0;
uint16_t dstPort = 0;
unsigned int pos;
unsigned int proto;
_ipv6GetPayload((const uint8_t *)data, len, pos, proto);
switch(proto) {
case 0x3A: // ICMPv6
//flowId = 0x3A;
break;
// All these start with 16-bit source and destination port in that order
case 0x06: // TCP
case 0x11: // UDP
case 0x84: // SCTP
case 0x88: // UDPLite
if (len > (pos + 4)) {
srcPort = (reinterpret_cast<const uint8_t *>(data)[pos++]) << 8;
srcPort |= (reinterpret_cast<const uint8_t *>(data)[pos]);
pos++;
dstPort = (reinterpret_cast<const uint8_t *>(data)[pos++]) << 8;
dstPort |= (reinterpret_cast<const uint8_t *>(data)[pos]);
flowId = dstPort ^ srcPort ^ proto;
}
break;
default:
break;
}
}
if (to.isMulticast()) {
MulticastGroup multicastGroup(to,0);
@ -267,7 +368,7 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr<Network> &network,const
* otherwise a straightforward Ethernet switch emulation. Vanilla ARP
* is dumb old broadcast and simply doesn't scale. ZeroTier multicast
* groups have an additional field called ADI (additional distinguishing
* information) which was added specifically for ARP though it could
* information) which was added specifically for ARP though it could
* be used for other things too. We then take ARP broadcasts and turn
* them into multicasts by stuffing the IP address being queried into
* the 32-bit ADI field. In practice this uses our multicast pub/sub
@ -416,7 +517,7 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr<Network> &network,const
outp.append(data,len);
if (!network->config().disableCompression())
outp.compress();
aqm_enqueue(tPtr,network,outp,true,qosBucket);
aqm_enqueue(tPtr,network,outp,true,qosBucket,flowId);
} else {
Packet outp(toZT,RR->identity.address(),Packet::VERB_FRAME);
outp.append(network->id());
@ -424,7 +525,7 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr<Network> &network,const
outp.append(data,len);
if (!network->config().disableCompression())
outp.compress();
aqm_enqueue(tPtr,network,outp,true,qosBucket);
aqm_enqueue(tPtr,network,outp,true,qosBucket,flowId);
}
} else {
// Destination is bridged behind a remote peer
@ -480,7 +581,7 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr<Network> &network,const
outp.append(data,len);
if (!network->config().disableCompression())
outp.compress();
aqm_enqueue(tPtr,network,outp,true,qosBucket);
aqm_enqueue(tPtr,network,outp,true,qosBucket,flowId);
} else {
RR->t->outgoingNetworkFrameDropped(tPtr,network,from,to,etherType,vlanId,len,"filter blocked (bridge replication)");
}
@ -488,28 +589,25 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr<Network> &network,const
}
}
void Switch::aqm_enqueue(void *tPtr, const SharedPtr<Network> &network, Packet &packet,bool encrypt,int qosBucket)
void Switch::aqm_enqueue(void *tPtr, const SharedPtr<Network> &network, Packet &packet,bool encrypt,int qosBucket,int32_t flowId)
{
if(!network->qosEnabled()) {
send(tPtr, packet, encrypt);
send(tPtr, packet, encrypt, flowId);
return;
}
NetworkQoSControlBlock *nqcb = _netQueueControlBlock[network->id()];
if (!nqcb) {
// DEBUG_INFO("creating network QoS control block (NQCB) for network %llx", network->id());
nqcb = new NetworkQoSControlBlock();
_netQueueControlBlock[network->id()] = nqcb;
// Initialize ZT_QOS_NUM_BUCKETS queues and place them in the INACTIVE list
// These queues will be shuffled between the new/old/inactive lists by the enqueue/dequeue algorithm
for (int i=0; i<ZT_QOS_NUM_BUCKETS; i++) {
for (int i=0; i<ZT_AQM_NUM_BUCKETS; i++) {
nqcb->inactiveQueues.push_back(new ManagedQueue(i));
}
}
// Don't apply QoS scheduling to ZT protocol traffic
if (packet.verb() != Packet::VERB_FRAME && packet.verb() != Packet::VERB_EXT_FRAME) {
// DEBUG_INFO("skipping, no QoS for this packet, verb=%x", packet.verb());
// just send packet normally, no QoS for ZT protocol traffic
send(tPtr, packet, encrypt);
send(tPtr, packet, encrypt, flowId);
}
_aqm_m.lock();
@ -517,10 +615,10 @@ void Switch::aqm_enqueue(void *tPtr, const SharedPtr<Network> &network, Packet &
// Enqueue packet and move queue to appropriate list
const Address dest(packet.destination());
TXQueueEntry *txEntry = new TXQueueEntry(dest,RR->node->now(),packet,encrypt);
TXQueueEntry *txEntry = new TXQueueEntry(dest,RR->node->now(),packet,encrypt,flowId);
ManagedQueue *selectedQueue = nullptr;
for (size_t i=0; i<ZT_QOS_NUM_BUCKETS; i++) {
for (size_t i=0; i<ZT_AQM_NUM_BUCKETS; i++) {
if (i < nqcb->oldQueues.size()) { // search old queues first (I think this is best since old would imply most recent usage of the queue)
if (nqcb->oldQueues[i]->id == qosBucket) {
selectedQueue = nqcb->oldQueues[i];
@ -533,7 +631,7 @@ void Switch::aqm_enqueue(void *tPtr, const SharedPtr<Network> &network, Packet &
if (nqcb->inactiveQueues[i]->id == qosBucket) {
selectedQueue = nqcb->inactiveQueues[i];
// move queue to end of NEW queue list
selectedQueue->byteCredit = ZT_QOS_QUANTUM;
selectedQueue->byteCredit = ZT_AQM_QUANTUM;
// DEBUG_INFO("moving q=%p from INACTIVE to NEW list", selectedQueue);
nqcb->newQueues.push_back(selectedQueue);
nqcb->inactiveQueues.erase(nqcb->inactiveQueues.begin() + i);
@ -552,11 +650,11 @@ void Switch::aqm_enqueue(void *tPtr, const SharedPtr<Network> &network, Packet &
// Drop a packet if necessary
ManagedQueue *selectedQueueToDropFrom = nullptr;
if (nqcb->_currEnqueuedPackets > ZT_QOS_MAX_ENQUEUED_PACKETS)
if (nqcb->_currEnqueuedPackets > ZT_AQM_MAX_ENQUEUED_PACKETS)
{
// DEBUG_INFO("too many enqueued packets (%d), finding packet to drop", nqcb->_currEnqueuedPackets);
int maxQueueLength = 0;
for (size_t i=0; i<ZT_QOS_NUM_BUCKETS; i++) {
for (size_t i=0; i<ZT_AQM_NUM_BUCKETS; i++) {
if (i < nqcb->oldQueues.size()) {
if (nqcb->oldQueues[i]->byteLength > maxQueueLength) {
maxQueueLength = nqcb->oldQueues[i]->byteLength;
@ -589,7 +687,7 @@ void Switch::aqm_enqueue(void *tPtr, const SharedPtr<Network> &network, Packet &
/**
 * Compute a time offset from t by an interval constant scaled down by the
 * square root of count.
 *
 * @param t Base time
 * @param count Divisor input; sqrt(count) is used as the denominator, so this is presumably expected to be >= 1 -- verify against callers
 * @return t + interval / sqrt(count), truncated to uint64_t
 */
uint64_t Switch::control_law(uint64_t t, int count)
{
return (uint64_t)(t + ZT_QOS_INTERVAL / sqrt(count));
// NOTE(review): the statement below is unreachable; it repeats the return above
// with ZT_AQM_INTERVAL in place of ZT_QOS_INTERVAL -- confirm which constant is intended.
return (uint64_t)(t + ZT_AQM_INTERVAL / sqrt(count));
}
Switch::dqr Switch::dodequeue(ManagedQueue *q, uint64_t now)
@ -603,14 +701,14 @@ Switch::dqr Switch::dodequeue(ManagedQueue *q, uint64_t now)
return r;
}
uint64_t sojourn_time = now - r.p->creationTime;
if (sojourn_time < ZT_QOS_TARGET || q->byteLength <= ZT_DEFAULT_MTU) {
if (sojourn_time < ZT_AQM_TARGET || q->byteLength <= ZT_DEFAULT_MTU) {
// went below - stay below for at least interval
q->first_above_time = 0;
} else {
if (q->first_above_time == 0) {
// just went above from below. if still above at
// first_above_time, will say it's ok to drop.
q->first_above_time = now + ZT_QOS_INTERVAL;
q->first_above_time = now + ZT_AQM_INTERVAL;
} else if (now >= q->first_above_time) {
r.ok_to_drop = true;
}
@ -642,7 +740,7 @@ Switch::TXQueueEntry * Switch::CoDelDequeue(ManagedQueue *q, bool isNew, uint64_
q->q.pop_front(); // drop
r = dodequeue(q, now);
q->dropping = true;
q->count = (q->count > 2 && now - q->drop_next < 8*ZT_QOS_INTERVAL)?
q->count = (q->count > 2 && now - q->drop_next < 8*ZT_AQM_INTERVAL)?
q->count - 2 : 1;
q->drop_next = control_law(now, q->count);
}
@ -670,7 +768,7 @@ void Switch::aqm_dequeue(void *tPtr)
while (currQueues->size()) {
ManagedQueue *queueAtFrontOfList = currQueues->front();
if (queueAtFrontOfList->byteCredit < 0) {
queueAtFrontOfList->byteCredit += ZT_QOS_QUANTUM;
queueAtFrontOfList->byteCredit += ZT_AQM_QUANTUM;
// Move to list of OLD queues
// DEBUG_INFO("moving q=%p from NEW to OLD list", queueAtFrontOfList);
oldQueues->push_back(queueAtFrontOfList);
@ -689,7 +787,7 @@ void Switch::aqm_dequeue(void *tPtr)
queueAtFrontOfList->byteCredit -= len;
// Send the packet!
queueAtFrontOfList->q.pop_front();
send(tPtr, entryToEmit->packet, entryToEmit->encrypt);
send(tPtr, entryToEmit->packet, entryToEmit->encrypt, entryToEmit->flowId);
(*nqcb).second->_currEnqueuedPackets--;
}
if (queueAtFrontOfList) {
@ -705,7 +803,7 @@ void Switch::aqm_dequeue(void *tPtr)
while (currQueues->size()) {
ManagedQueue *queueAtFrontOfList = currQueues->front();
if (queueAtFrontOfList->byteCredit < 0) {
queueAtFrontOfList->byteCredit += ZT_QOS_QUANTUM;
queueAtFrontOfList->byteCredit += ZT_AQM_QUANTUM;
oldQueues->push_back(queueAtFrontOfList);
currQueues->erase(currQueues->begin());
} else {
@ -721,7 +819,7 @@ void Switch::aqm_dequeue(void *tPtr)
queueAtFrontOfList->byteLength -= len;
queueAtFrontOfList->byteCredit -= len;
queueAtFrontOfList->q.pop_front();
send(tPtr, entryToEmit->packet, entryToEmit->encrypt);
send(tPtr, entryToEmit->packet, entryToEmit->encrypt, entryToEmit->flowId);
(*nqcb).second->_currEnqueuedPackets--;
}
if (queueAtFrontOfList) {
@ -745,18 +843,18 @@ void Switch::removeNetworkQoSControlBlock(uint64_t nwid)
}
}
void Switch::send(void *tPtr,Packet &packet,bool encrypt)
void Switch::send(void *tPtr,Packet &packet,bool encrypt,int32_t flowId)
{
const Address dest(packet.destination());
if (dest == RR->identity.address())
return;
if (!_trySend(tPtr,packet,encrypt)) {
if (!_trySend(tPtr,packet,encrypt,flowId)) {
{
Mutex::Lock _l(_txQueue_m);
if (_txQueue.size() >= ZT_TX_QUEUE_SIZE) {
_txQueue.pop_front();
}
_txQueue.push_back(TXQueueEntry(dest,RR->node->now(),packet,encrypt));
_txQueue.push_back(TXQueueEntry(dest,RR->node->now(),packet,encrypt,flowId));
}
if (!RR->topology->getPeer(tPtr,dest))
requestWhois(tPtr,RR->node->now(),dest);
@ -778,10 +876,11 @@ void Switch::requestWhois(void *tPtr,const int64_t now,const Address &addr)
const SharedPtr<Peer> upstream(RR->topology->getUpstreamPeer());
if (upstream) {
int32_t flowId = ZT_QOS_NO_FLOW;
Packet outp(upstream->address(),RR->identity.address(),Packet::VERB_WHOIS);
addr.appendTo(outp);
RR->node->expectReplyTo(outp.packetId());
send(tPtr,outp,true);
send(tPtr,outp,true,flowId);
}
}
@ -797,7 +896,7 @@ void Switch::doAnythingWaitingForPeer(void *tPtr,const SharedPtr<Peer> &peer)
RXQueueEntry *const rq = &(_rxQueue[ptr]);
Mutex::Lock rql(rq->lock);
if ((rq->timestamp)&&(rq->complete)) {
if ((rq->frag0.tryDecode(RR,tPtr))||((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT))
if ((rq->frag0.tryDecode(RR,tPtr,rq->flowId))||((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT))
rq->timestamp = 0;
}
}
@ -806,7 +905,7 @@ void Switch::doAnythingWaitingForPeer(void *tPtr,const SharedPtr<Peer> &peer)
Mutex::Lock _l(_txQueue_m);
for(std::list< TXQueueEntry >::iterator txi(_txQueue.begin());txi!=_txQueue.end();) {
if (txi->dest == peer->address()) {
if (_trySend(tPtr,txi->packet,txi->encrypt)) {
if (_trySend(tPtr,txi->packet,txi->encrypt,txi->flowId)) {
_txQueue.erase(txi++);
} else {
++txi;
@ -830,7 +929,7 @@ unsigned long Switch::doTimerTasks(void *tPtr,int64_t now)
Mutex::Lock _l(_txQueue_m);
for(std::list< TXQueueEntry >::iterator txi(_txQueue.begin());txi!=_txQueue.end();) {
if (_trySend(tPtr,txi->packet,txi->encrypt)) {
if (_trySend(tPtr,txi->packet,txi->encrypt,txi->flowId)) {
_txQueue.erase(txi++);
} else if ((now - txi->creationTime) > ZT_TRANSMIT_QUEUE_TIMEOUT) {
_txQueue.erase(txi++);
@ -848,7 +947,7 @@ unsigned long Switch::doTimerTasks(void *tPtr,int64_t now)
RXQueueEntry *const rq = &(_rxQueue[ptr]);
Mutex::Lock rql(rq->lock);
if ((rq->timestamp)&&(rq->complete)) {
if ((rq->frag0.tryDecode(RR,tPtr))||((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT)) {
if ((rq->frag0.tryDecode(RR,tPtr,rq->flowId))||((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT)) {
rq->timestamp = 0;
} else {
const Address src(rq->frag0.source());
@ -894,7 +993,7 @@ bool Switch::_shouldUnite(const int64_t now,const Address &source,const Address
return false;
}
bool Switch::_trySend(void *tPtr,Packet &packet,bool encrypt)
bool Switch::_trySend(void *tPtr,Packet &packet,bool encrypt,int32_t flowId)
{
SharedPtr<Path> viaPath;
const int64_t now = RR->node->now();
@ -902,19 +1001,40 @@ bool Switch::_trySend(void *tPtr,Packet &packet,bool encrypt)
const SharedPtr<Peer> peer(RR->topology->getPeer(tPtr,destination));
if (peer) {
viaPath = peer->getAppropriatePath(now,false);
if (!viaPath) {
peer->tryMemorizedPath(tPtr,now); // periodically attempt memorized or statically defined paths, if any are known
if ((peer->bondingPolicy() == ZT_BONDING_POLICY_BROADCAST)
&& (packet.verb() == Packet::VERB_FRAME || packet.verb() == Packet::VERB_EXT_FRAME)) {
const SharedPtr<Peer> relay(RR->topology->getUpstreamPeer());
if ( (!relay) || (!(viaPath = relay->getAppropriatePath(now,false))) ) {
if (!(viaPath = peer->getAppropriatePath(now,true)))
return false;
Mutex::Lock _l(peer->_paths_m);
for(int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
if (peer->_paths[i].p && peer->_paths[i].p->alive(now)) {
char pathStr[128];
peer->_paths[i].p->address().toString(pathStr);
_sendViaSpecificPath(tPtr,peer,peer->_paths[i].p,now,packet,encrypt,flowId);
}
}
return true;
}
else {
viaPath = peer->getAppropriatePath(now,false,flowId);
if (!viaPath) {
peer->tryMemorizedPath(tPtr,now); // periodically attempt memorized or statically defined paths, if any are known
const SharedPtr<Peer> relay(RR->topology->getUpstreamPeer());
if ( (!relay) || (!(viaPath = relay->getAppropriatePath(now,false,flowId))) ) {
if (!(viaPath = peer->getAppropriatePath(now,true,flowId)))
return false;
}
}
if (viaPath) {
_sendViaSpecificPath(tPtr,peer,viaPath,now,packet,encrypt,flowId);
return true;
}
}
} else {
return false;
}
return false;
}
void Switch::_sendViaSpecificPath(void *tPtr,SharedPtr<Peer> peer,SharedPtr<Path> viaPath,int64_t now,Packet &packet,bool encrypt,int32_t flowId)
{
unsigned int mtu = ZT_DEFAULT_PHYSMTU;
uint64_t trustedPathId = 0;
RR->topology->getOutboundPathInfo(viaPath->address(),mtu,trustedPathId);
@ -922,7 +1042,7 @@ bool Switch::_trySend(void *tPtr,Packet &packet,bool encrypt)
unsigned int chunkSize = std::min(packet.size(),mtu);
packet.setFragmented(chunkSize < packet.size());
peer->recordOutgoingPacket(viaPath, packet.packetId(), packet.payloadLength(), packet.verb(), now);
peer->recordOutgoingPacket(viaPath, packet.packetId(), packet.payloadLength(), packet.verb(), flowId, now);
if (trustedPathId) {
packet.setTrusted(trustedPathId);
@ -949,8 +1069,6 @@ bool Switch::_trySend(void *tPtr,Packet &packet,bool encrypt)
}
}
}
return true;
}
} // namespace ZeroTier

View file

@ -1,10 +1,10 @@
/*
* Copyright (c)2019 ZeroTier, Inc.
* Copyright (c)2013-2020 ZeroTier, Inc.
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file in the project's root directory.
*
* Change Date: 2023-01-01
* Change Date: 2024-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2.0 of the Apache License.
@ -59,6 +59,8 @@ class Switch
struct ManagedQueue;
struct TXQueueEntry;
friend class SharedPtr<Peer>;
typedef struct {
TXQueueEntry *p;
bool ok_to_drop;
@ -78,6 +80,11 @@ public:
*/
void onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddress &fromAddr,const void *data,unsigned int len);
/**
* Returns whether our bonding or balancing policy is aware of flows.
*/
bool isFlowAware();
/**
* Called when a packet comes from a local Ethernet tap
*
@ -118,7 +125,7 @@ public:
* @param encrypt Encrypt packet payload? (always true except for HELLO)
* @param qosBucket Which bucket the rule-system determined this packet should fall into
* @param flowId Flow identifier associated with this packet (defaults to ZT_QOS_NO_FLOW)
*/
void aqm_enqueue(void *tPtr, const SharedPtr<Network> &network, Packet &packet,bool encrypt,int qosBucket);
void aqm_enqueue(void *tPtr, const SharedPtr<Network> &network, Packet &packet,bool encrypt,int qosBucket,int32_t flowId = ZT_QOS_NO_FLOW);
/**
* Performs a single AQM cycle and dequeues and transmits all eligible packets on all networks
@ -164,7 +171,7 @@ public:
* @param packet Packet to send (buffer may be modified)
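* @param encrypt Encrypt packet payload? (always true except for HELLO)
* @param flowId Flow identifier passed along when choosing the outgoing path (defaults to ZT_QOS_NO_FLOW)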
*/
void send(void *tPtr,Packet &packet,bool encrypt);
void send(void *tPtr,Packet &packet,bool encrypt,int32_t flowId = ZT_QOS_NO_FLOW);
/**
* Request WHOIS on a given address
@ -199,7 +206,8 @@ public:
private:
bool _shouldUnite(const int64_t now,const Address &source,const Address &destination);
bool _trySend(void *tPtr,Packet &packet,bool encrypt); // packet is modified if return is true
bool _trySend(void *tPtr,Packet &packet,bool encrypt,int32_t flowId = ZT_QOS_NO_FLOW); // packet is modified if return is true
void _sendViaSpecificPath(void *tPtr,SharedPtr<Peer> peer,SharedPtr<Path> viaPath,int64_t now,Packet &packet,bool encrypt,int32_t flowId);
const RuntimeEnvironment *const RR;
int64_t _lastBeaconResponse;
@ -220,6 +228,7 @@ private:
unsigned int totalFragments; // 0 if only frag0 received, waiting for frags
uint32_t haveFragments; // bit mask, LSB to MSB
volatile bool complete; // if true, packet is complete
volatile int32_t flowId;
Mutex lock;
};
RXQueueEntry _rxQueue[ZT_RX_QUEUE_SIZE];
@ -248,16 +257,18 @@ private:
/**
 * A packet held in the transmit queue until it can be sent
 *
 * Entries are appended to _txQueue when an immediate send attempt fails and
 * are retried later; creationTime is what the queue timeout is measured from.
 */
struct TXQueueEntry
{
	// NOTE(review): the default constructor leaves creationTime, encrypt and
	// flowId uninitialized -- confirm no caller reads a default-built entry.
	TXQueueEntry() {}

	/**
	 * @param d Destination address of the packet
	 * @param ct Time at which the entry was created
	 * @param p Packet to queue (copied into the entry)
	 * @param enc Whether the packet should be encrypted when it is sent
	 * @param fid Flow identifier to hand to the send path on retry
	 */
	TXQueueEntry(Address d,uint64_t ct,const Packet &p,bool enc,int32_t fid) :
		dest(d),
		creationTime(ct),
		packet(p),
		encrypt(enc),
		flowId(fid) {}

	Address dest;
	uint64_t creationTime;
	Packet packet; // unencrypted/unMAC'd packet -- this is done at send time
	bool encrypt;
	int32_t flowId;
};
std::list< TXQueueEntry > _txQueue;
Mutex _txQueue_m;
@ -289,7 +300,7 @@ private:
{
ManagedQueue(int id) :
id(id),
byteCredit(ZT_QOS_QUANTUM),
byteCredit(ZT_AQM_QUANTUM),
byteLength(0),
dropping(false)
{}

View file

@ -96,22 +96,24 @@ void Trace::peerConfirmingUnknownPath(void *const tPtr,const uint64_t networkId,
/**
 * Trace hook reporting that the link to a peer is fully redundant
 *
 * @param tPtr Pointer forwarded unchanged to ZT_LOCAL_TRACE
 * @param peer Peer whose address is printed in the trace message
 */
void Trace::peerLinkNowRedundant(void *const tPtr,Peer &peer)
{
ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is fully redundant",peer.address().toInt());
// NOTE(review): the commented-out line below duplicates the active call above -- confirm whether this trace is meant to be enabled or disabled
//ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is fully redundant",peer.address().toInt());
}
/**
 * Trace hook reporting that the link to a peer is no longer redundant
 *
 * @param tPtr Pointer forwarded unchanged to ZT_LOCAL_TRACE
 * @param peer Peer whose address is printed in the trace message
 */
void Trace::peerLinkNoLongerRedundant(void *const tPtr,Peer &peer)
{
ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is no longer redundant",peer.address().toInt());
// NOTE(review): the commented-out line below duplicates the active call above -- confirm whether this trace is meant to be enabled or disabled
//ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is no longer redundant",peer.address().toInt());
}
/**
 * Trace hook for aggregate link statistics of a peer
 *
 * The only statement in the body is inside a block comment, so this function
 * currently does nothing and neither parameter is used.
 *
 * @param tPtr Unused while the trace statement is commented out
 * @param peer Unused while the trace statement is commented out
 */
void Trace::peerLinkAggregateStatistics(void *const tPtr,Peer &peer)
{
/*
ZT_LOCAL_TRACE(tPtr,RR,"link to peer %.10llx is composed of (%d) physical paths %s, has packet delay variance (%.0f ms), mean latency (%.0f ms)",
peer.address().toInt(),
peer.aggregateLinkPhysicalPathCount(),
peer.interfaceListStr(),
peer.computeAggregateLinkPacketDelayVariance(),
peer.computeAggregateLinkMeanLatency());
*/
}
void Trace::peerLearnedNewPath(void *const tPtr,const uint64_t networkId,Peer &peer,const SharedPtr<Path> &newPath,const uint64_t packetId)

View file

@ -214,12 +214,12 @@ public:
return l;
}
/**
 * Linearly map a value from one range onto another
 *
 * A value equal to bigMin maps to targetMin and a value equal to bigMax maps
 * to targetMax. Values outside [bigMin,bigMax] are extrapolated, not clamped.
 *
 * @param value Value to re-scale
 * @param bigMin Lower bound of the source range
 * @param bigMax Upper bound of the source range
 * @param targetMin Lower bound of the destination range
 * @param targetMax Upper bound of the destination range
 * @return Re-scaled value, or targetMin if the source range has zero width
 */
static inline float normalize(float value, float bigMin, float bigMax, float targetMin, float targetMax)
{
	const float bigSpan = bigMax - bigMin;
	if (bigSpan == 0.0f)
		return targetMin; // zero-width source range: dividing would yield NaN or infinity
	const float smallSpan = targetMax - targetMin;
	const float valueScaled = (value - bigMin) / bigSpan;
	return targetMin + valueScaled * smallSpan;
}
/**
@ -253,6 +253,7 @@ public:
/**
 * Parse a base-10 integer with strtol and narrow the result to int
 *
 * The end pointer is not requested, so trailing characters are ignored.
 */
static inline int strToInt(const char *s)
{
	const long parsed = strtol(s,(char **)0,10);
	return (int)parsed;
}

/**
 * Parse a base-10 unsigned long with strtoul (end pointer not requested)
 */
static inline unsigned long strToULong(const char *s)
{
	const unsigned long parsed = strtoul(s,(char **)0,10);
	return parsed;
}

/**
 * Parse a base-10 long with strtol (end pointer not requested)
 */
static inline long strToLong(const char *s)
{
	const long parsed = strtol(s,(char **)0,10);
	return parsed;
}

/**
 * Parse a floating point number with strtod (end pointer not requested)
 */
static inline double strToDouble(const char *s)
{
	const double parsed = strtod(s,(char **)0);
	return parsed;
}
static inline unsigned long long strToU64(const char *s)
{
#ifdef __WINDOWS__