Refactor incoming packet (rxQueue/fragmentQueue) to eliminate variable length queues and merge queues. This is both faster and saves memory.

This commit is contained in:
Adam Ierymenko 2016-03-18 14:16:07 -07:00
parent 9f31cbd8b8
commit d6a1868d0a
6 changed files with 317 additions and 258 deletions

View file

@ -60,7 +60,6 @@ Switch::Switch(const RuntimeEnvironment *renv) :
RR(renv),
_lastBeaconResponse(0),
_outstandingWhoisRequests(32),
_defragQueue(32),
_lastUniteAttempt(8) // only really used on root servers and upstreams, and it'll grow there just fine
{
}
@ -72,11 +71,14 @@ Switch::~Switch()
void Switch::onRemotePacket(const InetAddress &localAddr,const InetAddress &fromAddr,const void *data,unsigned int len)
{
try {
const uint64_t now = RR->node->now();
if (len == 13) {
/* LEGACY: before VERB_PUSH_DIRECT_PATHS, peers used broadcast
* announcements on the LAN to solve the 'same network problem.' We
* no longer send these, but we'll listen for them for a while to
* locate peers with versions <1.0.4. */
Address beaconAddr(reinterpret_cast<const char *>(data) + 8,5);
if (beaconAddr == RR->identity.address())
return;
@ -84,7 +86,6 @@ void Switch::onRemotePacket(const InetAddress &localAddr,const InetAddress &from
return;
SharedPtr<Peer> peer(RR->topology->getPeer(beaconAddr));
if (peer) { // we'll only respond to beacons from known peers
const uint64_t now = RR->node->now();
if ((now - _lastBeaconResponse) >= 2500) { // limit rate of responses
_lastBeaconResponse = now;
Packet outp(peer->address(),RR->identity.address(),Packet::VERB_NOP);
@ -92,11 +93,209 @@ void Switch::onRemotePacket(const InetAddress &localAddr,const InetAddress &from
RR->node->putPacket(localAddr,fromAddr,outp.data(),outp.size());
}
}
} else if (len > ZT_PROTO_MIN_FRAGMENT_LENGTH) {
if (((const unsigned char *)data)[ZT_PACKET_FRAGMENT_IDX_FRAGMENT_INDICATOR] == ZT_PACKET_FRAGMENT_INDICATOR) {
_handleRemotePacketFragment(localAddr,fromAddr,data,len);
} else if (len >= ZT_PROTO_MIN_PACKET_LENGTH) {
_handleRemotePacketHead(localAddr,fromAddr,data,len);
} else if (len > ZT_PROTO_MIN_FRAGMENT_LENGTH) { // min length check is important!
if (reinterpret_cast<const uint8_t *>(data)[ZT_PACKET_FRAGMENT_IDX_FRAGMENT_INDICATOR] == ZT_PACKET_FRAGMENT_INDICATOR) {
// Handle fragment ----------------------------------------------------
Packet::Fragment fragment(data,len);
Address destination(fragment.destination());
if (destination != RR->identity.address()) {
// Fragment is not for us, so try to relay it
if (fragment.hops() < ZT_RELAY_MAX_HOPS) {
fragment.incrementHops();
// Note: we don't bother initiating NAT-t for fragments, since heads will set that off.
// It wouldn't hurt anything, just redundant and unnecessary.
SharedPtr<Peer> relayTo = RR->topology->getPeer(destination);
if ((!relayTo)||(!relayTo->send(fragment.data(),fragment.size(),now))) {
#ifdef ZT_ENABLE_CLUSTER
if (RR->cluster) {
RR->cluster->sendViaCluster(Address(),destination,fragment.data(),fragment.size(),false);
return;
}
#endif
// Don't know peer or no direct path -- so relay via root server
relayTo = RR->topology->getBestRoot();
if (relayTo)
relayTo->send(fragment.data(),fragment.size(),now);
}
} else {
TRACE("dropped relay [fragment](%s) -> %s, max hops exceeded",fromAddr.toString().c_str(),destination.toString().c_str());
}
} else {
// Fragment looks like ours
const uint64_t fragmentPacketId = fragment.packetId();
const unsigned int fragmentNumber = fragment.fragmentNumber();
const unsigned int totalFragments = fragment.totalFragments();
if ((totalFragments <= ZT_MAX_PACKET_FRAGMENTS)&&(fragmentNumber < ZT_MAX_PACKET_FRAGMENTS)&&(fragmentNumber > 0)&&(totalFragments > 1)) {
// Fragment appears basically sane. Its fragment number must be
// 1 or more, since a Packet with fragmented bit set is fragment 0.
// Total fragments must be more than 1, otherwise why are we
// seeing a Packet::Fragment?
Mutex::Lock _l(_rxQueue_m);
RXQueueEntry *const rq = _findRXQueueEntry(fragmentPacketId);
if ((!rq->timestamp)||(rq->packetId != fragmentPacketId)) {
// No packet found, so we received a fragment without its head.
//TRACE("fragment (%u/%u) of %.16llx from %s",fragmentNumber + 1,totalFragments,fragmentPacketId,fromAddr.toString().c_str());
rq->timestamp = now;
rq->packetId = fragmentPacketId;
rq->frags[fragmentNumber - 1] = fragment;
rq->totalFragments = totalFragments; // total fragment count is known
rq->haveFragments = 1 << fragmentNumber; // we have only this fragment
rq->complete = false;
} else if (!(rq->haveFragments & (1 << fragmentNumber))) {
// We have other fragments and maybe the head, so add this one and check
//TRACE("fragment (%u/%u) of %.16llx from %s",fragmentNumber + 1,totalFragments,fragmentPacketId,fromAddr.toString().c_str());
rq->frags[fragmentNumber - 1] = fragment;
rq->totalFragments = totalFragments;
if (Utils::countBits(rq->haveFragments |= (1 << fragmentNumber)) == totalFragments) {
// We have all fragments -- assemble and process full Packet
//TRACE("packet %.16llx is complete, assembling and processing...",fragmentPacketId);
for(unsigned int f=1;f<totalFragments;++f)
rq->frag0.append(rq->frags[f - 1].payload(),rq->frags[f - 1].payloadLength());
if (rq->frag0.tryDecode(RR,false)) {
rq->timestamp = 0; // packet decoded, free entry
} else {
rq->complete = true; // set complete flag but leave entry since it probably needs WHOIS or something
}
}
} // else this is a duplicate fragment, ignore
}
}
// --------------------------------------------------------------------
} else if (len >= ZT_PROTO_MIN_PACKET_LENGTH) { // min length check is important!
// Handle packet head -------------------------------------------------
// See packet format in Packet.hpp to understand this
const uint64_t packetId = (
(((uint64_t)reinterpret_cast<const uint8_t *>(data)[0]) << 56) |
(((uint64_t)reinterpret_cast<const uint8_t *>(data)[1]) << 48) |
(((uint64_t)reinterpret_cast<const uint8_t *>(data)[2]) << 40) |
(((uint64_t)reinterpret_cast<const uint8_t *>(data)[3]) << 32) |
(((uint64_t)reinterpret_cast<const uint8_t *>(data)[4]) << 24) |
(((uint64_t)reinterpret_cast<const uint8_t *>(data)[5]) << 16) |
(((uint64_t)reinterpret_cast<const uint8_t *>(data)[6]) << 8) |
((uint64_t)reinterpret_cast<const uint8_t *>(data)[7])
);
const Address destination(reinterpret_cast<const uint8_t *>(data) + 8,ZT_ADDRESS_LENGTH);
const Address source(reinterpret_cast<const uint8_t *>(data) + 13,ZT_ADDRESS_LENGTH);
// Catch this and toss it -- it would never work, but it could happen if we somehow
// mistakenly guessed an address we're bound to as a destination for another peer.
if (source == RR->identity.address())
return;
//TRACE("<< %.16llx %s -> %s (size: %u)",(unsigned long long)packet->packetId(),source.toString().c_str(),destination.toString().c_str(),packet->size());
if (destination != RR->identity.address()) {
Packet packet(data,len);
// Packet is not for us, so try to relay it
if (packet.hops() < ZT_RELAY_MAX_HOPS) {
packet.incrementHops();
SharedPtr<Peer> relayTo = RR->topology->getPeer(destination);
if ((relayTo)&&((relayTo->send(packet.data(),packet.size(),now)))) {
Mutex::Lock _l(_lastUniteAttempt_m);
uint64_t &luts = _lastUniteAttempt[_LastUniteKey(source,destination)];
if ((now - luts) >= ZT_MIN_UNITE_INTERVAL) {
luts = now;
unite(source,destination);
}
} else {
#ifdef ZT_ENABLE_CLUSTER
if (RR->cluster) {
bool shouldUnite;
{
Mutex::Lock _l(_lastUniteAttempt_m);
uint64_t &luts = _lastUniteAttempt[_LastUniteKey(source,destination)];
shouldUnite = ((now - luts) >= ZT_MIN_UNITE_INTERVAL);
if (shouldUnite)
luts = now;
}
RR->cluster->sendViaCluster(source,destination,packet.data(),packet.size(),shouldUnite);
return;
}
#endif
relayTo = RR->topology->getBestRoot(&source,1,true);
if (relayTo)
relayTo->send(packet.data(),packet.size(),now);
}
} else {
TRACE("dropped relay %s(%s) -> %s, max hops exceeded",packet.source().toString().c_str(),fromAddr.toString().c_str(),destination.toString().c_str());
}
} else if ((reinterpret_cast<const uint8_t *>(data)[ZT_PACKET_IDX_FLAGS] & ZT_PROTO_FLAG_FRAGMENTED) != 0) {
// Packet is the head of a fragmented packet series
Mutex::Lock _l(_rxQueue_m);
RXQueueEntry *const rq = _findRXQueueEntry(packetId);
if ((!rq->timestamp)||(rq->packetId != packetId)) {
// If we have no other fragments yet, create an entry and save the head
//TRACE("fragment (0/?) of %.16llx from %s",pid,fromAddr.toString().c_str());
rq->timestamp = now;
rq->packetId = packetId;
rq->frag0.init(data,len,localAddr,fromAddr,now);
rq->totalFragments = 0;
rq->haveFragments = 1;
rq->complete = false;
} else if (!(rq->haveFragments & 1)) {
// If we have other fragments but no head, see if we are complete with the head
if ((rq->totalFragments > 1)&&(Utils::countBits(rq->haveFragments |= 1) == rq->totalFragments)) {
// We have all fragments -- assemble and process full Packet
//TRACE("packet %.16llx is complete, assembling and processing...",pid);
rq->frag0.init(data,len,localAddr,fromAddr,now);
for(unsigned int f=1;f<rq->totalFragments;++f)
rq->frag0.append(rq->frags[f - 1].payload(),rq->frags[f - 1].payloadLength());
if (rq->frag0.tryDecode(RR,false)) {
rq->timestamp = 0; // packet decoded, free entry
} else {
rq->complete = true; // set complete flag but leave entry since it probably needs WHOIS or something
}
} else {
// Still waiting on more fragments, but keep the head
rq->frag0.init(data,len,localAddr,fromAddr,now);
}
} // else this is a duplicate head, ignore
} else {
// Packet is unfragmented, so just process it
IncomingPacket packet(data,len,localAddr,fromAddr,now);
if (!packet.tryDecode(RR,false)) {
Mutex::Lock _l(_rxQueue_m);
RXQueueEntry *rq = &(_rxQueue[ZT_RX_QUEUE_SIZE - 1]);
unsigned long i = ZT_RX_QUEUE_SIZE - 1;
while ((i)&&(rq->timestamp)) {
RXQueueEntry *tmp = &(_rxQueue[--i]);
if (tmp->timestamp < rq->timestamp)
rq = tmp;
}
rq->timestamp = now;
rq->packetId = packetId;
rq->frag0 = packet;
rq->totalFragments = 1;
rq->haveFragments = 1;
rq->complete = true;
}
}
// --------------------------------------------------------------------
}
}
} catch (std::exception &ex) {
@ -451,10 +650,13 @@ void Switch::doAnythingWaitingForPeer(const SharedPtr<Peer> &peer)
{ // finish processing any packets waiting on peer's public key / identity
Mutex::Lock _l(_rxQueue_m);
for(std::list< SharedPtr<IncomingPacket> >::iterator rxi(_rxQueue.begin());rxi!=_rxQueue.end();) {
if ((*rxi)->tryDecode(RR,false))
_rxQueue.erase(rxi++);
else ++rxi;
unsigned long i = ZT_RX_QUEUE_SIZE;
while (i) {
RXQueueEntry *rq = &(_rxQueue[--i]);
if ((rq->timestamp)&&(rq->complete)) {
if (rq->frag0.tryDecode(RR,false))
rq->timestamp = 0;
}
}
}
@ -546,29 +748,6 @@ unsigned long Switch::doTimerTasks(uint64_t now)
}
}
{ // Time out RX queue packets that never got WHOIS lookups or other info.
Mutex::Lock _l(_rxQueue_m);
for(std::list< SharedPtr<IncomingPacket> >::iterator i(_rxQueue.begin());i!=_rxQueue.end();) {
if ((now - (*i)->receiveTime()) > ZT_RECEIVE_QUEUE_TIMEOUT) {
TRACE("RX %s -> %s timed out",(*i)->source().toString().c_str(),(*i)->destination().toString().c_str());
_rxQueue.erase(i++);
} else ++i;
}
}
{ // Time out packets that didn't get all their fragments.
Mutex::Lock _l(_defragQueue_m);
Hashtable< uint64_t,DefragQueueEntry >::Iterator i(_defragQueue);
uint64_t *packetId = (uint64_t *)0;
DefragQueueEntry *qe = (DefragQueueEntry *)0;
while (i.next(packetId,qe)) {
if ((now - qe->creationTime) > ZT_FRAGMENTED_PACKET_RECEIVE_TIMEOUT) {
TRACE("incomplete fragmented packet %.16llx timed out, fragments discarded",*packetId);
_defragQueue.erase(*packetId);
}
}
}
{ // Remove really old last unite attempt entries to keep table size controlled
Mutex::Lock _l(_lastUniteAttempt_m);
Hashtable< _LastUniteKey,uint64_t >::Iterator i(_lastUniteAttempt);
@ -583,180 +762,6 @@ unsigned long Switch::doTimerTasks(uint64_t now)
return nextDelay;
}
void Switch::_handleRemotePacketFragment(const InetAddress &localAddr,const InetAddress &fromAddr,const void *data,unsigned int len)
{
Packet::Fragment fragment(data,len);
Address destination(fragment.destination());
if (destination != RR->identity.address()) {
// Fragment is not for us, so try to relay it
if (fragment.hops() < ZT_RELAY_MAX_HOPS) {
fragment.incrementHops();
// Note: we don't bother initiating NAT-t for fragments, since heads will set that off.
// It wouldn't hurt anything, just redundant and unnecessary.
SharedPtr<Peer> relayTo = RR->topology->getPeer(destination);
if ((!relayTo)||(!relayTo->send(fragment.data(),fragment.size(),RR->node->now()))) {
#ifdef ZT_ENABLE_CLUSTER
if (RR->cluster) {
RR->cluster->sendViaCluster(Address(),destination,fragment.data(),fragment.size(),false);
return;
}
#endif
// Don't know peer or no direct path -- so relay via root server
relayTo = RR->topology->getBestRoot();
if (relayTo)
relayTo->send(fragment.data(),fragment.size(),RR->node->now());
}
} else {
TRACE("dropped relay [fragment](%s) -> %s, max hops exceeded",fromAddr.toString().c_str(),destination.toString().c_str());
}
} else {
// Fragment looks like ours
uint64_t pid = fragment.packetId();
unsigned int fno = fragment.fragmentNumber();
unsigned int tf = fragment.totalFragments();
if ((tf <= ZT_MAX_PACKET_FRAGMENTS)&&(fno < ZT_MAX_PACKET_FRAGMENTS)&&(fno > 0)&&(tf > 1)) {
// Fragment appears basically sane. Its fragment number must be
// 1 or more, since a Packet with fragmented bit set is fragment 0.
// Total fragments must be more than 1, otherwise why are we
// seeing a Packet::Fragment?
Mutex::Lock _l(_defragQueue_m);
DefragQueueEntry &dq = _defragQueue[pid];
if (!dq.creationTime) {
// We received a Packet::Fragment without its head, so queue it and wait
dq.creationTime = RR->node->now();
dq.frags[fno - 1] = fragment;
dq.totalFragments = tf; // total fragment count is known
dq.haveFragments = 1 << fno; // we have only this fragment
//TRACE("fragment (%u/%u) of %.16llx from %s",fno + 1,tf,pid,fromAddr.toString().c_str());
} else if (!(dq.haveFragments & (1 << fno))) {
// We have other fragments and maybe the head, so add this one and check
dq.frags[fno - 1] = fragment;
dq.totalFragments = tf;
//TRACE("fragment (%u/%u) of %.16llx from %s",fno + 1,tf,pid,fromAddr.toString().c_str());
if (Utils::countBits(dq.haveFragments |= (1 << fno)) == tf) {
// We have all fragments -- assemble and process full Packet
//TRACE("packet %.16llx is complete, assembling and processing...",pid);
SharedPtr<IncomingPacket> packet(dq.frag0);
for(unsigned int f=1;f<tf;++f)
packet->append(dq.frags[f - 1].payload(),dq.frags[f - 1].payloadLength());
_defragQueue.erase(pid); // dq no longer valid after this
if (!packet->tryDecode(RR,false)) {
Mutex::Lock _l(_rxQueue_m);
_rxQueue.push_back(packet);
}
}
} // else this is a duplicate fragment, ignore
}
}
}
void Switch::_handleRemotePacketHead(const InetAddress &localAddr,const InetAddress &fromAddr,const void *data,unsigned int len)
{
const uint64_t now = RR->node->now();
SharedPtr<IncomingPacket> packet(new IncomingPacket(data,len,localAddr,fromAddr,now));
Address source(packet->source());
Address destination(packet->destination());
// Catch this and toss it -- it would never work, but it could happen if we somehow
// mistakenly guessed an address we're bound to as a destination for another peer.
if (source == RR->identity.address())
return;
//TRACE("<< %.16llx %s -> %s (size: %u)",(unsigned long long)packet->packetId(),source.toString().c_str(),destination.toString().c_str(),packet->size());
if (destination != RR->identity.address()) {
// Packet is not for us, so try to relay it
if (packet->hops() < ZT_RELAY_MAX_HOPS) {
packet->incrementHops();
SharedPtr<Peer> relayTo = RR->topology->getPeer(destination);
if ((relayTo)&&((relayTo->send(packet->data(),packet->size(),now)))) {
Mutex::Lock _l(_lastUniteAttempt_m);
uint64_t &luts = _lastUniteAttempt[_LastUniteKey(source,destination)];
if ((now - luts) >= ZT_MIN_UNITE_INTERVAL) {
luts = now;
unite(source,destination);
}
} else {
#ifdef ZT_ENABLE_CLUSTER
if (RR->cluster) {
bool shouldUnite;
{
Mutex::Lock _l(_lastUniteAttempt_m);
uint64_t &luts = _lastUniteAttempt[_LastUniteKey(source,destination)];
shouldUnite = ((now - luts) >= ZT_MIN_UNITE_INTERVAL);
if (shouldUnite)
luts = now;
}
RR->cluster->sendViaCluster(source,destination,packet->data(),packet->size(),shouldUnite);
return;
}
#endif
relayTo = RR->topology->getBestRoot(&source,1,true);
if (relayTo)
relayTo->send(packet->data(),packet->size(),now);
}
} else {
TRACE("dropped relay %s(%s) -> %s, max hops exceeded",packet->source().toString().c_str(),fromAddr.toString().c_str(),destination.toString().c_str());
}
} else if (packet->fragmented()) {
// Packet is the head of a fragmented packet series
uint64_t pid = packet->packetId();
Mutex::Lock _l(_defragQueue_m);
DefragQueueEntry &dq = _defragQueue[pid];
if (!dq.creationTime) {
// If we have no other fragments yet, create an entry and save the head
dq.creationTime = now;
dq.frag0 = packet;
dq.totalFragments = 0; // 0 == unknown, waiting for Packet::Fragment
dq.haveFragments = 1; // head is first bit (left to right)
//TRACE("fragment (0/?) of %.16llx from %s",pid,fromAddr.toString().c_str());
} else if (!(dq.haveFragments & 1)) {
// If we have other fragments but no head, see if we are complete with the head
if ((dq.totalFragments)&&(Utils::countBits(dq.haveFragments |= 1) == dq.totalFragments)) {
// We have all fragments -- assemble and process full Packet
//TRACE("packet %.16llx is complete, assembling and processing...",pid);
// packet already contains head, so append fragments
for(unsigned int f=1;f<dq.totalFragments;++f)
packet->append(dq.frags[f - 1].payload(),dq.frags[f - 1].payloadLength());
_defragQueue.erase(pid); // dq no longer valid after this
if (!packet->tryDecode(RR,false)) {
Mutex::Lock _l(_rxQueue_m);
_rxQueue.push_back(packet);
}
} else {
// Still waiting on more fragments, so queue the head
dq.frag0 = packet;
}
} // else this is a duplicate head, ignore
} else {
// Packet is unfragmented, so just process it
if (!packet->tryDecode(RR,false)) {
Mutex::Lock _l(_rxQueue_m);
_rxQueue.push_back(packet);
}
}
}
Address Switch::_sendWhoisRequest(const Address &addr,const Address *peersAlreadyConsulted,unsigned int numPeersAlreadyConsulted)
{
SharedPtr<Peer> root(RR->topology->getBestRoot(peersAlreadyConsulted,numPeersAlreadyConsulted,false));