Some micro-optimizations, including a memcpy that is faster than the default memcpy on Linux for most distro/compiler versions.
This commit is contained in:
parent
cb8e7b4d5e
commit
0608704626
22 changed files with 163 additions and 81 deletions
|
@ -387,27 +387,27 @@ static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = val
|
|||
|
||||
/* Read a U16 from memPtr: copy sizeof(val) bytes into a local and return it.
 * Going through a byte copy rather than a pointer cast presumably keeps the
 * load valid for unaligned addresses -- confirm against the surrounding
 * LZ4_FORCE_MEMORY_ACCESS branches.
 * NOTE(review): the two body lines below are the two sides of this diff hunk
 * (memcpy vs. ZT_FAST_MEMCPY); only one exists in the real file. The
 * definition of ZT_FAST_MEMCPY is not visible here -- presumably a drop-in
 * memcpy replacement. */
static inline U16 LZ4_read16(const void* memPtr)
|
||||
{
|
||||
U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
|
||||
U16 val; ZT_FAST_MEMCPY(&val, memPtr, sizeof(val)); return val;
|
||||
}
|
||||
|
||||
/* Read a U32 from memPtr: copy sizeof(val) bytes into a local and return it.
 * NOTE(review): the two body lines below are the two sides of this diff hunk
 * (memcpy vs. ZT_FAST_MEMCPY); only one exists in the real file. The
 * definition of ZT_FAST_MEMCPY is not visible here -- presumably a drop-in
 * memcpy replacement. */
static inline U32 LZ4_read32(const void* memPtr)
|
||||
{
|
||||
U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
|
||||
U32 val; ZT_FAST_MEMCPY(&val, memPtr, sizeof(val)); return val;
|
||||
}
|
||||
|
||||
/* Read one reg_t from memPtr: copy sizeof(val) bytes into a local and return
 * it. reg_t is declared elsewhere; presumably a register-width integer --
 * confirm against its typedef.
 * NOTE(review): the two body lines below are the two sides of this diff hunk
 * (memcpy vs. ZT_FAST_MEMCPY); only one exists in the real file. */
static inline reg_t LZ4_read_ARCH(const void* memPtr)
|
||||
{
|
||||
reg_t val; memcpy(&val, memPtr, sizeof(val)); return val;
|
||||
reg_t val; ZT_FAST_MEMCPY(&val, memPtr, sizeof(val)); return val;
|
||||
}
|
||||
|
||||
/* Store a U16 at memPtr by copying sizeof(value) bytes from the by-value
 * argument; bytes are written in the host's in-memory representation of
 * value (no byte swapping is done here).
 * NOTE(review): the two body lines below are the two sides of this diff hunk
 * (memcpy vs. ZT_FAST_MEMCPY); only one exists in the real file. */
static inline void LZ4_write16(void* memPtr, U16 value)
|
||||
{
|
||||
memcpy(memPtr, &value, sizeof(value));
|
||||
ZT_FAST_MEMCPY(memPtr, &value, sizeof(value));
|
||||
}
|
||||
|
||||
/* Store a U32 at memPtr by copying sizeof(value) bytes from the by-value
 * argument; bytes are written in the host's in-memory representation of
 * value (no byte swapping is done here).
 * NOTE(review): the two body lines below are the two sides of this diff hunk
 * (memcpy vs. ZT_FAST_MEMCPY); only one exists in the real file. */
static inline void LZ4_write32(void* memPtr, U32 value)
|
||||
{
|
||||
memcpy(memPtr, &value, sizeof(value));
|
||||
ZT_FAST_MEMCPY(memPtr, &value, sizeof(value));
|
||||
}
|
||||
|
||||
#endif /* LZ4_FORCE_MEMORY_ACCESS */
|
||||
|
@ -435,7 +435,7 @@ static inline void LZ4_writeLE16(void* memPtr, U16 value)
|
|||
|
||||
/* Copy exactly 8 bytes from src to dst.
 * NOTE(review): the two body lines below are the two sides of this diff hunk
 * (memcpy vs. ZT_FAST_MEMCPY); only one exists in the real file. Whether
 * ZT_FAST_MEMCPY tolerates overlapping src/dst is not visible here -- treat
 * it like memcpy (no overlap) until confirmed. */
static inline void LZ4_copy8(void* dst, const void* src)
|
||||
{
|
||||
memcpy(dst,src,8);
|
||||
ZT_FAST_MEMCPY(dst,src,8);
|
||||
}
|
||||
|
||||
/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
|
||||
|
@ -830,7 +830,7 @@ _last_literals:
|
|||
} else {
|
||||
*op++ = (BYTE)(lastRun<<ML_BITS);
|
||||
}
|
||||
memcpy(op, anchor, lastRun);
|
||||
ZT_FAST_MEMCPY(op, anchor, lastRun);
|
||||
op += lastRun;
|
||||
}
|
||||
|
||||
|
@ -956,7 +956,7 @@ FORCE_INLINE int LZ4_decompress_generic(
|
|||
if ((!endOnInput) && (cpy != oend)) goto _output_error; /* Error : block decoding must stop exactly there */
|
||||
if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error; /* Error : input must be consumed */
|
||||
}
|
||||
memcpy(op, ip, length);
|
||||
ZT_FAST_MEMCPY(op, ip, length);
|
||||
ip += length;
|
||||
op += length;
|
||||
break; /* Necessarily EOF, due to parsing restrictions */
|
||||
|
@ -995,14 +995,14 @@ FORCE_INLINE int LZ4_decompress_generic(
|
|||
/* match encompass external dictionary and current block */
|
||||
size_t const copySize = (size_t)(lowPrefix-match);
|
||||
size_t const restSize = length - copySize;
|
||||
memcpy(op, dictEnd - copySize, copySize);
|
||||
ZT_FAST_MEMCPY(op, dictEnd - copySize, copySize);
|
||||
op += copySize;
|
||||
if (restSize > (size_t)(op-lowPrefix)) { /* overlap copy */
|
||||
BYTE* const endOfMatch = op + restSize;
|
||||
const BYTE* copyFrom = lowPrefix;
|
||||
while (op < endOfMatch) *op++ = *copyFrom++;
|
||||
} else {
|
||||
memcpy(op, lowPrefix, restSize);
|
||||
ZT_FAST_MEMCPY(op, lowPrefix, restSize);
|
||||
op += restSize;
|
||||
} }
|
||||
continue;
|
||||
|
@ -1017,7 +1017,7 @@ FORCE_INLINE int LZ4_decompress_generic(
|
|||
op[2] = match[2];
|
||||
op[3] = match[3];
|
||||
match += dec32table[offset];
|
||||
memcpy(op+4, match, 4);
|
||||
ZT_FAST_MEMCPY(op+4, match, 4);
|
||||
match -= dec64;
|
||||
} else { LZ4_copy8(op, match); match+=8; }
|
||||
op += 8;
|
||||
|
@ -1095,7 +1095,7 @@ void Packet::armor(const void *key,bool encryptPayload,unsigned int counter)
|
|||
s20.crypt12(payload,payload,payloadLen);
|
||||
uint64_t mac[2];
|
||||
Poly1305::compute(mac,payload,payloadLen,macKey);
|
||||
memcpy(data + ZT_PACKET_IDX_MAC,mac,8);
|
||||
ZT_FAST_MEMCPY(data + ZT_PACKET_IDX_MAC,mac,8);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1167,7 +1167,7 @@ bool Packet::compress()
|
|||
if ((cl > 0)&&(cl < pl)) {
|
||||
data[ZT_PACKET_IDX_VERB] |= (char)ZT_PROTO_VERB_FLAG_COMPRESSED;
|
||||
setSize((unsigned int)cl + ZT_PACKET_IDX_PAYLOAD);
|
||||
memcpy(data + ZT_PACKET_IDX_PAYLOAD,buf,cl);
|
||||
ZT_FAST_MEMCPY(data + ZT_PACKET_IDX_PAYLOAD,buf,cl);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -1187,7 +1187,7 @@ bool Packet::uncompress()
|
|||
int ucl = LZ4_decompress_safe((const char *)data + ZT_PACKET_IDX_PAYLOAD,buf,compLen,sizeof(buf));
|
||||
if ((ucl > 0)&&(ucl <= (int)(capacity() - ZT_PACKET_IDX_PAYLOAD))) {
|
||||
setSize((unsigned int)ucl + ZT_PACKET_IDX_PAYLOAD);
|
||||
memcpy(data + ZT_PACKET_IDX_PAYLOAD,buf,ucl);
|
||||
ZT_FAST_MEMCPY(data + ZT_PACKET_IDX_PAYLOAD,buf,ucl);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue