Remove the ASM Salsa20 implementation since it will no longer be the default in 2.x; this also reduces build complexity.

This commit is contained in:
Adam Ierymenko 2019-08-16 14:26:25 -07:00
parent 2b681c37ac
commit 51a25fdec9
No known key found for this signature in database
GPG key ID: 1657198823E52A61
5 changed files with 284 additions and 230 deletions

View file

@ -34,82 +34,6 @@ public:
/** Default constructor. The body is empty, so no initialization is performed here. */
inline Salsa20() {}
/**
 * Destructor. Hands the whole _state member to Utils::burn().
 * NOTE(review): presumably this securely wipes the cipher state so it does
 * not linger in memory -- confirm against Utils::burn().
 */
inline ~Salsa20() { Utils::burn(&_state,sizeof(_state)); }
/**
* XOR d with s
*
* This is done efficiently using e.g. SSE if available. It's used when
* alternative Salsa20 implementations are used in Packet and is here
* since this is where all the SSE stuff is already included.
*
* @param d Destination to XOR
* @param s Source bytes to XOR with destination
* @param len Length of s and d
*/
static inline void memxor(uint8_t *d,const uint8_t *s,unsigned int len)
{
#ifdef ZT_SALSA20_SSE
	// Bulk SSE pass: consume 128 bytes per iteration as eight 128-bit lanes.
	// Every lane is loaded (source and destination) and XORed before any
	// lane is written back, using the unaligned load/store intrinsics.
	for (;len >= 128;len -= 128) {
		__m128i mixed[8];
		for (unsigned int lane = 0;lane < 8;++lane) {
			const __m128i srcLane = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s + (lane << 4)));
			mixed[lane] = srcLane;
		}
		for (unsigned int lane = 0;lane < 8;++lane) {
			const __m128i dstLane = _mm_loadu_si128(reinterpret_cast<__m128i *>(d + (lane << 4)));
			mixed[lane] = _mm_xor_si128(dstLane,mixed[lane]);
		}
		for (unsigned int lane = 0;lane < 8;++lane)
			_mm_storeu_si128(reinterpret_cast<__m128i *>(d + (lane << 4)),mixed[lane]);
		s += 128;
		d += 128;
	}

	// Remaining whole 16-byte chunks, one 128-bit lane at a time.
	for (;len >= 16;len -= 16) {
		__m128i *const dstVec = reinterpret_cast<__m128i *>(d);
		const __m128i dstLane = _mm_loadu_si128(dstVec);
		const __m128i srcLane = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s));
		_mm_storeu_si128(dstVec,_mm_xor_si128(dstLane,srcLane));
		s += 16;
		d += 16;
	}
#else
#ifndef ZT_NO_TYPE_PUNNING
	// No SSE: XOR 16 bytes per iteration as two 64-bit words, low word first.
	// NOTE(review): this reads and writes the byte buffers through uint64_t
	// pointers, so it presumably relies on the target tolerating unaligned
	// 64-bit access; ZT_NO_TYPE_PUNNING disables this path.
	for (;len >= 16;len -= 16) {
		uint64_t *const dstWords = reinterpret_cast<uint64_t *>(d);
		const uint64_t *const srcWords = reinterpret_cast<const uint64_t *>(s);
		dstWords[0] ^= srcWords[0];
		dstWords[1] ^= srcWords[1];
		s += 16;
		d += 16;
	}
#endif
#endif

	// Byte-wise tail: whatever the wide paths above did not consume
	// (or the entire buffer when no wide path is compiled in).
	for (;len != 0;--len) {
		*d ^= *s;
		++d;
		++s;
	}
}
/**
* @param key 256-bit (32 byte) key
* @param iv 64-bit initialization vector