SSE-optimized Salsa20 -- anywhere from 20% to 50% faster than the plain C version

This commit is contained in:
Adam Ierymenko 2014-07-15 17:56:09 -07:00
parent 00f9305ad8
commit 12692c551e
5 changed files with 262 additions and 120 deletions

View file

@ -11,6 +11,17 @@
#include "Constants.hpp"
// Salsa20 SSE support: when ZT_SALSA20_SSE is defined, pull in the SSE2
// intrinsics header and define ZT_SALSA20_SSE_ALIGN as a 16-byte alignment
// specifier; otherwise ZT_SALSA20_SSE_ALIGN expands to nothing.
#ifdef ZT_SALSA20_SSE
#include <emmintrin.h>
// GCC and Clang predefine __GNUC__; "__GCC__" is not a predefined macro, so
// testing it would always fall through to the MSVC-style branch below.
#if defined(__GNUC__) || defined(__clang__)
#define ZT_SALSA20_SSE_ALIGN __attribute__((aligned (16)))
#else
#define ZT_SALSA20_SSE_ALIGN __declspec(align(16))
#endif
#else
#define ZT_SALSA20_SSE_ALIGN
#endif
namespace ZeroTier {
/**
@ -68,7 +79,12 @@ public:
}
private:
uint32_t _state[16];
// State storage. The union exposes the same memory as sixteen 32-bit words
// (i) and, when ZT_SALSA20_SSE is defined, also as four __m128i vectors (v).
// ZT_SALSA20_SSE_ALIGN is the alignment-specifier macro defined near the top
// of this file; it expands to nothing when ZT_SALSA20_SSE is not defined.
volatile ZT_SALSA20_SSE_ALIGN union {
#ifdef ZT_SALSA20_SSE
__m128i v[4]; // only present in SSE builds
#endif
uint32_t i[16]; // always present: word-wise view of the state
} _state;
unsigned int _roundsDiv2;
};