SSE-optimized Salsa20 -- anywhere from 20% to 50% faster than the plain C version
This commit is contained in:
parent
00f9305ad8
commit
12692c551e
5 changed files with 262 additions and 120 deletions
|
@ -11,6 +11,17 @@
|
|||
|
||||
#include "Constants.hpp"
|
||||
|
||||
// Support definitions for the optional SSE2 code path.
//
// When ZT_SALSA20_SSE is defined, the SSE2 intrinsics header is included and
// ZT_SALSA20_SSE_ALIGN expands to the compiler-specific specifier requesting
// 16-byte alignment. Otherwise ZT_SALSA20_SSE_ALIGN expands to nothing.
#ifdef ZT_SALSA20_SSE
#include <emmintrin.h>
// __GNUC__ is what GCC-compatible compilers predefine; __GCC__ is still
// honoured for builds that define it themselves (e.g. from a project header).
#if defined(__GCC__) || defined(__GNUC__)
#define ZT_SALSA20_SSE_ALIGN __attribute__((aligned (16)))
#else
// Fallback assumes a compiler that understands the MSVC __declspec syntax.
#define ZT_SALSA20_SSE_ALIGN __declspec(align(16))
#endif
#else
#define ZT_SALSA20_SSE_ALIGN
#endif
|
||||
|
||||
namespace ZeroTier {
|
||||
|
||||
/**
|
||||
|
@ -68,7 +79,12 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
uint32_t _state[16];
|
||||
// State storage: 64 bytes exposed as sixteen 32-bit words (i) and, when
// ZT_SALSA20_SSE is defined, also as four 128-bit vectors (v) over the same
// memory. ZT_SALSA20_SSE_ALIGN supplies the alignment specifier for SSE
// builds and is empty otherwise.
// NOTE(review): the member is volatile-qualified; confirm that every compiler
// targeted accepts volatile __m128i operands where v[] is used.
volatile ZT_SALSA20_SSE_ALIGN union {
#ifdef ZT_SALSA20_SSE
	__m128i v[4];   // vector view, present only in SSE builds
#endif // ZT_SALSA20_SSE
	uint32_t i[16]; // word view, always present
} _state;
|
||||
unsigned int _roundsDiv2;
|
||||
};
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue