From 88df903283b721e5dfcb488331fd90c45f940d25 Mon Sep 17 00:00:00 2001 From: Samuel Neves Date: Sat, 11 Jun 2016 17:47:44 +0100 Subject: remove unused code, vars --- ref/blake2-impl.h | 40 +++++++++-------- ref/blake2b-ref.c | 12 ----- ref/blake2s-ref.c | 12 ----- sse/blake2-impl.h | 40 +++++++++-------- sse/blake2b.c | 128 +++++++++++++++++++---------------------------------- sse/blake2s.c | 129 ++++++++++++++++++++---------------------------------- sse/makefile | 2 +- 7 files changed, 135 insertions(+), 228 deletions(-) diff --git a/ref/blake2-impl.h b/ref/blake2-impl.h index 46440bf..03df0b5 100644 --- a/ref/blake2-impl.h +++ b/ref/blake2-impl.h @@ -18,7 +18,19 @@ #include <stdint.h> #include <string.h> -static uint32_t load32( const void *src ) +#if !defined(__cplusplus) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L) + #if defined(_MSC_VER) + #define BLAKE2_INLINE __inline + #elif defined(__GNUC__) + #define BLAKE2_INLINE __inline__ + #else + #define BLAKE2_INLINE + #endif +#else + #define BLAKE2_INLINE inline +#endif + +static BLAKE2_INLINE uint32_t load32( const void *src ) { #if defined(NATIVE_LITTLE_ENDIAN) uint32_t w; @@ -33,7 +45,7 @@ static uint32_t load32( const void *src ) #endif } -static uint64_t load64( const void *src ) +static BLAKE2_INLINE uint64_t load64( const void *src ) { #if defined(NATIVE_LITTLE_ENDIAN) uint64_t w; @@ -52,7 +64,7 @@ static uint64_t load64( const void *src ) #endif } -static void store32( void *dst, uint32_t w ) +static BLAKE2_INLINE void store32( void *dst, uint32_t w ) { #if defined(NATIVE_LITTLE_ENDIAN) memcpy(dst, &w, sizeof w); @@ -65,7 +77,7 @@ static void store32( void *dst, uint32_t w ) #endif } -static void store64( void *dst, uint64_t w ) +static BLAKE2_INLINE void store64( void *dst, uint64_t w ) { #if defined(NATIVE_LITTLE_ENDIAN) memcpy(dst, &w, sizeof w); @@ -82,7 +94,7 @@ static void store64( void *dst, uint64_t w ) #endif } -static uint64_t load48( const void *src ) +static BLAKE2_INLINE uint64_t load48( const void 
*src ) { const uint8_t *p = ( const uint8_t * )src; return (( uint64_t )( p[0] ) << 0) | @@ -93,7 +105,7 @@ static uint64_t load48( const void *src ) (( uint64_t )( p[5] ) << 40) ; } -static void store48( void *dst, uint64_t w ) +static BLAKE2_INLINE void store48( void *dst, uint64_t w ) { uint8_t *p = ( uint8_t * )dst; p[0] = (uint8_t)(w >> 0); @@ -104,28 +116,18 @@ static void store48( void *dst, uint64_t w ) p[5] = (uint8_t)(w >> 40); } -static uint32_t rotl32( const uint32_t w, const unsigned c ) -{ - return ( w << c ) | ( w >> ( 32 - c ) ); -} - -static uint64_t rotl64( const uint64_t w, const unsigned c ) -{ - return ( w << c ) | ( w >> ( 64 - c ) ); -} - -static uint32_t rotr32( const uint32_t w, const unsigned c ) +static BLAKE2_INLINE uint32_t rotr32( const uint32_t w, const unsigned c ) { return ( w >> c ) | ( w << ( 32 - c ) ); } -static uint64_t rotr64( const uint64_t w, const unsigned c ) +static BLAKE2_INLINE uint64_t rotr64( const uint64_t w, const unsigned c ) { return ( w >> c ) | ( w << ( 64 - c ) ); } /* prevents compiler optimizing out memset() */ -static void secure_zero_memory(void *v, size_t n) +static BLAKE2_INLINE void secure_zero_memory(void *v, size_t n) { static void *(*const volatile memset_v)(void *, int, size_t) = &memset; memset_v(v, 0, n); diff --git a/ref/blake2b-ref.c b/ref/blake2b-ref.c index 37e4af7..f1c7055 100644 --- a/ref/blake2b-ref.c +++ b/ref/blake2b-ref.c @@ -50,11 +50,6 @@ static void blake2b_set_lastnode( blake2b_state *S ) S->f[1] = (uint64_t)-1; } -static void blake2b_clear_lastnode( blake2b_state *S ) -{ - S->f[1] = 0; -} - /* Some helper functions, not necessarily useful */ static int blake2b_is_lastblock( const blake2b_state *S ) { @@ -68,13 +63,6 @@ static void blake2b_set_lastblock( blake2b_state *S ) S->f[0] = (uint64_t)-1; } -static void blake2b_clear_lastblock( blake2b_state *S ) -{ - if( S->last_node ) blake2b_clear_lastnode( S ); - - S->f[0] = 0; -} - static void blake2b_increment_counter( blake2b_state *S, 
const uint64_t inc ) { S->t[0] += inc; diff --git a/ref/blake2s-ref.c b/ref/blake2s-ref.c index a278489..b570fd9 100644 --- a/ref/blake2s-ref.c +++ b/ref/blake2s-ref.c @@ -45,11 +45,6 @@ static void blake2s_set_lastnode( blake2s_state *S ) S->f[1] = (uint32_t)-1; } -static void blake2s_clear_lastnode( blake2s_state *S ) -{ - S->f[1] = 0; -} - /* Some helper functions, not necessarily useful */ static int blake2s_is_lastblock( const blake2s_state *S ) { @@ -63,13 +58,6 @@ static void blake2s_set_lastblock( blake2s_state *S ) S->f[0] = (uint32_t)-1; } -static void blake2s_clear_lastblock( blake2s_state *S ) -{ - if( S->last_node ) blake2s_clear_lastnode( S ); - - S->f[0] = 0; -} - static void blake2s_increment_counter( blake2s_state *S, const uint32_t inc ) { S->t[0] += inc; diff --git a/sse/blake2-impl.h b/sse/blake2-impl.h index 46440bf..03df0b5 100644 --- a/sse/blake2-impl.h +++ b/sse/blake2-impl.h @@ -18,7 +18,19 @@ #include <stdint.h> #include <string.h> -static uint32_t load32( const void *src ) +#if !defined(__cplusplus) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L) + #if defined(_MSC_VER) + #define BLAKE2_INLINE __inline + #elif defined(__GNUC__) + #define BLAKE2_INLINE __inline__ + #else + #define BLAKE2_INLINE + #endif +#else + #define BLAKE2_INLINE inline +#endif + +static BLAKE2_INLINE uint32_t load32( const void *src ) { #if defined(NATIVE_LITTLE_ENDIAN) uint32_t w; @@ -33,7 +45,7 @@ static uint32_t load32( const void *src ) #endif } -static uint64_t load64( const void *src ) +static BLAKE2_INLINE uint64_t load64( const void *src ) { #if defined(NATIVE_LITTLE_ENDIAN) uint64_t w; @@ -52,7 +64,7 @@ static uint64_t load64( const void *src ) #endif } -static void store32( void *dst, uint32_t w ) +static BLAKE2_INLINE void store32( void *dst, uint32_t w ) { #if defined(NATIVE_LITTLE_ENDIAN) memcpy(dst, &w, sizeof w); @@ -65,7 +77,7 @@ static void store32( void *dst, uint32_t w ) #endif } -static void store64( void *dst, uint64_t w ) +static BLAKE2_INLINE void 
store64( void *dst, uint64_t w ) { #if defined(NATIVE_LITTLE_ENDIAN) memcpy(dst, &w, sizeof w); @@ -82,7 +94,7 @@ static void store64( void *dst, uint64_t w ) #endif } -static uint64_t load48( const void *src ) +static BLAKE2_INLINE uint64_t load48( const void *src ) { const uint8_t *p = ( const uint8_t * )src; return (( uint64_t )( p[0] ) << 0) | @@ -93,7 +105,7 @@ static uint64_t load48( const void *src ) (( uint64_t )( p[5] ) << 40) ; } -static void store48( void *dst, uint64_t w ) +static BLAKE2_INLINE void store48( void *dst, uint64_t w ) { uint8_t *p = ( uint8_t * )dst; p[0] = (uint8_t)(w >> 0); @@ -104,28 +116,18 @@ static void store48( void *dst, uint64_t w ) p[5] = (uint8_t)(w >> 40); } -static uint32_t rotl32( const uint32_t w, const unsigned c ) -{ - return ( w << c ) | ( w >> ( 32 - c ) ); -} - -static uint64_t rotl64( const uint64_t w, const unsigned c ) -{ - return ( w << c ) | ( w >> ( 64 - c ) ); -} - -static uint32_t rotr32( const uint32_t w, const unsigned c ) +static BLAKE2_INLINE uint32_t rotr32( const uint32_t w, const unsigned c ) { return ( w >> c ) | ( w << ( 32 - c ) ); } -static uint64_t rotr64( const uint64_t w, const unsigned c ) +static BLAKE2_INLINE uint64_t rotr64( const uint64_t w, const unsigned c ) { return ( w >> c ) | ( w << ( 64 - c ) ); } /* prevents compiler optimizing out memset() */ -static void secure_zero_memory(void *v, size_t n) +static BLAKE2_INLINE void secure_zero_memory(void *v, size_t n) { static void *(*const volatile memset_v)(void *, int, size_t) = &memset; memset_v(v, 0, n); diff --git a/sse/blake2b.c b/sse/blake2b.c index 7557541..e1f0edd 100644 --- a/sse/blake2b.c +++ b/sse/blake2b.c @@ -49,34 +49,12 @@ static const uint64_t blake2b_IV[8] = 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL }; -static const uint8_t blake2b_sigma[12][16] = -{ - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , - { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 
4 } , - { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , - { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , - { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , - { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , - { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , - { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , - { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } -}; - - /* Some helper functions */ static void blake2b_set_lastnode( blake2b_state *S ) { S->f[1] = (uint64_t)-1; } -static void blake2b_clear_lastnode( blake2b_state *S ) -{ - S->f[1] = 0; -} - static int blake2b_is_lastblock( const blake2b_state *S ) { return S->f[0] != 0; @@ -89,28 +67,12 @@ static void blake2b_set_lastblock( blake2b_state *S ) S->f[0] = (uint64_t)-1; } -static void blake2b_clear_lastblock( blake2b_state *S ) -{ - if( S->last_node ) blake2b_clear_lastnode( S ); - - S->f[0] = 0; -} - - static void blake2b_increment_counter( blake2b_state *S, const uint64_t inc ) { S->t[0] += inc; S->t[1] += ( S->t[0] < inc ); } -static void blake2b_init0( blake2b_state *S ) -{ - size_t i; - memset( S, 0, sizeof( blake2b_state ) ); - - for( i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i]; -} - /* init xors IV with input parameter block */ int blake2b_init_param( blake2b_state *S, const blake2b_param *P ) { @@ -132,48 +94,46 @@ int blake2b_init_param( blake2b_state *S, const blake2b_param *P ) /* Some sort of default parameter block initialization, for sequential blake2b */ int blake2b_init( blake2b_state *S, size_t outlen ) { - const blake2b_param P = - { - (uint8_t)outlen, - 0, - 1, - 1, - 0, - 0, - 0, - 0, - {0}, - {0}, - {0} - }; + blake2b_param P[1]; if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; - return blake2b_init_param( S, &P ); + P->digest_length = (uint8_t)outlen; + P->key_length = 0; + P->fanout 
= 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store64( &P->node_offset, 0 ); + P->node_depth = 0; + P->inner_length = 0; + memset( P->reserved, 0, sizeof( P->reserved ) ); + memset( P->salt, 0, sizeof( P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + + return blake2b_init_param( S, P ); } int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen ) { - const blake2b_param P = - { - (uint8_t)outlen, - (uint8_t)keylen, - 1, - 1, - 0, - 0, - 0, - 0, - {0}, - {0}, - {0} - }; + blake2b_param P[1]; if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; if ( ( !keylen ) || keylen > BLAKE2B_KEYBYTES ) return -1; - if( blake2b_init_param( S, &P ) < 0 ) + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store64( &P->node_offset, 0 ); + P->node_depth = 0; + P->inner_length = 0; + memset( P->reserved, 0, sizeof( P->reserved ) ); + memset( P->salt, 0, sizeof( P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + + if( blake2b_init_param( S, P ) < 0 ) return 0; { @@ -208,22 +168,22 @@ static void blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOC const __m128i m6 = LOADU( block + 96 ); const __m128i m7 = LOADU( block + 112 ); #else - const uint64_t m0 = ( ( uint64_t * )block )[ 0]; - const uint64_t m1 = ( ( uint64_t * )block )[ 1]; - const uint64_t m2 = ( ( uint64_t * )block )[ 2]; - const uint64_t m3 = ( ( uint64_t * )block )[ 3]; - const uint64_t m4 = ( ( uint64_t * )block )[ 4]; - const uint64_t m5 = ( ( uint64_t * )block )[ 5]; - const uint64_t m6 = ( ( uint64_t * )block )[ 6]; - const uint64_t m7 = ( ( uint64_t * )block )[ 7]; - const uint64_t m8 = ( ( uint64_t * )block )[ 8]; - const uint64_t m9 = ( ( uint64_t * )block )[ 9]; - const uint64_t m10 = ( ( uint64_t * )block )[10]; - const uint64_t m11 = ( ( uint64_t * )block )[11]; - const uint64_t m12 = ( ( uint64_t * )block )[12]; - const 
uint64_t m13 = ( ( uint64_t * )block )[13]; - const uint64_t m14 = ( ( uint64_t * )block )[14]; - const uint64_t m15 = ( ( uint64_t * )block )[15]; + const uint64_t m0 = load64(block + 0 * sizeof(uint64_t)); + const uint64_t m1 = load64(block + 1 * sizeof(uint64_t)); + const uint64_t m2 = load64(block + 2 * sizeof(uint64_t)); + const uint64_t m3 = load64(block + 3 * sizeof(uint64_t)); + const uint64_t m4 = load64(block + 4 * sizeof(uint64_t)); + const uint64_t m5 = load64(block + 5 * sizeof(uint64_t)); + const uint64_t m6 = load64(block + 6 * sizeof(uint64_t)); + const uint64_t m7 = load64(block + 7 * sizeof(uint64_t)); + const uint64_t m8 = load64(block + 8 * sizeof(uint64_t)); + const uint64_t m9 = load64(block + 9 * sizeof(uint64_t)); + const uint64_t m10 = load64(block + 10 * sizeof(uint64_t)); + const uint64_t m11 = load64(block + 11 * sizeof(uint64_t)); + const uint64_t m12 = load64(block + 12 * sizeof(uint64_t)); + const uint64_t m13 = load64(block + 13 * sizeof(uint64_t)); + const uint64_t m14 = load64(block + 14 * sizeof(uint64_t)); + const uint64_t m15 = load64(block + 15 * sizeof(uint64_t)); #endif row1l = LOADU( &S->h[0] ); row1h = LOADU( &S->h[2] ); diff --git a/sse/blake2s.c b/sse/blake2s.c index f423d58..d61e587 100644 --- a/sse/blake2s.c +++ b/sse/blake2s.c @@ -45,32 +45,12 @@ static const uint32_t blake2s_IV[8] = 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL }; -static const uint8_t blake2s_sigma[10][16] = -{ - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , - { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , - { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , - { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , - { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , - { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , - { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , - { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 
10, 5 } , - { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , -}; - - /* Some helper functions */ static void blake2s_set_lastnode( blake2s_state *S ) { S->f[1] = (uint32_t)-1; } -static void blake2s_clear_lastnode( blake2s_state *S ) -{ - S->f[1] = 0; -} - static int blake2s_is_lastblock( const blake2s_state *S ) { return S->f[0] != 0; @@ -83,13 +63,6 @@ static void blake2s_set_lastblock( blake2s_state *S ) S->f[0] = (uint32_t)-1; } -static void blake2s_clear_lastblock( blake2s_state *S ) -{ - if( S->last_node ) blake2s_clear_lastnode( S ); - - S->f[0] = 0; -} - static void blake2s_increment_counter( blake2s_state *S, const uint32_t inc ) { uint64_t t = ( ( uint64_t )S->t[1] << 32 ) | S->t[0]; @@ -98,14 +71,6 @@ static void blake2s_increment_counter( blake2s_state *S, const uint32_t inc ) S->t[1] = ( uint32_t )( t >> 32 ); } -static void blake2s_init0( blake2s_state *S ) -{ - size_t i; - memset( S, 0, sizeof( blake2s_state ) ); - - for( i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i]; -} - /* init2 xors IV with input parameter block */ int blake2s_init_param( blake2s_state *S, const blake2s_param *P ) { @@ -127,47 +92,49 @@ int blake2s_init_param( blake2s_state *S, const blake2s_param *P ) /* Some sort of default parameter block initialization, for sequential blake2s */ int blake2s_init( blake2s_state *S, size_t outlen ) { - const blake2s_param P = - { - (uint8_t)outlen, - 0, - 1, - 1, - 0, - {0}, - 0, - 0, - {0}, - {0} - }; + blake2s_param P[1]; + /* Move interval verification here? 
*/ if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1; - return blake2s_init_param( S, &P ); + + P->digest_length = (uint8_t)outlen; + P->key_length = 0; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store48( &P->node_offset, 0 ); + P->node_depth = 0; + P->inner_length = 0; + /* memset(P->reserved, 0, sizeof(P->reserved) ); */ + memset( P->salt, 0, sizeof( P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + + return blake2s_init_param( S, P ); } int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen ) { - const blake2s_param P = - { - (uint8_t)outlen, - (uint8_t)keylen, - 1, - 1, - 0, - {0}, - 0, - 0, - {0}, - {0} - }; + blake2s_param P[1]; /* Move interval verification here? */ if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1; if ( ( !key ) || ( !keylen ) || keylen > BLAKE2S_KEYBYTES ) return -1; - if( blake2s_init_param( S, &P ) < 0 ) + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store48( &P->node_offset, 0 ); + P->node_depth = 0; + P->inner_length = 0; + /* memset(P->reserved, 0, sizeof(P->reserved) ); */ + memset( P->salt, 0, sizeof( P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + + if( blake2s_init_param( S, P ) < 0 ) return -1; { @@ -202,27 +169,27 @@ static void blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOC const __m128i m2 = LOADU( block + 32 ); const __m128i m3 = LOADU( block + 48 ); #else - const uint32_t m0 = ( ( uint32_t * )block )[ 0]; - const uint32_t m1 = ( ( uint32_t * )block )[ 1]; - const uint32_t m2 = ( ( uint32_t * )block )[ 2]; - const uint32_t m3 = ( ( uint32_t * )block )[ 3]; - const uint32_t m4 = ( ( uint32_t * )block )[ 4]; - const uint32_t m5 = ( ( uint32_t * )block )[ 5]; - const uint32_t m6 = ( ( uint32_t * )block )[ 6]; - const uint32_t m7 = ( ( uint32_t * )block )[ 7]; - const uint32_t m8 = ( ( uint32_t * )block 
)[ 8]; - const uint32_t m9 = ( ( uint32_t * )block )[ 9]; - const uint32_t m10 = ( ( uint32_t * )block )[10]; - const uint32_t m11 = ( ( uint32_t * )block )[11]; - const uint32_t m12 = ( ( uint32_t * )block )[12]; - const uint32_t m13 = ( ( uint32_t * )block )[13]; - const uint32_t m14 = ( ( uint32_t * )block )[14]; - const uint32_t m15 = ( ( uint32_t * )block )[15]; + const uint32_t m0 = load32(block + 0 * sizeof(uint32_t)); + const uint32_t m1 = load32(block + 1 * sizeof(uint32_t)); + const uint32_t m2 = load32(block + 2 * sizeof(uint32_t)); + const uint32_t m3 = load32(block + 3 * sizeof(uint32_t)); + const uint32_t m4 = load32(block + 4 * sizeof(uint32_t)); + const uint32_t m5 = load32(block + 5 * sizeof(uint32_t)); + const uint32_t m6 = load32(block + 6 * sizeof(uint32_t)); + const uint32_t m7 = load32(block + 7 * sizeof(uint32_t)); + const uint32_t m8 = load32(block + 8 * sizeof(uint32_t)); + const uint32_t m9 = load32(block + 9 * sizeof(uint32_t)); + const uint32_t m10 = load32(block + 10 * sizeof(uint32_t)); + const uint32_t m11 = load32(block + 11 * sizeof(uint32_t)); + const uint32_t m12 = load32(block + 12 * sizeof(uint32_t)); + const uint32_t m13 = load32(block + 13 * sizeof(uint32_t)); + const uint32_t m14 = load32(block + 14 * sizeof(uint32_t)); + const uint32_t m15 = load32(block + 15 * sizeof(uint32_t)); #endif row1 = ff0 = LOADU( &S->h[0] ); row2 = ff1 = LOADU( &S->h[4] ); - row3 = _mm_setr_epi32( 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A ); - row4 = _mm_xor_si128( _mm_setr_epi32( 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 ), LOADU( &S->t[0] ) ); + row3 = _mm_loadu_si128( (__m128i const *)&blake2s_IV[0] ); + row4 = _mm_xor_si128( _mm_loadu_si128( (__m128i const *)&blake2s_IV[4] ), LOADU( &S->t[0] ) ); ROUND( 0 ); ROUND( 1 ); ROUND( 2 ); diff --git a/sse/makefile b/sse/makefile index a1e7e2a..b23b4a8 100644 --- a/sse/makefile +++ b/sse/makefile @@ -1,5 +1,5 @@ CC=gcc -CFLAGS=-O3 -march=native -I../testvectors +CFLAGS=-O3 -I../testvectors 
all: blake2s blake2b blake2sp blake2bp check -- cgit v1.2.3