Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/BLAKE2/BLAKE2.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Samuel Neves <sneves@dei.uc.pt> 2016-06-11 19:47:44 +0300
committer: Samuel Neves <sneves@dei.uc.pt> 2016-06-11 19:58:17 +0300
commit: 88df903283b721e5dfcb488331fd90c45f940d25 (patch)
tree: 613503c724bd3d405f6eba04e4784dda12493532
parent: ab60beb7a1c9bdce7315f7324338793610df934a (diff)
remove unused code, vars
-rw-r--r-- ref/blake2-impl.h 40
-rw-r--r-- ref/blake2b-ref.c 12
-rw-r--r-- ref/blake2s-ref.c 12
-rw-r--r-- sse/blake2-impl.h 40
-rw-r--r-- sse/blake2b.c 128
-rw-r--r-- sse/blake2s.c 129
-rw-r--r-- sse/makefile 2
7 files changed, 135 insertions, 228 deletions
diff --git a/ref/blake2-impl.h b/ref/blake2-impl.h
index 46440bf..03df0b5 100644
--- a/ref/blake2-impl.h
+++ b/ref/blake2-impl.h
@@ -18,7 +18,19 @@
#include <stdint.h>
#include <string.h>
-static uint32_t load32( const void *src )
+#if !defined(__cplusplus) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L)
+ #if defined(_MSC_VER)
+ #define BLAKE2_INLINE __inline
+ #elif defined(__GNUC__)
+ #define BLAKE2_INLINE __inline__
+ #else
+ #define BLAKE2_INLINE
+ #endif
+#else
+ #define BLAKE2_INLINE inline
+#endif
+
+static BLAKE2_INLINE uint32_t load32( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
uint32_t w;
@@ -33,7 +45,7 @@ static uint32_t load32( const void *src )
#endif
}
-static uint64_t load64( const void *src )
+static BLAKE2_INLINE uint64_t load64( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
uint64_t w;
@@ -52,7 +64,7 @@ static uint64_t load64( const void *src )
#endif
}
-static void store32( void *dst, uint32_t w )
+static BLAKE2_INLINE void store32( void *dst, uint32_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
memcpy(dst, &w, sizeof w);
@@ -65,7 +77,7 @@ static void store32( void *dst, uint32_t w )
#endif
}
-static void store64( void *dst, uint64_t w )
+static BLAKE2_INLINE void store64( void *dst, uint64_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
memcpy(dst, &w, sizeof w);
@@ -82,7 +94,7 @@ static void store64( void *dst, uint64_t w )
#endif
}
-static uint64_t load48( const void *src )
+static BLAKE2_INLINE uint64_t load48( const void *src )
{
const uint8_t *p = ( const uint8_t * )src;
return (( uint64_t )( p[0] ) << 0) |
@@ -93,7 +105,7 @@ static uint64_t load48( const void *src )
(( uint64_t )( p[5] ) << 40) ;
}
-static void store48( void *dst, uint64_t w )
+static BLAKE2_INLINE void store48( void *dst, uint64_t w )
{
uint8_t *p = ( uint8_t * )dst;
p[0] = (uint8_t)(w >> 0);
@@ -104,28 +116,18 @@ static void store48( void *dst, uint64_t w )
p[5] = (uint8_t)(w >> 40);
}
-static uint32_t rotl32( const uint32_t w, const unsigned c )
-{
- return ( w << c ) | ( w >> ( 32 - c ) );
-}
-
-static uint64_t rotl64( const uint64_t w, const unsigned c )
-{
- return ( w << c ) | ( w >> ( 64 - c ) );
-}
-
-static uint32_t rotr32( const uint32_t w, const unsigned c )
+static BLAKE2_INLINE uint32_t rotr32( const uint32_t w, const unsigned c )
{
return ( w >> c ) | ( w << ( 32 - c ) );
}
-static uint64_t rotr64( const uint64_t w, const unsigned c )
+static BLAKE2_INLINE uint64_t rotr64( const uint64_t w, const unsigned c )
{
return ( w >> c ) | ( w << ( 64 - c ) );
}
/* prevents compiler optimizing out memset() */
-static void secure_zero_memory(void *v, size_t n)
+static BLAKE2_INLINE void secure_zero_memory(void *v, size_t n)
{
static void *(*const volatile memset_v)(void *, int, size_t) = &memset;
memset_v(v, 0, n);
diff --git a/ref/blake2b-ref.c b/ref/blake2b-ref.c
index 37e4af7..f1c7055 100644
--- a/ref/blake2b-ref.c
+++ b/ref/blake2b-ref.c
@@ -50,11 +50,6 @@ static void blake2b_set_lastnode( blake2b_state *S )
S->f[1] = (uint64_t)-1;
}
-static void blake2b_clear_lastnode( blake2b_state *S )
-{
- S->f[1] = 0;
-}
-
/* Some helper functions, not necessarily useful */
static int blake2b_is_lastblock( const blake2b_state *S )
{
@@ -68,13 +63,6 @@ static void blake2b_set_lastblock( blake2b_state *S )
S->f[0] = (uint64_t)-1;
}
-static void blake2b_clear_lastblock( blake2b_state *S )
-{
- if( S->last_node ) blake2b_clear_lastnode( S );
-
- S->f[0] = 0;
-}
-
static void blake2b_increment_counter( blake2b_state *S, const uint64_t inc )
{
S->t[0] += inc;
diff --git a/ref/blake2s-ref.c b/ref/blake2s-ref.c
index a278489..b570fd9 100644
--- a/ref/blake2s-ref.c
+++ b/ref/blake2s-ref.c
@@ -45,11 +45,6 @@ static void blake2s_set_lastnode( blake2s_state *S )
S->f[1] = (uint32_t)-1;
}
-static void blake2s_clear_lastnode( blake2s_state *S )
-{
- S->f[1] = 0;
-}
-
/* Some helper functions, not necessarily useful */
static int blake2s_is_lastblock( const blake2s_state *S )
{
@@ -63,13 +58,6 @@ static void blake2s_set_lastblock( blake2s_state *S )
S->f[0] = (uint32_t)-1;
}
-static void blake2s_clear_lastblock( blake2s_state *S )
-{
- if( S->last_node ) blake2s_clear_lastnode( S );
-
- S->f[0] = 0;
-}
-
static void blake2s_increment_counter( blake2s_state *S, const uint32_t inc )
{
S->t[0] += inc;
diff --git a/sse/blake2-impl.h b/sse/blake2-impl.h
index 46440bf..03df0b5 100644
--- a/sse/blake2-impl.h
+++ b/sse/blake2-impl.h
@@ -18,7 +18,19 @@
#include <stdint.h>
#include <string.h>
-static uint32_t load32( const void *src )
+#if !defined(__cplusplus) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L)
+ #if defined(_MSC_VER)
+ #define BLAKE2_INLINE __inline
+ #elif defined(__GNUC__)
+ #define BLAKE2_INLINE __inline__
+ #else
+ #define BLAKE2_INLINE
+ #endif
+#else
+ #define BLAKE2_INLINE inline
+#endif
+
+static BLAKE2_INLINE uint32_t load32( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
uint32_t w;
@@ -33,7 +45,7 @@ static uint32_t load32( const void *src )
#endif
}
-static uint64_t load64( const void *src )
+static BLAKE2_INLINE uint64_t load64( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
uint64_t w;
@@ -52,7 +64,7 @@ static uint64_t load64( const void *src )
#endif
}
-static void store32( void *dst, uint32_t w )
+static BLAKE2_INLINE void store32( void *dst, uint32_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
memcpy(dst, &w, sizeof w);
@@ -65,7 +77,7 @@ static void store32( void *dst, uint32_t w )
#endif
}
-static void store64( void *dst, uint64_t w )
+static BLAKE2_INLINE void store64( void *dst, uint64_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
memcpy(dst, &w, sizeof w);
@@ -82,7 +94,7 @@ static void store64( void *dst, uint64_t w )
#endif
}
-static uint64_t load48( const void *src )
+static BLAKE2_INLINE uint64_t load48( const void *src )
{
const uint8_t *p = ( const uint8_t * )src;
return (( uint64_t )( p[0] ) << 0) |
@@ -93,7 +105,7 @@ static uint64_t load48( const void *src )
(( uint64_t )( p[5] ) << 40) ;
}
-static void store48( void *dst, uint64_t w )
+static BLAKE2_INLINE void store48( void *dst, uint64_t w )
{
uint8_t *p = ( uint8_t * )dst;
p[0] = (uint8_t)(w >> 0);
@@ -104,28 +116,18 @@ static void store48( void *dst, uint64_t w )
p[5] = (uint8_t)(w >> 40);
}
-static uint32_t rotl32( const uint32_t w, const unsigned c )
-{
- return ( w << c ) | ( w >> ( 32 - c ) );
-}
-
-static uint64_t rotl64( const uint64_t w, const unsigned c )
-{
- return ( w << c ) | ( w >> ( 64 - c ) );
-}
-
-static uint32_t rotr32( const uint32_t w, const unsigned c )
+static BLAKE2_INLINE uint32_t rotr32( const uint32_t w, const unsigned c )
{
return ( w >> c ) | ( w << ( 32 - c ) );
}
-static uint64_t rotr64( const uint64_t w, const unsigned c )
+static BLAKE2_INLINE uint64_t rotr64( const uint64_t w, const unsigned c )
{
return ( w >> c ) | ( w << ( 64 - c ) );
}
/* prevents compiler optimizing out memset() */
-static void secure_zero_memory(void *v, size_t n)
+static BLAKE2_INLINE void secure_zero_memory(void *v, size_t n)
{
static void *(*const volatile memset_v)(void *, int, size_t) = &memset;
memset_v(v, 0, n);
diff --git a/sse/blake2b.c b/sse/blake2b.c
index 7557541..e1f0edd 100644
--- a/sse/blake2b.c
+++ b/sse/blake2b.c
@@ -49,34 +49,12 @@ static const uint64_t blake2b_IV[8] =
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
};
-static const uint8_t blake2b_sigma[12][16] =
-{
- { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
- { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
- { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
- { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
- { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
- { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
- { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
- { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
- { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
- { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
- { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
- { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
-};
-
-
/* Some helper functions */
static void blake2b_set_lastnode( blake2b_state *S )
{
S->f[1] = (uint64_t)-1;
}
-static void blake2b_clear_lastnode( blake2b_state *S )
-{
- S->f[1] = 0;
-}
-
static int blake2b_is_lastblock( const blake2b_state *S )
{
return S->f[0] != 0;
@@ -89,28 +67,12 @@ static void blake2b_set_lastblock( blake2b_state *S )
S->f[0] = (uint64_t)-1;
}
-static void blake2b_clear_lastblock( blake2b_state *S )
-{
- if( S->last_node ) blake2b_clear_lastnode( S );
-
- S->f[0] = 0;
-}
-
-
static void blake2b_increment_counter( blake2b_state *S, const uint64_t inc )
{
S->t[0] += inc;
S->t[1] += ( S->t[0] < inc );
}
-static void blake2b_init0( blake2b_state *S )
-{
- size_t i;
- memset( S, 0, sizeof( blake2b_state ) );
-
- for( i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i];
-}
-
/* init xors IV with input parameter block */
int blake2b_init_param( blake2b_state *S, const blake2b_param *P )
{
@@ -132,48 +94,46 @@ int blake2b_init_param( blake2b_state *S, const blake2b_param *P )
/* Some sort of default parameter block initialization, for sequential blake2b */
int blake2b_init( blake2b_state *S, size_t outlen )
{
- const blake2b_param P =
- {
- (uint8_t)outlen,
- 0,
- 1,
- 1,
- 0,
- 0,
- 0,
- 0,
- {0},
- {0},
- {0}
- };
+ blake2b_param P[1];
if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
- return blake2b_init_param( S, &P );
+ P->digest_length = (uint8_t)outlen;
+ P->key_length = 0;
+ P->fanout = 1;
+ P->depth = 1;
+ store32( &P->leaf_length, 0 );
+ store64( &P->node_offset, 0 );
+ P->node_depth = 0;
+ P->inner_length = 0;
+ memset( P->reserved, 0, sizeof( P->reserved ) );
+ memset( P->salt, 0, sizeof( P->salt ) );
+ memset( P->personal, 0, sizeof( P->personal ) );
+
+ return blake2b_init_param( S, P );
}
int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen )
{
- const blake2b_param P =
- {
- (uint8_t)outlen,
- (uint8_t)keylen,
- 1,
- 1,
- 0,
- 0,
- 0,
- 0,
- {0},
- {0},
- {0}
- };
+ blake2b_param P[1];
if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
if ( ( !keylen ) || keylen > BLAKE2B_KEYBYTES ) return -1;
- if( blake2b_init_param( S, &P ) < 0 )
+ P->digest_length = (uint8_t)outlen;
+ P->key_length = (uint8_t)keylen;
+ P->fanout = 1;
+ P->depth = 1;
+ store32( &P->leaf_length, 0 );
+ store64( &P->node_offset, 0 );
+ P->node_depth = 0;
+ P->inner_length = 0;
+ memset( P->reserved, 0, sizeof( P->reserved ) );
+ memset( P->salt, 0, sizeof( P->salt ) );
+ memset( P->personal, 0, sizeof( P->personal ) );
+
+ if( blake2b_init_param( S, P ) < 0 )
return 0;
{
@@ -208,22 +168,22 @@ static void blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOC
const __m128i m6 = LOADU( block + 96 );
const __m128i m7 = LOADU( block + 112 );
#else
- const uint64_t m0 = ( ( uint64_t * )block )[ 0];
- const uint64_t m1 = ( ( uint64_t * )block )[ 1];
- const uint64_t m2 = ( ( uint64_t * )block )[ 2];
- const uint64_t m3 = ( ( uint64_t * )block )[ 3];
- const uint64_t m4 = ( ( uint64_t * )block )[ 4];
- const uint64_t m5 = ( ( uint64_t * )block )[ 5];
- const uint64_t m6 = ( ( uint64_t * )block )[ 6];
- const uint64_t m7 = ( ( uint64_t * )block )[ 7];
- const uint64_t m8 = ( ( uint64_t * )block )[ 8];
- const uint64_t m9 = ( ( uint64_t * )block )[ 9];
- const uint64_t m10 = ( ( uint64_t * )block )[10];
- const uint64_t m11 = ( ( uint64_t * )block )[11];
- const uint64_t m12 = ( ( uint64_t * )block )[12];
- const uint64_t m13 = ( ( uint64_t * )block )[13];
- const uint64_t m14 = ( ( uint64_t * )block )[14];
- const uint64_t m15 = ( ( uint64_t * )block )[15];
+ const uint64_t m0 = load64(block + 0 * sizeof(uint64_t));
+ const uint64_t m1 = load64(block + 1 * sizeof(uint64_t));
+ const uint64_t m2 = load64(block + 2 * sizeof(uint64_t));
+ const uint64_t m3 = load64(block + 3 * sizeof(uint64_t));
+ const uint64_t m4 = load64(block + 4 * sizeof(uint64_t));
+ const uint64_t m5 = load64(block + 5 * sizeof(uint64_t));
+ const uint64_t m6 = load64(block + 6 * sizeof(uint64_t));
+ const uint64_t m7 = load64(block + 7 * sizeof(uint64_t));
+ const uint64_t m8 = load64(block + 8 * sizeof(uint64_t));
+ const uint64_t m9 = load64(block + 9 * sizeof(uint64_t));
+ const uint64_t m10 = load64(block + 10 * sizeof(uint64_t));
+ const uint64_t m11 = load64(block + 11 * sizeof(uint64_t));
+ const uint64_t m12 = load64(block + 12 * sizeof(uint64_t));
+ const uint64_t m13 = load64(block + 13 * sizeof(uint64_t));
+ const uint64_t m14 = load64(block + 14 * sizeof(uint64_t));
+ const uint64_t m15 = load64(block + 15 * sizeof(uint64_t));
#endif
row1l = LOADU( &S->h[0] );
row1h = LOADU( &S->h[2] );
diff --git a/sse/blake2s.c b/sse/blake2s.c
index f423d58..d61e587 100644
--- a/sse/blake2s.c
+++ b/sse/blake2s.c
@@ -45,32 +45,12 @@ static const uint32_t blake2s_IV[8] =
0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
};
-static const uint8_t blake2s_sigma[10][16] =
-{
- { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
- { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
- { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
- { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
- { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
- { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
- { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
- { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
- { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
- { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
-};
-
-
/* Some helper functions */
static void blake2s_set_lastnode( blake2s_state *S )
{
S->f[1] = (uint32_t)-1;
}
-static void blake2s_clear_lastnode( blake2s_state *S )
-{
- S->f[1] = 0;
-}
-
static int blake2s_is_lastblock( const blake2s_state *S )
{
return S->f[0] != 0;
@@ -83,13 +63,6 @@ static void blake2s_set_lastblock( blake2s_state *S )
S->f[0] = (uint32_t)-1;
}
-static void blake2s_clear_lastblock( blake2s_state *S )
-{
- if( S->last_node ) blake2s_clear_lastnode( S );
-
- S->f[0] = 0;
-}
-
static void blake2s_increment_counter( blake2s_state *S, const uint32_t inc )
{
uint64_t t = ( ( uint64_t )S->t[1] << 32 ) | S->t[0];
@@ -98,14 +71,6 @@ static void blake2s_increment_counter( blake2s_state *S, const uint32_t inc )
S->t[1] = ( uint32_t )( t >> 32 );
}
-static void blake2s_init0( blake2s_state *S )
-{
- size_t i;
- memset( S, 0, sizeof( blake2s_state ) );
-
- for( i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i];
-}
-
/* init2 xors IV with input parameter block */
int blake2s_init_param( blake2s_state *S, const blake2s_param *P )
{
@@ -127,47 +92,49 @@ int blake2s_init_param( blake2s_state *S, const blake2s_param *P )
/* Some sort of default parameter block initialization, for sequential blake2s */
int blake2s_init( blake2s_state *S, size_t outlen )
{
- const blake2s_param P =
- {
- (uint8_t)outlen,
- 0,
- 1,
- 1,
- 0,
- {0},
- 0,
- 0,
- {0},
- {0}
- };
+ blake2s_param P[1];
+
/* Move interval verification here? */
if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1;
- return blake2s_init_param( S, &P );
+
+ P->digest_length = (uint8_t)outlen;
+ P->key_length = 0;
+ P->fanout = 1;
+ P->depth = 1;
+ store32( &P->leaf_length, 0 );
+ store48( &P->node_offset, 0 );
+ P->node_depth = 0;
+ P->inner_length = 0;
+ /* memset(P->reserved, 0, sizeof(P->reserved) ); */
+ memset( P->salt, 0, sizeof( P->salt ) );
+ memset( P->personal, 0, sizeof( P->personal ) );
+
+ return blake2s_init_param( S, P );
}
int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen )
{
- const blake2s_param P =
- {
- (uint8_t)outlen,
- (uint8_t)keylen,
- 1,
- 1,
- 0,
- {0},
- 0,
- 0,
- {0},
- {0}
- };
+ blake2s_param P[1];
/* Move interval verification here? */
if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1;
if ( ( !key ) || ( !keylen ) || keylen > BLAKE2S_KEYBYTES ) return -1;
- if( blake2s_init_param( S, &P ) < 0 )
+ P->digest_length = (uint8_t)outlen;
+ P->key_length = (uint8_t)keylen;
+ P->fanout = 1;
+ P->depth = 1;
+ store32( &P->leaf_length, 0 );
+ store48( &P->node_offset, 0 );
+ P->node_depth = 0;
+ P->inner_length = 0;
+ /* memset(P->reserved, 0, sizeof(P->reserved) ); */
+ memset( P->salt, 0, sizeof( P->salt ) );
+ memset( P->personal, 0, sizeof( P->personal ) );
+
+ if( blake2s_init_param( S, P ) < 0 )
return -1;
{
@@ -202,27 +169,27 @@ static void blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOC
const __m128i m2 = LOADU( block + 32 );
const __m128i m3 = LOADU( block + 48 );
#else
- const uint32_t m0 = ( ( uint32_t * )block )[ 0];
- const uint32_t m1 = ( ( uint32_t * )block )[ 1];
- const uint32_t m2 = ( ( uint32_t * )block )[ 2];
- const uint32_t m3 = ( ( uint32_t * )block )[ 3];
- const uint32_t m4 = ( ( uint32_t * )block )[ 4];
- const uint32_t m5 = ( ( uint32_t * )block )[ 5];
- const uint32_t m6 = ( ( uint32_t * )block )[ 6];
- const uint32_t m7 = ( ( uint32_t * )block )[ 7];
- const uint32_t m8 = ( ( uint32_t * )block )[ 8];
- const uint32_t m9 = ( ( uint32_t * )block )[ 9];
- const uint32_t m10 = ( ( uint32_t * )block )[10];
- const uint32_t m11 = ( ( uint32_t * )block )[11];
- const uint32_t m12 = ( ( uint32_t * )block )[12];
- const uint32_t m13 = ( ( uint32_t * )block )[13];
- const uint32_t m14 = ( ( uint32_t * )block )[14];
- const uint32_t m15 = ( ( uint32_t * )block )[15];
+ const uint32_t m0 = load32(block + 0 * sizeof(uint32_t));
+ const uint32_t m1 = load32(block + 1 * sizeof(uint32_t));
+ const uint32_t m2 = load32(block + 2 * sizeof(uint32_t));
+ const uint32_t m3 = load32(block + 3 * sizeof(uint32_t));
+ const uint32_t m4 = load32(block + 4 * sizeof(uint32_t));
+ const uint32_t m5 = load32(block + 5 * sizeof(uint32_t));
+ const uint32_t m6 = load32(block + 6 * sizeof(uint32_t));
+ const uint32_t m7 = load32(block + 7 * sizeof(uint32_t));
+ const uint32_t m8 = load32(block + 8 * sizeof(uint32_t));
+ const uint32_t m9 = load32(block + 9 * sizeof(uint32_t));
+ const uint32_t m10 = load32(block + 10 * sizeof(uint32_t));
+ const uint32_t m11 = load32(block + 11 * sizeof(uint32_t));
+ const uint32_t m12 = load32(block + 12 * sizeof(uint32_t));
+ const uint32_t m13 = load32(block + 13 * sizeof(uint32_t));
+ const uint32_t m14 = load32(block + 14 * sizeof(uint32_t));
+ const uint32_t m15 = load32(block + 15 * sizeof(uint32_t));
#endif
row1 = ff0 = LOADU( &S->h[0] );
row2 = ff1 = LOADU( &S->h[4] );
- row3 = _mm_setr_epi32( 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A );
- row4 = _mm_xor_si128( _mm_setr_epi32( 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 ), LOADU( &S->t[0] ) );
+ row3 = _mm_loadu_si128( (__m128i const *)&blake2s_IV[0] );
+ row4 = _mm_xor_si128( _mm_loadu_si128( (__m128i const *)&blake2s_IV[4] ), LOADU( &S->t[0] ) );
ROUND( 0 );
ROUND( 1 );
ROUND( 2 );
diff --git a/sse/makefile b/sse/makefile
index a1e7e2a..b23b4a8 100644
--- a/sse/makefile
+++ b/sse/makefile
@@ -1,5 +1,5 @@
CC=gcc
-CFLAGS=-O3 -march=native -I../testvectors
+CFLAGS=-O3 -I../testvectors
all: blake2s blake2b blake2sp blake2bp check