Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/BLAKE2/BLAKE2.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Samuel Neves <sneves@dei.uc.pt> 2014-01-14 23:02:48 +0400
committer: Samuel Neves <sneves@dei.uc.pt> 2014-01-14 23:02:48 +0400
commit: 22a1ce9b2f81115068688989a1c325662e026b52 (patch)
tree: e828d710477ffb97cf9d4ccfb6122c8c423b2b23
parent: 802c795b4340ff69e2e14cdd326565d0a8372cf4 (diff)
Use unaligned instructions for non speed-critical memory accesses
-rw-r--r-- sse/blake2b.c 24
-rw-r--r-- sse/blake2s.c 10
2 files changed, 17 insertions, 17 deletions
diff --git a/sse/blake2b.c b/sse/blake2b.c
index 526943b..1d5ad24 100644
--- a/sse/blake2b.c
+++ b/sse/blake2b.c
@@ -284,14 +284,14 @@ static inline int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2
const uint64_t m14 = ( ( uint64_t * )block )[14];
const uint64_t m15 = ( ( uint64_t * )block )[15];
#endif
- row1l = LOAD( &S->h[0] );
- row1h = LOAD( &S->h[2] );
- row2l = LOAD( &S->h[4] );
- row2h = LOAD( &S->h[6] );
- row3l = LOAD( &blake2b_IV[0] );
- row3h = LOAD( &blake2b_IV[2] );
- row4l = _mm_xor_si128( LOAD( &blake2b_IV[4] ), LOAD( &S->t[0] ) );
- row4h = _mm_xor_si128( LOAD( &blake2b_IV[6] ), LOAD( &S->f[0] ) );
+ row1l = LOADU( &S->h[0] );
+ row1h = LOADU( &S->h[2] );
+ row2l = LOADU( &S->h[4] );
+ row2h = LOADU( &S->h[6] );
+ row3l = LOADU( &blake2b_IV[0] );
+ row3h = LOADU( &blake2b_IV[2] );
+ row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), LOADU( &S->t[0] ) );
+ row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), LOADU( &S->f[0] ) );
ROUND( 0 );
ROUND( 1 );
ROUND( 2 );
@@ -306,12 +306,12 @@ static inline int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2
ROUND( 11 );
row1l = _mm_xor_si128( row3l, row1l );
row1h = _mm_xor_si128( row3h, row1h );
- STORE( &S->h[0], _mm_xor_si128( LOAD( &S->h[0] ), row1l ) );
- STORE( &S->h[2], _mm_xor_si128( LOAD( &S->h[2] ), row1h ) );
+ STOREU( &S->h[0], _mm_xor_si128( LOADU( &S->h[0] ), row1l ) );
+ STOREU( &S->h[2], _mm_xor_si128( LOADU( &S->h[2] ), row1h ) );
row2l = _mm_xor_si128( row4l, row2l );
row2h = _mm_xor_si128( row4h, row2h );
- STORE( &S->h[4], _mm_xor_si128( LOAD( &S->h[4] ), row2l ) );
- STORE( &S->h[6], _mm_xor_si128( LOAD( &S->h[6] ), row2h ) );
+ STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) );
+ STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) );
return 0;
}
diff --git a/sse/blake2s.c b/sse/blake2s.c
index 03744ac..9ec2df1 100644
--- a/sse/blake2s.c
+++ b/sse/blake2s.c
@@ -274,10 +274,10 @@ static inline int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2
const uint32_t m14 = ( ( uint32_t * )block )[14];
const uint32_t m15 = ( ( uint32_t * )block )[15];
#endif
- row1 = ff0 = LOAD( &S->h[0] );
- row2 = ff1 = LOAD( &S->h[4] );
+ row1 = ff0 = LOADU( &S->h[0] );
+ row2 = ff1 = LOADU( &S->h[4] );
row3 = _mm_setr_epi32( 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A );
- row4 = _mm_xor_si128( _mm_setr_epi32( 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 ), LOAD( &S->t[0] ) );
+ row4 = _mm_xor_si128( _mm_setr_epi32( 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 ), LOADU( &S->t[0] ) );
ROUND( 0 );
ROUND( 1 );
ROUND( 2 );
@@ -288,8 +288,8 @@ static inline int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2
ROUND( 7 );
ROUND( 8 );
ROUND( 9 );
- STORE( &S->h[0], _mm_xor_si128( ff0, _mm_xor_si128( row1, row3 ) ) );
- STORE( &S->h[4], _mm_xor_si128( ff1, _mm_xor_si128( row2, row4 ) ) );
+ STOREU( &S->h[0], _mm_xor_si128( ff0, _mm_xor_si128( row1, row3 ) ) );
+ STOREU( &S->h[4], _mm_xor_si128( ff1, _mm_xor_si128( row2, row4 ) ) );
return 0;
}