diff options
-rw-r--r-- | bench/amd64cpuinfo.c | 22 | ||||
-rw-r--r-- | bench/bench.c | 26 | ||||
-rw-r--r-- | bench/do.gplot | 13 | ||||
-rw-r--r-- | bench/makefile | 10 | ||||
-rw-r--r-- | bench/md5.c | 1 | ||||
-rw-r--r-- | bench/x86cpuinfo.c | 21 | ||||
-rw-r--r-- | ref/blake2-impl.h | 120 | ||||
-rw-r--r-- | ref/blake2.h | 107 | ||||
-rw-r--r-- | ref/blake2b-ref.c | 292 | ||||
-rw-r--r-- | ref/blake2bp-ref.c | 142 | ||||
-rw-r--r-- | ref/blake2s-ref.c | 283 | ||||
-rw-r--r-- | ref/blake2sp-ref.c | 142 | ||||
-rw-r--r-- | ref/genkat-c.c | 133 | ||||
-rw-r--r-- | ref/genkat-json.c | 8 | ||||
-rw-r--r-- | ref/makefile | 12 | ||||
-rw-r--r-- | sse/blake2-config.h | 5 | ||||
-rw-r--r-- | sse/blake2-impl.h | 122 | ||||
-rw-r--r-- | sse/blake2.h | 107 | ||||
-rw-r--r-- | sse/blake2b-load-sse2.h | 5 | ||||
-rw-r--r-- | sse/blake2b-load-sse41.h | 5 | ||||
-rw-r--r-- | sse/blake2b-round.h | 5 | ||||
-rw-r--r-- | sse/blake2b.c | 338 | ||||
-rw-r--r-- | sse/blake2bp.c | 144 | ||||
-rw-r--r-- | sse/blake2s-load-sse2.h | 5 | ||||
-rw-r--r-- | sse/blake2s-load-sse41.h | 5 | ||||
-rw-r--r-- | sse/blake2s-load-xop.h | 11 | ||||
-rw-r--r-- | sse/blake2s-round.h | 5 | ||||
-rw-r--r-- | sse/blake2s.c | 330 | ||||
-rw-r--r-- | sse/blake2sp.c | 141 | ||||
-rw-r--r-- | sse/genkat-c.c | 133 | ||||
-rw-r--r-- | sse/genkat-json.c | 8 | ||||
-rw-r--r-- | sse/makefile | 10 | ||||
-rw-r--r-- | testvectors/blake2-kat.h | 23 |
33 files changed, 1238 insertions, 1496 deletions
diff --git a/bench/amd64cpuinfo.c b/bench/amd64cpuinfo.c deleted file mode 100644 index 70ed361..0000000 --- a/bench/amd64cpuinfo.c +++ /dev/null @@ -1,22 +0,0 @@ -/* - BLAKE2 reference source code package - benchmark tool - - Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the - terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at - your option. The terms of these licenses can be found at: - - - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 - - OpenSSL license : https://www.openssl.org/source/license.html - - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - - More information about the BLAKE2 hash function can be found at - https://blake2.net. -*/ - -unsigned long long cpucycles( void ) -{ - unsigned long long result; - asm volatile( ".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax" - : "=a" ( result ) :: "%rdx" ); - return result; -} diff --git a/bench/bench.c b/bench/bench.c index bc30703..8488728 100644 --- a/bench/bench.c +++ b/bench/bench.c @@ -26,7 +26,31 @@ static int bench_cmp( const void *x, const void *y ) return *ix - *iy; } -unsigned long long cpucycles( void ); +#if defined(__amd64__) || defined(__x86_64__) +static unsigned long long cpucycles( void ) { + unsigned long long result; + __asm__ __volatile__( + ".byte 15;.byte 49\n" + "shlq $32,%%rdx\n" + "orq %%rdx,%%rax\n" + : "=a" ( result ) :: "%rdx" + ); + return result; +} +#elif defined(__i386__) +static unsigned long long cpucycles( void ) { + unsigned long long result; + __asm__ __volatile__( ".byte 15;.byte 49;" : "=A" ( result ) ); + return result; +} +#elif defined(_MSC_VER) +#include <intrin.h> +static unsigned long long cpucycles( void ) { + return __rdtsc(); +} +#else +#error "Don't know how to count cycles on this platform!" +#endif void bench() { diff --git a/bench/do.gplot b/bench/do.gplot index a9a8f6a..6a6832a 100644 --- a/bench/do.gplot +++ b/bench/do.gplot @@ -5,12 +5,15 @@ set ylabel "cycles" set xtics 0,32,maxx set grid set key left -plot "blake2b.data" using 1:2 with lines title "BLAKE2b" -replot "blake2s.data" using 1:2 with lines title "BLAKE2s" -replot "md5.data" using 1:2 with lines title "MD5" -#pause -1 "hit return to continue" + #set terminal png #set output "plotcycles.png" set terminal pdfcairo set output "plotcycles.pdf" -replot + +plot "blake2b.data" using 1:2 with lines title "BLAKE2b" +replot "blake2s.data" using 1:2 with lines title "BLAKE2s" +replot "md5.data" using 1:2 with lines title "MD5" + +set output "plotcycles.pdf" +replot
\ No newline at end of file diff --git a/bench/makefile b/bench/makefile index 183f298..8945dba 100644 --- a/bench/makefile +++ b/bench/makefile @@ -1,12 +1,16 @@ CC=gcc # std to gnu99 to support inline asm -CFLAGS=-std=gnu99 -O3 -march=native -DSUPERCOP # -DHAVE_XOP # uncomment on XOP-enabled CPUs -FILES=amd64cpuinfo.c bench.c +CFLAGS=-O3 -march=native -DSUPERCOP # -DHAVE_XOP # uncomment on XOP-enabled CPUs +FILES=bench.c -all: +all: bench + +bench: bench.c $(CC) $(FILES) $(CFLAGS) ../sse/blake2b.c -o blake2b $(CC) $(FILES) $(CFLAGS) ../sse/blake2s.c -o blake2s $(CC) $(FILES) $(CFLAGS) md5.c -o md5 -lcrypto -lz + +plot: bench ./blake2b > blake2b.data ./blake2s > blake2s.data ./md5 > md5.data diff --git a/bench/md5.c b/bench/md5.c index 1f6164e..3931ab6 100644 --- a/bench/md5.c +++ b/bench/md5.c @@ -14,7 +14,6 @@ */ #include <stddef.h> #include <openssl/md5.h> -//#include "crypto_hash.h" int crypto_hash( unsigned char *out, const unsigned char *in, unsigned long long inlen ) { diff --git a/bench/x86cpuinfo.c b/bench/x86cpuinfo.c deleted file mode 100644 index ccc55cc..0000000 --- a/bench/x86cpuinfo.c +++ /dev/null @@ -1,21 +0,0 @@ -/* - BLAKE2 reference source code package - benchmark tool - - Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the - terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at - your option. The terms of these licenses can be found at: - - - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 - - OpenSSL license : https://www.openssl.org/source/license.html - - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - - More information about the BLAKE2 hash function can be found at - https://blake2.net. -*/ - -unsigned long long cpucycles( void ) -{ - unsigned long long result; - asm volatile( ".byte 15;.byte 49" : "=A" ( result ) ); - return result; -} diff --git a/ref/blake2-impl.h b/ref/blake2-impl.h index 63a2394..03df0b5 100644 --- a/ref/blake2-impl.h +++ b/ref/blake2-impl.h @@ -12,14 +12,25 @@ More information about the BLAKE2 hash function can be found at https://blake2.net. */ -#pragma once -#ifndef __BLAKE2_IMPL_H__ -#define __BLAKE2_IMPL_H__ +#ifndef BLAKE2_IMPL_H +#define BLAKE2_IMPL_H #include <stdint.h> #include <string.h> -BLAKE2_LOCAL_INLINE(uint32_t) load32( const void *src ) +#if !defined(__cplusplus) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L) + #if defined(_MSC_VER) + #define BLAKE2_INLINE __inline + #elif defined(__GNUC__) + #define BLAKE2_INLINE __inline__ + #else + #define BLAKE2_INLINE + #endif +#else + #define BLAKE2_INLINE inline +#endif + +static BLAKE2_INLINE uint32_t load32( const void *src ) { #if defined(NATIVE_LITTLE_ENDIAN) uint32_t w; @@ -27,15 +38,14 @@ BLAKE2_LOCAL_INLINE(uint32_t) load32( const void *src ) return w; #else const uint8_t *p = ( const uint8_t * )src; - uint32_t w = *p++; - w |= ( uint32_t )( *p++ ) << 8; - w |= ( uint32_t )( *p++ ) << 16; - w |= ( uint32_t )( *p++ ) << 24; - return w; + return (( uint32_t )( p[0] ) << 0) | + (( uint32_t )( p[1] ) << 8) | + (( uint32_t )( p[2] ) << 16) | + (( uint32_t )( p[3] ) << 24) ; #endif } -BLAKE2_LOCAL_INLINE(uint64_t) load64( const void *src ) +static BLAKE2_INLINE uint64_t load64( const void *src ) { #if defined(NATIVE_LITTLE_ENDIAN) uint64_t w; @@ -43,93 +53,81 @@ BLAKE2_LOCAL_INLINE(uint64_t) load64( const void *src ) return w; #else const uint8_t *p = ( const uint8_t * )src; - uint64_t w = *p++; - w |= ( uint64_t )( *p++ ) << 8; - w |= ( uint64_t )( *p++ ) << 16; - w |= ( uint64_t )( *p++ ) << 24; - w |= ( uint64_t )( *p++ ) << 32; - w |= ( uint64_t )( *p++ ) << 40; - w |= ( uint64_t )( *p++ ) << 48; - w |= ( uint64_t )( *p++ ) << 56; - return w; + return (( uint64_t )( p[0] ) << 0) | + (( uint64_t )( p[1] ) << 8) | + (( uint64_t )( p[2] ) << 16) | + (( uint64_t )( p[3] ) << 24) | + (( uint64_t )( p[4] ) << 32) | + (( uint64_t )( p[5] ) << 40) | + (( uint64_t )( p[6] ) << 48) | + (( uint64_t )( p[7] ) << 56) ; #endif } -BLAKE2_LOCAL_INLINE(void) store32( void *dst, uint32_t w ) +static BLAKE2_INLINE void store32( void *dst, uint32_t w ) { #if defined(NATIVE_LITTLE_ENDIAN) memcpy(dst, &w, sizeof w); #else uint8_t *p = ( uint8_t * )dst; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; + p[0] = (uint8_t)(w >> 0); + p[1] = (uint8_t)(w >> 8); + p[2] = (uint8_t)(w >> 16); + p[3] = (uint8_t)(w >> 24); #endif } -BLAKE2_LOCAL_INLINE(void) store64( void *dst, uint64_t w ) +static BLAKE2_INLINE void store64( void *dst, uint64_t w ) { #if defined(NATIVE_LITTLE_ENDIAN) memcpy(dst, &w, sizeof w); #else uint8_t *p = ( uint8_t * )dst; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; + p[0] = (uint8_t)(w >> 0); + p[1] = (uint8_t)(w >> 8); + p[2] = (uint8_t)(w >> 16); + p[3] = (uint8_t)(w >> 24); + p[4] = (uint8_t)(w >> 32); + p[5] = (uint8_t)(w >> 40); + p[6] = (uint8_t)(w >> 48); + p[7] = (uint8_t)(w >> 56); #endif } -BLAKE2_LOCAL_INLINE(uint64_t) load48( const void *src ) +static BLAKE2_INLINE uint64_t load48( const void *src ) { const uint8_t *p = ( const uint8_t * )src; - uint64_t w = *p++; - w |= ( uint64_t )( *p++ ) << 8; - w |= ( uint64_t )( *p++ ) << 16; - w |= ( uint64_t )( *p++ ) << 24; - w |= ( uint64_t )( *p++ ) << 32; - w |= ( uint64_t )( *p++ ) << 40; - return w; + return (( uint64_t )( p[0] ) << 0) | + (( uint64_t )( p[1] ) << 8) | + (( uint64_t )( p[2] ) << 16) | + (( uint64_t )( p[3] ) << 24) | + (( uint64_t )( p[4] ) << 32) | + (( uint64_t )( p[5] ) << 40) ; } -BLAKE2_LOCAL_INLINE(void) store48( void *dst, uint64_t w ) +static BLAKE2_INLINE void store48( void *dst, uint64_t w ) { uint8_t *p = ( uint8_t * )dst; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; -} - -BLAKE2_LOCAL_INLINE(uint32_t) rotl32( const uint32_t w, const unsigned c ) -{ - return ( w << c ) | ( w >> ( 32 - c ) ); -} - -BLAKE2_LOCAL_INLINE(uint64_t) rotl64( const uint64_t w, const unsigned c ) -{ - return ( w << c ) | ( w >> ( 64 - c ) ); + p[0] = (uint8_t)(w >> 0); + p[1] = (uint8_t)(w >> 8); + p[2] = (uint8_t)(w >> 16); + p[3] = (uint8_t)(w >> 24); + p[4] = (uint8_t)(w >> 32); + p[5] = (uint8_t)(w >> 40); } -BLAKE2_LOCAL_INLINE(uint32_t) rotr32( const uint32_t w, const unsigned c ) +static BLAKE2_INLINE uint32_t rotr32( const uint32_t w, const unsigned c ) { return ( w >> c ) | ( w << ( 32 - c ) ); } -BLAKE2_LOCAL_INLINE(uint64_t) rotr64( const uint64_t w, const unsigned c ) +static BLAKE2_INLINE uint64_t rotr64( const uint64_t w, const unsigned c ) { return ( w >> c ) | ( w << ( 64 - c ) ); } /* prevents compiler optimizing out memset() */ -BLAKE2_LOCAL_INLINE(void) secure_zero_memory(void *v, size_t n) +static BLAKE2_INLINE void secure_zero_memory(void *v, size_t n) { static void *(*const volatile memset_v)(void *, int, size_t) = &memset; memset_v(v, 0, n); diff --git a/ref/blake2.h b/ref/blake2.h index 1a9fdf4..6a3069b 100644 --- a/ref/blake2.h +++ b/ref/blake2.h @@ -12,19 +12,16 @@ More information about the BLAKE2 hash function can be found at https://blake2.net. */ -#pragma once -#ifndef __BLAKE2_H__ -#define __BLAKE2_H__ +#ifndef BLAKE2_H +#define BLAKE2_H #include <stddef.h> #include <stdint.h> -#ifdef BLAKE2_NO_INLINE -#define BLAKE2_LOCAL_INLINE(type) static type -#endif - -#ifndef BLAKE2_LOCAL_INLINE -#define BLAKE2_LOCAL_INLINE(type) static inline type +#if defined(_MSC_VER) +#define BLAKE2_PACKED(x) __pragma(pack(push, 1)) x __pragma(pack(pop)) +#else +#define BLAKE2_PACKED(x) x __attribute__((packed)) #endif #if defined(__cplusplus) @@ -49,60 +46,65 @@ extern "C" { BLAKE2B_PERSONALBYTES = 16 }; - typedef struct __blake2s_state + typedef struct blake2s_state__ { uint32_t h[8]; uint32_t t[2]; uint32_t f[2]; - uint8_t buf[2 * BLAKE2S_BLOCKBYTES]; + uint8_t buf[BLAKE2S_BLOCKBYTES]; size_t buflen; + size_t outlen; uint8_t last_node; } blake2s_state; - typedef struct __blake2b_state + typedef struct blake2b_state__ { uint64_t h[8]; uint64_t t[2]; uint64_t f[2]; - uint8_t buf[2 * BLAKE2B_BLOCKBYTES]; + uint8_t buf[BLAKE2B_BLOCKBYTES]; size_t buflen; + size_t outlen; uint8_t last_node; } blake2b_state; - typedef struct __blake2sp_state + typedef struct blake2sp_state__ { blake2s_state S[8][1]; blake2s_state R[1]; - uint8_t buf[8 * BLAKE2S_BLOCKBYTES]; - size_t buflen; + uint8_t buf[8 * BLAKE2S_BLOCKBYTES]; + size_t buflen; + size_t outlen; } blake2sp_state; - typedef struct __blake2bp_state + typedef struct blake2bp_state__ { blake2b_state S[4][1]; blake2b_state R[1]; - uint8_t buf[4 * BLAKE2B_BLOCKBYTES]; - size_t buflen; + uint8_t buf[4 * BLAKE2B_BLOCKBYTES]; + size_t buflen; + size_t outlen; } blake2bp_state; -#pragma pack(push, 1) - typedef struct __blake2s_param + BLAKE2_PACKED(struct blake2s_param__ { uint8_t digest_length; /* 1 */ uint8_t key_length; /* 2 */ uint8_t fanout; /* 3 */ uint8_t depth; /* 4 */ uint32_t leaf_length; /* 8 */ - uint8_t node_offset[6];// 14 + uint8_t node_offset[6];/* 14 */ uint8_t node_depth; /* 15 */ uint8_t inner_length; /* 16 */ /* uint8_t reserved[0]; */ uint8_t salt[BLAKE2S_SALTBYTES]; /* 24 */ uint8_t personal[BLAKE2S_PERSONALBYTES]; /* 32 */ - } blake2s_param; + }); - typedef struct __blake2b_param + typedef struct blake2s_param__ blake2s_param; + + BLAKE2_PACKED(struct blake2b_param__ { uint8_t digest_length; /* 1 */ uint8_t key_length; /* 2 */ @@ -115,43 +117,48 @@ extern "C" { uint8_t reserved[14]; /* 32 */ uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */ uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */ - } blake2b_param; -#pragma pack(pop) + }); + + typedef struct blake2b_param__ blake2b_param; + + /* Padded structs result in a compile-time error */ + enum { + BLAKE2_DUMMY_1 = 1/(sizeof(blake2s_param) == BLAKE2S_OUTBYTES), + BLAKE2_DUMMY_2 = 1/(sizeof(blake2b_param) == BLAKE2B_OUTBYTES) + }; /* Streaming API */ - int blake2s_init( blake2s_state *S, const uint8_t outlen ); - int blake2s_init_key( blake2s_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ); + int blake2s_init( blake2s_state *S, size_t outlen ); + int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); int blake2s_init_param( blake2s_state *S, const blake2s_param *P ); - int blake2s_update( blake2s_state *S, const uint8_t *in, uint64_t inlen ); - int blake2s_final( blake2s_state *S, uint8_t *out, uint8_t outlen ); + int blake2s_update( blake2s_state *S, const void *in, size_t inlen ); + int blake2s_final( blake2s_state *S, void *out, size_t outlen ); - int blake2b_init( blake2b_state *S, const uint8_t outlen ); - int blake2b_init_key( blake2b_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ); + int blake2b_init( blake2b_state *S, size_t outlen ); + int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); int blake2b_init_param( blake2b_state *S, const blake2b_param *P ); - int blake2b_update( blake2b_state *S, const uint8_t *in, uint64_t inlen ); - int blake2b_final( blake2b_state *S, uint8_t *out, uint8_t outlen ); + int blake2b_update( blake2b_state *S, const void *in, size_t inlen ); + int blake2b_final( blake2b_state *S, void *out, size_t outlen ); - int blake2sp_init( blake2sp_state *S, const uint8_t outlen ); - int blake2sp_init_key( blake2sp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ); - int blake2sp_update( blake2sp_state *S, const uint8_t *in, uint64_t inlen ); - int blake2sp_final( blake2sp_state *S, uint8_t *out, uint8_t outlen ); + int blake2sp_init( blake2sp_state *S, size_t outlen ); + int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t keylen ); + int blake2sp_update( blake2sp_state *S, const void *in, size_t inlen ); + int blake2sp_final( blake2sp_state *S, void *out, size_t outlen ); - int blake2bp_init( blake2bp_state *S, const uint8_t outlen ); - int blake2bp_init_key( blake2bp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ); - int blake2bp_update( blake2bp_state *S, const uint8_t *in, uint64_t inlen ); - int blake2bp_final( blake2bp_state *S, uint8_t *out, uint8_t outlen ); + int blake2bp_init( blake2bp_state *S, size_t outlen ); + int blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t keylen ); + int blake2bp_update( blake2bp_state *S, const void *in, size_t inlen ); + int blake2bp_final( blake2bp_state *S, void *out, size_t outlen ); /* Simple API */ - int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ); - int blake2b( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ); + int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); - int blake2sp( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ); - int blake2bp( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ); + int blake2sp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + int blake2bp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); - static inline int blake2( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ) - { - return blake2b( out, in, key, outlen, inlen, keylen ); - } + /* This is simply an alias for blake2b */ + int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); #if defined(__cplusplus) } diff --git a/ref/blake2b-ref.c b/ref/blake2b-ref.c index ac91568..f1c7055 100644 --- a/ref/blake2b-ref.c +++ b/ref/blake2b-ref.c @@ -45,136 +45,63 @@ static const uint8_t blake2b_sigma[12][16] = }; -BLAKE2_LOCAL_INLINE(int) blake2b_set_lastnode( blake2b_state *S ) +static void blake2b_set_lastnode( blake2b_state *S ) { - S->f[1] = -1; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2b_clear_lastnode( blake2b_state *S ) -{ - S->f[1] = 0; - return 0; + S->f[1] = (uint64_t)-1; } /* Some helper functions, not necessarily useful */ -BLAKE2_LOCAL_INLINE(int) blake2b_is_lastblock( const blake2b_state *S ) +static int blake2b_is_lastblock( const blake2b_state *S ) { return S->f[0] != 0; } -BLAKE2_LOCAL_INLINE(int) blake2b_set_lastblock( blake2b_state *S ) +static void blake2b_set_lastblock( blake2b_state *S ) { if( S->last_node ) blake2b_set_lastnode( S ); - S->f[0] = -1; - return 0; + S->f[0] = (uint64_t)-1; } -BLAKE2_LOCAL_INLINE(int) blake2b_clear_lastblock( blake2b_state *S ) -{ - if( S->last_node ) blake2b_clear_lastnode( S ); - - S->f[0] = 0; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2b_increment_counter( blake2b_state *S, const uint64_t inc ) +static void blake2b_increment_counter( blake2b_state *S, const uint64_t inc ) { S->t[0] += inc; S->t[1] += ( S->t[0] < inc ); - return 0; -} - - - -/* Parameter-related functions */ -BLAKE2_LOCAL_INLINE(int) blake2b_param_set_digest_length( blake2b_param *P, const uint8_t digest_length ) -{ - P->digest_length = digest_length; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2b_param_set_fanout( blake2b_param *P, const uint8_t fanout ) -{ - P->fanout = fanout; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2b_param_set_max_depth( blake2b_param *P, const uint8_t depth ) -{ - P->depth = depth; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2b_param_set_leaf_length( blake2b_param *P, const uint32_t leaf_length ) -{ - store32( &P->leaf_length, leaf_length ); - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2b_param_set_node_offset( blake2b_param *P, const uint64_t node_offset ) -{ - store64( &P->node_offset, node_offset ); - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2b_param_set_node_depth( blake2b_param *P, const uint8_t node_depth ) -{ - P->node_depth = node_depth; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2b_param_set_inner_length( blake2b_param *P, const uint8_t inner_length ) -{ - P->inner_length = inner_length; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2b_param_set_salt( blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES] ) -{ - memcpy( P->salt, salt, BLAKE2B_SALTBYTES ); - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2b_param_set_personal( blake2b_param *P, const uint8_t personal[BLAKE2B_PERSONALBYTES] ) -{ - memcpy( P->personal, personal, BLAKE2B_PERSONALBYTES ); - return 0; } -BLAKE2_LOCAL_INLINE(int) blake2b_init0( blake2b_state *S ) +static void blake2b_init0( blake2b_state *S ) { + size_t i; memset( S, 0, sizeof( blake2b_state ) ); - for( int i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i]; - - return 0; + for( i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i]; } /* init xors IV with input parameter block */ int blake2b_init_param( blake2b_state *S, const blake2b_param *P ) { const uint8_t *p = ( const uint8_t * )( P ); + size_t i; blake2b_init0( S ); /* IV XOR ParamBlock */ - for( size_t i = 0; i < 8; ++i ) + for( i = 0; i < 8; ++i ) S->h[i] ^= load64( p + sizeof( S->h[i] ) * i ); + S->outlen = P->digest_length; return 0; } -int blake2b_init( blake2b_state *S, const uint8_t outlen ) +int blake2b_init( blake2b_state *S, size_t outlen ) { blake2b_param P[1]; if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; - P->digest_length = outlen; + P->digest_length = (uint8_t)outlen; P->key_length = 0; P->fanout = 1; P->depth = 1; @@ -189,7 +116,7 @@ int blake2b_init( blake2b_state *S, const uint8_t outlen ) } -int blake2b_init_key( blake2b_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ) +int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen ) { blake2b_param P[1]; @@ -197,8 +124,8 @@ int blake2b_init_key( blake2b_state *S, const uint8_t outlen, const void *key, c if ( !key || !keylen || keylen > BLAKE2B_KEYBYTES ) return -1; - P->digest_length = outlen; - P->key_length = keylen; + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; P->fanout = 1; P->depth = 1; store32( &P->leaf_length, 0 ); @@ -221,39 +148,20 @@ int blake2b_init_key( blake2b_state *S, const uint8_t outlen, const void *key, c return 0; } -static int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] ) -{ - uint64_t m[16]; - uint64_t v[16]; - int i; - - for( i = 0; i < 16; ++i ) - m[i] = load64( block + i * sizeof( m[i] ) ); - - for( i = 0; i < 8; ++i ) - v[i] = S->h[i]; - - v[ 8] = blake2b_IV[0]; - v[ 9] = blake2b_IV[1]; - v[10] = blake2b_IV[2]; - v[11] = blake2b_IV[3]; - v[12] = S->t[0] ^ blake2b_IV[4]; - v[13] = S->t[1] ^ blake2b_IV[5]; - v[14] = S->f[0] ^ blake2b_IV[6]; - v[15] = S->f[1] ^ blake2b_IV[7]; -#define G(r,i,a,b,c,d) \ - do { \ +#define G(r,i,a,b,c,d) \ + do { \ a = a + b + m[blake2b_sigma[r][2*i+0]]; \ - d = rotr64(d ^ a, 32); \ - c = c + d; \ - b = rotr64(b ^ c, 24); \ + d = rotr64(d ^ a, 32); \ + c = c + d; \ + b = rotr64(b ^ c, 24); \ a = a + b + m[blake2b_sigma[r][2*i+1]]; \ - d = rotr64(d ^ a, 16); \ - c = c + d; \ - b = rotr64(b ^ c, 63); \ + d = rotr64(d ^ a, 16); \ + c = c + d; \ + b = rotr64(b ^ c, 63); \ } while(0) -#define ROUND(r) \ - do { \ + +#define ROUND(r) \ + do { \ G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ G(r,2,v[ 2],v[ 6],v[10],v[14]); \ @@ -263,6 +171,30 @@ static int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCK G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ } while(0) + +static void blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] ) +{ + uint64_t m[16]; + uint64_t v[16]; + size_t i; + + for( i = 0; i < 16; ++i ) { + m[i] = load64( block + i * sizeof( m[i] ) ); + } + + for( i = 0; i < 8; ++i ) { + v[i] = S->h[i]; + } + + v[ 8] = blake2b_IV[0]; + v[ 9] = blake2b_IV[1]; + v[10] = blake2b_IV[2]; + v[11] = blake2b_IV[3]; + v[12] = blake2b_IV[4] ^ S->t[0]; + v[13] = blake2b_IV[5] ^ S->t[1]; + v[14] = blake2b_IV[6] ^ S->f[0]; + v[15] = blake2b_IV[7] ^ S->f[1]; + ROUND( 0 ); ROUND( 1 ); ROUND( 2 ); @@ -276,78 +208,67 @@ static int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCK ROUND( 10 ); ROUND( 11 ); - for( i = 0; i < 8; ++i ) + for( i = 0; i < 8; ++i ) { S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; + } +} #undef G #undef ROUND - return 0; -} -/* inlen now in bytes */ -int blake2b_update( blake2b_state *S, const uint8_t *in, uint64_t inlen ) +int blake2b_update( blake2b_state *S, const void *pin, size_t inlen ) { - while( inlen > 0 ) + const unsigned char * in = (const unsigned char *)pin; + if( inlen > 0 ) { size_t left = S->buflen; - size_t fill = 2 * BLAKE2B_BLOCKBYTES - left; - + size_t fill = BLAKE2B_BLOCKBYTES - left; if( inlen > fill ) { + S->buflen = 0; memcpy( S->buf + left, in, fill ); /* Fill buffer */ - S->buflen += fill; blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); blake2b_compress( S, S->buf ); /* Compress */ - memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); /* Shift buffer left */ - S->buflen -= BLAKE2B_BLOCKBYTES; - in += fill; - inlen -= fill; - } - else /* inlen <= fill */ - { - memcpy( S->buf + left, in, inlen ); - S->buflen += inlen; /* Be lazy, do not compress */ - in += inlen; - inlen -= inlen; + in += fill; inlen -= fill; + while(inlen > BLAKE2B_BLOCKBYTES) { + blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); + blake2b_compress( S, in ); + in += BLAKE2B_BLOCKBYTES; + inlen -= BLAKE2B_BLOCKBYTES; + } } + memcpy( S->buf + S->buflen, in, inlen ); + S->buflen += inlen; } - return 0; } -/* Is this correct? */ -int blake2b_final( blake2b_state *S, uint8_t *out, uint8_t outlen ) +int blake2b_final( blake2b_state *S, void *out, size_t outlen ) { uint8_t buffer[BLAKE2B_OUTBYTES] = {0}; + size_t i; - if( out == NULL || outlen == 0 || outlen > BLAKE2B_OUTBYTES ) + if( out == NULL || outlen < S->outlen ) return -1; if( blake2b_is_lastblock( S ) ) return -1; - if( S->buflen > BLAKE2B_BLOCKBYTES ) - { - blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); - blake2b_compress( S, S->buf ); - S->buflen -= BLAKE2B_BLOCKBYTES; - memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen ); - } - blake2b_increment_counter( S, S->buflen ); blake2b_set_lastblock( S ); - memset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */ + memset( S->buf + S->buflen, 0, BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */ blake2b_compress( S, S->buf ); - for( int i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ + for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ store64( buffer + sizeof( S->h[i] ) * i, S->h[i] ); - memcpy( out, buffer, outlen ); + memcpy( out, buffer, S->outlen ); + secure_zero_memory(buffer, sizeof(buffer)); return 0; } /* inlen, at least, should be uint64_t. Others can be size_t. */ -int blake2b( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ) +int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { blake2b_state S[1]; @@ -376,41 +297,82 @@ int blake2b( uint8_t *out, const void *in, const void *key, const uint8_t outlen return 0; } +int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { + return blake2b(out, outlen, in, inlen, key, keylen); +} + #if defined(SUPERCOP) int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen ) { - return blake2b( out, in, NULL, BLAKE2B_OUTBYTES, inlen, 0 ); + return blake2b( out, BLAKE2B_OUTBYTES, in, inlen, NULL, 0 ); } #endif #if defined(BLAKE2B_SELFTEST) #include <string.h> #include "blake2-kat.h" -int main( int argc, char **argv ) +int main( void ) { uint8_t key[BLAKE2B_KEYBYTES]; - uint8_t buf[KAT_LENGTH]; + uint8_t buf[BLAKE2_KAT_LENGTH]; + size_t i, step; - for( size_t i = 0; i < BLAKE2B_KEYBYTES; ++i ) + for( i = 0; i < BLAKE2B_KEYBYTES; ++i ) key[i] = ( uint8_t )i; - for( size_t i = 0; i < KAT_LENGTH; ++i ) + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) buf[i] = ( uint8_t )i; - for( size_t i = 0; i < KAT_LENGTH; ++i ) + /* Test simple API */ + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) { uint8_t hash[BLAKE2B_OUTBYTES]; - blake2b( hash, buf, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES ); + blake2b( hash, BLAKE2B_OUTBYTES, buf, i, key, BLAKE2B_KEYBYTES ); if( 0 != memcmp( hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES ) ) { - puts( "error" ); - return -1; + goto fail; + } + } + + /* Test streaming API */ + for(step = 1; step < BLAKE2B_BLOCKBYTES; ++step) { + for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) { + uint8_t hash[BLAKE2B_OUTBYTES]; + blake2b_state S; + uint8_t * p = buf; + size_t mlen = i; + int err = 0; + + if( (err = blake2b_init_key(&S, BLAKE2B_OUTBYTES, key, BLAKE2B_KEYBYTES)) < 0 ) { + goto fail; + } + + while (mlen >= step) { + if ( (err = blake2b_update(&S, p, step)) < 0 ) { + goto fail; + } + mlen -= step; + p += step; + } + if ( (err = blake2b_update(&S, p, mlen)) < 0) { + goto fail; + } + if ( (err = blake2b_final(&S, hash, BLAKE2B_OUTBYTES)) < 0) { + goto fail; + } + + if (0 != memcmp(hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES)) { + goto fail; + } } } puts( "ok" ); return 0; +fail: + puts("error"); + return -1; } #endif diff --git a/ref/blake2bp-ref.c b/ref/blake2bp-ref.c index 28855e6..8bbe305 100644 --- a/ref/blake2bp-ref.c +++ b/ref/blake2bp-ref.c @@ -27,11 +27,11 @@ #define PARALLELISM_DEGREE 4 -BLAKE2_LOCAL_INLINE(int) blake2bp_init_leaf( blake2b_state *S, uint8_t outlen, uint8_t keylen, uint64_t offset ) +static int blake2bp_init_leaf( blake2b_state *S, size_t outlen, size_t keylen, uint64_t offset ) { blake2b_param P[1]; - P->digest_length = outlen; - P->key_length = keylen; + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; P->fanout = PARALLELISM_DEGREE; P->depth = 2; store32( &P->leaf_length, 0 ); @@ -44,11 +44,11 @@ BLAKE2_LOCAL_INLINE(int) blake2bp_init_leaf( blake2b_state *S, uint8_t outlen, u return blake2b_init_param( S, P ); } -BLAKE2_LOCAL_INLINE(int) blake2bp_init_root( blake2b_state *S, uint8_t outlen, uint8_t keylen ) +static int blake2bp_init_root( blake2b_state *S, size_t outlen, size_t keylen ) { blake2b_param P[1]; - P->digest_length = outlen; - P->key_length = keylen; + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; P->fanout = PARALLELISM_DEGREE; P->depth = 2; store32( &P->leaf_length, 0 ); @@ -62,17 +62,20 @@ BLAKE2_LOCAL_INLINE(int) blake2bp_init_root( blake2b_state *S, uint8_t outlen, u } -int blake2bp_init( blake2bp_state *S, const uint8_t outlen ) +int blake2bp_init( blake2bp_state *S, size_t outlen ) { + size_t i; + if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1; memset( S->buf, 0, sizeof( S->buf ) ); S->buflen = 0; + S->outlen = outlen; if( blake2bp_init_root( S->R, outlen, 0 ) < 0 ) return -1; - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) if( blake2bp_init_leaf( S->S[i], outlen, 0, i ) < 0 ) return -1; S->R->last_node = 1; @@ -80,19 +83,22 @@ int blake2bp_init( blake2bp_state *S, const uint8_t outlen ) return 0; } -int blake2bp_init_key( blake2bp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ) +int blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t keylen ) { + size_t i; + if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1; if( !key || !keylen || keylen > BLAKE2B_KEYBYTES ) return -1; memset( S->buf, 0, sizeof( S->buf ) ); S->buflen = 0; + S->outlen = outlen; if( blake2bp_init_root( S->R, outlen, keylen ) < 0 ) return -1; - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) if( blake2bp_init_leaf( S->S[i], outlen, keylen, i ) < 0 ) return -1; S->R->last_node = 1; @@ -102,7 +108,7 @@ int blake2bp_init_key( blake2bp_state *S, const uint8_t outlen, const void *key, memset( block, 0, BLAKE2B_BLOCKBYTES ); memcpy( block, key, keylen ); - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2b_update( S->S[i], block, BLAKE2B_BLOCKBYTES ); secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */ @@ -111,16 +117,18 @@ int blake2bp_init_key( blake2bp_state *S, const uint8_t outlen, const void *key, } -int blake2bp_update( blake2bp_state *S, const uint8_t *in, uint64_t inlen ) +int blake2bp_update( blake2bp_state *S, const void *pin, size_t inlen ) { + const unsigned char * in = (const unsigned char *)pin; size_t left = S->buflen; size_t fill = sizeof( S->buf ) - left; + size_t i; if( left && inlen >= fill ) { memcpy( S->buf + left, in, fill ); - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2b_update( S->S[i], S->buf + i * BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); in += fill; @@ -132,19 +140,19 @@ int blake2bp_update( blake2bp_state *S, const uint8_t *in, uint64_t inlen ) #pragma omp parallel shared(S), num_threads(PARALLELISM_DEGREE) #else - for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) #endif { #if defined(_OPENMP) - size_t id__ = omp_get_thread_num(); + size_t i = omp_get_thread_num(); #endif - uint64_t inlen__ = inlen; - const uint8_t *in__ = ( const uint8_t * )in; - in__ += id__ * BLAKE2B_BLOCKBYTES; + size_t inlen__ = inlen; + const unsigned char *in__ = ( const unsigned char * )in; + in__ += i * BLAKE2B_BLOCKBYTES; while( inlen__ >= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES ) { - blake2b_update( S->S[id__], in__, BLAKE2B_BLOCKBYTES ); + blake2b_update( S->S[i], in__, BLAKE2B_BLOCKBYTES ); in__ += PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES; inlen__ -= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES; } @@ -160,11 +168,16 @@ int blake2bp_update( blake2bp_state *S, const uint8_t *in, uint64_t inlen ) return 0; } -int blake2bp_final( blake2bp_state *S, uint8_t *out, const uint8_t outlen ) +int blake2bp_final( blake2bp_state *S, void *out, size_t outlen ) { uint8_t hash[PARALLELISM_DEGREE][BLAKE2B_OUTBYTES]; + size_t i; + + if(out == NULL || outlen < S->outlen) { + return -1; + } - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) { if( S->buflen > i * BLAKE2B_BLOCKBYTES ) { @@ -178,18 +191,18 @@ int blake2bp_final( blake2bp_state *S, uint8_t *out, const uint8_t outlen ) blake2b_final( S->S[i], hash[i], BLAKE2B_OUTBYTES ); } - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2b_update( S->R, hash[i], BLAKE2B_OUTBYTES ); - - return blake2b_final( S->R, out, outlen ); + return blake2b_final( S->R, out, S->outlen ); } -int blake2bp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uint64_t inlen, uint8_t keylen ) +int blake2bp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { uint8_t hash[PARALLELISM_DEGREE][BLAKE2B_OUTBYTES]; blake2b_state S[PARALLELISM_DEGREE][1]; blake2b_state FS[1]; + size_t i; /* Verify parameters */ if ( NULL == in && inlen > 0 ) return -1; @@ -202,7 +215,7 @@ int blake2bp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uin if( keylen > BLAKE2B_KEYBYTES ) return -1; - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) if( blake2bp_init_leaf( S[i], outlen, keylen, i ) < 0 ) return -1; S[PARALLELISM_DEGREE - 1]->last_node = 1; /* mark last node */ @@ -213,7 +226,7 @@ int blake2bp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uin memset( block, 0, BLAKE2B_BLOCKBYTES ); memcpy( block, key, keylen ); - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2b_update( S[i], block, BLAKE2B_BLOCKBYTES ); secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */ @@ -223,31 +236,31 @@ int blake2bp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uin #pragma omp parallel shared(S,hash), num_threads(PARALLELISM_DEGREE) #else - for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) #endif { #if defined(_OPENMP) - size_t id__ = omp_get_thread_num(); + size_t i = omp_get_thread_num(); #endif - uint64_t inlen__ = inlen; - const uint8_t *in__ = ( const uint8_t * )in; - in__ += id__ * BLAKE2B_BLOCKBYTES; + size_t inlen__ = inlen; + const unsigned char *in__ = ( const unsigned char * )in; + in__ += i * BLAKE2B_BLOCKBYTES; while( inlen__ >= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES ) { - blake2b_update( S[id__], in__, BLAKE2B_BLOCKBYTES ); + blake2b_update( S[i], in__, BLAKE2B_BLOCKBYTES ); in__ += PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES; inlen__ -= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES; } - if( inlen__ > id__ * BLAKE2B_BLOCKBYTES ) + if( inlen__ > i * BLAKE2B_BLOCKBYTES ) { - const size_t left = inlen__ - id__ * BLAKE2B_BLOCKBYTES; + const size_t left = inlen__ - i * BLAKE2B_BLOCKBYTES; const size_t len = left <= BLAKE2B_BLOCKBYTES ? left : BLAKE2B_BLOCKBYTES; - blake2b_update( S[id__], in__, len ); + blake2b_update( S[i], in__, len ); } - blake2b_final( S[id__], hash[id__], BLAKE2B_OUTBYTES ); + blake2b_final( S[i], hash[i], BLAKE2B_OUTBYTES ); } if( blake2bp_init_root( FS, outlen, keylen ) < 0 ) @@ -255,7 +268,7 @@ int blake2bp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uin FS->last_node = 1; /* Mark as last node */ - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2b_update( FS, hash[i], BLAKE2B_OUTBYTES ); return blake2b_final( FS, out, outlen );; @@ -264,30 +277,67 @@ int blake2bp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uin #if defined(BLAKE2BP_SELFTEST) #include <string.h> #include "blake2-kat.h" -int main( int argc, char **argv ) +int main( void ) { uint8_t key[BLAKE2B_KEYBYTES]; - uint8_t buf[KAT_LENGTH]; + uint8_t buf[BLAKE2_KAT_LENGTH]; + size_t i, step; - for( size_t i = 0; i < BLAKE2B_KEYBYTES; ++i ) + for( i = 0; i < BLAKE2B_KEYBYTES; ++i ) key[i] = ( uint8_t )i; - for( size_t i = 0; i < KAT_LENGTH; ++i ) + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) buf[i] = ( uint8_t )i; - for( size_t i = 0; i < KAT_LENGTH; ++i ) + /* Test simple API */ + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) { uint8_t hash[BLAKE2B_OUTBYTES]; - blake2bp( hash, buf, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES ); + blake2bp( hash, BLAKE2B_OUTBYTES, buf, i, key, BLAKE2B_KEYBYTES ); if( 0 != memcmp( hash, blake2bp_keyed_kat[i], BLAKE2B_OUTBYTES ) ) { - puts( "error" ); - return -1; + goto fail; + } + } + + /* Test streaming API */ + for(step = 1; step < BLAKE2B_BLOCKBYTES; ++step) { + for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) { + uint8_t hash[BLAKE2B_OUTBYTES]; + blake2bp_state S; + uint8_t * p = buf; + size_t mlen = i; + int err = 0; + + if( (err = blake2bp_init_key(&S, BLAKE2B_OUTBYTES, key, BLAKE2B_KEYBYTES)) < 0 ) { + goto fail; + } + + while (mlen >= step) { + if ( (err = blake2bp_update(&S, p, step)) < 0 ) { + goto fail; + } + mlen -= step; + p += step; + } + if ( (err = blake2bp_update(&S, p, mlen)) < 0) { + goto fail; + } + if ( (err = blake2bp_final(&S, hash, BLAKE2B_OUTBYTES)) < 0) { + goto fail; + } + + if (0 != memcmp(hash, blake2bp_keyed_kat[i], BLAKE2B_OUTBYTES)) { + goto fail; + } } } puts( "ok" ); return 0; +fail: + puts("error"); + return -1; } #endif diff --git a/ref/blake2s-ref.c b/ref/blake2s-ref.c index 0e246c3..b570fd9 100644 --- a/ref/blake2s-ref.c +++ b/ref/blake2s-ref.c @@ -40,135 +40,64 @@ static const uint8_t blake2s_sigma[10][16] = { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , }; -BLAKE2_LOCAL_INLINE(int) blake2s_set_lastnode( blake2s_state *S ) +static void blake2s_set_lastnode( blake2s_state *S ) { - S->f[1] = -1; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2s_clear_lastnode( blake2s_state *S ) -{ - S->f[1] = 0; - return 0; + S->f[1] = (uint32_t)-1; } /* Some helper functions, not necessarily useful */ -BLAKE2_LOCAL_INLINE(int) blake2s_is_lastblock( const blake2s_state *S ) +static int blake2s_is_lastblock( const blake2s_state *S ) { return S->f[0] != 0; } -BLAKE2_LOCAL_INLINE(int) blake2s_set_lastblock( blake2s_state *S ) +static void blake2s_set_lastblock( blake2s_state *S ) { if( S->last_node ) blake2s_set_lastnode( S ); - S->f[0] = -1; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2s_clear_lastblock( blake2s_state *S ) -{ - if( S->last_node ) blake2s_clear_lastnode( S ); - - S->f[0] = 0; - return 0; + S->f[0] = (uint32_t)-1; } -BLAKE2_LOCAL_INLINE(int) blake2s_increment_counter( blake2s_state *S, const uint32_t inc ) +static void blake2s_increment_counter( blake2s_state *S, const uint32_t inc ) { S->t[0] += inc; S->t[1] += ( S->t[0] < inc ); - return 0; -} - -/* Parameter-related functions */ -BLAKE2_LOCAL_INLINE(int) blake2s_param_set_digest_length( blake2s_param *P, const uint8_t digest_length ) -{ - P->digest_length = digest_length; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2s_param_set_fanout( blake2s_param *P, const uint8_t fanout ) -{ - P->fanout = fanout; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2s_param_set_max_depth( blake2s_param *P, const uint8_t depth ) -{ - P->depth = depth; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2s_param_set_leaf_length( blake2s_param *P, const uint32_t leaf_length ) -{ - store32( &P->leaf_length, leaf_length ); - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2s_param_set_node_offset( blake2s_param *P, const uint64_t node_offset ) -{ - store48( P->node_offset, node_offset ); - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2s_param_set_node_depth( blake2s_param *P, const uint8_t node_depth ) -{ - P->node_depth = node_depth; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2s_param_set_inner_length( blake2s_param *P, const uint8_t inner_length ) -{ - P->inner_length = inner_length; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2s_param_set_salt( blake2s_param *P, const uint8_t salt[BLAKE2S_SALTBYTES] ) -{ - memcpy( P->salt, salt, BLAKE2S_SALTBYTES ); - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2s_param_set_personal( blake2s_param *P, const uint8_t personal[BLAKE2S_PERSONALBYTES] ) -{ - memcpy( P->personal, personal, BLAKE2S_PERSONALBYTES ); - return 0; } -BLAKE2_LOCAL_INLINE(int) blake2s_init0( blake2s_state *S ) +static void blake2s_init0( blake2s_state *S ) { + size_t i; memset( S, 0, sizeof( blake2s_state ) ); - for( int i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i]; - - return 0; + for( i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i]; } /* init2 xors IV with input parameter block */ int blake2s_init_param( blake2s_state *S, const blake2s_param *P ) { - const uint32_t *p = ( const uint32_t * )( P ); + const unsigned char *p = ( const unsigned char * )( P ); + size_t i; blake2s_init0( S ); /* IV XOR ParamBlock */ - for( size_t i = 0; i < 8; ++i ) - S->h[i] ^= load32( &p[i] ); + for( i = 0; i < 8; ++i ) + S->h[i] ^= load32( &p[i * 4] ); + S->outlen = P->digest_length; return 0; } /* Sequential blake2s initialization */ -int blake2s_init( blake2s_state *S, const uint8_t outlen ) +int blake2s_init( blake2s_state *S, size_t outlen ) { blake2s_param P[1]; /* Move interval verification here? */ if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1; - P->digest_length = outlen; + P->digest_length = (uint8_t)outlen; P->key_length = 0; P->fanout = 1; P->depth = 1; @@ -182,7 +111,7 @@ int blake2s_init( blake2s_state *S, const uint8_t outlen ) return blake2s_init_param( S, P ); } -int blake2s_init_key( blake2s_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ) +int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen ) { blake2s_param P[1]; @@ -190,8 +119,8 @@ int blake2s_init_key( blake2s_state *S, const uint8_t outlen, const void *key, c if ( !key || !keylen || keylen > BLAKE2S_KEYBYTES ) return -1; - P->digest_length = outlen; - P->key_length = keylen; + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; P->fanout = 1; P->depth = 1; store32( &P->leaf_length, 0 ); @@ -214,16 +143,43 @@ int blake2s_init_key( blake2s_state *S, const uint8_t outlen, const void *key, c return 0; } -static int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] ) +#define G(r,i,a,b,c,d) \ + do { \ + a = a + b + m[blake2s_sigma[r][2*i+0]]; \ + d = rotr32(d ^ a, 16); \ + c = c + d; \ + b = rotr32(b ^ c, 12); \ + a = a + b + m[blake2s_sigma[r][2*i+1]]; \ + d = rotr32(d ^ a, 8); \ + c = c + d; \ + b = rotr32(b ^ c, 7); \ + } while(0) + +#define ROUND(r) \ + do { \ + G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + G(r,2,v[ 2],v[ 6],v[10],v[14]); \ + G(r,3,v[ 3],v[ 7],v[11],v[15]); \ + G(r,4,v[ 0],v[ 5],v[10],v[15]); \ + G(r,5,v[ 1],v[ 6],v[11],v[12]); \ + G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ + } while(0) + +static void blake2s_compress( blake2s_state *S, const uint8_t in[BLAKE2S_BLOCKBYTES] ) { uint32_t m[16]; uint32_t v[16]; + size_t i; - for( size_t i = 0; i < 16; ++i ) - m[i] = load32( block + i * sizeof( m[i] ) ); + for( i = 0; i < 16; ++i ) { + m[i] = load32( in + i * sizeof( m[i] ) ); + } - for( size_t i = 0; i < 8; ++i ) + for( i = 0; i < 8; ++i ) { v[i] = S->h[i]; + } v[ 8] = blake2s_IV[0]; v[ 9] = blake2s_IV[1]; @@ -233,28 +189,7 @@ static int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCK v[13] = S->t[1] ^ blake2s_IV[5]; v[14] = S->f[0] ^ blake2s_IV[6]; v[15] = S->f[1] ^ blake2s_IV[7]; -#define G(r,i,a,b,c,d) \ - do { \ - a = a + b + m[blake2s_sigma[r][2*i+0]]; \ - d = rotr32(d ^ a, 16); \ - c = c + d; \ - b = rotr32(b ^ c, 12); \ - a = a + b + m[blake2s_sigma[r][2*i+1]]; \ - d = rotr32(d ^ a, 8); \ - c = c + d; \ - b = rotr32(b ^ c, 7); \ - } while(0) -#define ROUND(r) \ - do { \ - G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ - G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ - G(r,2,v[ 2],v[ 6],v[10],v[14]); \ - G(r,3,v[ 3],v[ 7],v[11],v[15]); \ - G(r,4,v[ 0],v[ 5],v[10],v[15]); \ - G(r,5,v[ 1],v[ 6],v[11],v[12]); \ - G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ - G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ - } while(0) + ROUND( 0 ); ROUND( 1 ); ROUND( 2 ); @@ -266,77 +201,66 @@ static int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCK ROUND( 8 ); ROUND( 9 ); - for( size_t i = 0; i < 8; ++i ) + for( i = 0; i < 8; ++i ) { S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; + } +} #undef G #undef ROUND - return 0; -} - -int blake2s_update( blake2s_state *S, const uint8_t *in, uint64_t inlen ) +int blake2s_update( blake2s_state *S, const void *pin, size_t inlen ) { - while( inlen > 0 ) + const unsigned char * in = (const unsigned char *)pin; + if( inlen > 0 ) { size_t left = S->buflen; - size_t fill = 2 * BLAKE2S_BLOCKBYTES - left; - + size_t fill = BLAKE2S_BLOCKBYTES - left; if( inlen > fill ) { + S->buflen = 0; memcpy( S->buf + left, in, fill ); /* Fill buffer */ - S->buflen += fill; blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); blake2s_compress( S, S->buf ); /* Compress */ - memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); /* Shift buffer left */ - S->buflen -= BLAKE2S_BLOCKBYTES; - in += fill; - inlen -= fill; - } - else /* inlen <= fill */ - { - memcpy( S->buf + left, in, inlen ); - S->buflen += inlen; /* Be lazy, do not compress */ - in += inlen; - inlen -= inlen; + in += fill; inlen -= fill; + while(inlen > BLAKE2S_BLOCKBYTES) { + blake2s_increment_counter(S, BLAKE2S_BLOCKBYTES); + blake2s_compress( S, in ); + in += BLAKE2S_BLOCKBYTES; + inlen -= BLAKE2S_BLOCKBYTES; + } } + memcpy( S->buf + S->buflen, in, inlen ); + S->buflen += inlen; } - return 0; } -int blake2s_final( blake2s_state *S, uint8_t *out, uint8_t outlen ) +int blake2s_final( blake2s_state *S, void *out, size_t outlen ) { uint8_t buffer[BLAKE2S_OUTBYTES] = {0}; + size_t i; - if( out == NULL || outlen == 0 || outlen > BLAKE2S_OUTBYTES ) + if( out == NULL || outlen < S->outlen ) return -1; if( blake2s_is_lastblock( S ) ) return -1; - - if( S->buflen > BLAKE2S_BLOCKBYTES ) - { - blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); - blake2s_compress( S, S->buf ); - S->buflen -= BLAKE2S_BLOCKBYTES; - memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen ); - } - blake2s_increment_counter( S, ( uint32_t )S->buflen ); blake2s_set_lastblock( S ); - memset( S->buf + S->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */ + memset( S->buf + S->buflen, 0, BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */ blake2s_compress( S, S->buf ); - for( int i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ + for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ store32( buffer + sizeof( S->h[i] ) * i, S->h[i] ); - + memcpy( out, buffer, outlen ); + secure_zero_memory(buffer, sizeof(buffer)); return 0; } -int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ) +int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { blake2s_state S[1]; @@ -368,38 +292,75 @@ int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen #if defined(SUPERCOP) int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen ) { - return blake2s( out, in, NULL, BLAKE2S_OUTBYTES, inlen, 0 ); + return blake2s( out, BLAKE2S_OUTBYTES in, inlen, NULL, 0 ); } #endif #if defined(BLAKE2S_SELFTEST) #include <string.h> #include "blake2-kat.h" -int main( int argc, char **argv ) +int main( void ) { uint8_t key[BLAKE2S_KEYBYTES]; - uint8_t buf[KAT_LENGTH]; + uint8_t buf[BLAKE2_KAT_LENGTH]; + size_t i, step; - for( size_t i = 0; i < BLAKE2S_KEYBYTES; ++i ) + for( i = 0; i < BLAKE2S_KEYBYTES; ++i ) key[i] = ( uint8_t )i; - for( size_t i = 0; i < KAT_LENGTH; ++i ) + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) buf[i] = ( uint8_t )i; - for( size_t i = 0; i < KAT_LENGTH; ++i ) + /* Test simple API */ + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) { uint8_t hash[BLAKE2S_OUTBYTES]; - blake2s( hash, buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES ); + blake2s( hash, BLAKE2S_OUTBYTES, buf, i, key, BLAKE2S_KEYBYTES ); if( 0 != memcmp( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) ) { - puts( "error" ); - return -1; + goto fail; + } + } + + /* Test streaming API */ + for(step = 1; step < BLAKE2S_BLOCKBYTES; ++step) { + for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) { + uint8_t hash[BLAKE2S_OUTBYTES]; + blake2s_state S; + uint8_t * p = buf; + size_t mlen = i; + int err = 0; + + if( (err = blake2s_init_key(&S, BLAKE2S_OUTBYTES, key, BLAKE2S_KEYBYTES)) < 0 ) { + goto fail; + } + + while (mlen >= step) { + if ( (err = blake2s_update(&S, p, step)) < 0 ) { + goto fail; + } + mlen -= step; + p += step; + } + if ( (err = blake2s_update(&S, p, mlen)) < 0) { + goto fail; + } + if ( (err = blake2s_final(&S, hash, BLAKE2S_OUTBYTES)) < 0) { + goto fail; + } + + if (0 != memcmp(hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES)) { + goto fail; + } } } puts( "ok" ); return 0; +fail: + puts("error"); + return -1; } #endif diff --git a/ref/blake2sp-ref.c b/ref/blake2sp-ref.c index 3775a2d..8c617ad 100644 --- a/ref/blake2sp-ref.c +++ b/ref/blake2sp-ref.c @@ -26,11 +26,11 @@ #define PARALLELISM_DEGREE 8 -BLAKE2_LOCAL_INLINE(int) blake2sp_init_leaf( blake2s_state *S, uint8_t outlen, uint8_t keylen, uint64_t offset ) +static int blake2sp_init_leaf( blake2s_state *S, size_t outlen, size_t keylen, uint64_t offset ) { blake2s_param P[1]; - P->digest_length = outlen; - P->key_length = keylen; + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; P->fanout = PARALLELISM_DEGREE; P->depth = 2; store32( &P->leaf_length, 0 ); @@ -42,11 +42,11 @@ BLAKE2_LOCAL_INLINE(int) blake2sp_init_leaf( blake2s_state *S, uint8_t outlen, u return blake2s_init_param( S, P ); } -BLAKE2_LOCAL_INLINE(int) blake2sp_init_root( blake2s_state *S, uint8_t outlen, uint8_t keylen ) +static int blake2sp_init_root( blake2s_state *S, size_t outlen, size_t keylen ) { blake2s_param P[1]; - P->digest_length = outlen; - P->key_length = keylen; + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; P->fanout = PARALLELISM_DEGREE; P->depth = 2; store32( &P->leaf_length, 0 ); @@ -59,17 +59,20 @@ BLAKE2_LOCAL_INLINE(int) blake2sp_init_root( blake2s_state *S, uint8_t outlen, u } -int blake2sp_init( blake2sp_state *S, const uint8_t outlen ) +int blake2sp_init( blake2sp_state *S, size_t outlen ) { + size_t i; + if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1; memset( S->buf, 0, sizeof( S->buf ) ); S->buflen = 0; + S->outlen = outlen; if( blake2sp_init_root( S->R, outlen, 0 ) < 0 ) return -1; - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) if( blake2sp_init_leaf( S->S[i], outlen, 0, i ) < 0 ) return -1; S->R->last_node = 1; @@ -77,19 +80,22 @@ int blake2sp_init( blake2sp_state *S, const uint8_t outlen ) return 0; } -int blake2sp_init_key( blake2sp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ) +int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t keylen ) { + size_t i; + if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1; if( !key || !keylen || keylen > BLAKE2S_KEYBYTES ) return -1; memset( S->buf, 0, sizeof( S->buf ) ); S->buflen = 0; + S->outlen = outlen; if( blake2sp_init_root( S->R, outlen, keylen ) < 0 ) return -1; - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) if( blake2sp_init_leaf( S->S[i], outlen, keylen, i ) < 0 ) return -1; S->R->last_node = 1; @@ -99,7 +105,7 @@ int blake2sp_init_key( blake2sp_state *S, const uint8_t outlen, const void *key, memset( block, 0, BLAKE2S_BLOCKBYTES ); memcpy( block, key, keylen ); - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2s_update( S->S[i], block, BLAKE2S_BLOCKBYTES ); secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */ @@ -108,16 +114,18 @@ int blake2sp_init_key( blake2sp_state *S, const uint8_t outlen, const void *key, } -int blake2sp_update( blake2sp_state *S, const uint8_t *in, uint64_t inlen ) +int blake2sp_update( blake2sp_state *S, const void *pin, size_t inlen ) { + const unsigned char * in = (const unsigned char *)pin; size_t left = S->buflen; size_t fill = sizeof( S->buf ) - left; + size_t i; if( left && inlen >= fill ) { memcpy( S->buf + left, in, fill ); - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2s_update( S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); in += fill; @@ -128,20 +136,19 @@ int blake2sp_update( blake2sp_state *S, const uint8_t *in, uint64_t inlen ) #if defined(_OPENMP) #pragma omp parallel shared(S), num_threads(PARALLELISM_DEGREE) #else - - for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) #endif { #if defined(_OPENMP) - size_t id__ = omp_get_thread_num(); + size_t i = omp_get_thread_num(); #endif - uint64_t inlen__ = inlen; - const uint8_t *in__ = ( const uint8_t * )in; - in__ += id__ * BLAKE2S_BLOCKBYTES; + size_t inlen__ = inlen; + const unsigned char *in__ = ( const unsigned char * )in; + in__ += i * BLAKE2S_BLOCKBYTES; while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES ) { - blake2s_update( S->S[id__], in__, BLAKE2S_BLOCKBYTES ); + blake2s_update( S->S[i], in__, BLAKE2S_BLOCKBYTES ); in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES; inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES; } @@ -158,11 +165,16 @@ int blake2sp_update( blake2sp_state *S, const uint8_t *in, uint64_t inlen ) } -int blake2sp_final( blake2sp_state *S, uint8_t *out, const uint8_t outlen ) +int blake2sp_final( blake2sp_state *S, void *out, size_t outlen ) { uint8_t hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES]; + size_t i; - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + if(out == NULL || outlen < S->outlen) { + return -1; + } + + for( i = 0; i < PARALLELISM_DEGREE; ++i ) { if( S->buflen > i * BLAKE2S_BLOCKBYTES ) { @@ -176,18 +188,19 @@ int blake2sp_final( blake2sp_state *S, uint8_t *out, const uint8_t outlen ) blake2s_final( S->S[i], hash[i], BLAKE2S_OUTBYTES ); } - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2s_update( S->R, hash[i], BLAKE2S_OUTBYTES ); - return blake2s_final( S->R, out, outlen ); + return blake2s_final( S->R, out, S->outlen ); } -int blake2sp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uint64_t inlen, uint8_t keylen ) +int blake2sp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { uint8_t hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES]; blake2s_state S[PARALLELISM_DEGREE][1]; blake2s_state FS[1]; + size_t i; /* Verify parameters */ if ( NULL == in && inlen > 0 ) return -1; @@ -200,7 +213,7 @@ int blake2sp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uin if( keylen > BLAKE2S_KEYBYTES ) return -1; - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) if( blake2sp_init_leaf( S[i], outlen, keylen, i ) < 0 ) return -1; S[PARALLELISM_DEGREE - 1]->last_node = 1; /* mark last node */ @@ -211,7 +224,7 @@ int blake2sp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uin memset( block, 0, BLAKE2S_BLOCKBYTES ); memcpy( block, key, keylen ); - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2s_update( S[i], block, BLAKE2S_BLOCKBYTES ); secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */ @@ -221,31 +234,31 @@ int blake2sp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uin #pragma omp parallel shared(S,hash), num_threads(PARALLELISM_DEGREE) #else - for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) #endif { #if defined(_OPENMP) - size_t id__ = omp_get_thread_num(); + size_t i = omp_get_thread_num(); #endif - uint64_t inlen__ = inlen; - const uint8_t *in__ = ( const uint8_t * )in; - in__ += id__ * BLAKE2S_BLOCKBYTES; + size_t inlen__ = inlen; + const unsigned char *in__ = ( const unsigned char * )in; + in__ += i * BLAKE2S_BLOCKBYTES; while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES ) { - blake2s_update( S[id__], in__, BLAKE2S_BLOCKBYTES ); + blake2s_update( S[i], in__, BLAKE2S_BLOCKBYTES ); in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES; inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES; } - if( inlen__ > id__ * BLAKE2S_BLOCKBYTES ) + if( inlen__ > i * BLAKE2S_BLOCKBYTES ) { - const size_t left = inlen__ - id__ * BLAKE2S_BLOCKBYTES; + const size_t left = inlen__ - i * BLAKE2S_BLOCKBYTES; const size_t len = left <= BLAKE2S_BLOCKBYTES ? left : BLAKE2S_BLOCKBYTES; - blake2s_update( S[id__], in__, len ); + blake2s_update( S[i], in__, len ); } - blake2s_final( S[id__], hash[id__], BLAKE2S_OUTBYTES ); + blake2s_final( S[i], hash[i], BLAKE2S_OUTBYTES ); } if( blake2sp_init_root( FS, outlen, keylen ) < 0 ) @@ -253,7 +266,7 @@ int blake2sp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uin FS->last_node = 1; - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2s_update( FS, hash[i], BLAKE2S_OUTBYTES ); return blake2s_final( FS, out, outlen ); @@ -264,31 +277,68 @@ int blake2sp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uin #if defined(BLAKE2SP_SELFTEST) #include <string.h> #include "blake2-kat.h" -int main( int argc, char **argv ) +int main( void ) { uint8_t key[BLAKE2S_KEYBYTES]; - uint8_t buf[KAT_LENGTH]; + uint8_t buf[BLAKE2_KAT_LENGTH]; + size_t i, step; - for( size_t i = 0; i < BLAKE2S_KEYBYTES; ++i ) + for( i = 0; i < BLAKE2S_KEYBYTES; ++i ) key[i] = ( uint8_t )i; - for( size_t i = 0; i < KAT_LENGTH; ++i ) + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) buf[i] = ( uint8_t )i; - for( size_t i = 0; i < KAT_LENGTH; ++i ) + /* Test simple API */ + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) { uint8_t hash[BLAKE2S_OUTBYTES]; - blake2sp( hash, buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES ); + blake2sp( hash, BLAKE2S_OUTBYTES, buf, i, key, BLAKE2S_KEYBYTES ); if( 0 != memcmp( hash, blake2sp_keyed_kat[i], BLAKE2S_OUTBYTES ) ) { - puts( "error" ); - return -1; + goto fail; + } + } + + /* Test streaming API */ + for(step = 1; step < BLAKE2S_BLOCKBYTES; ++step) { + for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) { + uint8_t hash[BLAKE2S_OUTBYTES]; + blake2sp_state S; + uint8_t * p = buf; + size_t mlen = i; + int err = 0; + + if( (err = blake2sp_init_key(&S, BLAKE2S_OUTBYTES, key, BLAKE2S_KEYBYTES)) < 0 ) { + goto fail; + } + + while (mlen >= step) { + if ( (err = blake2sp_update(&S, p, step)) < 0 ) { + goto fail; + } + mlen -= step; + p += step; + } + if ( (err = blake2sp_update(&S, p, mlen)) < 0) { + goto fail; + } + if ( (err = blake2sp_final(&S, hash, BLAKE2S_OUTBYTES)) < 0) { + goto fail; + } + + if (0 != memcmp(hash, blake2sp_keyed_kat[i], BLAKE2S_OUTBYTES)) { + goto fail; + } } } puts( "ok" ); return 0; +fail: + puts("error"); + return -1; } #endif diff --git a/ref/genkat-c.c b/ref/genkat-c.c index a30ddf3..1525f8d 100644 --- a/ref/genkat-c.c +++ b/ref/genkat-c.c @@ -28,11 +28,11 @@ #define MAKE_KAT(name,size_prefix) \ do \ { \ - printf( "static const uint8_t " #name "_kat[KAT_LENGTH][" #size_prefix "_OUTBYTES] = \n{\n" ); \ + printf( "static const uint8_t " #name "_kat[BLAKE2_KAT_LENGTH][" #size_prefix "_OUTBYTES] = \n{\n" ); \ \ for( size_t i = 0; i < LENGTH; ++i ) \ { \ - name( hash, in, NULL, size_prefix ## _OUTBYTES, i, 0 ); \ + name( hash, size_prefix ## _OUTBYTES, in, i, NULL, 0 ); \ printf( "\t{\n\t\t" ); \ \ for( int j = 0; j < size_prefix ## _OUTBYTES; ++j ) \ @@ -48,11 +48,11 @@ do \ #define MAKE_KEYED_KAT(name,size_prefix) \ do \ { \ - printf( "static const uint8_t " #name "_keyed_kat[KAT_LENGTH][" #size_prefix "_OUTBYTES] = \n{\n" ); \ + printf( "static const uint8_t " #name "_keyed_kat[BLAKE2_KAT_LENGTH][" #size_prefix "_OUTBYTES] = \n{\n" ); \ \ for( size_t i = 0; i < LENGTH; ++i ) \ { \ - name( hash, in, key, size_prefix ## _OUTBYTES, i, size_prefix ## _KEYBYTES ); \ + name( hash, size_prefix ## _OUTBYTES, in, i, key, size_prefix ## _KEYBYTES ); \ printf( "\t{\n\t\t" ); \ \ for( int j = 0; j < size_prefix ## _OUTBYTES; ++j ) \ @@ -78,11 +78,10 @@ int main( int argc, char **argv ) for( size_t i = 0; i < sizeof( key ); ++i ) key[i] = i; - puts( "#pragma once\n" - "#ifndef __BLAKE2_KAT_H__\n" - "#define __BLAKE2_KAT_H__\n\n\n" + puts( "#ifndef BLAKE2_KAT_H\n" + "#define BLAKE2_KAT_H\n\n\n" "#include <stdint.h>\n\n" - "#define KAT_LENGTH " STR( LENGTH ) "\n\n\n" ); + "#define BLAKE2_KAT_LENGTH " STR( LENGTH ) "\n\n\n" ); MAKE_KAT( blake2s, BLAKE2S ); MAKE_KEYED_KAT( blake2s, BLAKE2S ); MAKE_KAT( blake2b, BLAKE2B ); @@ -91,122 +90,6 @@ int main( int argc, char **argv ) MAKE_KEYED_KAT( blake2sp, BLAKE2S ); MAKE_KAT( blake2bp, BLAKE2B ); MAKE_KEYED_KAT( blake2bp, BLAKE2B ); - /*printf( "static const uint8_t blake2s_kat[KAT_LENGTH][BLAKE2S_OUTBYTES] = \n{\n" ); - - for( size_t i = 0; i < LENGTH; ++i ) - { - blake2s( hash, in, NULL, BLAKE2S_OUTBYTES, i, 0 ); - printf( "\t{\n\t\t" ); - - for( int j = 0; j < BLAKE2S_OUTBYTES; ++j ) - printf( "0x%02X%s", hash[j], ( j + 1 ) == BLAKE2S_OUTBYTES ? "\n" : j && !( ( j + 1 ) % 8 ) ? ",\n\t\t" : ", " ); - - printf( "\t},\n" ); - } - - printf( "};\n\n\n\n\n" ); - printf( "static const uint8_t blake2s_keyed_kat[KAT_LENGTH][BLAKE2S_OUTBYTES] = \n{\n" ); - - for( size_t i = 0; i < LENGTH; ++i ) - { - blake2s( hash, in, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES ); - printf( "\t{\n\t\t" ); - - for( int j = 0; j < BLAKE2S_OUTBYTES; ++j ) - printf( "0x%02X%s", hash[j], ( j + 1 ) == BLAKE2S_OUTBYTES ? "\n" : j && !( ( j + 1 ) % 8 ) ? ",\n\t\t" : ", " ); - - printf( "\t},\n" ); - } - - printf( "};\n\n\n\n\n" ); - printf( "static const uint8_t blake2b_kat[KAT_LENGTH][BLAKE2B_OUTBYTES] = \n{\n" ); - - for( size_t i = 0; i < LENGTH; ++i ) - { - blake2b( hash, in, NULL, BLAKE2B_OUTBYTES, i, 0 ); - printf( "\t{\n\t\t" ); - - for( int j = 0; j < BLAKE2B_OUTBYTES; ++j ) - printf( "0x%02X%s", hash[j], ( j + 1 ) == BLAKE2B_OUTBYTES ? "\n" : j && !( ( j + 1 ) % 8 ) ? ",\n\t\t" : ", " ); - - printf( "\t},\n" ); - } - - printf( "};\n\n\n\n\n" ); - printf( "static const uint8_t blake2b_keyed_kat[KAT_LENGTH][BLAKE2B_OUTBYTES] = \n{\n" ); - - for( size_t i = 0; i < LENGTH; ++i ) - { - blake2b( hash, in, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES ); - printf( "\t{\n\t\t" ); - - for( int j = 0; j < BLAKE2B_OUTBYTES; ++j ) - printf( "0x%02X%s", hash[j], ( j + 1 ) == BLAKE2B_OUTBYTES ? "\n" : j && !( ( j + 1 ) % 8 ) ? ",\n\t\t" : ", " ); - - printf( "\t},\n" ); - } - - printf( "};\n\n\n\n\n" ); - - - printf( "static const uint8_t blake2sp_kat[KAT_LENGTH][BLAKE2S_OUTBYTES] = \n{\n" ); - - for( size_t i = 0; i < LENGTH; ++i ) - { - blake2sp( hash, in, NULL, BLAKE2S_OUTBYTES, i, 0 ); - printf( "\t{\n\t\t" ); - - for( int j = 0; j < BLAKE2S_OUTBYTES; ++j ) - printf( "0x%02X%s", hash[j], ( j + 1 ) == BLAKE2S_OUTBYTES ? "\n" : j && !( ( j + 1 ) % 8 ) ? ",\n\t\t" : ", " ); - - printf( "\t},\n" ); - } - - printf( "};\n\n\n\n\n" ); - printf( "static const uint8_t blake2sp_keyed_kat[KAT_LENGTH][BLAKE2S_OUTBYTES] = \n{\n" ); - - for( size_t i = 0; i < LENGTH; ++i ) - { - blake2sp( hash, in, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES ); - printf( "\t{\n\t\t" ); - - for( int j = 0; j < BLAKE2S_OUTBYTES; ++j ) - printf( "0x%02X%s", hash[j], ( j + 1 ) == BLAKE2S_OUTBYTES ? "\n" : j && !( ( j + 1 ) % 8 ) ? ",\n\t\t" : ", " ); - - printf( "\t},\n" ); - } - - printf( "};\n\n\n\n\n" ); - - - printf( "static const uint8_t blake2bp_kat[KAT_LENGTH][BLAKE2B_OUTBYTES] = \n{\n" ); - - for( size_t i = 0; i < LENGTH; ++i ) - { - blake2bp( hash, in, NULL, BLAKE2B_OUTBYTES, i, 0 ); - printf( "\t{\n\t\t" ); - - for( int j = 0; j < BLAKE2B_OUTBYTES; ++j ) - printf( "0x%02X%s", hash[j], ( j + 1 ) == BLAKE2B_OUTBYTES ? "\n" : j && !( ( j + 1 ) % 8 ) ? ",\n\t\t" : ", " ); - - printf( "\t},\n" ); - } - - printf( "};\n\n\n\n\n" ); - printf( "static const uint8_t blake2bp_keyed_kat[KAT_LENGTH][BLAKE2B_OUTBYTES] = \n{\n" ); - - for( size_t i = 0; i < LENGTH; ++i ) - { - blake2bp( hash, in, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES ); - printf( "\t{\n\t\t" ); - - for( int j = 0; j < BLAKE2B_OUTBYTES; ++j ) - printf( "0x%02X%s", hash[j], ( j + 1 ) == BLAKE2B_OUTBYTES ? "\n" : j && !( ( j + 1 ) % 8 ) ? ",\n\t\t" : ", " ); - - printf( "\t},\n" ); - } - - printf( "};\n\n\n\n\n" );*/ - puts( "#endif\n\n\n" ); + puts( "#endif" ); return 0; } diff --git a/ref/genkat-json.c b/ref/genkat-json.c index 81a5cbe..f230928 100644 --- a/ref/genkat-json.c +++ b/ref/genkat-json.c @@ -40,7 +40,7 @@ do \ printf(" \"key\": \"\",\n");\ printf(" \"out\": \"");\ \ - name( hash, in, NULL, size_prefix ## _OUTBYTES, i, 0 ); \ + name( hash, size_prefix ## _OUTBYTES, in, i, NULL, 0 ); \ \ for( int j = 0; j < size_prefix ## _OUTBYTES; ++j ) \ printf( "%02x", hash[j]);\ @@ -66,7 +66,7 @@ do \ printf("\",\n");\ printf(" \"out\": \"");\ \ - name( hash, in, key, size_prefix ## _OUTBYTES, i, size_prefix ## _KEYBYTES ); \ + name( hash, size_prefix ## _OUTBYTES, in, i, key, size_prefix ## _KEYBYTES ); \ \ for( int j = 0; j < size_prefix ## _OUTBYTES; ++j ) \ printf( "%02x", hash[j]);\ @@ -88,7 +88,7 @@ int main( int argc, char **argv ) for( size_t i = 0; i < sizeof( key ); ++i ) key[i] = i; - printf("["); + printf("["); MAKE_KAT( blake2s, BLAKE2S ); MAKE_KEYED_KAT( blake2s, BLAKE2S ); MAKE_KAT( blake2b, BLAKE2B ); @@ -97,7 +97,7 @@ int main( int argc, char **argv ) MAKE_KEYED_KAT( blake2sp, BLAKE2S ); MAKE_KAT( blake2bp, BLAKE2B ); MAKE_KEYED_KAT( blake2bp, BLAKE2B ); - printf("\n]\n"); + printf("\n]\n"); fflush(stdout); return 0; } diff --git a/ref/makefile b/ref/makefile index baa6c0f..7815544 100644 --- a/ref/makefile +++ b/ref/makefile @@ -1,7 +1,7 @@ CC=gcc -CFLAGS=-O2 -Wall -I../testvectors +CFLAGS=-O2 -I../testvectors -all: blake2s blake2b blake2sp blake2bp +all: blake2s blake2b blake2sp blake2bp check blake2s: blake2s-ref.c $(CC) blake2s-ref.c -o $@ $(CFLAGS) -DBLAKE2S_SELFTEST @@ -15,11 +15,17 @@ blake2sp: blake2sp-ref.c blake2s-ref.c blake2bp: blake2bp-ref.c blake2b-ref.c $(CC) blake2bp-ref.c blake2b-ref.c -o $@ $(CFLAGS) -DBLAKE2BP_SELFTEST +check: blake2s blake2b blake2sp blake2bp + ./blake2s + ./blake2b + ./blake2sp + ./blake2bp + kat: $(CC) $(CFLAGS) -o genkat-c genkat-c.c blake2b-ref.c blake2s-ref.c blake2sp-ref.c blake2bp-ref.c $(CC) $(CFLAGS) -g -o genkat-json genkat-json.c blake2b-ref.c blake2s-ref.c blake2sp-ref.c blake2bp-ref.c ./genkat-c > blake2-kat.h ./genkat-json > blake2-kat.json -clean: +clean: rm -rf *.o genkat-c genkat-json blake2s blake2b blake2sp blake2bp diff --git a/sse/blake2-config.h b/sse/blake2-config.h index 40455b1..e3b69e8 100644 --- a/sse/blake2-config.h +++ b/sse/blake2-config.h @@ -12,9 +12,8 @@ More information about the BLAKE2 hash function can be found at https://blake2.net. */ -#pragma once -#ifndef __BLAKE2_CONFIG_H__ -#define __BLAKE2_CONFIG_H__ +#ifndef BLAKE2_CONFIG_H +#define BLAKE2_CONFIG_H /* These don't work everywhere */ #if defined(__SSE2__) || defined(__x86_64__) || defined(__amd64__) diff --git a/sse/blake2-impl.h b/sse/blake2-impl.h index bbe3c0f..03df0b5 100644 --- a/sse/blake2-impl.h +++ b/sse/blake2-impl.h @@ -1,5 +1,5 @@ /* - BLAKE2 reference source code package - optimized C implementations + BLAKE2 reference source code package - reference C implementations Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at @@ -12,14 +12,25 @@ More information about the BLAKE2 hash function can be found at https://blake2.net. */ -#pragma once -#ifndef __BLAKE2_IMPL_H__ -#define __BLAKE2_IMPL_H__ +#ifndef BLAKE2_IMPL_H +#define BLAKE2_IMPL_H #include <stdint.h> #include <string.h> -BLAKE2_LOCAL_INLINE(uint32_t) load32( const void *src ) +#if !defined(__cplusplus) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L) + #if defined(_MSC_VER) + #define BLAKE2_INLINE __inline + #elif defined(__GNUC__) + #define BLAKE2_INLINE __inline__ + #else + #define BLAKE2_INLINE + #endif +#else + #define BLAKE2_INLINE inline +#endif + +static BLAKE2_INLINE uint32_t load32( const void *src ) { #if defined(NATIVE_LITTLE_ENDIAN) uint32_t w; @@ -27,15 +38,14 @@ BLAKE2_LOCAL_INLINE(uint32_t) load32( const void *src ) return w; #else const uint8_t *p = ( const uint8_t * )src; - uint32_t w = *p++; - w |= ( uint32_t )( *p++ ) << 8; - w |= ( uint32_t )( *p++ ) << 16; - w |= ( uint32_t )( *p++ ) << 24; - return w; + return (( uint32_t )( p[0] ) << 0) | + (( uint32_t )( p[1] ) << 8) | + (( uint32_t )( p[2] ) << 16) | + (( uint32_t )( p[3] ) << 24) ; #endif } -BLAKE2_LOCAL_INLINE(uint64_t) load64( const void *src ) +static BLAKE2_INLINE uint64_t load64( const void *src ) { #if defined(NATIVE_LITTLE_ENDIAN) uint64_t w; @@ -43,93 +53,81 @@ BLAKE2_LOCAL_INLINE(uint64_t) load64( const void *src ) return w; #else const uint8_t *p = ( const uint8_t * )src; - uint64_t w = *p++; - w |= ( uint64_t )( *p++ ) << 8; - w |= ( uint64_t )( *p++ ) << 16; - w |= ( uint64_t )( *p++ ) << 24; - w |= ( uint64_t )( *p++ ) << 32; - w |= ( uint64_t )( *p++ ) << 40; - w |= ( uint64_t )( *p++ ) << 48; - w |= ( uint64_t )( *p++ ) << 56; - return w; + return (( uint64_t )( p[0] ) << 0) | + (( uint64_t )( p[1] ) << 8) | + (( uint64_t )( p[2] ) << 16) | + (( uint64_t )( p[3] ) << 24) | + (( uint64_t )( p[4] ) << 32) | + (( uint64_t )( p[5] ) << 40) | + (( uint64_t )( p[6] ) << 48) | + (( uint64_t )( p[7] ) << 56) ; #endif } -BLAKE2_LOCAL_INLINE(void) store32( void *dst, uint32_t w ) +static BLAKE2_INLINE void store32( void *dst, uint32_t w ) { #if defined(NATIVE_LITTLE_ENDIAN) memcpy(dst, &w, sizeof w); #else uint8_t *p = ( uint8_t * )dst; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; + p[0] = (uint8_t)(w >> 0); + p[1] = (uint8_t)(w >> 8); + p[2] = (uint8_t)(w >> 16); + p[3] = (uint8_t)(w >> 24); #endif } -BLAKE2_LOCAL_INLINE(void) store64( void *dst, uint64_t w ) +static BLAKE2_INLINE void store64( void *dst, uint64_t w ) { #if defined(NATIVE_LITTLE_ENDIAN) memcpy(dst, &w, sizeof w); #else uint8_t *p = ( uint8_t * )dst; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; + p[0] = (uint8_t)(w >> 0); + p[1] = (uint8_t)(w >> 8); + p[2] = (uint8_t)(w >> 16); + p[3] = (uint8_t)(w >> 24); + p[4] = (uint8_t)(w >> 32); + p[5] = (uint8_t)(w >> 40); + p[6] = (uint8_t)(w >> 48); + p[7] = (uint8_t)(w >> 56); #endif } -BLAKE2_LOCAL_INLINE(uint64_t) load48( const void *src ) +static BLAKE2_INLINE uint64_t load48( const void *src ) { const uint8_t *p = ( const uint8_t * )src; - uint64_t w = *p++; - w |= ( uint64_t )( *p++ ) << 8; - w |= ( uint64_t )( *p++ ) << 16; - w |= ( uint64_t )( *p++ ) << 24; - w |= ( uint64_t )( *p++ ) << 32; - w |= ( uint64_t )( *p++ ) << 40; - return w; + return (( uint64_t )( p[0] ) << 0) | + (( uint64_t )( p[1] ) << 8) | + (( uint64_t )( p[2] ) << 16) | + (( uint64_t )( p[3] ) << 24) | + (( uint64_t )( p[4] ) << 32) | + (( uint64_t )( p[5] ) << 40) ; } -BLAKE2_LOCAL_INLINE(void) store48( void *dst, uint64_t w ) +static BLAKE2_INLINE void store48( void *dst, uint64_t w ) { uint8_t *p = ( uint8_t * )dst; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; -} - -BLAKE2_LOCAL_INLINE(uint32_t) rotl32( const uint32_t w, const unsigned c ) -{ - return ( w << c ) | ( w >> ( 32 - c ) ); -} - -BLAKE2_LOCAL_INLINE(uint64_t) rotl64( const uint64_t w, const unsigned c ) -{ - return ( w << c ) | ( w >> ( 64 - c ) ); + p[0] = (uint8_t)(w >> 0); + p[1] = (uint8_t)(w >> 8); + p[2] = (uint8_t)(w >> 16); + p[3] = (uint8_t)(w >> 24); + p[4] = (uint8_t)(w >> 32); + p[5] = (uint8_t)(w >> 40); } -BLAKE2_LOCAL_INLINE(uint32_t) rotr32( const uint32_t w, const unsigned c ) +static BLAKE2_INLINE uint32_t rotr32( const uint32_t w, const unsigned c ) { return ( w >> c ) | ( w << ( 32 - c ) ); } -BLAKE2_LOCAL_INLINE(uint64_t) rotr64( const uint64_t w, const unsigned c ) +static BLAKE2_INLINE uint64_t rotr64( const uint64_t w, const unsigned c ) { return ( w >> c ) | ( w << ( 64 - c ) ); } /* prevents compiler optimizing out memset() */ -BLAKE2_LOCAL_INLINE(void) secure_zero_memory(void *v, size_t n) +static BLAKE2_INLINE void secure_zero_memory(void *v, size_t n) { static void *(*const volatile memset_v)(void *, int, size_t) = &memset; memset_v(v, 0, n); diff --git a/sse/blake2.h b/sse/blake2.h index 1a9fdf4..6a3069b 100644 --- a/sse/blake2.h +++ b/sse/blake2.h @@ -12,19 +12,16 @@ More information about the BLAKE2 hash function can be found at https://blake2.net. */ -#pragma once -#ifndef __BLAKE2_H__ -#define __BLAKE2_H__ +#ifndef BLAKE2_H +#define BLAKE2_H #include <stddef.h> #include <stdint.h> -#ifdef BLAKE2_NO_INLINE -#define BLAKE2_LOCAL_INLINE(type) static type -#endif - -#ifndef BLAKE2_LOCAL_INLINE -#define BLAKE2_LOCAL_INLINE(type) static inline type +#if defined(_MSC_VER) +#define BLAKE2_PACKED(x) __pragma(pack(push, 1)) x __pragma(pack(pop)) +#else +#define BLAKE2_PACKED(x) x __attribute__((packed)) #endif #if defined(__cplusplus) @@ -49,60 +46,65 @@ extern "C" { BLAKE2B_PERSONALBYTES = 16 }; - typedef struct __blake2s_state + typedef struct blake2s_state__ { uint32_t h[8]; uint32_t t[2]; uint32_t f[2]; - uint8_t buf[2 * BLAKE2S_BLOCKBYTES]; + uint8_t buf[BLAKE2S_BLOCKBYTES]; size_t buflen; + size_t outlen; uint8_t last_node; } blake2s_state; - typedef struct __blake2b_state + typedef struct blake2b_state__ { uint64_t h[8]; uint64_t t[2]; uint64_t f[2]; - uint8_t buf[2 * BLAKE2B_BLOCKBYTES]; + uint8_t buf[BLAKE2B_BLOCKBYTES]; size_t buflen; + size_t outlen; uint8_t last_node; } blake2b_state; - typedef struct __blake2sp_state + typedef struct blake2sp_state__ { blake2s_state S[8][1]; blake2s_state R[1]; - uint8_t buf[8 * BLAKE2S_BLOCKBYTES]; - size_t buflen; + uint8_t buf[8 * BLAKE2S_BLOCKBYTES]; + size_t buflen; + size_t outlen; } blake2sp_state; - typedef struct __blake2bp_state + typedef struct blake2bp_state__ { blake2b_state S[4][1]; blake2b_state R[1]; - uint8_t buf[4 * BLAKE2B_BLOCKBYTES]; - size_t buflen; + uint8_t buf[4 * BLAKE2B_BLOCKBYTES]; + size_t buflen; + size_t outlen; } blake2bp_state; -#pragma pack(push, 1) - typedef struct __blake2s_param + BLAKE2_PACKED(struct blake2s_param__ { uint8_t digest_length; /* 1 */ uint8_t key_length; /* 2 */ uint8_t fanout; /* 3 */ uint8_t depth; /* 4 */ uint32_t leaf_length; /* 8 */ - uint8_t node_offset[6];// 14 + uint8_t node_offset[6];/* 14 */ uint8_t node_depth; /* 15 */ uint8_t inner_length; /* 16 */ /* uint8_t reserved[0]; */ uint8_t salt[BLAKE2S_SALTBYTES]; /* 24 */ uint8_t personal[BLAKE2S_PERSONALBYTES]; /* 32 */ - } blake2s_param; + }); - typedef struct __blake2b_param + typedef struct blake2s_param__ blake2s_param; + + BLAKE2_PACKED(struct blake2b_param__ { uint8_t digest_length; /* 1 */ uint8_t key_length; /* 2 */ @@ -115,43 +117,48 @@ extern "C" { uint8_t reserved[14]; /* 32 */ uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */ uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */ - } blake2b_param; -#pragma pack(pop) + }); + + typedef struct blake2b_param__ blake2b_param; + + /* Padded structs result in a compile-time error */ + enum { + BLAKE2_DUMMY_1 = 1/(sizeof(blake2s_param) == BLAKE2S_OUTBYTES), + BLAKE2_DUMMY_2 = 1/(sizeof(blake2b_param) == BLAKE2B_OUTBYTES) + }; /* Streaming API */ - int blake2s_init( blake2s_state *S, const uint8_t outlen ); - int blake2s_init_key( blake2s_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ); + int blake2s_init( blake2s_state *S, size_t outlen ); + int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); int blake2s_init_param( blake2s_state *S, const blake2s_param *P ); - int blake2s_update( blake2s_state *S, const uint8_t *in, uint64_t inlen ); - int blake2s_final( blake2s_state *S, uint8_t *out, uint8_t outlen ); + int blake2s_update( blake2s_state *S, const void *in, size_t inlen ); + int blake2s_final( blake2s_state *S, void *out, size_t outlen ); - int blake2b_init( blake2b_state *S, const uint8_t outlen ); - int blake2b_init_key( blake2b_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ); + int blake2b_init( blake2b_state *S, size_t outlen ); + int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); int blake2b_init_param( blake2b_state *S, const blake2b_param *P ); - int blake2b_update( blake2b_state *S, const uint8_t *in, uint64_t inlen ); - int blake2b_final( blake2b_state *S, uint8_t *out, uint8_t outlen ); + int blake2b_update( blake2b_state *S, const void *in, size_t inlen ); + int blake2b_final( blake2b_state *S, void *out, size_t outlen ); - int blake2sp_init( blake2sp_state *S, const uint8_t outlen ); - int blake2sp_init_key( blake2sp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ); - int blake2sp_update( blake2sp_state *S, const uint8_t *in, uint64_t inlen ); - int blake2sp_final( blake2sp_state *S, uint8_t *out, uint8_t outlen ); + int blake2sp_init( blake2sp_state *S, size_t outlen ); + int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t keylen ); + int blake2sp_update( blake2sp_state *S, const void *in, size_t inlen ); + int blake2sp_final( blake2sp_state *S, void *out, size_t outlen ); - int blake2bp_init( blake2bp_state *S, const uint8_t outlen ); - int blake2bp_init_key( blake2bp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ); - int blake2bp_update( blake2bp_state *S, const uint8_t *in, uint64_t inlen ); - int blake2bp_final( blake2bp_state *S, uint8_t *out, uint8_t outlen ); + int blake2bp_init( blake2bp_state *S, size_t outlen ); + int blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t keylen ); + int blake2bp_update( blake2bp_state *S, const void *in, size_t inlen ); + int blake2bp_final( blake2bp_state *S, void *out, size_t outlen ); /* Simple API */ - int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ); - int blake2b( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ); + int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); - int blake2sp( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ); - int blake2bp( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ); + int blake2sp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + int blake2bp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); - static inline int blake2( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ) - { - return blake2b( out, in, key, outlen, inlen, keylen ); - } + /* This is simply an alias for blake2b */ + int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); #if defined(__cplusplus) } diff --git a/sse/blake2b-load-sse2.h b/sse/blake2b-load-sse2.h index 0004a98..1ca4918 100644 --- a/sse/blake2b-load-sse2.h +++ b/sse/blake2b-load-sse2.h @@ -12,9 +12,8 @@ More information about the BLAKE2 hash function can be found at https://blake2.net. */ -#pragma once -#ifndef __BLAKE2B_LOAD_SSE2_H__ -#define __BLAKE2B_LOAD_SSE2_H__ +#ifndef BLAKE2B_LOAD_SSE2_H +#define BLAKE2B_LOAD_SSE2_H #define LOAD_MSG_0_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4) #define LOAD_MSG_0_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5) diff --git a/sse/blake2b-load-sse41.h b/sse/blake2b-load-sse41.h index 42a1349..4cd81dc 100644 --- a/sse/blake2b-load-sse41.h +++ b/sse/blake2b-load-sse41.h @@ -12,9 +12,8 @@ More information about the BLAKE2 hash function can be found at https://blake2.net. */ -#pragma once -#ifndef __BLAKE2B_LOAD_SSE41_H__ -#define __BLAKE2B_LOAD_SSE41_H__ +#ifndef BLAKE2B_LOAD_SSE41_H +#define BLAKE2B_LOAD_SSE41_H #define LOAD_MSG_0_1(b0, b1) \ do \ diff --git a/sse/blake2b-round.h b/sse/blake2b-round.h index 4ce2255..5a01dc3 100644 --- a/sse/blake2b-round.h +++ b/sse/blake2b-round.h @@ -12,9 +12,8 @@ More information about the BLAKE2 hash function can be found at https://blake2.net. */ -#pragma once -#ifndef __BLAKE2B_ROUND_H__ -#define __BLAKE2B_ROUND_H__ +#ifndef BLAKE2B_ROUND_H +#define BLAKE2B_ROUND_H #define LOADU(p) _mm_loadu_si128( (const __m128i *)(p) ) #define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r) diff --git a/sse/blake2b.c b/sse/blake2b.c index f9090a1..e1f0edd 100644 --- a/sse/blake2b.c +++ b/sse/blake2b.c @@ -49,199 +49,91 @@ static const uint64_t blake2b_IV[8] = 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL }; -static const uint8_t blake2b_sigma[12][16] = +/* Some helper functions */ +static void blake2b_set_lastnode( blake2b_state *S ) { - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , - { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , - { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , - { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , - { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , - { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , - { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , - { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , - { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } -}; - - -/* Some helper functions, not necessarily useful */ -BLAKE2_LOCAL_INLINE(int) blake2b_set_lastnode( blake2b_state *S ) -{ - S->f[1] = -1; - return 0; + S->f[1] = (uint64_t)-1; } -BLAKE2_LOCAL_INLINE(int) blake2b_clear_lastnode( blake2b_state *S ) -{ - S->f[1] = 0; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2b_is_lastblock( const blake2b_state *S ) +static int blake2b_is_lastblock( const blake2b_state *S ) { return S->f[0] != 0; } -BLAKE2_LOCAL_INLINE(int) blake2b_set_lastblock( blake2b_state *S ) +static void blake2b_set_lastblock( blake2b_state *S ) { if( S->last_node ) blake2b_set_lastnode( S ); - S->f[0] = -1; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2b_clear_lastblock( blake2b_state *S ) -{ - if( S->last_node ) blake2b_clear_lastnode( S ); - - S->f[0] = 0; - return 0; + S->f[0] = (uint64_t)-1; } - -BLAKE2_LOCAL_INLINE(int) blake2b_increment_counter( blake2b_state *S, const uint64_t inc ) +static void blake2b_increment_counter( blake2b_state *S, const uint64_t inc ) { -#if __x86_64__ - /* ADD/ADC chain */ - __uint128_t t = ( ( __uint128_t )S->t[1] << 64 ) | S->t[0]; - t += inc; - S->t[0] = ( uint64_t )( t >> 0 ); - S->t[1] = ( uint64_t )( t >> 64 ); -#else S->t[0] += inc; S->t[1] += ( S->t[0] < inc ); -#endif - return 0; -} - - -/* Parameter-related functions */ -BLAKE2_LOCAL_INLINE(int) blake2b_param_set_digest_length( blake2b_param *P, const uint8_t digest_length ) -{ - P->digest_length = digest_length; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2b_param_set_fanout( blake2b_param *P, const uint8_t fanout ) -{ - P->fanout = fanout; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2b_param_set_max_depth( blake2b_param *P, const uint8_t depth ) -{ - P->depth = depth; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2b_param_set_leaf_length( blake2b_param *P, const uint32_t leaf_length ) -{ - P->leaf_length = leaf_length; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2b_param_set_node_offset( blake2b_param *P, const uint64_t node_offset ) -{ - P->node_offset = node_offset; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2b_param_set_node_depth( blake2b_param *P, const uint8_t node_depth ) -{ - P->node_depth = node_depth; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2b_param_set_inner_length( blake2b_param *P, const uint8_t inner_length ) -{ - P->inner_length = inner_length; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2b_param_set_salt( blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES] ) -{ - memcpy( P->salt, salt, BLAKE2B_SALTBYTES ); - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2b_param_set_personal( blake2b_param *P, const uint8_t personal[BLAKE2B_PERSONALBYTES] ) -{ - memcpy( P->personal, personal, BLAKE2B_PERSONALBYTES ); - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2b_init0( blake2b_state *S ) -{ - memset( S, 0, sizeof( blake2b_state ) ); - - for( int i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i]; - - return 0; } /* init xors IV with input parameter block */ int blake2b_init_param( blake2b_state *S, const blake2b_param *P ) { + size_t i; /*blake2b_init0( S ); */ - const uint8_t * v = ( const uint8_t * )( blake2b_IV ); - const uint8_t * p = ( const uint8_t * )( P ); - uint8_t * h = ( uint8_t * )( S->h ); + const unsigned char * v = ( const unsigned char * )( blake2b_IV ); + const unsigned char * p = ( const unsigned char * )( P ); + unsigned char * h = ( unsigned char * )( S->h ); /* IV XOR ParamBlock */ memset( S, 0, sizeof( blake2b_state ) ); - for( int i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i]; + for( i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i]; + S->outlen = P->digest_length; return 0; } /* Some sort of default parameter block initialization, for sequential blake2b */ -int blake2b_init( blake2b_state *S, const uint8_t outlen ) +int blake2b_init( blake2b_state *S, size_t outlen ) { - const blake2b_param P = - { - outlen, - 0, - 1, - 1, - 0, - 0, - 0, - 0, - {0}, - {0}, - {0} - }; + blake2b_param P[1]; if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; - return blake2b_init_param( S, &P ); + P->digest_length = (uint8_t)outlen; + P->key_length = 0; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store64( &P->node_offset, 0 ); + P->node_depth = 0; + P->inner_length = 0; + memset( P->reserved, 0, sizeof( P->reserved ) ); + memset( P->salt, 0, sizeof( P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + + return blake2b_init_param( S, P ); } -int blake2b_init_key( blake2b_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ) +int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen ) { - const blake2b_param P = - { - outlen, - keylen, - 1, - 1, - 0, - 0, - 0, - 0, - {0}, - {0}, - {0} - }; + blake2b_param P[1]; if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; if ( ( !keylen ) || keylen > BLAKE2B_KEYBYTES ) return -1; - if( blake2b_init_param( S, &P ) < 0 ) + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store64( &P->node_offset, 0 ); + P->node_depth = 0; + P->inner_length = 0; + memset( P->reserved, 0, sizeof( P->reserved ) ); + memset( P->salt, 0, sizeof( P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + + if( blake2b_init_param( S, P ) < 0 ) return 0; { @@ -254,7 +146,7 @@ int blake2b_init_key( blake2b_state *S, const uint8_t outlen, const void *key, c return 0; } -BLAKE2_LOCAL_INLINE(int) blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] ) +static void blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] ) { __m128i row1l, row1h; __m128i row2l, row2h; @@ -276,22 +168,22 @@ BLAKE2_LOCAL_INLINE(int) blake2b_compress( blake2b_state *S, const uint8_t block const __m128i m6 = LOADU( block + 96 ); const __m128i m7 = LOADU( block + 112 ); #else - const uint64_t m0 = ( ( uint64_t * )block )[ 0]; - const uint64_t m1 = ( ( uint64_t * )block )[ 1]; - const uint64_t m2 = ( ( uint64_t * )block )[ 2]; - const uint64_t m3 = ( ( uint64_t * )block )[ 3]; - const uint64_t m4 = ( ( uint64_t * )block )[ 4]; - const uint64_t m5 = ( ( uint64_t * )block )[ 5]; - const uint64_t m6 = ( ( uint64_t * )block )[ 6]; - const uint64_t m7 = ( ( uint64_t * )block )[ 7]; - const uint64_t m8 = ( ( uint64_t * )block )[ 8]; - const uint64_t m9 = ( ( uint64_t * )block )[ 9]; - const uint64_t m10 = ( ( uint64_t * )block )[10]; - const uint64_t m11 = ( ( uint64_t * )block )[11]; - const uint64_t m12 = ( ( uint64_t * )block )[12]; - const uint64_t m13 = ( ( uint64_t * )block )[13]; - const uint64_t m14 = ( ( uint64_t * )block )[14]; - const uint64_t m15 = ( ( uint64_t * )block )[15]; + const uint64_t m0 = load64(block + 0 * sizeof(uint64_t)); + const uint64_t m1 = load64(block + 1 * sizeof(uint64_t)); + const uint64_t m2 = load64(block + 2 * sizeof(uint64_t)); + const uint64_t m3 = load64(block + 3 * sizeof(uint64_t)); + const uint64_t m4 = load64(block + 4 * sizeof(uint64_t)); + const uint64_t m5 = load64(block + 5 * sizeof(uint64_t)); + const uint64_t m6 = load64(block + 6 * sizeof(uint64_t)); + const uint64_t m7 = load64(block + 7 * sizeof(uint64_t)); + const uint64_t m8 = load64(block + 8 * sizeof(uint64_t)); + const uint64_t m9 = load64(block + 9 * sizeof(uint64_t)); + const uint64_t m10 = load64(block + 10 * sizeof(uint64_t)); + const uint64_t m11 = load64(block + 11 * sizeof(uint64_t)); + const uint64_t m12 = load64(block + 12 * sizeof(uint64_t)); + const uint64_t m13 = load64(block + 13 * sizeof(uint64_t)); + const uint64_t m14 = load64(block + 14 * sizeof(uint64_t)); + const uint64_t m15 = load64(block + 15 * sizeof(uint64_t)); #endif row1l = LOADU( &S->h[0] ); row1h = LOADU( &S->h[2] ); @@ -321,67 +213,56 @@ BLAKE2_LOCAL_INLINE(int) blake2b_compress( blake2b_state *S, const uint8_t block row2h = _mm_xor_si128( row4h, row2h ); STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) ); STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) ); - return 0; } -int blake2b_update( blake2b_state *S, const uint8_t *in, uint64_t inlen ) +int blake2b_update( blake2b_state *S, const void *pin, size_t inlen ) { - while( inlen > 0 ) + const unsigned char * in = (const unsigned char *)pin; + if( inlen > 0 ) { size_t left = S->buflen; - size_t fill = 2 * BLAKE2B_BLOCKBYTES - left; - + size_t fill = BLAKE2B_BLOCKBYTES - left; if( inlen > fill ) { + S->buflen = 0; memcpy( S->buf + left, in, fill ); /* Fill buffer */ - S->buflen += fill; blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); blake2b_compress( S, S->buf ); /* Compress */ - memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); /* Shift buffer left */ - S->buflen -= BLAKE2B_BLOCKBYTES; - in += fill; - inlen -= fill; - } - else /* inlen <= fill */ - { - memcpy( S->buf + left, in, inlen ); - S->buflen += inlen; /* Be lazy, do not compress */ - in += inlen; - inlen -= inlen; + in += fill; inlen -= fill; + while(inlen > BLAKE2B_BLOCKBYTES) { + blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); + blake2b_compress( S, in ); + in += BLAKE2B_BLOCKBYTES; + inlen -= BLAKE2B_BLOCKBYTES; + } } + memcpy( S->buf + S->buflen, in, inlen ); + S->buflen += inlen; } - return 0; } -int blake2b_final( blake2b_state *S, uint8_t *out, uint8_t outlen ) +int blake2b_final( blake2b_state *S, void *out, size_t outlen ) { - if( outlen > BLAKE2B_OUTBYTES ) + if( out == NULL || outlen < S->outlen ) return -1; if( blake2b_is_lastblock( S ) ) return -1; - if( S->buflen > BLAKE2B_BLOCKBYTES ) - { - blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); - blake2b_compress( S, S->buf ); - S->buflen -= BLAKE2B_BLOCKBYTES; - memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen ); - } - blake2b_increment_counter( S, S->buflen ); blake2b_set_lastblock( S ); - memset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */ + memset( S->buf + S->buflen, 0, BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */ blake2b_compress( S, S->buf ); - memcpy( out, &S->h[0], outlen ); + + memcpy( out, &S->h[0], S->outlen ); return 0; } -int blake2b( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ) +int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { blake2b_state S[1]; @@ -410,41 +291,82 @@ int blake2b( uint8_t *out, const void *in, const void *key, const uint8_t outlen return 0; } +int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { + return blake2b(out, outlen, in, inlen, key, keylen); +} + #if defined(SUPERCOP) int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen ) { - return blake2b( out, in, NULL, BLAKE2B_OUTBYTES, inlen, 0 ); + return blake2b( out, BLAKE2B_OUTBYTES, in, inlen, NULL, 0 ); } #endif #if defined(BLAKE2B_SELFTEST) #include <string.h> #include "blake2-kat.h" -int main( int argc, char **argv ) +int main( void ) { uint8_t key[BLAKE2B_KEYBYTES]; - uint8_t buf[KAT_LENGTH]; + uint8_t buf[BLAKE2_KAT_LENGTH]; + size_t i, step; - for( size_t i = 0; i < BLAKE2B_KEYBYTES; ++i ) + for( i = 0; i < BLAKE2B_KEYBYTES; ++i ) key[i] = ( uint8_t )i; - for( size_t i = 0; i < KAT_LENGTH; ++i ) + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) buf[i] = ( uint8_t )i; - for( size_t i = 0; i < KAT_LENGTH; ++i ) + /* Test simple API */ + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) { uint8_t hash[BLAKE2B_OUTBYTES]; - blake2b( hash, buf, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES ); + blake2b( hash, BLAKE2B_OUTBYTES, buf, i, key, BLAKE2B_KEYBYTES ); if( 0 != memcmp( hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES ) ) { - puts( "error" ); - return -1; + goto fail; + } + } + + /* Test streaming API */ + for(step = 1; step < BLAKE2B_BLOCKBYTES; ++step) { + for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) { + uint8_t hash[BLAKE2B_OUTBYTES]; + blake2b_state S; + uint8_t * p = buf; + size_t mlen = i; + int err = 0; + + if( (err = blake2b_init_key(&S, BLAKE2B_OUTBYTES, key, BLAKE2B_KEYBYTES)) < 0 ) { + goto fail; + } + + while (mlen >= step) { + if ( (err = blake2b_update(&S, p, step)) < 0 ) { + goto fail; + } + mlen -= step; + p += step; + } + if ( (err = blake2b_update(&S, p, mlen)) < 0) { + goto fail; + } + if ( (err = blake2b_final(&S, hash, BLAKE2B_OUTBYTES)) < 0) { + goto fail; + } + + if (0 != memcmp(hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES)) { + goto fail; + } } } puts( "ok" ); return 0; +fail: + puts("error"); + return -1; } #endif diff --git a/sse/blake2bp.c b/sse/blake2bp.c index 459c669..d73b603 100644 --- a/sse/blake2bp.c +++ b/sse/blake2bp.c @@ -27,11 +27,11 @@ #define PARALLELISM_DEGREE 4 -BLAKE2_LOCAL_INLINE(int) blake2bp_init_leaf( blake2b_state *S, uint8_t outlen, uint8_t keylen, uint64_t offset ) +static int blake2bp_init_leaf( blake2b_state *S, size_t outlen, size_t keylen, uint64_t offset ) { blake2b_param P[1]; - P->digest_length = outlen; - P->key_length = keylen; + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; P->fanout = PARALLELISM_DEGREE; P->depth = 2; P->leaf_length = 0; @@ -44,11 +44,11 @@ BLAKE2_LOCAL_INLINE(int) blake2bp_init_leaf( blake2b_state *S, uint8_t outlen, u return blake2b_init_param( S, P ); } -BLAKE2_LOCAL_INLINE(int) blake2bp_init_root( blake2b_state *S, uint8_t outlen, uint8_t keylen ) +static int blake2bp_init_root( blake2b_state *S, size_t outlen, size_t keylen ) { blake2b_param P[1]; - P->digest_length = outlen; - P->key_length = keylen; + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; P->fanout = PARALLELISM_DEGREE; P->depth = 2; P->leaf_length = 0; @@ -62,17 +62,19 @@ BLAKE2_LOCAL_INLINE(int) blake2bp_init_root( blake2b_state *S, uint8_t outlen, u } -int blake2bp_init( blake2bp_state *S, const uint8_t outlen ) +int blake2bp_init( blake2bp_state *S, size_t outlen ) { + size_t i; if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1; memset( S->buf, 0, sizeof( S->buf ) ); S->buflen = 0; + S->outlen = outlen; if( blake2bp_init_root( S->R, outlen, 0 ) < 0 ) return -1; - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) if( blake2bp_init_leaf( S->S[i], outlen, 0, i ) < 0 ) return -1; S->R->last_node = 1; @@ -80,19 +82,22 @@ int blake2bp_init( blake2bp_state *S, const uint8_t outlen ) return 0; } -int blake2bp_init_key( blake2bp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ) +int blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t keylen ) { + size_t i; + if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1; if( !key || !keylen || keylen > BLAKE2B_KEYBYTES ) return -1; memset( S->buf, 0, sizeof( S->buf ) ); S->buflen = 0; + S->outlen = outlen; if( blake2bp_init_root( S->R, outlen, keylen ) < 0 ) return -1; - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) if( blake2bp_init_leaf( S->S[i], outlen, keylen, i ) < 0 ) return -1; S->R->last_node = 1; @@ -102,7 +107,7 @@ int blake2bp_init_key( blake2bp_state *S, const uint8_t outlen, const void *key, memset( block, 0, BLAKE2B_BLOCKBYTES ); memcpy( block, key, keylen ); - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2b_update( S->S[i], block, BLAKE2B_BLOCKBYTES ); secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */ @@ -111,16 +116,18 @@ int blake2bp_init_key( blake2bp_state *S, const uint8_t outlen, const void *key, } -int blake2bp_update( blake2bp_state *S, const uint8_t *in, uint64_t inlen ) +int blake2bp_update( blake2bp_state *S, const void *pin, size_t inlen ) { + const unsigned char * in = (const unsigned char *)pin; size_t left = S->buflen; size_t fill = sizeof( S->buf ) - left; + size_t i; if( left && inlen >= fill ) { memcpy( S->buf + left, in, fill ); - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2b_update( S->S[i], S->buf + i * BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); in += fill; @@ -132,19 +139,19 @@ int blake2bp_update( blake2bp_state *S, const uint8_t *in, uint64_t inlen ) #pragma omp parallel shared(S), num_threads(PARALLELISM_DEGREE) #else - for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) #endif { #if defined(_OPENMP) - size_t id__ = omp_get_thread_num(); + size_t i = omp_get_thread_num(); #endif - uint64_t inlen__ = inlen; - const uint8_t *in__ = ( const uint8_t * )in; - in__ += id__ * BLAKE2B_BLOCKBYTES; + size_t inlen__ = inlen; + const unsigned char *in__ = ( const unsigned char * )in; + in__ += i * BLAKE2B_BLOCKBYTES; while( inlen__ >= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES ) { - blake2b_update( S->S[id__], in__, BLAKE2B_BLOCKBYTES ); + blake2b_update( S->S[i], in__, BLAKE2B_BLOCKBYTES ); in__ += PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES; inlen__ -= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES; } @@ -162,11 +169,16 @@ int blake2bp_update( blake2bp_state *S, const uint8_t *in, uint64_t inlen ) -int blake2bp_final( blake2bp_state *S, uint8_t *out, const uint8_t outlen ) +int blake2bp_final( blake2bp_state *S, void *out, size_t outlen ) { uint8_t hash[PARALLELISM_DEGREE][BLAKE2B_OUTBYTES]; + size_t i; + + if(out == NULL || outlen < S->outlen) { + return -1; + } - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) { if( S->buflen > i * BLAKE2B_BLOCKBYTES ) { @@ -180,17 +192,18 @@ int blake2bp_final( blake2bp_state *S, uint8_t *out, const uint8_t outlen ) blake2b_final( S->S[i], hash[i], BLAKE2B_OUTBYTES ); } - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2b_update( S->R, hash[i], BLAKE2B_OUTBYTES ); - return blake2b_final( S->R, out, outlen ); + return blake2b_final( S->R, out, S->outlen ); } -int blake2bp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uint64_t inlen, uint8_t keylen ) +int blake2bp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { uint8_t hash[PARALLELISM_DEGREE][BLAKE2B_OUTBYTES]; blake2b_state S[PARALLELISM_DEGREE][1]; blake2b_state FS[1]; + size_t i; /* Verify parameters */ if ( NULL == in && inlen > 0 ) return -1; @@ -203,7 +216,7 @@ int blake2bp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uin if( keylen > BLAKE2B_KEYBYTES ) return -1; - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) if( blake2bp_init_leaf( S[i], outlen, keylen, i ) < 0 ) return -1; S[PARALLELISM_DEGREE - 1]->last_node = 1; /* mark last node */ @@ -214,7 +227,7 @@ int blake2bp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uin memset( block, 0, BLAKE2B_BLOCKBYTES ); memcpy( block, key, keylen ); - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2b_update( S[i], block, BLAKE2B_BLOCKBYTES ); secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */ @@ -224,31 +237,31 @@ int blake2bp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uin #pragma omp parallel shared(S,hash), num_threads(PARALLELISM_DEGREE) #else - for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) #endif { #if defined(_OPENMP) - size_t id__ = omp_get_thread_num(); + size_t i = omp_get_thread_num(); #endif - uint64_t inlen__ = inlen; - const uint8_t *in__ = ( const uint8_t * )in; - in__ += id__ * BLAKE2B_BLOCKBYTES; + size_t inlen__ = inlen; + const unsigned char *in__ = ( const unsigned char * )in; + in__ += i * BLAKE2B_BLOCKBYTES; while( inlen__ >= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES ) { - blake2b_update( S[id__], in__, BLAKE2B_BLOCKBYTES ); + blake2b_update( S[i], in__, BLAKE2B_BLOCKBYTES ); in__ += PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES; inlen__ -= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES; } - if( inlen__ > id__ * BLAKE2B_BLOCKBYTES ) + if( inlen__ > i * BLAKE2B_BLOCKBYTES ) { - const size_t left = inlen__ - id__ * BLAKE2B_BLOCKBYTES; + const size_t left = inlen__ - i * BLAKE2B_BLOCKBYTES; const size_t len = left <= BLAKE2B_BLOCKBYTES ? left : BLAKE2B_BLOCKBYTES; - blake2b_update( S[id__], in__, len ); + blake2b_update( S[i], in__, len ); } - blake2b_final( S[id__], hash[id__], BLAKE2B_OUTBYTES ); + blake2b_final( S[i], hash[i], BLAKE2B_OUTBYTES ); } if( blake2bp_init_root( FS, outlen, keylen ) < 0 ) @@ -256,7 +269,7 @@ int blake2bp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uin FS->last_node = 1; /* Mark as last node */ - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2b_update( FS, hash[i], BLAKE2B_OUTBYTES ); return blake2b_final( FS, out, outlen ); @@ -266,35 +279,68 @@ int blake2bp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uin #if defined(BLAKE2BP_SELFTEST) #include <string.h> #include "blake2-kat.h" -int main( int argc, char **argv ) +int main( void ) { uint8_t key[BLAKE2B_KEYBYTES]; - uint8_t buf[KAT_LENGTH]; + uint8_t buf[BLAKE2_KAT_LENGTH]; + size_t i, step; - for( size_t i = 0; i < BLAKE2B_KEYBYTES; ++i ) + for( i = 0; i < BLAKE2B_KEYBYTES; ++i ) key[i] = ( uint8_t )i; - for( size_t i = 0; i < KAT_LENGTH; ++i ) + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) buf[i] = ( uint8_t )i; - for( size_t i = 0; i < KAT_LENGTH; ++i ) + /* Test simple API */ + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) { uint8_t hash[BLAKE2B_OUTBYTES]; - /*blake2bp( hash, buf, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES ); */ - blake2bp_state S[1]; - blake2bp_init_key( S, BLAKE2B_OUTBYTES, key, BLAKE2B_KEYBYTES ); - blake2bp_update( S, buf, i ); - blake2bp_final( S, hash, BLAKE2B_OUTBYTES ); + blake2bp( hash, BLAKE2B_OUTBYTES, buf, i, key, BLAKE2B_KEYBYTES ); if( 0 != memcmp( hash, blake2bp_keyed_kat[i], BLAKE2B_OUTBYTES ) ) { - puts( "error" ); - return -1; + goto fail; + } + } + + /* Test streaming API */ + for(step = 1; step < BLAKE2B_BLOCKBYTES; ++step) { + for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) { + uint8_t hash[BLAKE2B_OUTBYTES]; + blake2bp_state S; + uint8_t * p = buf; + size_t mlen = i; + int err = 0; + + if( (err = blake2bp_init_key(&S, BLAKE2B_OUTBYTES, key, BLAKE2B_KEYBYTES)) < 0 ) { + goto fail; + } + + while (mlen >= step) { + if ( (err = blake2bp_update(&S, p, step)) < 0 ) { + goto fail; + } + mlen -= step; + p += step; + } + if ( (err = blake2bp_update(&S, p, mlen)) < 0) { + goto fail; + } + if ( (err = blake2bp_final(&S, hash, BLAKE2B_OUTBYTES)) < 0) { + goto fail; + } + + if (0 != memcmp(hash, blake2bp_keyed_kat[i], BLAKE2B_OUTBYTES)) { + goto fail; + } } } puts( "ok" ); return 0; +fail: + puts("error"); + return -1; } #endif diff --git a/sse/blake2s-load-sse2.h b/sse/blake2s-load-sse2.h index eadefa7..b355c47 100644 --- a/sse/blake2s-load-sse2.h +++ b/sse/blake2s-load-sse2.h @@ -12,9 +12,8 @@ More information about the BLAKE2 hash function can be found at https://blake2.net. */ -#pragma once -#ifndef __BLAKE2S_LOAD_SSE2_H__ -#define __BLAKE2S_LOAD_SSE2_H__ +#ifndef BLAKE2S_LOAD_SSE2_H +#define BLAKE2S_LOAD_SSE2_H #define LOAD_MSG_0_1(buf) buf = _mm_set_epi32(m6,m4,m2,m0) #define LOAD_MSG_0_2(buf) buf = _mm_set_epi32(m7,m5,m3,m1) diff --git a/sse/blake2s-load-sse41.h b/sse/blake2s-load-sse41.h index 54bf0cd..c874dce 100644 --- a/sse/blake2s-load-sse41.h +++ b/sse/blake2s-load-sse41.h @@ -12,9 +12,8 @@ More information about the BLAKE2 hash function can be found at https://blake2.net. */ -#pragma once -#ifndef __BLAKE2S_LOAD_SSE41_H__ -#define __BLAKE2S_LOAD_SSE41_H__ +#ifndef BLAKE2S_LOAD_SSE41_H +#define BLAKE2S_LOAD_SSE41_H #define LOAD_MSG_0_1(buf) \ buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(2,0,2,0))); diff --git a/sse/blake2s-load-xop.h b/sse/blake2s-load-xop.h index a3b5d65..217ce61 100644 --- a/sse/blake2s-load-xop.h +++ b/sse/blake2s-load-xop.h @@ -12,14 +12,14 @@ More information about the BLAKE2 hash function can be found at https://blake2.net. */ -#pragma once -#ifndef __BLAKE2S_LOAD_XOP_H__ -#define __BLAKE2S_LOAD_XOP_H__ +#ifndef BLAKE2S_LOAD_XOP_H +#define BLAKE2S_LOAD_XOP_H #define TOB(x) ((x)*4*0x01010101 + 0x03020100) /* ..or not TOB */ +#if 0 /* Basic VPPERM emulation, for testing purposes */ -/*static __m128i _mm_perm_epi8(const __m128i src1, const __m128i src2, const __m128i sel) +static __m128i _mm_perm_epi8(const __m128i src1, const __m128i src2, const __m128i sel) { const __m128i sixteen = _mm_set1_epi8(16); const __m128i t0 = _mm_shuffle_epi8(src1, sel); @@ -27,7 +27,8 @@ const __m128i mask = _mm_or_si128(_mm_cmpeq_epi8(sel, sixteen), _mm_cmpgt_epi8(sel, sixteen)); /* (>=16) = 0xff : 00 */ return _mm_blendv_epi8(t0, s1, mask); -}*/ +} +#endif #define LOAD_MSG_0_1(buf) \ buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(6),TOB(4),TOB(2),TOB(0)) ); diff --git a/sse/blake2s-round.h b/sse/blake2s-round.h index 7470d92..463617f 100644 --- a/sse/blake2s-round.h +++ b/sse/blake2s-round.h @@ -12,9 +12,8 @@ More information about the BLAKE2 hash function can be found at https://blake2.net. */ -#pragma once -#ifndef __BLAKE2S_ROUND_H__ -#define __BLAKE2S_ROUND_H__ +#ifndef BLAKE2S_ROUND_H +#define BLAKE2S_ROUND_H #define LOADU(p) _mm_loadu_si128( (const __m128i *)(p) ) #define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r) diff --git a/sse/blake2s.c b/sse/blake2s.c index 030a85e..d61e587 100644 --- a/sse/blake2s.c +++ b/sse/blake2s.c @@ -45,132 +45,36 @@ static const uint32_t blake2s_IV[8] = 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL }; -static const uint8_t blake2s_sigma[10][16] = +/* Some helper functions */ +static void blake2s_set_lastnode( blake2s_state *S ) { - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , - { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , - { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , - { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , - { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , - { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , - { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , - { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , - { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , -}; - - -/* Some helper functions, not necessarily useful */ -BLAKE2_LOCAL_INLINE(int) blake2s_set_lastnode( blake2s_state *S ) -{ - S->f[1] = -1; - return 0; + S->f[1] = (uint32_t)-1; } -BLAKE2_LOCAL_INLINE(int) blake2s_clear_lastnode( blake2s_state *S ) -{ - S->f[1] = 0; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2s_is_lastblock( const blake2s_state *S ) +static int blake2s_is_lastblock( const blake2s_state *S ) { return S->f[0] != 0; } -BLAKE2_LOCAL_INLINE(int) blake2s_set_lastblock( blake2s_state *S ) +static void blake2s_set_lastblock( blake2s_state *S ) { if( S->last_node ) blake2s_set_lastnode( S ); - S->f[0] = -1; - return 0; + S->f[0] = (uint32_t)-1; } -BLAKE2_LOCAL_INLINE(int) blake2s_clear_lastblock( blake2s_state *S ) -{ - if( S->last_node ) blake2s_clear_lastnode( S ); - - S->f[0] = 0; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2s_increment_counter( blake2s_state *S, const uint32_t inc ) +static void blake2s_increment_counter( blake2s_state *S, const uint32_t inc ) { uint64_t t = ( ( uint64_t )S->t[1] << 32 ) | S->t[0]; t += inc; S->t[0] = ( uint32_t )( t >> 0 ); S->t[1] = ( uint32_t )( t >> 32 ); - return 0; -} - - -/* Parameter-related functions */ -BLAKE2_LOCAL_INLINE(int) blake2s_param_set_digest_length( blake2s_param *P, const uint8_t digest_length ) -{ - P->digest_length = digest_length; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2s_param_set_fanout( blake2s_param *P, const uint8_t fanout ) -{ - P->fanout = fanout; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2s_param_set_max_depth( blake2s_param *P, const uint8_t depth ) -{ - P->depth = depth; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2s_param_set_leaf_length( blake2s_param *P, const uint32_t leaf_length ) -{ - P->leaf_length = leaf_length; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2s_param_set_node_offset( blake2s_param *P, const uint64_t node_offset ) -{ - store48( P->node_offset, node_offset ); - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2s_param_set_node_depth( blake2s_param *P, const uint8_t node_depth ) -{ - P->node_depth = node_depth; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2s_param_set_inner_length( blake2s_param *P, const uint8_t inner_length ) -{ - P->inner_length = inner_length; - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2s_param_set_salt( blake2s_param *P, const uint8_t salt[BLAKE2S_SALTBYTES] ) -{ - memcpy( P->salt, salt, BLAKE2S_SALTBYTES ); - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2s_param_set_personal( blake2s_param *P, const uint8_t personal[BLAKE2S_PERSONALBYTES] ) -{ - memcpy( P->personal, personal, BLAKE2S_PERSONALBYTES ); - return 0; -} - -BLAKE2_LOCAL_INLINE(int) blake2s_init0( blake2s_state *S ) -{ - memset( S, 0, sizeof( blake2s_state ) ); - - for( int i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i]; - - return 0; } /* init2 xors IV with input parameter block */ int blake2s_init_param( blake2s_state *S, const blake2s_param *P ) { + size_t i; /*blake2s_init0( S ); */ const uint8_t * v = ( const uint8_t * )( blake2s_IV ); const uint8_t * p = ( const uint8_t * )( P ); @@ -178,56 +82,59 @@ int blake2s_init_param( blake2s_state *S, const blake2s_param *P ) /* IV XOR ParamBlock */ memset( S, 0, sizeof( blake2s_state ) ); - for( int i = 0; i < BLAKE2S_OUTBYTES; ++i ) h[i] = v[i] ^ p[i]; + for( i = 0; i < BLAKE2S_OUTBYTES; ++i ) h[i] = v[i] ^ p[i]; + S->outlen = P->digest_length; return 0; } /* Some sort of default parameter block initialization, for sequential blake2s */ -int blake2s_init( blake2s_state *S, const uint8_t outlen ) +int blake2s_init( blake2s_state *S, size_t outlen ) { - const blake2s_param P = - { - outlen, - 0, - 1, - 1, - 0, - {0}, - 0, - 0, - {0}, - {0} - }; + blake2s_param P[1]; + /* Move interval verification here? */ if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1; - return blake2s_init_param( S, &P ); + + P->digest_length = (uint8_t)outlen; + P->key_length = 0; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store48( &P->node_offset, 0 ); + P->node_depth = 0; + P->inner_length = 0; + /* memset(P->reserved, 0, sizeof(P->reserved) ); */ + memset( P->salt, 0, sizeof( P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + + return blake2s_init_param( S, P ); } -int blake2s_init_key( blake2s_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ) +int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen ) { - const blake2s_param P = - { - outlen, - keylen, - 1, - 1, - 0, - {0}, - 0, - 0, - {0}, - {0} - }; + blake2s_param P[1]; /* Move interval verification here? */ if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1; if ( ( !key ) || ( !keylen ) || keylen > BLAKE2S_KEYBYTES ) return -1; - if( blake2s_init_param( S, &P ) < 0 ) + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store48( &P->node_offset, 0 ); + P->node_depth = 0; + P->inner_length = 0; + /* memset(P->reserved, 0, sizeof(P->reserved) ); */ + memset( P->salt, 0, sizeof( P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + + if( blake2s_init_param( S, P ) < 0 ) return -1; { @@ -241,7 +148,7 @@ int blake2s_init_key( blake2s_state *S, const uint8_t outlen, const void *key, c } -BLAKE2_LOCAL_INLINE(int) blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] ) +static void blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] ) { __m128i row1, row2, row3, row4; __m128i buf1, buf2, buf3, buf4; @@ -262,27 +169,27 @@ BLAKE2_LOCAL_INLINE(int) blake2s_compress( blake2s_state *S, const uint8_t block const __m128i m2 = LOADU( block + 32 ); const __m128i m3 = LOADU( block + 48 ); #else - const uint32_t m0 = ( ( uint32_t * )block )[ 0]; - const uint32_t m1 = ( ( uint32_t * )block )[ 1]; - const uint32_t m2 = ( ( uint32_t * )block )[ 2]; - const uint32_t m3 = ( ( uint32_t * )block )[ 3]; - const uint32_t m4 = ( ( uint32_t * )block )[ 4]; - const uint32_t m5 = ( ( uint32_t * )block )[ 5]; - const uint32_t m6 = ( ( uint32_t * )block )[ 6]; - const uint32_t m7 = ( ( uint32_t * )block )[ 7]; - const uint32_t m8 = ( ( uint32_t * )block )[ 8]; - const uint32_t m9 = ( ( uint32_t * )block )[ 9]; - const uint32_t m10 = ( ( uint32_t * )block )[10]; - const uint32_t m11 = ( ( uint32_t * )block )[11]; - const uint32_t m12 = ( ( uint32_t * )block )[12]; - const uint32_t m13 = ( ( uint32_t * )block )[13]; - const uint32_t m14 = ( ( uint32_t * )block )[14]; - const uint32_t m15 = ( ( uint32_t * )block )[15]; + const uint32_t m0 = load32(block + 0 * sizeof(uint32_t)); + const uint32_t m1 = load32(block + 1 * sizeof(uint32_t)); + const uint32_t m2 = load32(block + 2 * sizeof(uint32_t)); + const uint32_t m3 = load32(block + 3 * sizeof(uint32_t)); + const uint32_t m4 = load32(block + 4 * sizeof(uint32_t)); + const uint32_t m5 = load32(block + 5 * sizeof(uint32_t)); + const uint32_t m6 = load32(block + 6 * sizeof(uint32_t)); + const uint32_t m7 = load32(block + 7 * sizeof(uint32_t)); + const uint32_t m8 = load32(block + 8 * sizeof(uint32_t)); + const uint32_t m9 = load32(block + 9 * sizeof(uint32_t)); + const uint32_t m10 = load32(block + 10 * sizeof(uint32_t)); + const uint32_t m11 = load32(block + 11 * sizeof(uint32_t)); + const uint32_t m12 = load32(block + 12 * sizeof(uint32_t)); + const uint32_t m13 = load32(block + 13 * sizeof(uint32_t)); + const uint32_t m14 = load32(block + 14 * sizeof(uint32_t)); + const uint32_t m15 = load32(block + 15 * sizeof(uint32_t)); #endif row1 = ff0 = LOADU( &S->h[0] ); row2 = ff1 = LOADU( &S->h[4] ); - row3 = _mm_setr_epi32( 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A ); - row4 = _mm_xor_si128( _mm_setr_epi32( 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 ), LOADU( &S->t[0] ) ); + row3 = _mm_loadu_si128( (__m128i const *)&blake2s_IV[0] ); + row4 = _mm_xor_si128( _mm_loadu_si128( (__m128i const *)&blake2s_IV[4] ), LOADU( &S->t[0] ) ); ROUND( 0 ); ROUND( 1 ); ROUND( 2 ); @@ -295,73 +202,61 @@ BLAKE2_LOCAL_INLINE(int) blake2s_compress( blake2s_state *S, const uint8_t block ROUND( 9 ); STOREU( &S->h[0], _mm_xor_si128( ff0, _mm_xor_si128( row1, row3 ) ) ); STOREU( &S->h[4], _mm_xor_si128( ff1, _mm_xor_si128( row2, row4 ) ) ); - return 0; } -/* inlen now in bytes */ -int blake2s_update( blake2s_state *S, const uint8_t *in, uint64_t inlen ) +int blake2s_update( blake2s_state *S, const void *pin, size_t inlen ) { - while( inlen > 0 ) + const unsigned char * in = (const unsigned char *)pin; + if( inlen > 0 ) { size_t left = S->buflen; - size_t fill = 2 * BLAKE2S_BLOCKBYTES - left; - + size_t fill = BLAKE2S_BLOCKBYTES - left; if( inlen > fill ) { + S->buflen = 0; memcpy( S->buf + left, in, fill ); /* Fill buffer */ - S->buflen += fill; blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); blake2s_compress( S, S->buf ); /* Compress */ - memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); /* Shift buffer left */ - S->buflen -= BLAKE2S_BLOCKBYTES; - in += fill; - inlen -= fill; - } - else /* inlen <= fill */ - { - memcpy( S->buf + left, in, inlen ); - S->buflen += inlen; /* Be lazy, do not compress */ - in += inlen; - inlen -= inlen; + in += fill; inlen -= fill; + while(inlen > BLAKE2S_BLOCKBYTES) { + blake2s_increment_counter(S, BLAKE2S_BLOCKBYTES); + blake2s_compress( S, in ); + in += BLAKE2S_BLOCKBYTES; + inlen -= BLAKE2S_BLOCKBYTES; + } } + memcpy( S->buf + S->buflen, in, inlen ); + S->buflen += inlen; } - return 0; } -/* Is this correct? */ -int blake2s_final( blake2s_state *S, uint8_t *out, uint8_t outlen ) +int blake2s_final( blake2s_state *S, void *out, size_t outlen ) { uint8_t buffer[BLAKE2S_OUTBYTES] = {0}; + size_t i; - if( outlen > BLAKE2S_OUTBYTES ) + if( out == NULL || outlen < S->outlen ) return -1; if( blake2s_is_lastblock( S ) ) return -1; - if( S->buflen > BLAKE2S_BLOCKBYTES ) - { - blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); - blake2s_compress( S, S->buf ); - S->buflen -= BLAKE2S_BLOCKBYTES; - memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen ); - } - - blake2s_increment_counter( S, ( uint32_t )S->buflen ); + blake2s_increment_counter( S, (uint32_t)S->buflen ); blake2s_set_lastblock( S ); - memset( S->buf + S->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */ + memset( S->buf + S->buflen, 0, BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */ blake2s_compress( S, S->buf ); - for( int i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ + for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ store32( buffer + sizeof( S->h[i] ) * i, S->h[i] ); - memcpy( out, buffer, outlen ); + memcpy( out, buffer, S->outlen ); + secure_zero_memory( buffer, sizeof(buffer) ); return 0; } /* inlen, at least, should be uint64_t. Others can be size_t. */ -int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen ) +int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { blake2s_state S[1]; @@ -393,38 +288,75 @@ int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen #if defined(SUPERCOP) int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen ) { - return blake2s( out, in, NULL, BLAKE2S_OUTBYTES, inlen, 0 ); + return blake2s( out, BLAKE2S_OUTBYTES, in, inlen, NULL, 0 ); } #endif #if defined(BLAKE2S_SELFTEST) #include <string.h> #include "blake2-kat.h" -int main( int argc, char **argv ) +int main( void ) { uint8_t key[BLAKE2S_KEYBYTES]; - uint8_t buf[KAT_LENGTH]; + uint8_t buf[BLAKE2_KAT_LENGTH]; + size_t i, step; - for( size_t i = 0; i < BLAKE2S_KEYBYTES; ++i ) + for( i = 0; i < BLAKE2S_KEYBYTES; ++i ) key[i] = ( uint8_t )i; - for( size_t i = 0; i < KAT_LENGTH; ++i ) + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) buf[i] = ( uint8_t )i; - for( size_t i = 0; i < KAT_LENGTH; ++i ) + /* Test simple API */ + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) { uint8_t hash[BLAKE2S_OUTBYTES]; + blake2s( hash, BLAKE2S_OUTBYTES, buf, i, key, BLAKE2S_KEYBYTES ); - if( blake2s( hash, buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES ) < 0 || - 0 != memcmp( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) ) + if( 0 != memcmp( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) ) { - puts( "error" ); - return -1; + goto fail; + } + } + + /* Test streaming API */ + for(step = 1; step < BLAKE2S_BLOCKBYTES; ++step) { + for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) { + uint8_t hash[BLAKE2S_OUTBYTES]; + blake2s_state S; + uint8_t * p = buf; + size_t mlen = i; + int err = 0; + + if( (err = blake2s_init_key(&S, BLAKE2S_OUTBYTES, key, BLAKE2S_KEYBYTES)) < 0 ) { + goto fail; + } + + while (mlen >= step) { + if ( (err = blake2s_update(&S, p, step)) < 0 ) { + goto fail; + } + mlen -= step; + p += step; + } + if ( (err = blake2s_update(&S, p, mlen)) < 0) { + goto fail; + } + if ( (err = blake2s_final(&S, hash, BLAKE2S_OUTBYTES)) < 0) { + goto fail; + } + + if (0 != memcmp(hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES)) { + goto fail; + } } } puts( "ok" ); return 0; +fail: + puts("error"); + return -1; } #endif diff --git a/sse/blake2sp.c b/sse/blake2sp.c index 7f554b8..ed4a40f 100644 --- a/sse/blake2sp.c +++ b/sse/blake2sp.c @@ -26,11 +26,11 @@ #define PARALLELISM_DEGREE 8 -BLAKE2_LOCAL_INLINE(int) blake2sp_init_leaf( blake2s_state *S, uint8_t outlen, uint8_t keylen, uint64_t offset ) +static int blake2sp_init_leaf( blake2s_state *S, size_t outlen, size_t keylen, uint64_t offset ) { blake2s_param P[1]; - P->digest_length = outlen; - P->key_length = keylen; + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; P->fanout = PARALLELISM_DEGREE; P->depth = 2; P->leaf_length = 0; @@ -42,11 +42,11 @@ BLAKE2_LOCAL_INLINE(int) blake2sp_init_leaf( blake2s_state *S, uint8_t outlen, u return blake2s_init_param( S, P ); } -BLAKE2_LOCAL_INLINE(int) blake2sp_init_root( blake2s_state *S, uint8_t outlen, uint8_t keylen ) +static int blake2sp_init_root( blake2s_state *S, size_t outlen, size_t keylen ) { blake2s_param P[1]; - P->digest_length = outlen; - P->key_length = keylen; + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; P->fanout = PARALLELISM_DEGREE; P->depth = 2; P->leaf_length = 0; @@ -59,17 +59,20 @@ BLAKE2_LOCAL_INLINE(int) blake2sp_init_root( blake2s_state *S, uint8_t outlen, u } -int blake2sp_init( blake2sp_state *S, const uint8_t outlen ) +int blake2sp_init( blake2sp_state *S, size_t outlen ) { + size_t i; + if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1; memset( S->buf, 0, sizeof( S->buf ) ); S->buflen = 0; + S->outlen = outlen; if( blake2sp_init_root( S->R, outlen, 0 ) < 0 ) return -1; - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) if( blake2sp_init_leaf( S->S[i], outlen, 0, i ) < 0 ) return -1; S->R->last_node = 1; @@ -77,19 +80,22 @@ int blake2sp_init( blake2sp_state *S, const uint8_t outlen ) return 0; } -int blake2sp_init_key( blake2sp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen ) +int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t keylen ) { + size_t i; + if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1; if( !key || !keylen || keylen > BLAKE2S_KEYBYTES ) return -1; memset( S->buf, 0, sizeof( S->buf ) ); S->buflen = 0; + S->outlen = outlen; if( blake2sp_init_root( S->R, outlen, keylen ) < 0 ) return -1; - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) if( blake2sp_init_leaf( S->S[i], outlen, keylen, i ) < 0 ) return -1; S->R->last_node = 1; @@ -99,7 +105,7 @@ int blake2sp_init_key( blake2sp_state *S, const uint8_t outlen, const void *key, memset( block, 0, BLAKE2S_BLOCKBYTES ); memcpy( block, key, keylen ); - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2s_update( S->S[i], block, BLAKE2S_BLOCKBYTES ); secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */ @@ -108,16 +114,18 @@ int blake2sp_init_key( blake2sp_state *S, const uint8_t outlen, const void *key, } -int blake2sp_update( blake2sp_state *S, const uint8_t *in, uint64_t inlen ) +int blake2sp_update( blake2sp_state *S, const void *pin, size_t inlen ) { + const unsigned char * in = (const unsigned char *)pin; size_t left = S->buflen; size_t fill = sizeof( S->buf ) - left; + size_t i; if( left && inlen >= fill ) { memcpy( S->buf + left, in, fill ); - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2s_update( S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); in += fill; @@ -129,19 +137,19 @@ int blake2sp_update( blake2sp_state *S, const uint8_t *in, uint64_t inlen ) #pragma omp parallel shared(S), num_threads(PARALLELISM_DEGREE) #else - for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) #endif { #if defined(_OPENMP) - size_t id__ = omp_get_thread_num(); + size_t i = omp_get_thread_num(); #endif - uint64_t inlen__ = inlen; - const uint8_t *in__ = ( const uint8_t * )in; - in__ += id__ * BLAKE2S_BLOCKBYTES; + size_t inlen__ = inlen; + const unsigned char *in__ = ( const unsigned char * )in; + in__ += i * BLAKE2S_BLOCKBYTES; while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES ) { - blake2s_update( S->S[id__], in__, BLAKE2S_BLOCKBYTES ); + blake2s_update( S->S[i], in__, BLAKE2S_BLOCKBYTES ); in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES; inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES; } @@ -158,11 +166,16 @@ int blake2sp_update( blake2sp_state *S, const uint8_t *in, uint64_t inlen ) } -int blake2sp_final( blake2sp_state *S, uint8_t *out, const uint8_t outlen ) +int blake2sp_final( blake2sp_state *S, void *out, size_t outlen ) { uint8_t hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES]; + size_t i; + + if(out == NULL || outlen < S->outlen) { + return -1; + } - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) { if( S->buflen > i * BLAKE2S_BLOCKBYTES ) { @@ -176,18 +189,19 @@ int blake2sp_final( blake2sp_state *S, uint8_t *out, const uint8_t outlen ) blake2s_final( S->S[i], hash[i], BLAKE2S_OUTBYTES ); } - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2s_update( S->R, hash[i], BLAKE2S_OUTBYTES ); - return blake2s_final( S->R, out, outlen ); + return blake2s_final( S->R, out, S->outlen ); } -int blake2sp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uint64_t inlen, uint8_t keylen ) +int blake2sp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { uint8_t hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES]; blake2s_state S[PARALLELISM_DEGREE][1]; blake2s_state FS[1]; + size_t i; /* Verify parameters */ if ( NULL == in && inlen > 0 ) return -1; @@ -200,7 +214,7 @@ int blake2sp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uin if( keylen > BLAKE2S_KEYBYTES ) return -1; - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) if( blake2sp_init_leaf( S[i], outlen, keylen, i ) < 0 ) return -1; S[PARALLELISM_DEGREE - 1]->last_node = 1; /* mark last node */ @@ -211,7 +225,7 @@ int blake2sp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uin memset( block, 0, BLAKE2S_BLOCKBYTES ); memcpy( block, key, keylen ); - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2s_update( S[i], block, BLAKE2S_BLOCKBYTES ); secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */ @@ -221,31 +235,31 @@ int blake2sp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uin #pragma omp parallel shared(S,hash), num_threads(PARALLELISM_DEGREE) #else - for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) #endif { #if defined(_OPENMP) - size_t id__ = omp_get_thread_num(); + size_t i = omp_get_thread_num(); #endif - uint64_t inlen__ = inlen; - const uint8_t *in__ = ( const uint8_t * )in; - in__ += id__ * BLAKE2S_BLOCKBYTES; + size_t inlen__ = inlen; + const unsigned char *in__ = ( const unsigned char * )in; + in__ += i * BLAKE2S_BLOCKBYTES; while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES ) { - blake2s_update( S[id__], in__, BLAKE2S_BLOCKBYTES ); + blake2s_update( S[i], in__, BLAKE2S_BLOCKBYTES ); in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES; inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES; } - if( inlen__ > id__ * BLAKE2S_BLOCKBYTES ) + if( inlen__ > i * BLAKE2S_BLOCKBYTES ) { - const size_t left = inlen__ - id__ * BLAKE2S_BLOCKBYTES; + const size_t left = inlen__ - i * BLAKE2S_BLOCKBYTES; const size_t len = left <= BLAKE2S_BLOCKBYTES ? left : BLAKE2S_BLOCKBYTES; - blake2s_update( S[id__], in__, len ); + blake2s_update( S[i], in__, len ); } - blake2s_final( S[id__], hash[id__], BLAKE2S_OUTBYTES ); + blake2s_final( S[i], hash[i], BLAKE2S_OUTBYTES ); } if( blake2sp_init_root( FS, outlen, keylen ) < 0 ) @@ -253,7 +267,7 @@ int blake2sp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uin FS->last_node = 1; - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2s_update( FS, hash[i], BLAKE2S_OUTBYTES ); return blake2s_final( FS, out, outlen ); @@ -262,31 +276,68 @@ int blake2sp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uin #if defined(BLAKE2SP_SELFTEST) #include <string.h> #include "blake2-kat.h" -int main( int argc, char **argv ) +int main( void ) { uint8_t key[BLAKE2S_KEYBYTES]; - uint8_t buf[KAT_LENGTH]; + uint8_t buf[BLAKE2_KAT_LENGTH]; + size_t i, step; - for( size_t i = 0; i < BLAKE2S_KEYBYTES; ++i ) + for( i = 0; i < BLAKE2S_KEYBYTES; ++i ) key[i] = ( uint8_t )i; - for( size_t i = 0; i < KAT_LENGTH; ++i ) + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) buf[i] = ( uint8_t )i; - for( size_t i = 0; i < KAT_LENGTH; ++i ) + /* Test simple API */ + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) { uint8_t hash[BLAKE2S_OUTBYTES]; - blake2sp( hash, buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES ); + blake2sp( hash, BLAKE2S_OUTBYTES, buf, i, key, BLAKE2S_KEYBYTES ); if( 0 != memcmp( hash, blake2sp_keyed_kat[i], BLAKE2S_OUTBYTES ) ) { - puts( "error" ); - return -1; + goto fail; + } + } + + /* Test streaming API */ + for(step = 1; step < BLAKE2S_BLOCKBYTES; ++step) { + for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) { + uint8_t hash[BLAKE2S_OUTBYTES]; + blake2sp_state S; + uint8_t * p = buf; + size_t mlen = i; + int err = 0; + + if( (err = blake2sp_init_key(&S, BLAKE2S_OUTBYTES, key, BLAKE2S_KEYBYTES)) < 0 ) { + goto fail; + } + + while (mlen >= step) { + if ( (err = blake2sp_update(&S, p, step)) < 0 ) { + goto fail; + } + mlen -= step; + p += step; + } + if ( (err = blake2sp_update(&S, p, mlen)) < 0) { + goto fail; + } + if ( (err = blake2sp_final(&S, hash, BLAKE2S_OUTBYTES)) < 0) { + goto fail; + } + + if (0 != memcmp(hash, blake2sp_keyed_kat[i], BLAKE2S_OUTBYTES)) { + goto fail; + } } } puts( "ok" ); return 0; +fail: + puts("error"); + return -1; } #endif diff --git a/sse/genkat-c.c b/sse/genkat-c.c index a30ddf3..1525f8d 100644 --- a/sse/genkat-c.c +++ b/sse/genkat-c.c @@ -28,11 +28,11 @@ #define MAKE_KAT(name,size_prefix) \ do \ { \ - printf( "static const uint8_t " #name "_kat[KAT_LENGTH][" #size_prefix "_OUTBYTES] = \n{\n" ); \ + printf( "static const uint8_t " #name "_kat[BLAKE2_KAT_LENGTH][" #size_prefix "_OUTBYTES] = \n{\n" ); \ \ for( size_t i = 0; i < LENGTH; ++i ) \ { \ - name( hash, in, NULL, size_prefix ## _OUTBYTES, i, 0 ); \ + name( hash, size_prefix ## _OUTBYTES, in, i, NULL, 0 ); \ printf( "\t{\n\t\t" ); \ \ for( int j = 0; j < size_prefix ## _OUTBYTES; ++j ) \ @@ -48,11 +48,11 @@ do \ #define MAKE_KEYED_KAT(name,size_prefix) \ do \ { \ - printf( "static const uint8_t " #name "_keyed_kat[KAT_LENGTH][" #size_prefix "_OUTBYTES] = \n{\n" ); \ + printf( "static const uint8_t " #name "_keyed_kat[BLAKE2_KAT_LENGTH][" #size_prefix "_OUTBYTES] = \n{\n" ); \ \ for( size_t i = 0; i < LENGTH; ++i ) \ { \ - name( hash, in, key, size_prefix ## _OUTBYTES, i, size_prefix ## _KEYBYTES ); \ + name( hash, size_prefix ## _OUTBYTES, in, i, key, size_prefix ## _KEYBYTES ); \ printf( "\t{\n\t\t" ); \ \ for( int j = 0; j < size_prefix ## _OUTBYTES; ++j ) \ @@ -78,11 +78,10 @@ int main( int argc, char **argv ) for( size_t i = 0; i < sizeof( key ); ++i ) key[i] = i; - puts( "#pragma once\n" - "#ifndef __BLAKE2_KAT_H__\n" - "#define __BLAKE2_KAT_H__\n\n\n" + puts( "#ifndef BLAKE2_KAT_H\n" + "#define BLAKE2_KAT_H\n\n\n" "#include <stdint.h>\n\n" - "#define KAT_LENGTH " STR( LENGTH ) "\n\n\n" ); + "#define BLAKE2_KAT_LENGTH " STR( LENGTH ) "\n\n\n" ); MAKE_KAT( blake2s, BLAKE2S ); MAKE_KEYED_KAT( blake2s, BLAKE2S ); MAKE_KAT( blake2b, BLAKE2B ); @@ -91,122 +90,6 @@ int main( int argc, char **argv ) MAKE_KEYED_KAT( blake2sp, BLAKE2S ); MAKE_KAT( blake2bp, BLAKE2B ); MAKE_KEYED_KAT( blake2bp, BLAKE2B ); - /*printf( "static const uint8_t blake2s_kat[KAT_LENGTH][BLAKE2S_OUTBYTES] = \n{\n" ); - - for( size_t i = 0; i < LENGTH; ++i ) - { - blake2s( hash, in, NULL, BLAKE2S_OUTBYTES, i, 0 ); - printf( "\t{\n\t\t" ); - - for( int j = 0; j < BLAKE2S_OUTBYTES; ++j ) - printf( "0x%02X%s", hash[j], ( j + 1 ) == BLAKE2S_OUTBYTES ? "\n" : j && !( ( j + 1 ) % 8 ) ? ",\n\t\t" : ", " ); - - printf( "\t},\n" ); - } - - printf( "};\n\n\n\n\n" ); - printf( "static const uint8_t blake2s_keyed_kat[KAT_LENGTH][BLAKE2S_OUTBYTES] = \n{\n" ); - - for( size_t i = 0; i < LENGTH; ++i ) - { - blake2s( hash, in, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES ); - printf( "\t{\n\t\t" ); - - for( int j = 0; j < BLAKE2S_OUTBYTES; ++j ) - printf( "0x%02X%s", hash[j], ( j + 1 ) == BLAKE2S_OUTBYTES ? "\n" : j && !( ( j + 1 ) % 8 ) ? ",\n\t\t" : ", " ); - - printf( "\t},\n" ); - } - - printf( "};\n\n\n\n\n" ); - printf( "static const uint8_t blake2b_kat[KAT_LENGTH][BLAKE2B_OUTBYTES] = \n{\n" ); - - for( size_t i = 0; i < LENGTH; ++i ) - { - blake2b( hash, in, NULL, BLAKE2B_OUTBYTES, i, 0 ); - printf( "\t{\n\t\t" ); - - for( int j = 0; j < BLAKE2B_OUTBYTES; ++j ) - printf( "0x%02X%s", hash[j], ( j + 1 ) == BLAKE2B_OUTBYTES ? "\n" : j && !( ( j + 1 ) % 8 ) ? ",\n\t\t" : ", " ); - - printf( "\t},\n" ); - } - - printf( "};\n\n\n\n\n" ); - printf( "static const uint8_t blake2b_keyed_kat[KAT_LENGTH][BLAKE2B_OUTBYTES] = \n{\n" ); - - for( size_t i = 0; i < LENGTH; ++i ) - { - blake2b( hash, in, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES ); - printf( "\t{\n\t\t" ); - - for( int j = 0; j < BLAKE2B_OUTBYTES; ++j ) - printf( "0x%02X%s", hash[j], ( j + 1 ) == BLAKE2B_OUTBYTES ? "\n" : j && !( ( j + 1 ) % 8 ) ? ",\n\t\t" : ", " ); - - printf( "\t},\n" ); - } - - printf( "};\n\n\n\n\n" ); - - - printf( "static const uint8_t blake2sp_kat[KAT_LENGTH][BLAKE2S_OUTBYTES] = \n{\n" ); - - for( size_t i = 0; i < LENGTH; ++i ) - { - blake2sp( hash, in, NULL, BLAKE2S_OUTBYTES, i, 0 ); - printf( "\t{\n\t\t" ); - - for( int j = 0; j < BLAKE2S_OUTBYTES; ++j ) - printf( "0x%02X%s", hash[j], ( j + 1 ) == BLAKE2S_OUTBYTES ? "\n" : j && !( ( j + 1 ) % 8 ) ? ",\n\t\t" : ", " ); - - printf( "\t},\n" ); - } - - printf( "};\n\n\n\n\n" ); - printf( "static const uint8_t blake2sp_keyed_kat[KAT_LENGTH][BLAKE2S_OUTBYTES] = \n{\n" ); - - for( size_t i = 0; i < LENGTH; ++i ) - { - blake2sp( hash, in, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES ); - printf( "\t{\n\t\t" ); - - for( int j = 0; j < BLAKE2S_OUTBYTES; ++j ) - printf( "0x%02X%s", hash[j], ( j + 1 ) == BLAKE2S_OUTBYTES ? "\n" : j && !( ( j + 1 ) % 8 ) ? ",\n\t\t" : ", " ); - - printf( "\t},\n" ); - } - - printf( "};\n\n\n\n\n" ); - - - printf( "static const uint8_t blake2bp_kat[KAT_LENGTH][BLAKE2B_OUTBYTES] = \n{\n" ); - - for( size_t i = 0; i < LENGTH; ++i ) - { - blake2bp( hash, in, NULL, BLAKE2B_OUTBYTES, i, 0 ); - printf( "\t{\n\t\t" ); - - for( int j = 0; j < BLAKE2B_OUTBYTES; ++j ) - printf( "0x%02X%s", hash[j], ( j + 1 ) == BLAKE2B_OUTBYTES ? "\n" : j && !( ( j + 1 ) % 8 ) ? ",\n\t\t" : ", " ); - - printf( "\t},\n" ); - } - - printf( "};\n\n\n\n\n" ); - printf( "static const uint8_t blake2bp_keyed_kat[KAT_LENGTH][BLAKE2B_OUTBYTES] = \n{\n" ); - - for( size_t i = 0; i < LENGTH; ++i ) - { - blake2bp( hash, in, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES ); - printf( "\t{\n\t\t" ); - - for( int j = 0; j < BLAKE2B_OUTBYTES; ++j ) - printf( "0x%02X%s", hash[j], ( j + 1 ) == BLAKE2B_OUTBYTES ? "\n" : j && !( ( j + 1 ) % 8 ) ? ",\n\t\t" : ", " ); - - printf( "\t},\n" ); - } - - printf( "};\n\n\n\n\n" );*/ - puts( "#endif\n\n\n" ); + puts( "#endif" ); return 0; } diff --git a/sse/genkat-json.c b/sse/genkat-json.c index 81a5cbe..f230928 100644 --- a/sse/genkat-json.c +++ b/sse/genkat-json.c @@ -40,7 +40,7 @@ do \ printf(" \"key\": \"\",\n");\ printf(" \"out\": \"");\ \ - name( hash, in, NULL, size_prefix ## _OUTBYTES, i, 0 ); \ + name( hash, size_prefix ## _OUTBYTES, in, i, NULL, 0 ); \ \ for( int j = 0; j < size_prefix ## _OUTBYTES; ++j ) \ printf( "%02x", hash[j]);\ @@ -66,7 +66,7 @@ do \ printf("\",\n");\ printf(" \"out\": \"");\ \ - name( hash, in, key, size_prefix ## _OUTBYTES, i, size_prefix ## _KEYBYTES ); \ + name( hash, size_prefix ## _OUTBYTES, in, i, key, size_prefix ## _KEYBYTES ); \ \ for( int j = 0; j < size_prefix ## _OUTBYTES; ++j ) \ printf( "%02x", hash[j]);\ @@ -88,7 +88,7 @@ int main( int argc, char **argv ) for( size_t i = 0; i < sizeof( key ); ++i ) key[i] = i; - printf("["); + printf("["); MAKE_KAT( blake2s, BLAKE2S ); MAKE_KEYED_KAT( blake2s, BLAKE2S ); MAKE_KAT( blake2b, BLAKE2B ); @@ -97,7 +97,7 @@ int main( int argc, char **argv ) MAKE_KEYED_KAT( blake2sp, BLAKE2S ); MAKE_KAT( blake2bp, BLAKE2B ); MAKE_KEYED_KAT( blake2bp, BLAKE2B ); - printf("\n]\n"); + printf("\n]\n"); fflush(stdout); return 0; } diff --git a/sse/makefile b/sse/makefile index 90bf156..b23b4a8 100644 --- a/sse/makefile +++ b/sse/makefile @@ -1,7 +1,7 @@ CC=gcc -CFLAGS=-Wall -O3 -march=native -I../testvectors +CFLAGS=-O3 -I../testvectors -all: blake2s blake2b blake2sp blake2bp +all: blake2s blake2b blake2sp blake2bp check blake2s: blake2s.c $(CC) blake2s.c -o $@ $(CFLAGS) -DBLAKE2S_SELFTEST @@ -15,6 +15,12 @@ blake2sp: blake2sp.c blake2s.c blake2bp: blake2bp.c blake2b.c $(CC) blake2bp.c blake2b.c -o $@ $(CFLAGS) -DBLAKE2BP_SELFTEST +check: blake2s blake2b blake2sp blake2bp + ./blake2s + ./blake2b + ./blake2sp + ./blake2bp + kat: $(CC) $(CFLAGS) -o genkat-c genkat-c.c blake2b.c blake2s.c blake2sp.c blake2bp.c $(CC) $(CFLAGS) -g -o genkat-json genkat-json.c blake2b.c blake2s.c blake2sp.c blake2bp.c diff --git a/testvectors/blake2-kat.h b/testvectors/blake2-kat.h index 435525b..9f9fdfe 100644 --- a/testvectors/blake2-kat.h +++ b/testvectors/blake2-kat.h @@ -1,15 +1,14 @@ -#pragma once -#ifndef __BLAKE2_KAT_H__ -#define __BLAKE2_KAT_H__ +#ifndef BLAKE2_KAT_H +#define BLAKE2_KAT_H #include <stdint.h> -#define KAT_LENGTH 256 +#define BLAKE2_KAT_LENGTH 256 -static const uint8_t blake2s_kat[KAT_LENGTH][BLAKE2S_OUTBYTES] = +static const uint8_t blake2s_kat[BLAKE2_KAT_LENGTH][BLAKE2S_OUTBYTES] = { { 0x69, 0x21, 0x7A, 0x30, 0x79, 0x90, 0x80, 0x94, @@ -1552,7 +1551,7 @@ static const uint8_t blake2s_kat[KAT_LENGTH][BLAKE2S_OUTBYTES] = -static const uint8_t blake2s_keyed_kat[KAT_LENGTH][BLAKE2S_OUTBYTES] = +static const uint8_t blake2s_keyed_kat[BLAKE2_KAT_LENGTH][BLAKE2S_OUTBYTES] = { { 0x48, 0xA8, 0x99, 0x7D, 0xA4, 0x07, 0x87, 0x6B, @@ -3095,7 +3094,7 @@ static const uint8_t blake2s_keyed_kat[KAT_LENGTH][BLAKE2S_OUTBYTES] = -static const uint8_t blake2b_kat[KAT_LENGTH][BLAKE2B_OUTBYTES] = +static const uint8_t blake2b_kat[BLAKE2_KAT_LENGTH][BLAKE2B_OUTBYTES] = { { 0x78, 0x6A, 0x02, 0xF7, 0x42, 0x01, 0x59, 0x03, @@ -5662,7 +5661,7 @@ static const uint8_t blake2b_kat[KAT_LENGTH][BLAKE2B_OUTBYTES] = -static const uint8_t blake2b_keyed_kat[KAT_LENGTH][BLAKE2B_OUTBYTES] = +static const uint8_t blake2b_keyed_kat[BLAKE2_KAT_LENGTH][BLAKE2B_OUTBYTES] = { { 0x10, 0xEB, 0xB6, 0x77, 0x00, 0xB1, 0x86, 0x8E, @@ -8229,7 +8228,7 @@ static const uint8_t blake2b_keyed_kat[KAT_LENGTH][BLAKE2B_OUTBYTES] = -static const uint8_t blake2sp_kat[KAT_LENGTH][BLAKE2S_OUTBYTES] = +static const uint8_t blake2sp_kat[BLAKE2_KAT_LENGTH][BLAKE2S_OUTBYTES] = { { 0xDD, 0x0E, 0x89, 0x17, 0x76, 0x93, 0x3F, 0x43, @@ -9772,7 +9771,7 @@ static const uint8_t blake2sp_kat[KAT_LENGTH][BLAKE2S_OUTBYTES] = -static const uint8_t blake2sp_keyed_kat[KAT_LENGTH][BLAKE2S_OUTBYTES] = +static const uint8_t blake2sp_keyed_kat[BLAKE2_KAT_LENGTH][BLAKE2S_OUTBYTES] = { { 0x71, 0x5C, 0xB1, 0x38, 0x95, 0xAE, 0xB6, 0x78, @@ -11315,7 +11314,7 @@ static const uint8_t blake2sp_keyed_kat[KAT_LENGTH][BLAKE2S_OUTBYTES] = -static const uint8_t blake2bp_kat[KAT_LENGTH][BLAKE2B_OUTBYTES] = +static const uint8_t blake2bp_kat[BLAKE2_KAT_LENGTH][BLAKE2B_OUTBYTES] = { { 0xB5, 0xEF, 0x81, 0x1A, 0x80, 0x38, 0xF7, 0x0B, @@ -13882,7 +13881,7 @@ static const uint8_t blake2bp_kat[KAT_LENGTH][BLAKE2B_OUTBYTES] = -static const uint8_t blake2bp_keyed_kat[KAT_LENGTH][BLAKE2B_OUTBYTES] = +static const uint8_t blake2bp_keyed_kat[BLAKE2_KAT_LENGTH][BLAKE2B_OUTBYTES] = { { 0x9D, 0x94, 0x61, 0x07, 0x3E, 0x4E, 0xB6, 0x40, |