Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/BLAKE2/BLAKE2.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLeigh Brown <leigh@solinno.co.uk>2018-04-02 22:07:05 +0300
committerLeigh Brown <leigh@solinno.co.uk>2018-04-02 22:07:05 +0300
commit7965d3e6e1b4193438b8d3a656787587d2579227 (patch)
treeef8d4edef59ecaae3389baba8a8ab49e173ec9fb /neon/blake2-impl.h
parentbeb75f4512223e6a3a03a48992345256c5ef393a (diff)
Add ARM NEON versions of blake2s and blake2b
NOTE! The NEON version of blake2s is currently NO FASTER than the reference implementations. However, it is retained for reference and in case it can be further improved. The NEON version of blake2b is more than twice as fast as the reference implementation on the Raspberry PI 2 Model B.
Diffstat (limited to 'neon/blake2-impl.h')
-rw-r--r--neon/blake2-impl.h160
1 files changed, 160 insertions, 0 deletions
diff --git a/neon/blake2-impl.h b/neon/blake2-impl.h
new file mode 100644
index 0000000..5dff7fc
--- /dev/null
+++ b/neon/blake2-impl.h
@@ -0,0 +1,160 @@
+/*
+ BLAKE2 reference source code package - reference C implementations
+
+ Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
+ terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
+ your option. The terms of these licenses can be found at:
+
+ - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
+ - OpenSSL license : https://www.openssl.org/source/license.html
+ - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
+
+ More information about the BLAKE2 hash function can be found at
+ https://blake2.net.
+*/
+#ifndef BLAKE2_IMPL_H
+#define BLAKE2_IMPL_H
+
+#include <stdint.h>
+#include <string.h>
+
+#if !defined(__cplusplus) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L)
+ #if defined(_MSC_VER)
+ #define BLAKE2_INLINE __inline
+ #elif defined(__GNUC__)
+ #define BLAKE2_INLINE __inline__
+ #else
+ #define BLAKE2_INLINE
+ #endif
+#else
+ #define BLAKE2_INLINE inline
+#endif
+
+static BLAKE2_INLINE uint32_t load32( const void *src )
+{
+#if defined(NATIVE_LITTLE_ENDIAN)
+ uint32_t w;
+ memcpy(&w, src, sizeof w);
+ return w;
+#else
+ const uint8_t *p = ( const uint8_t * )src;
+ return (( uint32_t )( p[0] ) << 0) |
+ (( uint32_t )( p[1] ) << 8) |
+ (( uint32_t )( p[2] ) << 16) |
+ (( uint32_t )( p[3] ) << 24) ;
+#endif
+}
+
+static BLAKE2_INLINE uint64_t load64( const void *src )
+{
+#if defined(NATIVE_LITTLE_ENDIAN)
+ uint64_t w;
+ memcpy(&w, src, sizeof w);
+ return w;
+#else
+ const uint8_t *p = ( const uint8_t * )src;
+ return (( uint64_t )( p[0] ) << 0) |
+ (( uint64_t )( p[1] ) << 8) |
+ (( uint64_t )( p[2] ) << 16) |
+ (( uint64_t )( p[3] ) << 24) |
+ (( uint64_t )( p[4] ) << 32) |
+ (( uint64_t )( p[5] ) << 40) |
+ (( uint64_t )( p[6] ) << 48) |
+ (( uint64_t )( p[7] ) << 56) ;
+#endif
+}
+
+static BLAKE2_INLINE uint16_t load16( const void *src )
+{
+#if defined(NATIVE_LITTLE_ENDIAN)
+ uint16_t w;
+ memcpy(&w, src, sizeof w);
+ return w;
+#else
+ const uint8_t *p = ( const uint8_t * )src;
+ return (( uint16_t )( p[0] ) << 0) |
+ (( uint16_t )( p[1] ) << 8) ;
+#endif
+}
+
+static BLAKE2_INLINE void store16( void *dst, uint16_t w )
+{
+#if defined(NATIVE_LITTLE_ENDIAN)
+ memcpy(dst, &w, sizeof w);
+#else
+ uint8_t *p = ( uint8_t * )dst;
+ *p++ = ( uint8_t )w; w >>= 8;
+ *p++ = ( uint8_t )w;
+#endif
+}
+
+static BLAKE2_INLINE void store32( void *dst, uint32_t w )
+{
+#if defined(NATIVE_LITTLE_ENDIAN)
+ memcpy(dst, &w, sizeof w);
+#else
+ uint8_t *p = ( uint8_t * )dst;
+ p[0] = (uint8_t)(w >> 0);
+ p[1] = (uint8_t)(w >> 8);
+ p[2] = (uint8_t)(w >> 16);
+ p[3] = (uint8_t)(w >> 24);
+#endif
+}
+
+static BLAKE2_INLINE void store64( void *dst, uint64_t w )
+{
+#if defined(NATIVE_LITTLE_ENDIAN)
+ memcpy(dst, &w, sizeof w);
+#else
+ uint8_t *p = ( uint8_t * )dst;
+ p[0] = (uint8_t)(w >> 0);
+ p[1] = (uint8_t)(w >> 8);
+ p[2] = (uint8_t)(w >> 16);
+ p[3] = (uint8_t)(w >> 24);
+ p[4] = (uint8_t)(w >> 32);
+ p[5] = (uint8_t)(w >> 40);
+ p[6] = (uint8_t)(w >> 48);
+ p[7] = (uint8_t)(w >> 56);
+#endif
+}
+
+static BLAKE2_INLINE uint64_t load48( const void *src )
+{
+ const uint8_t *p = ( const uint8_t * )src;
+ return (( uint64_t )( p[0] ) << 0) |
+ (( uint64_t )( p[1] ) << 8) |
+ (( uint64_t )( p[2] ) << 16) |
+ (( uint64_t )( p[3] ) << 24) |
+ (( uint64_t )( p[4] ) << 32) |
+ (( uint64_t )( p[5] ) << 40) ;
+}
+
+static BLAKE2_INLINE void store48( void *dst, uint64_t w )
+{
+ uint8_t *p = ( uint8_t * )dst;
+ p[0] = (uint8_t)(w >> 0);
+ p[1] = (uint8_t)(w >> 8);
+ p[2] = (uint8_t)(w >> 16);
+ p[3] = (uint8_t)(w >> 24);
+ p[4] = (uint8_t)(w >> 32);
+ p[5] = (uint8_t)(w >> 40);
+}
+
+static BLAKE2_INLINE uint32_t rotr32( const uint32_t w, const unsigned c )
+{
+ return ( w >> c ) | ( w << ( 32 - c ) );
+}
+
+static BLAKE2_INLINE uint64_t rotr64( const uint64_t w, const unsigned c )
+{
+ return ( w >> c ) | ( w << ( 64 - c ) );
+}
+
+/* prevents compiler optimizing out memset() */
+static BLAKE2_INLINE void secure_zero_memory(void *v, size_t n)
+{
+ static void *(*const volatile memset_v)(void *, int, size_t) = &memset;
+ memset_v(v, 0, n);
+}
+
+#endif