github.com/mono/boringssl.git
author    Adam Langley <agl@google.com>  2015-02-03 00:34:50 +0300
committer Adam Langley <agl@google.com>  2015-02-03 03:34:17 +0300
commit    868c7ef1f489b565e3469ff739654a7bf258773e (patch)
tree      eb6138580a87da02511dc389060cf5fb624b2249 /crypto
parent    64b17ccf15b9c8fdd79555e76e5ab5d108f2a3df (diff)
Don't assume alignment of ChaCha key on ARM.
When addressing [1], I checked the AEAD code but brain-farted: a key is
aligned in that code, but it's the Poly1305 key, which doesn't matter here.

It would be nice to align the ChaCha key too, but Android doesn't have
|posix_memalign| in the versions that we care about. It does have
|memalign|, but that's documented as "obsolete" and we don't have a concept
of an Android OS yet and I don't want to add one just for this. So this
change uses the buffer for loading the key again.

(Note that we never used to check for alignment of the |key| before calling
this. We must have gotten it for free somehow when checking the alignment
of |in| and |out|. But there are clearly some paths that don't have an
aligned key: https://code.google.com/p/chromium/issues/detail?id=454308.)

At least the generation script started paying off immediately ☺.

[1] https://boringssl-review.googlesource.com/#/c/3132/1/crypto/chacha/chacha_vec.c@185

Change-Id: I4f893ba0733440fddd453f9636cc2aeaf05076ed
Reviewed-on: https://boringssl-review.googlesource.com/3270
Reviewed-by: Adam Langley <agl@google.com>
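
The dropped assumption is visible in the assembly half of the diff below:
the key used to be loaded with vld1.64 {d26-d27}, [ip:64], and that :64
alignment hint makes an unaligned key address fault at runtime. A
hypothetical predicate (not code from this commit) pins down the
precondition the C vector type imposes on |key|:

#include <stdint.h>

/* Hypothetical illustration, not part of this commit: a GCC vector of
 * vector_size(16) has 16-byte alignment, so dereferencing the key as
 * such a vector is only valid when this holds. Some callers don't
 * guarantee it (see the Chromium bug linked above). */
static int key_is_16_byte_aligned(const void *key) {
	return ((uintptr_t)key & 15) == 0;
}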
Diffstat (limited to 'crypto')
-rw-r--r--  crypto/chacha/chacha_vec.c      |  19
-rw-r--r--  crypto/chacha/chacha_vec_arm.S  | 151
2 files changed, 83 insertions, 87 deletions
diff --git a/crypto/chacha/chacha_vec.c b/crypto/chacha/chacha_vec.c
index 88830bc6..2b0fd9c5 100644
--- a/crypto/chacha/chacha_vec.c
+++ b/crypto/chacha/chacha_vec.c
@@ -40,6 +40,7 @@ typedef unsigned vec __attribute__((vector_size(16)));
* This implementation supports parallel processing of multiple blocks,
* including potentially using general-purpose registers. */
#if __ARM_NEON__
+#include <string.h>
#include <arm_neon.h>
#define GPR_TOO 1
#define VBPI 2
@@ -162,29 +163,15 @@ void CRYPTO_chacha_20(
uint8_t alignment_buffer[16] __attribute__((aligned(16)));
#endif
vec s0, s1, s2, s3;
-#if !defined(__ARM_NEON__) && !defined(__SSE2__)
- __attribute__ ((aligned (16))) unsigned key[8], nonce[4];
-#endif
__attribute__ ((aligned (16))) unsigned chacha_const[] =
{0x61707865,0x3320646E,0x79622D32,0x6B206574};
-#if defined(__ARM_NEON__) || defined(__SSE2__)
- kp = (unsigned *)key;
-#else
- ((vec *)key)[0] = REVV_BE(((vec *)key)[0]);
- ((vec *)key)[1] = REVV_BE(((vec *)key)[1]);
- nonce[0] = REVW_BE(((unsigned *)nonce)[0]);
- nonce[1] = REVW_BE(((unsigned *)nonce)[1]);
- nonce[2] = REVW_BE(((unsigned *)nonce)[2]);
- nonce[3] = REVW_BE(((unsigned *)nonce)[3]);
kp = (unsigned *)key;
- np = (unsigned *)nonce;
-#endif
#if defined(__ARM_NEON__)
np = (unsigned*) nonce;
#endif
s0 = LOAD_ALIGNED(chacha_const);
- s1 = LOAD_ALIGNED(&((vec*)kp)[0]);
- s2 = LOAD_ALIGNED(&((vec*)kp)[1]);
+ s1 = LOAD(&((vec*)kp)[0]);
+ s2 = LOAD(&((vec*)kp)[1]);
s3 = (vec){
counter & 0xffffffff,
#if __ARM_NEON__ || defined(OPENSSL_X86)
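
The substance of the C hunk is the switch from LOAD_ALIGNED to LOAD for the
two halves of the key, plus the <string.h> include that LOAD now needs.
Judging from the alignment_buffer declared in the hunk context above, LOAD
bounces bytes through that 16-byte-aligned scratch buffer with memcpy. A
sketch of the shape, with illustrative names rather than the exact macros
from chacha_vec.c:

#include <stdint.h>
#include <string.h>

typedef unsigned vec __attribute__((vector_size(16)));

/* Illustrative sketch only. The aligned load dereferences directly, so
 * its pointer must be 16-byte aligned; the general load copies through
 * an aligned scratch buffer first, which is legal for any source
 * alignment because memcpy makes no alignment assumptions. */
static inline vec load_aligned(const void *p) {
	return *(const vec *)p;
}

static inline vec load_any(const void *p,
                           uint8_t scratch[16] /* 16-byte aligned */) {
	memcpy(scratch, p, 16);
	return load_aligned(scratch);
}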
diff --git a/crypto/chacha/chacha_vec_arm.S b/crypto/chacha/chacha_vec_arm.S
index 15d4556c..a1fb541e 100644
--- a/crypto/chacha/chacha_vec_arm.S
+++ b/crypto/chacha/chacha_vec_arm.S
@@ -62,74 +62,88 @@ CRYPTO_chacha_20_neon:
@ args = 8, pretend = 0, frame = 128
@ frame_needed = 1, uses_anonymous_args = 0
push {r4, r5, r6, r7, r8, r9, r10, fp, lr}
- mov r4, r2
+ mov ip, r3
vpush.64 {d8, d9, d10, d11, d12, d13, d14, d15}
+ mov r9, r2
+ ldr r4, .L92+16
+ mov fp, r0
+ mov r10, r1
+ mov lr, ip
+.LPIC16:
+ add r4, pc
movw r8, #43691
- movt r8, 43690
- mov ip, r3
- umull r8, r9, r4, r8
sub sp, sp, #132
add r7, sp, #0
sub sp, sp, #112
- mov fp, r0
- mov r10, r1
+ movt r8, 43690
+ str r0, [r7, #60]
+ str r1, [r7, #12]
str r2, [r7, #8]
+ ldmia r4, {r0, r1, r2, r3}
add r4, sp, #15
- ldr r2, .L92+16
bic r4, r4, #15
- ldr r5, [r7, #232]
- add lr, r4, #64
-.LPIC16:
- add r2, pc
- str r0, [r7, #60]
- str r1, [r7, #12]
- str r3, [r7, #44]
- ldmia r2, {r0, r1, r2, r3}
- ldr r6, [r5]
- str r4, [r7, #72]
- ldr r5, [r5, #4]
- ldr r4, [r7, #236]
- str r6, [r7, #120]
- str r5, [r7, #124]
- str r4, [r7, #112]
- stmia lr, {r0, r1, r2, r3}
- movs r3, #0
- ldr r0, [r7, #72]
- str r3, [r7, #116]
- lsrs r3, r9, #7
+ str ip, [r7, #44]
+ umull r8, r9, r9, r8
+ mov r6, r4
+ adds r4, r4, #64
+ add r5, r6, #80
+ str r5, [r7, #68]
+ stmia r4, {r0, r1, r2, r3}
+ movs r4, #0
+ ldr r0, [ip] @ unaligned
+ ldr r1, [ip, #4] @ unaligned
+ ldr r2, [ip, #8] @ unaligned
+ ldr r3, [ip, #12] @ unaligned
+ vldr d24, [r6, #64]
+ vldr d25, [r6, #72]
+ str r4, [r7, #116]
+ mov r4, r5
+ stmia r5!, {r0, r1, r2, r3}
+ ldr r0, [lr, #16]! @ unaligned
+ ldr r3, [r7, #232]
+ str r6, [r7, #72]
+ ldr r2, [lr, #8] @ unaligned
+ ldr ip, [r3]
+ ldr r6, [r7, #236]
+ ldr r1, [lr, #4] @ unaligned
+ ldr r3, [lr, #12] @ unaligned
+ ldr r5, [r7, #72]
+ vldr d26, [r5, #80]
+ vldr d27, [r5, #88]
+ str ip, [r7, #120]
+ stmia r4!, {r0, r1, r2, r3}
+ lsrs r2, r9, #7
+ ldr r3, [r7, #232]
+ str r6, [r7, #112]
+ vldr d28, [r5, #80]
+ vldr d29, [r5, #88]
+ ldr r3, [r3, #4]
+ str r3, [r7, #124]
vldr d22, [r7, #112]
vldr d23, [r7, #120]
- vldr d24, [r0, #64]
- vldr d25, [r0, #72]
- vld1.64 {d26-d27}, [ip:64]
- vldr d28, [ip, #16]
- vldr d29, [ip, #24]
beq .L26
- ldr r1, [r0, #64]
- lsls r2, r3, #8
- sub r3, r2, r3, lsl #6
- str r3, [r7, #4]
- ldr r2, [r0, #72]
- str r1, [r7, #40]
- mov r1, r3
- ldr r3, [r0, #68]
+ lsls r1, r2, #8
+ ldr r4, [r5, #64]
+ sub r2, r1, r2, lsl #6
+ str r2, [r7, #4]
vldr d0, .L92
vldr d1, .L92+8
- str r2, [r7, #32]
- adds r2, r4, #2
- str r3, [r7, #36]
- ldr r3, [r0, #76]
- str r2, [r7, #48]
- mov r2, r0
mov r0, fp
+ mov r1, r2
+ ldr r2, [r5, #68]
+ str r4, [r7, #40]
+ ldr r4, [r5, #72]
+ str r2, [r7, #36]
+ ldr r2, [r5, #76]
+ str r4, [r7, #32]
+ adds r4, r6, #2
str r10, [r7, #64]
- str r3, [r7, #28]
- adds r3, r0, r1
- mov r1, r6
- str r3, [r7, #16]
- add r3, r2, #80
- mov r2, r5
- str r3, [r7, #68]
+ str r2, [r7, #28]
+ adds r2, r0, r1
+ mov r1, ip
+ str r2, [r7, #16]
+ mov r2, r3
+ str r4, [r7, #48]
.L4:
ldr r0, [r7, #44]
add r8, r7, #28
@@ -749,14 +763,12 @@ CRYPTO_chacha_20_neon:
rsb fp, fp, r1
lsrs fp, fp, #6
beq .L6
- ldr r6, [r7, #72]
ldr r5, [r7, #12]
ldr r4, [r7, #16]
- mov r3, r6
- adds r3, r3, #80
+ ldr r6, [r7, #72]
+ ldr lr, [r7, #68]
vldr d30, .L95
vldr d31, .L95+8
- mov lr, r3
str fp, [r7, #104]
str fp, [r7, #108]
.L8:
@@ -1299,18 +1311,15 @@ CRYPTO_chacha_20_neon:
vldm sp!, {d8-d15}
pop {r4, r5, r6, r7, r8, r9, r10, fp, pc}
.L89:
- ldr r4, [r7, #12]
+ ldr r5, [r7, #12]
vadd.i32 q12, q12, q10
- ldr r5, [r7, #72]
+ ldr r4, [r7, #68]
cmp r9, #31
- ldr r0, [r4] @ unaligned
- add r6, r5, #80
- ldr r1, [r4, #4] @ unaligned
- ldr r2, [r4, #8] @ unaligned
- mov r5, r6
- ldr r3, [r4, #12] @ unaligned
- mov r4, r6
- str r6, [r7, #68]
+ ldr r0, [r5] @ unaligned
+ ldr r1, [r5, #4] @ unaligned
+ mov r6, r4
+ ldr r2, [r5, #8] @ unaligned
+ ldr r3, [r5, #12] @ unaligned
stmia r6!, {r0, r1, r2, r3}
ldr r2, [r7, #72]
ldr r6, [r7, #16]
@@ -1355,13 +1364,13 @@ CRYPTO_chacha_20_neon:
str fp, [r7, #16]
b .L2
.L90:
- ldr r3, [r7, #12]
+ mov r3, r5
+ ldr r4, [r7, #68]
+ ldr r0, [r3, #16]! @ unaligned
add lr, r1, #16
- mov r4, r5
- mov r6, r5
mov r5, r1
vadd.i32 q13, q13, q15
- ldr r0, [r3, #16]! @ unaligned
+ mov r6, r4
cmp r9, #47
ldr r1, [r3, #4] @ unaligned
ldr r2, [r3, #8] @ unaligned
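
In the regenerated assembly, that memcpy shows up as the
"ldr ... @ unaligned" / "stmia" sequences above: four word loads from the
unaligned key pointer, then a store-multiple into the aligned stack slot.
A hand-written C sketch of the pattern the compiler emits (names are
illustrative, not from the source):

#include <stdint.h>
#include <string.h>

/* Sketch of the generated pattern: ARMv7 ldr tolerates unaligned
 * addresses, so the four loads mirror the "ldr rN, [...] @ unaligned"
 * lines in the listing, and the aligned stores mirror
 * "stmia r5!, {r0, r1, r2, r3}". */
static void copy16_like_the_listing(uint32_t dst[4] /* aligned */,
                                    const uint8_t *src /* any alignment */) {
	uint32_t w[4];
	memcpy(&w[0], src + 0, 4);    /* ldr r0, [ip]      @ unaligned */
	memcpy(&w[1], src + 4, 4);    /* ldr r1, [ip, #4]  @ unaligned */
	memcpy(&w[2], src + 8, 4);    /* ldr r2, [ip, #8]  @ unaligned */
	memcpy(&w[3], src + 12, 4);   /* ldr r3, [ip, #12] @ unaligned */
	dst[0] = w[0]; dst[1] = w[1]; /* stmia: four registers stored  */
	dst[2] = w[2]; dst[3] = w[3]; /* back-to-back to the buffer    */
}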