github.com/mono/boringssl.git

author    Adam Langley <agl@chromium.org>  2014-06-20 23:00:00 +0400
committer Adam Langley <agl@chromium.org>  2014-06-21 00:17:33 +0400
commit    75b833cc819a9d189adb0fdd56327bee600ff9e9 (patch)
tree      35da51859fe77699452f6046d327ac8874bf7c55 /crypto/bn/asm
parent    b36a3156b6cc76294267d7083ddd31e6e5f9c170 (diff)
OpenSSL: make final reduction in Montgomery multiplication constant-time.
(The issue was reported by Shay Gueron.)

The final reduction in Montgomery multiplication computes

    if (X >= m) then X = X - m else X = X

In OpenSSL, this was done by computing T = X - m, doing a constant-time
selection of the *addresses* of X and T, and loading from the resulting
address. But this is not cache-neutral.

This patch changes the behaviour by loading both X and T into registers,
and doing a constant-time selection of the *values*.

TODO(fork): only some of the fixes from the original patch still apply to
the 1.0.2 code.
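In C terms, the change replaces an address select with a value select. A minimal
sketch of the two idioms, under the assumption that mask is all-ones when the
unreduced value X must be kept and all-zeros when the reduced value T = X - m
must be kept (names are illustrative, not taken from the patch):

    #include <stdint.h>

    /* Old approach (not cache-neutral): pick one of two addresses and load
     * through it, so which cache line is touched depends on the secret mask. */
    static uint64_t select_by_address(const uint64_t *x, const uint64_t *t,
                                      uint64_t mask) {
      const uint64_t *p = (const uint64_t *)(((uintptr_t)x & mask) |
                                             ((uintptr_t)t & ~mask));
      return *p;
    }

    /* New approach: load both words unconditionally, then pick the value with
     * the same mask; the memory access pattern no longer depends on the secret. */
    static uint64_t select_by_value(uint64_t x, uint64_t t, uint64_t mask) {
      return ((x ^ t) & mask) ^ t;   /* mask ? x : t */
    }

With the value select, both inputs are always loaded, so the addresses touched
do not reveal the result of the comparison.
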
Diffstat (limited to 'crypto/bn/asm')
-rw-r--r--  crypto/bn/asm/x86-mont.pl     | 15
-rw-r--r--  crypto/bn/asm/x86_64-mont.pl  | 57
-rw-r--r--  crypto/bn/asm/x86_64-mont5.pl | 13
3 files changed, 41 insertions, 44 deletions
diff --git a/crypto/bn/asm/x86-mont.pl b/crypto/bn/asm/x86-mont.pl
index e8f6b050..0626b487 100644
--- a/crypto/bn/asm/x86-mont.pl
+++ b/crypto/bn/asm/x86-mont.pl
@@ -570,16 +570,15 @@ $sbit=$num;
&jge (&label("sub"));
&sbb ("eax",0); # handle upmost overflow bit
- &and ($tp,"eax");
- &not ("eax");
- &mov ($np,$rp);
- &and ($np,"eax");
- &or ($tp,$np); # tp=carry?tp:rp
&set_label("copy",16); # copy or in-place refresh
- &mov ("eax",&DWP(0,$tp,$num,4));
- &mov (&DWP(0,$rp,$num,4),"eax"); # rp[i]=tp[i]
- &mov (&DWP($frame,"esp",$num,4),$j); # zap temporary vector
+ &mov ("edx",&DWP(0,$tp,$num,4));
+ &mov ($np,&DWP(0,$rp,$num,4));
+ &xor ("edx",$np); # conditional select
+ &and ("edx","eax");
+ &xor ("edx",$np);
+ &mov (&DWP(0,$tp,$num,4),$j); # zap temporary vector
+ &mov (&DWP(0,$rp,$num,4),"edx"); # rp[i]=tp[i]
&dec ($num);
&jge (&label("copy"));
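
The rewritten 32-bit copy loop applies that select word by word: %eax carries
the mask, %edx and $np hold tp[i] and rp[i], and the temporary vector is wiped
as it is consumed. A rough C rendering, with the mask and the zap value treated
as inputs (in the asm they live in %eax and $j):

    #include <stdint.h>

    /* Rough C rendering of the rewritten "copy" loop in x86-mont.pl (32-bit
     * path).  Illustrative only: mask stands for the 0 / 0xffffffff value in
     * %eax, zap for whatever the asm keeps in $j to wipe the temporaries. */
    static void copy_or_refresh(uint32_t *rp, uint32_t *tp, int num,
                                uint32_t mask, uint32_t zap) {
      for (int i = num; i >= 0; i--) {      /* asm counts $num down through 0 */
        uint32_t t = tp[i];                 /* word from the temporary vector */
        uint32_t r = rp[i];                 /* word already written to rp     */
        tp[i] = zap;                        /* zap temporary vector           */
        rp[i] = ((t ^ r) & mask) ^ r;       /* rp[i] = mask ? tp[i] : rp[i]   */
      }
    }
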
diff --git a/crypto/bn/asm/x86_64-mont.pl b/crypto/bn/asm/x86_64-mont.pl
index 38039286..29c0f9c4 100644
--- a/crypto/bn/asm/x86_64-mont.pl
+++ b/crypto/bn/asm/x86_64-mont.pl
@@ -273,22 +273,21 @@ $code.=<<___;
mov %rax,($rp,$i,8) # rp[i]=tp[i]-np[i]
mov 8($ap,$i,8),%rax # tp[i+1]
lea 1($i),$i # i++
- dec $j # doesnn't affect CF!
+ dec $j # doesn't affect CF!
jnz .Lsub
sbb \$0,%rax # handle upmost overflow bit
xor $i,$i
- and %rax,$ap
- not %rax
- mov $rp,$np
- and %rax,$np
mov $num,$j # j=num
- or $np,$ap # ap=borrow?tp:rp
.align 16
.Lcopy: # copy or in-place refresh
- mov ($ap,$i,8),%rax
+ mov (%rsp,$i,8),$ap
+ mov ($rp,$i,8),$np
+ xor $np,$ap # conditional select:
+ and %rax,$ap # ((ap ^ np) & %rax) ^ np
+ xor $np,$ap # ap = borrow?tp:rp
mov $i,(%rsp,$i,8) # zap temporary vector
- mov %rax,($rp,$i,8) # rp[i]=tp[i]
+ mov $ap,($rp,$i,8) # rp[i]=tp[i]
lea 1($i),$i
sub \$1,$j
jnz .Lcopy
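
In the 64-bit scalar path the mask is the whole of %rax: the sbb above folds the
final borrow into it, leaving zero when the reduced words already stored in rp
should stand and all-ones when the unreduced words in tp should win. A hedged
one-liner for how a 0/1 borrow becomes such a mask:

    #include <stdint.h>

    /* Sketch only: "borrow" is the 0-or-1 carry left by the subtraction loop;
     * the asm reaches the same 0 / all-ones result via "sbb $0,%rax". */
    static uint64_t borrow_mask(uint64_t borrow) {
      return 0 - borrow;   /* 0 -> 0x0...0 (keep rp), 1 -> 0xF...F (keep tp) */
    }

The .Lcopy loop then applies this mask to 64-bit words exactly as in the 32-bit
sketch above, storing the loop index over the temporary vector to zap it.
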
@@ -643,7 +642,6 @@ my @ri=("%rax","%rdx",$m0,$m1);
$code.=<<___;
mov 16(%rsp,$num,8),$rp # restore $rp
mov 0(%rsp),@ri[0] # tp[0]
- pxor %xmm0,%xmm0
mov 8(%rsp),@ri[1] # tp[1]
shr \$2,$num # num/=4
lea (%rsp),$ap # borrow ap for tp
@@ -681,35 +679,36 @@ $code.=<<___;
mov @ri[2],16($rp,$i,8) # rp[i]=tp[i]-np[i]
sbb \$0,@ri[0] # handle upmost overflow bit
+ mov @ri[0],%xmm0
+ punpcklqdq %xmm0,%xmm0 # extend mask to 128 bits
mov @ri[3],24($rp,$i,8) # rp[i]=tp[i]-np[i]
xor $i,$i # i=0
- and @ri[0],$ap
- not @ri[0]
- mov $rp,$np
- and @ri[0],$np
- lea -1($num),$j
- or $np,$ap # ap=borrow?tp:rp
-
- movdqu ($ap),%xmm1
- movdqa %xmm0,(%rsp)
- movdqu %xmm1,($rp)
+
+ mov $num,$j
+ pxor %xmm5,%xmm5
jmp .Lcopy4x
.align 16
-.Lcopy4x: # copy or in-place refresh
- movdqu 16($ap,$i),%xmm2
- movdqu 32($ap,$i),%xmm1
- movdqa %xmm0,16(%rsp,$i)
- movdqu %xmm2,16($rp,$i)
- movdqa %xmm0,32(%rsp,$i)
- movdqu %xmm1,32($rp,$i)
+.Lcopy4x: # copy or in-place refresh
+ movdqu (%rsp,$i),%xmm2
+ movdqu 16(%rsp,$i),%xmm4
+ movdqu ($rp,$i),%xmm1
+ movdqu 16($rp,$i),%xmm3
+ pxor %xmm1,%xmm2 # conditional select
+ pxor %xmm3,%xmm4
+ pand %xmm0,%xmm2
+ pand %xmm0,%xmm4
+ pxor %xmm1,%xmm2
+ pxor %xmm3,%xmm4
+ movdqu %xmm2,($rp,$i)
+ movdqu %xmm4,16($rp,$i)
+ movdqa %xmm5,(%rsp,$i) # zap temporary vectors
+ movdqa %xmm5,16(%rsp,$i)
+
lea 32($i),$i
dec $j
jnz .Lcopy4x
shl \$2,$num
- movdqu 16($ap,$i),%xmm2
- movdqa %xmm0,16(%rsp,$i)
- movdqu %xmm2,16($rp,$i)
___
}
$code.=<<___;
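
The four-limb SSE2 path performs the same select 16 bytes at a time: the mask is
copied into %xmm0, punpcklqdq duplicates it into both 64-bit lanes, pxor/pand/pxor
do the select, and the zeroed %xmm5 wipes the temporary vector. A rough intrinsics
sketch of one 16-byte step (names illustrative, not from the patch):

    #include <stdint.h>
    #include <emmintrin.h>   /* SSE2 */

    /* One 16-byte step of the rewritten .Lcopy4x loop, as intrinsics.
     * mask64 stands for the 0 / all-ones borrow mask held in %rax. */
    static void copy4x_step(uint8_t *rp, uint8_t *tp, uint64_t mask64) {
      __m128i mask = _mm_set1_epi64x((long long)mask64); /* movq + punpcklqdq  */
      __m128i t = _mm_loadu_si128((const __m128i *)tp);  /* temporary vector   */
      __m128i r = _mm_loadu_si128((const __m128i *)rp);  /* current rp words   */
      __m128i sel = _mm_xor_si128(_mm_and_si128(_mm_xor_si128(t, r), mask), r);
      _mm_storeu_si128((__m128i *)rp, sel);              /* rp = mask ? tp : rp */
      _mm_storeu_si128((__m128i *)tp, _mm_setzero_si128()); /* zap temporaries */
    }
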
diff --git a/crypto/bn/asm/x86_64-mont5.pl b/crypto/bn/asm/x86_64-mont5.pl
index c107df95..85386c16 100644
--- a/crypto/bn/asm/x86_64-mont5.pl
+++ b/crypto/bn/asm/x86_64-mont5.pl
@@ -330,17 +330,16 @@ $code.=<<___;
sbb \$0,%rax # handle upmost overflow bit
xor $i,$i
- and %rax,$ap
- not %rax
- mov $rp,$np
- and %rax,$np
mov $num,$j # j=num
- or $np,$ap # ap=borrow?tp:rp
.align 16
.Lcopy: # copy or in-place refresh
- mov ($ap,$i,8),%rax
+ mov (%rsp,$i,8),$ap
+ mov ($rp,$i,8),$np
+ xor $np,$ap # conditional select:
+ and %rax,$ap # ((ap ^ np) & %rax) ^ np
+ xor $np,$ap # ap = borrow?tp:rp
mov $i,(%rsp,$i,8) # zap temporary vector
- mov %rax,($rp,$i,8) # rp[i]=tp[i]
+ mov $ap,($rp,$i,8) # rp[i]=tp[i]
lea 1($i),$i
sub \$1,$j
jnz .Lcopy