Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/quite/humla-spongycastle.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Peter Dettman <peter.dettman@bouncycastle.org> 2014-03-14 08:32:34 +0400
committer Peter Dettman <peter.dettman@bouncycastle.org> 2014-03-14 08:32:34 +0400
commit 4290ad85821c3f046f885d14d8fafa3ce26bf168 (patch)
tree 68852ac2199d2a89958fd79c5fd6bc7d84428b35 /core/src/main/java/org
parent af4ac75432712f98940242862d1b5b242b93248c (diff)
Optimize squaring by inlining the shiftUpBit
Diffstat (limited to 'core/src/main/java/org')
-rw-r--r-- core/src/main/java/org/bouncycastle/math/ec/custom/sec/Nat192.java 102
-rw-r--r-- core/src/main/java/org/bouncycastle/math/ec/custom/sec/Nat224.java 118
-rw-r--r-- core/src/main/java/org/bouncycastle/math/ec/custom/sec/Nat256.java 134
3 files changed, 252 insertions, 102 deletions
diff --git a/core/src/main/java/org/bouncycastle/math/ec/custom/sec/Nat192.java b/core/src/main/java/org/bouncycastle/math/ec/custom/sec/Nat192.java
index f080cc27..c622350a 100644
--- a/core/src/main/java/org/bouncycastle/math/ec/custom/sec/Nat192.java
+++ b/core/src/main/java/org/bouncycastle/math/ec/custom/sec/Nat192.java
@@ -596,8 +596,9 @@ public abstract class Nat192
long x_0 = x[0] & M;
long zz_1;
+ int c = 0, w;
{
- int c = 0, i = 5, j = 12;
+ int i = 5, j = 12;
do
{
long xVal = (x[i--] & M);
@@ -611,7 +612,8 @@ public abstract class Nat192
{
long p = x_0 * x_0;
zz_1 = ((c << 31) & M) | (p >>> 33);
- zz[0] = (int)(p >>> 1);
+ zz[0] = (int)p;
+ c = (int)(p >>> 32) & 1;
}
}
@@ -620,7 +622,9 @@ public abstract class Nat192
{
zz_1 += x_1 * x_0;
- zz[1] = (int)zz_1;
+ w = (int)zz_1;
+ zz[1] = (w << 1) | c;
+ c = w >>> 31;
zz_2 += zz_1 >>> 32;
}
@@ -629,7 +633,9 @@ public abstract class Nat192
long zz_4 = zz[4] & M;
{
zz_2 += x_2 * x_0;
- zz[2] = (int)zz_2;
+ w = (int)zz_2;
+ zz[2] = (w << 1) | c;
+ c = w >>> 31;
zz_3 += (zz_2 >>> 32) + x_2 * x_1;
zz_4 += zz_3 >>> 32;
zz_3 &= M;
@@ -640,7 +646,9 @@ public abstract class Nat192
long zz_6 = zz[6] & M;
{
zz_3 += x_3 * x_0;
- zz[3] = (int)zz_3;
+ w = (int)zz_3;
+ zz[3] = (w << 1) | c;
+ c = w >>> 31;
zz_4 += (zz_3 >>> 32) + x_3 * x_1;
zz_5 += (zz_4 >>> 32) + x_3 * x_2;
zz_4 &= M;
@@ -653,7 +661,9 @@ public abstract class Nat192
long zz_8 = zz[8] & M;
{
zz_4 += x_4 * x_0;
- zz[4] = (int)zz_4;
+ w = (int)zz_4;
+ zz[4] = (w << 1) | c;
+ c = w >>> 31;
zz_5 += (zz_4 >>> 32) + x_4 * x_1;
zz_6 += (zz_5 >>> 32) + x_4 * x_2;
zz_5 &= M;
@@ -668,7 +678,9 @@ public abstract class Nat192
long zz_10 = zz[10] & M;
{
zz_5 += x_5 * x_0;
- zz[5] = (int)zz_5;
+ w = (int)zz_5;
+ zz[5] = (w << 1) | c;
+ c = w >>> 31;
zz_6 += (zz_5 >>> 32) + x_5 * x_1;
zz_7 += (zz_6 >>> 32) + x_5 * x_2;
zz_8 += (zz_7 >>> 32) + x_5 * x_3;
@@ -676,14 +688,23 @@ public abstract class Nat192
zz_10 += zz_9 >>> 32;
}
- zz[6] = (int)zz_6;
- zz[7] = (int)zz_7;
- zz[8] = (int)zz_8;
- zz[9] = (int)zz_9;
- zz[10] = (int)zz_10;
- zz[11] += (int)(zz_10 >>> 32);
-
- Nat.shiftUpBit(12, zz, (int)x_0 << 31);
+ w = (int)zz_6;
+ zz[6] = (w << 1) | c;
+ c = w >>> 31;
+ w = (int)zz_7;
+ zz[7] = (w << 1) | c;
+ c = w >>> 31;
+ w = (int)zz_8;
+ zz[8] = (w << 1) | c;
+ c = w >>> 31;
+ w = (int)zz_9;
+ zz[9] = (w << 1) | c;
+ c = w >>> 31;
+ w = (int)zz_10;
+ zz[10] = (w << 1) | c;
+ c = w >>> 31;
+ w = zz[11] + (int)(zz_10 >> 32);
+ zz[11] = (w << 1) | c;
}
public static void square(int[] x, int xOff, int[] zz, int zzOff)
@@ -691,8 +712,9 @@ public abstract class Nat192
long x_0 = x[xOff + 0] & M;
long zz_1;
+ int c = 0, w;
{
- int c = 0, i = 5, j = 12;
+ int i = 5, j = 12;
do
{
long xVal = (x[xOff + i--] & M);
@@ -706,7 +728,8 @@ public abstract class Nat192
{
long p = x_0 * x_0;
zz_1 = ((c << 31) & M) | (p >>> 33);
- zz[zzOff + 0] = (int)(p >>> 1);
+ zz[zzOff + 0] = (int)p;
+ c = (int)(p >>> 32) & 1;
}
}
@@ -715,7 +738,9 @@ public abstract class Nat192
{
zz_1 += x_1 * x_0;
- zz[zzOff + 1] = (int)zz_1;
+ w = (int)zz_1;
+ zz[zzOff + 1] = (w << 1) | c;
+ c = w >>> 31;
zz_2 += zz_1 >>> 32;
}
@@ -724,7 +749,9 @@ public abstract class Nat192
long zz_4 = zz[zzOff + 4] & M;
{
zz_2 += x_2 * x_0;
- zz[zzOff + 2] = (int)zz_2;
+ w = (int)zz_2;
+ zz[zzOff + 2] = (w << 1) | c;
+ c = w >>> 31;
zz_3 += (zz_2 >>> 32) + x_2 * x_1;
zz_4 += zz_3 >>> 32;
zz_3 &= M;
@@ -735,7 +762,9 @@ public abstract class Nat192
long zz_6 = zz[zzOff + 6] & M;
{
zz_3 += x_3 * x_0;
- zz[zzOff + 3] = (int)zz_3;
+ w = (int)zz_3;
+ zz[zzOff + 3] = (w << 1) | c;
+ c = w >>> 31;
zz_4 += (zz_3 >>> 32) + x_3 * x_1;
zz_5 += (zz_4 >>> 32) + x_3 * x_2;
zz_4 &= M;
@@ -748,7 +777,9 @@ public abstract class Nat192
long zz_8 = zz[zzOff + 8] & M;
{
zz_4 += x_4 * x_0;
- zz[zzOff + 4] = (int)zz_4;
+ w = (int)zz_4;
+ zz[zzOff + 4] = (w << 1) | c;
+ c = w >>> 31;
zz_5 += (zz_4 >>> 32) + x_4 * x_1;
zz_6 += (zz_5 >>> 32) + x_4 * x_2;
zz_5 &= M;
@@ -763,7 +794,9 @@ public abstract class Nat192
long zz_10 = zz[zzOff + 10] & M;
{
zz_5 += x_5 * x_0;
- zz[zzOff + 5] = (int)zz_5;
+ w = (int)zz_5;
+ zz[zzOff + 5] = (w << 1) | c;
+ c = w >>> 31;
zz_6 += (zz_5 >>> 32) + x_5 * x_1;
zz_7 += (zz_6 >>> 32) + x_5 * x_2;
zz_8 += (zz_7 >>> 32) + x_5 * x_3;
@@ -771,14 +804,23 @@ public abstract class Nat192
zz_10 += zz_9 >>> 32;
}
- zz[zzOff + 6] = (int)zz_6;
- zz[zzOff + 7] = (int)zz_7;
- zz[zzOff + 8] = (int)zz_8;
- zz[zzOff + 9] = (int)zz_9;
- zz[zzOff + 10] = (int)zz_10;
- zz[zzOff + 11] += (int)(zz_10 >>> 32);
-
- Nat.shiftUpBit(12, zz, zzOff, (int)x_0 << 31);
+ w = (int)zz_6;
+ zz[zzOff + 6] = (w << 1) | c;
+ c = w >>> 31;
+ w = (int)zz_7;
+ zz[zzOff + 7] = (w << 1) | c;
+ c = w >>> 31;
+ w = (int)zz_8;
+ zz[zzOff + 8] = (w << 1) | c;
+ c = w >>> 31;
+ w = (int)zz_9;
+ zz[zzOff + 9] = (w << 1) | c;
+ c = w >>> 31;
+ w = (int)zz_10;
+ zz[zzOff + 10] = (w << 1) | c;
+ c = w >>> 31;
+ w = zz[zzOff + 11] + (int)(zz_10 >> 32);
+ zz[zzOff + 11] = (w << 1) | c;
}
public static int sub(int[] x, int[] y, int[] z)
diff --git a/core/src/main/java/org/bouncycastle/math/ec/custom/sec/Nat224.java b/core/src/main/java/org/bouncycastle/math/ec/custom/sec/Nat224.java
index 8bdb5be6..8f1a66dc 100644
--- a/core/src/main/java/org/bouncycastle/math/ec/custom/sec/Nat224.java
+++ b/core/src/main/java/org/bouncycastle/math/ec/custom/sec/Nat224.java
@@ -748,8 +748,9 @@ public abstract class Nat224
long x_0 = x[0] & M;
long zz_1;
+ int c = 0, w;
{
- int c = 0, i = 6, j = 14;
+ int i = 6, j = 14;
do
{
long xVal = (x[i--] & M);
@@ -763,7 +764,8 @@ public abstract class Nat224
{
long p = x_0 * x_0;
zz_1 = ((c << 31) & M) | (p >>> 33);
- zz[0] = (int)(p >>> 1);
+ zz[0] = (int)p;
+ c = (int)(p >>> 32) & 1;
}
}
@@ -772,7 +774,9 @@ public abstract class Nat224
{
zz_1 += x_1 * x_0;
- zz[1] = (int)zz_1;
+ w = (int)zz_1;
+ zz[1] = (w << 1) | c;
+ c = w >>> 31;
zz_2 += zz_1 >>> 32;
}
@@ -781,7 +785,9 @@ public abstract class Nat224
long zz_4 = zz[4] & M;
{
zz_2 += x_2 * x_0;
- zz[2] = (int)zz_2;
+ w = (int)zz_2;
+ zz[2] = (w << 1) | c;
+ c = w >>> 31;
zz_3 += (zz_2 >>> 32) + x_2 * x_1;
zz_4 += zz_3 >>> 32;
zz_3 &= M;
@@ -792,7 +798,9 @@ public abstract class Nat224
long zz_6 = zz[6] & M;
{
zz_3 += x_3 * x_0;
- zz[3] = (int)zz_3;
+ w = (int)zz_3;
+ zz[3] = (w << 1) | c;
+ c = w >>> 31;
zz_4 += (zz_3 >>> 32) + x_3 * x_1;
zz_5 += (zz_4 >>> 32) + x_3 * x_2;
zz_4 &= M;
@@ -805,7 +813,9 @@ public abstract class Nat224
long zz_8 = zz[8] & M;
{
zz_4 += x_4 * x_0;
- zz[4] = (int)zz_4;
+ w = (int)zz_4;
+ zz[4] = (w << 1) | c;
+ c = w >>> 31;
zz_5 += (zz_4 >>> 32) + x_4 * x_1;
zz_6 += (zz_5 >>> 32) + x_4 * x_2;
zz_5 &= M;
@@ -820,7 +830,9 @@ public abstract class Nat224
long zz_10 = zz[10] & M;
{
zz_5 += x_5 * x_0;
- zz[5] = (int)zz_5;
+ w = (int)zz_5;
+ zz[5] = (w << 1) | c;
+ c = w >>> 31;
zz_6 += (zz_5 >>> 32) + x_5 * x_1;
zz_7 += (zz_6 >>> 32) + x_5 * x_2;
zz_6 &= M;
@@ -837,7 +849,9 @@ public abstract class Nat224
long zz_12 = zz[12] & M;
{
zz_6 += x_6 * x_0;
- zz[6] = (int)zz_6;
+ w = (int)zz_6;
+ zz[6] = (w << 1) | c;
+ c = w >>> 31;
zz_7 += (zz_6 >>> 32) + x_6 * x_1;
zz_8 += (zz_7 >>> 32) + x_6 * x_2;
zz_9 += (zz_8 >>> 32) + x_6 * x_3;
@@ -846,15 +860,26 @@ public abstract class Nat224
zz_12 += zz_11 >>> 32;
}
- zz[7] = (int)zz_7;
- zz[8] = (int)zz_8;
- zz[9] = (int)zz_9;
- zz[10] = (int)zz_10;
- zz[11] = (int)zz_11;
- zz[12] = (int)zz_12;
- zz[13] += (int)(zz_12 >>> 32);
-
- Nat.shiftUpBit(14, zz, (int)x_0 << 31);
+ w = (int)zz_7;
+ zz[7] = (w << 1) | c;
+ c = w >>> 31;
+ w = (int)zz_8;
+ zz[8] = (w << 1) | c;
+ c = w >>> 31;
+ w = (int)zz_9;
+ zz[9] = (w << 1) | c;
+ c = w >>> 31;
+ w = (int)zz_10;
+ zz[10] = (w << 1) | c;
+ c = w >>> 31;
+ w = (int)zz_11;
+ zz[11] = (w << 1) | c;
+ c = w >>> 31;
+ w = (int)zz_12;
+ zz[12] = (w << 1) | c;
+ c = w >>> 31;
+ w = zz[13] + (int)(zz_12 >> 32);
+ zz[13] = (w << 1) | c;
}
public static void square(int[] x, int xOff, int[] zz, int zzOff)
@@ -862,8 +887,9 @@ public abstract class Nat224
long x_0 = x[xOff + 0] & M;
long zz_1;
+ int c = 0, w;
{
- int c = 0, i = 6, j = 14;
+ int i = 6, j = 14;
do
{
long xVal = (x[xOff + i--] & M);
@@ -877,7 +903,8 @@ public abstract class Nat224
{
long p = x_0 * x_0;
zz_1 = ((c << 31) & M) | (p >>> 33);
- zz[zzOff + 0] = (int)(p >>> 1);
+ zz[zzOff + 0] = (int)p;
+ c = (int)(p >>> 32) & 1;
}
}
@@ -886,7 +913,9 @@ public abstract class Nat224
{
zz_1 += x_1 * x_0;
- zz[zzOff + 1] = (int)zz_1;
+ w = (int)zz_1;
+ zz[zzOff + 1] = (w << 1) | c;
+ c = w >>> 31;
zz_2 += zz_1 >>> 32;
}
@@ -895,7 +924,9 @@ public abstract class Nat224
long zz_4 = zz[zzOff + 4] & M;
{
zz_2 += x_2 * x_0;
- zz[zzOff + 2] = (int)zz_2;
+ w = (int)zz_2;
+ zz[zzOff + 2] = (w << 1) | c;
+ c = w >>> 31;
zz_3 += (zz_2 >>> 32) + x_2 * x_1;
zz_4 += zz_3 >>> 32;
zz_3 &= M;
@@ -906,7 +937,9 @@ public abstract class Nat224
long zz_6 = zz[zzOff + 6] & M;
{
zz_3 += x_3 * x_0;
- zz[zzOff + 3] = (int)zz_3;
+ w = (int)zz_3;
+ zz[zzOff + 3] = (w << 1) | c;
+ c = w >>> 31;
zz_4 += (zz_3 >>> 32) + x_3 * x_1;
zz_5 += (zz_4 >>> 32) + x_3 * x_2;
zz_4 &= M;
@@ -919,7 +952,9 @@ public abstract class Nat224
long zz_8 = zz[zzOff + 8] & M;
{
zz_4 += x_4 * x_0;
- zz[zzOff + 4] = (int)zz_4;
+ w = (int)zz_4;
+ zz[zzOff + 4] = (w << 1) | c;
+ c = w >>> 31;
zz_5 += (zz_4 >>> 32) + x_4 * x_1;
zz_6 += (zz_5 >>> 32) + x_4 * x_2;
zz_5 &= M;
@@ -934,7 +969,9 @@ public abstract class Nat224
long zz_10 = zz[zzOff + 10] & M;
{
zz_5 += x_5 * x_0;
- zz[zzOff + 5] = (int)zz_5;
+ w = (int)zz_5;
+ zz[zzOff + 5] = (w << 1) | c;
+ c = w >>> 31;
zz_6 += (zz_5 >>> 32) + x_5 * x_1;
zz_7 += (zz_6 >>> 32) + x_5 * x_2;
zz_6 &= M;
@@ -951,7 +988,9 @@ public abstract class Nat224
long zz_12 = zz[zzOff + 12] & M;
{
zz_6 += x_6 * x_0;
- zz[zzOff + 6] = (int)zz_6;
+ w = (int)zz_6;
+ zz[zzOff + 6] = (w << 1) | c;
+ c = w >>> 31;
zz_7 += (zz_6 >>> 32) + x_6 * x_1;
zz_8 += (zz_7 >>> 32) + x_6 * x_2;
zz_9 += (zz_8 >>> 32) + x_6 * x_3;
@@ -960,15 +999,26 @@ public abstract class Nat224
zz_12 += zz_11 >>> 32;
}
- zz[zzOff + 7] = (int)zz_7;
- zz[zzOff + 8] = (int)zz_8;
- zz[zzOff + 9] = (int)zz_9;
- zz[zzOff + 10] = (int)zz_10;
- zz[zzOff + 11] = (int)zz_11;
- zz[zzOff + 12] = (int)zz_12;
- zz[zzOff + 13] += (int)(zz_12 >>> 32);
-
- Nat.shiftUpBit(14, zz, zzOff, (int)x_0 << 31);
+ w = (int)zz_7;
+ zz[zzOff + 7] = (w << 1) | c;
+ c = w >>> 31;
+ w = (int)zz_8;
+ zz[zzOff + 8] = (w << 1) | c;
+ c = w >>> 31;
+ w = (int)zz_9;
+ zz[zzOff + 9] = (w << 1) | c;
+ c = w >>> 31;
+ w = (int)zz_10;
+ zz[zzOff + 10] = (w << 1) | c;
+ c = w >>> 31;
+ w = (int)zz_11;
+ zz[zzOff + 11] = (w << 1) | c;
+ c = w >>> 31;
+ w = (int)zz_12;
+ zz[zzOff + 12] = (w << 1) | c;
+ c = w >>> 31;
+ w = zz[zzOff + 13] + (int)(zz_12 >> 32);
+ zz[zzOff + 13] = (w << 1) | c;
}
public static int sub(int[] x, int[] y, int[] z)
diff --git a/core/src/main/java/org/bouncycastle/math/ec/custom/sec/Nat256.java b/core/src/main/java/org/bouncycastle/math/ec/custom/sec/Nat256.java
index 4bbc687f..6947317b 100644
--- a/core/src/main/java/org/bouncycastle/math/ec/custom/sec/Nat256.java
+++ b/core/src/main/java/org/bouncycastle/math/ec/custom/sec/Nat256.java
@@ -806,8 +806,9 @@ public abstract class Nat256
long x_0 = x[0] & M;
long zz_1;
+ int c = 0, w;
{
- int c = 0, i = 7, j = 16;
+ int i = 7, j = 16;
do
{
long xVal = (x[i--] & M);
@@ -821,7 +822,8 @@ public abstract class Nat256
{
long p = x_0 * x_0;
zz_1 = ((c << 31) & M) | (p >>> 33);
- zz[0] = (int)(p >>> 1);
+ zz[0] = (int)p;
+ c = (int)(p >>> 32) & 1;
}
}
@@ -830,7 +832,9 @@ public abstract class Nat256
{
zz_1 += x_1 * x_0;
- zz[1] = (int)zz_1;
+ w = (int)zz_1;
+ zz[1] = (w << 1) | c;
+ c = w >>> 31;
zz_2 += zz_1 >>> 32;
}
@@ -839,7 +843,9 @@ public abstract class Nat256
long zz_4 = zz[4] & M;
{
zz_2 += x_2 * x_0;
- zz[2] = (int)zz_2;
+ w = (int)zz_2;
+ zz[2] = (w << 1) | c;
+ c = w >>> 31;
zz_3 += (zz_2 >>> 32) + x_2 * x_1;
zz_4 += zz_3 >>> 32;
zz_3 &= M;
@@ -850,7 +856,9 @@ public abstract class Nat256
long zz_6 = zz[6] & M;
{
zz_3 += x_3 * x_0;
- zz[3] = (int)zz_3;
+ w = (int)zz_3;
+ zz[3] = (w << 1) | c;
+ c = w >>> 31;
zz_4 += (zz_3 >>> 32) + x_3 * x_1;
zz_5 += (zz_4 >>> 32) + x_3 * x_2;
zz_4 &= M;
@@ -863,7 +871,9 @@ public abstract class Nat256
long zz_8 = zz[8] & M;
{
zz_4 += x_4 * x_0;
- zz[4] = (int)zz_4;
+ w = (int)zz_4;
+ zz[4] = (w << 1) | c;
+ c = w >>> 31;
zz_5 += (zz_4 >>> 32) + x_4 * x_1;
zz_6 += (zz_5 >>> 32) + x_4 * x_2;
zz_5 &= M;
@@ -878,7 +888,9 @@ public abstract class Nat256
long zz_10 = zz[10] & M;
{
zz_5 += x_5 * x_0;
- zz[5] = (int)zz_5;
+ w = (int)zz_5;
+ zz[5] = (w << 1) | c;
+ c = w >>> 31;
zz_6 += (zz_5 >>> 32) + x_5 * x_1;
zz_7 += (zz_6 >>> 32) + x_5 * x_2;
zz_6 &= M;
@@ -895,7 +907,9 @@ public abstract class Nat256
long zz_12 = zz[12] & M;
{
zz_6 += x_6 * x_0;
- zz[6] = (int)zz_6;
+ w = (int)zz_6;
+ zz[6] = (w << 1) | c;
+ c = w >>> 31;
zz_7 += (zz_6 >>> 32) + x_6 * x_1;
zz_8 += (zz_7 >>> 32) + x_6 * x_2;
zz_7 &= M;
@@ -914,7 +928,9 @@ public abstract class Nat256
long zz_14 = zz[14] & M;
{
zz_7 += x_7 * x_0;
- zz[7] = (int)zz_7;
+ w = (int)zz_7;
+ zz[7] = (w << 1) | c;
+ c = w >>> 31;
zz_8 += (zz_7 >>> 32) + x_7 * x_1;
zz_9 += (zz_8 >>> 32) + x_7 * x_2;
zz_10 += (zz_9 >>> 32) + x_7 * x_3;
@@ -924,16 +940,29 @@ public abstract class Nat256
zz_14 += zz_13 >>> 32;
}
- zz[8] = (int)zz_8;
- zz[9] = (int)zz_9;
- zz[10] = (int)zz_10;
- zz[11] = (int)zz_11;
- zz[12] = (int)zz_12;
- zz[13] = (int)zz_13;
- zz[14] = (int)zz_14;
- zz[15] += (int)(zz_14 >>> 32);
-
- Nat.shiftUpBit(16, zz, (int)x_0 << 31);
+ w = (int)zz_8;
+ zz[8] = (w << 1) | c;
+ c = w >>> 31;
+ w = (int)zz_9;
+ zz[9] = (w << 1) | c;
+ c = w >>> 31;
+ w = (int)zz_10;
+ zz[10] = (w << 1) | c;
+ c = w >>> 31;
+ w = (int)zz_11;
+ zz[11] = (w << 1) | c;
+ c = w >>> 31;
+ w = (int)zz_12;
+ zz[12] = (w << 1) | c;
+ c = w >>> 31;
+ w = (int)zz_13;
+ zz[13] = (w << 1) | c;
+ c = w >>> 31;
+ w = (int)zz_14;
+ zz[14] = (w << 1) | c;
+ c = w >>> 31;
+ w = zz[15] + (int)(zz_14 >> 32);
+ zz[15] = (w << 1) | c;
}
public static void square(int[] x, int xOff, int[] zz, int zzOff)
@@ -941,8 +970,9 @@ public abstract class Nat256
long x_0 = x[xOff + 0] & M;
long zz_1;
+ int c = 0, w;
{
- int c = 0, i = 7, j = 16;
+ int i = 7, j = 16;
do
{
long xVal = (x[xOff + i--] & M);
@@ -956,7 +986,8 @@ public abstract class Nat256
{
long p = x_0 * x_0;
zz_1 = ((c << 31) & M) | (p >>> 33);
- zz[zzOff + 0] = (int)(p >>> 1);
+ zz[zzOff + 0] = (int)p;
+ c = (int)(p >>> 32) & 1;
}
}
@@ -965,7 +996,9 @@ public abstract class Nat256
{
zz_1 += x_1 * x_0;
- zz[zzOff + 1] = (int)zz_1;
+ w = (int)zz_1;
+ zz[zzOff + 1] = (w << 1) | c;
+ c = w >>> 31;
zz_2 += zz_1 >>> 32;
}
@@ -974,7 +1007,9 @@ public abstract class Nat256
long zz_4 = zz[zzOff + 4] & M;
{
zz_2 += x_2 * x_0;
- zz[zzOff + 2] = (int)zz_2;
+ w = (int)zz_2;
+ zz[zzOff + 2] = (w << 1) | c;
+ c = w >>> 31;
zz_3 += (zz_2 >>> 32) + x_2 * x_1;
zz_4 += zz_3 >>> 32;
zz_3 &= M;
@@ -985,7 +1020,9 @@ public abstract class Nat256
long zz_6 = zz[zzOff + 6] & M;
{
zz_3 += x_3 * x_0;
- zz[zzOff + 3] = (int)zz_3;
+ w = (int)zz_3;
+ zz[zzOff + 3] = (w << 1) | c;
+ c = w >>> 31;
zz_4 += (zz_3 >>> 32) + x_3 * x_1;
zz_5 += (zz_4 >>> 32) + x_3 * x_2;
zz_4 &= M;
@@ -998,7 +1035,9 @@ public abstract class Nat256
long zz_8 = zz[zzOff + 8] & M;
{
zz_4 += x_4 * x_0;
- zz[zzOff + 4] = (int)zz_4;
+ w = (int)zz_4;
+ zz[zzOff + 4] = (w << 1) | c;
+ c = w >>> 31;
zz_5 += (zz_4 >>> 32) + x_4 * x_1;
zz_6 += (zz_5 >>> 32) + x_4 * x_2;
zz_5 &= M;
@@ -1013,7 +1052,9 @@ public abstract class Nat256
long zz_10 = zz[zzOff + 10] & M;
{
zz_5 += x_5 * x_0;
- zz[zzOff + 5] = (int)zz_5;
+ w = (int)zz_5;
+ zz[zzOff + 5] = (w << 1) | c;
+ c = w >>> 31;
zz_6 += (zz_5 >>> 32) + x_5 * x_1;
zz_7 += (zz_6 >>> 32) + x_5 * x_2;
zz_6 &= M;
@@ -1030,7 +1071,9 @@ public abstract class Nat256
long zz_12 = zz[zzOff + 12] & M;
{
zz_6 += x_6 * x_0;
- zz[zzOff + 6] = (int)zz_6;
+ w = (int)zz_6;
+ zz[zzOff + 6] = (w << 1) | c;
+ c = w >>> 31;
zz_7 += (zz_6 >>> 32) + x_6 * x_1;
zz_8 += (zz_7 >>> 32) + x_6 * x_2;
zz_7 &= M;
@@ -1049,7 +1092,9 @@ public abstract class Nat256
long zz_14 = zz[zzOff + 14] & M;
{
zz_7 += x_7 * x_0;
- zz[zzOff + 7] = (int)zz_7;
+ w = (int)zz_7;
+ zz[zzOff + 7] = (w << 1) | c;
+ c = w >>> 31;
zz_8 += (zz_7 >>> 32) + x_7 * x_1;
zz_9 += (zz_8 >>> 32) + x_7 * x_2;
zz_10 += (zz_9 >>> 32) + x_7 * x_3;
@@ -1059,16 +1104,29 @@ public abstract class Nat256
zz_14 += zz_13 >>> 32;
}
- zz[zzOff + 8] = (int)zz_8;
- zz[zzOff + 9] = (int)zz_9;
- zz[zzOff + 10] = (int)zz_10;
- zz[zzOff + 11] = (int)zz_11;
- zz[zzOff + 12] = (int)zz_12;
- zz[zzOff + 13] = (int)zz_13;
- zz[zzOff + 14] = (int)zz_14;
- zz[zzOff + 15] += (int)(zz_14 >>> 32);
-
- Nat.shiftUpBit(16, zz, zzOff, (int)x_0 << 31);
+ w = (int)zz_8;
+ zz[zzOff + 8] = (w << 1) | c;
+ c = w >>> 31;
+ w = (int)zz_9;
+ zz[zzOff + 9] = (w << 1) | c;
+ c = w >>> 31;
+ w = (int)zz_10;
+ zz[zzOff + 10] = (w << 1) | c;
+ c = w >>> 31;
+ w = (int)zz_11;
+ zz[zzOff + 11] = (w << 1) | c;
+ c = w >>> 31;
+ w = (int)zz_12;
+ zz[zzOff + 12] = (w << 1) | c;
+ c = w >>> 31;
+ w = (int)zz_13;
+ zz[zzOff + 13] = (w << 1) | c;
+ c = w >>> 31;
+ w = (int)zz_14;
+ zz[zzOff + 14] = (w << 1) | c;
+ c = w >>> 31;
+ w = zz[zzOff + 15] + (int)(zz_14 >> 32);
+ zz[zzOff + 15] = (w << 1) | c;
}
public static int sub(int[] x, int[] y, int[] z)