diff options
author | Peter Dettman <peter.dettman@bouncycastle.org> | 2013-10-03 18:37:40 +0400 |
---|---|---|
committer | Peter Dettman <peter.dettman@bouncycastle.org> | 2013-10-03 18:37:40 +0400 |
commit | 96432bf9b9092cdbe3bf18247e7b2931048c3cbf (patch) | |
tree | 7eaaf8e8a2e3f8f776e12de319f3a2a7ae7bdec2 /core/src/main/java | |
parent | d8d2adec6eac15d572b12e73dfb9349407c85a25 (diff) |
Add support for width 3 in multiplication interleaving
Diffstat (limited to 'core/src/main/java')
-rw-r--r-- | core/src/main/java/org/bouncycastle/math/ec/LongArray.java | 167 |
1 file changed, 134 insertions, 33 deletions
diff --git a/core/src/main/java/org/bouncycastle/math/ec/LongArray.java b/core/src/main/java/org/bouncycastle/math/ec/LongArray.java index 74b6bf52..3b9cd801 100644 --- a/core/src/main/java/org/bouncycastle/math/ec/LongArray.java +++ b/core/src/main/java/org/bouncycastle/math/ec/LongArray.java @@ -12,25 +12,63 @@ class LongArray * This expands 8 bit indices into 16 bit contents, by inserting 0s between bits. * In a binary field, this operation is the same as squaring an 8 bit number. */ - private static final int[] INTERLEAVE_TABLE = new int[] { 0x0000, 0x0001, 0x0004, 0x0005, 0x0010, 0x0011, 0x0014, - 0x0015, 0x0040, 0x0041, 0x0044, 0x0045, 0x0050, 0x0051, 0x0054, 0x0055, 0x0100, 0x0101, 0x0104, 0x0105, 0x0110, - 0x0111, 0x0114, 0x0115, 0x0140, 0x0141, 0x0144, 0x0145, 0x0150, 0x0151, 0x0154, 0x0155, 0x0400, 0x0401, 0x0404, - 0x0405, 0x0410, 0x0411, 0x0414, 0x0415, 0x0440, 0x0441, 0x0444, 0x0445, 0x0450, 0x0451, 0x0454, 0x0455, 0x0500, - 0x0501, 0x0504, 0x0505, 0x0510, 0x0511, 0x0514, 0x0515, 0x0540, 0x0541, 0x0544, 0x0545, 0x0550, 0x0551, 0x0554, - 0x0555, 0x1000, 0x1001, 0x1004, 0x1005, 0x1010, 0x1011, 0x1014, 0x1015, 0x1040, 0x1041, 0x1044, 0x1045, 0x1050, - 0x1051, 0x1054, 0x1055, 0x1100, 0x1101, 0x1104, 0x1105, 0x1110, 0x1111, 0x1114, 0x1115, 0x1140, 0x1141, 0x1144, - 0x1145, 0x1150, 0x1151, 0x1154, 0x1155, 0x1400, 0x1401, 0x1404, 0x1405, 0x1410, 0x1411, 0x1414, 0x1415, 0x1440, - 0x1441, 0x1444, 0x1445, 0x1450, 0x1451, 0x1454, 0x1455, 0x1500, 0x1501, 0x1504, 0x1505, 0x1510, 0x1511, 0x1514, - 0x1515, 0x1540, 0x1541, 0x1544, 0x1545, 0x1550, 0x1551, 0x1554, 0x1555, 0x4000, 0x4001, 0x4004, 0x4005, 0x4010, - 0x4011, 0x4014, 0x4015, 0x4040, 0x4041, 0x4044, 0x4045, 0x4050, 0x4051, 0x4054, 0x4055, 0x4100, 0x4101, 0x4104, - 0x4105, 0x4110, 0x4111, 0x4114, 0x4115, 0x4140, 0x4141, 0x4144, 0x4145, 0x4150, 0x4151, 0x4154, 0x4155, 0x4400, - 0x4401, 0x4404, 0x4405, 0x4410, 0x4411, 0x4414, 0x4415, 0x4440, 0x4441, 0x4444, 0x4445, 0x4450, 0x4451, 0x4454, - 0x4455, 0x4500, 
0x4501, 0x4504, 0x4505, 0x4510, 0x4511, 0x4514, 0x4515, 0x4540, 0x4541, 0x4544, 0x4545, 0x4550, - 0x4551, 0x4554, 0x4555, 0x5000, 0x5001, 0x5004, 0x5005, 0x5010, 0x5011, 0x5014, 0x5015, 0x5040, 0x5041, 0x5044, - 0x5045, 0x5050, 0x5051, 0x5054, 0x5055, 0x5100, 0x5101, 0x5104, 0x5105, 0x5110, 0x5111, 0x5114, 0x5115, 0x5140, - 0x5141, 0x5144, 0x5145, 0x5150, 0x5151, 0x5154, 0x5155, 0x5400, 0x5401, 0x5404, 0x5405, 0x5410, 0x5411, 0x5414, - 0x5415, 0x5440, 0x5441, 0x5444, 0x5445, 0x5450, 0x5451, 0x5454, 0x5455, 0x5500, 0x5501, 0x5504, 0x5505, 0x5510, - 0x5511, 0x5514, 0x5515, 0x5540, 0x5541, 0x5544, 0x5545, 0x5550, 0x5551, 0x5554, 0x5555 }; + private static final int[] INTERLEAVE_TABLE = new int[] + { + 0x0000, 0x0001, 0x0004, 0x0005, 0x0010, 0x0011, 0x0014, 0x0015, + 0x0040, 0x0041, 0x0044, 0x0045, 0x0050, 0x0051, 0x0054, 0x0055, + 0x0100, 0x0101, 0x0104, 0x0105, 0x0110, 0x0111, 0x0114, 0x0115, + 0x0140, 0x0141, 0x0144, 0x0145, 0x0150, 0x0151, 0x0154, 0x0155, + 0x0400, 0x0401, 0x0404, 0x0405, 0x0410, 0x0411, 0x0414, 0x0415, + 0x0440, 0x0441, 0x0444, 0x0445, 0x0450, 0x0451, 0x0454, 0x0455, + 0x0500, 0x0501, 0x0504, 0x0505, 0x0510, 0x0511, 0x0514, 0x0515, + 0x0540, 0x0541, 0x0544, 0x0545, 0x0550, 0x0551, 0x0554, 0x0555, + 0x1000, 0x1001, 0x1004, 0x1005, 0x1010, 0x1011, 0x1014, 0x1015, + 0x1040, 0x1041, 0x1044, 0x1045, 0x1050, 0x1051, 0x1054, 0x1055, + 0x1100, 0x1101, 0x1104, 0x1105, 0x1110, 0x1111, 0x1114, 0x1115, + 0x1140, 0x1141, 0x1144, 0x1145, 0x1150, 0x1151, 0x1154, 0x1155, + 0x1400, 0x1401, 0x1404, 0x1405, 0x1410, 0x1411, 0x1414, 0x1415, + 0x1440, 0x1441, 0x1444, 0x1445, 0x1450, 0x1451, 0x1454, 0x1455, + 0x1500, 0x1501, 0x1504, 0x1505, 0x1510, 0x1511, 0x1514, 0x1515, + 0x1540, 0x1541, 0x1544, 0x1545, 0x1550, 0x1551, 0x1554, 0x1555, + 0x4000, 0x4001, 0x4004, 0x4005, 0x4010, 0x4011, 0x4014, 0x4015, + 0x4040, 0x4041, 0x4044, 0x4045, 0x4050, 0x4051, 0x4054, 0x4055, + 0x4100, 0x4101, 0x4104, 0x4105, 0x4110, 0x4111, 0x4114, 0x4115, + 0x4140, 0x4141, 0x4144, 0x4145, 
0x4150, 0x4151, 0x4154, 0x4155, + 0x4400, 0x4401, 0x4404, 0x4405, 0x4410, 0x4411, 0x4414, 0x4415, + 0x4440, 0x4441, 0x4444, 0x4445, 0x4450, 0x4451, 0x4454, 0x4455, + 0x4500, 0x4501, 0x4504, 0x4505, 0x4510, 0x4511, 0x4514, 0x4515, + 0x4540, 0x4541, 0x4544, 0x4545, 0x4550, 0x4551, 0x4554, 0x4555, + 0x5000, 0x5001, 0x5004, 0x5005, 0x5010, 0x5011, 0x5014, 0x5015, + 0x5040, 0x5041, 0x5044, 0x5045, 0x5050, 0x5051, 0x5054, 0x5055, + 0x5100, 0x5101, 0x5104, 0x5105, 0x5110, 0x5111, 0x5114, 0x5115, + 0x5140, 0x5141, 0x5144, 0x5145, 0x5150, 0x5151, 0x5154, 0x5155, + 0x5400, 0x5401, 0x5404, 0x5405, 0x5410, 0x5411, 0x5414, 0x5415, + 0x5440, 0x5441, 0x5444, 0x5445, 0x5450, 0x5451, 0x5454, 0x5455, + 0x5500, 0x5501, 0x5504, 0x5505, 0x5510, 0x5511, 0x5514, 0x5515, + 0x5540, 0x5541, 0x5544, 0x5545, 0x5550, 0x5551, 0x5554, 0x5555 + }; + + /* + * This expands 7 bit indices into 21 bit contents, by inserting 0s between bits. + */ + private static final int[] INTERLEAVE3_TABLE = new int[] { + 0x00000, 0x00001, 0x00008, 0x00009, 0x00040, 0x00041, 0x00048, 0x00049, + 0x00200, 0x00201, 0x00208, 0x00209, 0x00240, 0x00241, 0x00248, 0x00249, + 0x01000, 0x01001, 0x01008, 0x01009, 0x01040, 0x01041, 0x01048, 0x01049, + 0x01200, 0x01201, 0x01208, 0x01209, 0x01240, 0x01241, 0x01248, 0x01249, + 0x08000, 0x08001, 0x08008, 0x08009, 0x08040, 0x08041, 0x08048, 0x08049, + 0x08200, 0x08201, 0x08208, 0x08209, 0x08240, 0x08241, 0x08248, 0x08249, + 0x09000, 0x09001, 0x09008, 0x09009, 0x09040, 0x09041, 0x09048, 0x09049, + 0x09200, 0x09201, 0x09208, 0x09209, 0x09240, 0x09241, 0x09248, 0x09249, + 0x40000, 0x40001, 0x40008, 0x40009, 0x40040, 0x40041, 0x40048, 0x40049, + 0x40200, 0x40201, 0x40208, 0x40209, 0x40240, 0x40241, 0x40248, 0x40249, + 0x41000, 0x41001, 0x41008, 0x41009, 0x41040, 0x41041, 0x41048, 0x41049, + 0x41200, 0x41201, 0x41208, 0x41209, 0x41240, 0x41241, 0x41248, 0x41249, + 0x48000, 0x48001, 0x48008, 0x48009, 0x48040, 0x48041, 0x48048, 0x48049, + 0x48200, 0x48201, 0x48208, 0x48209, 0x48240, 
0x48241, 0x48248, 0x48249, + 0x49000, 0x49001, 0x49008, 0x49009, 0x49040, 0x49041, 0x49048, 0x49049, + 0x49200, 0x49201, 0x49208, 0x49209, 0x49240, 0x49241, 0x49248, 0x49249 + }; // For toString(); must have length 64 private static final String ZEROES = "0000000000000000000000000000000000000000000000000000000000000000"; @@ -533,14 +571,23 @@ class LongArray return new LongArray(c); } - // TODO It'd be better to be able to tune the width directly (need support for interleaving arbitrary widths) - int complexity = aLen <= 4 ? 1 : 2; - - int width = 1 << complexity; - int shifts = (64 >>> complexity); + int width, shifts, top; + if (aLen <= 2) + { + // TODO Too high 'shifts' causes sub-optimal performance in 113 bit SEC curves + width = 2; shifts = 32; top = 64; + } + else if (aLen <= 4) + { + width = 3; shifts = 21; top = 63; + } + else + { + width = 4; shifts = 16; top = 64; + } int bExt = bLen; - if ((B.m_ints[bLen - 1] >>> (65 - shifts)) != 0L) + if ((B.m_ints[bLen - 1] >>> (top + 1 - shifts)) != 0L) { ++bExt; } @@ -549,7 +596,14 @@ class LongArray long[] c = new long[cLen << width]; System.arraycopy(B.m_ints, 0, c, 0, bLen); - interleave(A.m_ints, 0, c, bExt, aLen, complexity); + if (width == 3) + { + interleave3(A.m_ints, 0, c, bExt, aLen); + } + else + { + interleave(A.m_ints, 0, c, bExt, aLen, bitLengths[width] - 1); + } int[] ci = new int[1 << width]; for (int i = 1; i < ci.length; ++i) @@ -571,15 +625,23 @@ class LongArray } } - if ((k += width) >= 64) + if ((k += width) >= top) { - break; + if (k >= 64) + { + break; + } + + // NOTE: This adjustment really only designed with width=3 in mind + k = 64 - width; + MASK &= MASK << (top - k); } shiftLeft(c, bExt); } - int ciPos = ci.length, pow2 = ciPos >>> 1, offset = 64; + int ciPos = ci.length, pow2 = ciPos >>> 1; + int offset = top; while (--ciPos > 1) { if (ciPos == pow2) @@ -715,13 +777,52 @@ class LongArray while (pos < _2len) { long mi = m_ints[pos >>> 1]; - r[pos++] = interleave32((int)mi); - r[pos++] = 
interleave32((int)(mi >>> 32)); + r[pos++] = expand32((int)mi); + r[pos++] = expand32((int)(mi >>> 32)); } return new LongArray(r); } + private static void interleave3(long[] x, int xOff, long[] z, int zOff, int count) + { + for (int i = 0; i < count; ++i) + { + z[zOff + i] = interleave3(x[xOff + i]); + } + } + + private static long interleave3(long x) + { + return (x & (1L << 63)) + | expand21((int)(x & 0x1FFFFFL)) + | expand21((int)((x >>> 21) & 0x1FFFFFL)) << 1 + | expand21((int)((x >>> 42) & 0x1FFFFFL)) << 2; + +// long z = x & (1L << 63), zPos = 0, wPos = 0, xPos = 0; +// for (;;) +// { +// z |= ((x >>> xPos) & 1L) << zPos; +// if (++zPos == 63) +// { +// String sz2 = Long.toBinaryString(z); +// return z; +// } +// if ((xPos += 21) >= 63) +// { +// xPos = ++wPos; +// } +// } + } + + private static long expand21(int n) + { + int r00 = INTERLEAVE3_TABLE[n & 0x7F]; + int r21 = INTERLEAVE3_TABLE[(n >>> 7) & 0x7F]; + int r42 = INTERLEAVE3_TABLE[(n >>> 14) & 0x7F]; + return (r42 & 0xFFFFFFFFL) << 42 | (r21 & 0xFFFFFFFFL) << 21 | (r00 & 0xFFFFFFFFL); + } + private static void interleave(long[] x, int xOff, long[] z, int zOff, int count, int rounds) { for (int i = 0; i < count; ++i) @@ -734,12 +835,12 @@ class LongArray { while (--rounds >= 0) { - x = interleave32((int)x) | (interleave32((int)(x >>> 32)) << 1); + x = expand32((int)x) | (expand32((int)(x >>> 32)) << 1); } return x; } - private static long interleave32(int n) + private static long expand32(int n) { int r00 = INTERLEAVE_TABLE[n & 0xFF] | INTERLEAVE_TABLE[(n >>> 8) & 0xFF] << 16; int r32 = INTERLEAVE_TABLE[(n >>> 16) & 0xFF] | INTERLEAVE_TABLE[(n >>> 24) & 0xFF] << 16; |