Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/cr-marcstevens/sha1collisiondetection.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorcr-marcstevens <github@marc-stevens.nl>2017-05-10 10:59:59 +0300
committercr-marcstevens <github@marc-stevens.nl>2017-05-10 11:05:21 +0300
commite36117e1a4f4c1680e3a9ef14b1e244a3c2cd79d (patch)
treea38270f38400d46f1734bbed54bdd672cd60bb11
parent39e34e8fc94c45385f47d5cab77cdd2eb1c40118 (diff)
src/simd_table_gen.c: Completed generation of tables
Makefile: automatically generate SIMD tables when SIMD is supported
-rw-r--r--Makefile15
-rw-r--r--src/simd_table_gen.c212
2 files changed, 198 insertions, 29 deletions
diff --git a/Makefile b/Makefile
index 5a719c0..16d02a0 100644
--- a/Makefile
+++ b/Makefile
@@ -32,7 +32,7 @@ endif
LIBCOMPAT=1:0:0
-SIMD_MAX_DVS=32
+SIMD_MAX_DVS?=32
PREFIX ?= /usr/local
BINDIR=$(PREFIX)/bin
@@ -150,13 +150,16 @@ ifeq (,$(NOSIMD))
@if $(MAKE) SIMDTESTFLAGS="$(NEON128FLAGS) -DTEST_NEON128 -DSHA1DC_HAVE_NEON128" simd_test >/dev/null; then $(MAKE) SIMD=NEON128 enablesimd ; else $(MAKE) SIMD=NEON128 disablesimd; fi
endif
@if [ `grep "=1" Makefile.config | wc -l` -ne 0 ]; then \
- echo "HAVE_SIMD=1" >> Makefile.config; \
- echo "#ifndef SHA1DC_HAVE_SIMD\n#define SHA1DC_HAVE_SIMD\n#endif\n" >> lib/simd/config.h; \
+ (echo "HAVE_SIMD=1" >> Makefile.config); \
+ (echo "#ifndef SHA1DC_HAVE_SIMD\n#define SHA1DC_HAVE_SIMD\n#endif\n" >> lib/simd/config.h); \
+ cat Makefile.config; \
+ echo "\nGenerating SIMD tables: lib/simd/dvs_simd.c lib/simd/dvs_simd.h..."; \
+ ($(MAKE) gen_simd_tables | grep "finalpadding" -A20 | cat) || (echo "FAILED !"); \
else \
- echo "HAVE_SIMD=0" >> Makefile.config; \
- echo "#ifdef SHA1DC_HAVE_SIMD\n#undef SHA1DC_HAVE_SIMD\n#endif\n" >> lib/simd/config.h; \
+ (echo "HAVE_SIMD=0" >> Makefile.config); \
+ (echo "#ifdef SHA1DC_HAVE_SIMD\n#undef SHA1DC_HAVE_SIMD\n#endif\n" >> lib/simd/config.h); \
+ cat Makefile.config; \
fi
- @cat Makefile.config
.PHONY: enablesimd disablesimd
enablesimd:
diff --git a/src/simd_table_gen.c b/src/simd_table_gen.c
index 1c73cbc..1b63667 100644
--- a/src/simd_table_gen.c
+++ b/src/simd_table_gen.c
@@ -5,10 +5,25 @@
* https://opensource.org/licenses/MIT
***/
-#define MAX_SIMD_EXPONENT (4) /* max SIMD width 2^4=16 in words */
+#include "../lib/simd/config.h"
+
+/* TODO: set SIMD_MAX_WORD_ALIGNMENT and MAX_SIMD_EXPONENT in ../lib/simd/config.h depending on platform and features */
+
+/* #define MAX_SIMD_EXPONENT (4) // max SIMD width 2^4=16 in words */
+#if defined(SHA1DC_HAVE_AVX512)
+#define MAX_SIMD_EXPONENT 4
+#elif defined(SHA1DC_HAVE_AVX256)
+#define MAX_SIMD_EXPONENT 3
+#elif (defined(SHA1DC_HAVE_SSE128) || defined(SHA1DC_HAVE_NEON128))
+#define MAX_SIMD_EXPONENT 2
+#elif defined(SHA1DC_HAVE_MMX64)
+#define MAX_SIMD_EXPONENT 1
+#else
+#define MAX_SIMD_EXPONENT 0
+#endif
#define SIMD_MAX_WORD_ALIGNMENT (4) /* max alignment required is 4 words (even for 16 word vectors) */
-#define SIMD_MAX_CASE_PADDING (0) /* max padding between cases in words to try to improve alignment */
+#define SIMD_MAX_CASE_PADDING (SIMD_MAX_WORD_ALIGNMENT-1) /* max padding between cases in words to try to improve alignment */
#include <stdio.h>
#include <stdlib.h>
@@ -16,15 +31,22 @@
typedef struct
{
- int dvType;
- int dvK;
- int dvB;
+ int dvType;
+ int dvK;
+ int dvB;
int ok58;
int ok65;
uint32_t dv[80];
uint32_t dm[80];
} DV_info_t;
+typedef struct
+{
+ int off1, len1, pad1, off2, len2, pad2, finalpad;
+ int simd_off2[MAX_SIMD_EXPONENT+1];
+ int first58;
+} DV_order_info_t;
+
static uint32_t rotate_left(uint32_t x, unsigned n) { return (x<<n)|(x>>(32-n)); }
static uint32_t rotate_right(uint32_t x, unsigned n) { return (x>>n)|(x<<(32-n)); }
@@ -70,16 +92,28 @@ int parse_error(char* str)
return 1;
}
+DV_order_info_t DV_order_info;
+
int eval_align(int off1, int len1, int off2, int len2)
{
int i, j, weight = 0, maxalign;
- int newoff;
+ int newoff, pad2;
+
+ DV_order_info.off1 = off1;
+ DV_order_info.len1 = len1;
+ DV_order_info.pad1 = off2 - (off1 + len1);
+ DV_order_info.off2 = off2;
+ DV_order_info.len2 = len2;
+ DV_order_info.pad2 = 0;
+ DV_order_info.simd_off2[0] = off2;
+
for (j = 1; j <= MAX_SIMD_EXPONENT; ++j)
{
maxalign = (1<<j);
if (maxalign > SIMD_MAX_WORD_ALIGNMENT)
maxalign = SIMD_MAX_WORD_ALIGNMENT;
+ /* count how many function calls are needed for this SIMD size */
newoff = off1 & ~(maxalign-1);
for (i = newoff; i < off1 + len1; i += 1<<j)
++weight;
@@ -87,7 +121,28 @@ int eval_align(int off1, int len1, int off2, int len2)
newoff = off2 & ~(maxalign-1);
for (i = newoff; i < off2 + len2; i += 1<<j)
++weight;
+
+ /* compute offset for second case and extra padding required */
+ pad2 = i - (off2 + len2);
+ while (pad2 >= maxalign && newoff >= maxalign)
+ {
+ pad2 -= maxalign;
+ newoff -= maxalign;
+ }
+
+ DV_order_info.simd_off2[j] = newoff;
+ /* for this SIMD size we read pad2 words after the last DV, use final padding to ensure we read in allocated memory */
+ if (pad2 > DV_order_info.finalpad)
+ DV_order_info.finalpad = pad2;
+ /* but except for the last row, we can wrap-around, so we can remove padding in multiples of maxalign */
+ pad2 %= maxalign;
+ if (pad2 > DV_order_info.pad2)
+ DV_order_info.pad2 = pad2;
+
}
+ /* remove regular row padding from final table padding */
+ DV_order_info.finalpad -= DV_order_info.pad2;
+
return weight;
}
@@ -96,8 +151,20 @@ int generate_code(DV_info_t* DVS, int nrdvs)
int i,j;
int totok58 = 0, totok65 = 0;
int overlap, bestoverlap, bestweight, cnt58, cnt65, weight;
- int best58first = 0, bestpadding = 0, endpadding;
+ int best58first = 0, bestpadding = 0, nrcols;
FILE* fd;
+ DV_info_t* ordered_DVS[256];
+ DV_info_t pad_DV;
+
+ pad_DV.dvType = 0;
+ pad_DV.dvK = 0;
+ pad_DV.dvB = 0;
+ for (i = 0; i < 80; ++i)
+ pad_DV.dm[i] = 0;
+ pad_DV.ok58 = 0;
+ pad_DV.ok65 = 0;
+ for (i=0; i < 256; ++i)
+ ordered_DVS[i] = & pad_DV;
/* Compute overlap */
for (i = 0; i < nrdvs; ++i)
@@ -153,7 +220,7 @@ int generate_code(DV_info_t* DVS, int nrdvs)
bestweight = 1<<20;
best58first = 1;
- bestpadding = 0;
+ bestpadding = 0;
for (i = 0; i < SIMD_MAX_WORD_ALIGNMENT && i <= SIMD_MAX_CASE_PADDING; ++i)
{
weight = eval_align(0, cnt58, cnt58+i, cnt65);
@@ -174,38 +241,97 @@ int generate_code(DV_info_t* DVS, int nrdvs)
bestweight = weight;
}
}
- /* TODO: compute better endpadding based on MAX_SIMD_WORD_ALIGNMENT instead of 1<<MAX_SIMD_EXPONENT */
- endpadding = ((1<<20) - (cnt58 + cnt65 + bestpadding)) & ((1<<MAX_SIMD_EXPONENT)-1);
+
if (best58first)
{
- printf("Using case58(%i) padding(%i) case65(%i) padding(%i)\n", cnt58, bestpadding, cnt65, endpadding);
+ DV_order_info.first58 = 1;
+ /* recompute optimal padding for this case */
+ eval_align(0, cnt58, cnt58 + bestpadding, cnt65);
+ nrcols = cnt58 + cnt65 + DV_order_info.pad1 + DV_order_info.pad2;
+ printf("Using [case58(%i) padding(%i) case65(%i) padding(%i)]*80 + finalpadding(%i)\n", cnt58, bestpadding, cnt65, DV_order_info.pad2, DV_order_info.finalpad);
+ for (i=0,j=0; i < nrdvs; ++i)
+ if (DVS[i].ok58)
+ ordered_DVS[j++] = DVS+i;
+ j += DV_order_info.pad1;
+ for (i=0; i < nrdvs; ++i)
+ if (DVS[i].ok65)
+ ordered_DVS[j++] = DVS+i;
+ j += DV_order_info.pad2;
+ if (j != nrcols)
+ parse_error("j != nrdvs");
} else {
- printf("Using case65(%i) padding(%i) case58(%i) padding(%i)\n", cnt65, bestpadding, cnt58, endpadding);
+ DV_order_info.first58 = 0;
+ /* recompute optimal padding for this case */
+ eval_align(0, cnt65, cnt65 + bestpadding, cnt58);
+ nrcols = cnt58 + cnt65 + DV_order_info.pad1 + DV_order_info.pad2;
+ printf("Using [case65(%i) padding(%i) case58(%i) padding(%i)]*80 + finalpadding(%i)\n", cnt65, bestpadding, cnt58, DV_order_info.pad2, DV_order_info.finalpad);
+ for (i=0,j=0; i < nrdvs; ++i)
+ if (DVS[i].ok65)
+ ordered_DVS[j++] = DVS+i;
+ j += DV_order_info.pad1;
+ for (i=0; i < nrdvs; ++i)
+ if (DVS[i].ok58)
+ ordered_DVS[j++] = DVS+i;
+ j += DV_order_info.pad2;
+ if (j != nrcols)
+ parse_error("j != nrdvs");
+ }
+ for (j = 1; j <= MAX_SIMD_EXPONENT; ++j)
+ {
+ printf("SIMD %4i: off1=0 off2=%i\n", (32<<j), DV_order_info.simd_off2[j]);
}
/* Output code */
fd = fopen("lib/simd/dvs_simd.h", "w");
if (fd == NULL)
parse_error("Cannot open output file to write");
- fprintf(fd,
+ fprintf(fd,
"/***\n"
"* Copyright 2017 Marc Stevens <marc@marc-stevens.nl>, Dan Shumow <danshu@microsoft.com>\n"
"* Distributed under the MIT Software License.\n"
"* See accompanying file LICENSE.txt or copy at\n"
"* https://opensource.org/licenses/MIT\n"
"***/\n\n"
+ "#ifndef SHA1DC_DVS_SIMD_HEADER\n"
+ "#define SHA1DC_DVS_SIMD_HEADER\n\n"
"#include <stdlib.h>\n"
- "#include <stdint.h>\n"
- "#define SHA1DC_SIMD_NRDVS (%i)\n"
- "#define SHA1DC_SIMD_TABLESIZE (%i)\n"
- "extern uint32_t sha1_dm_interleaved[80][SHA1DC_SIMD_TABLESIZE];\n"
- , nrdvs, nrdvs+16);
-
+ "#include <stdint.h>\n\n"
+ "#define SHA1DC_SIMD_NRDVS (%i)\n" /*nrdvs*/
+ "#define SHA1DC_SIMD_TABLESIZE (%i)\n" /*nrdvs+pad1+pad2*/
+ "#define SHA1DC_SIMD_FINALPADDING (%i)\n" /*finalpad*/
+ ,
+ nrdvs, nrdvs+DV_order_info.pad1+DV_order_info.pad2, DV_order_info.finalpad
+ );
+ for (j = 1; j <= MAX_SIMD_EXPONENT; ++j)
+ {
+ fprintf(fd,
+ "#define SHA1DC_SIMD_%i_OFFSET58 (%i)\n" /* off1 / off2 */
+ "#define SHA1DC_SIMD_%i_OFFSET65 (%i)\n" /* off2 / off1 */
+ ,
+ (1<<j), DV_order_info.first58 ? 0 : DV_order_info.simd_off2[j],
+ (1<<j), DV_order_info.first58 ? DV_order_info.simd_off2[j] : 0
+ );
+ }
+ fprintf(fd,
+ "\ntypedef struct {\n"
+ " uint32_t dm[80][SHA1DC_SIMD_TABLESIZE];\n"
+ " uint32_t mask58[SHA1DC_SIMD_TABLESIZE+SHA1DC_SIMD_FINALPADDING];\n"
+ " uint32_t mask65[SHA1DC_SIMD_TABLESIZE+SHA1DC_SIMD_FINALPADDING];\n"
+ );
+ fprintf(fd,
+ " int dvType[SHA1DC_SIMD_TABLESIZE+SHA1DC_SIMD_FINALPADDING];\n"
+ " int dvK[SHA1DC_SIMD_TABLESIZE+SHA1DC_SIMD_FINALPADDING];\n"
+ " int dvB[SHA1DC_SIMD_TABLESIZE+SHA1DC_SIMD_FINALPADDING];\n"
+ " } sha1_dvs_interleaved_t;\n"
+ "extern const sha1_dvs_interleaved_t sha1_dvs_interleaved;\n\n"
+ "#endif /* SHA1DC_DVS_SIMD_HEADER */\n"
+ );
fclose(fd);
+
fd = fopen("lib/simd/dvs_simd.c", "w");
if (fd == NULL)
parse_error("Cannot open output file to write");
- fprintf(fd,
+ fprintf(fd,
"/***\n"
"* Copyright 2017 Marc Stevens <marc@marc-stevens.nl>, Dan Shumow <danshu@microsoft.com>\n"
"* Distributed under the MIT Software License.\n"
@@ -214,10 +340,50 @@ int generate_code(DV_info_t* DVS, int nrdvs)
"***/\n\n"
"#include \"dvs_simd.h\"\n"
"#include <stdlib.h>\n"
- "#include <stdint.h>\n"
+ "#include <stdint.h>\n\n"
+ "const sha1_dvs_interleaved_t sha1_dvs_interleaved = {\n"
+ " {\n"
);
+ for (i = 0; i < 80; ++i)
+ {
+ fprintf(fd, " {");
+ for (j = 0; j < nrcols; ++j)
+ if (ordered_DVS[j]->ok58+ordered_DVS[j]->ok65==0)
+ fprintf(fd, ", 0");
+ else
+ fprintf(fd, "%s 0x%08x", j==0?"":",", ordered_DVS[j]->dm[i]);
+ fprintf(fd, " }%s\n", i<79 ? "," : "");
+ }
+ fprintf(fd, " },\n");
+
+ fprintf(fd, " {");
+ for (j=0; j < nrcols+DV_order_info.finalpad; ++j)
+ fprintf(fd, "%s %s", j==0?"":",", ordered_DVS[j]->ok58 ? "0xFFFFFFFF" : (ordered_DVS[j]->ok65?"0x00000000":"0"));
+ fprintf(fd, " },\n");
+
+ fprintf(fd, " {");
+ for (j=0; j < nrcols+DV_order_info.finalpad; ++j)
+ fprintf(fd, "%s %s", j==0?"":",", ordered_DVS[j]->ok65 ? "0xFFFFFFFF" : (ordered_DVS[j]->ok58?"0x00000000":"0"));
+ fprintf(fd, " },\n");
+
+ fprintf(fd, " {");
+ for (j=0; j < nrcols+DV_order_info.finalpad; ++j)
+ fprintf(fd, "%s %2i", j==0?"":",", ordered_DVS[j]->dvType);
+ fprintf(fd, " },\n");
+
+ fprintf(fd, " {");
+ for (j=0; j < nrcols+DV_order_info.finalpad; ++j)
+ fprintf(fd, "%s %2i", j==0?"":",", ordered_DVS[j]->dvK);
+ fprintf(fd, " },\n");
+
+ fprintf(fd, " {");
+ for (j=0; j < nrcols+DV_order_info.finalpad; ++j)
+ fprintf(fd, "%s %2i", j==0?"":",", ordered_DVS[j]->dvB);
+ fprintf(fd, " }\n");
+ fprintf(fd, "};\n");
+
fclose(fd);
-
+
return 0;
}
@@ -282,7 +448,7 @@ int process_dv_list(char* filename, int maxDVs)
if (DV != DVS+nrdvs)
parse_error("huh?!?");
if (nrdvs >= maxDVs)
- break;
+ break;
}
fclose(fd);
return generate_code(DVS, nrdvs);