diff options
author | Dan Shumow <shumow@gmail.com> | 2017-05-10 12:19:50 +0300 |
---|---|---|
committer | Dan Shumow <shumow@gmail.com> | 2017-05-10 12:19:50 +0300 |
commit | 0fcea2fe105fd91d8ada3e7d2366708b05a60190 (patch) | |
tree | 25db59e49f6390284cfc3595981325708299d739 | |
parent | 029bc14f8a331599bab85b67f17a13086908a548 (diff) | |
parent | 6d2c286ea7e348e8e2ca26c45045c35dcd7b2319 (diff) |
Merge branch 'simd' of https://github.com/cr-marcstevens/sha1collisiondetection into simd
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | Makefile | 37 | ||||
-rw-r--r-- | lib/simd/sha1_simd.c | 71 | ||||
-rw-r--r-- | lib/simd/sha1_simd.h | 16 | ||||
-rw-r--r-- | lib/simd/sha1_simd_avx256.c | 22 | ||||
-rw-r--r-- | lib/simd/sha1_simd_avx512.c | 13 | ||||
-rw-r--r-- | lib/simd/sha1_simd_mmx64.c | 15 | ||||
-rw-r--r-- | lib/simd/sha1_simd_neon128.c | 13 | ||||
-rw-r--r-- | lib/simd/sha1_simd_sse128.c | 13 | ||||
-rw-r--r-- | src/DV_data.txt | 32 | ||||
-rw-r--r-- | src/simd_table_gen.c | 468 |
11 files changed, 673 insertions, 29 deletions
@@ -31,6 +31,8 @@ #sha1 collision detection specific config files. Makefile.config lib/simd/config.h +lib/simd/dvs_simd.c +lib/simd/dvs_simd.h # sha1 collision detection specific build directories bin/ @@ -14,8 +14,10 @@ endif # if Makefile.config does not exist, create empty lib/simd/config.h and warn ifeq (,$(wildcard Makefile.config)) $(shell echo > lib/simd/config.h) -ifneq (config,$(MAKECMDGOALS)) -$(error Run 'make config' or 'make NOSIMD=1 config' first) +$(shell echo > lib/simd/dvs_simd.h) +$(shell echo > lib/simd/dvs_simd.c) +ifeq (,$(filter config clean,$(MAKECMDGOALS))) +$(error Run 'make [NOSIMD=1] [SIMD_MAX_DVS=32] config' first) endif endif @@ -32,6 +34,8 @@ endif LIBCOMPAT=1:0:0 +SIMD_MAX_DVS?=32 + PREFIX ?= /usr/local BINDIR=$(PREFIX)/bin LIBDIR=$(PREFIX)/lib @@ -92,7 +96,7 @@ SRC_OBJ_DIR=obj_src H_DEP:=$(shell find . -type f -name "*.h") FS_LIB=$(wildcard $(LIB_DIR)/*.c) -FS_SRC=$(wildcard $(SRC_DIR)/*.c) +FS_SRC=$(wildcard $(SRC_DIR)/main.c) FS_SIMD_LIB= ifeq ($(HAVE_SIMD),1) @@ -140,6 +144,8 @@ all: library tools config: @echo > lib/simd/config.h @echo > Makefile.config + @echo > lib/simd/dvs_simd.h + @echo > lib/simd/dvs_simd.c ifeq (,$(NOSIMD)) @if $(MAKE) SIMDTESTFLAGS="$(MMX64FLAGS) -DTEST_MMX64 -DSHA1DC_HAVE_MMX64" simd_test >/dev/null; then $(MAKE) SIMD=MMX64 enablesimd ; else $(MAKE) SIMD=MMX64 disablesimd; fi @if $(MAKE) SIMDTESTFLAGS="$(SSE128FLAGS) -DTEST_SSE128 -DSHA1DC_HAVE_SSE128" simd_test >/dev/null; then $(MAKE) SIMD=SSE128 enablesimd ; else $(MAKE) SIMD=SSE128 disablesimd; fi @@ -148,13 +154,16 @@ ifeq (,$(NOSIMD)) @if $(MAKE) SIMDTESTFLAGS="$(NEON128FLAGS) -DTEST_NEON128 -DSHA1DC_HAVE_NEON128" simd_test >/dev/null; then $(MAKE) SIMD=NEON128 enablesimd ; else $(MAKE) SIMD=NEON128 disablesimd; fi endif @if [ `grep "=1" Makefile.config | wc -l` -ne 0 ]; then \ - echo "HAVE_SIMD=1" >> Makefile.config; \ - echo "#ifndef SHA1DC_HAVE_SIMD\n#define SHA1DC_HAVE_SIMD\n#endif\n" >> lib/simd/config.h; \ + (echo "HAVE_SIMD=1" >> Makefile.config); \ + (echo "#ifndef SHA1DC_HAVE_SIMD\n#define SHA1DC_HAVE_SIMD\n#endif\n" >> lib/simd/config.h); \ + cat Makefile.config; \ + echo "\nGenerating SIMD tables using max $(SIMD_MAX_DVS) DVs: lib/simd/dvs_simd.c lib/simd/dvs_simd.h..."; \ + ($(MAKE) gen_simd_tables | grep "finalpadding" -A20 | cat) || (echo "FAILED !"); \ else \ - echo "HAVE_SIMD=0" >> Makefile.config; \ - echo "#ifdef SHA1DC_HAVE_SIMD\n#undef SHA1DC_HAVE_SIMD\n#endif\n" >> lib/simd/config.h; \ + (echo "HAVE_SIMD=0" >> Makefile.config); \ + (echo "#ifdef SHA1DC_HAVE_SIMD\n#undef SHA1DC_HAVE_SIMD\n#endif\n" >> lib/simd/config.h); \ + cat Makefile.config; \ fi - @cat Makefile.config .PHONY: enablesimd disablesimd enablesimd: @@ -183,14 +192,13 @@ uninstall: .PHONY: clean clean: - -rm -rf bin Makefile.config lib/simd/config.h dep_lib obj_lib dep_src obj_src + -rm -rf bin Makefile.config lib/simd/config.h lib/simd/dvs_simd.* dep_lib obj_lib dep_src obj_src -find . -type f -name '*.a' -print -delete -find . -type f -name '*.d' -print -delete -find . -type f -name '*.o' -print -delete -find . -type f -name '*.la' -print -delete -find . -type f -name '*.lo' -print -delete -find . -type f -name '*.so' -print -delete - -find . -type d -name '.libs' -print | xargs rm -rv .PHONY: test test: tools @@ -235,6 +243,15 @@ bin/sha1dcsum_partialcoll: $(FS_OBJ_SRC) bin/libsha1detectcoll.$(LIB_EXT) $(LD) $(LDFLAGS) $(FS_OBJ_SRC) -Lbin -lsha1detectcoll -o bin/sha1dcsum_partialcoll +bin/simd_table_gen: $(SRC_OBJ_DIR)/simd_table_gen.lo + $(MKDIR) $(shell dirname $@) + $(LD) $(LDFLAGS) $(SRC_OBJ_DIR)/simd_table_gen.lo -Lbin -o bin/simd_table_gen + +.PHONY: gen_simd_tables +gen_simd_tables: bin/simd_table_gen + bin/simd_table_gen src/DV_data.txt $(SIMD_MAX_DVS) + + $(SRC_DEP_DIR)/%.d: $(SRC_DIR)/%.c $(MKDIR) $(shell dirname $@) $(CC_DEP) $(CFLAGS) -M -MF $@ $< diff --git a/lib/simd/sha1_simd.c b/lib/simd/sha1_simd.c index cfc6f6c..a852c4f 100644 --- a/lib/simd/sha1_simd.c +++ b/lib/simd/sha1_simd.c @@ -10,8 +10,9 @@ #include "sha1.h" #include "sha1_simd.h" +#include "dvs_simd.h" - +/* #define CNT_SHA1_DVS (16) uint32_t sha1_dvs_interleaved[80][CNT_SHA1_DVS] = { @@ -104,6 +105,7 @@ size_t offset58 = 0; size_t offset65 = 0; size_t len58 = 0; size_t len65 = 0; +*/ /* volatile char should have atomic writes. */ volatile char simd_index = -1; @@ -114,7 +116,7 @@ sha1_simd_implementation_t *simd_implementation_table[SIMD_IMPLEMENTATION_CNT+1] &sha1_simd_mmx64_implementation, #endif #ifdef SHA1DC_HAVE_SSE128 - &sha1_simd_SSE128_implementation, + &sha1_simd_sse128_implementation, #endif #ifdef SHA1DC_HAVE_NEON128 &sha1_simd_neon128_implementation, @@ -128,7 +130,72 @@ sha1_simd_implementation_t *simd_implementation_table[SIMD_IMPLEMENTATION_CNT+1] NULL }; +void initialize_simd() +{ + /* TODO Put configuration code here. */ +} + +size_t get_simd_index() +{ + if ((char)-1 == simd_index) + { + initialize_simd(); + } + + return (size_t)simd_index; +} +void sha1_recompress_fast_58_simd(void* ihvin, void* ihvout, const void* me, void* state) +{ + size_t i; + + i = get_simd_index(); + + if ((i < SIMD_IMPLEMENTATION_CNT) && + (NULL != simd_implementation_table[i])) + { + simd_implementation_table[i]->sha1_recompression_fast_58(ihvin, ihvout, me, state); + } +} + +void sha1_recompress_fast_65_simd(void* ihvin, void* ihvout, const void* me, void* state) +{ + size_t i; + + i = get_simd_index(); + + if ((i < SIMD_IMPLEMENTATION_CNT) && + (NULL != simd_implementation_table[i])) + { + simd_implementation_table[i]->sha1_recompression_fast_65(ihvin, ihvout, me, state); + } +} + +void sha1_apply_message_differences_simd(const uint32_t me[80], const void* dm, void* dme) +{ + size_t i; + + i = get_simd_index(); + + if ((i < SIMD_IMPLEMENTATION_CNT) && + (NULL != simd_implementation_table[i])) + { + simd_implementation_table[i]->sha1_apply_message_differences(me, dm, dme); + } +} + +void sha1_compare_digests_simd(const SHA1_CTX* ctx, const void* ihv_full_collision, const void* ihv_reduced_round, void* collision_detected) +{ + size_t i; + + i = get_simd_index(); + + if ((i < SIMD_IMPLEMENTATION_CNT) && + (NULL != simd_implementation_table[i])) + { + simd_implementation_table[i]->sha1_compare_digests(ctx, ihv_full_collision, ihv_reduced_round, collision_detected); + } +} #ifdef SHA1_SIMD_IMPLEMENTED_XXXXX static void sha1_process_simd(SHA1_CTX* ctx, const uint32_t block[16]) diff --git a/lib/simd/sha1_simd.h b/lib/simd/sha1_simd.h index 7ec9e87..21965a8 100644 --- a/lib/simd/sha1_simd.h +++ b/lib/simd/sha1_simd.h @@ -7,13 +7,15 @@ #ifndef SHA1DC_SHA1_SIMD_H #define SHA1DC_SHA1_SIMD_H +#include "config.h" + typedef enum { - simd_type_mmx64 = 0, - simd_type_sse128, - simd_type_neon128, - simd_type_avx256, - simd_type_avx512, - simd_type_unknown = 0xff + simd_type_mmx64 = 0, + simd_type_sse128, + simd_type_neon128, + simd_type_avx256, + simd_type_avx512, + simd_type_unknown = 0xff } simd_type; typedef void (*sha1_recompression_simd_fn)(void*, void*, const void*, const void*); @@ -37,7 +39,7 @@ typedef struct { #endif #ifdef SHA1DC_HAVE_SSE128 - #define SHA1DC_SSE128 (1)_ + #define SHA1DC_SSE128 (1) extern sha1_simd_implementation_t sha1_simd_sse128_implementation; #else #define SHA1DC_SSE128 (0) diff --git a/lib/simd/sha1_simd_avx256.c b/lib/simd/sha1_simd_avx256.c index dc23f60..91c1930 100644 --- a/lib/simd/sha1_simd_avx256.c +++ b/lib/simd/sha1_simd_avx256.c @@ -11,19 +11,31 @@ #ifdef SHA1DC_HAVE_AVX256 #include "sha1.h" +#include "sha1_simd.h" + #include "simd_avx256.h" -#define SHA1_MESSAGE_EXPANSION_SIMD sha1_message_expansion_avx256 -#define SHA1_COMPRESSION_SIMD sha1_avx256 -#define SHA1_COMPRESSION_W_SIMD sha1_W_avx256 +#define SHA1_MESSAGE_EXPANSION_SIMD sha1_message_expansion_avx256 +#define SHA1_COMPRESSION_SIMD sha1_avx256 +#define SHA1_COMPRESSION_W_SIMD sha1_W_avx256 #define SHA1_COMPRESSION_STATES_SIMD sha1_states_avx256 -#define SHA1_RECOMPRESSION_SIMD(t) sha1recompress_fast_ ## t ## _avx256 +#define SHA1_RECOMPRESSION_SIMD(t) sha1_recompress_fast_ ## t ## _avx256 #define SHA1_RECOMPRESSION_TABLE_SIMD sha1_recompression_step_avx256 #define SHA1_APPLY_MESSAGE_DIFFERENCES sha1_apply_message_differences_avx256 -#define SHA1_COMPARE_DIGESTS sha1_compre_digests_avx256 +#define SHA1_COMPARE_DIGESTS sha1_compare_digests_avx256 #include "sha1_simd.cinc" +sha1_simd_implementation_t sha1_simd_avx256_implementation = +{ + simd_type_avx256, + SIMD_VECSIZE, + (sha1_recompression_simd_fn)sha1_recompress_fast_58_avx256, + (sha1_recompression_simd_fn)sha1_recompress_fast_65_avx256, + (sha1_apply_message_differences_simd_fn)sha1_apply_message_differences_avx256, + (sha1_compare_digests_simd_fn)sha1_compare_digests_avx256 +}; + #else #pragma message "The file: sha1_simd_avx256.c is not compiled for this architecture." diff --git a/lib/simd/sha1_simd_avx512.c b/lib/simd/sha1_simd_avx512.c index 376fd50..16bbf31 100644 --- a/lib/simd/sha1_simd_avx512.c +++ b/lib/simd/sha1_simd_avx512.c @@ -17,7 +17,7 @@ #define SHA1_COMPRESSION_SIMD sha1_avx512 #define SHA1_COMPRESSION_W_SIMD sha1_W_avx512 #define SHA1_COMPRESSION_STATES_SIMD sha1_states_avx512 -#define SHA1_RECOMPRESSION_SIMD(t) sha1recompress_fast_ ## t ## _avx512 +#define SHA1_RECOMPRESSION_SIMD(t) sha1_recompress_fast_ ## t ## _avx512 #define SHA1_RECOMPRESSION_TABLE_SIMD sha1_recompression_step_avx512 #define SHA1_APPLY_MESSAGE_DIFFERENCES sha1_apply_message_differences_avx512 #define SHA1_COMPARE_DIGESTS sha1_compare_digests_avx512 @@ -25,6 +25,17 @@ #include "sha1_simd.cinc" +sha1_simd_implementation_t sha1_simd_avx512_implementation = +{ + simd_type_avx512, + SIMD_VECSIZE, + (sha1_recompression_simd_fn)sha1_recompress_fast_58_avx512, + (sha1_recompression_simd_fn)sha1_recompress_fast_65_avx512, + (sha1_apply_message_differences_simd_fn)sha1_apply_message_differences_avx512, + (sha1_compare_digests_simd_fn)sha1_compare_digests_avx512 +}; + + #else #pragma message "The file: sha1_simd_avx512.c is not compiled for this architecture." diff --git a/lib/simd/sha1_simd_mmx64.c b/lib/simd/sha1_simd_mmx64.c index 811c716..69ee396 100644 --- a/lib/simd/sha1_simd_mmx64.c +++ b/lib/simd/sha1_simd_mmx64.c @@ -9,21 +9,32 @@ #include "simd_config.h" #ifdef SHA1DC_HAVE_MMX64 - #include "sha1.h" +#include "sha1_simd.h" + #include "simd_mmx64.h" #define SHA1_MESSAGE_EXPANSION_SIMD sha1_message_expansion_mmx64 #define SHA1_COMPRESSION_SIMD sha1_mmx64 #define SHA1_COMPRESSION_W_SIMD sha1_W_mmx64 #define SHA1_COMPRESSION_STATES_SIMD sha1_states_mmx64 -#define SHA1_RECOMPRESSION_SIMD(t) sha1recompress_fast_ ## t ## _mmx64 +#define SHA1_RECOMPRESSION_SIMD(t) sha1_recompress_fast_ ## t ## _mmx64 #define SHA1_RECOMPRESSION_TABLE_SIMD sha1_recompression_step_mmx64 #define SHA1_APPLY_MESSAGE_DIFFERENCES sha1_apply_message_differences_mmx64 #define SHA1_COMPARE_DIGESTS sha1_compare_digests_mmx64 #include "sha1_simd.cinc" +sha1_simd_implementation_t sha1_simd_mmx64_implementation = +{ + simd_type_mmx64, + SIMD_VECSIZE, + (sha1_recompression_simd_fn)sha1_recompress_fast_58_mmx64, + (sha1_recompression_simd_fn)sha1_recompress_fast_65_mmx64, + (sha1_apply_message_differences_simd_fn)sha1_apply_message_differences_mmx64, + (sha1_compare_digests_simd_fn)sha1_compare_digests_mmx64 +}; + #else #pragma message "The file: sha1_simd_mmx64.c is not compiled for this architecture." diff --git a/lib/simd/sha1_simd_neon128.c b/lib/simd/sha1_simd_neon128.c index de16311..e73b21d 100644 --- a/lib/simd/sha1_simd_neon128.c +++ b/lib/simd/sha1_simd_neon128.c @@ -10,6 +10,7 @@ #include "simd_config.h" #ifdef SHA1DC_HAVE_NEON128 #include "sha1.h" +#include "sha1_simd.h" #include "simd_neon128.h" @@ -17,13 +18,23 @@ #define SHA1_COMPRESSION_SIMD sha1_neon128 #define SHA1_COMPRESSION_W_SIMD sha1_W_neon128 #define SHA1_COMPRESSION_STATES_SIMD sha1_states_neon128 -#define SHA1_RECOMPRESSION_SIMD(t) sha1recompress_fast_ ## t ## _neon128 +#define SHA1_RECOMPRESSION_SIMD(t) sha1_recompress_fast_ ## t ## _neon128 #define SHA1_RECOMPRESSION_TABLE_SIMD sha1_recompression_step_neon128 #define SHA1_APPLY_MESSAGE_DIFFERENCES sha1_apply_message_differences_neon128 #define SHA1_COMPARE_DIGESTS sha1_compare_digests_neon128 #include "sha1_simd.cinc" +sha1_simd_implementation_t sha1_simd_neon128_implementation = +{ + simd_type_neon128, + SIMD_VECSIZE, + (sha1_recompression_simd_fn)sha1_recompress_fast_58_neon128, + (sha1_recompression_simd_fn)sha1_recompress_fast_65_neon128, + (sha1_apply_message_differences_simd_fn)sha1_apply_message_differences_neon128, + (sha1_compare_digests_simd_fn)sha1_compare_digests_neon128 +}; + #else #pragma message "The file: sha1_simd_neon128.c is not compiled for this architecture." diff --git a/lib/simd/sha1_simd_sse128.c b/lib/simd/sha1_simd_sse128.c index dac6560..f714b93 100644 --- a/lib/simd/sha1_simd_sse128.c +++ b/lib/simd/sha1_simd_sse128.c @@ -10,6 +10,7 @@ #include "simd_config.h" #ifdef SHA1DC_HAVE_SSE128 #include "sha1.h" +#include "sha1_simd.h" #include "simd_sse128.h" @@ -17,13 +18,23 @@ #define SHA1_COMPRESSION_SIMD sha1_sse128 #define SHA1_COMPRESSION_W_SIMD sha1_W_sse128 #define SHA1_COMPRESSION_STATES_SIMD sha1_states_sse128 -#define SHA1_RECOMPRESSION_SIMD(t) sha1recompress_fast_ ## t ## _sse128 +#define SHA1_RECOMPRESSION_SIMD(t) sha1_recompress_fast_ ## t ## _sse128 #define SHA1_RECOMPRESSION_TABLE_SIMD sha1_recompression_step_sse128 #define SHA1_APPLY_MESSAGE_DIFFERENCES sha1_apply_message_differences_sse128 #define SHA1_COMPARE_DIGESTS sha1_compare_digests_sse128 #include "sha1_simd.cinc" +sha1_simd_implementation_t sha1_simd_sse128_implementation = +{ + simd_type_sse128, + SIMD_VECSIZE, + (sha1_recompression_simd_fn)sha1_recompress_fast_58_sse128, + (sha1_recompression_simd_fn)sha1_recompress_fast_65_sse128, + (sha1_apply_message_differences_simd_fn)sha1_apply_message_differences_sse128, + (sha1_compare_digests_simd_fn)sha1_compare_digests_sse128 +}; + #else #pragma message "The file: sha1_simd_sse128.c is not compiled for this architecture." diff --git a/src/DV_data.txt b/src/DV_data.txt new file mode 100644 index 0000000..1aa49a9 --- /dev/null +++ b/src/DV_data.txt @@ -0,0 +1,32 @@ +I_48_0 : compl=2^64.5138 (prob=2^-71.5138) +II_46_0 : compl=2^65.0808 (prob=2^-72.0808) +I_50_0 : compl=2^65.2793 (prob=2^-72.2793) +I_49_0 : compl=2^65.4313 (prob=2^-72.4313) +II_51_0 : compl=2^65.7008 (prob=2^-72.7008) +II_52_0 : compl=2^65.7008 (prob=2^-72.7008) +I_51_0 : compl=2^66.4313 (prob=2^-73.4313) +II_50_0 : compl=2^66.8464 (prob=2^-73.8464) +II_53_0 : compl=2^67.1862 (prob=2^-74.1862) +I_48_2 : compl=2^67.1887 (prob=2^-74.1887) +II_54_0 : compl=2^67.5081 (prob=2^-74.5081) +I_49_2 : compl=2^67.5424 (prob=2^-74.5424) +II_56_0 : compl=2^68.0342 (prob=2^-75.0342) +II_45_0 : compl=2^68.2508 (prob=2^-75.2508) +I_47_0 : compl=2^68.2793 (prob=2^-75.2793) +I_50_2 : compl=2^68.3904 (prob=2^-75.3904) +II_49_0 : compl=2^68.4075 (prob=2^-75.4075) +II_55_0 : compl=2^68.8707 (prob=2^-75.8707) +II_48_0 : compl=2^68.8788 (prob=2^-75.8788) +II_47_0 : compl=2^68.9288 (prob=2^-75.9288) +I_46_0 : compl=2^69.0163 (prob=2^-76.0163) +I_52_0 : compl=2^69.2938 (prob=2^-76.2938) +II_50_2 : compl=2^69.8967 (prob=2^-76.8967) +I_51_2 : compl=2^70.32 (prob=2^-77.32) +I_47_2 : compl=2^70.3904 (prob=2^-77.3904) +I_45_0 : compl=2^70.4858 (prob=2^-77.4858) +I_44_0 : compl=2^70.5424 (prob=2^-77.5424) +II_51_2 : compl=2^70.5488 (prob=2^-77.5488) +I_43_0 : compl=2^70.7939 (prob=2^-77.7939) +II_46_2 : compl=2^71.0834 (prob=2^-78.0834) +II_49_2 : compl=2^71.4638 (prob=2^-78.4638) +I_46_2 : compl=2^71.9257 (prob=2^-78.9257) diff --git a/src/simd_table_gen.c b/src/simd_table_gen.c new file mode 100644 index 0000000..44878f4 --- /dev/null +++ b/src/simd_table_gen.c @@ -0,0 +1,468 @@ +/*** +* Copyright 2017 Marc Stevens <marc@marc-stevens.nl>, Dan Shumow <danshu@microsoft.com> +* Distributed under the MIT Software License. +* See accompanying file LICENSE.txt or copy at +* https://opensource.org/licenses/MIT +***/ + +#include "../lib/simd/config.h" + +/* TODO: set SIMD_MAX_WORD_ALIGNMENT and MAX_SIMD_EXPONENT in ../lib/simd/config.h depending on platform and features */ + +/* #define MAX_SIMD_EXPONENT (4) // max SIMD width 2^4=16 in words */ +#if defined(SHA1DC_HAVE_AVX512) +#define MAX_SIMD_EXPONENT 4 +#elif defined(SHA1DC_HAVE_AVX256) +#define MAX_SIMD_EXPONENT 3 +#elif (defined(SHA1DC_HAVE_SSE128) || defined(SHA1DC_HAVE_NEON128)) +#define MAX_SIMD_EXPONENT 2 +#elif defined(SHA1DC_HAVE_MMX64) +#define MAX_SIMD_EXPONENT 1 +#else +#define MAX_SIMD_EXPONENT 0 +#endif + +#define SIMD_MAX_WORD_ALIGNMENT (4) /* max alignment required is 4 words (even for 16 word vectors) */ +#define SIMD_MAX_CASE_PADDING (SIMD_MAX_WORD_ALIGNMENT-1) /* max padding between cases in words to try to improve alignment */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> + +typedef struct +{ + int dvType; + int dvK; + int dvB; + int ok58; + int ok65; + uint32_t dv[80]; + uint32_t dm[80]; +} DV_info_t; + +typedef struct +{ + int off1, len1, pad1, off2, len2, pad2, finalpad; + int simd_off2[MAX_SIMD_EXPONENT+1]; + int first58; +} DV_order_info_t; + +static uint32_t rotate_left(uint32_t x, unsigned n) { return (x<<n)|(x>>(32-n)); } +static uint32_t rotate_right(uint32_t x, unsigned n) { return (x>>n)|(x<<(32-n)); } + +void expand_me(uint32_t v[80], int offset) +{ + int i; + for (i = offset - 1; i >= 0; --i) + v[i] = rotate_right(v[i+16], 1) ^ v[i+13] ^ v[i+8] ^ v[i+2]; + for (i = offset + 16; i < 80; ++i) + v[i] = rotate_left( v[i-3] ^ v[i-8] ^ v[i-14] ^ v[i-16], 1); +} + +void init_DV(DV_info_t* DV) +{ + int i; + int K = DV->dvK, B = DV->dvB, T = DV->dvType; + uint32_t* dv = DV->dv; + uint32_t* dm = DV->dm; + + /* initialize 16 words of dv */ + for (i = 0; i < 80; ++i) + DV->dv[i] = 0; + DV->dv[K+15] = 1 << B; + if (T == 2) + { + DV->dv[K+1] = 1 << ((31+B) & 31); + DV->dv[K+3] = 1 << ((31+B) & 31); + } + /* expand to entire dv */ + expand_me(dv, K); + + /* initialize 16 words of dm */ + for (i = 16; i < 32; ++i) + dm[i] = dv[i-0] ^ rotate_left(dv[i-1],5) ^ dv[i-2] + ^ rotate_left(dv[i-3],30) ^ rotate_left(dv[i-4],30) ^ rotate_left(dv[i-5],30); + /* expand to entire dm */ + expand_me(dm, 16); +} + +int parse_error(char* str) +{ + fprintf(stderr, "Parse error: %s", str); + return 1; +} + +DV_order_info_t DV_order_info; + +int eval_align(int off1, int len1, int off2, int len2) +{ + int i, j, weight = 0, maxalign; + int newoff, pad2; + + DV_order_info.off1 = off1; + DV_order_info.len1 = len1; + DV_order_info.pad1 = off2 - (off1 + len1); + DV_order_info.off2 = off2; + DV_order_info.len2 = len2; + DV_order_info.pad2 = 0; + DV_order_info.simd_off2[0] = off2; + + for (j = 1; j <= MAX_SIMD_EXPONENT; ++j) + { + maxalign = (1<<j); + if (maxalign > SIMD_MAX_WORD_ALIGNMENT) + maxalign = SIMD_MAX_WORD_ALIGNMENT; + + /* count how many function calls are needed for this SIMD size */ + newoff = off1 & ~(maxalign-1); + for (i = newoff; i < off1 + len1; i += 1<<j) + ++weight; + + newoff = off2 & ~(maxalign-1); + for (i = newoff; i < off2 + len2; i += 1<<j) + ++weight; + + /* compute offset for second case and extra padding required */ + pad2 = i - (off2 + len2); + while (pad2 >= maxalign && newoff >= maxalign) + { + pad2 -= maxalign; + newoff -= maxalign; + } + + DV_order_info.simd_off2[j] = newoff; + /* for this SIMD size we read pad2 words after the last DV, use final padding to ensure we read in allocated memory */ + if (pad2 > DV_order_info.finalpad) + DV_order_info.finalpad = pad2; + /* but except for the last row, we can wrap-around, so we can remove padding in multiples of maxalign */ + pad2 %= maxalign; + if (pad2 > DV_order_info.pad2) + DV_order_info.pad2 = pad2; + + } + /* remove regular row padding from final table padding */ + DV_order_info.finalpad -= DV_order_info.pad2; + + return weight; +} + +int generate_code(DV_info_t* DVS, int nrdvs) +{ + int i,j; + int totok58 = 0, totok65 = 0; + int overlap, bestoverlap, bestweight, cnt58, cnt65, weight; + int best58first = 0, bestpadding = 0, nrcols; + FILE* fd; + DV_info_t* ordered_DVS[256]; + DV_info_t pad_DV; + + pad_DV.dvType = 0; + pad_DV.dvK = 0; + pad_DV.dvB = 0; + for (i = 0; i < 80; ++i) + pad_DV.dm[i] = 0; + pad_DV.ok58 = 0; + pad_DV.ok65 = 0; + for (i=0; i < 256; ++i) + ordered_DVS[i] = & pad_DV; + + /* Compute overlap */ + for (i = 0; i < nrdvs; ++i) + { + totok58 += DVS[i].ok58; + totok65 += DVS[i].ok65; + } + printf("totDVs=%i totok58=%i totok65=%i\n", nrdvs, totok58, totok65); + + overlap = totok58 + totok65 - nrdvs; + if (overlap < 0) parse_error("overlap negative"); + + /* Analyze best division */ + bestoverlap = 0; + bestweight = 1<<20; + printf("\nAnalyzing overlap:\n"); + for (i = 0; i <= overlap; ++i) + { + cnt58 = totok58 - overlap + i; + cnt65 = totok65 - i; + weight = 0; + for (j = 1; j <= MAX_SIMD_EXPONENT; ++j) + weight += ((cnt58+(1<<j)-1)>>j) + ((cnt65+(1<<j)-1)>>j); + printf("cnt58=%i cnt65=%i weight=%i\n", cnt58, cnt65, weight); + if (weight < bestweight) + { + bestoverlap = i; + bestweight = weight; + } + } + cnt58 = totok58 - overlap + bestoverlap; + cnt65 = totok65 - bestoverlap; + + printf("Using cnt58=%i cnt65=%i weight=%i\n", cnt58, cnt65, bestweight); + + /* Apply best division */ + j = bestoverlap; + for (i = 0; i < nrdvs; ++i) + { + if (DVS[i].ok58 + DVS[i].ok65 == 2) + { + if (j > 0) + { + DVS[i].ok65 = 0; + --j; + } + else + { + DVS[i].ok58 = 0; + } + } + } + + bestweight = 1<<20; + best58first = 1; + bestpadding = 0; + for (i = 0; i < SIMD_MAX_WORD_ALIGNMENT && i <= SIMD_MAX_CASE_PADDING; ++i) + { + weight = eval_align(0, cnt58, cnt58+i, cnt65); + printf("Eval align %i %i %i = %i\n", cnt58, i, cnt65, weight); + if (weight < bestweight) + { + best58first = 1; + bestpadding = i; + bestweight = weight; + } + + weight = eval_align(0, cnt65, cnt65+i, cnt58); + printf("Eval align %i %i %i = %i\n", cnt65, i, cnt58, weight); + if (weight < bestweight) + { + best58first = 0; + bestpadding = i; + bestweight = weight; + } + } + + if (best58first) + { + DV_order_info.first58 = 1; + /* recompute optimal padding for this case */ + eval_align(0, cnt58, cnt58 + bestpadding, cnt65); + nrcols = cnt58 + cnt65 + DV_order_info.pad1 + DV_order_info.pad2; + printf("Using table structure: [case58(#=%i) padding(#=%i) case65(#=%i) padding(#=%i)]*80 + finalpadding(#=%i)\n", cnt58, bestpadding, cnt65, DV_order_info.pad2, DV_order_info.finalpad); + for (i=0,j=0; i < nrdvs; ++i) + if (DVS[i].ok58) + ordered_DVS[j++] = DVS+i; + j += DV_order_info.pad1; + for (i=0; i < nrdvs; ++i) + if (DVS[i].ok65) + ordered_DVS[j++] = DVS+i; + j += DV_order_info.pad2; + if (j != nrcols) + parse_error("j != nrdvs"); + } else { + DV_order_info.first58 = 0; + /* recompute optimal padding for this case */ + eval_align(0, cnt65, cnt65 + bestpadding, cnt58); + nrcols = cnt58 + cnt65 + DV_order_info.pad1 + DV_order_info.pad2; + printf("Using table structure: [case65(#=%i) padding(#=%i) case58(#=%i) padding(#=%i)]*80 + finalpadding(#=%i)\n", cnt65, bestpadding, cnt58, DV_order_info.pad2, DV_order_info.finalpad); + for (i=0,j=0; i < nrdvs; ++i) + if (DVS[i].ok65) + ordered_DVS[j++] = DVS+i; + j += DV_order_info.pad1; + for (i=0; i < nrdvs; ++i) + if (DVS[i].ok58) + ordered_DVS[j++] = DVS+i; + j += DV_order_info.pad2; + if (j != nrcols) + parse_error("j != nrdvs"); + } + for (j = 1; j <= MAX_SIMD_EXPONENT; ++j) + { + printf("SIMD %4i: off1=0 off2=%i\n", (32<<j), DV_order_info.simd_off2[j]); + } + + /* Output code */ + fd = fopen("lib/simd/dvs_simd.h", "w"); + if (fd == NULL) + parse_error("Cannot open output file to write"); + fprintf(fd, + "/***\n" + "* Copyright 2017 Marc Stevens <marc@marc-stevens.nl>, Dan Shumow <danshu@microsoft.com>\n" + "* Distributed under the MIT Software License.\n" + "* See accompanying file LICENSE.txt or copy at\n" + "* https://opensource.org/licenses/MIT\n" + "***/\n\n" + "#ifndef SHA1DC_DVS_SIMD_HEADER\n" + "#define SHA1DC_DVS_SIMD_HEADER\n\n" + "#include <stdlib.h>\n" + "#include <stdint.h>\n\n" + "#define SHA1DC_SIMD_NRDVS (%i)\n" /*nrdvs*/ + "#define SHA1DC_SIMD_TABLESIZE (%i)\n" /*nrdvs+pad1+pad2*/ + "#define SHA1DC_SIMD_FINALPADDING (%i)\n" /*finalpad*/ + , + nrdvs, nrdvs+DV_order_info.pad1+DV_order_info.pad2, DV_order_info.finalpad + ); + for (j = 1; j <= MAX_SIMD_EXPONENT; ++j) + { + fprintf(fd, + "#define SHA1DC_SIMD_%i_OFFSET58 (%i)\n" /* off1 / off2 */ + "#define SHA1DC_SIMD_%i_OFFSET65 (%i)\n" /* off2 / off1 */ + , + (1<<j), DV_order_info.first58 ? 0 : DV_order_info.simd_off2[j], + (1<<j), DV_order_info.first58 ? DV_order_info.simd_off2[j] : 0 + ); + } + fprintf(fd, + "\ntypedef struct {\n" + " uint32_t dm[80][SHA1DC_SIMD_TABLESIZE];\n" + " uint32_t mask58[SHA1DC_SIMD_TABLESIZE+SHA1DC_SIMD_FINALPADDING];\n" + " uint32_t mask65[SHA1DC_SIMD_TABLESIZE+SHA1DC_SIMD_FINALPADDING];\n" + ); + fprintf(fd, + " int dvType[SHA1DC_SIMD_TABLESIZE+SHA1DC_SIMD_FINALPADDING];\n" + " int dvK[SHA1DC_SIMD_TABLESIZE+SHA1DC_SIMD_FINALPADDING];\n" + " int dvB[SHA1DC_SIMD_TABLESIZE+SHA1DC_SIMD_FINALPADDING];\n" + " } sha1_dvs_interleaved_t;\n" + "extern const sha1_dvs_interleaved_t sha1_dvs_interleaved;\n\n" + "#endif /* SHA1DC_DVS_SIMD_HEADER */\n" + ); + fclose(fd); + + fd = fopen("lib/simd/dvs_simd.c", "w"); + if (fd == NULL) + parse_error("Cannot open output file to write"); + fprintf(fd, + "/***\n" + "* Copyright 2017 Marc Stevens <marc@marc-stevens.nl>, Dan Shumow <danshu@microsoft.com>\n" + "* Distributed under the MIT Software License.\n" + "* See accompanying file LICENSE.txt or copy at\n" + "* https://opensource.org/licenses/MIT\n" + "***/\n\n" + "#include \"dvs_simd.h\"\n" + "#include <stdlib.h>\n" + "#include <stdint.h>\n\n" + "const sha1_dvs_interleaved_t sha1_dvs_interleaved = {\n" + " {\n" + ); + for (i = 0; i < 80; ++i) + { + fprintf(fd, " {"); + for (j = 0; j < nrcols; ++j) + if (ordered_DVS[j]->ok58+ordered_DVS[j]->ok65==0) + fprintf(fd, ", 0"); + else + fprintf(fd, "%s 0x%08x", j==0?"":",", ordered_DVS[j]->dm[i]); + fprintf(fd, " }%s\n", i<79 ? "," : ""); + } + fprintf(fd, " },\n"); + + fprintf(fd, " {"); + for (j=0; j < nrcols+DV_order_info.finalpad; ++j) + fprintf(fd, "%s %s", j==0?"":",", ordered_DVS[j]->ok58 ? "0xFFFFFFFF" : (ordered_DVS[j]->ok65?"0x00000000":"0")); + fprintf(fd, " },\n"); + + fprintf(fd, " {"); + for (j=0; j < nrcols+DV_order_info.finalpad; ++j) + fprintf(fd, "%s %s", j==0?"":",", ordered_DVS[j]->ok65 ? "0xFFFFFFFF" : (ordered_DVS[j]->ok58?"0x00000000":"0")); + fprintf(fd, " },\n"); + + fprintf(fd, " {"); + for (j=0; j < nrcols+DV_order_info.finalpad; ++j) + fprintf(fd, "%s %2i", j==0?"":",", ordered_DVS[j]->dvType); + fprintf(fd, " },\n"); + + fprintf(fd, " {"); + for (j=0; j < nrcols+DV_order_info.finalpad; ++j) + fprintf(fd, "%s %2i", j==0?"":",", ordered_DVS[j]->dvK); + fprintf(fd, " },\n"); + + fprintf(fd, " {"); + for (j=0; j < nrcols+DV_order_info.finalpad; ++j) + fprintf(fd, "%s %2i", j==0?"":",", ordered_DVS[j]->dvB); + fprintf(fd, " }\n"); + fprintf(fd, "};\n"); + + fclose(fd); + + return 0; +} + +int process_dv_list(char* filename, int maxDVs) +{ + FILE* fd; + char buffer[1<<16]; + size_t size; + char* ptr; + char* ptrend; + DV_info_t DVS[256]; + int nrdvs = 0; + DV_info_t* DV = DVS+0; + char* DVtypestr[3] = { "err", "I", "II" }; + int K; + + fd = fopen(filename, "r"); + if (fd == NULL) + return parse_error("Cannot open file"); + size = fread(buffer,1,65536,fd); + if (size >= 65536) + return parse_error("File larger than 65536 bytes!"); + printf("Parsing at most %i DVs...\n", maxDVs); + + ptrend = buffer+size; + for (ptr = buffer; ptr < ptrend;) + { + /* I_48_0 : compl=2^64.5138 (prob=2^-71.5138) */ + + DV->dvType = 0; + + while (ptr < ptrend && *ptr != 'I') + ++ptr; + if (ptr >= ptrend) + break; + for (; *ptr == 'I'; ++ptr,++DV->dvType) + ; + if (DV->dvType > 2) return parse_error("there is no DV type III"); + if (*ptr++ != '_') return parse_error("expected _ after I"); + K = DV->dvK = atoi(ptr); + ptr += 2; + if (*ptr++ != '_') return parse_error("expected _ after K"); + DV->dvB = atoi(ptr); + + /* compute dv and dm tables in DV */ + init_DV(DV); + + if (DV->dvType == 1) + { + DV->ok58 = ((58 >= K+5) & (58 <= K+15)) ? 1 : 0; + DV->ok65 = ((65 >= K+5) & (65 <= K+15)) ? 1 : 0; + } + else + { + DV->ok58 = ((58 >= K+9) & (58 <= K+15)) ? 1 : 0; + DV->ok65 = ((65 >= K+9) & (65 <= K+15)) ? 1 : 0; + } + + printf("Parsed DV: %s(%i,%i) ok58=%i ok65=%i\n", DVtypestr[DV->dvType], DV->dvK, DV->dvB, DV->ok58, DV->ok65); + + ++DV; ++nrdvs; + if (DV != DVS+nrdvs) + parse_error("huh?!?"); + if (nrdvs >= maxDVs) + break; + } + fclose(fd); + return generate_code(DVS, nrdvs); +} + +int main(int argc, char** argv) +{ + if (argc < 2) + { + printf("Usage: %s <file> [<nr>]\n", argv[0]); + return 1; + } + if (argc == 2) + return process_dv_list(argv[1],256); + else + return process_dv_list(argv[1],atoi(argv[2])); +} |