From aa70801aaf4038faaf673558c241fa19d5dcd181 Mon Sep 17 00:00:00 2001 From: James Almer Date: Thu, 7 May 2015 01:58:50 -0300 Subject: ripemd: move ripemd{256, 320} into separate functions This allows the removal of a few branches. Before: lavu RIPEMD-160 size: 1048576 runs: 1024 time: 7.052 +- 0.010 After: lavu RIPEMD-160 size: 1048576 runs: 1024 time: 6.865 +- 0.015 Signed-off-by: James Almer Signed-off-by: Michael Niedermayer --- libavutil/ripemd.c | 354 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 222 insertions(+), 132 deletions(-) (limited to 'libavutil/ripemd.c') diff --git a/libavutil/ripemd.c b/libavutil/ripemd.c index 1ceb24b598..00848605ec 100644 --- a/libavutil/ripemd.c +++ b/libavutil/ripemd.c @@ -34,9 +34,8 @@ typedef struct AVRIPEMD { uint64_t count; ///< number of bytes in buffer uint8_t buffer[64]; ///< 512-bit buffer of input values used in hash updating uint32_t state[10]; ///< current hash value - uint8_t ext; ///< extension (0 for 128 and 160, 1 for 256 and 320) /** function used to update hash for 512-bit input block */ - void (*transform)(uint32_t *state, const uint8_t buffer[64], int ext); + void (*transform)(uint32_t *state, const uint8_t buffer[64]); } AVRIPEMD; const int av_ripemd_size = sizeof(AVRIPEMD); @@ -88,8 +87,6 @@ static const int WB[80] = { #define rol(value, bits) ((value << bits) | (value >> (32 - bits))) -#define SWAP(a,b) if (ext) { t = a; a = b; b = t; } - #define ROUND128_0_TO_15(a,b,c,d,e,f,g,h) \ a = rol(a + (( b ^ c ^ d) + block[WA[n]]), ROTA[n]); \ e = rol(e + ((((f ^ g) & h) ^ g) + block[WB[n]] + KB[0]), ROTB[n]); \ @@ -110,21 +107,40 @@ static const int WB[80] = { e = rol(e + (( f ^ g ^ h) + block[WB[n]]), ROTB[n]); \ n++ -static void ripemd128_transform(uint32_t *state, const uint8_t buffer[64], int ext) +#define R128_0 \ + ROUND128_0_TO_15(a,b,c,d,e,f,g,h); \ + ROUND128_0_TO_15(d,a,b,c,h,e,f,g); \ + ROUND128_0_TO_15(c,d,a,b,g,h,e,f); \ + ROUND128_0_TO_15(b,c,d,a,f,g,h,e) + +#define R128_16 \ + ROUND128_16_TO_31(a,b,c,d,e,f,g,h); \ + ROUND128_16_TO_31(d,a,b,c,h,e,f,g); \ + ROUND128_16_TO_31(c,d,a,b,g,h,e,f); \ + ROUND128_16_TO_31(b,c,d,a,f,g,h,e) + +#define R128_32 \ + ROUND128_32_TO_47(a,b,c,d,e,f,g,h); \ + ROUND128_32_TO_47(d,a,b,c,h,e,f,g); \ + ROUND128_32_TO_47(c,d,a,b,g,h,e,f); \ + ROUND128_32_TO_47(b,c,d,a,f,g,h,e) + +#define R128_48 \ + ROUND128_48_TO_63(a,b,c,d,e,f,g,h); \ + ROUND128_48_TO_63(d,a,b,c,h,e,f,g); \ + ROUND128_48_TO_63(c,d,a,b,g,h,e,f); \ + ROUND128_48_TO_63(b,c,d,a,f,g,h,e) + +static void ripemd128_transform(uint32_t *state, const uint8_t buffer[64]) { - uint32_t a, b, c, d, e, f, g, h, t; + uint32_t a, b, c, d, e, f, g, h, av_unused t; uint32_t block[16]; int n; - if (ext) { - a = state[0]; b = state[1]; c = state[2]; d = state[3]; - e = state[4]; f = state[5]; g = state[6]; h = state[7]; - } else { - a = e = state[0]; - b = f = state[1]; - c = g = state[2]; - d = h = state[3]; - } + a = e = state[0]; + b = f = state[1]; + c = g = state[2]; + d = h = state[3]; for (n = 0; n < 16; n++) block[n] = AV_RL32(buffer + 4 * n); @@ -136,77 +152,100 @@ static void ripemd128_transform(uint32_t *state, const uint8_t buffer[64], int e t = d; d = c; c = b; b = a; a = t; t = h; h = g; g = f; f = e; e = t; } - SWAP(a,e) for (; n < 32;) { ROUND128_16_TO_31(a,b,c,d,e,f,g,h); t = d; d = c; c = b; b = a; a = t; t = h; h = g; g = f; f = e; e = t; } - SWAP(b,f) for (; n < 48;) { ROUND128_32_TO_47(a,b,c,d,e,f,g,h); t = d; d = c; c = b; b = a; a = t; t = h; h = g; g = f; f = e; e = t; } - SWAP(c,g) for (; n < 64;) { ROUND128_48_TO_63(a,b,c,d,e,f,g,h); t = d; d = c; c = b; b = a; a = t; t = h; h = g; g = f; f = e; e = t; } - SWAP(d,h) #else -#define R128_0 \ - ROUND128_0_TO_15(a,b,c,d,e,f,g,h); \ - ROUND128_0_TO_15(d,a,b,c,h,e,f,g); \ - ROUND128_0_TO_15(c,d,a,b,g,h,e,f); \ - ROUND128_0_TO_15(b,c,d,a,f,g,h,e) - R128_0; R128_0; R128_0; R128_0; - SWAP(a,e) - -#define R128_16 \ - ROUND128_16_TO_31(a,b,c,d,e,f,g,h); \ - ROUND128_16_TO_31(d,a,b,c,h,e,f,g); \ - ROUND128_16_TO_31(c,d,a,b,g,h,e,f); \ - ROUND128_16_TO_31(b,c,d,a,f,g,h,e) R128_16; R128_16; R128_16; R128_16; - SWAP(b,f) - -#define R128_32 \ - ROUND128_32_TO_47(a,b,c,d,e,f,g,h); \ - ROUND128_32_TO_47(d,a,b,c,h,e,f,g); \ - ROUND128_32_TO_47(c,d,a,b,g,h,e,f); \ - ROUND128_32_TO_47(b,c,d,a,f,g,h,e) R128_32; R128_32; R128_32; R128_32; - SWAP(c,g) - -#define R128_48 \ - ROUND128_48_TO_63(a,b,c,d,e,f,g,h); \ - ROUND128_48_TO_63(d,a,b,c,h,e,f,g); \ - ROUND128_48_TO_63(c,d,a,b,g,h,e,f); \ - ROUND128_48_TO_63(b,c,d,a,f,g,h,e) R128_48; R128_48; R128_48; R128_48; - SWAP(d,h) #endif - if (ext) { - state[0] += a; state[1] += b; state[2] += c; state[3] += d; - state[4] += e; state[5] += f; state[6] += g; state[7] += h; - } else { - h += c + state[1]; - state[1] = state[2] + d + e; - state[2] = state[3] + a + f; - state[3] = state[0] + b + g; - state[0] = h; + h += c + state[1]; + state[1] = state[2] + d + e; + state[2] = state[3] + a + f; + state[3] = state[0] + b + g; + state[0] = h; +} + +static void ripemd256_transform(uint32_t *state, const uint8_t buffer[64]) +{ + uint32_t a, b, c, d, e, f, g, h, av_unused t; + uint32_t block[16]; + int n; + + a = state[0]; b = state[1]; c = state[2]; d = state[3]; + e = state[4]; f = state[5]; g = state[6]; h = state[7]; + + for (n = 0; n < 16; n++) + block[n] = AV_RL32(buffer + 4 * n); + n = 0; + +#if CONFIG_SMALL + for (; n < 16;) { + ROUND128_0_TO_15(a,b,c,d,e,f,g,h); + t = d; d = c; c = b; b = a; a = t; + t = h; h = g; g = f; f = e; e = t; + } + FFSWAP(uint32_t, a, e); + + for (; n < 32;) { + ROUND128_16_TO_31(a,b,c,d,e,f,g,h); + t = d; d = c; c = b; b = a; a = t; + t = h; h = g; g = f; f = e; e = t; + } + FFSWAP(uint32_t, b, f); + + for (; n < 48;) { + ROUND128_32_TO_47(a,b,c,d,e,f,g,h); + t = d; d = c; c = b; b = a; a = t; + t = h; h = g; g = f; f = e; e = t; } + FFSWAP(uint32_t, c, g); + + for (; n < 64;) { + ROUND128_48_TO_63(a,b,c,d,e,f,g,h); + t = d; d = c; c = b; b = a; a = t; + t = h; h = g; g = f; f = e; e = t; + } + FFSWAP(uint32_t, d, h); +#else + + R128_0; R128_0; R128_0; R128_0; + FFSWAP(uint32_t, a, e); + + R128_16; R128_16; R128_16; R128_16; + FFSWAP(uint32_t, b, f); + + R128_32; R128_32; R128_32; R128_32; + FFSWAP(uint32_t, c, g); + + R128_48; R128_48; R128_48; R128_48; + FFSWAP(uint32_t, d, h); +#endif + + state[0] += a; state[1] += b; state[2] += c; state[3] += d; + state[4] += e; state[5] += f; state[6] += g; state[7] += h; } #define ROTATE(x,y) \ @@ -239,22 +278,52 @@ static void ripemd128_transform(uint32_t *state, const uint8_t buffer[64], int e f = rol(f + (( g ^ h ^ i) + block[WB[n]]), ROTB[n]) + j; \ ROTATE(c,h) -static void ripemd160_transform(uint32_t *state, const uint8_t buffer[64], int ext) +#define R160_0 \ + ROUND160_0_TO_15(a,b,c,d,e,f,g,h,i,j); \ + ROUND160_0_TO_15(e,a,b,c,d,j,f,g,h,i); \ + ROUND160_0_TO_15(d,e,a,b,c,i,j,f,g,h); \ + ROUND160_0_TO_15(c,d,e,a,b,h,i,j,f,g); \ + ROUND160_0_TO_15(b,c,d,e,a,g,h,i,j,f) + +#define R160_16 \ + ROUND160_16_TO_31(e,a,b,c,d,j,f,g,h,i); \ + ROUND160_16_TO_31(d,e,a,b,c,i,j,f,g,h); \ + ROUND160_16_TO_31(c,d,e,a,b,h,i,j,f,g); \ + ROUND160_16_TO_31(b,c,d,e,a,g,h,i,j,f); \ + ROUND160_16_TO_31(a,b,c,d,e,f,g,h,i,j) + +#define R160_32 \ + ROUND160_32_TO_47(d,e,a,b,c,i,j,f,g,h); \ + ROUND160_32_TO_47(c,d,e,a,b,h,i,j,f,g); \ + ROUND160_32_TO_47(b,c,d,e,a,g,h,i,j,f); \ + ROUND160_32_TO_47(a,b,c,d,e,f,g,h,i,j); \ + ROUND160_32_TO_47(e,a,b,c,d,j,f,g,h,i) + +#define R160_48 \ + ROUND160_48_TO_63(c,d,e,a,b,h,i,j,f,g); \ + ROUND160_48_TO_63(b,c,d,e,a,g,h,i,j,f); \ + ROUND160_48_TO_63(a,b,c,d,e,f,g,h,i,j); \ + ROUND160_48_TO_63(e,a,b,c,d,j,f,g,h,i); \ + ROUND160_48_TO_63(d,e,a,b,c,i,j,f,g,h) + +#define R160_64 \ + ROUND160_64_TO_79(b,c,d,e,a,g,h,i,j,f); \ + ROUND160_64_TO_79(a,b,c,d,e,f,g,h,i,j); \ + ROUND160_64_TO_79(e,a,b,c,d,j,f,g,h,i); \ + ROUND160_64_TO_79(d,e,a,b,c,i,j,f,g,h); \ + ROUND160_64_TO_79(c,d,e,a,b,h,i,j,f,g) + +static void ripemd160_transform(uint32_t *state, const uint8_t buffer[64]) { - uint32_t a, b, c, d, e, f, g, h, i, j, t; + uint32_t a, b, c, d, e, f, g, h, i, j, av_unused t; uint32_t block[16]; int n; - if (ext) { - a = state[0]; b = state[1]; c = state[2]; d = state[3]; e = state[4]; - f = state[5]; g = state[6]; h = state[7]; i = state[8]; j = state[9]; - } else { - a = f = state[0]; - b = g = state[1]; - c = h = state[2]; - d = i = state[3]; - e = j = state[4]; - } + a = f = state[0]; + b = g = state[1]; + c = h = state[2]; + d = i = state[3]; + e = j = state[4]; for (n = 0; n < 16; n++) block[n] = AV_RL32(buffer + 4 * n); @@ -266,104 +335,129 @@ static void ripemd160_transform(uint32_t *state, const uint8_t buffer[64], int e t = e; e = d; d = c; c = b; b = a; a = t; t = j; j = i; i = h; h = g; g = f; f = t; } - SWAP(b,g) for (; n < 32;) { ROUND160_16_TO_31(a,b,c,d,e,f,g,h,i,j); t = e; e = d; d = c; c = b; b = a; a = t; t = j; j = i; i = h; h = g; g = f; f = t; } - SWAP(d,i) for (; n < 48;) { ROUND160_32_TO_47(a,b,c,d,e,f,g,h,i,j); t = e; e = d; d = c; c = b; b = a; a = t; t = j; j = i; i = h; h = g; g = f; f = t; } - SWAP(a,f) for (; n < 64;) { ROUND160_48_TO_63(a,b,c,d,e,f,g,h,i,j); t = e; e = d; d = c; c = b; b = a; a = t; t = j; j = i; i = h; h = g; g = f; f = t; } - SWAP(c,h) for (; n < 80;) { ROUND160_64_TO_79(a,b,c,d,e,f,g,h,i,j); t = e; e = d; d = c; c = b; b = a; a = t; t = j; j = i; i = h; h = g; g = f; f = t; } - SWAP(e,j) #else -#define R160_0 \ - ROUND160_0_TO_15(a,b,c,d,e,f,g,h,i,j); \ - ROUND160_0_TO_15(e,a,b,c,d,j,f,g,h,i); \ - ROUND160_0_TO_15(d,e,a,b,c,i,j,f,g,h); \ - ROUND160_0_TO_15(c,d,e,a,b,h,i,j,f,g); \ - ROUND160_0_TO_15(b,c,d,e,a,g,h,i,j,f) - R160_0; R160_0; R160_0; ROUND160_0_TO_15(a,b,c,d,e,f,g,h,i,j); - SWAP(a,f) - -#define R160_16 \ - ROUND160_16_TO_31(e,a,b,c,d,j,f,g,h,i); \ - ROUND160_16_TO_31(d,e,a,b,c,i,j,f,g,h); \ - ROUND160_16_TO_31(c,d,e,a,b,h,i,j,f,g); \ - ROUND160_16_TO_31(b,c,d,e,a,g,h,i,j,f); \ - ROUND160_16_TO_31(a,b,c,d,e,f,g,h,i,j) R160_16; R160_16; R160_16; ROUND160_16_TO_31(e,a,b,c,d,j,f,g,h,i); - SWAP(b,g) - -#define R160_32 \ - ROUND160_32_TO_47(d,e,a,b,c,i,j,f,g,h); \ - ROUND160_32_TO_47(c,d,e,a,b,h,i,j,f,g); \ - ROUND160_32_TO_47(b,c,d,e,a,g,h,i,j,f); \ - ROUND160_32_TO_47(a,b,c,d,e,f,g,h,i,j); \ - ROUND160_32_TO_47(e,a,b,c,d,j,f,g,h,i) R160_32; R160_32; R160_32; ROUND160_32_TO_47(d,e,a,b,c,i,j,f,g,h); - SWAP(c,h) - -#define R160_48 \ - ROUND160_48_TO_63(c,d,e,a,b,h,i,j,f,g); \ - ROUND160_48_TO_63(b,c,d,e,a,g,h,i,j,f); \ - ROUND160_48_TO_63(a,b,c,d,e,f,g,h,i,j); \ - ROUND160_48_TO_63(e,a,b,c,d,j,f,g,h,i); \ - ROUND160_48_TO_63(d,e,a,b,c,i,j,f,g,h) R160_48; R160_48; R160_48; ROUND160_48_TO_63(c,d,e,a,b,h,i,j,f,g); - SWAP(d,i) - -#define R160_64 \ - ROUND160_64_TO_79(b,c,d,e,a,g,h,i,j,f); \ - ROUND160_64_TO_79(a,b,c,d,e,f,g,h,i,j); \ - ROUND160_64_TO_79(e,a,b,c,d,j,f,g,h,i); \ - ROUND160_64_TO_79(d,e,a,b,c,i,j,f,g,h); \ - ROUND160_64_TO_79(c,d,e,a,b,h,i,j,f,g) R160_64; R160_64; R160_64; ROUND160_64_TO_79(b,c,d,e,a,g,h,i,j,f); - SWAP(e,j) #endif - if (ext) { - state[0] += a; state[1] += b; state[2] += c; state[3] += d; state[4] += e; - state[5] += f; state[6] += g; state[7] += h; state[8] += i; state[9] += j; - } else { - i += c + state[1]; - state[1] = state[2] + d + j; - state[2] = state[3] + e + f; - state[3] = state[4] + a + g; - state[4] = state[0] + b + h; - state[0] = i; + i += c + state[1]; + state[1] = state[2] + d + j; + state[2] = state[3] + e + f; + state[3] = state[4] + a + g; + state[4] = state[0] + b + h; + state[0] = i; +} + +static void ripemd320_transform(uint32_t *state, const uint8_t buffer[64]) +{ + uint32_t a, b, c, d, e, f, g, h, i, j, av_unused t; + uint32_t block[16]; + int n; + + a = state[0]; b = state[1]; c = state[2]; d = state[3]; e = state[4]; + f = state[5]; g = state[6]; h = state[7]; i = state[8]; j = state[9]; + + for (n = 0; n < 16; n++) + block[n] = AV_RL32(buffer + 4 * n); + n = 0; + +#if CONFIG_SMALL + for (; n < 16;) { + ROUND160_0_TO_15(a,b,c,d,e,f,g,h,i,j); + t = e; e = d; d = c; c = b; b = a; a = t; + t = j; j = i; i = h; h = g; g = f; f = t; + } + FFSWAP(uint32_t, b, g); + + for (; n < 32;) { + ROUND160_16_TO_31(a,b,c,d,e,f,g,h,i,j); + t = e; e = d; d = c; c = b; b = a; a = t; + t = j; j = i; i = h; h = g; g = f; f = t; + } + FFSWAP(uint32_t, d, i); + + for (; n < 48;) { + ROUND160_32_TO_47(a,b,c,d,e,f,g,h,i,j); + t = e; e = d; d = c; c = b; b = a; a = t; + t = j; j = i; i = h; h = g; g = f; f = t; + } + FFSWAP(uint32_t, a, f); + + for (; n < 64;) { + ROUND160_48_TO_63(a,b,c,d,e,f,g,h,i,j); + t = e; e = d; d = c; c = b; b = a; a = t; + t = j; j = i; i = h; h = g; g = f; f = t; + } + FFSWAP(uint32_t, c, h); + + for (; n < 80;) { + ROUND160_64_TO_79(a,b,c,d,e,f,g,h,i,j); + t = e; e = d; d = c; c = b; b = a; a = t; + t = j; j = i; i = h; h = g; g = f; f = t; } + FFSWAP(uint32_t, e, j); +#else + + R160_0; R160_0; R160_0; + ROUND160_0_TO_15(a,b,c,d,e,f,g,h,i,j); + FFSWAP(uint32_t, a, f); + + R160_16; R160_16; R160_16; + ROUND160_16_TO_31(e,a,b,c,d,j,f,g,h,i); + FFSWAP(uint32_t, b, g); + + R160_32; R160_32; R160_32; + ROUND160_32_TO_47(d,e,a,b,c,i,j,f,g,h); + FFSWAP(uint32_t, c, h); + + R160_48; R160_48; R160_48; + ROUND160_48_TO_63(c,d,e,a,b,h,i,j,f,g); + FFSWAP(uint32_t, d, i); + + R160_64; R160_64; R160_64; + ROUND160_64_TO_79(b,c,d,e,a,g,h,i,j,f); + FFSWAP(uint32_t, e, j); +#endif + + state[0] += a; state[1] += b; state[2] += c; state[3] += d; state[4] += e; + state[5] += f; state[6] += g; state[7] += h; state[8] += i; state[9] += j; } av_cold int av_ripemd_init(AVRIPEMD *ctx, int bits) @@ -376,7 +470,6 @@ av_cold int av_ripemd_init(AVRIPEMD *ctx, int bits) ctx->state[2] = 0x98BADCFE; ctx->state[3] = 0x10325476; ctx->transform = ripemd128_transform; - ctx->ext = 0; break; case 160: // RIPEMD-160 ctx->state[0] = 0x67452301; @@ -385,7 +478,6 @@ av_cold int av_ripemd_init(AVRIPEMD *ctx, int bits) ctx->state[3] = 0x10325476; ctx->state[4] = 0xC3D2E1F0; ctx->transform = ripemd160_transform; - ctx->ext = 0; break; case 256: // RIPEMD-256 ctx->state[0] = 0x67452301; @@ -396,8 +488,7 @@ av_cold int av_ripemd_init(AVRIPEMD *ctx, int bits) ctx->state[5] = 0xFEDCBA98; ctx->state[6] = 0x89ABCDEF; ctx->state[7] = 0x01234567; - ctx->transform = ripemd128_transform; - ctx->ext = 1; + ctx->transform = ripemd256_transform; break; case 320: // RIPEMD-320 ctx->state[0] = 0x67452301; @@ -410,8 +501,7 @@ av_cold int av_ripemd_init(AVRIPEMD *ctx, int bits) ctx->state[7] = 0x89ABCDEF; ctx->state[8] = 0x01234567; ctx->state[9] = 0x3C2D1E0F; - ctx->transform = ripemd160_transform; - ctx->ext = 1; + ctx->transform = ripemd320_transform; break; default: return -1; @@ -430,16 +520,16 @@ void av_ripemd_update(AVRIPEMD* ctx, const uint8_t* data, unsigned int len) for (i = 0; i < len; i++) { ctx->buffer[j++] = data[i]; if (64 == j) { - ctx->transform(ctx->state, ctx->buffer, ctx->ext); + ctx->transform(ctx->state, ctx->buffer); j = 0; } } #else if ((j + len) > 63) { memcpy(&ctx->buffer[j], data, (i = 64 - j)); - ctx->transform(ctx->state, ctx->buffer, ctx->ext); + ctx->transform(ctx->state, ctx->buffer); for (; i + 63 < len; i += 64) - ctx->transform(ctx->state, &data[i], ctx->ext); + ctx->transform(ctx->state, &data[i]); j = 0; } else i = 0; -- cgit v1.2.3