Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/kornelski/7z.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'CPP/7zip/Crypto/AES/aesopt.h')
-rwxr-xr-xCPP/7zip/Crypto/AES/aesopt.h839
1 files changed, 839 insertions, 0 deletions
diff --git a/CPP/7zip/Crypto/AES/aesopt.h b/CPP/7zip/Crypto/AES/aesopt.h
new file mode 100755
index 00000000..bcad345f
--- /dev/null
+++ b/CPP/7zip/Crypto/AES/aesopt.h
@@ -0,0 +1,839 @@
+/*
+ -------------------------------------------------------------------------
+ Copyright (c) 2001, Dr Brian Gladman <brg@gladman.me.uk>, Worcester, UK.
+ All rights reserved.
+
+ LICENSE TERMS
+
+ The free distribution and use of this software in both source and binary
+ form is allowed (with or without changes) provided that:
+
+ 1. distributions of this source code include the above copyright
+ notice, this list of conditions and the following disclaimer;
+
+ 2. distributions in binary form include the above copyright
+ notice, this list of conditions and the following disclaimer
+ in the documentation and/or other associated materials;
+
+ 3. the copyright holder's name is not used to endorse products
+ built using this software without specific written permission.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and fitness for purpose.
+ -------------------------------------------------------------------------
+ Issue Date: 29/07/2002
+
+ This file contains the compilation options for AES (Rijndael) and code
+ that is common across encryption, key scheduling and table generation.
+
+ OPERATION
+
+ These source code files implement the AES algorithm Rijndael designed by
+ Joan Daemen and Vincent Rijmen. The version in aes.c is designed for
+ block and key sizes of 128, 192 and 256 bits (16, 24 and 32 bytes) while
+ that in aespp.c provides for block and keys sizes of 128, 160, 192, 224
+ and 256 bits (16, 20, 24, 28 and 32 bytes). This file is a common header
+ file for these two implementations and for aesref.c, which is a reference
+ implementation.
+
+ This version is designed for flexibility and speed using operations on
+ 32-bit words rather than operations on bytes. It provides aes_both fixed
+ and dynamic block and key lengths and can also run with either big or
+ little endian internal byte order (see aes.h). It inputs block and key
+ lengths in bytes with the legal values being 16, 24 and 32 for aes.c and
+ 16, 20, 24, 28 and 32 for aespp.c
+
+ THE CIPHER INTERFACE
+
+ aes_08t (an unsigned 8-bit type)
+ aes_32t (an unsigned 32-bit type)
+ aes_fret (a signed 16 bit type for function return values)
+ aes_good (value != 0, a good return)
+ aes_bad (value == 0, an error return)
+ struct aes_ctx (structure for the cipher encryption context)
+ struct aes_ctx (structure for the cipher decryption context)
+ aes_rval the function return type (aes_fret if not DLL)
+
+ C subroutine calls:
+
+ aes_rval aes_blk_len(unsigned int blen, aes_ctx cx[1]);
+ aes_rval aes_enc_key(const unsigned char in_key[], unsigned int klen, aes_ctx cx[1]);
+ aes_rval aes_enc_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1]);
+
+ aes_rval aes_dec_len(unsigned int blen, aes_ctx cx[1]);
+ aes_rval aes_dec_key(const unsigned char in_key[], unsigned int klen, aes_ctx cx[1]);
+ aes_rval aes_dec_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1]);
+
+ IMPORTANT NOTE: If you are using this C interface and your compiler does
+ not set the memory used for objects to zero before use, you will need to
+ ensure that cx.s_flg is set to zero before using these subroutine calls.
+
+ C++ aes class subroutines:
+
+ class AESclass for encryption
+ class AESclass for decryption
+
+ aes_rval len(unsigned int blen = 16);
+ aes_rval key(const unsigned char in_key[], unsigned int klen);
+ aes_rval blk(const unsigned char in_blk[], unsigned char out_blk[]);
+
+ aes_rval len(unsigned int blen = 16);
+ aes_rval key(const unsigned char in_key[], unsigned int klen);
+ aes_rval blk(const unsigned char in_blk[], unsigned char out_blk[]);
+
+ The block length inputs to set_block and set_key are in numbers of
+ BYTES, not bits. The calls to subroutines must be made in the above
+ order but multiple calls can be made without repeating earlier calls
+ if their parameters have not changed. If the cipher block length is
+ variable but set_blk has not been called before cipher operations a
+ value of 16 is assumed (that is, the AES block size). In contrast to
+ earlier versions the block and key length parameters are now checked
+ for correctness and the encryption and decryption routines check to
+ ensure that an appropriate key has been set before they are called.
+
+ COMPILATION
+
+ The files used to provide AES (Rijndael) are
+
+ a. aes.h for the definitions needed for use in C.
+ b. aescpp.h for the definitions needed for use in C++.
+ c. aesopt.h for setting compilation options (also includes common
+ code).
+ d. aescrypt.c for encryption and decrytpion, or
+ e. aescrypt.asm for encryption and decryption using assembler code.
+ f. aeskey.c for key scheduling.
+ g. aestab.c for table loading or generation.
+
+ The assembler code uses the NASM assembler. The above files provice
+ block and key lengths of 16, 24 and 32 bytes (128, 192 and 256 bits).
+ If aescrypp.c and aeskeypp.c are used instead of aescrypt.c and
+ aeskey.c respectively, the block and key lengths can then be 16, 20,
+ 24, 28 or 32 bytes. However this code has not been optimised to the
+ same extent and is hence slower (esepcially for the AES block size
+ of 16 bytes).
+
+ To compile AES (Rijndael) for use in C code use aes.h and exclude
+ the AES_DLL define in aes.h
+
+ To compile AES (Rijndael) for use in in C++ code use aescpp.h and
+ exclude the AES_DLL define in aes.h
+
+ To compile AES (Rijndael) in C as a Dynamic Link Library DLL) use
+ aes.h, include the AES_DLL define and compile the DLL. If using
+ the test files to test the DLL, exclude aes.c from the test build
+ project and compile it with the same defines as used for the DLL
+ (ensure that the DLL path is correct)
+
+ CONFIGURATION OPTIONS (here and in aes.h)
+
+ a. define BLOCK_SIZE in aes.h to set the cipher block size (16, 24
+ or 32 for the standard code, or 16, 20, 24, 28 or 32 for the
+ extended code) or leave this undefined for dynamically variable
+ block size (this will result in much slower code).
+ b. set AES_DLL in aes.h if AES (Rijndael) is to be compiled as a DLL
+ c. You may need to set PLATFORM_BYTE_ORDER to define the byte order.
+ d. If you want the code to run in a specific internal byte order, then
+ INTERNAL_BYTE_ORDER must be set accordingly.
+ e. set other configuration options decribed below.
+*/
+
+#ifndef _AESOPT_H
+#define _AESOPT_H
+
+/* START OF CONFIGURATION OPTIONS
+
+ USE OF DEFINES
+
+ Later in this section there are a number of defines that control the
+ operation of the code. In each section, the purpose of each define is
+ explained so that the relevant form can be included or excluded by
+ setting either 1's or 0's respectively on the branches of the related
+ #if clauses.
+*/
+
+/* 1. PLATFORM SPECIFIC INCLUDES */
+
+#if defined( __CRYPTLIB__ ) && !defined( INC_ALL ) && !defined( INC_CHILD )
+#include "crypt/aes.h"
+#else
+ #include "aes.h"
+#endif
+
+// 2003-09-16: Changed by Igor Pavlov. Check it.
+// #if defined(__GNUC__) || defined(__GNU_LIBRARY__)
+#if (defined(__GNUC__) || defined(__GNU_LIBRARY__)) && !defined(_WIN32)
+
+# include <endian.h>
+# include <byteswap.h>
+#elif defined(__CRYPTLIB__)
+# if defined( INC_ALL )
+# include "crypt.h"
+# elif defined( INC_CHILD )
+# include "../crypt.h"
+# else
+# include "crypt.h"
+# endif
+# if defined(DATA_LITTLEENDIAN)
+# define PLATFORM_BYTE_ORDER AES_LITTLE_ENDIAN
+# else
+# define PLATFORM_BYTE_ORDER AES_BIG_ENDIAN
+# endif
+#elif defined(_MSC_VER)
+# include <stdlib.h>
+#elif !defined(_WIN32)
+# include <stdlib.h>
+# if !defined (_ENDIAN_H)
+# include <sys/param.h>
+# else
+# include _ENDIAN_H
+# endif
+#endif
+
+/* 2. BYTE ORDER IN 32-BIT WORDS
+
+ To obtain the highest speed on processors with 32-bit words, this code
+ needs to determine the order in which bytes are packed into such words.
+ The following block of code is an attempt to capture the most obvious
+ ways in which various environemnts define byte order. It may well fail,
+ in which case the definitions will need to be set by editing at the
+ points marked **** EDIT HERE IF NECESSARY **** below.
+*/
+#define AES_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
+#define AES_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
+
+#if !defined(PLATFORM_BYTE_ORDER)
+#if defined(LITTLE_ENDIAN) || defined(BIG_ENDIAN)
+# if defined(LITTLE_ENDIAN) && defined(BIG_ENDIAN)
+# if defined(BYTE_ORDER)
+# if (BYTE_ORDER == LITTLE_ENDIAN)
+# define PLATFORM_BYTE_ORDER AES_LITTLE_ENDIAN
+# elif (BYTE_ORDER == BIG_ENDIAN)
+# define PLATFORM_BYTE_ORDER AES_BIG_ENDIAN
+# endif
+# endif
+# elif defined(LITTLE_ENDIAN) && !defined(BIG_ENDIAN)
+# define PLATFORM_BYTE_ORDER AES_LITTLE_ENDIAN
+# elif !defined(LITTLE_ENDIAN) && defined(BIG_ENDIAN)
+# define PLATFORM_BYTE_ORDER AES_BIG_ENDIAN
+# endif
+#elif defined(_LITTLE_ENDIAN) || defined(_BIG_ENDIAN)
+# if defined(_LITTLE_ENDIAN) && defined(_BIG_ENDIAN)
+# if defined(_BYTE_ORDER)
+# if (_BYTE_ORDER == _LITTLE_ENDIAN)
+# define PLATFORM_BYTE_ORDER AES_LITTLE_ENDIAN
+# elif (_BYTE_ORDER == _BIG_ENDIAN)
+# define PLATFORM_BYTE_ORDER AES_BIG_ENDIAN
+# endif
+# endif
+# elif defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)
+# define PLATFORM_BYTE_ORDER AES_LITTLE_ENDIAN
+# elif !defined(_LITTLE_ENDIAN) && defined(_BIG_ENDIAN)
+# define PLATFORM_BYTE_ORDER AES_BIG_ENDIAN
+# endif
+#elif 0 /* **** EDIT HERE IF NECESSARY **** */
+#define PLATFORM_BYTE_ORDER AES_LITTLE_ENDIAN
+#elif 0 /* **** EDIT HERE IF NECESSARY **** */
+#define PLATFORM_BYTE_ORDER AES_BIG_ENDIAN
+#elif (('1234' >> 24) == '1')
+# define PLATFORM_BYTE_ORDER AES_LITTLE_ENDIAN
+#elif (('4321' >> 24) == '1')
+# define PLATFORM_BYTE_ORDER AES_BIG_ENDIAN
+#endif
+#endif
+
+#if !defined(PLATFORM_BYTE_ORDER)
+# error Please set undetermined byte order (lines 233 or 235 of aesopt.h).
+#endif
+
+/* 3. ASSEMBLER SUPPORT
+
+ If the assembler code is used for encryption and decryption this file only
+ provides key scheduling so the following defines are used
+*/
+#ifdef AES_ASM
+#define ENCRYPTION_KEY_SCHEDULE
+#define DECRYPTION_KEY_SCHEDULE
+#else
+
+/* 4. FUNCTIONS REQUIRED
+
+ This implementation provides five main subroutines which provide for
+ setting block length, setting encryption and decryption keys and for
+ encryption and decryption. When the assembler code is not being used
+ the following definition blocks allow the selection of the routines
+ that are to be included in the compilation.
+*/
+#if 1
+#define ENCRYPTION_KEY_SCHEDULE
+#endif
+
+#if 1
+#define DECRYPTION_KEY_SCHEDULE
+#endif
+
+#if 1
+#define ENCRYPTION
+#endif
+
+#if 1
+#define DECRYPTION
+#endif
+
+#endif
+
+/* 5. BYTE ORDER WITHIN 32 BIT WORDS
+
+ The fundamental data processing units in Rijndael are 8-bit bytes. The
+ input, output and key input are all enumerated arrays of bytes in which
+ bytes are numbered starting at zero and increasing to one less than the
+ number of bytes in the array in question. This enumeration is only used
+ for naming bytes and does not imply any adjacency or order relationship
+ from one byte to another. When these inputs and outputs are considered
+ as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to
+ byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte.
+ In this implementation bits are numbered from 0 to 7 starting at the
+ numerically least significant end of each byte (bit n represents 2^n).
+
+ However, Rijndael can be implemented more efficiently using 32-bit
+ words by packing bytes into words so that bytes 4*n to 4*n+3 are placed
+ into word[n]. While in principle these bytes can be assembled into words
+ in any positions, this implementation only supports the two formats in
+ which bytes in adjacent positions within words also have adjacent byte
+ numbers. This order is called big-endian if the lowest numbered bytes
+ in words have the highest numeric significance and little-endian if the
+ opposite applies.
+
+ This code can work in either order irrespective of the order used by the
+ machine on which it runs. Normally the internal byte order will be set
+ to the order of the processor on which the code is to be run but this
+ define can be used to reverse this in special situations
+*/
+#if 1
+#define INTERNAL_BYTE_ORDER PLATFORM_BYTE_ORDER
+#elif defined(AES_LITTLE_ENDIAN)
+#define INTERNAL_BYTE_ORDER AES_LITTLE_ENDIAN
+#elif defined(AES_BIG_ENDIAN)
+#define INTERNAL_BYTE_ORDER AES_BIG_ENDIAN
+#endif
+
+/* 6. FAST INPUT/OUTPUT OPERATIONS.
+
+ On some machines it is possible to improve speed by transferring the
+ bytes in the input and output arrays to and from the internal 32-bit
+ variables by addressing these arrays as if they are arrays of 32-bit
+ words. On some machines this will always be possible but there may
+ be a large performance penalty if the byte arrays are not aligned on
+ the normal word boundaries. On other machines this technique will
+ lead to memory access errors when such 32-bit word accesses are not
+ properly aligned. The option SAFE_IO avoids such problems but will
+ often be slower on those machines that support misaligned access
+ (especially so if care is taken to align the input and output byte
+ arrays on 32-bit word boundaries). If SAFE_IO is not defined it is
+ assumed that access to byte arrays as if they are arrays of 32-bit
+ words will not cause problems when such accesses are misaligned.
+*/
+#if 1
+#define SAFE_IO
+#endif
+
+/* 7. LOOP UNROLLING
+
+ The code for encryption and decrytpion cycles through a number of rounds
+ that can be implemented either in a loop or by expanding the code into a
+ long sequence of instructions, the latter producing a larger program but
+ one that will often be much faster. The latter is called loop unrolling.
+ There are also potential speed advantages in expanding two iterations in
+ a loop with half the number of iterations, which is called partial loop
+ unrolling. The following options allow partial or full loop unrolling
+ to be set independently for encryption and decryption
+*/
+#if 1
+#define ENC_UNROLL FULL
+#elif 0
+#define ENC_UNROLL PARTIAL
+#else
+#define ENC_UNROLL NONE
+#endif
+
+// 7-Zip: Small size for SFX
+#ifdef _SFX
+#define DEC_UNROLL NONE
+#else
+
+#if 1
+#define DEC_UNROLL FULL
+#elif 0
+#define DEC_UNROLL PARTIAL
+#else
+#define DEC_UNROLL NONE
+#endif
+
+#endif
+
+/* 8. FIXED OR DYNAMIC TABLES
+
+ When this section is included the tables used by the code are comipled
+ statically into the binary file. Otherwise they are computed once when
+ the code is first used.
+*/
+#if 0
+#define FIXED_TABLES
+#endif
+
+/* 9. FAST FINITE FIELD OPERATIONS
+
+ If this section is included, tables are used to provide faster finite
+ field arithmetic (this has no effect if FIXED_TABLES is defined).
+*/
+#if 1
+#define FF_TABLES
+#endif
+
+/* 10. INTERNAL STATE VARIABLE FORMAT
+
+ The internal state of Rijndael is stored in a number of local 32-bit
+ word varaibles which can be defined either as an array or as individual
+ names variables. Include this section if you want to store these local
+ varaibles in arrays. Otherwise individual local variables will be used.
+*/
+#if 1
+#define ARRAYS
+#endif
+
+/* In this implementation the columns of the state array are each held in
+ 32-bit words. The state array can be held in various ways: in an array
+ of words, in a number of individual word variables or in a number of
+ processor registers. The following define maps a variable name x and
+ a column number c to the way the state array variable is to be held.
+ The first define below maps the state into an array x[c] whereas the
+ second form maps the state into a number of individual variables x0,
+ x1, etc. Another form could map individual state colums to machine
+ register names.
+*/
+
+#if defined(ARRAYS)
+#define s(x,c) x[c]
+#else
+#define s(x,c) x##c
+#endif
+
+/* 11. VARIABLE BLOCK SIZE SPEED
+
+ This section is only relevant if you wish to use the variable block
+ length feature of the code. Include this section if you place more
+ emphasis on speed rather than code size.
+*/
+#if 1
+#define FAST_VARIABLE
+#endif
+
+/* 12. INTERNAL TABLE CONFIGURATION
+
+ This cipher proceeds by repeating in a number of cycles known as 'rounds'
+ which are implemented by a round function which can optionally be speeded
+ up using tables. The basic tables are each 256 32-bit words, with either
+ one or four tables being required for each round function depending on
+ how much speed is required. The encryption and decryption round functions
+ are different and the last encryption and decrytpion round functions are
+ different again making four different round functions in all.
+
+ This means that:
+ 1. Normal encryption and decryption rounds can each use either 0, 1
+ or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
+ 2. The last encryption and decryption rounds can also use either 0, 1
+ or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
+
+ Include or exclude the appropriate definitions below to set the number
+ of tables used by this implementation.
+*/
+
+#if 1 /* set tables for the normal encryption round */
+#define ENC_ROUND FOUR_TABLES
+#elif 0
+#define ENC_ROUND ONE_TABLE
+#else
+#define ENC_ROUND NO_TABLES
+#endif
+
+#if 1 /* set tables for the last encryption round */
+#define LAST_ENC_ROUND FOUR_TABLES
+#elif 0
+#define LAST_ENC_ROUND ONE_TABLE
+#else
+#define LAST_ENC_ROUND NO_TABLES
+#endif
+
+#if 1 /* set tables for the normal decryption round */
+#define DEC_ROUND FOUR_TABLES
+#elif 0
+#define DEC_ROUND ONE_TABLE
+#else
+#define DEC_ROUND NO_TABLES
+#endif
+
+#if 1 /* set tables for the last decryption round */
+#define LAST_DEC_ROUND FOUR_TABLES
+#elif 0
+#define LAST_DEC_ROUND ONE_TABLE
+#else
+#define LAST_DEC_ROUND NO_TABLES
+#endif
+
+/* The decryption key schedule can be speeded up with tables in the same
+ way that the round functions can. Include or exclude the following
+ defines to set this requirement.
+*/
+#if 1
+#define KEY_SCHED FOUR_TABLES
+#elif 0
+#define KEY_SCHED ONE_TABLE
+#else
+#define KEY_SCHED NO_TABLES
+#endif
+
+/* END OF CONFIGURATION OPTIONS */
+
+#define NO_TABLES 0 /* DO NOT CHANGE */
+#define ONE_TABLE 1 /* DO NOT CHANGE */
+#define FOUR_TABLES 4 /* DO NOT CHANGE */
+#define NONE 0 /* DO NOT CHANGE */
+#define PARTIAL 1 /* DO NOT CHANGE */
+#define FULL 2 /* DO NOT CHANGE */
+
+#if defined(BLOCK_SIZE) && ((BLOCK_SIZE & 3) || BLOCK_SIZE < 16 || BLOCK_SIZE > 32)
+#error An illegal block size has been specified.
+#endif
+
+#if !defined(BLOCK_SIZE)
+#define RC_LENGTH 29
+#else
+#define RC_LENGTH 5 * BLOCK_SIZE / 4 - (BLOCK_SIZE == 16 ? 10 : 11)
+#endif
+
+/* Disable at least some poor combinations of options */
+
+#if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES
+#undef LAST_ENC_ROUND
+#define LAST_ENC_ROUND NO_TABLES
+#elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES
+#undef LAST_ENC_ROUND
+#define LAST_ENC_ROUND ONE_TABLE
+#endif
+
+#if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE
+#undef ENC_UNROLL
+#define ENC_UNROLL NONE
+#endif
+
+#if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES
+#undef LAST_DEC_ROUND
+#define LAST_DEC_ROUND NO_TABLES
+#elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES
+#undef LAST_DEC_ROUND
+#define LAST_DEC_ROUND ONE_TABLE
+#endif
+
+#if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE
+#undef DEC_UNROLL
+#define DEC_UNROLL NONE
+#endif
+
+/* upr(x,n): rotates bytes within words by n positions, moving bytes to
+ higher index positions with wrap around into low positions
+ ups(x,n): moves bytes by n positions to higher index positions in
+ words but without wrap around
+ bval(x,n): extracts a byte from a word
+
+ NOTE: The definitions given here are intended only for use with
+ unsigned variables and with shift counts that are compile
+ time constants
+*/
+
+#if (INTERNAL_BYTE_ORDER == AES_LITTLE_ENDIAN)
+#if defined(_MSC_VER)
+#define upr(x,n) _lrotl((aes_32t)(x), 8 * (n))
+#else
+#define upr(x,n) ((aes_32t)(x) << 8 * (n) | (aes_32t)(x) >> 32 - 8 * (n))
+#endif
+#define ups(x,n) ((aes_32t)(x) << 8 * (n))
+#define bval(x,n) ((aes_08t)((x) >> 8 * (n)))
+#define bytes2word(b0, b1, b2, b3) \
+ (((aes_32t)(b3) << 24) | ((aes_32t)(b2) << 16) | ((aes_32t)(b1) << 8) | (b0))
+#endif
+
+#if (INTERNAL_BYTE_ORDER == AES_BIG_ENDIAN)
+#define upr(x,n) ((aes_32t)(x) >> 8 * (n) | (aes_32t)(x) << 32 - 8 * (n))
+#define ups(x,n) ((aes_32t)(x) >> 8 * (n)))
+#define bval(x,n) ((aes_08t)((x) >> 24 - 8 * (n)))
+#define bytes2word(b0, b1, b2, b3) \
+ (((aes_32t)(b0) << 24) | ((aes_32t)(b1) << 16) | ((aes_32t)(b2) << 8) | (b3))
+#endif
+
+#if defined(SAFE_IO)
+
+#define word_in(x) bytes2word((x)[0], (x)[1], (x)[2], (x)[3])
+#define word_out(x,v) { (x)[0] = bval(v,0); (x)[1] = bval(v,1); \
+ (x)[2] = bval(v,2); (x)[3] = bval(v,3); }
+
+#elif (INTERNAL_BYTE_ORDER == PLATFORM_BYTE_ORDER)
+
+#define word_in(x) *(aes_32t*)(x)
+#define word_out(x,v) *(aes_32t*)(x) = (v)
+
+#else
+
+#if !defined(bswap_32)
+#if !defined(_MSC_VER)
+#define _lrotl(x,n) ((aes_32t)(x) << n | (aes_32t)(x) >> 32 - n)
+#endif
+#define bswap_32(x) ((_lrotl((x),8) & 0x00ff00ff) | (_lrotl((x),24) & 0xff00ff00))
+#endif
+
+#define word_in(x) bswap_32(*(aes_32t*)(x))
+#define word_out(x,v) *(aes_32t*)(x) = bswap_32(v)
+
+#endif
+
+/* the finite field modular polynomial and elements */
+
+#define WPOLY 0x011b
+#define BPOLY 0x1b
+
+/* multiply four bytes in GF(2^8) by 'x' {02} in parallel */
+
+#define m1 0x80808080
+#define m2 0x7f7f7f7f
+#define FFmulX(x) ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * BPOLY))
+
+/* The following defines provide alternative definitions of FFmulX that might
+ give improved performance if a fast 32-bit multiply is not available. Note
+ that a temporary variable u needs to be defined where FFmulX is used.
+
+#define FFmulX(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ ((u >> 3) | (u >> 6))
+#define m4 (0x01010101 * BPOLY)
+#define FFmulX(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) & m4)
+*/
+
+/* Work out which tables are needed for the different options */
+
+#ifdef AES_ASM
+#ifdef ENC_ROUND
+#undef ENC_ROUND
+#endif
+#define ENC_ROUND FOUR_TABLES
+#ifdef LAST_ENC_ROUND
+#undef LAST_ENC_ROUND
+#endif
+#define LAST_ENC_ROUND FOUR_TABLES
+#ifdef DEC_ROUND
+#undef DEC_ROUND
+#endif
+#define DEC_ROUND FOUR_TABLES
+#ifdef LAST_DEC_ROUND
+#undef LAST_DEC_ROUND
+#endif
+#define LAST_DEC_ROUND FOUR_TABLES
+#ifdef KEY_SCHED
+#undef KEY_SCHED
+#define KEY_SCHED FOUR_TABLES
+#endif
+#endif
+
+#if defined(ENCRYPTION) || defined(AES_ASM)
+#if ENC_ROUND == ONE_TABLE
+#define FT1_SET
+#elif ENC_ROUND == FOUR_TABLES
+#define FT4_SET
+#else
+#define SBX_SET
+#endif
+#if LAST_ENC_ROUND == ONE_TABLE
+#define FL1_SET
+#elif LAST_ENC_ROUND == FOUR_TABLES
+#define FL4_SET
+#elif !defined(SBX_SET)
+#define SBX_SET
+#endif
+#endif
+
+#if defined(DECRYPTION) || defined(AES_ASM)
+#if DEC_ROUND == ONE_TABLE
+#define IT1_SET
+#elif DEC_ROUND == FOUR_TABLES
+#define IT4_SET
+#else
+#define ISB_SET
+#endif
+#if LAST_DEC_ROUND == ONE_TABLE
+#define IL1_SET
+#elif LAST_DEC_ROUND == FOUR_TABLES
+#define IL4_SET
+#elif !defined(ISB_SET)
+#define ISB_SET
+#endif
+#endif
+
+#if defined(ENCRYPTION_KEY_SCHEDULE) || defined(DECRYPTION_KEY_SCHEDULE)
+#if KEY_SCHED == ONE_TABLE
+#define LS1_SET
+#define IM1_SET
+#elif KEY_SCHED == FOUR_TABLES
+#define LS4_SET
+#define IM4_SET
+#elif !defined(SBX_SET)
+#define SBX_SET
+#endif
+#endif
+
+#ifdef FIXED_TABLES
+#define prefx extern const
+#else
+#define prefx extern
+extern aes_08t tab_init;
+void gen_tabs(void);
+#endif
+
+prefx aes_32t rcon_tab[29];
+
+#ifdef SBX_SET
+prefx aes_08t s_box[256];
+#endif
+
+#ifdef ISB_SET
+prefx aes_08t inv_s_box[256];
+#endif
+
+#ifdef FT1_SET
+prefx aes_32t ft_tab[256];
+#endif
+
+#ifdef FT4_SET
+prefx aes_32t ft_tab[4][256];
+#endif
+
+#ifdef FL1_SET
+prefx aes_32t fl_tab[256];
+#endif
+
+#ifdef FL4_SET
+prefx aes_32t fl_tab[4][256];
+#endif
+
+#ifdef IT1_SET
+prefx aes_32t it_tab[256];
+#endif
+
+#ifdef IT4_SET
+prefx aes_32t it_tab[4][256];
+#endif
+
+#ifdef IL1_SET
+prefx aes_32t il_tab[256];
+#endif
+
+#ifdef IL4_SET
+prefx aes_32t il_tab[4][256];
+#endif
+
+#ifdef LS1_SET
+#ifdef FL1_SET
+#undef LS1_SET
+#else
+prefx aes_32t ls_tab[256];
+#endif
+#endif
+
+#ifdef LS4_SET
+#ifdef FL4_SET
+#undef LS4_SET
+#else
+prefx aes_32t ls_tab[4][256];
+#endif
+#endif
+
+#ifdef IM1_SET
+prefx aes_32t im_tab[256];
+#endif
+
+#ifdef IM4_SET
+prefx aes_32t im_tab[4][256];
+#endif
+
+/* Set the number of columns in nc. Note that it is important
+ that nc is a constant which is known at compile time if the
+ highest speed version of the code is needed.
+*/
+
+#if defined(BLOCK_SIZE)
+#define nc (BLOCK_SIZE >> 2)
+#else
+#define nc (cx->n_blk >> 2)
+#endif
+
+/* generic definitions of Rijndael macros that use tables */
+
+#define no_table(x,box,vf,rf,c) bytes2word( \
+ box[bval(vf(x,0,c),rf(0,c))], \
+ box[bval(vf(x,1,c),rf(1,c))], \
+ box[bval(vf(x,2,c),rf(2,c))], \
+ box[bval(vf(x,3,c),rf(3,c))])
+
+#define one_table(x,op,tab,vf,rf,c) \
+ ( tab[bval(vf(x,0,c),rf(0,c))] \
+ ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \
+ ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \
+ ^ op(tab[bval(vf(x,3,c),rf(3,c))],3))
+
+#define four_tables(x,tab,vf,rf,c) \
+ ( tab[0][bval(vf(x,0,c),rf(0,c))] \
+ ^ tab[1][bval(vf(x,1,c),rf(1,c))] \
+ ^ tab[2][bval(vf(x,2,c),rf(2,c))] \
+ ^ tab[3][bval(vf(x,3,c),rf(3,c))])
+
+#define vf1(x,r,c) (x)
+#define rf1(r,c) (r)
+#define rf2(r,c) ((r-c)&3)
+
+/* perform forward and inverse column mix operation on four bytes in long word x in */
+/* parallel. NOTE: x must be a simple variable, NOT an expression in these macros. */
+
+#define dec_fmvars
+#if defined(FM4_SET) /* not currently used */
+#define fwd_mcol(x) four_tables(x,fm_tab,vf1,rf1,0)
+#elif defined(FM1_SET) /* not currently used */
+#define fwd_mcol(x) one_table(x,upr,fm_tab,vf1,rf1,0)
+#else
+#undef dec_fmvars
+#define dec_fmvars aes_32t f1, f2;
+#define fwd_mcol(x) (f1 = (x), f2 = FFmulX(f1), f2 ^ upr(f1 ^ f2, 3) ^ upr(f1, 2) ^ upr(f1, 1))
+#endif
+
+#define dec_imvars
+#if defined(IM4_SET)
+#define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0)
+#elif defined(IM1_SET)
+#define inv_mcol(x) one_table(x,upr,im_tab,vf1,rf1,0)
+#else
+#undef dec_imvars
+#define dec_imvars aes_32t f2, f4, f8, f9;
+#define inv_mcol(x) \
+ (f9 = (x), f2 = FFmulX(f9), f4 = FFmulX(f2), f8 = FFmulX(f4), f9 ^= f8, \
+ f2 ^= f4 ^ f8 ^ upr(f2 ^ f9,3) ^ upr(f4 ^ f9,2) ^ upr(f9,1))
+#endif
+
+#if defined(FL4_SET)
+#define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c)
+#elif defined(LS4_SET)
+#define ls_box(x,c) four_tables(x,ls_tab,vf1,rf2,c)
+#elif defined(FL1_SET)
+#define ls_box(x,c) one_table(x,upr,fl_tab,vf1,rf2,c)
+#elif defined(LS1_SET)
+#define ls_box(x,c) one_table(x,upr,ls_tab,vf1,rf2,c)
+#else
+#define ls_box(x,c) no_table(x,s_box,vf1,rf2,c)
+#endif
+
+#endif