Couple other benchmark comparisons for wp-x86_64.pl.
[openssl.git] / crypto / whrlpool / wp_block.c
1 /**
2  * The Whirlpool hashing function.
3  *
4  * <P>
5  * <b>References</b>
6  *
7  * <P>
8  * The Whirlpool algorithm was developed by
9  * <a href="mailto:pbarreto@scopus.com.br">Paulo S. L. M. Barreto</a> and
10  * <a href="mailto:vincent.rijmen@cryptomathic.com">Vincent Rijmen</a>.
11  *
12  * See
13  *      P.S.L.M. Barreto, V. Rijmen,
14  *      ``The Whirlpool hashing function,''
15  *      NESSIE submission, 2000 (tweaked version, 2001),
16  *      <https://www.cosic.esat.kuleuven.ac.be/nessie/workshop/submissions/whirlpool.zip>
17  *
18  * Based on "@version 3.0 (2003.03.12)" by Paulo S.L.M. Barreto and
19  * Vincent Rijmen. Lookup "reference implementations" on
20  * <http://planeta.terra.com.br/informatica/paulobarreto/>
21  *
22  * =============================================================================
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
25  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
26  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
28  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
31  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
32  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
33  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
34  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35  *
36  */
37
38 #include "wp_locl.h"
39 #include <string.h>
40
/* Local shorthand types: u8 is a single byte and u64 is the 8-byte word used
 * for the hash state and the lookup table; the spelling of u64 depends on
 * the compiler/ABI detected below.
 * NOTE(review): the MinGW predefined macro is conventionally spelled
 * __MINGW32__ (with trailing underscores) - confirm __MINGW32 is intended. */
41 typedef unsigned char           u8;
42 #if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32)
43 typedef unsigned __int64        u64;
44 #elif defined(__arch64__)
45 typedef unsigned long           u64;
46 #else
47 typedef unsigned long long      u64;
48 #endif
49
/* Number of rounds applied per block; also the number of round constants
 * stored after the lookup table. */
50 #define ROUNDS  10
51
/* STRICT_ALIGNMENT is assumed by default and is lifted only for the x86 and
 * x86_64 targets recognized below. */
52 #define STRICT_ALIGNMENT
53 #if defined(__i386) || defined(__i386__) || \
54     defined(__x86_64) || defined(__x86_64__) || \
55     defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64)
56 /* Well, formally there are a couple of other architectures which permit
57  * unaligned loads, specifically those not crossing cache lines: IA-64
58  * and PowerPC... */
59 #  undef STRICT_ALIGNMENT
60 #endif
61
/* SMALL_REGISTER_BANK is set for 32-bit x86 only. In whirlpool_block it
 * selects the round formulation that finishes one output word at a time
 * instead of accumulating all eight in parallel. */
62 #undef SMALL_REGISTER_BANK
63 #if defined(__i386) || defined(__i386__) || defined(_M_IX86)
64 #  define SMALL_REGISTER_BANK
65 #  if defined(WHIRLPOOL_ASM)
66 #    ifndef OPENSSL_SMALL_FOOTPRINT
67 #      define OPENSSL_SMALL_FOOTPRINT   /* it appears that for elder non-MMX
68                                            CPUs this is actually faster! */
69 #    endif
/* GO_FOR_MMX(ctx,inp,num): if bit 23 of OPENSSL_ia32cap_P is set (presumably
 * the MMX capability flag - confirm), hand the whole request to
 * whirlpool_block_mmx and return from the *enclosing* function; otherwise
 * fall through to the C code that follows the macro invocation. */
70 #    define GO_FOR_MMX(ctx,inp,num)     do {                    \
71         extern unsigned long OPENSSL_ia32cap_P;                 \
72         void whirlpool_block_mmx(void *,const void *,size_t);   \
73         if (!(OPENSSL_ia32cap_P & (1<<23)))     break;          \
74         whirlpool_block_mmx(ctx->H.c,inp,num);  return;         \
75                                         } while (0)
76 #  endif
77 #endif
78
/* ROTATE(a,n): 64-bit rotate implemented with a compiler intrinsic or inline
 * assembly where one is available. The direction depends on byte order:
 * the x86_64 variants rotate left (rolq) for L_ENDIAN and right (rorq) for
 * B_ENDIAN. If no branch below matches, ROTATE is left undefined and later
 * code tests for that with defined(ROTATE). */
79 #undef ROTATE
80 #if defined(_MSC_VER)
81 #  if defined(_WIN64)   /* applies to both IA-64 and AMD64 */
82 #    pragma intrinsic(_rotl64)
83 #    define ROTATE(a,n) _rotl64((a),n)
84 #  endif
85 #elif defined(__GNUC__) && __GNUC__>=2
86 #  if defined(__x86_64) || defined(__x86_64__)
87 #    if defined(L_ENDIAN)
88 #      define ROTATE(a,n)       ({ u64 ret; asm ("rolq %1,%0"   \
89                                    : "=r"(ret) : "J"(n),"0"(a) : "cc"); ret; })
90 #    elif defined(B_ENDIAN)
91        /* Most will argue that x86_64 is always little-endian. Well,
92         * yes, but then we have stratus.com who has modified gcc to
93         * "emulate" big-endian on x86. Is there evidence that they
94         * [or somebody else] won't do same for x86_64? Naturally no.
95         * And this line is waiting ready for that brave soul:-) */
96 #      define ROTATE(a,n)       ({ u64 ret; asm ("rorq %1,%0"   \
97                                    : "=r"(ret) : "J"(n),"0"(a) : "cc"); ret; })
98 #    endif
99 #  elif defined(__ia64) || defined(__ia64__)
/* IA-64: shrp is issued with a count of 64-(n) for L_ENDIAN and n for
 * B_ENDIAN - presumably a left and a right rotate respectively; confirm
 * against the IA-64 instruction reference. */
100 #    if defined(L_ENDIAN)
101 #      define ROTATE(a,n)       ({ u64 ret; asm ("shrp %0=%1,%1,%2"     \
102                                    : "=r"(ret) : "r"(a),"M"(64-(n))); ret; })
103 #    elif defined(B_ENDIAN)
104 #      define ROTATE(a,n)       ({ u64 ret; asm ("shrp %0=%1,%1,%2"     \
105                                    : "=r"(ret) : "r"(a),"M"(n)); ret; })
106 #    endif
107 #  endif
108 #endif
109
/* Small-footprint builds: supply a portable shift-based ROTATE when no native
 * one was selected above and the byte order is known, then force
 * STRICT_ALIGNMENT so that the one-entry-per-row table layout is chosen.
 * Note that n is not parenthesized inside (64-n); the Cn macros in this file
 * only ever pass literal constants (8,16,...,56), for which this is safe. */
110 #if defined(OPENSSL_SMALL_FOOTPRINT)
111 #  if !defined(ROTATE)
112 #    if defined(L_ENDIAN)       /* little-endians have to rotate left */
113 #      define ROTATE(i,n)       ((i)<<(n) ^ (i)>>(64-n))
114 #    elif defined(B_ENDIAN)     /* big-endians have to rotate right */
115 #      define ROTATE(i,n)       ((i)>>(n) ^ (i)<<(64-n))
116 #    endif
117 #  endif
118 #  if defined(ROTATE) && !defined(STRICT_ALIGNMENT)
119 #    define STRICT_ALIGNMENT    /* ensure smallest table size */
120 #  endif
121 #endif
122
123 /*
124  * Table size depends on STRICT_ALIGNMENT and whether or not endian-
125  * specific ROTATE macro is defined. If STRICT_ALIGNMENT is not
126  * defined, which is normally the case on x86[_64] CPUs, the table is
127  * 4KB large unconditionally. Otherwise if ROTATE is defined, the
128  * table is 2KB large, and otherwise - 16KB. 2KB table requires a
129  * whole bunch of additional rotations, but I'm willing to "trade,"
130  * because 16KB table certainly trashes L1 cache. I wish all CPUs
131  * could handle unaligned load as 4KB table doesn't trash the cache,
132  * nor does it require additional rotations.
133  */
134 /*
135  * Note that every Cn macro expands as two loads: one byte load and
136  * one quadword load. One can argue that that many single-byte loads
137  * is too excessive, as one could load a quadword and "milk" it for
138  * eight 8-bit values instead. Well, yes, but in order to do so *and*
139  * avoid excessive loads you have to accommodate a handful of 64-bit
140  * values in the register bank and issue a bunch of shifts and masks.
141  * It's a tradeoff: loads vs. shift and mask in big register bank[!].
142  * On most CPUs eight single-byte loads are faster, and I let the
143  * others depend on a smart compiler to fold byte loads if beneficial.
144  * Hand-coded assembler would be another alternative:-)
145  */
/*
 * Table layout selection.
 *   N         number of u64 slots each of the 256 table rows occupies
 *   LL(...)   expands one row's eight bytes into those N slots
 *   Cn(K,i)   looks up the row selected by byte n of 64-bit word i of K and
 *             delivers it positioned for column n
 */
146 #ifdef STRICT_ALIGNMENT
147 #  if defined(ROTATE)
/* One copy per row; column n is produced by rotating the entry 8*n bits. */
148 #    define N   1
149 #    define LL(c0,c1,c2,c3,c4,c5,c6,c7) c0,c1,c2,c3,c4,c5,c6,c7
150 #    define C0(K,i)     (Cx.q[K.c[(i)*8+0]])
151 #    define C1(K,i)     ROTATE(Cx.q[K.c[(i)*8+1]],8)
152 #    define C2(K,i)     ROTATE(Cx.q[K.c[(i)*8+2]],16)
153 #    define C3(K,i)     ROTATE(Cx.q[K.c[(i)*8+3]],24)
154 #    define C4(K,i)     ROTATE(Cx.q[K.c[(i)*8+4]],32)
155 #    define C5(K,i)     ROTATE(Cx.q[K.c[(i)*8+5]],40)
156 #    define C6(K,i)     ROTATE(Cx.q[K.c[(i)*8+6]],48)
157 #    define C7(K,i)     ROTATE(Cx.q[K.c[(i)*8+7]],56)
158 #  else
/* No ROTATE available: store eight byte-rotated copies of every row, so
 * column n simply reads copy n (slot n+8*index) with an aligned load. */
159 #    define N   8
160 #    define LL(c0,c1,c2,c3,c4,c5,c6,c7) c0,c1,c2,c3,c4,c5,c6,c7, \
161                                         c7,c0,c1,c2,c3,c4,c5,c6, \
162                                         c6,c7,c0,c1,c2,c3,c4,c5, \
163                                         c5,c6,c7,c0,c1,c2,c3,c4, \
164                                         c4,c5,c6,c7,c0,c1,c2,c3, \
165                                         c3,c4,c5,c6,c7,c0,c1,c2, \
166                                         c2,c3,c4,c5,c6,c7,c0,c1, \
167                                         c1,c2,c3,c4,c5,c6,c7,c0
168 #    define C0(K,i)     (Cx.q[0+8*K.c[(i)*8+0]])
169 #    define C1(K,i)     (Cx.q[1+8*K.c[(i)*8+1]])
170 #    define C2(K,i)     (Cx.q[2+8*K.c[(i)*8+2]])
171 #    define C3(K,i)     (Cx.q[3+8*K.c[(i)*8+3]])
172 #    define C4(K,i)     (Cx.q[4+8*K.c[(i)*8+4]])
173 #    define C5(K,i)     (Cx.q[5+8*K.c[(i)*8+5]])
174 #    define C6(K,i)     (Cx.q[6+8*K.c[(i)*8+6]])
175 #    define C7(K,i)     (Cx.q[7+8*K.c[(i)*8+7]])
176 #  endif
177 #else
/* Unaligned loads allowed: each row is stored twice back to back, and
 * column n is a u64 load that starts 8-n bytes into that 16-byte pair
 * (Cx.c+7 for C1 down to Cx.c+1 for C7), which yields the row's bytes
 * rotated by n positions without any shift instruction. */
178 #  define N     2
179 #  define LL(c0,c1,c2,c3,c4,c5,c6,c7)   c0,c1,c2,c3,c4,c5,c6,c7, \
180                                         c0,c1,c2,c3,c4,c5,c6,c7
181 #  define C0(K,i)       (((u64*)(Cx.c+0))[2*K.c[(i)*8+0]])
182 #  define C1(K,i)       (((u64*)(Cx.c+7))[2*K.c[(i)*8+1]])
183 #  define C2(K,i)       (((u64*)(Cx.c+6))[2*K.c[(i)*8+2]])
184 #  define C3(K,i)       (((u64*)(Cx.c+5))[2*K.c[(i)*8+3]])
185 #  define C4(K,i)       (((u64*)(Cx.c+4))[2*K.c[(i)*8+4]])
186 #  define C5(K,i)       (((u64*)(Cx.c+3))[2*K.c[(i)*8+5]])
187 #  define C6(K,i)       (((u64*)(Cx.c+2))[2*K.c[(i)*8+6]])
188 #  define C7(K,i)       (((u64*)(Cx.c+1))[2*K.c[(i)*8+7]])
189 #endif
190
/*
 * Cx: the combined constant table. It holds 256 rows, each expanded by LL()
 * into N u64 slots, followed by ROUNDS 8-byte round constants that are
 * reached through the RC macro (&Cx.q[256*N]). The union exposes the same
 * storage as bytes (c) and as 64-bit words (q); the initializer fills the
 * byte view, so the in-memory byte order is the same on every platform.
 * The round-constant bytes listed at the end repeat, in order, the first
 * byte of rows 0..79 of the table.
 */
191 static const
192 union   {
193         u8      c[(256*N+ROUNDS)*sizeof(u64)];
194         u64     q[(256*N+ROUNDS)];
195         } Cx = { {
196         /* Note endian-neutral representation:-) */
197         LL(0x18,0x18,0x60,0x18,0xc0,0x78,0x30,0xd8),
198         LL(0x23,0x23,0x8c,0x23,0x05,0xaf,0x46,0x26),
199         LL(0xc6,0xc6,0x3f,0xc6,0x7e,0xf9,0x91,0xb8),
200         LL(0xe8,0xe8,0x87,0xe8,0x13,0x6f,0xcd,0xfb),
201         LL(0x87,0x87,0x26,0x87,0x4c,0xa1,0x13,0xcb),
202         LL(0xb8,0xb8,0xda,0xb8,0xa9,0x62,0x6d,0x11),
203         LL(0x01,0x01,0x04,0x01,0x08,0x05,0x02,0x09),
204         LL(0x4f,0x4f,0x21,0x4f,0x42,0x6e,0x9e,0x0d),
205         LL(0x36,0x36,0xd8,0x36,0xad,0xee,0x6c,0x9b),
206         LL(0xa6,0xa6,0xa2,0xa6,0x59,0x04,0x51,0xff),
207         LL(0xd2,0xd2,0x6f,0xd2,0xde,0xbd,0xb9,0x0c),
208         LL(0xf5,0xf5,0xf3,0xf5,0xfb,0x06,0xf7,0x0e),
209         LL(0x79,0x79,0xf9,0x79,0xef,0x80,0xf2,0x96),
210         LL(0x6f,0x6f,0xa1,0x6f,0x5f,0xce,0xde,0x30),
211         LL(0x91,0x91,0x7e,0x91,0xfc,0xef,0x3f,0x6d),
212         LL(0x52,0x52,0x55,0x52,0xaa,0x07,0xa4,0xf8),
213         LL(0x60,0x60,0x9d,0x60,0x27,0xfd,0xc0,0x47),
214         LL(0xbc,0xbc,0xca,0xbc,0x89,0x76,0x65,0x35),
215         LL(0x9b,0x9b,0x56,0x9b,0xac,0xcd,0x2b,0x37),
216         LL(0x8e,0x8e,0x02,0x8e,0x04,0x8c,0x01,0x8a),
217         LL(0xa3,0xa3,0xb6,0xa3,0x71,0x15,0x5b,0xd2),
218         LL(0x0c,0x0c,0x30,0x0c,0x60,0x3c,0x18,0x6c),
219         LL(0x7b,0x7b,0xf1,0x7b,0xff,0x8a,0xf6,0x84),
220         LL(0x35,0x35,0xd4,0x35,0xb5,0xe1,0x6a,0x80),
221         LL(0x1d,0x1d,0x74,0x1d,0xe8,0x69,0x3a,0xf5),
222         LL(0xe0,0xe0,0xa7,0xe0,0x53,0x47,0xdd,0xb3),
223         LL(0xd7,0xd7,0x7b,0xd7,0xf6,0xac,0xb3,0x21),
224         LL(0xc2,0xc2,0x2f,0xc2,0x5e,0xed,0x99,0x9c),
225         LL(0x2e,0x2e,0xb8,0x2e,0x6d,0x96,0x5c,0x43),
226         LL(0x4b,0x4b,0x31,0x4b,0x62,0x7a,0x96,0x29),
227         LL(0xfe,0xfe,0xdf,0xfe,0xa3,0x21,0xe1,0x5d),
228         LL(0x57,0x57,0x41,0x57,0x82,0x16,0xae,0xd5),
229         LL(0x15,0x15,0x54,0x15,0xa8,0x41,0x2a,0xbd),
230         LL(0x77,0x77,0xc1,0x77,0x9f,0xb6,0xee,0xe8),
231         LL(0x37,0x37,0xdc,0x37,0xa5,0xeb,0x6e,0x92),
232         LL(0xe5,0xe5,0xb3,0xe5,0x7b,0x56,0xd7,0x9e),
233         LL(0x9f,0x9f,0x46,0x9f,0x8c,0xd9,0x23,0x13),
234         LL(0xf0,0xf0,0xe7,0xf0,0xd3,0x17,0xfd,0x23),
235         LL(0x4a,0x4a,0x35,0x4a,0x6a,0x7f,0x94,0x20),
236         LL(0xda,0xda,0x4f,0xda,0x9e,0x95,0xa9,0x44),
237         LL(0x58,0x58,0x7d,0x58,0xfa,0x25,0xb0,0xa2),
238         LL(0xc9,0xc9,0x03,0xc9,0x06,0xca,0x8f,0xcf),
239         LL(0x29,0x29,0xa4,0x29,0x55,0x8d,0x52,0x7c),
240         LL(0x0a,0x0a,0x28,0x0a,0x50,0x22,0x14,0x5a),
241         LL(0xb1,0xb1,0xfe,0xb1,0xe1,0x4f,0x7f,0x50),
242         LL(0xa0,0xa0,0xba,0xa0,0x69,0x1a,0x5d,0xc9),
243         LL(0x6b,0x6b,0xb1,0x6b,0x7f,0xda,0xd6,0x14),
244         LL(0x85,0x85,0x2e,0x85,0x5c,0xab,0x17,0xd9),
245         LL(0xbd,0xbd,0xce,0xbd,0x81,0x73,0x67,0x3c),
246         LL(0x5d,0x5d,0x69,0x5d,0xd2,0x34,0xba,0x8f),
247         LL(0x10,0x10,0x40,0x10,0x80,0x50,0x20,0x90),
248         LL(0xf4,0xf4,0xf7,0xf4,0xf3,0x03,0xf5,0x07),
249         LL(0xcb,0xcb,0x0b,0xcb,0x16,0xc0,0x8b,0xdd),
250         LL(0x3e,0x3e,0xf8,0x3e,0xed,0xc6,0x7c,0xd3),
251         LL(0x05,0x05,0x14,0x05,0x28,0x11,0x0a,0x2d),
252         LL(0x67,0x67,0x81,0x67,0x1f,0xe6,0xce,0x78),
253         LL(0xe4,0xe4,0xb7,0xe4,0x73,0x53,0xd5,0x97),
254         LL(0x27,0x27,0x9c,0x27,0x25,0xbb,0x4e,0x02),
255         LL(0x41,0x41,0x19,0x41,0x32,0x58,0x82,0x73),
256         LL(0x8b,0x8b,0x16,0x8b,0x2c,0x9d,0x0b,0xa7),
257         LL(0xa7,0xa7,0xa6,0xa7,0x51,0x01,0x53,0xf6),
258         LL(0x7d,0x7d,0xe9,0x7d,0xcf,0x94,0xfa,0xb2),
259         LL(0x95,0x95,0x6e,0x95,0xdc,0xfb,0x37,0x49),
260         LL(0xd8,0xd8,0x47,0xd8,0x8e,0x9f,0xad,0x56),
261         LL(0xfb,0xfb,0xcb,0xfb,0x8b,0x30,0xeb,0x70),
262         LL(0xee,0xee,0x9f,0xee,0x23,0x71,0xc1,0xcd),
263         LL(0x7c,0x7c,0xed,0x7c,0xc7,0x91,0xf8,0xbb),
264         LL(0x66,0x66,0x85,0x66,0x17,0xe3,0xcc,0x71),
265         LL(0xdd,0xdd,0x53,0xdd,0xa6,0x8e,0xa7,0x7b),
266         LL(0x17,0x17,0x5c,0x17,0xb8,0x4b,0x2e,0xaf),
267         LL(0x47,0x47,0x01,0x47,0x02,0x46,0x8e,0x45),
268         LL(0x9e,0x9e,0x42,0x9e,0x84,0xdc,0x21,0x1a),
269         LL(0xca,0xca,0x0f,0xca,0x1e,0xc5,0x89,0xd4),
270         LL(0x2d,0x2d,0xb4,0x2d,0x75,0x99,0x5a,0x58),
271         LL(0xbf,0xbf,0xc6,0xbf,0x91,0x79,0x63,0x2e),
272         LL(0x07,0x07,0x1c,0x07,0x38,0x1b,0x0e,0x3f),
273         LL(0xad,0xad,0x8e,0xad,0x01,0x23,0x47,0xac),
274         LL(0x5a,0x5a,0x75,0x5a,0xea,0x2f,0xb4,0xb0),
275         LL(0x83,0x83,0x36,0x83,0x6c,0xb5,0x1b,0xef),
276         LL(0x33,0x33,0xcc,0x33,0x85,0xff,0x66,0xb6),
277         LL(0x63,0x63,0x91,0x63,0x3f,0xf2,0xc6,0x5c),
278         LL(0x02,0x02,0x08,0x02,0x10,0x0a,0x04,0x12),
279         LL(0xaa,0xaa,0x92,0xaa,0x39,0x38,0x49,0x93),
280         LL(0x71,0x71,0xd9,0x71,0xaf,0xa8,0xe2,0xde),
281         LL(0xc8,0xc8,0x07,0xc8,0x0e,0xcf,0x8d,0xc6),
282         LL(0x19,0x19,0x64,0x19,0xc8,0x7d,0x32,0xd1),
283         LL(0x49,0x49,0x39,0x49,0x72,0x70,0x92,0x3b),
284         LL(0xd9,0xd9,0x43,0xd9,0x86,0x9a,0xaf,0x5f),
285         LL(0xf2,0xf2,0xef,0xf2,0xc3,0x1d,0xf9,0x31),
286         LL(0xe3,0xe3,0xab,0xe3,0x4b,0x48,0xdb,0xa8),
287         LL(0x5b,0x5b,0x71,0x5b,0xe2,0x2a,0xb6,0xb9),
288         LL(0x88,0x88,0x1a,0x88,0x34,0x92,0x0d,0xbc),
289         LL(0x9a,0x9a,0x52,0x9a,0xa4,0xc8,0x29,0x3e),
290         LL(0x26,0x26,0x98,0x26,0x2d,0xbe,0x4c,0x0b),
291         LL(0x32,0x32,0xc8,0x32,0x8d,0xfa,0x64,0xbf),
292         LL(0xb0,0xb0,0xfa,0xb0,0xe9,0x4a,0x7d,0x59),
293         LL(0xe9,0xe9,0x83,0xe9,0x1b,0x6a,0xcf,0xf2),
294         LL(0x0f,0x0f,0x3c,0x0f,0x78,0x33,0x1e,0x77),
295         LL(0xd5,0xd5,0x73,0xd5,0xe6,0xa6,0xb7,0x33),
296         LL(0x80,0x80,0x3a,0x80,0x74,0xba,0x1d,0xf4),
297         LL(0xbe,0xbe,0xc2,0xbe,0x99,0x7c,0x61,0x27),
298         LL(0xcd,0xcd,0x13,0xcd,0x26,0xde,0x87,0xeb),
299         LL(0x34,0x34,0xd0,0x34,0xbd,0xe4,0x68,0x89),
300         LL(0x48,0x48,0x3d,0x48,0x7a,0x75,0x90,0x32),
301         LL(0xff,0xff,0xdb,0xff,0xab,0x24,0xe3,0x54),
302         LL(0x7a,0x7a,0xf5,0x7a,0xf7,0x8f,0xf4,0x8d),
303         LL(0x90,0x90,0x7a,0x90,0xf4,0xea,0x3d,0x64),
304         LL(0x5f,0x5f,0x61,0x5f,0xc2,0x3e,0xbe,0x9d),
305         LL(0x20,0x20,0x80,0x20,0x1d,0xa0,0x40,0x3d),
306         LL(0x68,0x68,0xbd,0x68,0x67,0xd5,0xd0,0x0f),
307         LL(0x1a,0x1a,0x68,0x1a,0xd0,0x72,0x34,0xca),
308         LL(0xae,0xae,0x82,0xae,0x19,0x2c,0x41,0xb7),
309         LL(0xb4,0xb4,0xea,0xb4,0xc9,0x5e,0x75,0x7d),
310         LL(0x54,0x54,0x4d,0x54,0x9a,0x19,0xa8,0xce),
311         LL(0x93,0x93,0x76,0x93,0xec,0xe5,0x3b,0x7f),
312         LL(0x22,0x22,0x88,0x22,0x0d,0xaa,0x44,0x2f),
313         LL(0x64,0x64,0x8d,0x64,0x07,0xe9,0xc8,0x63),
314         LL(0xf1,0xf1,0xe3,0xf1,0xdb,0x12,0xff,0x2a),
315         LL(0x73,0x73,0xd1,0x73,0xbf,0xa2,0xe6,0xcc),
316         LL(0x12,0x12,0x48,0x12,0x90,0x5a,0x24,0x82),
317         LL(0x40,0x40,0x1d,0x40,0x3a,0x5d,0x80,0x7a),
318         LL(0x08,0x08,0x20,0x08,0x40,0x28,0x10,0x48),
319         LL(0xc3,0xc3,0x2b,0xc3,0x56,0xe8,0x9b,0x95),
320         LL(0xec,0xec,0x97,0xec,0x33,0x7b,0xc5,0xdf),
321         LL(0xdb,0xdb,0x4b,0xdb,0x96,0x90,0xab,0x4d),
322         LL(0xa1,0xa1,0xbe,0xa1,0x61,0x1f,0x5f,0xc0),
323         LL(0x8d,0x8d,0x0e,0x8d,0x1c,0x83,0x07,0x91),
324         LL(0x3d,0x3d,0xf4,0x3d,0xf5,0xc9,0x7a,0xc8),
325         LL(0x97,0x97,0x66,0x97,0xcc,0xf1,0x33,0x5b),
326         LL(0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00),
327         LL(0xcf,0xcf,0x1b,0xcf,0x36,0xd4,0x83,0xf9),
328         LL(0x2b,0x2b,0xac,0x2b,0x45,0x87,0x56,0x6e),
329         LL(0x76,0x76,0xc5,0x76,0x97,0xb3,0xec,0xe1),
330         LL(0x82,0x82,0x32,0x82,0x64,0xb0,0x19,0xe6),
331         LL(0xd6,0xd6,0x7f,0xd6,0xfe,0xa9,0xb1,0x28),
332         LL(0x1b,0x1b,0x6c,0x1b,0xd8,0x77,0x36,0xc3),
333         LL(0xb5,0xb5,0xee,0xb5,0xc1,0x5b,0x77,0x74),
334         LL(0xaf,0xaf,0x86,0xaf,0x11,0x29,0x43,0xbe),
335         LL(0x6a,0x6a,0xb5,0x6a,0x77,0xdf,0xd4,0x1d),
336         LL(0x50,0x50,0x5d,0x50,0xba,0x0d,0xa0,0xea),
337         LL(0x45,0x45,0x09,0x45,0x12,0x4c,0x8a,0x57),
338         LL(0xf3,0xf3,0xeb,0xf3,0xcb,0x18,0xfb,0x38),
339         LL(0x30,0x30,0xc0,0x30,0x9d,0xf0,0x60,0xad),
340         LL(0xef,0xef,0x9b,0xef,0x2b,0x74,0xc3,0xc4),
341         LL(0x3f,0x3f,0xfc,0x3f,0xe5,0xc3,0x7e,0xda),
342         LL(0x55,0x55,0x49,0x55,0x92,0x1c,0xaa,0xc7),
343         LL(0xa2,0xa2,0xb2,0xa2,0x79,0x10,0x59,0xdb),
344         LL(0xea,0xea,0x8f,0xea,0x03,0x65,0xc9,0xe9),
345         LL(0x65,0x65,0x89,0x65,0x0f,0xec,0xca,0x6a),
346         LL(0xba,0xba,0xd2,0xba,0xb9,0x68,0x69,0x03),
347         LL(0x2f,0x2f,0xbc,0x2f,0x65,0x93,0x5e,0x4a),
348         LL(0xc0,0xc0,0x27,0xc0,0x4e,0xe7,0x9d,0x8e),
349         LL(0xde,0xde,0x5f,0xde,0xbe,0x81,0xa1,0x60),
350         LL(0x1c,0x1c,0x70,0x1c,0xe0,0x6c,0x38,0xfc),
351         LL(0xfd,0xfd,0xd3,0xfd,0xbb,0x2e,0xe7,0x46),
352         LL(0x4d,0x4d,0x29,0x4d,0x52,0x64,0x9a,0x1f),
353         LL(0x92,0x92,0x72,0x92,0xe4,0xe0,0x39,0x76),
354         LL(0x75,0x75,0xc9,0x75,0x8f,0xbc,0xea,0xfa),
355         LL(0x06,0x06,0x18,0x06,0x30,0x1e,0x0c,0x36),
356         LL(0x8a,0x8a,0x12,0x8a,0x24,0x98,0x09,0xae),
357         LL(0xb2,0xb2,0xf2,0xb2,0xf9,0x40,0x79,0x4b),
358         LL(0xe6,0xe6,0xbf,0xe6,0x63,0x59,0xd1,0x85),
359         LL(0x0e,0x0e,0x38,0x0e,0x70,0x36,0x1c,0x7e),
360         LL(0x1f,0x1f,0x7c,0x1f,0xf8,0x63,0x3e,0xe7),
361         LL(0x62,0x62,0x95,0x62,0x37,0xf7,0xc4,0x55),
362         LL(0xd4,0xd4,0x77,0xd4,0xee,0xa3,0xb5,0x3a),
363         LL(0xa8,0xa8,0x9a,0xa8,0x29,0x32,0x4d,0x81),
364         LL(0x96,0x96,0x62,0x96,0xc4,0xf4,0x31,0x52),
365         LL(0xf9,0xf9,0xc3,0xf9,0x9b,0x3a,0xef,0x62),
366         LL(0xc5,0xc5,0x33,0xc5,0x66,0xf6,0x97,0xa3),
367         LL(0x25,0x25,0x94,0x25,0x35,0xb1,0x4a,0x10),
368         LL(0x59,0x59,0x79,0x59,0xf2,0x20,0xb2,0xab),
369         LL(0x84,0x84,0x2a,0x84,0x54,0xae,0x15,0xd0),
370         LL(0x72,0x72,0xd5,0x72,0xb7,0xa7,0xe4,0xc5),
371         LL(0x39,0x39,0xe4,0x39,0xd5,0xdd,0x72,0xec),
372         LL(0x4c,0x4c,0x2d,0x4c,0x5a,0x61,0x98,0x16),
373         LL(0x5e,0x5e,0x65,0x5e,0xca,0x3b,0xbc,0x94),
374         LL(0x78,0x78,0xfd,0x78,0xe7,0x85,0xf0,0x9f),
375         LL(0x38,0x38,0xe0,0x38,0xdd,0xd8,0x70,0xe5),
376         LL(0x8c,0x8c,0x0a,0x8c,0x14,0x86,0x05,0x98),
377         LL(0xd1,0xd1,0x63,0xd1,0xc6,0xb2,0xbf,0x17),
378         LL(0xa5,0xa5,0xae,0xa5,0x41,0x0b,0x57,0xe4),
379         LL(0xe2,0xe2,0xaf,0xe2,0x43,0x4d,0xd9,0xa1),
380         LL(0x61,0x61,0x99,0x61,0x2f,0xf8,0xc2,0x4e),
381         LL(0xb3,0xb3,0xf6,0xb3,0xf1,0x45,0x7b,0x42),
382         LL(0x21,0x21,0x84,0x21,0x15,0xa5,0x42,0x34),
383         LL(0x9c,0x9c,0x4a,0x9c,0x94,0xd6,0x25,0x08),
384         LL(0x1e,0x1e,0x78,0x1e,0xf0,0x66,0x3c,0xee),
385         LL(0x43,0x43,0x11,0x43,0x22,0x52,0x86,0x61),
386         LL(0xc7,0xc7,0x3b,0xc7,0x76,0xfc,0x93,0xb1),
387         LL(0xfc,0xfc,0xd7,0xfc,0xb3,0x2b,0xe5,0x4f),
388         LL(0x04,0x04,0x10,0x04,0x20,0x14,0x08,0x24),
389         LL(0x51,0x51,0x59,0x51,0xb2,0x08,0xa2,0xe3),
390         LL(0x99,0x99,0x5e,0x99,0xbc,0xc7,0x2f,0x25),
391         LL(0x6d,0x6d,0xa9,0x6d,0x4f,0xc4,0xda,0x22),
392         LL(0x0d,0x0d,0x34,0x0d,0x68,0x39,0x1a,0x65),
393         LL(0xfa,0xfa,0xcf,0xfa,0x83,0x35,0xe9,0x79),
394         LL(0xdf,0xdf,0x5b,0xdf,0xb6,0x84,0xa3,0x69),
395         LL(0x7e,0x7e,0xe5,0x7e,0xd7,0x9b,0xfc,0xa9),
396         LL(0x24,0x24,0x90,0x24,0x3d,0xb4,0x48,0x19),
397         LL(0x3b,0x3b,0xec,0x3b,0xc5,0xd7,0x76,0xfe),
398         LL(0xab,0xab,0x96,0xab,0x31,0x3d,0x4b,0x9a),
399         LL(0xce,0xce,0x1f,0xce,0x3e,0xd1,0x81,0xf0),
400         LL(0x11,0x11,0x44,0x11,0x88,0x55,0x22,0x99),
401         LL(0x8f,0x8f,0x06,0x8f,0x0c,0x89,0x03,0x83),
402         LL(0x4e,0x4e,0x25,0x4e,0x4a,0x6b,0x9c,0x04),
403         LL(0xb7,0xb7,0xe6,0xb7,0xd1,0x51,0x73,0x66),
404         LL(0xeb,0xeb,0x8b,0xeb,0x0b,0x60,0xcb,0xe0),
405         LL(0x3c,0x3c,0xf0,0x3c,0xfd,0xcc,0x78,0xc1),
406         LL(0x81,0x81,0x3e,0x81,0x7c,0xbf,0x1f,0xfd),
407         LL(0x94,0x94,0x6a,0x94,0xd4,0xfe,0x35,0x40),
408         LL(0xf7,0xf7,0xfb,0xf7,0xeb,0x0c,0xf3,0x1c),
409         LL(0xb9,0xb9,0xde,0xb9,0xa1,0x67,0x6f,0x18),
410         LL(0x13,0x13,0x4c,0x13,0x98,0x5f,0x26,0x8b),
411         LL(0x2c,0x2c,0xb0,0x2c,0x7d,0x9c,0x58,0x51),
412         LL(0xd3,0xd3,0x6b,0xd3,0xd6,0xb8,0xbb,0x05),
413         LL(0xe7,0xe7,0xbb,0xe7,0x6b,0x5c,0xd3,0x8c),
414         LL(0x6e,0x6e,0xa5,0x6e,0x57,0xcb,0xdc,0x39),
415         LL(0xc4,0xc4,0x37,0xc4,0x6e,0xf3,0x95,0xaa),
416         LL(0x03,0x03,0x0c,0x03,0x18,0x0f,0x06,0x1b),
417         LL(0x56,0x56,0x45,0x56,0x8a,0x13,0xac,0xdc),
418         LL(0x44,0x44,0x0d,0x44,0x1a,0x49,0x88,0x5e),
419         LL(0x7f,0x7f,0xe1,0x7f,0xdf,0x9e,0xfe,0xa0),
420         LL(0xa9,0xa9,0x9e,0xa9,0x21,0x37,0x4f,0x88),
421         LL(0x2a,0x2a,0xa8,0x2a,0x4d,0x82,0x54,0x67),
422         LL(0xbb,0xbb,0xd6,0xbb,0xb1,0x6d,0x6b,0x0a),
423         LL(0xc1,0xc1,0x23,0xc1,0x46,0xe2,0x9f,0x87),
424         LL(0x53,0x53,0x51,0x53,0xa2,0x02,0xa6,0xf1),
425         LL(0xdc,0xdc,0x57,0xdc,0xae,0x8b,0xa5,0x72),
426         LL(0x0b,0x0b,0x2c,0x0b,0x58,0x27,0x16,0x53),
427         LL(0x9d,0x9d,0x4e,0x9d,0x9c,0xd3,0x27,0x01),
428         LL(0x6c,0x6c,0xad,0x6c,0x47,0xc1,0xd8,0x2b),
429         LL(0x31,0x31,0xc4,0x31,0x95,0xf5,0x62,0xa4),
430         LL(0x74,0x74,0xcd,0x74,0x87,0xb9,0xe8,0xf3),
431         LL(0xf6,0xf6,0xff,0xf6,0xe3,0x09,0xf1,0x15),
432         LL(0x46,0x46,0x05,0x46,0x0a,0x43,0x8c,0x4c),
433         LL(0xac,0xac,0x8a,0xac,0x09,0x26,0x45,0xa5),
434         LL(0x89,0x89,0x1e,0x89,0x3c,0x97,0x0f,0xb5),
435         LL(0x14,0x14,0x50,0x14,0xa0,0x44,0x28,0xb4),
436         LL(0xe1,0xe1,0xa3,0xe1,0x5b,0x42,0xdf,0xba),
437         LL(0x16,0x16,0x58,0x16,0xb0,0x4e,0x2c,0xa6),
438         LL(0x3a,0x3a,0xe8,0x3a,0xcd,0xd2,0x74,0xf7),
439         LL(0x69,0x69,0xb9,0x69,0x6f,0xd0,0xd2,0x06),
440         LL(0x09,0x09,0x24,0x09,0x48,0x2d,0x12,0x41),
441         LL(0x70,0x70,0xdd,0x70,0xa7,0xad,0xe0,0xd7),
442         LL(0xb6,0xb6,0xe2,0xb6,0xd9,0x54,0x71,0x6f),
443         LL(0xd0,0xd0,0x67,0xd0,0xce,0xb7,0xbd,0x1e),
444         LL(0xed,0xed,0x93,0xed,0x3b,0x7e,0xc7,0xd6),
445         LL(0xcc,0xcc,0x17,0xcc,0x2e,0xdb,0x85,0xe2),
446         LL(0x42,0x42,0x15,0x42,0x2a,0x57,0x84,0x68),
447         LL(0x98,0x98,0x5a,0x98,0xb4,0xc2,0x2d,0x2c),
448         LL(0xa4,0xa4,0xaa,0xa4,0x49,0x0e,0x55,0xed),
449         LL(0x28,0x28,0xa0,0x28,0x5d,0x88,0x50,0x75),
450         LL(0x5c,0x5c,0x6d,0x5c,0xda,0x31,0xb8,0x86),
451         LL(0xf8,0xf8,0xc7,0xf8,0x93,0x3f,0xed,0x6b),
452         LL(0x86,0x86,0x22,0x86,0x44,0xa4,0x11,0xc2),
453 #define RC      (&(Cx.q[256*N]))
454         0x18,0x23,0xc6,0xe8,0x87,0xb8,0x01,0x4f,        /* rc[ROUNDS] */
455         0x36,0xa6,0xd2,0xf5,0x79,0x6f,0x91,0x52,
456         0x60,0xbc,0x9b,0x8e,0xa3,0x0c,0x7b,0x35,
457         0x1d,0xe0,0xd7,0xc2,0x2e,0x4b,0xfe,0x57,
458         0x15,0x77,0x37,0xe5,0x9f,0xf0,0x4a,0xda,
459         0x58,0xc9,0x29,0x0a,0xb1,0xa0,0x6b,0x85,
460         0xbd,0x5d,0x10,0xf4,0xcb,0x3e,0x05,0x67,
461         0xe4,0x27,0x41,0x8b,0xa7,0x7d,0x95,0xd8,
462         0xfb,0xee,0x7c,0x66,0xdd,0x17,0x47,0x9e,
463         0xca,0x2d,0xbf,0x07,0xad,0x5a,0x83,0x33
464         }
465 };
466
/*
 * whirlpool_block - fold n consecutive 64-byte blocks into the chaining value.
 *
 *   ctx  context whose H member is read and updated in place
 *   inp  input data; 64 bytes are consumed per block
 *   n    number of blocks; must be at least 1, because the do/while loop
 *        runs its body once before n is tested (n == 0 would wrap around)
 *
 * Per block: K starts as H and S as H xor block. Each of the ROUNDS rounds
 * first derives a new K from the table lookups of the old K (with RC[r]
 * mixed into word 0), then derives a new S from the table lookups of the old
 * S xor the new K. Finally H ^= S ^ block.
 */
467 void whirlpool_block(WHIRLPOOL_CTX *ctx,const void *inp,size_t n)
468         {
469         int     r;
470         const u8 *p=inp;
        /* S = cipher state, K = round key; H aliases ctx->H so that it can
         * be accessed either as bytes (c) or as 64-bit words (q). */
471         union   { u64 q[8]; u8 c[64]; } S,K,*H=(void *)ctx->H.q;
472
        /* When GO_FOR_MMX is defined it may process the whole request with
         * whirlpool_block_mmx and return from this function right here. */
473 #ifdef GO_FOR_MMX
474         GO_FOR_MMX(ctx,inp,n);
475 #endif
        /* One loop iteration per 64-byte block. */
476                                                         do {
477 #ifdef OPENSSL_SMALL_FOOTPRINT
        /* Compact variant: rounds are written as loops over the eight words. */
478         u64     L[8];
479         int     i;
480
        /* K = H, S = H xor block, byte by byte (no alignment requirement). */
481         for (i=0;i<64;i++)      S.c[i] = (K.c[i] = H->c[i]) ^ p[i];
482         for (r=0;r<ROUNDS;r++)
483                 {
                /* Key schedule: the round constant enters word 0 only;
                 * (i-k)&7 wraps the source word index modulo 8. */
484                 for (i=0;i<8;i++)
485                         {
486                         L[i]  = i ? 0 : RC[r];
487                         L[i] ^= C0(K,i)       ^ C1(K,(i-1)&7) ^
488                                 C2(K,(i-2)&7) ^ C3(K,(i-3)&7) ^
489                                 C4(K,(i-4)&7) ^ C5(K,(i-5)&7) ^
490                                 C6(K,(i-6)&7) ^ C7(K,(i-7)&7);
491                         }
492                 memcpy (K.q,L,64);
                /* State update: L still holds the new K, so the xor below
                 * mixes the round key into the transformed state. */
493                 for (i=0;i<8;i++)
494                         {
495                         L[i] ^= C0(S,i)       ^ C1(S,(i-1)&7) ^
496                                 C2(S,(i-2)&7) ^ C3(S,(i-3)&7) ^
497                                 C4(S,(i-4)&7) ^ C5(S,(i-5)&7) ^
498                                 C6(S,(i-6)&7) ^ C7(S,(i-7)&7);
499                         }
500                 memcpy (S.q,L,64);
501                 }
        /* Feed-forward: H ^= S ^ block. */
502         for (i=0;i<64;i++)      H->c[i] ^= S.c[i] ^ p[i];
503 #else
        /* Unrolled variant: the eight result words live in separate locals. */
504         u64     L0,L1,L2,L3,L4,L5,L6,L7;
505
506 #ifdef STRICT_ALIGNMENT
        /* Input is not 8-byte aligned: copy the block into S first so that
         * all 64-bit accesses go to aligned storage. */
507         if ((size_t)p & 7)
508                 {
509                 memcpy (S.c,p,64);
510                 S.q[0] ^= (K.q[0] = H->q[0]);
511                 S.q[1] ^= (K.q[1] = H->q[1]);
512                 S.q[2] ^= (K.q[2] = H->q[2]);
513                 S.q[3] ^= (K.q[3] = H->q[3]);
514                 S.q[4] ^= (K.q[4] = H->q[4]);
515                 S.q[5] ^= (K.q[5] = H->q[5]);
516                 S.q[6] ^= (K.q[6] = H->q[6]);
517                 S.q[7] ^= (K.q[7] = H->q[7]);
518                 }
519         else
520 #endif
                /* Aligned input (or alignment not enforced on this target):
                 * read the block directly as eight 64-bit words. */
521                 {
522                 const u64 *pa = (const u64*)p;
523                 S.q[0] = (K.q[0] = H->q[0]) ^ pa[0];
524                 S.q[1] = (K.q[1] = H->q[1]) ^ pa[1];
525                 S.q[2] = (K.q[2] = H->q[2]) ^ pa[2];
526                 S.q[3] = (K.q[3] = H->q[3]) ^ pa[3];
527                 S.q[4] = (K.q[4] = H->q[4]) ^ pa[4];
528                 S.q[5] = (K.q[5] = H->q[5]) ^ pa[5];
529                 S.q[6] = (K.q[6] = H->q[6]) ^ pa[6];
530                 S.q[7] = (K.q[7] = H->q[7]) ^ pa[7];
531                 }
532
533         for(r=0;r<ROUNDS;r++)
534                 {
535 #ifdef SMALL_REGISTER_BANK
                /* Gather form: each output word is completed in a single
                 * expression before the next one is started. */
536                 L0 =    C0(K,0) ^ C1(K,7) ^ C2(K,6) ^ C3(K,5) ^
537                         C4(K,4) ^ C5(K,3) ^ C6(K,2) ^ C7(K,1) ^ RC[r];
538                 L1 =    C0(K,1) ^ C1(K,0) ^ C2(K,7) ^ C3(K,6) ^
539                         C4(K,5) ^ C5(K,4) ^ C6(K,3) ^ C7(K,2);
540                 L2 =    C0(K,2) ^ C1(K,1) ^ C2(K,0) ^ C3(K,7) ^
541                         C4(K,6) ^ C5(K,5) ^ C6(K,4) ^ C7(K,3);
542                 L3 =    C0(K,3) ^ C1(K,2) ^ C2(K,1) ^ C3(K,0) ^
543                         C4(K,7) ^ C5(K,6) ^ C6(K,5) ^ C7(K,4);
544                 L4 =    C0(K,4) ^ C1(K,3) ^ C2(K,2) ^ C3(K,1) ^
545                         C4(K,0) ^ C5(K,7) ^ C6(K,6) ^ C7(K,5);
546                 L5 =    C0(K,5) ^ C1(K,4) ^ C2(K,3) ^ C3(K,2) ^
547                         C4(K,1) ^ C5(K,0) ^ C6(K,7) ^ C7(K,6);
548                 L6 =    C0(K,6) ^ C1(K,5) ^ C2(K,4) ^ C3(K,3) ^
549                         C4(K,2) ^ C5(K,1) ^ C6(K,0) ^ C7(K,7);
550                 L7 =    C0(K,7) ^ C1(K,6) ^ C2(K,5) ^ C3(K,4) ^
551                         C4(K,3) ^ C5(K,2) ^ C6(K,1) ^ C7(K,0);
552
                /* L0..L7 now hold the new round key; store it. */
553                 K.q[0] = L0; K.q[1] = L1; K.q[2] = L2; K.q[3] = L3;
554                 K.q[4] = L4; K.q[5] = L5; K.q[6] = L6; K.q[7] = L7;
555
                /* New state = table lookups of old state xor new round key. */
556                 L0 ^=   C0(S,0) ^ C1(S,7) ^ C2(S,6) ^ C3(S,5) ^
557                         C4(S,4) ^ C5(S,3) ^ C6(S,2) ^ C7(S,1);
558                 L1 ^=   C0(S,1) ^ C1(S,0) ^ C2(S,7) ^ C3(S,6) ^
559                         C4(S,5) ^ C5(S,4) ^ C6(S,3) ^ C7(S,2);
560                 L2 ^=   C0(S,2) ^ C1(S,1) ^ C2(S,0) ^ C3(S,7) ^
561                         C4(S,6) ^ C5(S,5) ^ C6(S,4) ^ C7(S,3);
562                 L3 ^=   C0(S,3) ^ C1(S,2) ^ C2(S,1) ^ C3(S,0) ^
563                         C4(S,7) ^ C5(S,6) ^ C6(S,5) ^ C7(S,4);
564                 L4 ^=   C0(S,4) ^ C1(S,3) ^ C2(S,2) ^ C3(S,1) ^
565                         C4(S,0) ^ C5(S,7) ^ C6(S,6) ^ C7(S,5);
566                 L5 ^=   C0(S,5) ^ C1(S,4) ^ C2(S,3) ^ C3(S,2) ^
567                         C4(S,1) ^ C5(S,0) ^ C6(S,7) ^ C7(S,6);
568                 L6 ^=   C0(S,6) ^ C1(S,5) ^ C2(S,4) ^ C3(S,3) ^
569                         C4(S,2) ^ C5(S,1) ^ C6(S,0) ^ C7(S,7);
570                 L7 ^=   C0(S,7) ^ C1(S,6) ^ C2(S,5) ^ C3(S,4) ^
571                         C4(S,3) ^ C5(S,2) ^ C6(S,1) ^ C7(S,0);
572
573                 S.q[0] = L0; S.q[1] = L1; S.q[2] = L2; S.q[3] = L3;
574                 S.q[4] = L4; S.q[5] = L5; S.q[6] = L6; S.q[7] = L7;
575 #else
                /* Scatter form: each input word is consumed in turn and its
                 * eight lookups are spread over the eight accumulators; the
                 * resulting L0..L7 are the same xor sums as in the gather
                 * form above. */
576                 L0  = C0(K,0); L1  = C1(K,0); L2  = C2(K,0); L3  = C3(K,0);
577                 L4  = C4(K,0); L5  = C5(K,0); L6  = C6(K,0); L7  = C7(K,0);
578                 L0 ^= RC[r];
579
580                 L1 ^= C0(K,1); L2 ^= C1(K,1); L3 ^= C2(K,1); L4 ^= C3(K,1);
581                 L5 ^= C4(K,1); L6 ^= C5(K,1); L7 ^= C6(K,1); L0 ^= C7(K,1);
582
583                 L2 ^= C0(K,2); L3 ^= C1(K,2); L4 ^= C2(K,2); L5 ^= C3(K,2);
584                 L6 ^= C4(K,2); L7 ^= C5(K,2); L0 ^= C6(K,2); L1 ^= C7(K,2);
585
586                 L3 ^= C0(K,3); L4 ^= C1(K,3); L5 ^= C2(K,3); L6 ^= C3(K,3);
587                 L7 ^= C4(K,3); L0 ^= C5(K,3); L1 ^= C6(K,3); L2 ^= C7(K,3);
588
589                 L4 ^= C0(K,4); L5 ^= C1(K,4); L6 ^= C2(K,4); L7 ^= C3(K,4);
590                 L0 ^= C4(K,4); L1 ^= C5(K,4); L2 ^= C6(K,4); L3 ^= C7(K,4);
591
592                 L5 ^= C0(K,5); L6 ^= C1(K,5); L7 ^= C2(K,5); L0 ^= C3(K,5);
593                 L1 ^= C4(K,5); L2 ^= C5(K,5); L3 ^= C6(K,5); L4 ^= C7(K,5);
594
595                 L6 ^= C0(K,6); L7 ^= C1(K,6); L0 ^= C2(K,6); L1 ^= C3(K,6);
596                 L2 ^= C4(K,6); L3 ^= C5(K,6); L4 ^= C6(K,6); L5 ^= C7(K,6);
597
598                 L7 ^= C0(K,7); L0 ^= C1(K,7); L1 ^= C2(K,7); L2 ^= C3(K,7);
599                 L3 ^= C4(K,7); L4 ^= C5(K,7); L5 ^= C6(K,7); L6 ^= C7(K,7);
600
                /* L0..L7 now hold the new round key; store it. */
601                 K.q[0] = L0; K.q[1] = L1; K.q[2] = L2; K.q[3] = L3;
602                 K.q[4] = L4; K.q[5] = L5; K.q[6] = L6; K.q[7] = L7;
603
                /* Same scatter pattern over S, accumulated on top of the
                 * new round key that is still in L0..L7. */
604                 L0 ^= C0(S,0); L1 ^= C1(S,0); L2 ^= C2(S,0); L3 ^= C3(S,0);
605                 L4 ^= C4(S,0); L5 ^= C5(S,0); L6 ^= C6(S,0); L7 ^= C7(S,0);
606
607                 L1 ^= C0(S,1); L2 ^= C1(S,1); L3 ^= C2(S,1); L4 ^= C3(S,1);
608                 L5 ^= C4(S,1); L6 ^= C5(S,1); L7 ^= C6(S,1); L0 ^= C7(S,1);
609
610                 L2 ^= C0(S,2); L3 ^= C1(S,2); L4 ^= C2(S,2); L5 ^= C3(S,2);
611                 L6 ^= C4(S,2); L7 ^= C5(S,2); L0 ^= C6(S,2); L1 ^= C7(S,2);
612
613                 L3 ^= C0(S,3); L4 ^= C1(S,3); L5 ^= C2(S,3); L6 ^= C3(S,3);
614                 L7 ^= C4(S,3); L0 ^= C5(S,3); L1 ^= C6(S,3); L2 ^= C7(S,3);
615
616                 L4 ^= C0(S,4); L5 ^= C1(S,4); L6 ^= C2(S,4); L7 ^= C3(S,4);
617                 L0 ^= C4(S,4); L1 ^= C5(S,4); L2 ^= C6(S,4); L3 ^= C7(S,4);
618
619                 L5 ^= C0(S,5); L6 ^= C1(S,5); L7 ^= C2(S,5); L0 ^= C3(S,5);
620                 L1 ^= C4(S,5); L2 ^= C5(S,5); L3 ^= C6(S,5); L4 ^= C7(S,5);
621
622                 L6 ^= C0(S,6); L7 ^= C1(S,6); L0 ^= C2(S,6); L1 ^= C3(S,6);
623                 L2 ^= C4(S,6); L3 ^= C5(S,6); L4 ^= C6(S,6); L5 ^= C7(S,6);
624
625                 L7 ^= C0(S,7); L0 ^= C1(S,7); L1 ^= C2(S,7); L2 ^= C3(S,7);
626                 L3 ^= C4(S,7); L4 ^= C5(S,7); L5 ^= C6(S,7); L6 ^= C7(S,7);
627
628                 S.q[0] = L0; S.q[1] = L1; S.q[2] = L2; S.q[3] = L3;
629                 S.q[4] = L4; S.q[5] = L5; S.q[6] = L6; S.q[7] = L7;
630 #endif
631                 }
632
        /* Feed-forward: H ^= S ^ block, bytewise when the input is
         * unaligned under STRICT_ALIGNMENT, otherwise word by word. */
633 #ifdef STRICT_ALIGNMENT
634         if ((size_t)p & 7)
635                 {
636                 int i;
637                 for(i=0;i<64;i++)       H->c[i] ^= S.c[i] ^ p[i];
638                 }
639         else
640 #endif
641                 {
642                 const u64 *pa=(const u64 *)p;
643                 H->q[0] ^= S.q[0] ^ pa[0];
644                 H->q[1] ^= S.q[1] ^ pa[1];
645                 H->q[2] ^= S.q[2] ^ pa[2];
646                 H->q[3] ^= S.q[3] ^ pa[3];
647                 H->q[4] ^= S.q[4] ^ pa[4];
648                 H->q[5] ^= S.q[5] ^ pa[5];
649                 H->q[6] ^= S.q[6] ^ pa[6];
650                 H->q[7] ^= S.q[7] ^ pa[7];
651                 }
652 #endif
        /* Advance to the next 64-byte block. */
653                                                         p += 64;
654                                                         } while(--n);
655         }
654                                                         } while(--n);
655         }