/**
 * The Whirlpool hashing function.
 *
 * <P>
 * <b>References</b>
 *
 * <P>
 * The Whirlpool algorithm was developed by
 * <a href="mailto:pbarreto@scopus.com.br">Paulo S. L. M. Barreto</a> and
 * <a href="mailto:vincent.rijmen@cryptomathic.com">Vincent Rijmen</a>.
 *
 * See
 *      P.S.L.M. Barreto, V. Rijmen,
 *      ``The Whirlpool hashing function,''
 *      NESSIE submission, 2000 (tweaked version, 2001),
 *      <https://www.cosic.esat.kuleuven.ac.be/nessie/workshop/submissions/whirlpool.zip>
 *
 * Based on "@version 3.0 (2003.03.12)" by Paulo S.L.M. Barreto and
 * Vincent Rijmen. Look up "reference implementations" on
 * <http://planeta.terra.com.br/informatica/paulobarreto/>
 *
 * =============================================================================
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include "wp_locl.h"
#include <string.h>

typedef unsigned char           u8;
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
typedef unsigned __int64        u64;
#elif defined(__arch64__)
typedef unsigned long           u64;
#else
typedef unsigned long long      u64;
#endif

#define ROUNDS  10

#define STRICT_ALIGNMENT
#if defined(__i386) || defined(__i386__) || \
    defined(__x86_64) || defined(__x86_64__) || \
    defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64)
/*
 * Well, formally there are a couple of other architectures that permit
 * unaligned loads, specifically those not crossing cache lines: IA-64
 * and PowerPC...
 */
#  undef STRICT_ALIGNMENT
#endif

#undef SMALL_REGISTER_BANK
#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
#  define SMALL_REGISTER_BANK
#  if defined(WHIRLPOOL_ASM)
#    ifndef OPENSSL_SMALL_FOOTPRINT
/*
 * it appears that for older non-MMX
 * CPUs this is actually faster!
 */
#      define OPENSSL_SMALL_FOOTPRINT
#    endif
#    define GO_FOR_MMX(ctx,inp,num)     do {                    \
        extern unsigned long OPENSSL_ia32cap_P[];               \
        void whirlpool_block_mmx(void *,const void *,size_t);   \
        if (!(OPENSSL_ia32cap_P[0] & (1<<23)))  break;          \
        whirlpool_block_mmx(ctx->H.c,inp,num);  return;         \
                                        } while (0)
#  endif
#endif
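/*
 * Note: bit 23 of OPENSSL_ia32cap_P[0] mirrors the CPUID EDX.MMX
 * capability flag, so GO_FOR_MMX dispatches to the assembler module
 * only on MMX-capable CPUs and otherwise falls through to the
 * portable C implementation below.
 */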

#undef ROTATE
#if defined(_MSC_VER)
#  if defined(_WIN64)   /* applies to both IA-64 and AMD64 */
#    pragma intrinsic(_rotl64)
#    define ROTATE(a,n) _rotl64((a),n)
#  endif
#elif defined(__GNUC__) && __GNUC__>=2
#  if defined(__x86_64) || defined(__x86_64__)
#    if defined(L_ENDIAN)
#      define ROTATE(a,n)       ({ u64 ret; asm ("rolq %1,%0"   \
                                   : "=r"(ret) : "J"(n),"0"(a) : "cc"); ret; })
#    elif defined(B_ENDIAN)
       /*
        * Most will argue that x86_64 is always little-endian. Well,
        * yes, but then we have stratus.com, who have modified gcc to
        * "emulate" big-endian on x86. Is there evidence that they
        * [or somebody else] won't do the same for x86_64? Naturally no.
        * And this line is waiting ready for that brave soul:-)
        */
#      define ROTATE(a,n)       ({ u64 ret; asm ("rorq %1,%0"   \
                                   : "=r"(ret) : "J"(n),"0"(a) : "cc"); ret; })
#    endif
#  elif defined(__ia64) || defined(__ia64__)
#    if defined(L_ENDIAN)
#      define ROTATE(a,n)       ({ u64 ret; asm ("shrp %0=%1,%1,%2"     \
                                   : "=r"(ret) : "r"(a),"M"(64-(n))); ret; })
#    elif defined(B_ENDIAN)
#      define ROTATE(a,n)       ({ u64 ret; asm ("shrp %0=%1,%1,%2"     \
                                   : "=r"(ret) : "r"(a),"M"(n)); ret; })
#    endif
#  endif
#endif

#if defined(OPENSSL_SMALL_FOOTPRINT)
#  if !defined(ROTATE)
#    if defined(L_ENDIAN)       /* little-endians have to rotate left */
#      define ROTATE(i,n)       ((i)<<(n) ^ (i)>>(64-(n)))
#    elif defined(B_ENDIAN)     /* big-endians have to rotate right */
#      define ROTATE(i,n)       ((i)>>(n) ^ (i)<<(64-(n)))
#    endif
#  endif
#  if defined(ROTATE) && !defined(STRICT_ALIGNMENT)
#    define STRICT_ALIGNMENT    /* ensure smallest table size */
#  endif
#endif
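/*
 * For example, with L_ENDIAN the generic macro above computes
 * ROTATE(0x0102030405060708, 8) as
 * 0x0203040506070800 ^ 0x0000000000000001 == 0x0203040506070801,
 * i.e. an ordinary 64-bit rotate-left by one byte.
 */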

/*
 * Table size depends on STRICT_ALIGNMENT and on whether or not an
 * endian-specific ROTATE macro is defined. If STRICT_ALIGNMENT is not
 * defined, which is normally the case on x86[_64] CPUs, the table is
 * 4KB large unconditionally. Otherwise, if ROTATE is defined, the
 * table is 2KB large, and otherwise it is 16KB. The 2KB table requires
 * a whole bunch of additional rotations, but I'm willing to "trade,"
 * because the 16KB table certainly trashes the L1 cache. I wish all
 * CPUs could handle unaligned loads, as the 4KB table neither trashes
 * the cache nor requires additional rotations.
 */
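/*
 * Concretely (worked out from the declaration of Cx below): the table
 * occupies (256*N + ROUNDS)*sizeof(u64) bytes, i.e. 2128 bytes for
 * N==1, 4176 bytes for N==2, and 16464 bytes for N==8 -- the "2KB",
 * "4KB" and "16KB" figures quoted above.
 */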
/*
 * Note that every Cn macro expands as two loads: one byte load and
 * one quadword load. One can argue that that many single-byte loads
 * is excessive, as one could load a quadword and "milk" it for eight
 * 8-bit values instead. Well, yes, but in order to do so *and* avoid
 * excessive loads you have to accommodate a handful of 64-bit values
 * in the register bank and issue a bunch of shifts and masks. It's a
 * trade-off: loads vs. shifts and masks in a big register bank[!].
 * On most CPUs eight single-byte loads are faster, and I leave it to
 * a smart compiler to fold byte loads on the others if beneficial.
 * Hand-coded assembler would be another alternative:-)
 */
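/*
 * A rough sketch of that "milk a quadword" alternative, purely for
 * illustration (byte order shown for a little-endian machine; this
 * code path is not used by this file):
 *
 *      u64 w = K.q[i];
 *      r ^= Cx.q[(u8)(w >>  0)];
 *      r ^= ROTATE(Cx.q[(u8)(w >>  8)],  8);
 *      ... and so on through (u8)(w >> 56) ...
 *
 * i.e. one 64-bit load plus seven shift-and-mask extractions per row,
 * which only pays off on CPUs with a large register bank.
 */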
#ifdef STRICT_ALIGNMENT
#  if defined(ROTATE)
#    define N   1
#    define LL(c0,c1,c2,c3,c4,c5,c6,c7) c0,c1,c2,c3,c4,c5,c6,c7
#    define C0(K,i)     (Cx.q[K.c[(i)*8+0]])
#    define C1(K,i)     ROTATE(Cx.q[K.c[(i)*8+1]],8)
#    define C2(K,i)     ROTATE(Cx.q[K.c[(i)*8+2]],16)
#    define C3(K,i)     ROTATE(Cx.q[K.c[(i)*8+3]],24)
#    define C4(K,i)     ROTATE(Cx.q[K.c[(i)*8+4]],32)
#    define C5(K,i)     ROTATE(Cx.q[K.c[(i)*8+5]],40)
#    define C6(K,i)     ROTATE(Cx.q[K.c[(i)*8+6]],48)
#    define C7(K,i)     ROTATE(Cx.q[K.c[(i)*8+7]],56)
#  else
#    define N   8
#    define LL(c0,c1,c2,c3,c4,c5,c6,c7) c0,c1,c2,c3,c4,c5,c6,c7, \
                                        c7,c0,c1,c2,c3,c4,c5,c6, \
                                        c6,c7,c0,c1,c2,c3,c4,c5, \
                                        c5,c6,c7,c0,c1,c2,c3,c4, \
                                        c4,c5,c6,c7,c0,c1,c2,c3, \
                                        c3,c4,c5,c6,c7,c0,c1,c2, \
                                        c2,c3,c4,c5,c6,c7,c0,c1, \
                                        c1,c2,c3,c4,c5,c6,c7,c0
#    define C0(K,i)     (Cx.q[0+8*K.c[(i)*8+0]])
#    define C1(K,i)     (Cx.q[1+8*K.c[(i)*8+1]])
#    define C2(K,i)     (Cx.q[2+8*K.c[(i)*8+2]])
#    define C3(K,i)     (Cx.q[3+8*K.c[(i)*8+3]])
#    define C4(K,i)     (Cx.q[4+8*K.c[(i)*8+4]])
#    define C5(K,i)     (Cx.q[5+8*K.c[(i)*8+5]])
#    define C6(K,i)     (Cx.q[6+8*K.c[(i)*8+6]])
#    define C7(K,i)     (Cx.q[7+8*K.c[(i)*8+7]])
#  endif
#else
#  define N     2
#  define LL(c0,c1,c2,c3,c4,c5,c6,c7)   c0,c1,c2,c3,c4,c5,c6,c7, \
                                        c0,c1,c2,c3,c4,c5,c6,c7
#  define C0(K,i)       (((u64*)(Cx.c+0))[2*K.c[(i)*8+0]])
#  define C1(K,i)       (((u64*)(Cx.c+7))[2*K.c[(i)*8+1]])
#  define C2(K,i)       (((u64*)(Cx.c+6))[2*K.c[(i)*8+2]])
#  define C3(K,i)       (((u64*)(Cx.c+5))[2*K.c[(i)*8+3]])
#  define C4(K,i)       (((u64*)(Cx.c+4))[2*K.c[(i)*8+4]])
#  define C5(K,i)       (((u64*)(Cx.c+3))[2*K.c[(i)*8+5]])
#  define C6(K,i)       (((u64*)(Cx.c+2))[2*K.c[(i)*8+6]])
#  define C7(K,i)       (((u64*)(Cx.c+1))[2*K.c[(i)*8+7]])
#endif

static const
union   {
        u8      c[(256*N+ROUNDS)*sizeof(u64)];
        u64     q[(256*N+ROUNDS)];
        } Cx = { {
        /* Note endian-neutral representation:-) */
        LL(0x18,0x18,0x60,0x18,0xc0,0x78,0x30,0xd8),
        LL(0x23,0x23,0x8c,0x23,0x05,0xaf,0x46,0x26),
        LL(0xc6,0xc6,0x3f,0xc6,0x7e,0xf9,0x91,0xb8),
        LL(0xe8,0xe8,0x87,0xe8,0x13,0x6f,0xcd,0xfb),
        LL(0x87,0x87,0x26,0x87,0x4c,0xa1,0x13,0xcb),
        LL(0xb8,0xb8,0xda,0xb8,0xa9,0x62,0x6d,0x11),
        LL(0x01,0x01,0x04,0x01,0x08,0x05,0x02,0x09),
        LL(0x4f,0x4f,0x21,0x4f,0x42,0x6e,0x9e,0x0d),
        LL(0x36,0x36,0xd8,0x36,0xad,0xee,0x6c,0x9b),
        LL(0xa6,0xa6,0xa2,0xa6,0x59,0x04,0x51,0xff),
        LL(0xd2,0xd2,0x6f,0xd2,0xde,0xbd,0xb9,0x0c),
        LL(0xf5,0xf5,0xf3,0xf5,0xfb,0x06,0xf7,0x0e),
        LL(0x79,0x79,0xf9,0x79,0xef,0x80,0xf2,0x96),
        LL(0x6f,0x6f,0xa1,0x6f,0x5f,0xce,0xde,0x30),
        LL(0x91,0x91,0x7e,0x91,0xfc,0xef,0x3f,0x6d),
        LL(0x52,0x52,0x55,0x52,0xaa,0x07,0xa4,0xf8),
        LL(0x60,0x60,0x9d,0x60,0x27,0xfd,0xc0,0x47),
        LL(0xbc,0xbc,0xca,0xbc,0x89,0x76,0x65,0x35),
        LL(0x9b,0x9b,0x56,0x9b,0xac,0xcd,0x2b,0x37),
        LL(0x8e,0x8e,0x02,0x8e,0x04,0x8c,0x01,0x8a),
        LL(0xa3,0xa3,0xb6,0xa3,0x71,0x15,0x5b,0xd2),
        LL(0x0c,0x0c,0x30,0x0c,0x60,0x3c,0x18,0x6c),
        LL(0x7b,0x7b,0xf1,0x7b,0xff,0x8a,0xf6,0x84),
        LL(0x35,0x35,0xd4,0x35,0xb5,0xe1,0x6a,0x80),
        LL(0x1d,0x1d,0x74,0x1d,0xe8,0x69,0x3a,0xf5),
        LL(0xe0,0xe0,0xa7,0xe0,0x53,0x47,0xdd,0xb3),
        LL(0xd7,0xd7,0x7b,0xd7,0xf6,0xac,0xb3,0x21),
        LL(0xc2,0xc2,0x2f,0xc2,0x5e,0xed,0x99,0x9c),
        LL(0x2e,0x2e,0xb8,0x2e,0x6d,0x96,0x5c,0x43),
        LL(0x4b,0x4b,0x31,0x4b,0x62,0x7a,0x96,0x29),
        LL(0xfe,0xfe,0xdf,0xfe,0xa3,0x21,0xe1,0x5d),
        LL(0x57,0x57,0x41,0x57,0x82,0x16,0xae,0xd5),
        LL(0x15,0x15,0x54,0x15,0xa8,0x41,0x2a,0xbd),
        LL(0x77,0x77,0xc1,0x77,0x9f,0xb6,0xee,0xe8),
        LL(0x37,0x37,0xdc,0x37,0xa5,0xeb,0x6e,0x92),
        LL(0xe5,0xe5,0xb3,0xe5,0x7b,0x56,0xd7,0x9e),
        LL(0x9f,0x9f,0x46,0x9f,0x8c,0xd9,0x23,0x13),
        LL(0xf0,0xf0,0xe7,0xf0,0xd3,0x17,0xfd,0x23),
        LL(0x4a,0x4a,0x35,0x4a,0x6a,0x7f,0x94,0x20),
        LL(0xda,0xda,0x4f,0xda,0x9e,0x95,0xa9,0x44),
        LL(0x58,0x58,0x7d,0x58,0xfa,0x25,0xb0,0xa2),
        LL(0xc9,0xc9,0x03,0xc9,0x06,0xca,0x8f,0xcf),
        LL(0x29,0x29,0xa4,0x29,0x55,0x8d,0x52,0x7c),
        LL(0x0a,0x0a,0x28,0x0a,0x50,0x22,0x14,0x5a),
        LL(0xb1,0xb1,0xfe,0xb1,0xe1,0x4f,0x7f,0x50),
        LL(0xa0,0xa0,0xba,0xa0,0x69,0x1a,0x5d,0xc9),
        LL(0x6b,0x6b,0xb1,0x6b,0x7f,0xda,0xd6,0x14),
        LL(0x85,0x85,0x2e,0x85,0x5c,0xab,0x17,0xd9),
        LL(0xbd,0xbd,0xce,0xbd,0x81,0x73,0x67,0x3c),
        LL(0x5d,0x5d,0x69,0x5d,0xd2,0x34,0xba,0x8f),
        LL(0x10,0x10,0x40,0x10,0x80,0x50,0x20,0x90),
        LL(0xf4,0xf4,0xf7,0xf4,0xf3,0x03,0xf5,0x07),
        LL(0xcb,0xcb,0x0b,0xcb,0x16,0xc0,0x8b,0xdd),
        LL(0x3e,0x3e,0xf8,0x3e,0xed,0xc6,0x7c,0xd3),
        LL(0x05,0x05,0x14,0x05,0x28,0x11,0x0a,0x2d),
        LL(0x67,0x67,0x81,0x67,0x1f,0xe6,0xce,0x78),
        LL(0xe4,0xe4,0xb7,0xe4,0x73,0x53,0xd5,0x97),
        LL(0x27,0x27,0x9c,0x27,0x25,0xbb,0x4e,0x02),
        LL(0x41,0x41,0x19,0x41,0x32,0x58,0x82,0x73),
        LL(0x8b,0x8b,0x16,0x8b,0x2c,0x9d,0x0b,0xa7),
        LL(0xa7,0xa7,0xa6,0xa7,0x51,0x01,0x53,0xf6),
        LL(0x7d,0x7d,0xe9,0x7d,0xcf,0x94,0xfa,0xb2),
        LL(0x95,0x95,0x6e,0x95,0xdc,0xfb,0x37,0x49),
        LL(0xd8,0xd8,0x47,0xd8,0x8e,0x9f,0xad,0x56),
        LL(0xfb,0xfb,0xcb,0xfb,0x8b,0x30,0xeb,0x70),
        LL(0xee,0xee,0x9f,0xee,0x23,0x71,0xc1,0xcd),
        LL(0x7c,0x7c,0xed,0x7c,0xc7,0x91,0xf8,0xbb),
        LL(0x66,0x66,0x85,0x66,0x17,0xe3,0xcc,0x71),
        LL(0xdd,0xdd,0x53,0xdd,0xa6,0x8e,0xa7,0x7b),
        LL(0x17,0x17,0x5c,0x17,0xb8,0x4b,0x2e,0xaf),
        LL(0x47,0x47,0x01,0x47,0x02,0x46,0x8e,0x45),
        LL(0x9e,0x9e,0x42,0x9e,0x84,0xdc,0x21,0x1a),
        LL(0xca,0xca,0x0f,0xca,0x1e,0xc5,0x89,0xd4),
        LL(0x2d,0x2d,0xb4,0x2d,0x75,0x99,0x5a,0x58),
        LL(0xbf,0xbf,0xc6,0xbf,0x91,0x79,0x63,0x2e),
        LL(0x07,0x07,0x1c,0x07,0x38,0x1b,0x0e,0x3f),
        LL(0xad,0xad,0x8e,0xad,0x01,0x23,0x47,0xac),
        LL(0x5a,0x5a,0x75,0x5a,0xea,0x2f,0xb4,0xb0),
        LL(0x83,0x83,0x36,0x83,0x6c,0xb5,0x1b,0xef),
        LL(0x33,0x33,0xcc,0x33,0x85,0xff,0x66,0xb6),
        LL(0x63,0x63,0x91,0x63,0x3f,0xf2,0xc6,0x5c),
        LL(0x02,0x02,0x08,0x02,0x10,0x0a,0x04,0x12),
        LL(0xaa,0xaa,0x92,0xaa,0x39,0x38,0x49,0x93),
        LL(0x71,0x71,0xd9,0x71,0xaf,0xa8,0xe2,0xde),
        LL(0xc8,0xc8,0x07,0xc8,0x0e,0xcf,0x8d,0xc6),
        LL(0x19,0x19,0x64,0x19,0xc8,0x7d,0x32,0xd1),
        LL(0x49,0x49,0x39,0x49,0x72,0x70,0x92,0x3b),
        LL(0xd9,0xd9,0x43,0xd9,0x86,0x9a,0xaf,0x5f),
        LL(0xf2,0xf2,0xef,0xf2,0xc3,0x1d,0xf9,0x31),
        LL(0xe3,0xe3,0xab,0xe3,0x4b,0x48,0xdb,0xa8),
        LL(0x5b,0x5b,0x71,0x5b,0xe2,0x2a,0xb6,0xb9),
        LL(0x88,0x88,0x1a,0x88,0x34,0x92,0x0d,0xbc),
        LL(0x9a,0x9a,0x52,0x9a,0xa4,0xc8,0x29,0x3e),
        LL(0x26,0x26,0x98,0x26,0x2d,0xbe,0x4c,0x0b),
        LL(0x32,0x32,0xc8,0x32,0x8d,0xfa,0x64,0xbf),
        LL(0xb0,0xb0,0xfa,0xb0,0xe9,0x4a,0x7d,0x59),
        LL(0xe9,0xe9,0x83,0xe9,0x1b,0x6a,0xcf,0xf2),
        LL(0x0f,0x0f,0x3c,0x0f,0x78,0x33,0x1e,0x77),
        LL(0xd5,0xd5,0x73,0xd5,0xe6,0xa6,0xb7,0x33),
        LL(0x80,0x80,0x3a,0x80,0x74,0xba,0x1d,0xf4),
        LL(0xbe,0xbe,0xc2,0xbe,0x99,0x7c,0x61,0x27),
        LL(0xcd,0xcd,0x13,0xcd,0x26,0xde,0x87,0xeb),
        LL(0x34,0x34,0xd0,0x34,0xbd,0xe4,0x68,0x89),
        LL(0x48,0x48,0x3d,0x48,0x7a,0x75,0x90,0x32),
        LL(0xff,0xff,0xdb,0xff,0xab,0x24,0xe3,0x54),
        LL(0x7a,0x7a,0xf5,0x7a,0xf7,0x8f,0xf4,0x8d),
        LL(0x90,0x90,0x7a,0x90,0xf4,0xea,0x3d,0x64),
        LL(0x5f,0x5f,0x61,0x5f,0xc2,0x3e,0xbe,0x9d),
        LL(0x20,0x20,0x80,0x20,0x1d,0xa0,0x40,0x3d),
        LL(0x68,0x68,0xbd,0x68,0x67,0xd5,0xd0,0x0f),
        LL(0x1a,0x1a,0x68,0x1a,0xd0,0x72,0x34,0xca),
        LL(0xae,0xae,0x82,0xae,0x19,0x2c,0x41,0xb7),
        LL(0xb4,0xb4,0xea,0xb4,0xc9,0x5e,0x75,0x7d),
        LL(0x54,0x54,0x4d,0x54,0x9a,0x19,0xa8,0xce),
        LL(0x93,0x93,0x76,0x93,0xec,0xe5,0x3b,0x7f),
        LL(0x22,0x22,0x88,0x22,0x0d,0xaa,0x44,0x2f),
        LL(0x64,0x64,0x8d,0x64,0x07,0xe9,0xc8,0x63),
        LL(0xf1,0xf1,0xe3,0xf1,0xdb,0x12,0xff,0x2a),
        LL(0x73,0x73,0xd1,0x73,0xbf,0xa2,0xe6,0xcc),
        LL(0x12,0x12,0x48,0x12,0x90,0x5a,0x24,0x82),
        LL(0x40,0x40,0x1d,0x40,0x3a,0x5d,0x80,0x7a),
        LL(0x08,0x08,0x20,0x08,0x40,0x28,0x10,0x48),
        LL(0xc3,0xc3,0x2b,0xc3,0x56,0xe8,0x9b,0x95),
        LL(0xec,0xec,0x97,0xec,0x33,0x7b,0xc5,0xdf),
        LL(0xdb,0xdb,0x4b,0xdb,0x96,0x90,0xab,0x4d),
        LL(0xa1,0xa1,0xbe,0xa1,0x61,0x1f,0x5f,0xc0),
        LL(0x8d,0x8d,0x0e,0x8d,0x1c,0x83,0x07,0x91),
        LL(0x3d,0x3d,0xf4,0x3d,0xf5,0xc9,0x7a,0xc8),
        LL(0x97,0x97,0x66,0x97,0xcc,0xf1,0x33,0x5b),
        LL(0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00),
        LL(0xcf,0xcf,0x1b,0xcf,0x36,0xd4,0x83,0xf9),
        LL(0x2b,0x2b,0xac,0x2b,0x45,0x87,0x56,0x6e),
        LL(0x76,0x76,0xc5,0x76,0x97,0xb3,0xec,0xe1),
        LL(0x82,0x82,0x32,0x82,0x64,0xb0,0x19,0xe6),
        LL(0xd6,0xd6,0x7f,0xd6,0xfe,0xa9,0xb1,0x28),
        LL(0x1b,0x1b,0x6c,0x1b,0xd8,0x77,0x36,0xc3),
        LL(0xb5,0xb5,0xee,0xb5,0xc1,0x5b,0x77,0x74),
        LL(0xaf,0xaf,0x86,0xaf,0x11,0x29,0x43,0xbe),
        LL(0x6a,0x6a,0xb5,0x6a,0x77,0xdf,0xd4,0x1d),
        LL(0x50,0x50,0x5d,0x50,0xba,0x0d,0xa0,0xea),
        LL(0x45,0x45,0x09,0x45,0x12,0x4c,0x8a,0x57),
        LL(0xf3,0xf3,0xeb,0xf3,0xcb,0x18,0xfb,0x38),
        LL(0x30,0x30,0xc0,0x30,0x9d,0xf0,0x60,0xad),
        LL(0xef,0xef,0x9b,0xef,0x2b,0x74,0xc3,0xc4),
        LL(0x3f,0x3f,0xfc,0x3f,0xe5,0xc3,0x7e,0xda),
        LL(0x55,0x55,0x49,0x55,0x92,0x1c,0xaa,0xc7),
        LL(0xa2,0xa2,0xb2,0xa2,0x79,0x10,0x59,0xdb),
        LL(0xea,0xea,0x8f,0xea,0x03,0x65,0xc9,0xe9),
        LL(0x65,0x65,0x89,0x65,0x0f,0xec,0xca,0x6a),
        LL(0xba,0xba,0xd2,0xba,0xb9,0x68,0x69,0x03),
        LL(0x2f,0x2f,0xbc,0x2f,0x65,0x93,0x5e,0x4a),
        LL(0xc0,0xc0,0x27,0xc0,0x4e,0xe7,0x9d,0x8e),
        LL(0xde,0xde,0x5f,0xde,0xbe,0x81,0xa1,0x60),
        LL(0x1c,0x1c,0x70,0x1c,0xe0,0x6c,0x38,0xfc),
        LL(0xfd,0xfd,0xd3,0xfd,0xbb,0x2e,0xe7,0x46),
        LL(0x4d,0x4d,0x29,0x4d,0x52,0x64,0x9a,0x1f),
        LL(0x92,0x92,0x72,0x92,0xe4,0xe0,0x39,0x76),
        LL(0x75,0x75,0xc9,0x75,0x8f,0xbc,0xea,0xfa),
        LL(0x06,0x06,0x18,0x06,0x30,0x1e,0x0c,0x36),
        LL(0x8a,0x8a,0x12,0x8a,0x24,0x98,0x09,0xae),
        LL(0xb2,0xb2,0xf2,0xb2,0xf9,0x40,0x79,0x4b),
        LL(0xe6,0xe6,0xbf,0xe6,0x63,0x59,0xd1,0x85),
        LL(0x0e,0x0e,0x38,0x0e,0x70,0x36,0x1c,0x7e),
        LL(0x1f,0x1f,0x7c,0x1f,0xf8,0x63,0x3e,0xe7),
        LL(0x62,0x62,0x95,0x62,0x37,0xf7,0xc4,0x55),
        LL(0xd4,0xd4,0x77,0xd4,0xee,0xa3,0xb5,0x3a),
        LL(0xa8,0xa8,0x9a,0xa8,0x29,0x32,0x4d,0x81),
        LL(0x96,0x96,0x62,0x96,0xc4,0xf4,0x31,0x52),
        LL(0xf9,0xf9,0xc3,0xf9,0x9b,0x3a,0xef,0x62),
        LL(0xc5,0xc5,0x33,0xc5,0x66,0xf6,0x97,0xa3),
        LL(0x25,0x25,0x94,0x25,0x35,0xb1,0x4a,0x10),
        LL(0x59,0x59,0x79,0x59,0xf2,0x20,0xb2,0xab),
        LL(0x84,0x84,0x2a,0x84,0x54,0xae,0x15,0xd0),
        LL(0x72,0x72,0xd5,0x72,0xb7,0xa7,0xe4,0xc5),
        LL(0x39,0x39,0xe4,0x39,0xd5,0xdd,0x72,0xec),
        LL(0x4c,0x4c,0x2d,0x4c,0x5a,0x61,0x98,0x16),
        LL(0x5e,0x5e,0x65,0x5e,0xca,0x3b,0xbc,0x94),
        LL(0x78,0x78,0xfd,0x78,0xe7,0x85,0xf0,0x9f),
        LL(0x38,0x38,0xe0,0x38,0xdd,0xd8,0x70,0xe5),
        LL(0x8c,0x8c,0x0a,0x8c,0x14,0x86,0x05,0x98),
        LL(0xd1,0xd1,0x63,0xd1,0xc6,0xb2,0xbf,0x17),
        LL(0xa5,0xa5,0xae,0xa5,0x41,0x0b,0x57,0xe4),
        LL(0xe2,0xe2,0xaf,0xe2,0x43,0x4d,0xd9,0xa1),
        LL(0x61,0x61,0x99,0x61,0x2f,0xf8,0xc2,0x4e),
        LL(0xb3,0xb3,0xf6,0xb3,0xf1,0x45,0x7b,0x42),
        LL(0x21,0x21,0x84,0x21,0x15,0xa5,0x42,0x34),
        LL(0x9c,0x9c,0x4a,0x9c,0x94,0xd6,0x25,0x08),
        LL(0x1e,0x1e,0x78,0x1e,0xf0,0x66,0x3c,0xee),
        LL(0x43,0x43,0x11,0x43,0x22,0x52,0x86,0x61),
        LL(0xc7,0xc7,0x3b,0xc7,0x76,0xfc,0x93,0xb1),
        LL(0xfc,0xfc,0xd7,0xfc,0xb3,0x2b,0xe5,0x4f),
        LL(0x04,0x04,0x10,0x04,0x20,0x14,0x08,0x24),
        LL(0x51,0x51,0x59,0x51,0xb2,0x08,0xa2,0xe3),
        LL(0x99,0x99,0x5e,0x99,0xbc,0xc7,0x2f,0x25),
        LL(0x6d,0x6d,0xa9,0x6d,0x4f,0xc4,0xda,0x22),
        LL(0x0d,0x0d,0x34,0x0d,0x68,0x39,0x1a,0x65),
        LL(0xfa,0xfa,0xcf,0xfa,0x83,0x35,0xe9,0x79),
        LL(0xdf,0xdf,0x5b,0xdf,0xb6,0x84,0xa3,0x69),
        LL(0x7e,0x7e,0xe5,0x7e,0xd7,0x9b,0xfc,0xa9),
        LL(0x24,0x24,0x90,0x24,0x3d,0xb4,0x48,0x19),
        LL(0x3b,0x3b,0xec,0x3b,0xc5,0xd7,0x76,0xfe),
        LL(0xab,0xab,0x96,0xab,0x31,0x3d,0x4b,0x9a),
        LL(0xce,0xce,0x1f,0xce,0x3e,0xd1,0x81,0xf0),
        LL(0x11,0x11,0x44,0x11,0x88,0x55,0x22,0x99),
        LL(0x8f,0x8f,0x06,0x8f,0x0c,0x89,0x03,0x83),
        LL(0x4e,0x4e,0x25,0x4e,0x4a,0x6b,0x9c,0x04),
        LL(0xb7,0xb7,0xe6,0xb7,0xd1,0x51,0x73,0x66),
        LL(0xeb,0xeb,0x8b,0xeb,0x0b,0x60,0xcb,0xe0),
        LL(0x3c,0x3c,0xf0,0x3c,0xfd,0xcc,0x78,0xc1),
        LL(0x81,0x81,0x3e,0x81,0x7c,0xbf,0x1f,0xfd),
        LL(0x94,0x94,0x6a,0x94,0xd4,0xfe,0x35,0x40),
        LL(0xf7,0xf7,0xfb,0xf7,0xeb,0x0c,0xf3,0x1c),
        LL(0xb9,0xb9,0xde,0xb9,0xa1,0x67,0x6f,0x18),
        LL(0x13,0x13,0x4c,0x13,0x98,0x5f,0x26,0x8b),
        LL(0x2c,0x2c,0xb0,0x2c,0x7d,0x9c,0x58,0x51),
        LL(0xd3,0xd3,0x6b,0xd3,0xd6,0xb8,0xbb,0x05),
        LL(0xe7,0xe7,0xbb,0xe7,0x6b,0x5c,0xd3,0x8c),
        LL(0x6e,0x6e,0xa5,0x6e,0x57,0xcb,0xdc,0x39),
        LL(0xc4,0xc4,0x37,0xc4,0x6e,0xf3,0x95,0xaa),
        LL(0x03,0x03,0x0c,0x03,0x18,0x0f,0x06,0x1b),
        LL(0x56,0x56,0x45,0x56,0x8a,0x13,0xac,0xdc),
        LL(0x44,0x44,0x0d,0x44,0x1a,0x49,0x88,0x5e),
        LL(0x7f,0x7f,0xe1,0x7f,0xdf,0x9e,0xfe,0xa0),
        LL(0xa9,0xa9,0x9e,0xa9,0x21,0x37,0x4f,0x88),
        LL(0x2a,0x2a,0xa8,0x2a,0x4d,0x82,0x54,0x67),
        LL(0xbb,0xbb,0xd6,0xbb,0xb1,0x6d,0x6b,0x0a),
        LL(0xc1,0xc1,0x23,0xc1,0x46,0xe2,0x9f,0x87),
        LL(0x53,0x53,0x51,0x53,0xa2,0x02,0xa6,0xf1),
        LL(0xdc,0xdc,0x57,0xdc,0xae,0x8b,0xa5,0x72),
        LL(0x0b,0x0b,0x2c,0x0b,0x58,0x27,0x16,0x53),
        LL(0x9d,0x9d,0x4e,0x9d,0x9c,0xd3,0x27,0x01),
        LL(0x6c,0x6c,0xad,0x6c,0x47,0xc1,0xd8,0x2b),
        LL(0x31,0x31,0xc4,0x31,0x95,0xf5,0x62,0xa4),
        LL(0x74,0x74,0xcd,0x74,0x87,0xb9,0xe8,0xf3),
        LL(0xf6,0xf6,0xff,0xf6,0xe3,0x09,0xf1,0x15),
        LL(0x46,0x46,0x05,0x46,0x0a,0x43,0x8c,0x4c),
        LL(0xac,0xac,0x8a,0xac,0x09,0x26,0x45,0xa5),
        LL(0x89,0x89,0x1e,0x89,0x3c,0x97,0x0f,0xb5),
        LL(0x14,0x14,0x50,0x14,0xa0,0x44,0x28,0xb4),
        LL(0xe1,0xe1,0xa3,0xe1,0x5b,0x42,0xdf,0xba),
        LL(0x16,0x16,0x58,0x16,0xb0,0x4e,0x2c,0xa6),
        LL(0x3a,0x3a,0xe8,0x3a,0xcd,0xd2,0x74,0xf7),
        LL(0x69,0x69,0xb9,0x69,0x6f,0xd0,0xd2,0x06),
        LL(0x09,0x09,0x24,0x09,0x48,0x2d,0x12,0x41),
        LL(0x70,0x70,0xdd,0x70,0xa7,0xad,0xe0,0xd7),
        LL(0xb6,0xb6,0xe2,0xb6,0xd9,0x54,0x71,0x6f),
        LL(0xd0,0xd0,0x67,0xd0,0xce,0xb7,0xbd,0x1e),
        LL(0xed,0xed,0x93,0xed,0x3b,0x7e,0xc7,0xd6),
        LL(0xcc,0xcc,0x17,0xcc,0x2e,0xdb,0x85,0xe2),
        LL(0x42,0x42,0x15,0x42,0x2a,0x57,0x84,0x68),
        LL(0x98,0x98,0x5a,0x98,0xb4,0xc2,0x2d,0x2c),
        LL(0xa4,0xa4,0xaa,0xa4,0x49,0x0e,0x55,0xed),
        LL(0x28,0x28,0xa0,0x28,0x5d,0x88,0x50,0x75),
        LL(0x5c,0x5c,0x6d,0x5c,0xda,0x31,0xb8,0x86),
        LL(0xf8,0xf8,0xc7,0xf8,0x93,0x3f,0xed,0x6b),
        LL(0x86,0x86,0x22,0x86,0x44,0xa4,0x11,0xc2),
#define RC      (&(Cx.q[256*N]))
        0x18,0x23,0xc6,0xe8,0x87,0xb8,0x01,0x4f,        /* rc[ROUNDS] */
        0x36,0xa6,0xd2,0xf5,0x79,0x6f,0x91,0x52,
        0x60,0xbc,0x9b,0x8e,0xa3,0x0c,0x7b,0x35,
        0x1d,0xe0,0xd7,0xc2,0x2e,0x4b,0xfe,0x57,
        0x15,0x77,0x37,0xe5,0x9f,0xf0,0x4a,0xda,
        0x58,0xc9,0x29,0x0a,0xb1,0xa0,0x6b,0x85,
        0xbd,0x5d,0x10,0xf4,0xcb,0x3e,0x05,0x67,
        0xe4,0x27,0x41,0x8b,0xa7,0x7d,0x95,0xd8,
        0xfb,0xee,0x7c,0x66,0xdd,0x17,0x47,0x9e,
        0xca,0x2d,0xbf,0x07,0xad,0x5a,0x83,0x33
        }
};
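/*
 * Compile-time sanity check (a minimal sketch; the typedef name is
 * made up for this check and used nowhere else): the c[]/q[] overlay
 * above and the (u64*) casts in the Cn macros both assume that u64 is
 * exactly 8 bytes wide, so fail compilation via a negative array size
 * if it is not.
 */
typedef char wp_assert_u64_width[(sizeof(u64) == 8) ? 1 : -1];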

void whirlpool_block(WHIRLPOOL_CTX *ctx,const void *inp,size_t n)
        {
        int     r;
        const u8 *p=inp;
        union   { u64 q[8]; u8 c[64]; } S,K,*H=(void *)ctx->H.q;

#ifdef GO_FOR_MMX
        GO_FOR_MMX(ctx,inp,n);
#endif
                                                        do {
#ifdef OPENSSL_SMALL_FOOTPRINT
        u64     L[8];
        int     i;

        for (i=0;i<64;i++)      S.c[i] = (K.c[i] = H->c[i]) ^ p[i];
        for (r=0;r<ROUNDS;r++)
                {
                for (i=0;i<8;i++)
                        {
                        L[i]  = i ? 0 : RC[r];
                        L[i] ^= C0(K,i)       ^ C1(K,(i-1)&7) ^
                                C2(K,(i-2)&7) ^ C3(K,(i-3)&7) ^
                                C4(K,(i-4)&7) ^ C5(K,(i-5)&7) ^
                                C6(K,(i-6)&7) ^ C7(K,(i-7)&7);
                        }
                memcpy (K.q,L,64);
                for (i=0;i<8;i++)
                        {
                        L[i] ^= C0(S,i)       ^ C1(S,(i-1)&7) ^
                                C2(S,(i-2)&7) ^ C3(S,(i-3)&7) ^
                                C4(S,(i-4)&7) ^ C5(S,(i-5)&7) ^
                                C6(S,(i-6)&7) ^ C7(S,(i-7)&7);
                        }
                memcpy (S.q,L,64);
                }
        for (i=0;i<64;i++)      H->c[i] ^= S.c[i] ^ p[i];
#else
        u64     L0,L1,L2,L3,L4,L5,L6,L7;

#ifdef STRICT_ALIGNMENT
        if ((size_t)p & 7)
                {
                memcpy (S.c,p,64);
                S.q[0] ^= (K.q[0] = H->q[0]);
                S.q[1] ^= (K.q[1] = H->q[1]);
                S.q[2] ^= (K.q[2] = H->q[2]);
                S.q[3] ^= (K.q[3] = H->q[3]);
                S.q[4] ^= (K.q[4] = H->q[4]);
                S.q[5] ^= (K.q[5] = H->q[5]);
                S.q[6] ^= (K.q[6] = H->q[6]);
                S.q[7] ^= (K.q[7] = H->q[7]);
                }
        else
#endif
                {
                const u64 *pa = (const u64*)p;
                S.q[0] = (K.q[0] = H->q[0]) ^ pa[0];
                S.q[1] = (K.q[1] = H->q[1]) ^ pa[1];
                S.q[2] = (K.q[2] = H->q[2]) ^ pa[2];
                S.q[3] = (K.q[3] = H->q[3]) ^ pa[3];
                S.q[4] = (K.q[4] = H->q[4]) ^ pa[4];
                S.q[5] = (K.q[5] = H->q[5]) ^ pa[5];
                S.q[6] = (K.q[6] = H->q[6]) ^ pa[6];
                S.q[7] = (K.q[7] = H->q[7]) ^ pa[7];
                }

        for(r=0;r<ROUNDS;r++)
                {
#ifdef SMALL_REGISTER_BANK
                L0 =    C0(K,0) ^ C1(K,7) ^ C2(K,6) ^ C3(K,5) ^
                        C4(K,4) ^ C5(K,3) ^ C6(K,2) ^ C7(K,1) ^ RC[r];
                L1 =    C0(K,1) ^ C1(K,0) ^ C2(K,7) ^ C3(K,6) ^
                        C4(K,5) ^ C5(K,4) ^ C6(K,3) ^ C7(K,2);
                L2 =    C0(K,2) ^ C1(K,1) ^ C2(K,0) ^ C3(K,7) ^
                        C4(K,6) ^ C5(K,5) ^ C6(K,4) ^ C7(K,3);
                L3 =    C0(K,3) ^ C1(K,2) ^ C2(K,1) ^ C3(K,0) ^
                        C4(K,7) ^ C5(K,6) ^ C6(K,5) ^ C7(K,4);
                L4 =    C0(K,4) ^ C1(K,3) ^ C2(K,2) ^ C3(K,1) ^
                        C4(K,0) ^ C5(K,7) ^ C6(K,6) ^ C7(K,5);
                L5 =    C0(K,5) ^ C1(K,4) ^ C2(K,3) ^ C3(K,2) ^
                        C4(K,1) ^ C5(K,0) ^ C6(K,7) ^ C7(K,6);
                L6 =    C0(K,6) ^ C1(K,5) ^ C2(K,4) ^ C3(K,3) ^
                        C4(K,2) ^ C5(K,1) ^ C6(K,0) ^ C7(K,7);
                L7 =    C0(K,7) ^ C1(K,6) ^ C2(K,5) ^ C3(K,4) ^
                        C4(K,3) ^ C5(K,2) ^ C6(K,1) ^ C7(K,0);

                K.q[0] = L0; K.q[1] = L1; K.q[2] = L2; K.q[3] = L3;
                K.q[4] = L4; K.q[5] = L5; K.q[6] = L6; K.q[7] = L7;

                L0 ^=   C0(S,0) ^ C1(S,7) ^ C2(S,6) ^ C3(S,5) ^
                        C4(S,4) ^ C5(S,3) ^ C6(S,2) ^ C7(S,1);
                L1 ^=   C0(S,1) ^ C1(S,0) ^ C2(S,7) ^ C3(S,6) ^
                        C4(S,5) ^ C5(S,4) ^ C6(S,3) ^ C7(S,2);
                L2 ^=   C0(S,2) ^ C1(S,1) ^ C2(S,0) ^ C3(S,7) ^
                        C4(S,6) ^ C5(S,5) ^ C6(S,4) ^ C7(S,3);
                L3 ^=   C0(S,3) ^ C1(S,2) ^ C2(S,1) ^ C3(S,0) ^
                        C4(S,7) ^ C5(S,6) ^ C6(S,5) ^ C7(S,4);
                L4 ^=   C0(S,4) ^ C1(S,3) ^ C2(S,2) ^ C3(S,1) ^
                        C4(S,0) ^ C5(S,7) ^ C6(S,6) ^ C7(S,5);
                L5 ^=   C0(S,5) ^ C1(S,4) ^ C2(S,3) ^ C3(S,2) ^
                        C4(S,1) ^ C5(S,0) ^ C6(S,7) ^ C7(S,6);
                L6 ^=   C0(S,6) ^ C1(S,5) ^ C2(S,4) ^ C3(S,3) ^
                        C4(S,2) ^ C5(S,1) ^ C6(S,0) ^ C7(S,7);
                L7 ^=   C0(S,7) ^ C1(S,6) ^ C2(S,5) ^ C3(S,4) ^
                        C4(S,3) ^ C5(S,2) ^ C6(S,1) ^ C7(S,0);

                S.q[0] = L0; S.q[1] = L1; S.q[2] = L2; S.q[3] = L3;
                S.q[4] = L4; S.q[5] = L5; S.q[6] = L6; S.q[7] = L7;
#else
                L0  = C0(K,0); L1  = C1(K,0); L2  = C2(K,0); L3  = C3(K,0);
                L4  = C4(K,0); L5  = C5(K,0); L6  = C6(K,0); L7  = C7(K,0);
                L0 ^= RC[r];

                L1 ^= C0(K,1); L2 ^= C1(K,1); L3 ^= C2(K,1); L4 ^= C3(K,1);
                L5 ^= C4(K,1); L6 ^= C5(K,1); L7 ^= C6(K,1); L0 ^= C7(K,1);

                L2 ^= C0(K,2); L3 ^= C1(K,2); L4 ^= C2(K,2); L5 ^= C3(K,2);
                L6 ^= C4(K,2); L7 ^= C5(K,2); L0 ^= C6(K,2); L1 ^= C7(K,2);

                L3 ^= C0(K,3); L4 ^= C1(K,3); L5 ^= C2(K,3); L6 ^= C3(K,3);
                L7 ^= C4(K,3); L0 ^= C5(K,3); L1 ^= C6(K,3); L2 ^= C7(K,3);

                L4 ^= C0(K,4); L5 ^= C1(K,4); L6 ^= C2(K,4); L7 ^= C3(K,4);
                L0 ^= C4(K,4); L1 ^= C5(K,4); L2 ^= C6(K,4); L3 ^= C7(K,4);

                L5 ^= C0(K,5); L6 ^= C1(K,5); L7 ^= C2(K,5); L0 ^= C3(K,5);
                L1 ^= C4(K,5); L2 ^= C5(K,5); L3 ^= C6(K,5); L4 ^= C7(K,5);

                L6 ^= C0(K,6); L7 ^= C1(K,6); L0 ^= C2(K,6); L1 ^= C3(K,6);
                L2 ^= C4(K,6); L3 ^= C5(K,6); L4 ^= C6(K,6); L5 ^= C7(K,6);

                L7 ^= C0(K,7); L0 ^= C1(K,7); L1 ^= C2(K,7); L2 ^= C3(K,7);
                L3 ^= C4(K,7); L4 ^= C5(K,7); L5 ^= C6(K,7); L6 ^= C7(K,7);

                K.q[0] = L0; K.q[1] = L1; K.q[2] = L2; K.q[3] = L3;
                K.q[4] = L4; K.q[5] = L5; K.q[6] = L6; K.q[7] = L7;

                L0 ^= C0(S,0); L1 ^= C1(S,0); L2 ^= C2(S,0); L3 ^= C3(S,0);
                L4 ^= C4(S,0); L5 ^= C5(S,0); L6 ^= C6(S,0); L7 ^= C7(S,0);

                L1 ^= C0(S,1); L2 ^= C1(S,1); L3 ^= C2(S,1); L4 ^= C3(S,1);
                L5 ^= C4(S,1); L6 ^= C5(S,1); L7 ^= C6(S,1); L0 ^= C7(S,1);

                L2 ^= C0(S,2); L3 ^= C1(S,2); L4 ^= C2(S,2); L5 ^= C3(S,2);
                L6 ^= C4(S,2); L7 ^= C5(S,2); L0 ^= C6(S,2); L1 ^= C7(S,2);

                L3 ^= C0(S,3); L4 ^= C1(S,3); L5 ^= C2(S,3); L6 ^= C3(S,3);
                L7 ^= C4(S,3); L0 ^= C5(S,3); L1 ^= C6(S,3); L2 ^= C7(S,3);

                L4 ^= C0(S,4); L5 ^= C1(S,4); L6 ^= C2(S,4); L7 ^= C3(S,4);
                L0 ^= C4(S,4); L1 ^= C5(S,4); L2 ^= C6(S,4); L3 ^= C7(S,4);

                L5 ^= C0(S,5); L6 ^= C1(S,5); L7 ^= C2(S,5); L0 ^= C3(S,5);
                L1 ^= C4(S,5); L2 ^= C5(S,5); L3 ^= C6(S,5); L4 ^= C7(S,5);

                L6 ^= C0(S,6); L7 ^= C1(S,6); L0 ^= C2(S,6); L1 ^= C3(S,6);
                L2 ^= C4(S,6); L3 ^= C5(S,6); L4 ^= C6(S,6); L5 ^= C7(S,6);

                L7 ^= C0(S,7); L0 ^= C1(S,7); L1 ^= C2(S,7); L2 ^= C3(S,7);
                L3 ^= C4(S,7); L4 ^= C5(S,7); L5 ^= C6(S,7); L6 ^= C7(S,7);

                S.q[0] = L0; S.q[1] = L1; S.q[2] = L2; S.q[3] = L3;
                S.q[4] = L4; S.q[5] = L5; S.q[6] = L6; S.q[7] = L7;
#endif
                }

#ifdef STRICT_ALIGNMENT
        if ((size_t)p & 7)
                {
                int i;
                for(i=0;i<64;i++)       H->c[i] ^= S.c[i] ^ p[i];
                }
        else
#endif
                {
                const u64 *pa=(const u64 *)p;
                H->q[0] ^= S.q[0] ^ pa[0];
                H->q[1] ^= S.q[1] ^ pa[1];
                H->q[2] ^= S.q[2] ^ pa[2];
                H->q[3] ^= S.q[3] ^ pa[3];
                H->q[4] ^= S.q[4] ^ pa[4];
                H->q[5] ^= S.q[5] ^ pa[5];
                H->q[6] ^= S.q[6] ^ pa[6];
                H->q[7] ^= S.q[7] ^ pa[7];
                }
#endif
                                                        p += 64;
                                                        } while(--n);
        }
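
/*
 * Hedged usage sketch (illustrative only; the WP_BLOCK_DEMO guard is
 * made up and not part of any build): whirlpool_block() consumes n
 * whole 64-byte blocks, and callers such as WHIRLPOOL_Update() are
 * expected to do the buffering and padding. Assuming wp_locl.h exposes
 * the hash state as ctx->H.c (as the code above does), and given that
 * Whirlpool's initial state is all-zero, a direct driver could look
 * like this:
 */
#ifdef WP_BLOCK_DEMO
#include <stdio.h>
int main(void)
        {
        WHIRLPOOL_CTX ctx;
        unsigned char block[64] = {0};  /* one already-padded 512-bit block */

        memset(&ctx,0,sizeof(ctx));     /* zero state == Whirlpool IV */
        whirlpool_block(&ctx,block,1);  /* compress a single block */
        printf("first state byte: %02x\n",ctx.H.c[0]);
        return 0;
        }
#endif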