2 * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
4 * Licensed under the OpenSSL license (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
10 #include <openssl/e_os2.h>
14 #define ROL32(a, offset) (((a) << (offset)) | ((a) >> ((32 - (offset)) & 31)))
16 static uint64_t ROL64(uint64_t val, int offset)
20 } else if (sizeof(void *) == 8) {
21 return (val << offset) | (val >> (64-offset));
23 uint32_t hi = (uint32_t)(val >> 32), lo = (uint32_t)val;
29 hi = ROL32(lo, offset);
30 lo = ROL32(tmp, offset + 1);
33 lo = ROL32(lo, offset);
34 hi = ROL32(hi, offset);
37 return ((uint64_t)hi << 32) | lo;
41 static const unsigned char rhotates[5][5] = {
43 { 36, 44, 6, 55, 20 },
44 { 3, 10, 43, 25, 39 },
45 { 41, 45, 15, 21, 8 },
49 static const uint64_t iotas[] = {
50 sizeof(void *) == 8 ? 0x0000000000000001U : 0x0000000000000001U,
51 sizeof(void *) == 8 ? 0x0000000000008082U : 0x0000008900000000U,
52 sizeof(void *) == 8 ? 0x800000000000808aU : 0x8000008b00000000U,
53 sizeof(void *) == 8 ? 0x8000000080008000U : 0x8000808000000000U,
54 sizeof(void *) == 8 ? 0x000000000000808bU : 0x0000008b00000001U,
55 sizeof(void *) == 8 ? 0x0000000080000001U : 0x0000800000000001U,
56 sizeof(void *) == 8 ? 0x8000000080008081U : 0x8000808800000001U,
57 sizeof(void *) == 8 ? 0x8000000000008009U : 0x8000008200000001U,
58 sizeof(void *) == 8 ? 0x000000000000008aU : 0x0000000b00000000U,
59 sizeof(void *) == 8 ? 0x0000000000000088U : 0x0000000a00000000U,
60 sizeof(void *) == 8 ? 0x0000000080008009U : 0x0000808200000001U,
61 sizeof(void *) == 8 ? 0x000000008000000aU : 0x0000800300000000U,
62 sizeof(void *) == 8 ? 0x000000008000808bU : 0x0000808b00000001U,
63 sizeof(void *) == 8 ? 0x800000000000008bU : 0x8000000b00000001U,
64 sizeof(void *) == 8 ? 0x8000000000008089U : 0x8000008a00000001U,
65 sizeof(void *) == 8 ? 0x8000000000008003U : 0x8000008100000001U,
66 sizeof(void *) == 8 ? 0x8000000000008002U : 0x8000008100000000U,
67 sizeof(void *) == 8 ? 0x8000000000000080U : 0x8000000800000000U,
68 sizeof(void *) == 8 ? 0x000000000000800aU : 0x0000008300000000U,
69 sizeof(void *) == 8 ? 0x800000008000000aU : 0x8000800300000000U,
70 sizeof(void *) == 8 ? 0x8000000080008081U : 0x8000808800000001U,
71 sizeof(void *) == 8 ? 0x8000000000008080U : 0x8000008800000000U,
72 sizeof(void *) == 8 ? 0x0000000080000001U : 0x0000800000000001U,
73 sizeof(void *) == 8 ? 0x8000000080008008U : 0x8000808200000000U
76 #if defined(KECCAK_REF)
78 * This is straightforward or "maximum clarity" implementation aiming
79 * to resemble section 3.2 of the FIPS PUB 202 "SHA-3 Standard:
80 * Permutation-Based Hash and Extendible-Output Functions" as much as
81 * possible. With one caveat. Because of the way C stores matrices,
82 * references to A[x,y] in the specification are presented as A[y][x].
83 * Implementation unrolls inner x-loops so that modulo 5 operations are
84 * explicitly pre-computed.
86 static void Theta(uint64_t A[5][5])
97 for (y = 1; y < 5; y++) {
105 D[0] = ROL64(C[1], 1) ^ C[4];
106 D[1] = ROL64(C[2], 1) ^ C[0];
107 D[2] = ROL64(C[3], 1) ^ C[1];
108 D[3] = ROL64(C[4], 1) ^ C[2];
109 D[4] = ROL64(C[0], 1) ^ C[3];
111 for (y = 0; y < 5; y++) {
120 static void Rho(uint64_t A[5][5])
124 for (y = 0; y < 5; y++) {
125 A[y][0] = ROL64(A[y][0], rhotates[y][0]);
126 A[y][1] = ROL64(A[y][1], rhotates[y][1]);
127 A[y][2] = ROL64(A[y][2], rhotates[y][2]);
128 A[y][3] = ROL64(A[y][3], rhotates[y][3]);
129 A[y][4] = ROL64(A[y][4], rhotates[y][4]);
133 static void Pi(uint64_t A[5][5])
139 * A[y][x] = T[x][(3*y+x)%5]
141 memcpy(T, A, sizeof(T));
174 static void Chi(uint64_t A[5][5])
179 for (y = 0; y < 5; y++) {
180 C[0] = A[y][0] ^ (~A[y][1] & A[y][2]);
181 C[1] = A[y][1] ^ (~A[y][2] & A[y][3]);
182 C[2] = A[y][2] ^ (~A[y][3] & A[y][4]);
183 C[3] = A[y][3] ^ (~A[y][4] & A[y][0]);
184 C[4] = A[y][4] ^ (~A[y][0] & A[y][1]);
194 static void Iota(uint64_t A[5][5], size_t i)
196 assert(i < (sizeof(iotas) / sizeof(iotas[0])));
200 void KeccakF1600(uint64_t A[5][5])
204 for (i = 0; i < 24; i++) {
213 #elif defined(KECCAK_1X)
215 * This implementation is optimization of above code featuring unroll
216 * of even y-loops, their fusion and code motion. It also minimizes
217 * temporary storage. Compiler would normally do all these things for
218 * you, purpose of manual optimization is to provide "unobscured"
219 * reference for assembly implementation [in case this approach is
220 * chosen for implementation on some platform]. In the nutshell it's
221 * equivalent of "plane-per-plane processing" approach discussed in
222 * section 2.4 of "Keccak implementation overview".
224 static void Round(uint64_t A[5][5], size_t i)
226 uint64_t C[5], D[5], T[2][5];
228 assert(i < (sizeof(iotas) / sizeof(iotas[0])));
230 C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
231 C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
232 C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
233 C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
234 C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
236 D[0] = ROL64(C[1], 1) ^ C[4];
237 D[1] = ROL64(C[2], 1) ^ C[0];
238 D[2] = ROL64(C[3], 1) ^ C[1];
239 D[3] = ROL64(C[4], 1) ^ C[2];
240 D[4] = ROL64(C[0], 1) ^ C[3];
242 C[0] = A[0][0] ^ D[0]; /* rotate by 0 */
243 C[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
244 C[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
245 C[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
246 C[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
248 T[0][0] = A[3][0] ^ D[0]; /* borrow T[0][0] */
249 T[0][1] = A[0][1] ^ D[1];
250 T[0][2] = A[0][2] ^ D[2];
251 T[0][3] = A[0][3] ^ D[3];
252 T[0][4] = A[0][4] ^ D[4];
254 A[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
255 A[0][1] = C[1] ^ (~C[2] & C[3]);
256 A[0][2] = C[2] ^ (~C[3] & C[4]);
257 A[0][3] = C[3] ^ (~C[4] & C[0]);
258 A[0][4] = C[4] ^ (~C[0] & C[1]);
260 C[0] = ROL64(T[0][3], rhotates[0][3]);
261 C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
262 C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
263 C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
264 C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
266 T[1][0] = A[1][0] ^ D[0];
267 T[1][1] = A[2][1] ^ D[1]; /* borrow T[1][1] */
268 T[1][2] = A[1][2] ^ D[2];
269 T[1][3] = A[1][3] ^ D[3];
270 T[1][4] = A[2][4] ^ D[4]; /* borrow T[1][4] */
272 A[1][0] = C[0] ^ (~C[1] & C[2]);
273 A[1][1] = C[1] ^ (~C[2] & C[3]);
274 A[1][2] = C[2] ^ (~C[3] & C[4]);
275 A[1][3] = C[3] ^ (~C[4] & C[0]);
276 A[1][4] = C[4] ^ (~C[0] & C[1]);
278 C[0] = ROL64(T[0][1], rhotates[0][1]);
279 C[1] = ROL64(T[1][2], rhotates[1][2]);
280 C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
281 C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
282 C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
284 A[2][0] = C[0] ^ (~C[1] & C[2]);
285 A[2][1] = C[1] ^ (~C[2] & C[3]);
286 A[2][2] = C[2] ^ (~C[3] & C[4]);
287 A[2][3] = C[3] ^ (~C[4] & C[0]);
288 A[2][4] = C[4] ^ (~C[0] & C[1]);
290 C[0] = ROL64(T[0][4], rhotates[0][4]);
291 C[1] = ROL64(T[1][0], rhotates[1][0]);
292 C[2] = ROL64(T[1][1], rhotates[2][1]); /* originally A[2][1] */
293 C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
294 C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
296 A[3][0] = C[0] ^ (~C[1] & C[2]);
297 A[3][1] = C[1] ^ (~C[2] & C[3]);
298 A[3][2] = C[2] ^ (~C[3] & C[4]);
299 A[3][3] = C[3] ^ (~C[4] & C[0]);
300 A[3][4] = C[4] ^ (~C[0] & C[1]);
302 C[0] = ROL64(T[0][2], rhotates[0][2]);
303 C[1] = ROL64(T[1][3], rhotates[1][3]);
304 C[2] = ROL64(T[1][4], rhotates[2][4]); /* originally A[2][4] */
305 C[3] = ROL64(T[0][0], rhotates[3][0]); /* originally A[3][0] */
306 C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
308 A[4][0] = C[0] ^ (~C[1] & C[2]);
309 A[4][1] = C[1] ^ (~C[2] & C[3]);
310 A[4][2] = C[2] ^ (~C[3] & C[4]);
311 A[4][3] = C[3] ^ (~C[4] & C[0]);
312 A[4][4] = C[4] ^ (~C[0] & C[1]);
315 void KeccakF1600(uint64_t A[5][5])
319 for (i = 0; i < 24; i++) {
324 #elif defined(KECCAK_2X)
326 * This implementation is variant of KECCAK_1X above with outer-most
327 * round loop unrolled twice. This allows to take temporary storage
328 * out of round procedure and simplify references to it by alternating
329 * it with actual data (see round loop below). Just like original, it's
330 * rather meant as reference for an assembly implementation. It's likely
331 * to provide best instruction per processed byte ratio at minimal
332 * round unroll factor...
334 static void Round(uint64_t R[5][5], uint64_t A[5][5], size_t i)
338 assert(i < (sizeof(iotas) / sizeof(iotas[0])));
340 C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
341 C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
342 C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
343 C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
344 C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
346 D[0] = ROL64(C[1], 1) ^ C[4];
347 D[1] = ROL64(C[2], 1) ^ C[0];
348 D[2] = ROL64(C[3], 1) ^ C[1];
349 D[3] = ROL64(C[4], 1) ^ C[2];
350 D[4] = ROL64(C[0], 1) ^ C[3];
352 C[0] = A[0][0] ^ D[0]; /* rotate by 0 */
353 C[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
354 C[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
355 C[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
356 C[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
358 #ifdef KECCAK_COMPLEMENTING_TRANSFORM
359 R[0][0] = C[0] ^ ( C[1] | C[2]) ^ iotas[i];
360 R[0][1] = C[1] ^ (~C[2] | C[3]);
361 R[0][2] = C[2] ^ ( C[3] & C[4]);
362 R[0][3] = C[3] ^ ( C[4] | C[0]);
363 R[0][4] = C[4] ^ ( C[0] & C[1]);
365 R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
366 R[0][1] = C[1] ^ (~C[2] & C[3]);
367 R[0][2] = C[2] ^ (~C[3] & C[4]);
368 R[0][3] = C[3] ^ (~C[4] & C[0]);
369 R[0][4] = C[4] ^ (~C[0] & C[1]);
372 C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
373 C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
374 C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
375 C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
376 C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
378 #ifdef KECCAK_COMPLEMENTING_TRANSFORM
379 R[1][0] = C[0] ^ (C[1] | C[2]);
380 R[1][1] = C[1] ^ (C[2] & C[3]);
381 R[1][2] = C[2] ^ (C[3] | ~C[4]);
382 R[1][3] = C[3] ^ (C[4] | C[0]);
383 R[1][4] = C[4] ^ (C[0] & C[1]);
385 R[1][0] = C[0] ^ (~C[1] & C[2]);
386 R[1][1] = C[1] ^ (~C[2] & C[3]);
387 R[1][2] = C[2] ^ (~C[3] & C[4]);
388 R[1][3] = C[3] ^ (~C[4] & C[0]);
389 R[1][4] = C[4] ^ (~C[0] & C[1]);
392 C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
393 C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
394 C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
395 C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
396 C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
398 #ifdef KECCAK_COMPLEMENTING_TRANSFORM
399 R[2][0] = C[0] ^ ( C[1] | C[2]);
400 R[2][1] = C[1] ^ ( C[2] & C[3]);
401 R[2][2] = C[2] ^ (~C[3] & C[4]);
402 R[2][3] = ~C[3] ^ ( C[4] | C[0]);
403 R[2][4] = C[4] ^ ( C[0] & C[1]);
405 R[2][0] = C[0] ^ (~C[1] & C[2]);
406 R[2][1] = C[1] ^ (~C[2] & C[3]);
407 R[2][2] = C[2] ^ (~C[3] & C[4]);
408 R[2][3] = C[3] ^ (~C[4] & C[0]);
409 R[2][4] = C[4] ^ (~C[0] & C[1]);
412 C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
413 C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
414 C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
415 C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
416 C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
418 #ifdef KECCAK_COMPLEMENTING_TRANSFORM
419 R[3][0] = C[0] ^ ( C[1] & C[2]);
420 R[3][1] = C[1] ^ ( C[2] | C[3]);
421 R[3][2] = C[2] ^ (~C[3] | C[4]);
422 R[3][3] = ~C[3] ^ ( C[4] & C[0]);
423 R[3][4] = C[4] ^ ( C[0] | C[1]);
425 R[3][0] = C[0] ^ (~C[1] & C[2]);
426 R[3][1] = C[1] ^ (~C[2] & C[3]);
427 R[3][2] = C[2] ^ (~C[3] & C[4]);
428 R[3][3] = C[3] ^ (~C[4] & C[0]);
429 R[3][4] = C[4] ^ (~C[0] & C[1]);
432 C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
433 C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
434 C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
435 C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
436 C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
438 #ifdef KECCAK_COMPLEMENTING_TRANSFORM
439 R[4][0] = C[0] ^ (~C[1] & C[2]);
440 R[4][1] = ~C[1] ^ ( C[2] | C[3]);
441 R[4][2] = C[2] ^ ( C[3] & C[4]);
442 R[4][3] = C[3] ^ ( C[4] | C[0]);
443 R[4][4] = C[4] ^ ( C[0] & C[1]);
445 R[4][0] = C[0] ^ (~C[1] & C[2]);
446 R[4][1] = C[1] ^ (~C[2] & C[3]);
447 R[4][2] = C[2] ^ (~C[3] & C[4]);
448 R[4][3] = C[3] ^ (~C[4] & C[0]);
449 R[4][4] = C[4] ^ (~C[0] & C[1]);
453 void KeccakF1600(uint64_t A[5][5])
458 #ifdef KECCAK_COMPLEMENTING_TRANSFORM
467 for (i = 0; i < 24; i += 2) {
472 #ifdef KECCAK_COMPLEMENTING_TRANSFORM
484 * This implementation is KECCAK_1X from above combined 4 times with
485 * a twist that allows to omit temporary storage and perform in-place
486 * processing. It's discussed in section 2.5 of "Keccak implementation
487 * overview". It's likely to be best suited for processors with large
490 static void FourRounds(uint64_t A[5][5], size_t i)
492 uint64_t B[5], C[5], D[5];
494 assert(i <= (sizeof(iotas) / sizeof(iotas[0]) - 4));
497 C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
498 C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
499 C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
500 C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
501 C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
503 D[0] = ROL64(C[1], 1) ^ C[4];
504 D[1] = ROL64(C[2], 1) ^ C[0];
505 D[2] = ROL64(C[3], 1) ^ C[1];
506 D[3] = ROL64(C[4], 1) ^ C[2];
507 D[4] = ROL64(C[0], 1) ^ C[3];
509 B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
510 B[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
511 B[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
512 B[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
513 B[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
515 C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i];
516 C[1] = A[1][1] = B[1] ^ (~B[2] & B[3]);
517 C[2] = A[2][2] = B[2] ^ (~B[3] & B[4]);
518 C[3] = A[3][3] = B[3] ^ (~B[4] & B[0]);
519 C[4] = A[4][4] = B[4] ^ (~B[0] & B[1]);
521 B[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
522 B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
523 B[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
524 B[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
525 B[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
527 C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
528 C[1] ^= A[3][1] = B[1] ^ (~B[2] & B[3]);
529 C[2] ^= A[4][2] = B[2] ^ (~B[3] & B[4]);
530 C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
531 C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);
533 B[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
534 B[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
535 B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
536 B[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
537 B[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
539 C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
540 C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
541 C[2] ^= A[1][2] = B[2] ^ (~B[3] & B[4]);
542 C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
543 C[4] ^= A[3][4] = B[4] ^ (~B[0] & B[1]);
545 B[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
546 B[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
547 B[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
548 B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
549 B[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
551 C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
552 C[1] ^= A[2][1] = B[1] ^ (~B[2] & B[3]);
553 C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
554 C[3] ^= A[4][3] = B[3] ^ (~B[4] & B[0]);
555 C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);
557 B[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
558 B[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
559 B[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
560 B[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
561 B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
563 C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
564 C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
565 C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
566 C[3] ^= A[1][3] = B[3] ^ (~B[4] & B[0]);
567 C[4] ^= A[2][4] = B[4] ^ (~B[0] & B[1]);
570 D[0] = ROL64(C[1], 1) ^ C[4];
571 D[1] = ROL64(C[2], 1) ^ C[0];
572 D[2] = ROL64(C[3], 1) ^ C[1];
573 D[3] = ROL64(C[4], 1) ^ C[2];
574 D[4] = ROL64(C[0], 1) ^ C[3];
576 B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
577 B[1] = ROL64(A[3][1] ^ D[1], rhotates[1][1]);
578 B[2] = ROL64(A[1][2] ^ D[2], rhotates[2][2]);
579 B[3] = ROL64(A[4][3] ^ D[3], rhotates[3][3]);
580 B[4] = ROL64(A[2][4] ^ D[4], rhotates[4][4]);
582 C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 1];
583 C[1] = A[3][1] = B[1] ^ (~B[2] & B[3]);
584 C[2] = A[1][2] = B[2] ^ (~B[3] & B[4]);
585 C[3] = A[4][3] = B[3] ^ (~B[4] & B[0]);
586 C[4] = A[2][4] = B[4] ^ (~B[0] & B[1]);
588 B[0] = ROL64(A[3][3] ^ D[3], rhotates[0][3]);
589 B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
590 B[2] = ROL64(A[4][0] ^ D[0], rhotates[2][0]);
591 B[3] = ROL64(A[2][1] ^ D[1], rhotates[3][1]);
592 B[4] = ROL64(A[0][2] ^ D[2], rhotates[4][2]);
594 C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
595 C[1] ^= A[2][1] = B[1] ^ (~B[2] & B[3]);
596 C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
597 C[3] ^= A[3][3] = B[3] ^ (~B[4] & B[0]);
598 C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);
600 B[0] = ROL64(A[1][1] ^ D[1], rhotates[0][1]);
601 B[1] = ROL64(A[4][2] ^ D[2], rhotates[1][2]);
602 B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
603 B[3] = ROL64(A[0][4] ^ D[4], rhotates[3][4]);
604 B[4] = ROL64(A[3][0] ^ D[0], rhotates[4][0]);
606 C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
607 C[1] ^= A[1][1] = B[1] ^ (~B[2] & B[3]);
608 C[2] ^= A[4][2] = B[2] ^ (~B[3] & B[4]);
609 C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
610 C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);
612 B[0] = ROL64(A[4][4] ^ D[4], rhotates[0][4]);
613 B[1] = ROL64(A[2][0] ^ D[0], rhotates[1][0]);
614 B[2] = ROL64(A[0][1] ^ D[1], rhotates[2][1]);
615 B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
616 B[4] = ROL64(A[1][3] ^ D[3], rhotates[4][3]);
618 C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
619 C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
620 C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
621 C[3] ^= A[1][3] = B[3] ^ (~B[4] & B[0]);
622 C[4] ^= A[4][4] = B[4] ^ (~B[0] & B[1]);
624 B[0] = ROL64(A[2][2] ^ D[2], rhotates[0][2]);
625 B[1] = ROL64(A[0][3] ^ D[3], rhotates[1][3]);
626 B[2] = ROL64(A[3][4] ^ D[4], rhotates[2][4]);
627 B[3] = ROL64(A[1][0] ^ D[0], rhotates[3][0]);
628 B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
630 C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
631 C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
632 C[2] ^= A[2][2] = B[2] ^ (~B[3] & B[4]);
633 C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
634 C[4] ^= A[3][4] = B[4] ^ (~B[0] & B[1]);
637 D[0] = ROL64(C[1], 1) ^ C[4];
638 D[1] = ROL64(C[2], 1) ^ C[0];
639 D[2] = ROL64(C[3], 1) ^ C[1];
640 D[3] = ROL64(C[4], 1) ^ C[2];
641 D[4] = ROL64(C[0], 1) ^ C[3];
643 B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
644 B[1] = ROL64(A[2][1] ^ D[1], rhotates[1][1]);
645 B[2] = ROL64(A[4][2] ^ D[2], rhotates[2][2]);
646 B[3] = ROL64(A[1][3] ^ D[3], rhotates[3][3]);
647 B[4] = ROL64(A[3][4] ^ D[4], rhotates[4][4]);
649 C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 2];
650 C[1] = A[2][1] = B[1] ^ (~B[2] & B[3]);
651 C[2] = A[4][2] = B[2] ^ (~B[3] & B[4]);
652 C[3] = A[1][3] = B[3] ^ (~B[4] & B[0]);
653 C[4] = A[3][4] = B[4] ^ (~B[0] & B[1]);
655 B[0] = ROL64(A[4][3] ^ D[3], rhotates[0][3]);
656 B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
657 B[2] = ROL64(A[3][0] ^ D[0], rhotates[2][0]);
658 B[3] = ROL64(A[0][1] ^ D[1], rhotates[3][1]);
659 B[4] = ROL64(A[2][2] ^ D[2], rhotates[4][2]);
661 C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
662 C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
663 C[2] ^= A[2][2] = B[2] ^ (~B[3] & B[4]);
664 C[3] ^= A[4][3] = B[3] ^ (~B[4] & B[0]);
665 C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);
667 B[0] = ROL64(A[3][1] ^ D[1], rhotates[0][1]);
668 B[1] = ROL64(A[0][2] ^ D[2], rhotates[1][2]);
669 B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
670 B[3] = ROL64(A[4][4] ^ D[4], rhotates[3][4]);
671 B[4] = ROL64(A[1][0] ^ D[0], rhotates[4][0]);
673 C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
674 C[1] ^= A[3][1] = B[1] ^ (~B[2] & B[3]);
675 C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
676 C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
677 C[4] ^= A[4][4] = B[4] ^ (~B[0] & B[1]);
679 B[0] = ROL64(A[2][4] ^ D[4], rhotates[0][4]);
680 B[1] = ROL64(A[4][0] ^ D[0], rhotates[1][0]);
681 B[2] = ROL64(A[1][1] ^ D[1], rhotates[2][1]);
682 B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
683 B[4] = ROL64(A[0][3] ^ D[3], rhotates[4][3]);
685 C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
686 C[1] ^= A[1][1] = B[1] ^ (~B[2] & B[3]);
687 C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
688 C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
689 C[4] ^= A[2][4] = B[4] ^ (~B[0] & B[1]);
691 B[0] = ROL64(A[1][2] ^ D[2], rhotates[0][2]);
692 B[1] = ROL64(A[3][3] ^ D[3], rhotates[1][3]);
693 B[2] = ROL64(A[0][4] ^ D[4], rhotates[2][4]);
694 B[3] = ROL64(A[2][0] ^ D[0], rhotates[3][0]);
695 B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
697 C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
698 C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
699 C[2] ^= A[1][2] = B[2] ^ (~B[3] & B[4]);
700 C[3] ^= A[3][3] = B[3] ^ (~B[4] & B[0]);
701 C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);
704 D[0] = ROL64(C[1], 1) ^ C[4];
705 D[1] = ROL64(C[2], 1) ^ C[0];
706 D[2] = ROL64(C[3], 1) ^ C[1];
707 D[3] = ROL64(C[4], 1) ^ C[2];
708 D[4] = ROL64(C[0], 1) ^ C[3];
710 B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
711 B[1] = ROL64(A[0][1] ^ D[1], rhotates[1][1]);
712 B[2] = ROL64(A[0][2] ^ D[2], rhotates[2][2]);
713 B[3] = ROL64(A[0][3] ^ D[3], rhotates[3][3]);
714 B[4] = ROL64(A[0][4] ^ D[4], rhotates[4][4]);
716 /* C[0] = */ A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 3];
717 /* C[1] = */ A[0][1] = B[1] ^ (~B[2] & B[3]);
718 /* C[2] = */ A[0][2] = B[2] ^ (~B[3] & B[4]);
719 /* C[3] = */ A[0][3] = B[3] ^ (~B[4] & B[0]);
720 /* C[4] = */ A[0][4] = B[4] ^ (~B[0] & B[1]);
722 B[0] = ROL64(A[1][3] ^ D[3], rhotates[0][3]);
723 B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
724 B[2] = ROL64(A[1][0] ^ D[0], rhotates[2][0]);
725 B[3] = ROL64(A[1][1] ^ D[1], rhotates[3][1]);
726 B[4] = ROL64(A[1][2] ^ D[2], rhotates[4][2]);
728 /* C[0] ^= */ A[1][0] = B[0] ^ (~B[1] & B[2]);
729 /* C[1] ^= */ A[1][1] = B[1] ^ (~B[2] & B[3]);
730 /* C[2] ^= */ A[1][2] = B[2] ^ (~B[3] & B[4]);
731 /* C[3] ^= */ A[1][3] = B[3] ^ (~B[4] & B[0]);
732 /* C[4] ^= */ A[1][4] = B[4] ^ (~B[0] & B[1]);
734 B[0] = ROL64(A[2][1] ^ D[1], rhotates[0][1]);
735 B[1] = ROL64(A[2][2] ^ D[2], rhotates[1][2]);
736 B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
737 B[3] = ROL64(A[2][4] ^ D[4], rhotates[3][4]);
738 B[4] = ROL64(A[2][0] ^ D[0], rhotates[4][0]);
740 /* C[0] ^= */ A[2][0] = B[0] ^ (~B[1] & B[2]);
741 /* C[1] ^= */ A[2][1] = B[1] ^ (~B[2] & B[3]);
742 /* C[2] ^= */ A[2][2] = B[2] ^ (~B[3] & B[4]);
743 /* C[3] ^= */ A[2][3] = B[3] ^ (~B[4] & B[0]);
744 /* C[4] ^= */ A[2][4] = B[4] ^ (~B[0] & B[1]);
746 B[0] = ROL64(A[3][4] ^ D[4], rhotates[0][4]);
747 B[1] = ROL64(A[3][0] ^ D[0], rhotates[1][0]);
748 B[2] = ROL64(A[3][1] ^ D[1], rhotates[2][1]);
749 B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
750 B[4] = ROL64(A[3][3] ^ D[3], rhotates[4][3]);
752 /* C[0] ^= */ A[3][0] = B[0] ^ (~B[1] & B[2]);
753 /* C[1] ^= */ A[3][1] = B[1] ^ (~B[2] & B[3]);
754 /* C[2] ^= */ A[3][2] = B[2] ^ (~B[3] & B[4]);
755 /* C[3] ^= */ A[3][3] = B[3] ^ (~B[4] & B[0]);
756 /* C[4] ^= */ A[3][4] = B[4] ^ (~B[0] & B[1]);
758 B[0] = ROL64(A[4][2] ^ D[2], rhotates[0][2]);
759 B[1] = ROL64(A[4][3] ^ D[3], rhotates[1][3]);
760 B[2] = ROL64(A[4][4] ^ D[4], rhotates[2][4]);
761 B[3] = ROL64(A[4][0] ^ D[0], rhotates[3][0]);
762 B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
764 /* C[0] ^= */ A[4][0] = B[0] ^ (~B[1] & B[2]);
765 /* C[1] ^= */ A[4][1] = B[1] ^ (~B[2] & B[3]);
766 /* C[2] ^= */ A[4][2] = B[2] ^ (~B[3] & B[4]);
767 /* C[3] ^= */ A[4][3] = B[3] ^ (~B[4] & B[0]);
768 /* C[4] ^= */ A[4][4] = B[4] ^ (~B[0] & B[1]);
771 void KeccakF1600(uint64_t A[5][5])
775 for (i = 0; i < 24; i += 4) {
782 static uint64_t BitInterleave(uint64_t Ai)
784 if (sizeof(void *) < 8) {
785 uint32_t hi = 0, lo = 0;
788 for (j = 0; j < 32; j++) {
789 lo |= ((uint32_t)(Ai >> (2 * j)) & 1) << j;
790 hi |= ((uint32_t)(Ai >> (2 * j + 1)) & 1) << j;
793 Ai = ((uint64_t)hi << 32) | lo;
799 static uint64_t BitDeinterleave(uint64_t Ai)
801 if (sizeof(void *) < 8) {
802 uint32_t hi = (uint32_t)(Ai >> 32), lo = (uint32_t)Ai;
806 for (j = 0; j < 32; j++) {
807 Ai |= (uint64_t)((lo >> j) & 1) << (2 * j);
808 Ai |= (uint64_t)((hi >> j) & 1) << (2 * j + 1);
816 * SHA3_absorb can be called multiple times, but at each invocation
817 * largest multiple of |r| out of |len| bytes are processed. Then
818 * remaining amount of bytes are returned. This is done to spare caller
819 * trouble of calculating the largest multiple of |r|, effectively the
820 * blocksize. It is commonly (1600 - 256*n)/8, e.g. 168, 136, 104, 72,
821 * but can also be (1600 - 448)/8 = 144. All this means that message
822 * padding and intermediate sub-block buffering, byte- or bitwise, is
823 * caller's reponsibility.
825 size_t SHA3_absorb(uint64_t A[5][5], const unsigned char *inp, size_t len,
828 uint64_t *A_flat = (uint64_t *)A;
831 assert(r < (25 * sizeof(A[0][0])) && (r % 8) == 0);
834 for (i = 0; i < w; i++) {
835 uint64_t Ai = (uint64_t)inp[0] | (uint64_t)inp[1] << 8 |
836 (uint64_t)inp[2] << 16 | (uint64_t)inp[3] << 24 |
837 (uint64_t)inp[4] << 32 | (uint64_t)inp[5] << 40 |
838 (uint64_t)inp[6] << 48 | (uint64_t)inp[7] << 56;
841 A_flat[i] ^= BitInterleave(Ai);
851 * SHA3_squeeze is called once at the end to generate |out| hash value
854 void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r)
856 uint64_t *A_flat = (uint64_t *)A;
857 size_t i, rem, w = r / 8;
859 assert(r < (25 * sizeof(A[0][0])) && (r % 8) == 0);
862 for (i = 0; i < w; i++) {
863 uint64_t Ai = BitDeinterleave(A_flat[i]);
865 out[0] = (unsigned char)(Ai);
866 out[1] = (unsigned char)(Ai >> 8);
867 out[2] = (unsigned char)(Ai >> 16);
868 out[3] = (unsigned char)(Ai >> 24);
869 out[4] = (unsigned char)(Ai >> 32);
870 out[5] = (unsigned char)(Ai >> 40);
871 out[6] = (unsigned char)(Ai >> 48);
872 out[7] = (unsigned char)(Ai >> 56);
883 for (i = 0; i < len; i++) {
884 uint64_t Ai = BitDeinterleave(A_flat[i]);
886 out[0] = (unsigned char)(Ai);
887 out[1] = (unsigned char)(Ai >> 8);
888 out[2] = (unsigned char)(Ai >> 16);
889 out[3] = (unsigned char)(Ai >> 24);
890 out[4] = (unsigned char)(Ai >> 32);
891 out[5] = (unsigned char)(Ai >> 40);
892 out[6] = (unsigned char)(Ai >> 48);
893 out[7] = (unsigned char)(Ai >> 56);
898 uint64_t Ai = BitDeinterleave(A_flat[i]);
900 for (i = 0; i < rem; i++) {
901 *out++ = (unsigned char)Ai;
909 * Post-padding one-shot implementations would look as following:
911 * SHA3_224 SHA3_sponge(inp, len, out, 224/8, (1600-448)/8);
912 * SHA3_256 SHA3_sponge(inp, len, out, 256/8, (1600-512)/8);
913 * SHA3_384 SHA3_sponge(inp, len, out, 384/8, (1600-768)/8);
914 * SHA3_512 SHA3_sponge(inp, len, out, 512/8, (1600-1024)/8);
915 * SHAKE_128 SHA3_sponge(inp, len, out, d, (1600-256)/8);
916 * SHAKE_256 SHA3_sponge(inp, len, out, d, (1600-512)/8);
919 void SHA3_sponge(const unsigned char *inp, size_t len,
920 unsigned char *out, size_t d, size_t r)
924 memset(A, 0, sizeof(A));
925 SHA3_absorb(A, inp, len, r);
926 SHA3_squeeze(A, out, d, r);
934 * This is 5-bit SHAKE128 test from http://csrc.nist.gov/groups/ST/toolkit/examples.html#aHashing
936 unsigned char test[168] = { '\xf3', '\x3' };
937 unsigned char out[512];
939 static const unsigned char result[512] = {
940 0x2E, 0x0A, 0xBF, 0xBA, 0x83, 0xE6, 0x72, 0x0B,
941 0xFB, 0xC2, 0x25, 0xFF, 0x6B, 0x7A, 0xB9, 0xFF,
942 0xCE, 0x58, 0xBA, 0x02, 0x7E, 0xE3, 0xD8, 0x98,
943 0x76, 0x4F, 0xEF, 0x28, 0x7D, 0xDE, 0xCC, 0xCA,
944 0x3E, 0x6E, 0x59, 0x98, 0x41, 0x1E, 0x7D, 0xDB,
945 0x32, 0xF6, 0x75, 0x38, 0xF5, 0x00, 0xB1, 0x8C,
946 0x8C, 0x97, 0xC4, 0x52, 0xC3, 0x70, 0xEA, 0x2C,
947 0xF0, 0xAF, 0xCA, 0x3E, 0x05, 0xDE, 0x7E, 0x4D,
948 0xE2, 0x7F, 0xA4, 0x41, 0xA9, 0xCB, 0x34, 0xFD,
949 0x17, 0xC9, 0x78, 0xB4, 0x2D, 0x5B, 0x7E, 0x7F,
950 0x9A, 0xB1, 0x8F, 0xFE, 0xFF, 0xC3, 0xC5, 0xAC,
951 0x2F, 0x3A, 0x45, 0x5E, 0xEB, 0xFD, 0xC7, 0x6C,
952 0xEA, 0xEB, 0x0A, 0x2C, 0xCA, 0x22, 0xEE, 0xF6,
953 0xE6, 0x37, 0xF4, 0xCA, 0xBE, 0x5C, 0x51, 0xDE,
954 0xD2, 0xE3, 0xFA, 0xD8, 0xB9, 0x52, 0x70, 0xA3,
955 0x21, 0x84, 0x56, 0x64, 0xF1, 0x07, 0xD1, 0x64,
956 0x96, 0xBB, 0x7A, 0xBF, 0xBE, 0x75, 0x04, 0xB6,
957 0xED, 0xE2, 0xE8, 0x9E, 0x4B, 0x99, 0x6F, 0xB5,
958 0x8E, 0xFD, 0xC4, 0x18, 0x1F, 0x91, 0x63, 0x38,
959 0x1C, 0xBE, 0x7B, 0xC0, 0x06, 0xA7, 0xA2, 0x05,
960 0x98, 0x9C, 0x52, 0x6C, 0xD1, 0xBD, 0x68, 0x98,
961 0x36, 0x93, 0xB4, 0xBD, 0xC5, 0x37, 0x28, 0xB2,
962 0x41, 0xC1, 0xCF, 0xF4, 0x2B, 0xB6, 0x11, 0x50,
963 0x2C, 0x35, 0x20, 0x5C, 0xAB, 0xB2, 0x88, 0x75,
964 0x56, 0x55, 0xD6, 0x20, 0xC6, 0x79, 0x94, 0xF0,
965 0x64, 0x51, 0x18, 0x7F, 0x6F, 0xD1, 0x7E, 0x04,
966 0x66, 0x82, 0xBA, 0x12, 0x86, 0x06, 0x3F, 0xF8,
967 0x8F, 0xE2, 0x50, 0x8D, 0x1F, 0xCA, 0xF9, 0x03,
968 0x5A, 0x12, 0x31, 0xAD, 0x41, 0x50, 0xA9, 0xC9,
969 0xB2, 0x4C, 0x9B, 0x2D, 0x66, 0xB2, 0xAD, 0x1B,
970 0xDE, 0x0B, 0xD0, 0xBB, 0xCB, 0x8B, 0xE0, 0x5B,
971 0x83, 0x52, 0x29, 0xEF, 0x79, 0x19, 0x73, 0x73,
972 0x23, 0x42, 0x44, 0x01, 0xE1, 0xD8, 0x37, 0xB6,
973 0x6E, 0xB4, 0xE6, 0x30, 0xFF, 0x1D, 0xE7, 0x0C,
974 0xB3, 0x17, 0xC2, 0xBA, 0xCB, 0x08, 0x00, 0x1D,
975 0x34, 0x77, 0xB7, 0xA7, 0x0A, 0x57, 0x6D, 0x20,
976 0x86, 0x90, 0x33, 0x58, 0x9D, 0x85, 0xA0, 0x1D,
977 0xDB, 0x2B, 0x66, 0x46, 0xC0, 0x43, 0xB5, 0x9F,
978 0xC0, 0x11, 0x31, 0x1D, 0xA6, 0x66, 0xFA, 0x5A,
979 0xD1, 0xD6, 0x38, 0x7F, 0xA9, 0xBC, 0x40, 0x15,
980 0xA3, 0x8A, 0x51, 0xD1, 0xDA, 0x1E, 0xA6, 0x1D,
981 0x64, 0x8D, 0xC8, 0xE3, 0x9A, 0x88, 0xB9, 0xD6,
982 0x22, 0xBD, 0xE2, 0x07, 0xFD, 0xAB, 0xC6, 0xF2,
983 0x82, 0x7A, 0x88, 0x0C, 0x33, 0x0B, 0xBF, 0x6D,
984 0xF7, 0x33, 0x77, 0x4B, 0x65, 0x3E, 0x57, 0x30,
985 0x5D, 0x78, 0xDC, 0xE1, 0x12, 0xF1, 0x0A, 0x2C,
986 0x71, 0xF4, 0xCD, 0xAD, 0x92, 0xED, 0x11, 0x3E,
987 0x1C, 0xEA, 0x63, 0xB9, 0x19, 0x25, 0xED, 0x28,
988 0x19, 0x1E, 0x6D, 0xBB, 0xB5, 0xAA, 0x5A, 0x2A,
989 0xFD, 0xA5, 0x1F, 0xC0, 0x5A, 0x3A, 0xF5, 0x25,
990 0x8B, 0x87, 0x66, 0x52, 0x43, 0x55, 0x0F, 0x28,
991 0x94, 0x8A, 0xE2, 0xB8, 0xBE, 0xB6, 0xBC, 0x9C,
992 0x77, 0x0B, 0x35, 0xF0, 0x67, 0xEA, 0xA6, 0x41,
993 0xEF, 0xE6, 0x5B, 0x1A, 0x44, 0x90, 0x9D, 0x1B,
994 0x14, 0x9F, 0x97, 0xEE, 0xA6, 0x01, 0x39, 0x1C,
995 0x60, 0x9E, 0xC8, 0x1D, 0x19, 0x30, 0xF5, 0x7C,
996 0x18, 0xA4, 0xE0, 0xFA, 0xB4, 0x91, 0xD1, 0xCA,
997 0xDF, 0xD5, 0x04, 0x83, 0x44, 0x9E, 0xDC, 0x0F,
998 0x07, 0xFF, 0xB2, 0x4D, 0x2C, 0x6F, 0x9A, 0x9A,
999 0x3B, 0xFF, 0x39, 0xAE, 0x3D, 0x57, 0xF5, 0x60,
1000 0x65, 0x4D, 0x7D, 0x75, 0xC9, 0x08, 0xAB, 0xE6,
1001 0x25, 0x64, 0x75, 0x3E, 0xAC, 0x39, 0xD7, 0x50,
1002 0x3D, 0xA6, 0xD3, 0x7C, 0x2E, 0x32, 0xE1, 0xAF,
1003 0x3B, 0x8A, 0xEC, 0x8A, 0xE3, 0x06, 0x9C, 0xD9
1007 SHA3_sponge(test, sizeof(test), out, sizeof(out), sizeof(test));
1010 * Rationale behind keeping output [formatted as below] is that
1011 * one should be able to redirect it to a file, then copy-n-paste
1012 * final "output val" from official example to another file, and
1013 * compare the two with diff(1).
1015 for (i = 0; i < sizeof(out);) {
1016 printf("%02X", out[i]);
1017 printf(++i % 16 && i != sizeof(out) ? " " : "\n");
1020 if (memcmp(out,result,sizeof(out))) {
1021 fprintf(stderr,"failure\n");
1024 fprintf(stderr,"success\n");