2 * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
4 * Licensed under the OpenSSL license (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
10 #include <openssl/e_os2.h>
14 #define ROL32(a, offset) (((a) << (offset)) | ((a) >> ((32 - (offset)) & 31)))
16 static uint64_t ROL64(uint64_t val, int offset)
20 } else if (sizeof(void *) == 8) {
21 return (val << offset) | (val >> (64-offset));
23 uint32_t hi = (uint32_t)(val >> 32), lo = (uint32_t)val;
29 hi = ROL32(lo, offset);
30 lo = ROL32(tmp, offset + 1);
33 lo = ROL32(lo, offset);
34 hi = ROL32(hi, offset);
37 return ((uint64_t)hi << 32) | lo;
41 static const unsigned char rhotates[5][5] = {
43 { 36, 44, 6, 55, 20 },
44 { 3, 10, 43, 25, 39 },
45 { 41, 45, 15, 21, 8 },
49 static const uint64_t iotas[] = {
50 sizeof(void *) == 8 ? 0x0000000000000001U : 0x0000000000000001U,
51 sizeof(void *) == 8 ? 0x0000000000008082U : 0x0000008900000000U,
52 sizeof(void *) == 8 ? 0x800000000000808aU : 0x8000008b00000000U,
53 sizeof(void *) == 8 ? 0x8000000080008000U : 0x8000808000000000U,
54 sizeof(void *) == 8 ? 0x000000000000808bU : 0x0000008b00000001U,
55 sizeof(void *) == 8 ? 0x0000000080000001U : 0x0000800000000001U,
56 sizeof(void *) == 8 ? 0x8000000080008081U : 0x8000808800000001U,
57 sizeof(void *) == 8 ? 0x8000000000008009U : 0x8000008200000001U,
58 sizeof(void *) == 8 ? 0x000000000000008aU : 0x0000000b00000000U,
59 sizeof(void *) == 8 ? 0x0000000000000088U : 0x0000000a00000000U,
60 sizeof(void *) == 8 ? 0x0000000080008009U : 0x0000808200000001U,
61 sizeof(void *) == 8 ? 0x000000008000000aU : 0x0000800300000000U,
62 sizeof(void *) == 8 ? 0x000000008000808bU : 0x0000808b00000001U,
63 sizeof(void *) == 8 ? 0x800000000000008bU : 0x8000000b00000001U,
64 sizeof(void *) == 8 ? 0x8000000000008089U : 0x8000008a00000001U,
65 sizeof(void *) == 8 ? 0x8000000000008003U : 0x8000008100000001U,
66 sizeof(void *) == 8 ? 0x8000000000008002U : 0x8000008100000000U,
67 sizeof(void *) == 8 ? 0x8000000000000080U : 0x8000000800000000U,
68 sizeof(void *) == 8 ? 0x000000000000800aU : 0x0000008300000000U,
69 sizeof(void *) == 8 ? 0x800000008000000aU : 0x8000800300000000U,
70 sizeof(void *) == 8 ? 0x8000000080008081U : 0x8000808800000001U,
71 sizeof(void *) == 8 ? 0x8000000000008080U : 0x8000008800000000U,
72 sizeof(void *) == 8 ? 0x0000000080000001U : 0x0000800000000001U,
73 sizeof(void *) == 8 ? 0x8000000080008008U : 0x8000808200000000U
76 #if defined(KECCAK_REF)
78 * This is straightforward or "maximum clarity" implementation aiming
79 * to resemble section 3.2 of the FIPS PUB 202 "SHA-3 Standard:
80 * Permutation-Based Hash and Extendible-Output Functions" as much as
81 * possible. With one caveat. Because of the way C stores matrices,
82 * references to A[x,y] in the specification are presented as A[y][x].
83 * Implementation unrolls inner x-loops so that modulo 5 operations are
84 * explicitly pre-computed.
86 static void Theta(uint64_t A[5][5])
97 for (y = 1; y < 5; y++) {
105 D[0] = ROL64(C[1], 1) ^ C[4];
106 D[1] = ROL64(C[2], 1) ^ C[0];
107 D[2] = ROL64(C[3], 1) ^ C[1];
108 D[3] = ROL64(C[4], 1) ^ C[2];
109 D[4] = ROL64(C[0], 1) ^ C[3];
111 for (y = 0; y < 5; y++) {
120 static void Rho(uint64_t A[5][5])
124 for (y = 0; y < 5; y++) {
125 A[y][0] = ROL64(A[y][0], rhotates[y][0]);
126 A[y][1] = ROL64(A[y][1], rhotates[y][1]);
127 A[y][2] = ROL64(A[y][2], rhotates[y][2]);
128 A[y][3] = ROL64(A[y][3], rhotates[y][3]);
129 A[y][4] = ROL64(A[y][4], rhotates[y][4]);
133 static void Pi(uint64_t A[5][5])
139 * A[y][x] = T[x][(3*y+x)%5]
141 memcpy(T, A, sizeof(T));
174 static void Chi(uint64_t A[5][5])
179 for (y = 0; y < 5; y++) {
180 C[0] = A[y][0] ^ (~A[y][1] & A[y][2]);
181 C[1] = A[y][1] ^ (~A[y][2] & A[y][3]);
182 C[2] = A[y][2] ^ (~A[y][3] & A[y][4]);
183 C[3] = A[y][3] ^ (~A[y][4] & A[y][0]);
184 C[4] = A[y][4] ^ (~A[y][0] & A[y][1]);
194 static void Iota(uint64_t A[5][5], size_t i)
196 assert(i < (sizeof(iotas) / sizeof(iotas[0])));
200 void KeccakF1600(uint64_t A[5][5])
204 for (i = 0; i < 24; i++) {
213 #elif defined(KECCAK_1X)
215 * This implementation is optimization of above code featuring unroll
216 * of even y-loops, their fusion and code motion. It also minimizes
217 * temporary storage. Compiler would normally do all these things for
218 * you, purpose of manual optimization is to provide "unobscured"
219 * reference for assembly implementation [in case this approach is
220 * chosen for implementation on some platform]. In the nutshell it's
221 * equivalent of "plane-per-plane processing" approach discussed in
222 * section 2.4 of "Keccak implementation overview".
224 static void Round(uint64_t A[5][5], size_t i)
226 uint64_t C[5], E[2]; /* registers */
227 uint64_t D[5], T[2][5]; /* memory */
229 assert(i < (sizeof(iotas) / sizeof(iotas[0])));
231 C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
232 C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
233 C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
234 C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
235 C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
238 D[1] = E[0] = ROL64(C[2], 1) ^ C[0];
239 D[4] = E[1] = ROL64(C[0], 1) ^ C[3];
240 D[0] = C[0] = ROL64(C[1], 1) ^ C[4];
241 D[2] = C[1] = ROL64(C[3], 1) ^ C[1];
242 D[3] = C[2] = ROL64(C[4], 1) ^ C[2];
244 T[0][0] = A[3][0] ^ C[0]; /* borrow T[0][0] */
245 T[0][1] = A[0][1] ^ E[0]; /* D[1] */
246 T[0][2] = A[0][2] ^ C[1]; /* D[2] */
247 T[0][3] = A[0][3] ^ C[2]; /* D[3] */
248 T[0][4] = A[0][4] ^ E[1]; /* D[4] */
250 C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */
251 C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */
252 C[0] = A[0][0] ^ C[0]; /* rotate by 0 */ /* D[0] */
253 C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]); /* D[2] */
254 C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]); /* D[1] */
256 D[0] = ROL64(C[1], 1) ^ C[4];
257 D[1] = ROL64(C[2], 1) ^ C[0];
258 D[2] = ROL64(C[3], 1) ^ C[1];
259 D[3] = ROL64(C[4], 1) ^ C[2];
260 D[4] = ROL64(C[0], 1) ^ C[3];
262 T[0][0] = A[3][0] ^ D[0]; /* borrow T[0][0] */
263 T[0][1] = A[0][1] ^ D[1];
264 T[0][2] = A[0][2] ^ D[2];
265 T[0][3] = A[0][3] ^ D[3];
266 T[0][4] = A[0][4] ^ D[4];
268 C[0] = A[0][0] ^ D[0]; /* rotate by 0 */
269 C[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
270 C[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
271 C[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
272 C[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
274 A[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
275 A[0][1] = C[1] ^ (~C[2] & C[3]);
276 A[0][2] = C[2] ^ (~C[3] & C[4]);
277 A[0][3] = C[3] ^ (~C[4] & C[0]);
278 A[0][4] = C[4] ^ (~C[0] & C[1]);
280 T[1][0] = A[1][0] ^ (C[3] = D[0]);
281 T[1][1] = A[2][1] ^ (C[4] = D[1]); /* borrow T[1][1] */
282 T[1][2] = A[1][2] ^ (E[0] = D[2]);
283 T[1][3] = A[1][3] ^ (E[1] = D[3]);
284 T[1][4] = A[2][4] ^ (C[2] = D[4]); /* borrow T[1][4] */
286 C[0] = ROL64(T[0][3], rhotates[0][3]);
287 C[1] = ROL64(A[1][4] ^ C[2], rhotates[1][4]); /* D[4] */
288 C[2] = ROL64(A[2][0] ^ C[3], rhotates[2][0]); /* D[0] */
289 C[3] = ROL64(A[3][1] ^ C[4], rhotates[3][1]); /* D[1] */
290 C[4] = ROL64(A[4][2] ^ E[0], rhotates[4][2]); /* D[2] */
292 A[1][0] = C[0] ^ (~C[1] & C[2]);
293 A[1][1] = C[1] ^ (~C[2] & C[3]);
294 A[1][2] = C[2] ^ (~C[3] & C[4]);
295 A[1][3] = C[3] ^ (~C[4] & C[0]);
296 A[1][4] = C[4] ^ (~C[0] & C[1]);
298 C[0] = ROL64(T[0][1], rhotates[0][1]);
299 C[1] = ROL64(T[1][2], rhotates[1][2]);
300 C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
301 C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
302 C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
304 A[2][0] = C[0] ^ (~C[1] & C[2]);
305 A[2][1] = C[1] ^ (~C[2] & C[3]);
306 A[2][2] = C[2] ^ (~C[3] & C[4]);
307 A[2][3] = C[3] ^ (~C[4] & C[0]);
308 A[2][4] = C[4] ^ (~C[0] & C[1]);
310 C[0] = ROL64(T[0][4], rhotates[0][4]);
311 C[1] = ROL64(T[1][0], rhotates[1][0]);
312 C[2] = ROL64(T[1][1], rhotates[2][1]); /* originally A[2][1] */
313 C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
314 C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
316 A[3][0] = C[0] ^ (~C[1] & C[2]);
317 A[3][1] = C[1] ^ (~C[2] & C[3]);
318 A[3][2] = C[2] ^ (~C[3] & C[4]);
319 A[3][3] = C[3] ^ (~C[4] & C[0]);
320 A[3][4] = C[4] ^ (~C[0] & C[1]);
322 C[0] = ROL64(T[0][2], rhotates[0][2]);
323 C[1] = ROL64(T[1][3], rhotates[1][3]);
324 C[2] = ROL64(T[1][4], rhotates[2][4]); /* originally A[2][4] */
325 C[3] = ROL64(T[0][0], rhotates[3][0]); /* originally A[3][0] */
326 C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
328 A[4][0] = C[0] ^ (~C[1] & C[2]);
329 A[4][1] = C[1] ^ (~C[2] & C[3]);
330 A[4][2] = C[2] ^ (~C[3] & C[4]);
331 A[4][3] = C[3] ^ (~C[4] & C[0]);
332 A[4][4] = C[4] ^ (~C[0] & C[1]);
335 void KeccakF1600(uint64_t A[5][5])
339 for (i = 0; i < 24; i++) {
344 #elif defined(KECCAK_1X_ALT)
346 * This is variant of above KECCAK_1X that reduces requirement for
347 * temporary storage even further, but at cost of more updates to A[][].
348 * It's less suitable if A[][] is memory bound, but better if it's
352 static void Round(uint64_t A[5][5], size_t i)
356 assert(i < (sizeof(iotas) / sizeof(iotas[0])));
358 C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
359 C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
360 C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
361 C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
362 C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
364 D[1] = C[0] ^ ROL64(C[2], 1);
365 D[2] = C[1] ^ ROL64(C[3], 1);
366 D[3] = C[2] ^= ROL64(C[4], 1);
367 D[4] = C[3] ^= ROL64(C[0], 1);
368 D[0] = C[4] ^= ROL64(C[1], 1);
405 A[0][1] = ROL64(A[1][1], rhotates[1][1]);
406 A[0][2] = ROL64(A[2][2], rhotates[2][2]);
407 A[0][3] = ROL64(A[3][3], rhotates[3][3]);
408 A[0][4] = ROL64(A[4][4], rhotates[4][4]);
410 A[1][1] = ROL64(A[1][4], rhotates[1][4]);
411 A[2][2] = ROL64(A[2][3], rhotates[2][3]);
412 A[3][3] = ROL64(A[3][2], rhotates[3][2]);
413 A[4][4] = ROL64(A[4][1], rhotates[4][1]);
415 A[1][4] = ROL64(A[4][2], rhotates[4][2]);
416 A[2][3] = ROL64(A[3][4], rhotates[3][4]);
417 A[3][2] = ROL64(A[2][1], rhotates[2][1]);
418 A[4][1] = ROL64(A[1][3], rhotates[1][3]);
420 A[4][2] = ROL64(A[2][4], rhotates[2][4]);
421 A[3][4] = ROL64(A[4][3], rhotates[4][3]);
422 A[2][1] = ROL64(A[1][2], rhotates[1][2]);
423 A[1][3] = ROL64(A[3][1], rhotates[3][1]);
425 A[2][4] = ROL64(A[4][0], rhotates[4][0]);
426 A[4][3] = ROL64(A[3][0], rhotates[3][0]);
427 A[1][2] = ROL64(A[2][0], rhotates[2][0]);
428 A[3][1] = ROL64(A[1][0], rhotates[1][0]);
430 A[1][0] = ROL64(C[3], rhotates[0][3]);
431 A[2][0] = ROL64(C[1], rhotates[0][1]);
432 A[3][0] = ROL64(C[4], rhotates[0][4]);
433 A[4][0] = ROL64(C[2], rhotates[0][2]);
440 A[0][0] ^= (~A[0][1] & A[0][2]);
441 A[1][0] ^= (~A[1][1] & A[1][2]);
442 A[0][1] ^= (~A[0][2] & A[0][3]);
443 A[1][1] ^= (~A[1][2] & A[1][3]);
444 A[0][2] ^= (~A[0][3] & A[0][4]);
445 A[1][2] ^= (~A[1][3] & A[1][4]);
446 A[0][3] ^= (~A[0][4] & C[0]);
447 A[1][3] ^= (~A[1][4] & C[1]);
448 A[0][4] ^= (~C[0] & D[0]);
449 A[1][4] ^= (~C[1] & D[1]);
456 A[2][0] ^= (~A[2][1] & A[2][2]);
457 A[3][0] ^= (~A[3][1] & A[3][2]);
458 A[2][1] ^= (~A[2][2] & A[2][3]);
459 A[3][1] ^= (~A[3][2] & A[3][3]);
460 A[2][2] ^= (~A[2][3] & A[2][4]);
461 A[3][2] ^= (~A[3][3] & A[3][4]);
462 A[2][3] ^= (~A[2][4] & C[2]);
463 A[3][3] ^= (~A[3][4] & C[3]);
464 A[2][4] ^= (~C[2] & D[2]);
465 A[3][4] ^= (~C[3] & D[3]);
470 A[4][0] ^= (~A[4][1] & A[4][2]);
471 A[4][1] ^= (~A[4][2] & A[4][3]);
472 A[4][2] ^= (~A[4][3] & A[4][4]);
473 A[4][3] ^= (~A[4][4] & C[4]);
474 A[4][4] ^= (~C[4] & D[4]);
478 void KeccakF1600(uint64_t A[5][5])
482 for (i = 0; i < 24; i++) {
487 #elif defined(KECCAK_2X)
489 * This implementation is variant of KECCAK_1X above with outer-most
490 * round loop unrolled twice. This allows to take temporary storage
491 * out of round procedure and simplify references to it by alternating
492 * it with actual data (see round loop below). Just like original, it's
493 * rather meant as reference for an assembly implementation. It's likely
494 * to provide best instruction per processed byte ratio at minimal
495 * round unroll factor...
497 static void Round(uint64_t R[5][5], uint64_t A[5][5], size_t i)
501 assert(i < (sizeof(iotas) / sizeof(iotas[0])));
503 C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
504 C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
505 C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
506 C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
507 C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
509 D[0] = ROL64(C[1], 1) ^ C[4];
510 D[1] = ROL64(C[2], 1) ^ C[0];
511 D[2] = ROL64(C[3], 1) ^ C[1];
512 D[3] = ROL64(C[4], 1) ^ C[2];
513 D[4] = ROL64(C[0], 1) ^ C[3];
515 C[0] = A[0][0] ^ D[0]; /* rotate by 0 */
516 C[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
517 C[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
518 C[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
519 C[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
521 #ifdef KECCAK_COMPLEMENTING_TRANSFORM
522 R[0][0] = C[0] ^ ( C[1] | C[2]) ^ iotas[i];
523 R[0][1] = C[1] ^ (~C[2] | C[3]);
524 R[0][2] = C[2] ^ ( C[3] & C[4]);
525 R[0][3] = C[3] ^ ( C[4] | C[0]);
526 R[0][4] = C[4] ^ ( C[0] & C[1]);
528 R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
529 R[0][1] = C[1] ^ (~C[2] & C[3]);
530 R[0][2] = C[2] ^ (~C[3] & C[4]);
531 R[0][3] = C[3] ^ (~C[4] & C[0]);
532 R[0][4] = C[4] ^ (~C[0] & C[1]);
535 C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
536 C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
537 C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
538 C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
539 C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
541 #ifdef KECCAK_COMPLEMENTING_TRANSFORM
542 R[1][0] = C[0] ^ (C[1] | C[2]);
543 R[1][1] = C[1] ^ (C[2] & C[3]);
544 R[1][2] = C[2] ^ (C[3] | ~C[4]);
545 R[1][3] = C[3] ^ (C[4] | C[0]);
546 R[1][4] = C[4] ^ (C[0] & C[1]);
548 R[1][0] = C[0] ^ (~C[1] & C[2]);
549 R[1][1] = C[1] ^ (~C[2] & C[3]);
550 R[1][2] = C[2] ^ (~C[3] & C[4]);
551 R[1][3] = C[3] ^ (~C[4] & C[0]);
552 R[1][4] = C[4] ^ (~C[0] & C[1]);
555 C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
556 C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
557 C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
558 C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
559 C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
561 #ifdef KECCAK_COMPLEMENTING_TRANSFORM
562 R[2][0] = C[0] ^ ( C[1] | C[2]);
563 R[2][1] = C[1] ^ ( C[2] & C[3]);
564 R[2][2] = C[2] ^ (~C[3] & C[4]);
565 R[2][3] = ~C[3] ^ ( C[4] | C[0]);
566 R[2][4] = C[4] ^ ( C[0] & C[1]);
568 R[2][0] = C[0] ^ (~C[1] & C[2]);
569 R[2][1] = C[1] ^ (~C[2] & C[3]);
570 R[2][2] = C[2] ^ (~C[3] & C[4]);
571 R[2][3] = C[3] ^ (~C[4] & C[0]);
572 R[2][4] = C[4] ^ (~C[0] & C[1]);
575 C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
576 C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
577 C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
578 C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
579 C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
581 #ifdef KECCAK_COMPLEMENTING_TRANSFORM
582 R[3][0] = C[0] ^ ( C[1] & C[2]);
583 R[3][1] = C[1] ^ ( C[2] | C[3]);
584 R[3][2] = C[2] ^ (~C[3] | C[4]);
585 R[3][3] = ~C[3] ^ ( C[4] & C[0]);
586 R[3][4] = C[4] ^ ( C[0] | C[1]);
588 R[3][0] = C[0] ^ (~C[1] & C[2]);
589 R[3][1] = C[1] ^ (~C[2] & C[3]);
590 R[3][2] = C[2] ^ (~C[3] & C[4]);
591 R[3][3] = C[3] ^ (~C[4] & C[0]);
592 R[3][4] = C[4] ^ (~C[0] & C[1]);
595 C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
596 C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
597 C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
598 C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
599 C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
601 #ifdef KECCAK_COMPLEMENTING_TRANSFORM
602 R[4][0] = C[0] ^ (~C[1] & C[2]);
603 R[4][1] = ~C[1] ^ ( C[2] | C[3]);
604 R[4][2] = C[2] ^ ( C[3] & C[4]);
605 R[4][3] = C[3] ^ ( C[4] | C[0]);
606 R[4][4] = C[4] ^ ( C[0] & C[1]);
608 R[4][0] = C[0] ^ (~C[1] & C[2]);
609 R[4][1] = C[1] ^ (~C[2] & C[3]);
610 R[4][2] = C[2] ^ (~C[3] & C[4]);
611 R[4][3] = C[3] ^ (~C[4] & C[0]);
612 R[4][4] = C[4] ^ (~C[0] & C[1]);
616 void KeccakF1600(uint64_t A[5][5])
621 #ifdef KECCAK_COMPLEMENTING_TRANSFORM
630 for (i = 0; i < 24; i += 2) {
635 #ifdef KECCAK_COMPLEMENTING_TRANSFORM
647 * This implementation is KECCAK_1X from above combined 4 times with
648 * a twist that allows to omit temporary storage and perform in-place
649 * processing. It's discussed in section 2.5 of "Keccak implementation
650 * overview". It's likely to be best suited for processors with large
653 static void FourRounds(uint64_t A[5][5], size_t i)
655 uint64_t B[5], C[5], D[5];
657 assert(i <= (sizeof(iotas) / sizeof(iotas[0]) - 4));
660 C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
661 C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
662 C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
663 C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
664 C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
666 D[0] = ROL64(C[1], 1) ^ C[4];
667 D[1] = ROL64(C[2], 1) ^ C[0];
668 D[2] = ROL64(C[3], 1) ^ C[1];
669 D[3] = ROL64(C[4], 1) ^ C[2];
670 D[4] = ROL64(C[0], 1) ^ C[3];
672 B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
673 B[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
674 B[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
675 B[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
676 B[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
678 C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i];
679 C[1] = A[1][1] = B[1] ^ (~B[2] & B[3]);
680 C[2] = A[2][2] = B[2] ^ (~B[3] & B[4]);
681 C[3] = A[3][3] = B[3] ^ (~B[4] & B[0]);
682 C[4] = A[4][4] = B[4] ^ (~B[0] & B[1]);
684 B[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
685 B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
686 B[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
687 B[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
688 B[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
690 C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
691 C[1] ^= A[3][1] = B[1] ^ (~B[2] & B[3]);
692 C[2] ^= A[4][2] = B[2] ^ (~B[3] & B[4]);
693 C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
694 C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);
696 B[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
697 B[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
698 B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
699 B[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
700 B[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
702 C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
703 C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
704 C[2] ^= A[1][2] = B[2] ^ (~B[3] & B[4]);
705 C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
706 C[4] ^= A[3][4] = B[4] ^ (~B[0] & B[1]);
708 B[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
709 B[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
710 B[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
711 B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
712 B[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
714 C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
715 C[1] ^= A[2][1] = B[1] ^ (~B[2] & B[3]);
716 C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
717 C[3] ^= A[4][3] = B[3] ^ (~B[4] & B[0]);
718 C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);
720 B[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
721 B[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
722 B[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
723 B[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
724 B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
726 C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
727 C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
728 C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
729 C[3] ^= A[1][3] = B[3] ^ (~B[4] & B[0]);
730 C[4] ^= A[2][4] = B[4] ^ (~B[0] & B[1]);
733 D[0] = ROL64(C[1], 1) ^ C[4];
734 D[1] = ROL64(C[2], 1) ^ C[0];
735 D[2] = ROL64(C[3], 1) ^ C[1];
736 D[3] = ROL64(C[4], 1) ^ C[2];
737 D[4] = ROL64(C[0], 1) ^ C[3];
739 B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
740 B[1] = ROL64(A[3][1] ^ D[1], rhotates[1][1]);
741 B[2] = ROL64(A[1][2] ^ D[2], rhotates[2][2]);
742 B[3] = ROL64(A[4][3] ^ D[3], rhotates[3][3]);
743 B[4] = ROL64(A[2][4] ^ D[4], rhotates[4][4]);
745 C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 1];
746 C[1] = A[3][1] = B[1] ^ (~B[2] & B[3]);
747 C[2] = A[1][2] = B[2] ^ (~B[3] & B[4]);
748 C[3] = A[4][3] = B[3] ^ (~B[4] & B[0]);
749 C[4] = A[2][4] = B[4] ^ (~B[0] & B[1]);
751 B[0] = ROL64(A[3][3] ^ D[3], rhotates[0][3]);
752 B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
753 B[2] = ROL64(A[4][0] ^ D[0], rhotates[2][0]);
754 B[3] = ROL64(A[2][1] ^ D[1], rhotates[3][1]);
755 B[4] = ROL64(A[0][2] ^ D[2], rhotates[4][2]);
757 C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
758 C[1] ^= A[2][1] = B[1] ^ (~B[2] & B[3]);
759 C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
760 C[3] ^= A[3][3] = B[3] ^ (~B[4] & B[0]);
761 C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);
763 B[0] = ROL64(A[1][1] ^ D[1], rhotates[0][1]);
764 B[1] = ROL64(A[4][2] ^ D[2], rhotates[1][2]);
765 B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
766 B[3] = ROL64(A[0][4] ^ D[4], rhotates[3][4]);
767 B[4] = ROL64(A[3][0] ^ D[0], rhotates[4][0]);
769 C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
770 C[1] ^= A[1][1] = B[1] ^ (~B[2] & B[3]);
771 C[2] ^= A[4][2] = B[2] ^ (~B[3] & B[4]);
772 C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
773 C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);
775 B[0] = ROL64(A[4][4] ^ D[4], rhotates[0][4]);
776 B[1] = ROL64(A[2][0] ^ D[0], rhotates[1][0]);
777 B[2] = ROL64(A[0][1] ^ D[1], rhotates[2][1]);
778 B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
779 B[4] = ROL64(A[1][3] ^ D[3], rhotates[4][3]);
781 C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
782 C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
783 C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
784 C[3] ^= A[1][3] = B[3] ^ (~B[4] & B[0]);
785 C[4] ^= A[4][4] = B[4] ^ (~B[0] & B[1]);
787 B[0] = ROL64(A[2][2] ^ D[2], rhotates[0][2]);
788 B[1] = ROL64(A[0][3] ^ D[3], rhotates[1][3]);
789 B[2] = ROL64(A[3][4] ^ D[4], rhotates[2][4]);
790 B[3] = ROL64(A[1][0] ^ D[0], rhotates[3][0]);
791 B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
793 C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
794 C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
795 C[2] ^= A[2][2] = B[2] ^ (~B[3] & B[4]);
796 C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
797 C[4] ^= A[3][4] = B[4] ^ (~B[0] & B[1]);
800 D[0] = ROL64(C[1], 1) ^ C[4];
801 D[1] = ROL64(C[2], 1) ^ C[0];
802 D[2] = ROL64(C[3], 1) ^ C[1];
803 D[3] = ROL64(C[4], 1) ^ C[2];
804 D[4] = ROL64(C[0], 1) ^ C[3];
806 B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
807 B[1] = ROL64(A[2][1] ^ D[1], rhotates[1][1]);
808 B[2] = ROL64(A[4][2] ^ D[2], rhotates[2][2]);
809 B[3] = ROL64(A[1][3] ^ D[3], rhotates[3][3]);
810 B[4] = ROL64(A[3][4] ^ D[4], rhotates[4][4]);
812 C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 2];
813 C[1] = A[2][1] = B[1] ^ (~B[2] & B[3]);
814 C[2] = A[4][2] = B[2] ^ (~B[3] & B[4]);
815 C[3] = A[1][3] = B[3] ^ (~B[4] & B[0]);
816 C[4] = A[3][4] = B[4] ^ (~B[0] & B[1]);
818 B[0] = ROL64(A[4][3] ^ D[3], rhotates[0][3]);
819 B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
820 B[2] = ROL64(A[3][0] ^ D[0], rhotates[2][0]);
821 B[3] = ROL64(A[0][1] ^ D[1], rhotates[3][1]);
822 B[4] = ROL64(A[2][2] ^ D[2], rhotates[4][2]);
824 C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
825 C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
826 C[2] ^= A[2][2] = B[2] ^ (~B[3] & B[4]);
827 C[3] ^= A[4][3] = B[3] ^ (~B[4] & B[0]);
828 C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);
830 B[0] = ROL64(A[3][1] ^ D[1], rhotates[0][1]);
831 B[1] = ROL64(A[0][2] ^ D[2], rhotates[1][2]);
832 B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
833 B[3] = ROL64(A[4][4] ^ D[4], rhotates[3][4]);
834 B[4] = ROL64(A[1][0] ^ D[0], rhotates[4][0]);
836 C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
837 C[1] ^= A[3][1] = B[1] ^ (~B[2] & B[3]);
838 C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
839 C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
840 C[4] ^= A[4][4] = B[4] ^ (~B[0] & B[1]);
842 B[0] = ROL64(A[2][4] ^ D[4], rhotates[0][4]);
843 B[1] = ROL64(A[4][0] ^ D[0], rhotates[1][0]);
844 B[2] = ROL64(A[1][1] ^ D[1], rhotates[2][1]);
845 B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
846 B[4] = ROL64(A[0][3] ^ D[3], rhotates[4][3]);
848 C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
849 C[1] ^= A[1][1] = B[1] ^ (~B[2] & B[3]);
850 C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
851 C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
852 C[4] ^= A[2][4] = B[4] ^ (~B[0] & B[1]);
854 B[0] = ROL64(A[1][2] ^ D[2], rhotates[0][2]);
855 B[1] = ROL64(A[3][3] ^ D[3], rhotates[1][3]);
856 B[2] = ROL64(A[0][4] ^ D[4], rhotates[2][4]);
857 B[3] = ROL64(A[2][0] ^ D[0], rhotates[3][0]);
858 B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
860 C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
861 C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
862 C[2] ^= A[1][2] = B[2] ^ (~B[3] & B[4]);
863 C[3] ^= A[3][3] = B[3] ^ (~B[4] & B[0]);
864 C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);
867 D[0] = ROL64(C[1], 1) ^ C[4];
868 D[1] = ROL64(C[2], 1) ^ C[0];
869 D[2] = ROL64(C[3], 1) ^ C[1];
870 D[3] = ROL64(C[4], 1) ^ C[2];
871 D[4] = ROL64(C[0], 1) ^ C[3];
873 B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
874 B[1] = ROL64(A[0][1] ^ D[1], rhotates[1][1]);
875 B[2] = ROL64(A[0][2] ^ D[2], rhotates[2][2]);
876 B[3] = ROL64(A[0][3] ^ D[3], rhotates[3][3]);
877 B[4] = ROL64(A[0][4] ^ D[4], rhotates[4][4]);
879 /* C[0] = */ A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 3];
880 /* C[1] = */ A[0][1] = B[1] ^ (~B[2] & B[3]);
881 /* C[2] = */ A[0][2] = B[2] ^ (~B[3] & B[4]);
882 /* C[3] = */ A[0][3] = B[3] ^ (~B[4] & B[0]);
883 /* C[4] = */ A[0][4] = B[4] ^ (~B[0] & B[1]);
885 B[0] = ROL64(A[1][3] ^ D[3], rhotates[0][3]);
886 B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
887 B[2] = ROL64(A[1][0] ^ D[0], rhotates[2][0]);
888 B[3] = ROL64(A[1][1] ^ D[1], rhotates[3][1]);
889 B[4] = ROL64(A[1][2] ^ D[2], rhotates[4][2]);
891 /* C[0] ^= */ A[1][0] = B[0] ^ (~B[1] & B[2]);
892 /* C[1] ^= */ A[1][1] = B[1] ^ (~B[2] & B[3]);
893 /* C[2] ^= */ A[1][2] = B[2] ^ (~B[3] & B[4]);
894 /* C[3] ^= */ A[1][3] = B[3] ^ (~B[4] & B[0]);
895 /* C[4] ^= */ A[1][4] = B[4] ^ (~B[0] & B[1]);
897 B[0] = ROL64(A[2][1] ^ D[1], rhotates[0][1]);
898 B[1] = ROL64(A[2][2] ^ D[2], rhotates[1][2]);
899 B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
900 B[3] = ROL64(A[2][4] ^ D[4], rhotates[3][4]);
901 B[4] = ROL64(A[2][0] ^ D[0], rhotates[4][0]);
903 /* C[0] ^= */ A[2][0] = B[0] ^ (~B[1] & B[2]);
904 /* C[1] ^= */ A[2][1] = B[1] ^ (~B[2] & B[3]);
905 /* C[2] ^= */ A[2][2] = B[2] ^ (~B[3] & B[4]);
906 /* C[3] ^= */ A[2][3] = B[3] ^ (~B[4] & B[0]);
907 /* C[4] ^= */ A[2][4] = B[4] ^ (~B[0] & B[1]);
909 B[0] = ROL64(A[3][4] ^ D[4], rhotates[0][4]);
910 B[1] = ROL64(A[3][0] ^ D[0], rhotates[1][0]);
911 B[2] = ROL64(A[3][1] ^ D[1], rhotates[2][1]);
912 B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
913 B[4] = ROL64(A[3][3] ^ D[3], rhotates[4][3]);
915 /* C[0] ^= */ A[3][0] = B[0] ^ (~B[1] & B[2]);
916 /* C[1] ^= */ A[3][1] = B[1] ^ (~B[2] & B[3]);
917 /* C[2] ^= */ A[3][2] = B[2] ^ (~B[3] & B[4]);
918 /* C[3] ^= */ A[3][3] = B[3] ^ (~B[4] & B[0]);
919 /* C[4] ^= */ A[3][4] = B[4] ^ (~B[0] & B[1]);
921 B[0] = ROL64(A[4][2] ^ D[2], rhotates[0][2]);
922 B[1] = ROL64(A[4][3] ^ D[3], rhotates[1][3]);
923 B[2] = ROL64(A[4][4] ^ D[4], rhotates[2][4]);
924 B[3] = ROL64(A[4][0] ^ D[0], rhotates[3][0]);
925 B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
927 /* C[0] ^= */ A[4][0] = B[0] ^ (~B[1] & B[2]);
928 /* C[1] ^= */ A[4][1] = B[1] ^ (~B[2] & B[3]);
929 /* C[2] ^= */ A[4][2] = B[2] ^ (~B[3] & B[4]);
930 /* C[3] ^= */ A[4][3] = B[3] ^ (~B[4] & B[0]);
931 /* C[4] ^= */ A[4][4] = B[4] ^ (~B[0] & B[1]);
934 void KeccakF1600(uint64_t A[5][5])
938 for (i = 0; i < 24; i += 4) {
945 static uint64_t BitInterleave(uint64_t Ai)
947 if (sizeof(void *) < 8) {
948 uint32_t hi = 0, lo = 0;
951 for (j = 0; j < 32; j++) {
952 lo |= ((uint32_t)(Ai >> (2 * j)) & 1) << j;
953 hi |= ((uint32_t)(Ai >> (2 * j + 1)) & 1) << j;
956 Ai = ((uint64_t)hi << 32) | lo;
962 static uint64_t BitDeinterleave(uint64_t Ai)
964 if (sizeof(void *) < 8) {
965 uint32_t hi = (uint32_t)(Ai >> 32), lo = (uint32_t)Ai;
969 for (j = 0; j < 32; j++) {
970 Ai |= (uint64_t)((lo >> j) & 1) << (2 * j);
971 Ai |= (uint64_t)((hi >> j) & 1) << (2 * j + 1);
979 * SHA3_absorb can be called multiple times, but at each invocation
980 * largest multiple of |r| out of |len| bytes are processed. Then
981 * remaining amount of bytes is returned. This is done to spare caller
982 * trouble of calculating the largest multiple of |r|. |r| can be viewed
983 * as blocksize. It is commonly (1600 - 256*n)/8, e.g. 168, 136, 104,
984 * 72, but can also be (1600 - 448)/8 = 144. All this means that message
985 * padding and intermediate sub-block buffering, byte- or bitwise, is
986 * caller's reponsibility.
988 size_t SHA3_absorb(uint64_t A[5][5], const unsigned char *inp, size_t len,
991 uint64_t *A_flat = (uint64_t *)A;
994 assert(r < (25 * sizeof(A[0][0])) && (r % 8) == 0);
997 for (i = 0; i < w; i++) {
998 uint64_t Ai = (uint64_t)inp[0] | (uint64_t)inp[1] << 8 |
999 (uint64_t)inp[2] << 16 | (uint64_t)inp[3] << 24 |
1000 (uint64_t)inp[4] << 32 | (uint64_t)inp[5] << 40 |
1001 (uint64_t)inp[6] << 48 | (uint64_t)inp[7] << 56;
1004 A_flat[i] ^= BitInterleave(Ai);
1014 * SHA3_squeeze is called once at the end to generate |out| hash value
1017 void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r)
1019 uint64_t *A_flat = (uint64_t *)A;
1020 size_t i, rem, w = r / 8;
1022 assert(r < (25 * sizeof(A[0][0])) && (r % 8) == 0);
1025 for (i = 0; i < w; i++) {
1026 uint64_t Ai = BitDeinterleave(A_flat[i]);
1028 out[0] = (unsigned char)(Ai);
1029 out[1] = (unsigned char)(Ai >> 8);
1030 out[2] = (unsigned char)(Ai >> 16);
1031 out[3] = (unsigned char)(Ai >> 24);
1032 out[4] = (unsigned char)(Ai >> 32);
1033 out[5] = (unsigned char)(Ai >> 40);
1034 out[6] = (unsigned char)(Ai >> 48);
1035 out[7] = (unsigned char)(Ai >> 56);
1046 for (i = 0; i < len; i++) {
1047 uint64_t Ai = BitDeinterleave(A_flat[i]);
1049 out[0] = (unsigned char)(Ai);
1050 out[1] = (unsigned char)(Ai >> 8);
1051 out[2] = (unsigned char)(Ai >> 16);
1052 out[3] = (unsigned char)(Ai >> 24);
1053 out[4] = (unsigned char)(Ai >> 32);
1054 out[5] = (unsigned char)(Ai >> 40);
1055 out[6] = (unsigned char)(Ai >> 48);
1056 out[7] = (unsigned char)(Ai >> 56);
1061 uint64_t Ai = BitDeinterleave(A_flat[i]);
1063 for (i = 0; i < rem; i++) {
1064 *out++ = (unsigned char)Ai;
1072 * Post-padding one-shot implementations would look as following:
1074 * SHA3_224 SHA3_sponge(inp, len, out, 224/8, (1600-448)/8);
1075 * SHA3_256 SHA3_sponge(inp, len, out, 256/8, (1600-512)/8);
1076 * SHA3_384 SHA3_sponge(inp, len, out, 384/8, (1600-768)/8);
1077 * SHA3_512 SHA3_sponge(inp, len, out, 512/8, (1600-1024)/8);
1078 * SHAKE_128 SHA3_sponge(inp, len, out, d, (1600-256)/8);
1079 * SHAKE_256 SHA3_sponge(inp, len, out, d, (1600-512)/8);
1082 void SHA3_sponge(const unsigned char *inp, size_t len,
1083 unsigned char *out, size_t d, size_t r)
1087 memset(A, 0, sizeof(A));
1088 SHA3_absorb(A, inp, len, r);
1089 SHA3_squeeze(A, out, d, r);
1097 * This is 5-bit SHAKE128 test from http://csrc.nist.gov/groups/ST/toolkit/examples.html#aHashing
1099 unsigned char test[168] = { '\xf3', '\x3' };
1100 unsigned char out[512];
1102 static const unsigned char result[512] = {
1103 0x2E, 0x0A, 0xBF, 0xBA, 0x83, 0xE6, 0x72, 0x0B,
1104 0xFB, 0xC2, 0x25, 0xFF, 0x6B, 0x7A, 0xB9, 0xFF,
1105 0xCE, 0x58, 0xBA, 0x02, 0x7E, 0xE3, 0xD8, 0x98,
1106 0x76, 0x4F, 0xEF, 0x28, 0x7D, 0xDE, 0xCC, 0xCA,
1107 0x3E, 0x6E, 0x59, 0x98, 0x41, 0x1E, 0x7D, 0xDB,
1108 0x32, 0xF6, 0x75, 0x38, 0xF5, 0x00, 0xB1, 0x8C,
1109 0x8C, 0x97, 0xC4, 0x52, 0xC3, 0x70, 0xEA, 0x2C,
1110 0xF0, 0xAF, 0xCA, 0x3E, 0x05, 0xDE, 0x7E, 0x4D,
1111 0xE2, 0x7F, 0xA4, 0x41, 0xA9, 0xCB, 0x34, 0xFD,
1112 0x17, 0xC9, 0x78, 0xB4, 0x2D, 0x5B, 0x7E, 0x7F,
1113 0x9A, 0xB1, 0x8F, 0xFE, 0xFF, 0xC3, 0xC5, 0xAC,
1114 0x2F, 0x3A, 0x45, 0x5E, 0xEB, 0xFD, 0xC7, 0x6C,
1115 0xEA, 0xEB, 0x0A, 0x2C, 0xCA, 0x22, 0xEE, 0xF6,
1116 0xE6, 0x37, 0xF4, 0xCA, 0xBE, 0x5C, 0x51, 0xDE,
1117 0xD2, 0xE3, 0xFA, 0xD8, 0xB9, 0x52, 0x70, 0xA3,
1118 0x21, 0x84, 0x56, 0x64, 0xF1, 0x07, 0xD1, 0x64,
1119 0x96, 0xBB, 0x7A, 0xBF, 0xBE, 0x75, 0x04, 0xB6,
1120 0xED, 0xE2, 0xE8, 0x9E, 0x4B, 0x99, 0x6F, 0xB5,
1121 0x8E, 0xFD, 0xC4, 0x18, 0x1F, 0x91, 0x63, 0x38,
1122 0x1C, 0xBE, 0x7B, 0xC0, 0x06, 0xA7, 0xA2, 0x05,
1123 0x98, 0x9C, 0x52, 0x6C, 0xD1, 0xBD, 0x68, 0x98,
1124 0x36, 0x93, 0xB4, 0xBD, 0xC5, 0x37, 0x28, 0xB2,
1125 0x41, 0xC1, 0xCF, 0xF4, 0x2B, 0xB6, 0x11, 0x50,
1126 0x2C, 0x35, 0x20, 0x5C, 0xAB, 0xB2, 0x88, 0x75,
1127 0x56, 0x55, 0xD6, 0x20, 0xC6, 0x79, 0x94, 0xF0,
1128 0x64, 0x51, 0x18, 0x7F, 0x6F, 0xD1, 0x7E, 0x04,
1129 0x66, 0x82, 0xBA, 0x12, 0x86, 0x06, 0x3F, 0xF8,
1130 0x8F, 0xE2, 0x50, 0x8D, 0x1F, 0xCA, 0xF9, 0x03,
1131 0x5A, 0x12, 0x31, 0xAD, 0x41, 0x50, 0xA9, 0xC9,
1132 0xB2, 0x4C, 0x9B, 0x2D, 0x66, 0xB2, 0xAD, 0x1B,
1133 0xDE, 0x0B, 0xD0, 0xBB, 0xCB, 0x8B, 0xE0, 0x5B,
1134 0x83, 0x52, 0x29, 0xEF, 0x79, 0x19, 0x73, 0x73,
1135 0x23, 0x42, 0x44, 0x01, 0xE1, 0xD8, 0x37, 0xB6,
1136 0x6E, 0xB4, 0xE6, 0x30, 0xFF, 0x1D, 0xE7, 0x0C,
1137 0xB3, 0x17, 0xC2, 0xBA, 0xCB, 0x08, 0x00, 0x1D,
1138 0x34, 0x77, 0xB7, 0xA7, 0x0A, 0x57, 0x6D, 0x20,
1139 0x86, 0x90, 0x33, 0x58, 0x9D, 0x85, 0xA0, 0x1D,
1140 0xDB, 0x2B, 0x66, 0x46, 0xC0, 0x43, 0xB5, 0x9F,
1141 0xC0, 0x11, 0x31, 0x1D, 0xA6, 0x66, 0xFA, 0x5A,
1142 0xD1, 0xD6, 0x38, 0x7F, 0xA9, 0xBC, 0x40, 0x15,
1143 0xA3, 0x8A, 0x51, 0xD1, 0xDA, 0x1E, 0xA6, 0x1D,
1144 0x64, 0x8D, 0xC8, 0xE3, 0x9A, 0x88, 0xB9, 0xD6,
1145 0x22, 0xBD, 0xE2, 0x07, 0xFD, 0xAB, 0xC6, 0xF2,
1146 0x82, 0x7A, 0x88, 0x0C, 0x33, 0x0B, 0xBF, 0x6D,
1147 0xF7, 0x33, 0x77, 0x4B, 0x65, 0x3E, 0x57, 0x30,
1148 0x5D, 0x78, 0xDC, 0xE1, 0x12, 0xF1, 0x0A, 0x2C,
1149 0x71, 0xF4, 0xCD, 0xAD, 0x92, 0xED, 0x11, 0x3E,
1150 0x1C, 0xEA, 0x63, 0xB9, 0x19, 0x25, 0xED, 0x28,
1151 0x19, 0x1E, 0x6D, 0xBB, 0xB5, 0xAA, 0x5A, 0x2A,
1152 0xFD, 0xA5, 0x1F, 0xC0, 0x5A, 0x3A, 0xF5, 0x25,
1153 0x8B, 0x87, 0x66, 0x52, 0x43, 0x55, 0x0F, 0x28,
1154 0x94, 0x8A, 0xE2, 0xB8, 0xBE, 0xB6, 0xBC, 0x9C,
1155 0x77, 0x0B, 0x35, 0xF0, 0x67, 0xEA, 0xA6, 0x41,
1156 0xEF, 0xE6, 0x5B, 0x1A, 0x44, 0x90, 0x9D, 0x1B,
1157 0x14, 0x9F, 0x97, 0xEE, 0xA6, 0x01, 0x39, 0x1C,
1158 0x60, 0x9E, 0xC8, 0x1D, 0x19, 0x30, 0xF5, 0x7C,
1159 0x18, 0xA4, 0xE0, 0xFA, 0xB4, 0x91, 0xD1, 0xCA,
1160 0xDF, 0xD5, 0x04, 0x83, 0x44, 0x9E, 0xDC, 0x0F,
1161 0x07, 0xFF, 0xB2, 0x4D, 0x2C, 0x6F, 0x9A, 0x9A,
1162 0x3B, 0xFF, 0x39, 0xAE, 0x3D, 0x57, 0xF5, 0x60,
1163 0x65, 0x4D, 0x7D, 0x75, 0xC9, 0x08, 0xAB, 0xE6,
1164 0x25, 0x64, 0x75, 0x3E, 0xAC, 0x39, 0xD7, 0x50,
1165 0x3D, 0xA6, 0xD3, 0x7C, 0x2E, 0x32, 0xE1, 0xAF,
1166 0x3B, 0x8A, 0xEC, 0x8A, 0xE3, 0x06, 0x9C, 0xD9
1170 SHA3_sponge(test, sizeof(test), out, sizeof(out), sizeof(test));
1173 * Rationale behind keeping output [formatted as below] is that
1174 * one should be able to redirect it to a file, then copy-n-paste
1175 * final "output val" from official example to another file, and
1176 * compare the two with diff(1).
1178 for (i = 0; i < sizeof(out);) {
1179 printf("%02X", out[i]);
1180 printf(++i % 16 && i != sizeof(out) ? " " : "\n");
1183 if (memcmp(out,result,sizeof(out))) {
1184 fprintf(stderr,"failure\n");
1187 fprintf(stderr,"success\n");