2 * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
4 * Licensed under the OpenSSL license (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
10 #include <openssl/e_os2.h>
14 #define ROL32(a, offset) (((a) << (offset)) | ((a) >> ((32 - (offset)) & 31)))
16 static uint64_t ROL64(uint64_t val, int offset)
20 } else if (sizeof(void *) == 8) {
21 return (val << offset) | (val >> (64-offset));
23 uint32_t hi = (uint32_t)(val >> 32), lo = (uint32_t)val;
29 hi = ROL32(lo, offset);
30 lo = ROL32(tmp, offset + 1);
33 lo = ROL32(lo, offset);
34 hi = ROL32(hi, offset);
37 return ((uint64_t)hi << 32) | lo;
41 static const unsigned char rhotates[5][5] = {
43 { 36, 44, 6, 55, 20 },
44 { 3, 10, 43, 25, 39 },
45 { 41, 45, 15, 21, 8 },
49 static const uint64_t iotas[] = {
50 sizeof(void *) == 8 ? 0x0000000000000001U : 0x0000000000000001U,
51 sizeof(void *) == 8 ? 0x0000000000008082U : 0x0000008900000000U,
52 sizeof(void *) == 8 ? 0x800000000000808aU : 0x8000008b00000000U,
53 sizeof(void *) == 8 ? 0x8000000080008000U : 0x8000808000000000U,
54 sizeof(void *) == 8 ? 0x000000000000808bU : 0x0000008b00000001U,
55 sizeof(void *) == 8 ? 0x0000000080000001U : 0x0000800000000001U,
56 sizeof(void *) == 8 ? 0x8000000080008081U : 0x8000808800000001U,
57 sizeof(void *) == 8 ? 0x8000000000008009U : 0x8000008200000001U,
58 sizeof(void *) == 8 ? 0x000000000000008aU : 0x0000000b00000000U,
59 sizeof(void *) == 8 ? 0x0000000000000088U : 0x0000000a00000000U,
60 sizeof(void *) == 8 ? 0x0000000080008009U : 0x0000808200000001U,
61 sizeof(void *) == 8 ? 0x000000008000000aU : 0x0000800300000000U,
62 sizeof(void *) == 8 ? 0x000000008000808bU : 0x0000808b00000001U,
63 sizeof(void *) == 8 ? 0x800000000000008bU : 0x8000000b00000001U,
64 sizeof(void *) == 8 ? 0x8000000000008089U : 0x8000008a00000001U,
65 sizeof(void *) == 8 ? 0x8000000000008003U : 0x8000008100000001U,
66 sizeof(void *) == 8 ? 0x8000000000008002U : 0x8000008100000000U,
67 sizeof(void *) == 8 ? 0x8000000000000080U : 0x8000000800000000U,
68 sizeof(void *) == 8 ? 0x000000000000800aU : 0x0000008300000000U,
69 sizeof(void *) == 8 ? 0x800000008000000aU : 0x8000800300000000U,
70 sizeof(void *) == 8 ? 0x8000000080008081U : 0x8000808800000001U,
71 sizeof(void *) == 8 ? 0x8000000000008080U : 0x8000008800000000U,
72 sizeof(void *) == 8 ? 0x0000000080000001U : 0x0000800000000001U,
73 sizeof(void *) == 8 ? 0x8000000080008008U : 0x8000808200000000U
76 #if defined(KECCAK_REF)
78 * This is straightforward or "maximum clarity" implementation aiming
79 * to resemble section 3.2 of the FIPS PUB 202 "SHA-3 Standard:
80 * Permutation-Based Hash and Extendible-Output Functions" as much as
81 * possible. With one caveat. Because of the way C stores matrices,
82 * references to A[x,y] in the specification are presented as A[y][x].
83 * Implementation unrolls inner x-loops so that modulo 5 operations are
84 * explicitly pre-computed.
86 static void Theta(uint64_t A[5][5])
97 for (y = 1; y < 5; y++) {
105 D[0] = ROL64(C[1], 1) ^ C[4];
106 D[1] = ROL64(C[2], 1) ^ C[0];
107 D[2] = ROL64(C[3], 1) ^ C[1];
108 D[3] = ROL64(C[4], 1) ^ C[2];
109 D[4] = ROL64(C[0], 1) ^ C[3];
111 for (y = 0; y < 5; y++) {
120 static void Rho(uint64_t A[5][5])
124 for (y = 0; y < 5; y++) {
125 A[y][0] = ROL64(A[y][0], rhotates[y][0]);
126 A[y][1] = ROL64(A[y][1], rhotates[y][1]);
127 A[y][2] = ROL64(A[y][2], rhotates[y][2]);
128 A[y][3] = ROL64(A[y][3], rhotates[y][3]);
129 A[y][4] = ROL64(A[y][4], rhotates[y][4]);
133 static void Pi(uint64_t A[5][5])
139 * A[y][x] = T[x][(3*y+x)%5]
141 memcpy(T, A, sizeof(T));
174 static void Chi(uint64_t A[5][5])
179 for (y = 0; y < 5; y++) {
180 C[0] = A[y][0] ^ (~A[y][1] & A[y][2]);
181 C[1] = A[y][1] ^ (~A[y][2] & A[y][3]);
182 C[2] = A[y][2] ^ (~A[y][3] & A[y][4]);
183 C[3] = A[y][3] ^ (~A[y][4] & A[y][0]);
184 C[4] = A[y][4] ^ (~A[y][0] & A[y][1]);
194 static void Iota(uint64_t A[5][5], size_t i)
196 assert(i < (sizeof(iotas) / sizeof(iotas[0])));
200 void KeccakF1600(uint64_t A[5][5])
204 for (i = 0; i < 24; i++) {
213 #elif defined(KECCAK_1X)
215 * This implementation is optimization of above code featuring unroll
216 * of even y-loops, their fusion and code motion. It also minimizes
217 * temporary storage. Compiler would normally do all these things for
218 * you, purpose of manual optimization is to provide "unobscured"
219 * reference for assembly implementation [in case this approach is
220 * chosen for implementation on some platform]. In the nutshell it's
221 * equivalent of "plane-per-plane processing" approach discussed in
222 * section 2.4 of "Keccak implementation overview".
224 static void Round(uint64_t A[5][5], size_t i)
226 uint64_t C[5], E[2]; /* registers */
227 uint64_t D[5], T[2][5]; /* memory */
229 assert(i < (sizeof(iotas) / sizeof(iotas[0])));
231 C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
232 C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
233 C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
234 C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
235 C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
238 D[1] = E[0] = ROL64(C[2], 1) ^ C[0];
239 D[4] = E[1] = ROL64(C[0], 1) ^ C[3];
240 D[0] = C[0] = ROL64(C[1], 1) ^ C[4];
241 D[2] = C[1] = ROL64(C[3], 1) ^ C[1];
242 D[3] = C[2] = ROL64(C[4], 1) ^ C[2];
244 T[0][0] = A[3][0] ^ C[0]; /* borrow T[0][0] */
245 T[0][1] = A[0][1] ^ E[0]; /* D[1] */
246 T[0][2] = A[0][2] ^ C[1]; /* D[2] */
247 T[0][3] = A[0][3] ^ C[2]; /* D[3] */
248 T[0][4] = A[0][4] ^ E[1]; /* D[4] */
250 C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */
251 C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */
252 C[0] = A[0][0] ^ C[0]; /* rotate by 0 */ /* D[0] */
253 C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]); /* D[2] */
254 C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]); /* D[1] */
256 D[0] = ROL64(C[1], 1) ^ C[4];
257 D[1] = ROL64(C[2], 1) ^ C[0];
258 D[2] = ROL64(C[3], 1) ^ C[1];
259 D[3] = ROL64(C[4], 1) ^ C[2];
260 D[4] = ROL64(C[0], 1) ^ C[3];
262 T[0][0] = A[3][0] ^ D[0]; /* borrow T[0][0] */
263 T[0][1] = A[0][1] ^ D[1];
264 T[0][2] = A[0][2] ^ D[2];
265 T[0][3] = A[0][3] ^ D[3];
266 T[0][4] = A[0][4] ^ D[4];
268 C[0] = A[0][0] ^ D[0]; /* rotate by 0 */
269 C[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
270 C[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
271 C[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
272 C[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
274 A[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
275 A[0][1] = C[1] ^ (~C[2] & C[3]);
276 A[0][2] = C[2] ^ (~C[3] & C[4]);
277 A[0][3] = C[3] ^ (~C[4] & C[0]);
278 A[0][4] = C[4] ^ (~C[0] & C[1]);
280 T[1][0] = A[1][0] ^ (C[3] = D[0]);
281 T[1][1] = A[2][1] ^ (C[4] = D[1]); /* borrow T[1][1] */
282 T[1][2] = A[1][2] ^ (E[0] = D[2]);
283 T[1][3] = A[1][3] ^ (E[1] = D[3]);
284 T[1][4] = A[2][4] ^ (C[2] = D[4]); /* borrow T[1][4] */
286 C[0] = ROL64(T[0][3], rhotates[0][3]);
287 C[1] = ROL64(A[1][4] ^ C[2], rhotates[1][4]); /* D[4] */
288 C[2] = ROL64(A[2][0] ^ C[3], rhotates[2][0]); /* D[0] */
289 C[3] = ROL64(A[3][1] ^ C[4], rhotates[3][1]); /* D[1] */
290 C[4] = ROL64(A[4][2] ^ E[0], rhotates[4][2]); /* D[2] */
292 A[1][0] = C[0] ^ (~C[1] & C[2]);
293 A[1][1] = C[1] ^ (~C[2] & C[3]);
294 A[1][2] = C[2] ^ (~C[3] & C[4]);
295 A[1][3] = C[3] ^ (~C[4] & C[0]);
296 A[1][4] = C[4] ^ (~C[0] & C[1]);
298 C[0] = ROL64(T[0][1], rhotates[0][1]);
299 C[1] = ROL64(T[1][2], rhotates[1][2]);
300 C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
301 C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
302 C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
304 A[2][0] = C[0] ^ (~C[1] & C[2]);
305 A[2][1] = C[1] ^ (~C[2] & C[3]);
306 A[2][2] = C[2] ^ (~C[3] & C[4]);
307 A[2][3] = C[3] ^ (~C[4] & C[0]);
308 A[2][4] = C[4] ^ (~C[0] & C[1]);
310 C[0] = ROL64(T[0][4], rhotates[0][4]);
311 C[1] = ROL64(T[1][0], rhotates[1][0]);
312 C[2] = ROL64(T[1][1], rhotates[2][1]); /* originally A[2][1] */
313 C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
314 C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
316 A[3][0] = C[0] ^ (~C[1] & C[2]);
317 A[3][1] = C[1] ^ (~C[2] & C[3]);
318 A[3][2] = C[2] ^ (~C[3] & C[4]);
319 A[3][3] = C[3] ^ (~C[4] & C[0]);
320 A[3][4] = C[4] ^ (~C[0] & C[1]);
322 C[0] = ROL64(T[0][2], rhotates[0][2]);
323 C[1] = ROL64(T[1][3], rhotates[1][3]);
324 C[2] = ROL64(T[1][4], rhotates[2][4]); /* originally A[2][4] */
325 C[3] = ROL64(T[0][0], rhotates[3][0]); /* originally A[3][0] */
326 C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
328 A[4][0] = C[0] ^ (~C[1] & C[2]);
329 A[4][1] = C[1] ^ (~C[2] & C[3]);
330 A[4][2] = C[2] ^ (~C[3] & C[4]);
331 A[4][3] = C[3] ^ (~C[4] & C[0]);
332 A[4][4] = C[4] ^ (~C[0] & C[1]);
335 void KeccakF1600(uint64_t A[5][5])
339 for (i = 0; i < 24; i++) {
344 #elif defined(KECCAK_1X_ALT)
346 * This is variant of above KECCAK_1X that presses temporary storage
347 * even further, but at cost of more write references to A[5][5].
348 * It's less suitable if A[5][5] is memory bound, but better if it's
352 static void Round(uint64_t A[5][5], size_t i)
356 assert(i < (sizeof(iotas) / sizeof(iotas[0])));
358 C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
359 C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
360 C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
361 C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
362 C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
364 D[0] = ROL64(C[1], 1) ^ C[4];
371 D[1] = ROL64(C[2], 1) ^ C[0];
378 D[2] = ROL64(C[3], 1) ^ C[1];
385 D[3] = ROL64(C[4], 1) ^ C[2];
392 D[4] = ROL64(C[0], 1) ^ C[3];
404 A[0][1] = ROL64(A[1][1], rhotates[1][1]);
405 A[0][2] = ROL64(A[2][2], rhotates[2][2]);
406 A[0][3] = ROL64(A[3][3], rhotates[3][3]);
407 A[0][4] = ROL64(A[4][4], rhotates[4][4]);
409 A[1][1] = ROL64(A[1][4], rhotates[1][4]);
410 A[2][2] = ROL64(A[2][3], rhotates[2][3]);
411 A[3][3] = ROL64(A[3][2], rhotates[3][2]);
412 A[4][4] = ROL64(A[4][1], rhotates[4][1]);
414 A[1][4] = ROL64(A[4][2], rhotates[4][2]);
415 A[2][3] = ROL64(A[3][4], rhotates[3][4]);
416 A[3][2] = ROL64(A[2][1], rhotates[2][1]);
417 A[4][1] = ROL64(A[1][3], rhotates[1][3]);
419 A[4][2] = ROL64(A[2][4], rhotates[2][4]);
420 A[3][4] = ROL64(A[4][3], rhotates[4][3]);
421 A[2][1] = ROL64(A[1][2], rhotates[1][2]);
422 A[1][3] = ROL64(A[3][1], rhotates[3][1]);
424 A[2][4] = ROL64(A[4][0], rhotates[4][0]);
425 A[4][3] = ROL64(A[3][0], rhotates[3][0]);
426 A[1][2] = ROL64(A[2][0], rhotates[2][0]);
427 A[3][1] = ROL64(A[1][0], rhotates[1][0]);
429 A[1][0] = ROL64(C[3], rhotates[0][3]);
430 A[2][0] = ROL64(C[1], rhotates[0][1]);
431 A[3][0] = ROL64(C[4], rhotates[0][4]);
432 A[4][0] = ROL64(C[2], rhotates[0][2]);
446 A[0][0] ^= (~A[0][1] & A[0][2]);
447 A[1][0] ^= (~A[1][1] & A[1][2]);
448 A[2][0] ^= (~A[2][1] & A[2][2]);
449 A[3][0] ^= (~A[3][1] & A[3][2]);
450 A[4][0] ^= (~A[4][1] & A[4][2]);
452 A[0][1] ^= (~A[0][2] & A[0][3]);
453 A[1][1] ^= (~A[1][2] & A[1][3]);
454 A[2][1] ^= (~A[2][2] & A[2][3]);
455 A[3][1] ^= (~A[3][2] & A[3][3]);
456 A[4][1] ^= (~A[4][2] & A[4][3]);
458 A[0][2] ^= (~A[0][3] & A[0][4]);
459 A[1][2] ^= (~A[1][3] & A[1][4]);
460 A[2][2] ^= (~A[2][3] & A[2][4]);
461 A[3][2] ^= (~A[3][3] & A[3][4]);
462 A[4][2] ^= (~A[4][3] & A[4][4]);
464 A[0][3] ^= (~A[0][4] & C[0]);
465 A[1][3] ^= (~A[1][4] & C[1]);
466 A[2][3] ^= (~A[2][4] & C[2]);
467 A[3][3] ^= (~A[3][4] & C[3]);
468 A[4][3] ^= (~A[4][4] & C[4]);
470 A[0][4] ^= (~C[0] & D[0]);
471 A[1][4] ^= (~C[1] & D[1]);
472 A[2][4] ^= (~C[2] & D[2]);
473 A[3][4] ^= (~C[3] & D[3]);
474 A[4][4] ^= (~C[4] & D[4]);
479 void KeccakF1600(uint64_t A[5][5])
483 for (i = 0; i < 24; i++) {
488 #elif defined(KECCAK_2X)
490 * This implementation is variant of KECCAK_1X above with outer-most
491 * round loop unrolled twice. This allows to take temporary storage
492 * out of round procedure and simplify references to it by alternating
493 * it with actual data (see round loop below). Just like original, it's
494 * rather meant as reference for an assembly implementation. It's likely
495 * to provide best instruction per processed byte ratio at minimal
496 * round unroll factor...
498 static void Round(uint64_t R[5][5], uint64_t A[5][5], size_t i)
502 assert(i < (sizeof(iotas) / sizeof(iotas[0])));
504 C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
505 C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
506 C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
507 C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
508 C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
510 D[0] = ROL64(C[1], 1) ^ C[4];
511 D[1] = ROL64(C[2], 1) ^ C[0];
512 D[2] = ROL64(C[3], 1) ^ C[1];
513 D[3] = ROL64(C[4], 1) ^ C[2];
514 D[4] = ROL64(C[0], 1) ^ C[3];
516 C[0] = A[0][0] ^ D[0]; /* rotate by 0 */
517 C[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
518 C[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
519 C[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
520 C[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
522 #ifdef KECCAK_COMPLEMENTING_TRANSFORM
523 R[0][0] = C[0] ^ ( C[1] | C[2]) ^ iotas[i];
524 R[0][1] = C[1] ^ (~C[2] | C[3]);
525 R[0][2] = C[2] ^ ( C[3] & C[4]);
526 R[0][3] = C[3] ^ ( C[4] | C[0]);
527 R[0][4] = C[4] ^ ( C[0] & C[1]);
529 R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
530 R[0][1] = C[1] ^ (~C[2] & C[3]);
531 R[0][2] = C[2] ^ (~C[3] & C[4]);
532 R[0][3] = C[3] ^ (~C[4] & C[0]);
533 R[0][4] = C[4] ^ (~C[0] & C[1]);
536 C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
537 C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
538 C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
539 C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
540 C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
542 #ifdef KECCAK_COMPLEMENTING_TRANSFORM
543 R[1][0] = C[0] ^ (C[1] | C[2]);
544 R[1][1] = C[1] ^ (C[2] & C[3]);
545 R[1][2] = C[2] ^ (C[3] | ~C[4]);
546 R[1][3] = C[3] ^ (C[4] | C[0]);
547 R[1][4] = C[4] ^ (C[0] & C[1]);
549 R[1][0] = C[0] ^ (~C[1] & C[2]);
550 R[1][1] = C[1] ^ (~C[2] & C[3]);
551 R[1][2] = C[2] ^ (~C[3] & C[4]);
552 R[1][3] = C[3] ^ (~C[4] & C[0]);
553 R[1][4] = C[4] ^ (~C[0] & C[1]);
556 C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
557 C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
558 C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
559 C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
560 C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
562 #ifdef KECCAK_COMPLEMENTING_TRANSFORM
563 R[2][0] = C[0] ^ ( C[1] | C[2]);
564 R[2][1] = C[1] ^ ( C[2] & C[3]);
565 R[2][2] = C[2] ^ (~C[3] & C[4]);
566 R[2][3] = ~C[3] ^ ( C[4] | C[0]);
567 R[2][4] = C[4] ^ ( C[0] & C[1]);
569 R[2][0] = C[0] ^ (~C[1] & C[2]);
570 R[2][1] = C[1] ^ (~C[2] & C[3]);
571 R[2][2] = C[2] ^ (~C[3] & C[4]);
572 R[2][3] = C[3] ^ (~C[4] & C[0]);
573 R[2][4] = C[4] ^ (~C[0] & C[1]);
576 C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
577 C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
578 C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
579 C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
580 C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
582 #ifdef KECCAK_COMPLEMENTING_TRANSFORM
583 R[3][0] = C[0] ^ ( C[1] & C[2]);
584 R[3][1] = C[1] ^ ( C[2] | C[3]);
585 R[3][2] = C[2] ^ (~C[3] | C[4]);
586 R[3][3] = ~C[3] ^ ( C[4] & C[0]);
587 R[3][4] = C[4] ^ ( C[0] | C[1]);
589 R[3][0] = C[0] ^ (~C[1] & C[2]);
590 R[3][1] = C[1] ^ (~C[2] & C[3]);
591 R[3][2] = C[2] ^ (~C[3] & C[4]);
592 R[3][3] = C[3] ^ (~C[4] & C[0]);
593 R[3][4] = C[4] ^ (~C[0] & C[1]);
596 C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
597 C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
598 C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
599 C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
600 C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
602 #ifdef KECCAK_COMPLEMENTING_TRANSFORM
603 R[4][0] = C[0] ^ (~C[1] & C[2]);
604 R[4][1] = ~C[1] ^ ( C[2] | C[3]);
605 R[4][2] = C[2] ^ ( C[3] & C[4]);
606 R[4][3] = C[3] ^ ( C[4] | C[0]);
607 R[4][4] = C[4] ^ ( C[0] & C[1]);
609 R[4][0] = C[0] ^ (~C[1] & C[2]);
610 R[4][1] = C[1] ^ (~C[2] & C[3]);
611 R[4][2] = C[2] ^ (~C[3] & C[4]);
612 R[4][3] = C[3] ^ (~C[4] & C[0]);
613 R[4][4] = C[4] ^ (~C[0] & C[1]);
617 void KeccakF1600(uint64_t A[5][5])
622 #ifdef KECCAK_COMPLEMENTING_TRANSFORM
631 for (i = 0; i < 24; i += 2) {
636 #ifdef KECCAK_COMPLEMENTING_TRANSFORM
648 * This implementation is KECCAK_1X from above combined 4 times with
649 * a twist that allows to omit temporary storage and perform in-place
650 * processing. It's discussed in section 2.5 of "Keccak implementation
651 * overview". It's likely to be best suited for processors with large
654 static void FourRounds(uint64_t A[5][5], size_t i)
656 uint64_t B[5], C[5], D[5];
658 assert(i <= (sizeof(iotas) / sizeof(iotas[0]) - 4));
661 C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
662 C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
663 C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
664 C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
665 C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
667 D[0] = ROL64(C[1], 1) ^ C[4];
668 D[1] = ROL64(C[2], 1) ^ C[0];
669 D[2] = ROL64(C[3], 1) ^ C[1];
670 D[3] = ROL64(C[4], 1) ^ C[2];
671 D[4] = ROL64(C[0], 1) ^ C[3];
673 B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
674 B[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
675 B[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
676 B[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
677 B[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
679 C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i];
680 C[1] = A[1][1] = B[1] ^ (~B[2] & B[3]);
681 C[2] = A[2][2] = B[2] ^ (~B[3] & B[4]);
682 C[3] = A[3][3] = B[3] ^ (~B[4] & B[0]);
683 C[4] = A[4][4] = B[4] ^ (~B[0] & B[1]);
685 B[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
686 B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
687 B[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
688 B[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
689 B[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
691 C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
692 C[1] ^= A[3][1] = B[1] ^ (~B[2] & B[3]);
693 C[2] ^= A[4][2] = B[2] ^ (~B[3] & B[4]);
694 C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
695 C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);
697 B[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
698 B[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
699 B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
700 B[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
701 B[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
703 C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
704 C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
705 C[2] ^= A[1][2] = B[2] ^ (~B[3] & B[4]);
706 C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
707 C[4] ^= A[3][4] = B[4] ^ (~B[0] & B[1]);
709 B[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
710 B[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
711 B[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
712 B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
713 B[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
715 C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
716 C[1] ^= A[2][1] = B[1] ^ (~B[2] & B[3]);
717 C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
718 C[3] ^= A[4][3] = B[3] ^ (~B[4] & B[0]);
719 C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);
721 B[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
722 B[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
723 B[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
724 B[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
725 B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
727 C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
728 C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
729 C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
730 C[3] ^= A[1][3] = B[3] ^ (~B[4] & B[0]);
731 C[4] ^= A[2][4] = B[4] ^ (~B[0] & B[1]);
734 D[0] = ROL64(C[1], 1) ^ C[4];
735 D[1] = ROL64(C[2], 1) ^ C[0];
736 D[2] = ROL64(C[3], 1) ^ C[1];
737 D[3] = ROL64(C[4], 1) ^ C[2];
738 D[4] = ROL64(C[0], 1) ^ C[3];
740 B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
741 B[1] = ROL64(A[3][1] ^ D[1], rhotates[1][1]);
742 B[2] = ROL64(A[1][2] ^ D[2], rhotates[2][2]);
743 B[3] = ROL64(A[4][3] ^ D[3], rhotates[3][3]);
744 B[4] = ROL64(A[2][4] ^ D[4], rhotates[4][4]);
746 C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 1];
747 C[1] = A[3][1] = B[1] ^ (~B[2] & B[3]);
748 C[2] = A[1][2] = B[2] ^ (~B[3] & B[4]);
749 C[3] = A[4][3] = B[3] ^ (~B[4] & B[0]);
750 C[4] = A[2][4] = B[4] ^ (~B[0] & B[1]);
752 B[0] = ROL64(A[3][3] ^ D[3], rhotates[0][3]);
753 B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
754 B[2] = ROL64(A[4][0] ^ D[0], rhotates[2][0]);
755 B[3] = ROL64(A[2][1] ^ D[1], rhotates[3][1]);
756 B[4] = ROL64(A[0][2] ^ D[2], rhotates[4][2]);
758 C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
759 C[1] ^= A[2][1] = B[1] ^ (~B[2] & B[3]);
760 C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
761 C[3] ^= A[3][3] = B[3] ^ (~B[4] & B[0]);
762 C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);
764 B[0] = ROL64(A[1][1] ^ D[1], rhotates[0][1]);
765 B[1] = ROL64(A[4][2] ^ D[2], rhotates[1][2]);
766 B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
767 B[3] = ROL64(A[0][4] ^ D[4], rhotates[3][4]);
768 B[4] = ROL64(A[3][0] ^ D[0], rhotates[4][0]);
770 C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
771 C[1] ^= A[1][1] = B[1] ^ (~B[2] & B[3]);
772 C[2] ^= A[4][2] = B[2] ^ (~B[3] & B[4]);
773 C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
774 C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);
776 B[0] = ROL64(A[4][4] ^ D[4], rhotates[0][4]);
777 B[1] = ROL64(A[2][0] ^ D[0], rhotates[1][0]);
778 B[2] = ROL64(A[0][1] ^ D[1], rhotates[2][1]);
779 B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
780 B[4] = ROL64(A[1][3] ^ D[3], rhotates[4][3]);
782 C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
783 C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
784 C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
785 C[3] ^= A[1][3] = B[3] ^ (~B[4] & B[0]);
786 C[4] ^= A[4][4] = B[4] ^ (~B[0] & B[1]);
788 B[0] = ROL64(A[2][2] ^ D[2], rhotates[0][2]);
789 B[1] = ROL64(A[0][3] ^ D[3], rhotates[1][3]);
790 B[2] = ROL64(A[3][4] ^ D[4], rhotates[2][4]);
791 B[3] = ROL64(A[1][0] ^ D[0], rhotates[3][0]);
792 B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
794 C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
795 C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
796 C[2] ^= A[2][2] = B[2] ^ (~B[3] & B[4]);
797 C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
798 C[4] ^= A[3][4] = B[4] ^ (~B[0] & B[1]);
801 D[0] = ROL64(C[1], 1) ^ C[4];
802 D[1] = ROL64(C[2], 1) ^ C[0];
803 D[2] = ROL64(C[3], 1) ^ C[1];
804 D[3] = ROL64(C[4], 1) ^ C[2];
805 D[4] = ROL64(C[0], 1) ^ C[3];
807 B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
808 B[1] = ROL64(A[2][1] ^ D[1], rhotates[1][1]);
809 B[2] = ROL64(A[4][2] ^ D[2], rhotates[2][2]);
810 B[3] = ROL64(A[1][3] ^ D[3], rhotates[3][3]);
811 B[4] = ROL64(A[3][4] ^ D[4], rhotates[4][4]);
813 C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 2];
814 C[1] = A[2][1] = B[1] ^ (~B[2] & B[3]);
815 C[2] = A[4][2] = B[2] ^ (~B[3] & B[4]);
816 C[3] = A[1][3] = B[3] ^ (~B[4] & B[0]);
817 C[4] = A[3][4] = B[4] ^ (~B[0] & B[1]);
819 B[0] = ROL64(A[4][3] ^ D[3], rhotates[0][3]);
820 B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
821 B[2] = ROL64(A[3][0] ^ D[0], rhotates[2][0]);
822 B[3] = ROL64(A[0][1] ^ D[1], rhotates[3][1]);
823 B[4] = ROL64(A[2][2] ^ D[2], rhotates[4][2]);
825 C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
826 C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
827 C[2] ^= A[2][2] = B[2] ^ (~B[3] & B[4]);
828 C[3] ^= A[4][3] = B[3] ^ (~B[4] & B[0]);
829 C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);
831 B[0] = ROL64(A[3][1] ^ D[1], rhotates[0][1]);
832 B[1] = ROL64(A[0][2] ^ D[2], rhotates[1][2]);
833 B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
834 B[3] = ROL64(A[4][4] ^ D[4], rhotates[3][4]);
835 B[4] = ROL64(A[1][0] ^ D[0], rhotates[4][0]);
837 C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
838 C[1] ^= A[3][1] = B[1] ^ (~B[2] & B[3]);
839 C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
840 C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
841 C[4] ^= A[4][4] = B[4] ^ (~B[0] & B[1]);
843 B[0] = ROL64(A[2][4] ^ D[4], rhotates[0][4]);
844 B[1] = ROL64(A[4][0] ^ D[0], rhotates[1][0]);
845 B[2] = ROL64(A[1][1] ^ D[1], rhotates[2][1]);
846 B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
847 B[4] = ROL64(A[0][3] ^ D[3], rhotates[4][3]);
849 C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
850 C[1] ^= A[1][1] = B[1] ^ (~B[2] & B[3]);
851 C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
852 C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
853 C[4] ^= A[2][4] = B[4] ^ (~B[0] & B[1]);
855 B[0] = ROL64(A[1][2] ^ D[2], rhotates[0][2]);
856 B[1] = ROL64(A[3][3] ^ D[3], rhotates[1][3]);
857 B[2] = ROL64(A[0][4] ^ D[4], rhotates[2][4]);
858 B[3] = ROL64(A[2][0] ^ D[0], rhotates[3][0]);
859 B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
861 C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
862 C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
863 C[2] ^= A[1][2] = B[2] ^ (~B[3] & B[4]);
864 C[3] ^= A[3][3] = B[3] ^ (~B[4] & B[0]);
865 C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);
868 D[0] = ROL64(C[1], 1) ^ C[4];
869 D[1] = ROL64(C[2], 1) ^ C[0];
870 D[2] = ROL64(C[3], 1) ^ C[1];
871 D[3] = ROL64(C[4], 1) ^ C[2];
872 D[4] = ROL64(C[0], 1) ^ C[3];
874 B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
875 B[1] = ROL64(A[0][1] ^ D[1], rhotates[1][1]);
876 B[2] = ROL64(A[0][2] ^ D[2], rhotates[2][2]);
877 B[3] = ROL64(A[0][3] ^ D[3], rhotates[3][3]);
878 B[4] = ROL64(A[0][4] ^ D[4], rhotates[4][4]);
880 /* C[0] = */ A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 3];
881 /* C[1] = */ A[0][1] = B[1] ^ (~B[2] & B[3]);
882 /* C[2] = */ A[0][2] = B[2] ^ (~B[3] & B[4]);
883 /* C[3] = */ A[0][3] = B[3] ^ (~B[4] & B[0]);
884 /* C[4] = */ A[0][4] = B[4] ^ (~B[0] & B[1]);
886 B[0] = ROL64(A[1][3] ^ D[3], rhotates[0][3]);
887 B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
888 B[2] = ROL64(A[1][0] ^ D[0], rhotates[2][0]);
889 B[3] = ROL64(A[1][1] ^ D[1], rhotates[3][1]);
890 B[4] = ROL64(A[1][2] ^ D[2], rhotates[4][2]);
892 /* C[0] ^= */ A[1][0] = B[0] ^ (~B[1] & B[2]);
893 /* C[1] ^= */ A[1][1] = B[1] ^ (~B[2] & B[3]);
894 /* C[2] ^= */ A[1][2] = B[2] ^ (~B[3] & B[4]);
895 /* C[3] ^= */ A[1][3] = B[3] ^ (~B[4] & B[0]);
896 /* C[4] ^= */ A[1][4] = B[4] ^ (~B[0] & B[1]);
898 B[0] = ROL64(A[2][1] ^ D[1], rhotates[0][1]);
899 B[1] = ROL64(A[2][2] ^ D[2], rhotates[1][2]);
900 B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
901 B[3] = ROL64(A[2][4] ^ D[4], rhotates[3][4]);
902 B[4] = ROL64(A[2][0] ^ D[0], rhotates[4][0]);
904 /* C[0] ^= */ A[2][0] = B[0] ^ (~B[1] & B[2]);
905 /* C[1] ^= */ A[2][1] = B[1] ^ (~B[2] & B[3]);
906 /* C[2] ^= */ A[2][2] = B[2] ^ (~B[3] & B[4]);
907 /* C[3] ^= */ A[2][3] = B[3] ^ (~B[4] & B[0]);
908 /* C[4] ^= */ A[2][4] = B[4] ^ (~B[0] & B[1]);
910 B[0] = ROL64(A[3][4] ^ D[4], rhotates[0][4]);
911 B[1] = ROL64(A[3][0] ^ D[0], rhotates[1][0]);
912 B[2] = ROL64(A[3][1] ^ D[1], rhotates[2][1]);
913 B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
914 B[4] = ROL64(A[3][3] ^ D[3], rhotates[4][3]);
916 /* C[0] ^= */ A[3][0] = B[0] ^ (~B[1] & B[2]);
917 /* C[1] ^= */ A[3][1] = B[1] ^ (~B[2] & B[3]);
918 /* C[2] ^= */ A[3][2] = B[2] ^ (~B[3] & B[4]);
919 /* C[3] ^= */ A[3][3] = B[3] ^ (~B[4] & B[0]);
920 /* C[4] ^= */ A[3][4] = B[4] ^ (~B[0] & B[1]);
922 B[0] = ROL64(A[4][2] ^ D[2], rhotates[0][2]);
923 B[1] = ROL64(A[4][3] ^ D[3], rhotates[1][3]);
924 B[2] = ROL64(A[4][4] ^ D[4], rhotates[2][4]);
925 B[3] = ROL64(A[4][0] ^ D[0], rhotates[3][0]);
926 B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
928 /* C[0] ^= */ A[4][0] = B[0] ^ (~B[1] & B[2]);
929 /* C[1] ^= */ A[4][1] = B[1] ^ (~B[2] & B[3]);
930 /* C[2] ^= */ A[4][2] = B[2] ^ (~B[3] & B[4]);
931 /* C[3] ^= */ A[4][3] = B[3] ^ (~B[4] & B[0]);
932 /* C[4] ^= */ A[4][4] = B[4] ^ (~B[0] & B[1]);
935 void KeccakF1600(uint64_t A[5][5])
939 for (i = 0; i < 24; i += 4) {
946 static uint64_t BitInterleave(uint64_t Ai)
948 if (sizeof(void *) < 8) {
949 uint32_t hi = 0, lo = 0;
952 for (j = 0; j < 32; j++) {
953 lo |= ((uint32_t)(Ai >> (2 * j)) & 1) << j;
954 hi |= ((uint32_t)(Ai >> (2 * j + 1)) & 1) << j;
957 Ai = ((uint64_t)hi << 32) | lo;
963 static uint64_t BitDeinterleave(uint64_t Ai)
965 if (sizeof(void *) < 8) {
966 uint32_t hi = (uint32_t)(Ai >> 32), lo = (uint32_t)Ai;
970 for (j = 0; j < 32; j++) {
971 Ai |= (uint64_t)((lo >> j) & 1) << (2 * j);
972 Ai |= (uint64_t)((hi >> j) & 1) << (2 * j + 1);
980 * SHA3_absorb can be called multiple times, but at each invocation
981 * largest multiple of |r| out of |len| bytes are processed. Then
982 * remaining amount of bytes is returned. This is done to spare caller
983 * trouble of calculating the largest multiple of |r|. |r| can be viewed
984 * as blocksize. It is commonly (1600 - 256*n)/8, e.g. 168, 136, 104,
985 * 72, but can also be (1600 - 448)/8 = 144. All this means that message
986 * padding and intermediate sub-block buffering, byte- or bitwise, is
987 * caller's reponsibility.
989 size_t SHA3_absorb(uint64_t A[5][5], const unsigned char *inp, size_t len,
992 uint64_t *A_flat = (uint64_t *)A;
995 assert(r < (25 * sizeof(A[0][0])) && (r % 8) == 0);
998 for (i = 0; i < w; i++) {
999 uint64_t Ai = (uint64_t)inp[0] | (uint64_t)inp[1] << 8 |
1000 (uint64_t)inp[2] << 16 | (uint64_t)inp[3] << 24 |
1001 (uint64_t)inp[4] << 32 | (uint64_t)inp[5] << 40 |
1002 (uint64_t)inp[6] << 48 | (uint64_t)inp[7] << 56;
1005 A_flat[i] ^= BitInterleave(Ai);
1015 * SHA3_squeeze is called once at the end to generate |out| hash value
1018 void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r)
1020 uint64_t *A_flat = (uint64_t *)A;
1021 size_t i, rem, w = r / 8;
1023 assert(r < (25 * sizeof(A[0][0])) && (r % 8) == 0);
1026 for (i = 0; i < w; i++) {
1027 uint64_t Ai = BitDeinterleave(A_flat[i]);
1029 out[0] = (unsigned char)(Ai);
1030 out[1] = (unsigned char)(Ai >> 8);
1031 out[2] = (unsigned char)(Ai >> 16);
1032 out[3] = (unsigned char)(Ai >> 24);
1033 out[4] = (unsigned char)(Ai >> 32);
1034 out[5] = (unsigned char)(Ai >> 40);
1035 out[6] = (unsigned char)(Ai >> 48);
1036 out[7] = (unsigned char)(Ai >> 56);
1047 for (i = 0; i < len; i++) {
1048 uint64_t Ai = BitDeinterleave(A_flat[i]);
1050 out[0] = (unsigned char)(Ai);
1051 out[1] = (unsigned char)(Ai >> 8);
1052 out[2] = (unsigned char)(Ai >> 16);
1053 out[3] = (unsigned char)(Ai >> 24);
1054 out[4] = (unsigned char)(Ai >> 32);
1055 out[5] = (unsigned char)(Ai >> 40);
1056 out[6] = (unsigned char)(Ai >> 48);
1057 out[7] = (unsigned char)(Ai >> 56);
1062 uint64_t Ai = BitDeinterleave(A_flat[i]);
1064 for (i = 0; i < rem; i++) {
1065 *out++ = (unsigned char)Ai;
1073 * Post-padding one-shot implementations would look as following:
1075 * SHA3_224 SHA3_sponge(inp, len, out, 224/8, (1600-448)/8);
1076 * SHA3_256 SHA3_sponge(inp, len, out, 256/8, (1600-512)/8);
1077 * SHA3_384 SHA3_sponge(inp, len, out, 384/8, (1600-768)/8);
1078 * SHA3_512 SHA3_sponge(inp, len, out, 512/8, (1600-1024)/8);
1079 * SHAKE_128 SHA3_sponge(inp, len, out, d, (1600-256)/8);
1080 * SHAKE_256 SHA3_sponge(inp, len, out, d, (1600-512)/8);
1083 void SHA3_sponge(const unsigned char *inp, size_t len,
1084 unsigned char *out, size_t d, size_t r)
1088 memset(A, 0, sizeof(A));
1089 SHA3_absorb(A, inp, len, r);
1090 SHA3_squeeze(A, out, d, r);
1098 * This is 5-bit SHAKE128 test from http://csrc.nist.gov/groups/ST/toolkit/examples.html#aHashing
1100 unsigned char test[168] = { '\xf3', '\x3' };
1101 unsigned char out[512];
1103 static const unsigned char result[512] = {
1104 0x2E, 0x0A, 0xBF, 0xBA, 0x83, 0xE6, 0x72, 0x0B,
1105 0xFB, 0xC2, 0x25, 0xFF, 0x6B, 0x7A, 0xB9, 0xFF,
1106 0xCE, 0x58, 0xBA, 0x02, 0x7E, 0xE3, 0xD8, 0x98,
1107 0x76, 0x4F, 0xEF, 0x28, 0x7D, 0xDE, 0xCC, 0xCA,
1108 0x3E, 0x6E, 0x59, 0x98, 0x41, 0x1E, 0x7D, 0xDB,
1109 0x32, 0xF6, 0x75, 0x38, 0xF5, 0x00, 0xB1, 0x8C,
1110 0x8C, 0x97, 0xC4, 0x52, 0xC3, 0x70, 0xEA, 0x2C,
1111 0xF0, 0xAF, 0xCA, 0x3E, 0x05, 0xDE, 0x7E, 0x4D,
1112 0xE2, 0x7F, 0xA4, 0x41, 0xA9, 0xCB, 0x34, 0xFD,
1113 0x17, 0xC9, 0x78, 0xB4, 0x2D, 0x5B, 0x7E, 0x7F,
1114 0x9A, 0xB1, 0x8F, 0xFE, 0xFF, 0xC3, 0xC5, 0xAC,
1115 0x2F, 0x3A, 0x45, 0x5E, 0xEB, 0xFD, 0xC7, 0x6C,
1116 0xEA, 0xEB, 0x0A, 0x2C, 0xCA, 0x22, 0xEE, 0xF6,
1117 0xE6, 0x37, 0xF4, 0xCA, 0xBE, 0x5C, 0x51, 0xDE,
1118 0xD2, 0xE3, 0xFA, 0xD8, 0xB9, 0x52, 0x70, 0xA3,
1119 0x21, 0x84, 0x56, 0x64, 0xF1, 0x07, 0xD1, 0x64,
1120 0x96, 0xBB, 0x7A, 0xBF, 0xBE, 0x75, 0x04, 0xB6,
1121 0xED, 0xE2, 0xE8, 0x9E, 0x4B, 0x99, 0x6F, 0xB5,
1122 0x8E, 0xFD, 0xC4, 0x18, 0x1F, 0x91, 0x63, 0x38,
1123 0x1C, 0xBE, 0x7B, 0xC0, 0x06, 0xA7, 0xA2, 0x05,
1124 0x98, 0x9C, 0x52, 0x6C, 0xD1, 0xBD, 0x68, 0x98,
1125 0x36, 0x93, 0xB4, 0xBD, 0xC5, 0x37, 0x28, 0xB2,
1126 0x41, 0xC1, 0xCF, 0xF4, 0x2B, 0xB6, 0x11, 0x50,
1127 0x2C, 0x35, 0x20, 0x5C, 0xAB, 0xB2, 0x88, 0x75,
1128 0x56, 0x55, 0xD6, 0x20, 0xC6, 0x79, 0x94, 0xF0,
1129 0x64, 0x51, 0x18, 0x7F, 0x6F, 0xD1, 0x7E, 0x04,
1130 0x66, 0x82, 0xBA, 0x12, 0x86, 0x06, 0x3F, 0xF8,
1131 0x8F, 0xE2, 0x50, 0x8D, 0x1F, 0xCA, 0xF9, 0x03,
1132 0x5A, 0x12, 0x31, 0xAD, 0x41, 0x50, 0xA9, 0xC9,
1133 0xB2, 0x4C, 0x9B, 0x2D, 0x66, 0xB2, 0xAD, 0x1B,
1134 0xDE, 0x0B, 0xD0, 0xBB, 0xCB, 0x8B, 0xE0, 0x5B,
1135 0x83, 0x52, 0x29, 0xEF, 0x79, 0x19, 0x73, 0x73,
1136 0x23, 0x42, 0x44, 0x01, 0xE1, 0xD8, 0x37, 0xB6,
1137 0x6E, 0xB4, 0xE6, 0x30, 0xFF, 0x1D, 0xE7, 0x0C,
1138 0xB3, 0x17, 0xC2, 0xBA, 0xCB, 0x08, 0x00, 0x1D,
1139 0x34, 0x77, 0xB7, 0xA7, 0x0A, 0x57, 0x6D, 0x20,
1140 0x86, 0x90, 0x33, 0x58, 0x9D, 0x85, 0xA0, 0x1D,
1141 0xDB, 0x2B, 0x66, 0x46, 0xC0, 0x43, 0xB5, 0x9F,
1142 0xC0, 0x11, 0x31, 0x1D, 0xA6, 0x66, 0xFA, 0x5A,
1143 0xD1, 0xD6, 0x38, 0x7F, 0xA9, 0xBC, 0x40, 0x15,
1144 0xA3, 0x8A, 0x51, 0xD1, 0xDA, 0x1E, 0xA6, 0x1D,
1145 0x64, 0x8D, 0xC8, 0xE3, 0x9A, 0x88, 0xB9, 0xD6,
1146 0x22, 0xBD, 0xE2, 0x07, 0xFD, 0xAB, 0xC6, 0xF2,
1147 0x82, 0x7A, 0x88, 0x0C, 0x33, 0x0B, 0xBF, 0x6D,
1148 0xF7, 0x33, 0x77, 0x4B, 0x65, 0x3E, 0x57, 0x30,
1149 0x5D, 0x78, 0xDC, 0xE1, 0x12, 0xF1, 0x0A, 0x2C,
1150 0x71, 0xF4, 0xCD, 0xAD, 0x92, 0xED, 0x11, 0x3E,
1151 0x1C, 0xEA, 0x63, 0xB9, 0x19, 0x25, 0xED, 0x28,
1152 0x19, 0x1E, 0x6D, 0xBB, 0xB5, 0xAA, 0x5A, 0x2A,
1153 0xFD, 0xA5, 0x1F, 0xC0, 0x5A, 0x3A, 0xF5, 0x25,
1154 0x8B, 0x87, 0x66, 0x52, 0x43, 0x55, 0x0F, 0x28,
1155 0x94, 0x8A, 0xE2, 0xB8, 0xBE, 0xB6, 0xBC, 0x9C,
1156 0x77, 0x0B, 0x35, 0xF0, 0x67, 0xEA, 0xA6, 0x41,
1157 0xEF, 0xE6, 0x5B, 0x1A, 0x44, 0x90, 0x9D, 0x1B,
1158 0x14, 0x9F, 0x97, 0xEE, 0xA6, 0x01, 0x39, 0x1C,
1159 0x60, 0x9E, 0xC8, 0x1D, 0x19, 0x30, 0xF5, 0x7C,
1160 0x18, 0xA4, 0xE0, 0xFA, 0xB4, 0x91, 0xD1, 0xCA,
1161 0xDF, 0xD5, 0x04, 0x83, 0x44, 0x9E, 0xDC, 0x0F,
1162 0x07, 0xFF, 0xB2, 0x4D, 0x2C, 0x6F, 0x9A, 0x9A,
1163 0x3B, 0xFF, 0x39, 0xAE, 0x3D, 0x57, 0xF5, 0x60,
1164 0x65, 0x4D, 0x7D, 0x75, 0xC9, 0x08, 0xAB, 0xE6,
1165 0x25, 0x64, 0x75, 0x3E, 0xAC, 0x39, 0xD7, 0x50,
1166 0x3D, 0xA6, 0xD3, 0x7C, 0x2E, 0x32, 0xE1, 0xAF,
1167 0x3B, 0x8A, 0xEC, 0x8A, 0xE3, 0x06, 0x9C, 0xD9
1171 SHA3_sponge(test, sizeof(test), out, sizeof(out), sizeof(test));
1174 * Rationale behind keeping output [formatted as below] is that
1175 * one should be able to redirect it to a file, then copy-n-paste
1176 * final "output val" from official example to another file, and
1177 * compare the two with diff(1).
1179 for (i = 0; i < sizeof(out);) {
1180 printf("%02X", out[i]);
1181 printf(++i % 16 && i != sizeof(out) ? " " : "\n");
1184 if (memcmp(out,result,sizeof(out))) {
1185 fprintf(stderr,"failure\n");
1188 fprintf(stderr,"success\n");