2 # Copyright 2017 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the OpenSSL license (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
9 # ====================================================================
10 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
11 # project. The module is, however, dual licensed under OpenSSL and
12 # CRYPTOGAMS licenses depending on where you obtain it. For further
13 # details see http://www.openssl.org/~appro/cryptogams/.
14 # ====================================================================
16 # Keccak-1600 for ARMv4.
20 # This is KECCAK_1X variant (see keccak1600.c) with bit interleaving.
21 # How does it compare to Keccak Code Package? It's as fast, but several
22 # times smaller, and is endian- and ISA-neutral. ISA neutrality means
23 # that minimum ISA requirement is ARMv4, yet it can be assembled even as Thumb-2.
26 ########################################################################
27 # Numbers are cycles per processed byte accounting even for input bit interleaving.
32 # Cortex-A7 71/+180% 103
33 # Cortex-A8 48/+290% 69
34 # Cortex-A15 34/+210% 49
36 # (*) Not used in real life, meaningful as estimate for single sponge
37 # operation performance. Numbers after slash are improvement over
38 # compiler-generated KECCAK_1X reference code.
# Symbolic register names interpolated into the assembly text below.
# @C names r0..r9: ten 32-bit registers, used as five register pairs so
# that one row of five 64-bit lanes (e.g. A[0][0..4]) fits in a single
# ldmia/stmia of @C[0]-@C[9].
# @E names r10, r11, r12 and r14: four more working registers. r13 is
# skipped; it is sp, which the generated code uses as the frame base.
40 my @C = map("r$_",(0..9));
41 my @E = map("r$_",(10..12,14));
43 ########################################################################
45 # ----->+-----------------------+
46 # | uint64_t A[5][5] |
48 # +200->+-----------------------+
51 # +240->+-----------------------+
52 # | uint64_t T[2][5] |
54 # +320->+-----------------------+
56 # +324->+-----------------------+
58 # +328->+-----------------------+
# Byte offsets of the 64-bit slots of the working frame. They are applied
# to sp in the round code and, for @A, also to the caller's state pointer
# when the state is copied to and from the stack.
#   $A[y][x] = 8*(5*y+x)    -> state lanes A[5][5], offsets 0..192
#   $D[x]    = 8*(25+x)     -> D[0..4],            offsets 200..232
#   $T[i][x] = 8*(30+5*i+x) -> temporaries T[2][5], offsets 240..312
# Use sites add +4 to an offset to reach the second 32-bit word of a slot.
61 my @A = map([ 8*$_, 8*($_+1), 8*($_+2), 8*($_+3), 8*($_+4) ], (0,5,10,15,20));
62 my @D = map(8*$_, (25..29));
63 my @T = map([ 8*$_, 8*($_+1), 8*($_+2), 8*($_+3), 8*($_+4) ], (30,35));
68 #if defined(__thumb2__)
78 .long 0x00000001, 0x00000000
79 .long 0x00000000, 0x00000089
80 .long 0x00000000, 0x8000008b
81 .long 0x00000000, 0x80008080
82 .long 0x00000001, 0x0000008b
83 .long 0x00000001, 0x00008000
84 .long 0x00000001, 0x80008088
85 .long 0x00000001, 0x80000082
86 .long 0x00000000, 0x0000000b
87 .long 0x00000000, 0x0000000a
88 .long 0x00000001, 0x00008082
89 .long 0x00000000, 0x00008003
90 .long 0x00000001, 0x0000808b
91 .long 0x00000001, 0x8000000b
92 .long 0x00000001, 0x8000008a
93 .long 0x00000001, 0x80000081
94 .long 0x00000000, 0x80000081
95 .long 0x00000000, 0x80000008
96 .long 0x00000000, 0x00000083
97 .long 0x00000000, 0x80008003
98 .long 0x00000001, 0x80008088
99 .long 0x00000000, 0x80000088
100 .long 0x00000001, 0x00008000
101 .long 0x00000000, 0x80008082
104 .type KeccakF1600_int, %function
107 ldmia sp,{@C[0]-@C[9]} @ A[0][0..4]
108 add @E[0],sp,#$A[1][0]
111 eor @E[1],@E[1],@E[1]
117 ldmia sp,{@C[0]-@C[9]} @ A[0][0..4]
119 ldmia @E[0],{@E[0]-@E[2],@E[3]} @ A[1][0..1]
120 eor @C[0],@C[0],@E[0]
121 add @E[0],sp,#$A[1][2]
122 eor @C[1],@C[1],@E[1]
123 eor @C[2],@C[2],@E[2]
124 eor @C[3],@C[3],@E[3]
125 ldmia @E[0],{@E[0]-@E[2],@E[3]} @ A[1][2..3]
126 eor @C[4],@C[4],@E[0]
127 add @E[0],sp,#$A[1][4]
128 eor @C[5],@C[5],@E[1]
129 eor @C[6],@C[6],@E[2]
130 eor @C[7],@C[7],@E[3]
131 ldmia @E[0],{@E[0]-@E[2],@E[3]} @ A[1][4]..A[2][0]
132 eor @C[8],@C[8],@E[0]
133 add @E[0],sp,#$A[2][1]
134 eor @C[9],@C[9],@E[1]
135 eor @C[0],@C[0],@E[2]
136 eor @C[1],@C[1],@E[3]
137 ldmia @E[0],{@E[0]-@E[2],@E[3]} @ A[2][1..2]
138 eor @C[2],@C[2],@E[0]
139 add @E[0],sp,#$A[2][3]
140 eor @C[3],@C[3],@E[1]
141 eor @C[4],@C[4],@E[2]
142 eor @C[5],@C[5],@E[3]
143 ldmia @E[0],{@E[0]-@E[2],@E[3]} @ A[2][3..4]
144 eor @C[6],@C[6],@E[0]
145 add @E[0],sp,#$A[3][0]
146 eor @C[7],@C[7],@E[1]
147 eor @C[8],@C[8],@E[2]
148 eor @C[9],@C[9],@E[3]
149 ldmia @E[0],{@E[0]-@E[2],@E[3]} @ A[3][0..1]
150 eor @C[0],@C[0],@E[0]
151 add @E[0],sp,#$A[3][2]
152 eor @C[1],@C[1],@E[1]
153 eor @C[2],@C[2],@E[2]
154 eor @C[3],@C[3],@E[3]
155 ldmia @E[0],{@E[0]-@E[2],@E[3]} @ A[3][2..3]
156 eor @C[4],@C[4],@E[0]
157 add @E[0],sp,#$A[3][4]
158 eor @C[5],@C[5],@E[1]
159 eor @C[6],@C[6],@E[2]
160 eor @C[7],@C[7],@E[3]
161 ldmia @E[0],{@E[0]-@E[2],@E[3]} @ A[3][4]..A[4][0]
162 eor @C[8],@C[8],@E[0]
163 add @E[0],sp,#$A[4][1]
164 eor @C[9],@C[9],@E[1]
165 eor @C[0],@C[0],@E[2]
166 eor @C[1],@C[1],@E[3]
167 ldmia @E[0],{@E[0]-@E[2],@E[3]} @ A[4][1..2]
168 eor @C[2],@C[2],@E[0]
169 add @E[0],sp,#$A[4][3]
170 eor @C[3],@C[3],@E[1]
171 eor @C[4],@C[4],@E[2]
172 eor @C[5],@C[5],@E[3]
173 ldmia @E[0],{@E[0]-@E[2],@E[3]} @ A[4][3..4]
174 eor @C[6],@C[6],@E[0]
175 eor @C[7],@C[7],@E[1]
176 eor @C[8],@C[8],@E[2]
177 eor @C[9],@C[9],@E[3]
179 eor @E[0],@C[0],@C[5],ror#32-1 @ E[0] = ROL64(C[2], 1) ^ C[0];
180 eor @E[1],@C[1],@C[4]
181 str @E[0],[sp,#$D[1]] @ D[1] = E[0]
182 eor @E[2],@C[6],@C[1],ror#32-1 @ E[1] = ROL64(C[0], 1) ^ C[3];
183 str @E[1],[sp,#$D[1]+4]
184 eor @E[3],@C[7],@C[0]
185 str @E[2],[sp,#$D[4]] @ D[4] = E[1]
186 eor @C[0],@C[8],@C[3],ror#32-1 @ C[0] = ROL64(C[1], 1) ^ C[4];
187 str @E[3],[sp,#$D[4]+4]
188 eor @C[1],@C[9],@C[2]
189 str @C[0],[sp,#$D[0]] @ D[0] = C[0]
190 eor @C[2],@C[2],@C[7],ror#32-1 @ C[1] = ROL64(C[3], 1) ^ C[1];
191 str @C[1],[sp,#$D[0]+4]
192 eor @C[3],@C[3],@C[6]
193 str @C[2],[sp,#$D[2]] @ D[2] = C[1]
194 eor @C[4],@C[4],@C[9],ror#32-1 @ C[2] = ROL64(C[4], 1) ^ C[2];
195 str @C[3],[sp,#$D[2]+4]
196 eor @C[5],@C[5],@C[8]
197 ldr @C[8],[sp,#$A[3][0]]
198 ldr @C[9],[sp,#$A[3][0]+4]
199 str @C[4],[sp,#$D[3]] @ D[3] = C[2]
200 str @C[5],[sp,#$D[3]+4]
202 ldr @C[6],[sp,#$A[0][1]]
203 eor @C[8],@C[8],@C[0]
204 ldr @C[7],[sp,#$A[0][1]+4]
205 eor @C[9],@C[9],@C[1]
206 str @C[8],[sp,#$T[0][0]] @ T[0][0] = A[3][0] ^ C[0]; /* borrow T[0][0] */
207 ldr @C[8],[sp,#$A[0][2]]
208 str @C[9],[sp,#$T[0][0]+4]
209 ldr @C[9],[sp,#$A[0][2]+4]
210 eor @C[6],@C[6],@E[0]
211 eor @C[7],@C[7],@E[1]
212 str @C[6],[sp,#$T[0][1]] @ T[0][1] = A[0][1] ^ E[0]; /* D[1] */
213 ldr @C[6],[sp,#$A[0][3]]
214 str @C[7],[sp,#$T[0][1]+4]
215 ldr @C[7],[sp,#$A[0][3]+4]
216 eor @C[8],@C[8],@C[2]
217 eor @C[9],@C[9],@C[3]
218 str @C[8],[sp,#$T[0][2]] @ T[0][2] = A[0][2] ^ C[1]; /* D[2] */
219 ldr @C[8],[sp,#$A[0][4]]
220 str @C[9],[sp,#$T[0][2]+4]
221 ldr @C[9],[sp,#$A[0][4]+4]
222 eor @C[6],@C[6],@C[4]
223 eor @C[7],@C[7],@C[5]
224 str @C[6],[sp,#$T[0][3]] @ T[0][3] = A[0][3] ^ C[2]; /* D[3] */
225 eor @C[8],@C[8],@E[2]
226 str @C[7],[sp,#$T[0][3]+4]
227 eor @C[9],@C[9],@E[3]
228 ldr @C[6],[sp,#$A[3][3]]
229 ldr @C[7],[sp,#$A[3][3]+4]
230 str @C[8],[sp,#$T[0][4]] @ T[0][4] = A[0][4] ^ E[1]; /* D[4] */
231 str @C[9],[sp,#$T[0][4]+4]
233 ldr @C[8],[sp,#$A[4][4]]
234 eor @C[4],@C[4],@C[6]
235 ldr @C[9],[sp,#$A[4][4]+4]
236 eor @C[5],@C[5],@C[7]
237 ror @C[7],@C[4],#32-10 @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */
238 ldr @C[4],[sp,#$A[0][0]]
239 ror @C[6],@C[5],#32-11
240 ldr @C[5],[sp,#$A[0][0]+4]
241 eor @C[8],@C[8],@E[2]
242 eor @C[9],@C[9],@E[3]
243 ror @C[8],@C[8],#32-7 @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */
244 ldr @E[2],[sp,#$A[2][2]]
245 ror @C[9],@C[9],#32-7
246 ldr @E[3],[sp,#$A[2][2]+4]
247 eor @C[0],@C[0],@C[4]
248 eor @C[1],@C[1],@C[5] @ C[0] = A[0][0] ^ C[0]; /* rotate by 0 */ /* D[0] */
249 eor @E[2],@E[2],@C[2]
250 ldr @C[2],[sp,#$A[1][1]]
251 eor @E[3],@E[3],@C[3]
252 ldr @C[3],[sp,#$A[1][1]+4]
253 ror @C[5],@E[2],#32-21 @ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]); /* D[2] */
254 ldr @E[2],[sp,#324] @ load counter
255 eor @C[2],@C[2],@E[0]
256 ror @C[4],@E[3],#32-22
258 eor @C[3],@C[3],@E[1]
259 ror @C[2],@C[2],#32-22 @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]); /* D[1] */
260 add @E[3],@E[3],@E[2]
261 ror @C[3],@C[3],#32-22
263 ldr @E[0],[@E[3],#0] @ iotas[i].lo
265 ldr @E[1],[@E[3],#4] @ iotas[i].hi
267 str @E[2],[sp,#324] @ store counter
269 bic @E[2],@C[4],@C[2]
270 bic @E[3],@C[5],@C[3]
271 eor @E[2],@E[2],@C[0]
272 eor @E[3],@E[3],@C[1]
273 eor @E[0],@E[0],@E[2]
274 eor @E[1],@E[1],@E[3]
275 str @E[0],[sp,#$A[0][0]] @ A[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
276 bic @E[2],@C[6],@C[4]
277 str @E[1],[sp,#$A[0][0]+4]
278 bic @E[3],@C[7],@C[5]
279 eor @E[2],@E[2],@C[2]
280 eor @E[3],@E[3],@C[3]
281 str @E[2],[sp,#$A[0][1]] @ A[0][1] = C[1] ^ (~C[2] & C[3]);
282 bic @E[0],@C[8],@C[6]
283 str @E[3],[sp,#$A[0][1]+4]
284 bic @E[1],@C[9],@C[7]
285 eor @E[0],@E[0],@C[4]
286 eor @E[1],@E[1],@C[5]
287 str @E[0],[sp,#$A[0][2]] @ A[0][2] = C[2] ^ (~C[3] & C[4]);
288 bic @E[2],@C[0],@C[8]
289 str @E[1],[sp,#$A[0][2]+4]
290 bic @E[3],@C[1],@C[9]
291 eor @E[2],@E[2],@C[6]
292 eor @E[3],@E[3],@C[7]
293 str @E[2],[sp,#$A[0][3]] @ A[0][3] = C[3] ^ (~C[4] & C[0]);
294 bic @E[0],@C[2],@C[0]
295 str @E[3],[sp,#$A[0][3]+4]
297 bic @E[1],@C[3],@C[1]
298 eor @E[0],@E[0],@C[8]
299 eor @E[1],@E[1],@C[9]
300 str @E[0],[sp,#$A[0][4]] @ A[0][4] = C[4] ^ (~C[0] & C[1]);
301 str @E[1],[sp,#$A[0][4]+4]
303 ldmia @E[3],{@C[6]-@C[9],@E[0],@E[1],@E[2],@E[3]} @ D[0..3]
304 ldr @C[0],[sp,#$A[1][0]]
305 ldr @C[1],[sp,#$A[1][0]+4]
306 ldr @C[2],[sp,#$A[2][1]]
307 ldr @C[3],[sp,#$A[2][1]+4]
308 ldr @C[4],[sp,#$D[4]]
309 eor @C[0],@C[0],@C[6]
310 ldr @C[5],[sp,#$D[4]+4]
311 eor @C[1],@C[1],@C[7]
312 str @C[0],[sp,#$T[1][0]] @ T[1][0] = A[1][0] ^ (C[3] = D[0]);
313 add @C[0],sp,#$A[1][2]
314 str @C[1],[sp,#$T[1][0]+4]
315 eor @C[2],@C[2],@C[8]
316 eor @C[3],@C[3],@C[9]
317 str @C[2],[sp,#$T[1][1]] @ T[1][1] = A[2][1] ^ (C[4] = D[1]); /* borrow T[1][1] */
318 str @C[3],[sp,#$T[1][1]+4]
319 ldmia @C[0],{@C[0]-@C[3]} @ A[1][2..3]
320 eor @C[0],@C[0],@E[0]
321 eor @C[1],@C[1],@E[1]
322 str @C[0],[sp,#$T[1][2]] @ T[1][2] = A[1][2] ^ (E[0] = D[2]);
323 ldr @C[0],[sp,#$A[2][4]]
324 str @C[1],[sp,#$T[1][2]+4]
325 ldr @C[1],[sp,#$A[2][4]+4]
326 eor @C[2],@C[2],@E[2]
327 eor @C[3],@C[3],@E[3]
328 str @C[2],[sp,#$T[1][3]] @ T[1][3] = A[1][3] ^ (E[1] = D[3]);
329 ldr @C[2],[sp,#$T[0][3]]
330 str @C[3],[sp,#$T[1][3]+4]
331 ldr @C[3],[sp,#$T[0][3]+4]
332 eor @C[0],@C[0],@C[4]
333 ldr @E[2],[sp,#$A[1][4]]
334 eor @C[1],@C[1],@C[5]
335 ldr @E[3],[sp,#$A[1][4]+4]
336 str @C[0],[sp,#$T[1][4]] @ T[1][4] = A[2][4] ^ (C[2] = D[4]); /* borrow T[1][4] */
338 ror @C[0],@C[2],#32-14 @ C[0] = ROL64(T[0][3], rhotates[0][3]);
339 str @C[1],[sp,#$T[1][4]+4]
340 ror @C[1],@C[3],#32-14
341 eor @C[2],@E[2],@C[4]
342 ldr @C[4],[sp,#$A[2][0]]
343 eor @C[3],@E[3],@C[5]
344 ldr @C[5],[sp,#$A[2][0]+4]
345 ror @C[2],@C[2],#32-10 @ C[1] = ROL64(A[1][4] ^ C[2], rhotates[1][4]); /* D[4] */
346 ldr @E[2],[sp,#$A[3][1]]
347 ror @C[3],@C[3],#32-10
348 ldr @E[3],[sp,#$A[3][1]+4]
349 eor @C[6],@C[6],@C[4]
350 eor @C[7],@C[7],@C[5]
351 ror @C[5],@C[6],#32-1 @ C[2] = ROL64(A[2][0] ^ C[3], rhotates[2][0]); /* D[0] */
352 eor @E[2],@E[2],@C[8]
353 ror @C[4],@C[7],#32-2
354 ldr @C[8],[sp,#$A[4][2]]
355 eor @E[3],@E[3],@C[9]
356 ldr @C[9],[sp,#$A[4][2]+4]
357 ror @C[7],@E[2],#32-22 @ C[3] = ROL64(A[3][1] ^ C[4], rhotates[3][1]); /* D[1] */
358 eor @E[0],@E[0],@C[8]
359 ror @C[6],@E[3],#32-23
360 eor @E[1],@E[1],@C[9]
361 ror @C[9],@E[0],#32-30 @ C[4] = ROL64(A[4][2] ^ E[0], rhotates[4][2]); /* D[2] */
363 bic @E[0],@C[4],@C[2]
364 ror @C[8],@E[1],#32-31
365 bic @E[1],@C[5],@C[3]
366 eor @E[0],@E[0],@C[0]
367 eor @E[1],@E[1],@C[1]
368 str @E[0],[sp,#$A[1][0]] @ A[1][0] = C[0] ^ (~C[1] & C[2])
369 bic @E[2],@C[6],@C[4]
370 str @E[1],[sp,#$A[1][0]+4]
371 bic @E[3],@C[7],@C[5]
372 eor @E[2],@E[2],@C[2]
373 eor @E[3],@E[3],@C[3]
374 str @E[2],[sp,#$A[1][1]] @ A[1][1] = C[1] ^ (~C[2] & C[3]);
375 bic @E[0],@C[8],@C[6]
376 str @E[3],[sp,#$A[1][1]+4]
377 bic @E[1],@C[9],@C[7]
378 eor @E[0],@E[0],@C[4]
379 eor @E[1],@E[1],@C[5]
380 str @E[0],[sp,#$A[1][2]] @ A[1][2] = C[2] ^ (~C[3] & C[4]);
381 bic @E[2],@C[0],@C[8]
382 str @E[1],[sp,#$A[1][2]+4]
383 bic @E[3],@C[1],@C[9]
384 eor @E[2],@E[2],@C[6]
385 eor @E[3],@E[3],@C[7]
386 str @E[2],[sp,#$A[1][3]] @ A[1][3] = C[3] ^ (~C[4] & C[0]);
387 bic @E[0],@C[2],@C[0]
388 str @E[3],[sp,#$A[1][3]+4]
390 bic @E[1],@C[3],@C[1]
391 ldr @C[1],[sp,#$T[0][1]]
392 eor @E[0],@E[0],@C[8]
393 ldr @C[0],[sp,#$T[0][1]+4]
394 eor @E[1],@E[1],@C[9]
395 str @E[0],[sp,#$A[1][4]] @ A[1][4] = C[4] ^ (~C[0] & C[1]);
396 str @E[1],[sp,#$A[1][4]+4]
398 ldr @C[2],[sp,#$T[1][2]]
399 ldr @C[3],[sp,#$T[1][2]+4]
400 ldmia @E[3],{@E[0]-@E[2],@E[3]} @ D[3..4]
401 ldr @C[4],[sp,#$A[2][3]]
402 ror @C[0],@C[0],#32-1 @ C[0] = ROL64(T[0][1], rhotates[0][1]);
403 ldr @C[5],[sp,#$A[2][3]+4]
404 ror @C[2],@C[2],#32-3 @ C[1] = ROL64(T[1][2], rhotates[1][2]);
405 ldr @C[6],[sp,#$A[3][4]]
406 ror @C[3],@C[3],#32-3
407 ldr @C[7],[sp,#$A[3][4]+4]
408 eor @E[0],@E[0],@C[4]
409 ldr @C[8],[sp,#$A[4][0]]
410 eor @E[1],@E[1],@C[5]
411 ldr @C[9],[sp,#$A[4][0]+4]
412 ror @C[5],@E[0],#32-12 @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
413 ldr @E[0],[sp,#$D[0]]
414 ror @C[4],@E[1],#32-13
415 ldr @E[1],[sp,#$D[0]+4]
416 eor @C[6],@C[6],@E[2]
417 eor @C[7],@C[7],@E[3]
418 ror @C[6],@C[6],#32-4 @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
419 eor @C[8],@C[8],@E[0]
420 ror @C[7],@C[7],#32-4
421 eor @C[9],@C[9],@E[1]
422 ror @C[8],@C[8],#32-9 @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
424 bic @E[0],@C[4],@C[2]
425 ror @C[9],@C[9],#32-9
426 bic @E[1],@C[5],@C[3]
427 eor @E[0],@E[0],@C[0]
428 eor @E[1],@E[1],@C[1]
429 str @E[0],[sp,#$A[2][0]] @ A[2][0] = C[0] ^ (~C[1] & C[2])
430 bic @E[2],@C[6],@C[4]
431 str @E[1],[sp,#$A[2][0]+4]
432 bic @E[3],@C[7],@C[5]
433 eor @E[2],@E[2],@C[2]
434 eor @E[3],@E[3],@C[3]
435 str @E[2],[sp,#$A[2][1]] @ A[2][1] = C[1] ^ (~C[2] & C[3]);
436 bic @E[0],@C[8],@C[6]
437 str @E[3],[sp,#$A[2][1]+4]
438 bic @E[1],@C[9],@C[7]
439 eor @E[0],@E[0],@C[4]
440 eor @E[1],@E[1],@C[5]
441 str @E[0],[sp,#$A[2][2]] @ A[2][2] = C[2] ^ (~C[3] & C[4]);
442 bic @E[2],@C[0],@C[8]
443 str @E[1],[sp,#$A[2][2]+4]
444 bic @E[3],@C[1],@C[9]
445 eor @E[2],@E[2],@C[6]
446 eor @E[3],@E[3],@C[7]
447 str @E[2],[sp,#$A[2][3]] @ A[2][3] = C[3] ^ (~C[4] & C[0]);
448 bic @E[0],@C[2],@C[0]
449 str @E[3],[sp,#$A[2][3]+4]
450 bic @E[1],@C[3],@C[1]
451 eor @E[0],@E[0],@C[8]
452 eor @E[1],@E[1],@C[9]
453 str @E[0],[sp,#$A[2][4]] @ A[2][4] = C[4] ^ (~C[0] & C[1]);
454 add @C[2],sp,#$T[1][0]
455 str @E[1],[sp,#$A[2][4]+4]
458 ldr @C[1],[sp,#$T[0][4]]
459 ldr @C[0],[sp,#$T[0][4]+4]
460 ldmia @C[2],{@C[2]-@C[5]} @ T[1][0..1]
461 ldmia @E[3],{@E[0]-@E[2],@E[3]} @ D[2..3]
462 ror @C[1],@C[1],#32-13 @ C[0] = ROL64(T[0][4], rhotates[0][4]);
463 ldr @C[6],[sp,#$A[3][2]]
464 ror @C[0],@C[0],#32-14
465 ldr @C[7],[sp,#$A[3][2]+4]
466 ror @C[2],@C[2],#32-18 @ C[1] = ROL64(T[1][0], rhotates[1][0]);
467 ldr @C[8],[sp,#$A[4][3]]
468 ror @C[3],@C[3],#32-18
469 ldr @C[9],[sp,#$A[4][3]+4]
470 ror @C[4],@C[4],#32-5 @ C[2] = ROL64(T[1][1], rhotates[2][1]); /* originally A[2][1] */
471 eor @E[0],@E[0],@C[6]
472 ror @C[5],@C[5],#32-5
473 eor @E[1],@E[1],@C[7]
474 ror @C[7],@E[0],#32-7 @ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
475 eor @C[8],@C[8],@E[2]
476 ror @C[6],@E[1],#32-8
477 eor @C[9],@C[9],@E[3]
478 ror @C[8],@C[8],#32-28 @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
480 bic @E[0],@C[4],@C[2]
481 ror @C[9],@C[9],#32-28
482 bic @E[1],@C[5],@C[3]
483 eor @E[0],@E[0],@C[0]
484 eor @E[1],@E[1],@C[1]
485 str @E[0],[sp,#$A[3][0]] @ A[3][0] = C[0] ^ (~C[1] & C[2])
486 bic @E[2],@C[6],@C[4]
487 str @E[1],[sp,#$A[3][0]+4]
488 bic @E[3],@C[7],@C[5]
489 eor @E[2],@E[2],@C[2]
490 eor @E[3],@E[3],@C[3]
491 str @E[2],[sp,#$A[3][1]] @ A[3][1] = C[1] ^ (~C[2] & C[3]);
492 bic @E[0],@C[8],@C[6]
493 str @E[3],[sp,#$A[3][1]+4]
494 bic @E[1],@C[9],@C[7]
495 eor @E[0],@E[0],@C[4]
496 eor @E[1],@E[1],@C[5]
497 str @E[0],[sp,#$A[3][2]] @ A[3][2] = C[2] ^ (~C[3] & C[4]);
498 bic @E[2],@C[0],@C[8]
499 str @E[1],[sp,#$A[3][2]+4]
500 bic @E[3],@C[1],@C[9]
501 eor @E[2],@E[2],@C[6]
502 eor @E[3],@E[3],@C[7]
503 str @E[2],[sp,#$A[3][3]] @ A[3][3] = C[3] ^ (~C[4] & C[0]);
504 bic @E[0],@C[2],@C[0]
505 str @E[3],[sp,#$A[3][3]+4]
506 bic @E[1],@C[3],@C[1]
507 eor @E[0],@E[0],@C[8]
508 eor @E[1],@E[1],@C[9]
509 str @E[0],[sp,#$A[3][4]] @ A[3][4] = C[4] ^ (~C[0] & C[1]);
510 add @E[3],sp,#$T[1][3]
511 str @E[1],[sp,#$A[3][4]+4]
513 ldr @C[0],[sp,#$T[0][2]]
514 ldr @C[1],[sp,#$T[0][2]+4]
515 ldmia @E[3],{@E[0]-@E[2],@E[3]} @ T[1][3..4]
516 ldr @C[7],[sp,#$T[0][0]]
517 ror @C[0],@C[0],#32-31 @ C[0] = ROL64(T[0][2], rhotates[0][2]);
518 ldr @C[6],[sp,#$T[0][0]+4]
519 ror @C[1],@C[1],#32-31
520 ldr @C[8],[sp,#$A[4][1]]
521 ror @C[3],@E[0],#32-27 @ C[1] = ROL64(T[1][3], rhotates[1][3]);
522 ldr @E[0],[sp,#$D[1]]
523 ror @C[2],@E[1],#32-28
524 ldr @C[9],[sp,#$A[4][1]+4]
525 ror @C[5],@E[2],#32-19 @ C[2] = ROL64(T[1][4], rhotates[2][4]); /* originally A[2][4] */
526 ldr @E[1],[sp,#$D[1]+4]
527 ror @C[4],@E[3],#32-20
528 eor @C[8],@C[8],@E[0]
529 ror @C[7],@C[7],#32-20 @ C[3] = ROL64(T[0][0], rhotates[3][0]); /* originally A[3][0] */
530 eor @C[9],@C[9],@E[1]
531 ror @C[6],@C[6],#32-21
533 bic @E[0],@C[4],@C[2]
534 ror @C[8],@C[8],#32-1 @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
535 bic @E[1],@C[5],@C[3]
536 ror @C[9],@C[9],#32-1
537 eor @E[0],@E[0],@C[0]
538 eor @E[1],@E[1],@C[1]
539 str @E[0],[sp,#$A[4][0]] @ A[4][0] = C[0] ^ (~C[1] & C[2])
540 bic @E[2],@C[6],@C[4]
541 str @E[1],[sp,#$A[4][0]+4]
542 bic @E[3],@C[7],@C[5]
543 eor @E[2],@E[2],@C[2]
544 eor @E[3],@E[3],@C[3]
545 str @E[2],[sp,#$A[4][1]] @ A[4][1] = C[1] ^ (~C[2] & C[3]);
546 bic @E[0],@C[8],@C[6]
547 str @E[3],[sp,#$A[4][1]+4]
548 bic @E[1],@C[9],@C[7]
549 eor @E[0],@E[0],@C[4]
550 eor @E[1],@E[1],@C[5]
551 str @E[0],[sp,#$A[4][2]] @ A[4][2] = C[2] ^ (~C[3] & C[4]);
552 bic @E[2],@C[0],@C[8]
553 str @E[1],[sp,#$A[4][2]+4]
554 bic @E[3],@C[1],@C[9]
555 eor @E[2],@E[2],@C[6]
556 eor @E[3],@E[3],@C[7]
557 str @E[2],[sp,#$A[4][3]] @ A[4][3] = C[3] ^ (~C[4] & C[0]);
558 bic @E[0],@C[2],@C[0]
559 str @E[3],[sp,#$A[4][3]+4]
560 bic @E[1],@C[3],@C[1]
561 eor @E[2],@E[0],@C[8]
562 eor @E[3],@E[1],@C[9]
563 str @E[2],[sp,#$A[4][4]] @ A[4][4] = C[4] ^ (~C[0] & C[1]);
564 add @E[0],sp,#$A[1][0]
565 str @E[3],[sp,#$A[4][4]+4]
570 .size KeccakF1600_int,.-KeccakF1600_int
572 .type KeccakF1600, %function
575 stmdb sp!,{r0,r4-r11,lr}
576 sub sp,sp,#320+16 @ space for A[5][5],D[5],T[2][5],...
578 add @E[0],r0,#$A[1][0]
579 add @E[1],sp,#$A[1][0]
581 ldmia @E[0]!,{@C[0]-@C[9]} @ copy A[5][5] to stack
582 stmia @E[1]!,{@C[0]-@C[9]}
583 ldmia @E[0]!,{@C[0]-@C[9]}
584 stmia @E[1]!,{@C[0]-@C[9]}
585 ldmia @E[0]!,{@C[0]-@C[9]}
586 stmia @E[1]!,{@C[0]-@C[9]}
587 ldmia @E[0], {@C[0]-@C[9]}
588 stmia @E[1], {@C[0]-@C[9]}
589 ldmia @E[2], {@C[0]-@C[9]} @ A[0][0..4]
590 add @E[0],sp,#$A[1][0]
591 stmia sp, {@C[0]-@C[9]}
595 ldr @E[1], [sp,#320+16] @ restore pointer to A
596 ldmia sp, {@C[0]-@C[9]}
597 stmia @E[1]!,{@C[0]-@C[9]} @ return A[5][5]
598 ldmia @E[0]!,{@C[0]-@C[9]}
599 stmia @E[1]!,{@C[0]-@C[9]}
600 ldmia @E[0]!,{@C[0]-@C[9]}
601 stmia @E[1]!,{@C[0]-@C[9]}
602 ldmia @E[0]!,{@C[0]-@C[9]}
603 stmia @E[1]!,{@C[0]-@C[9]}
604 ldmia @E[0], {@C[0]-@C[9]}
605 stmia @E[1], {@C[0]-@C[9]}
608 ldmia sp!,{r4-r11,pc}
609 .size KeccakF1600,.-KeccakF1600
# Symbolic register names used by the SHA3_absorb assembly that follows:
#   $hi=r5, $lo=r6, $i=r7, $A_flat=r8, $len=r10, $bsz=r11, $inp=r12
# (r9 is not given a name here). The opening brace starts a lexical scope
# for these names; its closing brace is outside the visible text.
611 { my ($hi,$lo,$i,$A_flat, $len,$bsz,$inp) = map("r$_",(5..8, 10..12));
613 ########################################################################
615 # ----->+-----------------------+
616 # | uint64_t A[5][5] |
619 # +336->+-----------------------+
621 # +340->+-----------------------+
622 # | const void *inp |
623 # +344->+-----------------------+
625 # +348->+-----------------------+
627 # +352->+-----------------------+
632 .type SHA3_absorb,%function
635 stmdb sp!,{r0-r12,lr}
643 ldmia r12!,{@C[0]-@C[9]} @ copy A[5][5] to stack
644 stmia r14!,{@C[0]-@C[9]}
645 ldmia r12!,{@C[0]-@C[9]}
646 stmia r14!,{@C[0]-@C[9]}
647 ldmia r12!,{@C[0]-@C[9]}
648 stmia r14!,{@C[0]-@C[9]}
649 ldmia r12!,{@C[0]-@C[9]}
650 stmia r14!,{@C[0]-@C[9]}
651 ldmia r12, {@C[0]-@C[9]}
652 stmia r14, {@C[0]-@C[9]}
660 str r0,[sp,#344] @ save len - bsz
663 ldmia $A_flat,{r2-r3} @ A_flat[i]
664 ldrb r0,[$inp,#7]! @ inp[7]
673 ldrbne r0,[$inp,#-1]!
675 ldrneb r0,[$inp,#-1]!
677 adds r1,r1,r1 @ sip through carry flag
699 stmia $A_flat!,{r2-r3} @ A_flat[i++] ^= BitInterleave(inp[0..7])
715 ldr r14, [sp,#336] @ pull pointer to A[5][5]
716 ldmia sp, {@C[0]-@C[9]}
717 stmia r14!,{@C[0]-@C[9]} @ return A[5][5]
718 ldmia r12!,{@C[0]-@C[9]}
719 stmia r14!,{@C[0]-@C[9]}
720 ldmia r12!,{@C[0]-@C[9]}
721 stmia r14!,{@C[0]-@C[9]}
722 ldmia r12!,{@C[0]-@C[9]}
723 stmia r14!,{@C[0]-@C[9]}
724 ldmia r12, {@C[0]-@C[9]}
725 stmia r14, {@C[0]-@C[9]}
728 mov r0,$len @ return value
729 ldmia sp!,{r4-r12,pc}
730 .size SHA3_absorb,.-SHA3_absorb
# Symbolic register names used by the SHA3_squeeze assembly that follows:
#   $A_flat=r4, $out=r5, $len=r6, $bsz=r7, $byte=r8, $shl=r9
# The opening brace starts a lexical scope for these names; its closing
# brace is outside the visible text.
733 { my ($A_flat,$out,$len,$bsz, $byte,$shl) = map("r$_", (4..9));
737 .type SHA3_squeeze,%function
740 stmdb sp!,{r4-r10,lr}
751 ldmia r12!,{r0,r1} @ A_flat[i++]
757 eor $byte,$byte,$byte
758 adds r3,r3,r3 @ sip through carry flag
759 adc $byte,$byte,$byte
761 adc $byte,$byte,$byte
763 adc $byte,$byte,$byte
765 adc $byte,$byte,$byte
767 adc $byte,$byte,$byte
769 adc $byte,$byte,$byte
771 adc $byte,$byte,$byte
773 adc $byte,$byte,$byte
774 subs $len,$len,#1 @ len -= 1
780 subs r14,r14,#8 @ bsz -= 8
792 ldmia sp!,{r4-r10,pc}
793 .size SHA3_squeeze,.-SHA3_squeeze
794 .asciz "Keccak-1600 absorb and squeeze for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
# Closing STDOUT flushes whatever is still buffered. A write error (full
# disk, broken pipe) may only be reported at this point, so a failed close
# is fatal: the script must not exit successfully after losing output.
801 close STDOUT or die "error closing STDOUT: $!"; # enforce flush