2 # Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the Apache License 2.0 (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
# perlasm driver preamble: pick the output file (last argument carrying a
# file extension) and the flavour (first argument without a dot) off the
# command line, per the usual OpenSSL perlasm convention.
my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Locate the arm-xlate.pl translator either next to this script or in the
# shared perlasm directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; my $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate ) or
die "can't locate arm-xlate.pl";

# Pipe our output through the translator.  Check the open: the original
# left this unchecked, so a missing/broken xlate script would fail silently.
open OUT,"| \"$^X\" $xlate $flavour $output"
    or die "can't call $xlate: $!";
close STDOUT or die "error closing STDOUT: $!"; # enforce flush
35 // Copyright 2021 The OpenSSL Project Authors. All Rights Reserved.
37 // Licensed under the OpenSSL license (the "License"). You may not use
38 // this file except in compliance with the License. You can obtain a copy
39 // in the file LICENSE in the source distribution or at
40 // https://www.openssl.org/source/license.html
42 // ====================================================================
43 // Written by Ben Avison <bavison@riscosopen.org> for the OpenSSL
44 // project. Rights for redistribution and usage in source and binary
45 // forms are granted according to the OpenSSL license.
46 // ====================================================================
48 // This implementation is a translation of bsaes-armv7 for AArch64.
49 // No attempt has been made to carry across the build switches for
50 // kernel targets, since the Linux kernel crypto support has moved on
51 // from when it was based on OpenSSL.
53 // A lot of hand-scheduling has been performed. Consequently, this code
54 // doesn't factor out neatly into macros in the same way that the
55 // AArch32 version did, and there is little to be gained by wrapping it
56 up in Perl, so it is presented as pure assembly.
59 #include "crypto/arm_arch.h"
63 .extern AES_cbc_encrypt
67 .type _bsaes_decrypt8,%function
//
// _bsaes_decrypt8: decrypt eight AES blocks at once, carried in v0-v7,
// using a bit-sliced representation (AArch64 translation of bsaes-armv7,
// per the file header).
// NOTE(review): this excerpt elides some original lines (entry label,
// round-loop labels and branches, some register loads); the comments
// below describe only what is visible here.
70 // x9 -> key (previously expanded using _bsaes_key_convert)
71 // x10 = number of rounds
75 // other general-purpose registers preserved
78 // other SIMD registers corrupted
// Mix v8 into all eight block registers and byte-permute each through
// v10.  NOTE(review): the setup of v8/v10 is not visible in this excerpt
// -- presumably the round-0 key and the .LM0ISR permutation constant;
// confirm against the full file.
87 eor v0.16b, v0.16b, v8.16b
88 eor v1.16b, v1.16b, v8.16b
89 eor v2.16b, v2.16b, v8.16b
90 eor v4.16b, v4.16b, v8.16b
91 eor v3.16b, v3.16b, v8.16b
92 eor v5.16b, v5.16b, v8.16b
93 tbl v0.16b, {v0.16b}, v10.16b
94 tbl v1.16b, {v1.16b}, v10.16b
95 tbl v2.16b, {v2.16b}, v10.16b
96 tbl v4.16b, {v4.16b}, v10.16b
97 eor v6.16b, v6.16b, v8.16b
98 eor v7.16b, v7.16b, v8.16b
99 tbl v3.16b, {v3.16b}, v10.16b
100 tbl v5.16b, {v5.16b}, v10.16b
101 tbl v6.16b, {v6.16b}, v10.16b
// Input transpose: three bit-swap passes at shift distances 1, 2 and 4
// (masks in v9, v16 and v17 respectively), hand-interleaved with the
// remaining tbl ops.  Statement order is deliberate; do not reorder.
102 ushr v8.2d, v0.2d, #1
103 tbl v7.16b, {v7.16b}, v10.16b
104 ushr v10.2d, v4.2d, #1
105 ushr v18.2d, v2.2d, #1
106 eor v8.16b, v8.16b, v1.16b
107 ushr v19.2d, v6.2d, #1
108 eor v10.16b, v10.16b, v5.16b
109 eor v18.16b, v18.16b, v3.16b
110 and v8.16b, v8.16b, v9.16b
111 eor v19.16b, v19.16b, v7.16b
112 and v10.16b, v10.16b, v9.16b
113 and v18.16b, v18.16b, v9.16b
114 eor v1.16b, v1.16b, v8.16b
116 and v9.16b, v19.16b, v9.16b
117 eor v5.16b, v5.16b, v10.16b
118 shl v10.2d, v10.2d, #1
119 eor v3.16b, v3.16b, v18.16b
120 shl v18.2d, v18.2d, #1
121 eor v0.16b, v0.16b, v8.16b
123 eor v7.16b, v7.16b, v9.16b
124 eor v4.16b, v4.16b, v10.16b
125 eor v2.16b, v2.16b, v18.16b
126 ushr v9.2d, v1.2d, #2
127 eor v6.16b, v6.16b, v8.16b
128 ushr v8.2d, v0.2d, #2
129 ushr v10.2d, v5.2d, #2
130 ushr v18.2d, v4.2d, #2
131 eor v9.16b, v9.16b, v3.16b
132 eor v8.16b, v8.16b, v2.16b
133 eor v10.16b, v10.16b, v7.16b
134 eor v18.16b, v18.16b, v6.16b
135 and v9.16b, v9.16b, v16.16b
136 and v8.16b, v8.16b, v16.16b
137 and v10.16b, v10.16b, v16.16b
138 and v16.16b, v18.16b, v16.16b
139 eor v3.16b, v3.16b, v9.16b
141 eor v2.16b, v2.16b, v8.16b
143 eor v7.16b, v7.16b, v10.16b
144 shl v10.2d, v10.2d, #2
145 eor v6.16b, v6.16b, v16.16b
146 shl v16.2d, v16.2d, #2
147 eor v1.16b, v1.16b, v9.16b
148 eor v0.16b, v0.16b, v8.16b
149 eor v5.16b, v5.16b, v10.16b
150 eor v4.16b, v4.16b, v16.16b
151 ushr v8.2d, v3.2d, #4
152 ushr v9.2d, v2.2d, #4
153 ushr v10.2d, v1.2d, #4
154 ushr v16.2d, v0.2d, #4
155 eor v8.16b, v8.16b, v7.16b
156 eor v9.16b, v9.16b, v6.16b
157 eor v10.16b, v10.16b, v5.16b
158 eor v16.16b, v16.16b, v4.16b
159 and v8.16b, v8.16b, v17.16b
160 and v9.16b, v9.16b, v17.16b
161 and v10.16b, v10.16b, v17.16b
162 and v16.16b, v16.16b, v17.16b
163 eor v7.16b, v7.16b, v8.16b
165 eor v6.16b, v6.16b, v9.16b
167 eor v5.16b, v5.16b, v10.16b
168 shl v10.2d, v10.2d, #4
169 eor v4.16b, v4.16b, v16.16b
170 shl v16.2d, v16.2d, #4
171 eor v3.16b, v3.16b, v8.16b
172 eor v2.16b, v2.16b, v9.16b
173 eor v1.16b, v1.16b, v10.16b
174 eor v0.16b, v0.16b, v16.16b
// Round body (loop control elided in this excerpt): XOR in the next
// 128 bytes of bit-sliced round key from [x9], then apply the inverse
// ShiftRows byte permutation held in v28 (loaded further below).
// NOTE(review): v10 at line 188 is presumably a key quadword loaded by
// an elided instruction -- confirm against the full file.
178 ld1 {v16.16b, v17.16b, v18.16b, v19.16b}, [x9], #64
179 ldp q8, q9, [x9], #32
180 eor v0.16b, v16.16b, v0.16b
182 eor v1.16b, v17.16b, v1.16b
184 eor v2.16b, v18.16b, v2.16b
185 eor v3.16b, v19.16b, v3.16b
186 eor v4.16b, v8.16b, v4.16b
187 eor v5.16b, v9.16b, v5.16b
188 eor v6.16b, v10.16b, v6.16b
189 eor v7.16b, v16.16b, v7.16b
190 tbl v0.16b, {v0.16b}, v28.16b
191 tbl v1.16b, {v1.16b}, v28.16b
192 tbl v2.16b, {v2.16b}, v28.16b
193 tbl v3.16b, {v3.16b}, v28.16b
194 tbl v4.16b, {v4.16b}, v28.16b
195 tbl v5.16b, {v5.16b}, v28.16b
196 tbl v6.16b, {v6.16b}, v28.16b
197 tbl v7.16b, {v7.16b}, v28.16b
// Inverse S-box, computed as a Boolean network (eor/and/orr/bsl) on the
// bit-sliced state.  Heavily hand-scheduled -- do not reorder.
199 eor v1.16b, v1.16b, v4.16b
200 eor v3.16b, v3.16b, v4.16b
202 eor v4.16b, v4.16b, v7.16b
203 eor v2.16b, v2.16b, v7.16b
204 eor v1.16b, v1.16b, v6.16b
205 eor v6.16b, v6.16b, v4.16b
206 eor v2.16b, v2.16b, v5.16b
207 eor v0.16b, v0.16b, v1.16b
208 eor v7.16b, v7.16b, v6.16b
209 eor v8.16b, v6.16b, v2.16b
210 and v9.16b, v4.16b, v6.16b
211 eor v10.16b, v2.16b, v6.16b
212 eor v3.16b, v3.16b, v0.16b
213 eor v5.16b, v5.16b, v0.16b
214 eor v16.16b, v7.16b, v4.16b
215 eor v17.16b, v4.16b, v0.16b
216 and v18.16b, v0.16b, v2.16b
217 eor v19.16b, v7.16b, v4.16b
218 eor v1.16b, v1.16b, v3.16b
219 eor v20.16b, v3.16b, v0.16b
220 eor v21.16b, v5.16b, v2.16b
221 eor v22.16b, v3.16b, v7.16b
222 and v8.16b, v17.16b, v8.16b
223 orr v17.16b, v3.16b, v5.16b
224 eor v23.16b, v1.16b, v6.16b
225 eor v24.16b, v20.16b, v16.16b
226 eor v25.16b, v1.16b, v5.16b
227 orr v26.16b, v20.16b, v21.16b
228 and v20.16b, v20.16b, v21.16b
229 and v27.16b, v7.16b, v1.16b
230 eor v21.16b, v21.16b, v23.16b
231 orr v28.16b, v16.16b, v23.16b
232 orr v29.16b, v22.16b, v25.16b
233 eor v26.16b, v26.16b, v8.16b
234 and v16.16b, v16.16b, v23.16b
235 and v22.16b, v22.16b, v25.16b
236 and v21.16b, v24.16b, v21.16b
237 eor v8.16b, v28.16b, v8.16b
238 eor v23.16b, v5.16b, v2.16b
239 eor v24.16b, v1.16b, v6.16b
240 eor v16.16b, v16.16b, v22.16b
241 eor v22.16b, v3.16b, v0.16b
242 eor v25.16b, v29.16b, v21.16b
243 eor v21.16b, v26.16b, v21.16b
244 eor v8.16b, v8.16b, v20.16b
245 eor v26.16b, v23.16b, v24.16b
246 eor v16.16b, v16.16b, v20.16b
247 eor v28.16b, v22.16b, v19.16b
248 eor v20.16b, v25.16b, v20.16b
249 eor v9.16b, v21.16b, v9.16b
250 eor v8.16b, v8.16b, v18.16b
251 eor v18.16b, v5.16b, v1.16b
252 eor v21.16b, v16.16b, v17.16b
253 eor v16.16b, v16.16b, v17.16b
254 eor v17.16b, v20.16b, v27.16b
255 eor v20.16b, v3.16b, v7.16b
256 eor v25.16b, v9.16b, v8.16b
257 eor v27.16b, v0.16b, v4.16b
258 and v29.16b, v9.16b, v17.16b
259 eor v30.16b, v8.16b, v29.16b
260 eor v31.16b, v21.16b, v29.16b
261 eor v29.16b, v21.16b, v29.16b
262 bsl v30.16b, v17.16b, v21.16b
263 bsl v31.16b, v9.16b, v8.16b
264 bsl v16.16b, v30.16b, v29.16b
265 bsl v21.16b, v29.16b, v30.16b
266 eor v8.16b, v31.16b, v30.16b
267 and v1.16b, v1.16b, v31.16b
268 and v9.16b, v16.16b, v31.16b
269 and v6.16b, v6.16b, v30.16b
270 eor v16.16b, v17.16b, v21.16b
271 and v4.16b, v4.16b, v30.16b
272 eor v17.16b, v8.16b, v30.16b
273 and v21.16b, v24.16b, v8.16b
274 eor v9.16b, v9.16b, v25.16b
275 and v19.16b, v19.16b, v8.16b
276 eor v24.16b, v30.16b, v16.16b
277 eor v25.16b, v30.16b, v16.16b
278 and v7.16b, v7.16b, v17.16b
279 and v10.16b, v10.16b, v16.16b
280 eor v29.16b, v9.16b, v16.16b
281 eor v30.16b, v31.16b, v9.16b
282 and v0.16b, v24.16b, v0.16b
283 and v9.16b, v18.16b, v9.16b
284 and v2.16b, v25.16b, v2.16b
285 eor v10.16b, v10.16b, v6.16b
286 eor v18.16b, v29.16b, v16.16b
287 and v5.16b, v30.16b, v5.16b
288 eor v24.16b, v8.16b, v29.16b
289 and v25.16b, v26.16b, v29.16b
290 and v26.16b, v28.16b, v29.16b
291 eor v8.16b, v8.16b, v29.16b
292 eor v17.16b, v17.16b, v18.16b
293 eor v5.16b, v1.16b, v5.16b
294 and v23.16b, v24.16b, v23.16b
295 eor v21.16b, v21.16b, v25.16b
296 eor v19.16b, v19.16b, v26.16b
297 eor v0.16b, v4.16b, v0.16b
298 and v3.16b, v17.16b, v3.16b
299 eor v1.16b, v9.16b, v1.16b
300 eor v9.16b, v25.16b, v23.16b
301 eor v5.16b, v5.16b, v21.16b
302 eor v2.16b, v6.16b, v2.16b
303 and v6.16b, v8.16b, v22.16b
304 eor v3.16b, v7.16b, v3.16b
305 and v8.16b, v20.16b, v18.16b
306 eor v10.16b, v10.16b, v9.16b
307 eor v0.16b, v0.16b, v19.16b
308 eor v9.16b, v1.16b, v9.16b
309 eor v1.16b, v2.16b, v21.16b
310 eor v3.16b, v3.16b, v19.16b
311 and v16.16b, v27.16b, v16.16b
312 eor v17.16b, v26.16b, v6.16b
313 eor v6.16b, v8.16b, v7.16b
314 eor v7.16b, v1.16b, v9.16b
315 eor v1.16b, v5.16b, v3.16b
316 eor v2.16b, v10.16b, v3.16b
317 eor v4.16b, v16.16b, v4.16b
318 eor v8.16b, v6.16b, v17.16b
319 eor v5.16b, v9.16b, v3.16b
320 eor v9.16b, v0.16b, v1.16b
321 eor v6.16b, v7.16b, v1.16b
322 eor v0.16b, v4.16b, v17.16b
323 eor v4.16b, v8.16b, v7.16b
324 eor v7.16b, v9.16b, v2.16b
325 eor v8.16b, v3.16b, v0.16b
326 eor v7.16b, v7.16b, v5.16b
327 eor v3.16b, v4.16b, v7.16b
328 eor v4.16b, v7.16b, v0.16b
329 eor v7.16b, v8.16b, v3.16b
// Inverse MixColumns via ext-based rotations of each bit-plane,
// interleaved with loading the permutation constant for the next round.
331 ext v8.16b, v0.16b, v0.16b, #8
332 ext v9.16b, v1.16b, v1.16b, #8
333 ldr q28, [x11] // load from .LISR in common case (x10 > 0)
334 ext v10.16b, v6.16b, v6.16b, #8
335 ext v16.16b, v3.16b, v3.16b, #8
336 ext v17.16b, v5.16b, v5.16b, #8
337 ext v18.16b, v4.16b, v4.16b, #8
338 eor v8.16b, v8.16b, v0.16b
339 eor v9.16b, v9.16b, v1.16b
340 eor v10.16b, v10.16b, v6.16b
341 eor v16.16b, v16.16b, v3.16b
342 eor v17.16b, v17.16b, v5.16b
343 ext v19.16b, v2.16b, v2.16b, #8
344 ext v20.16b, v7.16b, v7.16b, #8
345 eor v18.16b, v18.16b, v4.16b
346 eor v6.16b, v6.16b, v8.16b
347 eor v8.16b, v2.16b, v10.16b
348 eor v4.16b, v4.16b, v9.16b
349 eor v2.16b, v19.16b, v2.16b
350 eor v9.16b, v20.16b, v7.16b
351 eor v0.16b, v0.16b, v16.16b
352 eor v1.16b, v1.16b, v16.16b
353 eor v6.16b, v6.16b, v17.16b
354 eor v8.16b, v8.16b, v16.16b
355 eor v7.16b, v7.16b, v18.16b
356 eor v4.16b, v4.16b, v16.16b
357 eor v2.16b, v3.16b, v2.16b
358 eor v1.16b, v1.16b, v17.16b
359 eor v3.16b, v5.16b, v9.16b
360 eor v5.16b, v8.16b, v17.16b
361 eor v7.16b, v7.16b, v17.16b
362 ext v8.16b, v0.16b, v0.16b, #12
363 ext v9.16b, v6.16b, v6.16b, #12
364 ext v10.16b, v4.16b, v4.16b, #12
365 ext v16.16b, v1.16b, v1.16b, #12
366 ext v17.16b, v5.16b, v5.16b, #12
367 ext v18.16b, v7.16b, v7.16b, #12
368 eor v0.16b, v0.16b, v8.16b
369 eor v6.16b, v6.16b, v9.16b
370 eor v4.16b, v4.16b, v10.16b
371 ext v19.16b, v2.16b, v2.16b, #12
372 ext v20.16b, v3.16b, v3.16b, #12
373 eor v1.16b, v1.16b, v16.16b
374 eor v5.16b, v5.16b, v17.16b
375 eor v7.16b, v7.16b, v18.16b
376 eor v2.16b, v2.16b, v19.16b
377 eor v16.16b, v16.16b, v0.16b
378 eor v3.16b, v3.16b, v20.16b
379 eor v17.16b, v17.16b, v4.16b
380 eor v10.16b, v10.16b, v6.16b
381 ext v0.16b, v0.16b, v0.16b, #8
382 eor v9.16b, v9.16b, v1.16b
383 ext v1.16b, v1.16b, v1.16b, #8
384 eor v8.16b, v8.16b, v3.16b
385 eor v16.16b, v16.16b, v3.16b
386 eor v18.16b, v18.16b, v5.16b
387 eor v19.16b, v19.16b, v7.16b
388 ext v21.16b, v5.16b, v5.16b, #8
389 ext v5.16b, v7.16b, v7.16b, #8
390 eor v7.16b, v20.16b, v2.16b
391 ext v4.16b, v4.16b, v4.16b, #8
392 ext v20.16b, v3.16b, v3.16b, #8
393 eor v17.16b, v17.16b, v3.16b
394 ext v2.16b, v2.16b, v2.16b, #8
395 eor v3.16b, v10.16b, v3.16b
396 ext v10.16b, v6.16b, v6.16b, #8
397 eor v0.16b, v0.16b, v8.16b
398 eor v1.16b, v1.16b, v16.16b
399 eor v5.16b, v5.16b, v18.16b
400 eor v3.16b, v3.16b, v4.16b
401 eor v7.16b, v20.16b, v7.16b
402 eor v6.16b, v2.16b, v19.16b
403 eor v4.16b, v21.16b, v17.16b
404 eor v2.16b, v10.16b, v9.16b
406 ldr q28, [x11, #16]! // load from .LISRM0 on last round (x10 == 0)
// Output transpose: undo the bit-slicing with the same 1/2/4 swap
// network as on entry (masks here in v9, v17 and v19).
410 ushr v8.2d, v0.2d, #1
413 ushr v16.2d, v2.2d, #1
415 ushr v18.2d, v6.2d, #1
417 eor v8.16b, v8.16b, v1.16b
418 ushr v20.2d, v3.2d, #1
419 eor v16.16b, v16.16b, v7.16b
420 eor v18.16b, v18.16b, v4.16b
421 and v8.16b, v8.16b, v9.16b
422 eor v20.16b, v20.16b, v5.16b
423 and v16.16b, v16.16b, v9.16b
424 and v18.16b, v18.16b, v9.16b
425 shl v21.2d, v8.2d, #1
426 eor v1.16b, v1.16b, v8.16b
427 and v8.16b, v20.16b, v9.16b
428 eor v7.16b, v7.16b, v16.16b
429 shl v9.2d, v16.2d, #1
430 eor v4.16b, v4.16b, v18.16b
431 shl v16.2d, v18.2d, #1
432 eor v0.16b, v0.16b, v21.16b
433 shl v18.2d, v8.2d, #1
434 eor v5.16b, v5.16b, v8.16b
435 eor v2.16b, v2.16b, v9.16b
436 eor v6.16b, v6.16b, v16.16b
437 ushr v8.2d, v1.2d, #2
438 eor v3.16b, v3.16b, v18.16b
439 ushr v9.2d, v0.2d, #2
440 ushr v16.2d, v7.2d, #2
441 ushr v18.2d, v2.2d, #2
442 eor v8.16b, v8.16b, v4.16b
443 eor v9.16b, v9.16b, v6.16b
444 eor v16.16b, v16.16b, v5.16b
445 eor v18.16b, v18.16b, v3.16b
446 and v8.16b, v8.16b, v17.16b
447 and v9.16b, v9.16b, v17.16b
448 and v16.16b, v16.16b, v17.16b
449 and v17.16b, v18.16b, v17.16b
450 eor v4.16b, v4.16b, v8.16b
452 eor v6.16b, v6.16b, v9.16b
454 eor v5.16b, v5.16b, v16.16b
455 shl v16.2d, v16.2d, #2
456 eor v3.16b, v3.16b, v17.16b
457 shl v17.2d, v17.2d, #2
458 eor v1.16b, v1.16b, v8.16b
459 eor v0.16b, v0.16b, v9.16b
460 eor v7.16b, v7.16b, v16.16b
461 eor v2.16b, v2.16b, v17.16b
462 ushr v8.2d, v4.2d, #4
463 ushr v9.2d, v6.2d, #4
464 ushr v16.2d, v1.2d, #4
465 ushr v17.2d, v0.2d, #4
466 eor v8.16b, v8.16b, v5.16b
467 eor v9.16b, v9.16b, v3.16b
468 eor v16.16b, v16.16b, v7.16b
469 eor v17.16b, v17.16b, v2.16b
470 and v8.16b, v8.16b, v19.16b
471 and v9.16b, v9.16b, v19.16b
472 and v16.16b, v16.16b, v19.16b
473 and v17.16b, v17.16b, v19.16b
474 eor v5.16b, v5.16b, v8.16b
476 eor v3.16b, v3.16b, v9.16b
478 eor v7.16b, v7.16b, v16.16b
479 shl v16.2d, v16.2d, #4
480 eor v2.16b, v2.16b, v17.16b
481 shl v17.2d, v17.2d, #4
482 eor v4.16b, v4.16b, v8.16b
483 eor v6.16b, v6.16b, v9.16b
// Final mix with v10 into all eight registers.  NOTE(review): v10's
// contents at this point are established by lines elided from this
// excerpt -- confirm against the full file.
484 eor v7.16b, v7.16b, v10.16b
485 eor v1.16b, v1.16b, v16.16b
486 eor v2.16b, v2.16b, v10.16b
487 eor v0.16b, v0.16b, v17.16b
488 eor v4.16b, v4.16b, v10.16b
489 eor v6.16b, v6.16b, v10.16b
490 eor v3.16b, v3.16b, v10.16b
491 eor v5.16b, v5.16b, v10.16b
492 eor v1.16b, v1.16b, v10.16b
493 eor v0.16b, v0.16b, v10.16b
495 .size _bsaes_decrypt8,.-_bsaes_decrypt8
497 .type _bsaes_const,%object
//
// _bsaes_const: permutation constants (tbl indices) for the bit-sliced
// AES routines above/below.  NOTE(review): the label lines themselves
// (.LM0ISR:, .LISR:, etc.) are elided from this excerpt; the comments
// give the intended order of the .quad pairs.
500 // InvShiftRows constants
501 // Used in _bsaes_decrypt8, which assumes contiguity
502 // .LM0ISR used with round 0 key
503 // .LISR used with middle round keys
504 // .LISRM0 used with final round key
506 .quad 0x0a0e0206070b0f03, 0x0004080c0d010509
508 .quad 0x0504070602010003, 0x0f0e0d0c080b0a09
510 .quad 0x01040b0e0205080f, 0x0306090c00070a0d
512 // ShiftRows constants
513 // Used in _bsaes_encrypt8, which assumes contiguity
514 // .LM0SR used with round 0 key
515 // .LSR used with middle round keys
516 // .LSRM0 used with final round key
518 .quad 0x0a0e02060f03070b, 0x0004080c05090d01
520 .quad 0x0504070600030201, 0x0f0e0d0c0a09080b
522 .quad 0x0304090e00050a0f, 0x01060b0c0207080d
525 .quad 0x02060a0e03070b0f, 0x0004080c0105090d
527 .quad 0x0105090d0004080c, 0x03070b0f02060a0e
529 // Used in ossl_bsaes_ctr32_encrypt_blocks, prior to dropping into
530 // _bsaes_encrypt8_alt, for round 0 key in place of .LM0SR
532 .quad 0x090d01050c000408, 0x03070b0f060a0e02
535 .size _bsaes_const,.-_bsaes_const
537 .type _bsaes_encrypt8,%function
//
// _bsaes_encrypt8: encrypt eight AES blocks at once, carried in v0-v7,
// using a bit-sliced representation; mirror image of _bsaes_decrypt8
// with the forward S-box/ShiftRows/MixColumns.
// NOTE(review): this excerpt elides some original lines (entry label,
// round-loop labels and branches, some register loads); comments below
// describe only what is visible here.
540 // x9 -> key (previously expanded using _bsaes_key_convert)
541 // x10 = number of rounds
545 // other general-purpose registers preserved
548 // other SIMD registers corrupted
// Mix v8 into all eight block registers and byte-permute each through
// v9.  NOTE(review): v8/v9 setup is not visible in this excerpt --
// presumably the round-0 key and the .LM0SR constant; confirm against
// the full file.
554 eor v0.16b, v0.16b, v8.16b
555 eor v1.16b, v1.16b, v8.16b
557 eor v2.16b, v2.16b, v8.16b
558 eor v4.16b, v4.16b, v8.16b
559 eor v3.16b, v3.16b, v8.16b
560 eor v5.16b, v5.16b, v8.16b
561 tbl v0.16b, {v0.16b}, v9.16b
562 tbl v1.16b, {v1.16b}, v9.16b
563 tbl v2.16b, {v2.16b}, v9.16b
564 tbl v4.16b, {v4.16b}, v9.16b
565 eor v6.16b, v6.16b, v8.16b
566 eor v7.16b, v7.16b, v8.16b
567 tbl v3.16b, {v3.16b}, v9.16b
568 tbl v5.16b, {v5.16b}, v9.16b
569 tbl v6.16b, {v6.16b}, v9.16b
// Input transpose: three bit-swap passes at shift distances 1, 2 and 4
// (masks in v10, v16 and v18 respectively), hand-interleaved with the
// remaining tbl ops.  Statement order is deliberate; do not reorder.
570 ushr v8.2d, v0.2d, #1
572 tbl v7.16b, {v7.16b}, v9.16b
573 ushr v9.2d, v4.2d, #1
575 ushr v17.2d, v2.2d, #1
576 eor v8.16b, v8.16b, v1.16b
578 ushr v19.2d, v6.2d, #1
579 eor v9.16b, v9.16b, v5.16b
580 eor v17.16b, v17.16b, v3.16b
581 and v8.16b, v8.16b, v10.16b
582 eor v19.16b, v19.16b, v7.16b
583 and v9.16b, v9.16b, v10.16b
584 and v17.16b, v17.16b, v10.16b
585 eor v1.16b, v1.16b, v8.16b
587 and v10.16b, v19.16b, v10.16b
588 eor v5.16b, v5.16b, v9.16b
590 eor v3.16b, v3.16b, v17.16b
591 shl v17.2d, v17.2d, #1
592 eor v0.16b, v0.16b, v8.16b
593 shl v8.2d, v10.2d, #1
594 eor v7.16b, v7.16b, v10.16b
595 eor v4.16b, v4.16b, v9.16b
596 eor v2.16b, v2.16b, v17.16b
597 ushr v9.2d, v1.2d, #2
598 eor v6.16b, v6.16b, v8.16b
599 ushr v8.2d, v0.2d, #2
600 ushr v10.2d, v5.2d, #2
601 ushr v17.2d, v4.2d, #2
602 eor v9.16b, v9.16b, v3.16b
603 eor v8.16b, v8.16b, v2.16b
604 eor v10.16b, v10.16b, v7.16b
605 eor v17.16b, v17.16b, v6.16b
606 and v9.16b, v9.16b, v16.16b
607 and v8.16b, v8.16b, v16.16b
608 and v10.16b, v10.16b, v16.16b
609 and v16.16b, v17.16b, v16.16b
610 eor v3.16b, v3.16b, v9.16b
612 eor v2.16b, v2.16b, v8.16b
614 eor v7.16b, v7.16b, v10.16b
615 shl v10.2d, v10.2d, #2
616 eor v6.16b, v6.16b, v16.16b
617 shl v16.2d, v16.2d, #2
618 eor v1.16b, v1.16b, v9.16b
619 eor v0.16b, v0.16b, v8.16b
620 eor v5.16b, v5.16b, v10.16b
621 eor v4.16b, v4.16b, v16.16b
622 ushr v8.2d, v3.2d, #4
623 ushr v9.2d, v2.2d, #4
624 ushr v10.2d, v1.2d, #4
625 ushr v16.2d, v0.2d, #4
626 eor v8.16b, v8.16b, v7.16b
627 eor v9.16b, v9.16b, v6.16b
628 eor v10.16b, v10.16b, v5.16b
629 eor v16.16b, v16.16b, v4.16b
630 and v8.16b, v8.16b, v18.16b
631 and v9.16b, v9.16b, v18.16b
632 and v10.16b, v10.16b, v18.16b
633 and v16.16b, v16.16b, v18.16b
634 eor v7.16b, v7.16b, v8.16b
636 eor v6.16b, v6.16b, v9.16b
638 eor v5.16b, v5.16b, v10.16b
639 shl v10.2d, v10.2d, #4
640 eor v4.16b, v4.16b, v16.16b
641 shl v16.2d, v16.2d, #4
642 eor v3.16b, v3.16b, v8.16b
643 eor v2.16b, v2.16b, v9.16b
644 eor v1.16b, v1.16b, v10.16b
645 eor v0.16b, v0.16b, v16.16b
// Round body (loop control elided in this excerpt): XOR in the next
// 128 bytes of bit-sliced round key from [x9], then apply the ShiftRows
// byte permutation held in v28.  NOTE(review): v10 at line 659 is
// presumably a key quadword loaded by an elided instruction.
649 ld1 {v16.16b, v17.16b, v18.16b, v19.16b}, [x9], #64
650 ldp q8, q9, [x9], #32
651 eor v0.16b, v16.16b, v0.16b
653 eor v1.16b, v17.16b, v1.16b
655 eor v2.16b, v18.16b, v2.16b
656 eor v3.16b, v19.16b, v3.16b
657 eor v4.16b, v8.16b, v4.16b
658 eor v5.16b, v9.16b, v5.16b
659 eor v6.16b, v10.16b, v6.16b
660 eor v7.16b, v16.16b, v7.16b
661 tbl v0.16b, {v0.16b}, v28.16b
662 tbl v1.16b, {v1.16b}, v28.16b
663 tbl v2.16b, {v2.16b}, v28.16b
664 tbl v3.16b, {v3.16b}, v28.16b
665 tbl v4.16b, {v4.16b}, v28.16b
666 tbl v5.16b, {v5.16b}, v28.16b
667 tbl v6.16b, {v6.16b}, v28.16b
668 tbl v7.16b, {v7.16b}, v28.16b
// Forward S-box, computed as a Boolean network (eor/and/orr/bsl) on the
// bit-sliced state.  Heavily hand-scheduled -- do not reorder.
670 eor v5.16b, v5.16b, v6.16b
671 eor v3.16b, v3.16b, v0.16b
673 eor v2.16b, v2.16b, v1.16b
674 eor v5.16b, v5.16b, v0.16b
675 eor v8.16b, v3.16b, v7.16b
676 eor v6.16b, v6.16b, v2.16b
677 eor v7.16b, v7.16b, v5.16b
678 eor v8.16b, v8.16b, v4.16b
679 eor v3.16b, v6.16b, v3.16b
680 eor v4.16b, v4.16b, v5.16b
681 eor v6.16b, v1.16b, v5.16b
682 eor v2.16b, v2.16b, v7.16b
683 eor v1.16b, v8.16b, v1.16b
684 eor v8.16b, v7.16b, v4.16b
685 eor v9.16b, v3.16b, v0.16b
686 eor v10.16b, v7.16b, v6.16b
687 eor v16.16b, v5.16b, v3.16b
688 eor v17.16b, v6.16b, v2.16b
689 eor v18.16b, v5.16b, v1.16b
690 eor v19.16b, v2.16b, v4.16b
691 eor v20.16b, v1.16b, v0.16b
692 orr v21.16b, v8.16b, v9.16b
693 orr v22.16b, v10.16b, v16.16b
694 eor v23.16b, v8.16b, v17.16b
695 eor v24.16b, v9.16b, v18.16b
696 and v19.16b, v19.16b, v20.16b
697 orr v20.16b, v17.16b, v18.16b
698 and v8.16b, v8.16b, v9.16b
699 and v9.16b, v17.16b, v18.16b
700 and v17.16b, v23.16b, v24.16b
701 and v10.16b, v10.16b, v16.16b
702 eor v16.16b, v21.16b, v19.16b
703 eor v18.16b, v20.16b, v19.16b
704 and v19.16b, v2.16b, v1.16b
705 and v20.16b, v6.16b, v5.16b
706 eor v21.16b, v22.16b, v17.16b
707 eor v9.16b, v9.16b, v10.16b
708 eor v10.16b, v16.16b, v17.16b
709 eor v16.16b, v18.16b, v8.16b
710 and v17.16b, v4.16b, v0.16b
711 orr v18.16b, v7.16b, v3.16b
712 eor v21.16b, v21.16b, v8.16b
713 eor v8.16b, v9.16b, v8.16b
714 eor v9.16b, v10.16b, v19.16b
715 eor v10.16b, v3.16b, v0.16b
716 eor v16.16b, v16.16b, v17.16b
717 eor v17.16b, v5.16b, v1.16b
718 eor v19.16b, v21.16b, v20.16b
719 eor v20.16b, v8.16b, v18.16b
720 eor v8.16b, v8.16b, v18.16b
721 eor v18.16b, v7.16b, v4.16b
722 eor v21.16b, v9.16b, v16.16b
723 eor v22.16b, v6.16b, v2.16b
724 and v23.16b, v9.16b, v19.16b
725 eor v24.16b, v10.16b, v17.16b
726 eor v25.16b, v0.16b, v1.16b
727 eor v26.16b, v7.16b, v6.16b
728 eor v27.16b, v18.16b, v22.16b
729 eor v28.16b, v3.16b, v5.16b
730 eor v29.16b, v16.16b, v23.16b
731 eor v30.16b, v20.16b, v23.16b
732 eor v23.16b, v20.16b, v23.16b
733 eor v31.16b, v4.16b, v2.16b
734 bsl v29.16b, v19.16b, v20.16b
735 bsl v30.16b, v9.16b, v16.16b
736 bsl v8.16b, v29.16b, v23.16b
737 bsl v20.16b, v23.16b, v29.16b
738 eor v9.16b, v30.16b, v29.16b
739 and v5.16b, v5.16b, v30.16b
740 and v8.16b, v8.16b, v30.16b
741 and v1.16b, v1.16b, v29.16b
742 eor v16.16b, v19.16b, v20.16b
743 and v2.16b, v2.16b, v29.16b
744 eor v19.16b, v9.16b, v29.16b
745 and v17.16b, v17.16b, v9.16b
746 eor v8.16b, v8.16b, v21.16b
747 and v20.16b, v22.16b, v9.16b
748 eor v21.16b, v29.16b, v16.16b
749 eor v22.16b, v29.16b, v16.16b
750 and v23.16b, v25.16b, v16.16b
751 and v6.16b, v6.16b, v19.16b
752 eor v25.16b, v8.16b, v16.16b
753 eor v29.16b, v30.16b, v8.16b
754 and v4.16b, v21.16b, v4.16b
755 and v8.16b, v28.16b, v8.16b
756 and v0.16b, v22.16b, v0.16b
757 eor v21.16b, v23.16b, v1.16b
758 eor v22.16b, v9.16b, v25.16b
759 eor v9.16b, v9.16b, v25.16b
760 eor v23.16b, v25.16b, v16.16b
761 and v3.16b, v29.16b, v3.16b
762 and v24.16b, v24.16b, v25.16b
763 and v25.16b, v27.16b, v25.16b
764 and v10.16b, v22.16b, v10.16b
765 and v9.16b, v9.16b, v18.16b
766 eor v18.16b, v19.16b, v23.16b
767 and v19.16b, v26.16b, v23.16b
768 eor v3.16b, v5.16b, v3.16b
769 eor v17.16b, v17.16b, v24.16b
770 eor v10.16b, v24.16b, v10.16b
771 and v16.16b, v31.16b, v16.16b
772 eor v20.16b, v20.16b, v25.16b
773 eor v9.16b, v25.16b, v9.16b
774 eor v4.16b, v2.16b, v4.16b
775 and v7.16b, v18.16b, v7.16b
776 eor v18.16b, v19.16b, v6.16b
777 eor v5.16b, v8.16b, v5.16b
778 eor v0.16b, v1.16b, v0.16b
779 eor v1.16b, v21.16b, v10.16b
780 eor v8.16b, v3.16b, v17.16b
781 eor v2.16b, v16.16b, v2.16b
782 eor v3.16b, v6.16b, v7.16b
783 eor v6.16b, v18.16b, v9.16b
784 eor v4.16b, v4.16b, v20.16b
785 eor v10.16b, v5.16b, v10.16b
786 eor v0.16b, v0.16b, v17.16b
787 eor v9.16b, v2.16b, v9.16b
788 eor v3.16b, v3.16b, v20.16b
789 eor v7.16b, v6.16b, v1.16b
790 eor v5.16b, v8.16b, v4.16b
791 eor v6.16b, v10.16b, v1.16b
792 eor v2.16b, v4.16b, v0.16b
793 eor v4.16b, v3.16b, v10.16b
794 eor v9.16b, v9.16b, v7.16b
795 eor v3.16b, v0.16b, v5.16b
796 eor v0.16b, v1.16b, v4.16b
797 eor v1.16b, v4.16b, v8.16b
798 eor v4.16b, v9.16b, v5.16b
799 eor v6.16b, v6.16b, v3.16b
// MixColumns via ext-based rotations of each bit-plane.
801 ext v8.16b, v0.16b, v0.16b, #12
802 ext v9.16b, v4.16b, v4.16b, #12
804 ext v10.16b, v6.16b, v6.16b, #12
805 ext v16.16b, v1.16b, v1.16b, #12
806 ext v17.16b, v3.16b, v3.16b, #12
807 ext v18.16b, v7.16b, v7.16b, #12
808 eor v0.16b, v0.16b, v8.16b
809 eor v4.16b, v4.16b, v9.16b
810 eor v6.16b, v6.16b, v10.16b
811 ext v19.16b, v2.16b, v2.16b, #12
812 ext v20.16b, v5.16b, v5.16b, #12
813 eor v1.16b, v1.16b, v16.16b
814 eor v3.16b, v3.16b, v17.16b
815 eor v7.16b, v7.16b, v18.16b
816 eor v2.16b, v2.16b, v19.16b
817 eor v16.16b, v16.16b, v0.16b
818 eor v5.16b, v5.16b, v20.16b
819 eor v17.16b, v17.16b, v6.16b
820 eor v10.16b, v10.16b, v4.16b
821 ext v0.16b, v0.16b, v0.16b, #8
822 eor v9.16b, v9.16b, v1.16b
823 ext v1.16b, v1.16b, v1.16b, #8
824 eor v8.16b, v8.16b, v5.16b
825 eor v16.16b, v16.16b, v5.16b
826 eor v18.16b, v18.16b, v3.16b
827 eor v19.16b, v19.16b, v7.16b
828 ext v3.16b, v3.16b, v3.16b, #8
829 ext v7.16b, v7.16b, v7.16b, #8
830 eor v20.16b, v20.16b, v2.16b
831 ext v6.16b, v6.16b, v6.16b, #8
832 ext v21.16b, v5.16b, v5.16b, #8
833 eor v17.16b, v17.16b, v5.16b
834 ext v2.16b, v2.16b, v2.16b, #8
835 eor v10.16b, v10.16b, v5.16b
836 ext v22.16b, v4.16b, v4.16b, #8
837 eor v0.16b, v0.16b, v8.16b
838 eor v1.16b, v1.16b, v16.16b
839 eor v5.16b, v7.16b, v18.16b
840 eor v4.16b, v3.16b, v17.16b
841 eor v3.16b, v6.16b, v10.16b
842 eor v7.16b, v21.16b, v20.16b
843 eor v6.16b, v2.16b, v19.16b
844 eor v2.16b, v22.16b, v9.16b
846 ldr q28, [x11, #16]! // load from .LSRM0 on last round (x10 == 0)
// Output transpose: undo the bit-slicing with the same 1/2/4 swap
// network as on entry (masks here in v9, v17 and v19).
850 ushr v8.2d, v0.2d, #1
853 ushr v16.2d, v3.2d, #1
855 ushr v18.2d, v4.2d, #1
857 eor v8.16b, v8.16b, v1.16b
858 ushr v20.2d, v2.2d, #1
859 eor v16.16b, v16.16b, v7.16b
860 eor v18.16b, v18.16b, v6.16b
861 and v8.16b, v8.16b, v9.16b
862 eor v20.16b, v20.16b, v5.16b
863 and v16.16b, v16.16b, v9.16b
864 and v18.16b, v18.16b, v9.16b
865 shl v21.2d, v8.2d, #1
866 eor v1.16b, v1.16b, v8.16b
867 and v8.16b, v20.16b, v9.16b
868 eor v7.16b, v7.16b, v16.16b
869 shl v9.2d, v16.2d, #1
870 eor v6.16b, v6.16b, v18.16b
871 shl v16.2d, v18.2d, #1
872 eor v0.16b, v0.16b, v21.16b
873 shl v18.2d, v8.2d, #1
874 eor v5.16b, v5.16b, v8.16b
875 eor v3.16b, v3.16b, v9.16b
876 eor v4.16b, v4.16b, v16.16b
877 ushr v8.2d, v1.2d, #2
878 eor v2.16b, v2.16b, v18.16b
879 ushr v9.2d, v0.2d, #2
880 ushr v16.2d, v7.2d, #2
881 ushr v18.2d, v3.2d, #2
882 eor v8.16b, v8.16b, v6.16b
883 eor v9.16b, v9.16b, v4.16b
884 eor v16.16b, v16.16b, v5.16b
885 eor v18.16b, v18.16b, v2.16b
886 and v8.16b, v8.16b, v17.16b
887 and v9.16b, v9.16b, v17.16b
888 and v16.16b, v16.16b, v17.16b
889 and v17.16b, v18.16b, v17.16b
890 eor v6.16b, v6.16b, v8.16b
892 eor v4.16b, v4.16b, v9.16b
894 eor v5.16b, v5.16b, v16.16b
895 shl v16.2d, v16.2d, #2
896 eor v2.16b, v2.16b, v17.16b
897 shl v17.2d, v17.2d, #2
898 eor v1.16b, v1.16b, v8.16b
899 eor v0.16b, v0.16b, v9.16b
900 eor v7.16b, v7.16b, v16.16b
901 eor v3.16b, v3.16b, v17.16b
902 ushr v8.2d, v6.2d, #4
903 ushr v9.2d, v4.2d, #4
904 ushr v16.2d, v1.2d, #4
905 ushr v17.2d, v0.2d, #4
906 eor v8.16b, v8.16b, v5.16b
907 eor v9.16b, v9.16b, v2.16b
908 eor v16.16b, v16.16b, v7.16b
909 eor v17.16b, v17.16b, v3.16b
910 and v8.16b, v8.16b, v19.16b
911 and v9.16b, v9.16b, v19.16b
912 and v16.16b, v16.16b, v19.16b
913 and v17.16b, v17.16b, v19.16b
914 eor v5.16b, v5.16b, v8.16b
916 eor v2.16b, v2.16b, v9.16b
918 eor v7.16b, v7.16b, v16.16b
919 shl v16.2d, v16.2d, #4
920 eor v3.16b, v3.16b, v17.16b
921 shl v17.2d, v17.2d, #4
922 eor v6.16b, v6.16b, v8.16b
923 eor v4.16b, v4.16b, v9.16b
// Final mix with v10 into all eight registers.  NOTE(review): v10's
// contents at this point are established by lines elided from this
// excerpt -- confirm against the full file.
924 eor v7.16b, v7.16b, v10.16b
925 eor v1.16b, v1.16b, v16.16b
926 eor v3.16b, v3.16b, v10.16b
927 eor v0.16b, v0.16b, v17.16b
928 eor v6.16b, v6.16b, v10.16b
929 eor v4.16b, v4.16b, v10.16b
930 eor v2.16b, v2.16b, v10.16b
931 eor v5.16b, v5.16b, v10.16b
932 eor v1.16b, v1.16b, v10.16b
933 eor v0.16b, v0.16b, v10.16b
935 .size _bsaes_encrypt8,.-_bsaes_encrypt8
937 .type _bsaes_key_convert,%function
//
// _bsaes_key_convert: expand a conventional AES key schedule into the
// bit-sliced form consumed by _bsaes_encrypt8/_bsaes_decrypt8.
// NOTE(review): this excerpt elides the entry label, the endianness
// conditional around the two adr instructions below, and the per-round
// loop control -- comments describe only the visible instructions.
940 // x9 -> input key (big-endian)
941 // x10 = number of rounds
942 // x17 -> output key (native endianness)
945 // x11 -> .LM0_bigendian
946 // x17 -> last quadword of output key
947 // other general-purpose registers preserved
951 // v15 = last round key (converted to native endianness)
952 // other SIMD registers corrupted
// Pick the byte-order permutation constant; the two adr's are presumably
// on the little-/big-endian sides of an elided conditional.
955 adr x11, .LM0_littleendian
957 adr x11, .LM0_bigendian
959 ldr q0, [x9], #16 // load round 0 key
960 ldr q1, [x11] // .LM0
961 ldr q15, [x9], #16 // load round 1 key
963 movi v7.16b, #0x63 // compose .L63
964 movi v16.16b, #0x01 // bit masks
977 str q0, [x17], #16 // save round 0 key
// Per-round conversion (loop label elided): byte-permute the round key
// via .LM0, pre-XOR the 0x63 S-box constant, then bit-slice it by
// testing each of the eight bit positions (masks v16-v23) with cmtst,
// producing one all-ones/all-zeroes mask vector per bit plane.
981 tbl v0.16b, {v15.16b}, v1.16b
982 ldr q15, [x9], #16 // load next round key
984 eor v0.16b, v0.16b, v7.16b
985 cmtst v24.16b, v0.16b, v16.16b
986 cmtst v25.16b, v0.16b, v17.16b
987 cmtst v26.16b, v0.16b, v18.16b
988 cmtst v27.16b, v0.16b, v19.16b
989 cmtst v28.16b, v0.16b, v20.16b
990 cmtst v29.16b, v0.16b, v21.16b
991 cmtst v30.16b, v0.16b, v22.16b
992 cmtst v31.16b, v0.16b, v23.16b
994 st1 {v24.16b-v27.16b}, [x17], #64 // write bit-sliced round key
995 st1 {v28.16b-v31.16b}, [x17], #64
998 // don't save last round key
// Last round key stays in v15 (see register contract above), converted
// to native byte order for the caller to fix up and store.
1000 rev32 v15.16b, v15.16b
1001 adr x11, .LM0_bigendian
1004 .size _bsaes_key_convert,.-_bsaes_key_convert
1006 .globl ossl_bsaes_cbc_encrypt
1007 .type ossl_bsaes_cbc_encrypt,%function
//
// ossl_bsaes_cbc_encrypt: CBC decryption entry point (see comment at
// line 1030 -- caller guarantees enc == 0), processing up to eight
// blocks per iteration via _bsaes_decrypt8.
// NOTE(review): this excerpt elides the loop labels, branches and the
// per-tail-size dispatch (1..7 remaining blocks), plus the plain-AES
// fallback call; comments describe only the visible instructions.
1010 // x0 -> input ciphertext
1011 // x1 -> output plaintext
1012 // x2 = size of ciphertext and plaintext in bytes (assumed a multiple of 16)
1014 // x4 -> 128-bit initialisation vector (or preceding 128-bit block of ciphertext if continuing after an earlier call)
1017 // Output plaintext filled in
1018 // Initialisation vector overwritten with last quadword of ciphertext
1019 // No output registers, usual AAPCS64 register preservation
1020 ossl_bsaes_cbc_encrypt:
1030 // it is up to the caller to make sure we are called with enc == 0
// Prologue: save fp/lr and the callee-saved SIMD registers this
// function uses (d8-d10, d15), then convert the byte length to blocks.
1032 stp fp, lr, [sp, #-48]!
1033 stp d8, d9, [sp, #16]
1034 stp d10, d15, [sp, #32]
1035 lsr x2, x2, #4 // len in 16 byte blocks
1037 ldr w15, [x3, #240] // get # of rounds
1040 // allocate the key schedule on the stack
1042 sub x17, x17, x15, lsl #7 // 128 bytes per inner round key, less 96 bytes
1044 // populate the key schedule
1045 mov x9, x3 // pass key
1046 mov x10, x15 // pass # of rounds
1047 mov sp, x17 // sp is sp
1048 bl _bsaes_key_convert
1050 str q15, [x17] // save last round key
1051 eor v6.16b, v6.16b, v7.16b // fix up round 0 key (by XORing with 0x63)
1054 ldr q15, [x4] // load IV
// Main eight-block loop (labels/branches elided): load 8 ciphertext
// blocks, decrypt via _bsaes_decrypt8 (call elided), then XOR each
// result with the previous ciphertext block (CBC chaining).
1060 bmi .Lcbc_dec_loop_finish
1062 ldr q0, [x0], #16 // load input
1063 mov x9, sp // pass the key
1071 ldr q7, [x0], #-7*16
1075 ldr q16, [x0], #16 // reload input
1076 eor v0.16b, v0.16b, v15.16b // ^= IV
1077 eor v1.16b, v1.16b, v16.16b
1078 str q0, [x1], #16 // write output
1082 eor v1.16b, v4.16b, v1.16b
1084 eor v2.16b, v2.16b, v4.16b
1085 eor v0.16b, v6.16b, v0.16b
1089 eor v0.16b, v7.16b, v4.16b
1095 eor v0.16b, v5.16b, v2.16b
1096 eor v1.16b, v3.16b, v1.16b
// Tail handling: the sections below are the visible remains of the
// 7..1 remaining-block cases (dispatch branches elided).
1102 .Lcbc_dec_loop_finish:
1106 ldr q0, [x0], #16 // load input
1110 mov x9, sp // pass the key
1123 ldr q6, [x0], #-6*16
1127 ldr q5, [x0], #16 // reload input
1128 eor v0.16b, v0.16b, v15.16b // ^= IV
1132 str q0, [x1], #16 // write output
1134 eor v1.16b, v1.16b, v5.16b
1136 eor v6.16b, v6.16b, v8.16b
1138 eor v4.16b, v4.16b, v9.16b
1139 eor v2.16b, v2.16b, v10.16b
1141 eor v0.16b, v7.16b, v0.16b
1143 eor v1.16b, v3.16b, v5.16b
1153 ldr q3, [x0], #16 // reload input
1154 eor v0.16b, v0.16b, v15.16b // ^= IV
1158 str q0, [x1], #16 // write output
1160 eor v1.16b, v1.16b, v3.16b
1162 eor v3.16b, v6.16b, v5.16b
1163 eor v4.16b, v4.16b, v8.16b
1164 eor v2.16b, v2.16b, v9.16b
1166 eor v0.16b, v7.16b, v0.16b
1176 ldr q3, [x0], #16 // reload input
1177 eor v0.16b, v0.16b, v15.16b // ^= IV
1181 str q0, [x1], #16 // write output
1183 eor v0.16b, v1.16b, v3.16b
1184 eor v1.16b, v6.16b, v5.16b
1185 eor v3.16b, v4.16b, v7.16b
1187 eor v0.16b, v2.16b, v8.16b
1196 ldr q2, [x0], #16 // reload input
1197 eor v0.16b, v0.16b, v15.16b // ^= IV
1200 str q0, [x1], #16 // write output
1202 eor v0.16b, v1.16b, v2.16b
1203 eor v1.16b, v6.16b, v3.16b
1204 eor v2.16b, v4.16b, v5.16b
1213 ldr q2, [x0], #16 // reload input
1214 eor v0.16b, v0.16b, v15.16b // ^= IV
1217 str q0, [x1], #16 // write output
1218 eor v0.16b, v1.16b, v2.16b
1219 eor v1.16b, v6.16b, v3.16b
1227 ldr q2, [x0], #16 // reload input
1228 eor v0.16b, v0.16b, v15.16b // ^= IV
1230 str q0, [x1], #16 // write output
1231 eor v0.16b, v1.16b, v2.16b
// Single-block path: spill x1/x4 around a call (elided -- presumably to
// the plain AES routine; confirm against the full file), then apply the
// CBC XOR to the result in place.
1237 stp x1, x4, [sp, #-32]!
1244 ldp x1, x4, [sp], #32
1245 ldr q0, [x1] // load result
1246 eor v0.16b, v0.16b, v8.16b // ^= IV
1247 str q0, [x1] // write output
// Epilogue: wipe the stack-allocated key schedule, return the IV, and
// restore saved registers (loop branch for the wipe elided).
1253 .Lcbc_dec_bzero:// wipe key schedule [if any]
1254 stp q0, q1, [sp], #32
1257 str q15, [x4] // return IV
1258 ldp d8, d9, [sp, #16]
1259 ldp d10, d15, [sp, #32]
1260 ldp fp, lr, [sp], #48
1262 .size ossl_bsaes_cbc_encrypt,.-ossl_bsaes_cbc_encrypt
// NOTE(review): this listing is an excerpt — interior source lines are missing
// (the embedded numbering jumps), so comments below describe only the visible
// instructions. Bit-sliced AES-CTR: eight blocks are processed per iteration
// of the main loop via _bsaes_encrypt8_alt; fewer than 8 blocks fall back to
// a plain-AES path (.Lctr_enc_short, entered at the blo below).
1264 .globl ossl_bsaes_ctr32_encrypt_blocks
1265 .type ossl_bsaes_ctr32_encrypt_blocks,%function
1268 // x0 -> input text (whole 16-byte blocks)
1269 // x1 -> output text (whole 16-byte blocks)
1270 // x2 = number of 16-byte blocks to encrypt/decrypt (> 0)
1272 // x4 -> initial value of 128-bit counter (stored big-endian) which increments, modulo 2^32, for each block
1274 // Output text filled in
1275 // No output registers, usual AAPCS64 register preservation
1276 ossl_bsaes_ctr32_encrypt_blocks:
1278 cmp x2, #8 // use plain AES for
1279 blo .Lctr_enc_short // small sizes
// Bulk path: save frame record plus the callee-saved low halves d8-d15
// (AAPCS64 only requires preserving the bottom 64 bits of v8-v15).
1281 stp fp, lr, [sp, #-80]!
1282 stp d8, d9, [sp, #16]
1283 stp d10, d11, [sp, #32]
1284 stp d12, d13, [sp, #48]
1285 stp d14, d15, [sp, #64]
1287 ldr w15, [x3, #240] // get # of rounds
1290 // allocate the key schedule on the stack
1292 sub x17, x17, x15, lsl #7 // 128 bytes per inner round key, less 96 bytes
1294 // populate the key schedule
1295 mov x9, x3 // pass key
1296 mov x10, x15 // pass # of rounds
1297 mov sp, x17 // sp is sp
1298 bl _bsaes_key_convert
1299 eor v7.16b, v7.16b, v15.16b // fix up last round key
1300 str q7, [x17] // save last round key
// Build the 1/2/3/4-in-lane-3 increment constants used to derive the
// eight per-block counters from the base counter in v15.
1302 ldr q0, [x4] // load counter
1303 add x13, x11, #.LREVM0SR-.LM0_bigendian
1304 ldr q4, [sp] // load round0 key
1306 movi v8.4s, #1 // compose 1<<96
1308 rev32 v15.16b, v0.16b
1309 rev32 v0.16b, v0.16b
1310 ext v11.16b, v9.16b, v8.16b, #4
1311 rev32 v4.16b, v4.16b
1312 add v12.4s, v11.4s, v11.4s // compose 2<<96
1313 str q4, [sp] // save adjusted round0 key
1314 add v13.4s, v11.4s, v12.4s // compose 3<<96
1315 add v14.4s, v12.4s, v12.4s // compose 4<<96
1320 // Intermix prologue from _bsaes_encrypt8 to use the opportunity
1321 // to flip byte order in 32-bit counter
1323 add v1.4s, v15.4s, v11.4s // +1
1324 add x9, sp, #0x10 // pass next round key
1325 add v2.4s, v15.4s, v12.4s // +2
1326 ldr q9, [x13] // .LREVM0SR
1327 ldr q8, [sp] // load round0 key
1328 add v3.4s, v15.4s, v13.4s // +3
1329 mov x10, x15 // pass rounds
1330 sub x11, x13, #.LREVM0SR-.LSR // pass constants
1331 add v6.4s, v2.4s, v14.4s
1332 add v4.4s, v15.4s, v14.4s // +4
1333 add v7.4s, v3.4s, v14.4s
1334 add v15.4s, v4.4s, v14.4s // next counter
1335 add v5.4s, v1.4s, v14.4s
1337 bl _bsaes_encrypt8_alt
1340 blo .Lctr_enc_loop_done
// XOR keystream with input; register pairing follows the bit-sliced
// output ordering of _bsaes_encrypt8_alt (lines between are not visible
// in this excerpt — pairing cannot be fully traced here).
1344 eor v1.16b, v1.16b, v17.16b
1346 eor v0.16b, v0.16b, v16.16b
1347 eor v4.16b, v4.16b, v17.16b
1354 eor v4.16b, v6.16b, v16.16b
1355 eor v1.16b, v3.16b, v1.16b
1357 eor v3.16b, v7.16b, v3.16b
1359 eor v2.16b, v2.16b, v6.16b
1361 eor v5.16b, v5.16b, v6.16b
// Tail: handle 1..7 remaining blocks one at a time, falling through
// successive "one more block" stanzas.
1372 .Lctr_enc_loop_done:
1374 ldr q16, [x0], #16 // load input
1375 eor v0.16b, v0.16b, v16.16b
1376 str q0, [x1], #16 // write output
1380 eor v1.16b, v1.16b, v17.16b
1384 eor v4.16b, v4.16b, v18.16b
1389 eor v6.16b, v6.16b, v19.16b
1393 eor v3.16b, v3.16b, v20.16b
1398 eor v7.16b, v7.16b, v21.16b
1402 eor v2.16b, v2.16b, v22.16b
1408 .Lctr_enc_bzero: // wipe key schedule [if any]
1409 stp q0, q1, [sp], #32
1413 ldp d8, d9, [sp, #16]
1414 ldp d10, d11, [sp, #32]
1415 ldp d12, d13, [sp, #48]
1416 ldp d14, d15, [sp, #64]
1417 ldp fp, lr, [sp], #80
// Short path (presumably .Lctr_enc_short — its label line is not visible
// in this excerpt): encrypt the counter one block at a time on the stack
// using plain AES, then XOR with the input.
1421 stp fp, lr, [sp, #-96]!
1422 stp x19, x20, [sp, #16]
1423 stp x21, x22, [sp, #32]
1426 mov x19, x0 // copy arguments
1430 ldr w23, [x4, #12] // load counter .LSW
1431 ldr q1, [x4] // load whole counter value
1432 #ifdef __AARCH64EL__
1435 str q1, [sp, #80] // copy counter value
1437 .Lctr_enc_short_loop:
1438 add x0, sp, #80 // input counter value
1439 add x1, sp, #64 // output on the stack
1444 ldr q0, [x19], #16 // load input
1445 ldr q1, [sp, #64] // load encrypted counter
1447 #ifdef __AARCH64EL__
1449 str w0, [sp, #80+12] // next counter value
1451 str w23, [sp, #80+12] // next counter value
1453 eor v0.16b, v0.16b, v1.16b
1454 str q0, [x20], #16 // store output
1456 bne .Lctr_enc_short_loop
// Scrub the stacked counter/keystream copies before returning.
1460 stp q0, q1, [sp, #64]
1463 ldp x21, x22, [sp, #32]
1464 ldp x19, x20, [sp, #16]
1465 ldp fp, lr, [sp], #96
1467 .size ossl_bsaes_ctr32_encrypt_blocks,.-ossl_bsaes_ctr32_encrypt_blocks
// NOTE(review): this listing is an excerpt — interior source lines are missing
// (the embedded numbering jumps), so comments below describe only the visible
// instructions. Bit-sliced AES-XTS encryption: eight tweaked blocks per main
// loop iteration, with dedicated stanzas for 7..1 leftover blocks and a
// ciphertext-stealing tail for non-multiple-of-16 lengths.
1469 .globl ossl_bsaes_xts_encrypt
1470 .type ossl_bsaes_xts_encrypt,%function
1473 // x0 -> input plaintext
1474 // x1 -> output ciphertext
1475 // x2 -> length of text in bytes (must be at least 16)
1476 // x3 -> key1 (used to encrypt the XORed plaintext blocks)
1477 // x4 -> key2 (used to encrypt the initial vector to yield the initial tweak)
1478 // x5 -> 16-byte initial vector (typically, sector number)
1480 // Output ciphertext filled in
1481 // No output registers, usual AAPCS64 register preservation
1482 ossl_bsaes_xts_encrypt:
1485 // nrounds*128-96 bytes: key schedule
1487 // 16 bytes: frame record
1488 // 4*16 bytes: tweak storage across _bsaes_encrypt8
1489 // 6*8 bytes: storage for 5 callee-saved general-purpose registers
1490 // 8*8 bytes: storage for 8 callee-saved SIMD registers
1491 stp fp, lr, [sp, #-192]!
1492 stp x19, x20, [sp, #80]
1493 stp x21, x22, [sp, #96]
1495 stp d8, d9, [sp, #128]
1496 stp d10, d11, [sp, #144]
1497 stp d12, d13, [sp, #160]
1498 stp d14, d15, [sp, #176]
1506 // generate initial tweak
1514 ldr w1, [x23, #240] // get # of rounds
1515 // allocate the key schedule on the stack
1517 sub x17, x17, x1, lsl #7 // 128 bytes per inner round key, less 96 bytes
1519 // populate the key schedule
1520 mov x9, x23 // pass key
1521 mov x10, x1 // pass # of rounds
1523 bl _bsaes_key_convert
1524 eor v15.16b, v15.16b, v7.16b // fix up last round key
1525 str q15, [x17] // save last round key
1527 subs x22, x22, #0x80
// Main 8-block loop body. The sshr/add/cmtst/ext/eor sequence below chains
// successive XTS tweaks v11..v15,...: each step doubles the 128-bit tweak
// (add v,v = shift left 1 per 64-bit lane; sshr #63 + ext propagates the
// cross-lane carry; cmtst against the .Lxts_magic constant folds in the
// GF(2^128) reduction polynomial 0x87).
1534 mov x10, x1 // pass rounds
1537 sshr v1.2d, v11.2d, #63
1538 mov x9, sp // pass key schedule
1539 ldr q6, .Lxts_magic+16
1540 add v2.2d, v11.2d, v11.2d
1541 cmtst v3.2d, v11.2d, v6.2d
1542 and v1.16b, v1.16b, v8.16b
1543 ext v1.16b, v1.16b, v1.16b, #8
1544 and v3.16b, v3.16b, v8.16b
1546 eor v12.16b, v2.16b, v1.16b
1547 eor v1.16b, v4.16b, v12.16b
1548 eor v0.16b, v0.16b, v11.16b
1549 cmtst v2.2d, v12.2d, v6.2d
1550 add v4.2d, v12.2d, v12.2d
1552 ext v3.16b, v3.16b, v3.16b, #8
1553 and v2.16b, v2.16b, v8.16b
1554 eor v13.16b, v4.16b, v3.16b
1556 ext v4.16b, v2.16b, v2.16b, #8
1557 eor v2.16b, v3.16b, v13.16b
1559 add v5.2d, v13.2d, v13.2d
1560 cmtst v7.2d, v13.2d, v6.2d
1561 and v7.16b, v7.16b, v8.16b
1563 ext v7.16b, v7.16b, v7.16b, #8
1565 eor v14.16b, v5.16b, v4.16b
1567 add v4.2d, v14.2d, v14.2d
1568 eor v3.16b, v3.16b, v14.16b
1569 eor v15.16b, v4.16b, v7.16b
1570 add v5.2d, v15.2d, v15.2d
1572 cmtst v4.2d, v14.2d, v6.2d
1573 and v17.16b, v4.16b, v8.16b
1574 cmtst v18.2d, v15.2d, v6.2d
1575 eor v4.16b, v9.16b, v15.16b
1576 ext v9.16b, v17.16b, v17.16b, #8
1577 eor v9.16b, v5.16b, v9.16b
1578 add v17.2d, v9.2d, v9.2d
1579 and v18.16b, v18.16b, v8.16b
1580 eor v5.16b, v10.16b, v9.16b
1582 ext v10.16b, v18.16b, v18.16b, #8
1583 cmtst v9.2d, v9.2d, v6.2d
1584 and v9.16b, v9.16b, v8.16b
1585 eor v10.16b, v17.16b, v10.16b
1586 cmtst v17.2d, v10.2d, v6.2d
1587 eor v6.16b, v16.16b, v10.16b
1589 ext v9.16b, v9.16b, v9.16b, #8
1590 add v10.2d, v10.2d, v10.2d
1591 eor v9.16b, v10.16b, v9.16b
1593 eor v7.16b, v7.16b, v9.16b
1594 add v9.2d, v9.2d, v9.2d
1595 and v8.16b, v17.16b, v8.16b
1596 ext v8.16b, v8.16b, v8.16b, #8
1597 eor v8.16b, v9.16b, v8.16b
1598 str q8, [x2] // next round tweak
// Post-encrypt: XOR the bit-sliced cipher outputs with the saved tweaks
// (XTS second XOR) before writing out.
1603 eor v0.16b, v0.16b, v11.16b
1604 eor v1.16b, v1.16b, v12.16b
1606 eor v4.16b, v4.16b, v13.16b
1607 eor v6.16b, v6.16b, v14.16b
1609 eor v3.16b, v3.16b, v15.16b
1610 subs x22, x22, #0x80
1612 ldr q11, [x0] // next round tweak
1614 eor v0.16b, v7.16b, v8.16b
1615 eor v1.16b, v2.16b, v9.16b
1617 eor v2.16b, v5.16b, v10.16b
// Short-input dispatch: x22 holds remaining bytes; the stanzas below handle
// 7,6,5,4,3,2,1 whole blocks respectively, each chaining one fewer tweak.
1626 adds x22, x22, #0x70
1630 sshr v1.2d, v11.2d, #63
1631 add v2.2d, v11.2d, v11.2d
1632 ldr q9, .Lxts_magic+16
1633 subs x22, x22, #0x10
1635 and v1.16b, v1.16b, v8.16b
1636 cmtst v3.2d, v11.2d, v9.2d
1637 ext v1.16b, v1.16b, v1.16b, #8
1638 and v3.16b, v3.16b, v8.16b
1639 eor v12.16b, v2.16b, v1.16b
1640 ext v1.16b, v3.16b, v3.16b, #8
1641 add v2.2d, v12.2d, v12.2d
1642 cmtst v3.2d, v12.2d, v9.2d
1643 eor v13.16b, v2.16b, v1.16b
1644 and v22.16b, v3.16b, v8.16b
1647 ext v2.16b, v22.16b, v22.16b, #8
1648 add v3.2d, v13.2d, v13.2d
1650 cmtst v4.2d, v13.2d, v9.2d
1651 subs x22, x22, #0x10
1652 eor v14.16b, v3.16b, v2.16b
1653 and v23.16b, v4.16b, v8.16b
1656 ext v3.16b, v23.16b, v23.16b, #8
1657 add v4.2d, v14.2d, v14.2d
1659 cmtst v5.2d, v14.2d, v9.2d
1660 eor v0.16b, v0.16b, v11.16b
1661 subs x22, x22, #0x10
1662 eor v15.16b, v4.16b, v3.16b
1663 and v24.16b, v5.16b, v8.16b
1666 ext v4.16b, v24.16b, v24.16b, #8
1667 add v5.2d, v15.2d, v15.2d
1669 cmtst v6.2d, v15.2d, v9.2d
1670 eor v1.16b, v1.16b, v12.16b
1671 subs x22, x22, #0x10
1672 eor v16.16b, v5.16b, v4.16b
1673 and v25.16b, v6.16b, v8.16b
1676 ext v5.16b, v25.16b, v25.16b, #8
1677 add v6.2d, v16.2d, v16.2d
1679 cmtst v7.2d, v16.2d, v9.2d
1681 eor v2.16b, v2.16b, v13.16b
1683 subs x22, x22, #0x10
1684 eor v17.16b, v6.16b, v5.16b
1685 and v26.16b, v7.16b, v8.16b
1688 ext v7.16b, v26.16b, v26.16b, #8
1689 add v18.2d, v17.2d, v17.2d
1691 eor v3.16b, v3.16b, v14.16b
1693 subs x22, x22, #0x10
1694 eor v18.16b, v18.16b, v7.16b
1698 eor v4.16b, v4.16b, v15.16b
1699 eor v5.16b, v5.16b, v16.16b
1700 str q18, [x0] // next round tweak
1701 mov x9, sp // pass key schedule
1705 eor v6.16b, v6.16b, v17.16b
// 7-block epilogue: second tweak XOR after encryption.
1710 eor v0.16b, v0.16b, v11.16b
1711 eor v1.16b, v1.16b, v12.16b
1713 eor v4.16b, v4.16b, v13.16b
1714 eor v6.16b, v6.16b, v14.16b
1715 eor v3.16b, v3.16b, v15.16b
1716 ldr q11, [x0] // next round tweak
1719 eor v0.16b, v7.16b, v16.16b
1720 eor v1.16b, v2.16b, v17.16b
// 6-block stanza.
1730 eor v4.16b, v4.16b, v15.16b
1731 eor v5.16b, v5.16b, v16.16b
1732 mov x9, sp // pass key schedule
1733 mov x10, x1 // pass rounds
1739 eor v0.16b, v0.16b, v11.16b
1740 eor v1.16b, v1.16b, v12.16b
1741 eor v4.16b, v4.16b, v13.16b
1742 eor v6.16b, v6.16b, v14.16b
1743 ldr q11, [x0] // next round tweak
1744 eor v3.16b, v3.16b, v15.16b
1747 eor v0.16b, v7.16b, v16.16b
// 5-block stanza.
1756 eor v3.16b, v3.16b, v14.16b
1757 eor v4.16b, v4.16b, v15.16b
1758 mov x9, sp // pass key schedule
1759 mov x10, x1 // pass rounds
1764 eor v0.16b, v0.16b, v11.16b
1765 eor v1.16b, v1.16b, v12.16b
1766 ldr q11, [x0] // next round tweak
1767 eor v4.16b, v4.16b, v13.16b
1768 eor v6.16b, v6.16b, v14.16b
1769 eor v3.16b, v3.16b, v15.16b
// 4-block stanza.
1779 eor v2.16b, v2.16b, v13.16b
1780 eor v3.16b, v3.16b, v14.16b
1781 mov x9, sp // pass key schedule
1782 mov x10, x1 // pass rounds
1787 eor v0.16b, v0.16b, v11.16b
1788 eor v1.16b, v1.16b, v12.16b
1789 eor v4.16b, v4.16b, v13.16b
1790 eor v6.16b, v6.16b, v14.16b
1791 mov v11.16b, v15.16b // next round tweak
// 3-block stanza.
1800 eor v1.16b, v1.16b, v12.16b
1801 eor v2.16b, v2.16b, v13.16b
1802 mov x9, sp // pass key schedule
1803 mov x10, x1 // pass rounds
1808 eor v0.16b, v0.16b, v11.16b
1809 eor v1.16b, v1.16b, v12.16b
1810 eor v4.16b, v4.16b, v13.16b
1811 mov v11.16b, v14.16b // next round tweak
// 2-block stanza.
1819 eor v0.16b, v0.16b, v11.16b
1820 eor v1.16b, v1.16b, v12.16b
1821 mov x9, sp // pass key schedule
1822 mov x10, x1 // pass rounds
1827 eor v0.16b, v0.16b, v11.16b
1828 eor v1.16b, v1.16b, v12.16b
1829 mov v11.16b, v13.16b // next round tweak
// 1-block stanza (uses plain AES_encrypt per the register-save note below).
1836 eor v0.16b, v0.16b, v11.16b
1840 mov v13.d[0], v11.d[1] // just in case AES_encrypt corrupts top half of callee-saved SIMD registers
1841 mov v14.d[0], v12.d[1]
1847 trn1 v13.2d, v11.2d, v13.2d
1848 trn1 v11.2d, v12.2d, v14.2d // next round tweak
1849 eor v0.16b, v0.16b, v13.16b
// Ciphertext stealing: non-multiple-of-16 tail.
1853 adds x22, x22, #0x10
1857 // Penultimate plaintext block produces final ciphertext part-block
1858 // plus remaining part of final plaintext block. Move ciphertext part
1859 // to final position and re-use penultimate ciphertext block buffer to
1860 // construct final plaintext block
1863 ldrb w1, [x21, #-0x10]
1864 strb w0, [x21, #-0x10]
1870 // Finally encrypt the penultimate ciphertext block using the
1873 eor v0.16b, v0.16b, v11.16b
1879 mov v13.d[0], v11.d[1] // just in case AES_encrypt corrupts top half of callee-saved SIMD registers
1883 trn1 v11.2d, v11.2d, v13.2d
1885 eor v0.16b, v0.16b, v11.16b
// Epilogue: scrub the stack key schedule, restore callee-saved state.
1892 .Lxts_enc_bzero: // wipe key schedule
1893 stp q0, q1, [sp], #32
1897 ldp x19, x20, [sp, #80]
1898 ldp x21, x22, [sp, #96]
1900 ldp d8, d9, [sp, #128]
1901 ldp d10, d11, [sp, #144]
1902 ldp d12, d13, [sp, #160]
1903 ldp d14, d15, [sp, #176]
1904 ldp fp, lr, [sp], #192
1906 .size ossl_bsaes_xts_encrypt,.-ossl_bsaes_xts_encrypt
1908 // The assembler doesn't seem capable of de-duplicating these when expressed
1909 // using `ldr qd,=` syntax, so assign a symbolic address
// NOTE(review): this is the constant pool referenced as `.Lxts_magic` /
// `.Lxts_magic+16` by the XTS code (the label line itself is not visible in
// this excerpt — confirm against the full file). First pair {1, 0x87} is the
// GF(2^128) reduction constant for the XTS tweak doubling; the second pair is
// loaded via `.Lxts_magic+16` for the cmtst carry test.
1912 .quad 1, 0x87, 0x4000000000000000, 0x4000000000000000
// NOTE(review): this listing is an excerpt — interior source lines are missing
// (the embedded numbering jumps), so comments below describe only the visible
// instructions. Bit-sliced AES-XTS decryption: mirrors ossl_bsaes_xts_encrypt
// above, but the ciphertext-stealing tail must swap the last two tweaks (an
// extra tweak round is computed before the final block).
1914 .globl ossl_bsaes_xts_decrypt
1915 .type ossl_bsaes_xts_decrypt,%function
1918 // x0 -> input ciphertext
1919 // x1 -> output plaintext
1920 // x2 -> length of text in bytes (must be at least 16)
1921 // x3 -> key1 (used to decrypt the XORed ciphertext blocks)
1922 // x4 -> key2 (used to encrypt the initial vector to yield the initial tweak)
1923 // x5 -> 16-byte initial vector (typically, sector number)
1925 // Output plaintext filled in
1926 // No output registers, usual AAPCS64 register preservation
1927 ossl_bsaes_xts_decrypt:
1930 // nrounds*128-96 bytes: key schedule
1932 // 16 bytes: frame record
1933 // 4*16 bytes: tweak storage across _bsaes_decrypt8
1934 // 6*8 bytes: storage for 5 callee-saved general-purpose registers
1935 // 8*8 bytes: storage for 8 callee-saved SIMD registers
1936 stp fp, lr, [sp, #-192]!
1937 stp x19, x20, [sp, #80]
1938 stp x21, x22, [sp, #96]
1940 stp d8, d9, [sp, #128]
1941 stp d10, d11, [sp, #144]
1942 stp d12, d13, [sp, #160]
1943 stp d14, d15, [sp, #176]
1951 // generate initial tweak
1959 ldr w1, [x23, #240] // get # of rounds
1960 // allocate the key schedule on the stack
1962 sub x17, x17, x1, lsl #7 // 128 bytes per inner round key, less 96 bytes
1964 // populate the key schedule
1965 mov x9, x23 // pass key
1966 mov x10, x1 // pass # of rounds
1968 bl _bsaes_key_convert
1970 str q15, [x17] // save last round key
1971 eor v6.16b, v6.16b, v7.16b // fix up round 0 key (by XORing with 0x63)
// Reserve the final partial block for the ciphertext-stealing path.
1975 tst x22, #0xf // if not multiple of 16
1976 csel x22, x30, x22, ne // subtract another 16 bytes
1977 subs x22, x22, #0x80
// Main 8-block loop body: same GF(2^128) tweak-doubling chain as the
// encrypt side (add = shift, sshr/ext = cross-lane carry, cmtst vs
// .Lxts_magic = 0x87 reduction), interleaved with the input-tweak XORs.
1985 mov x10, x1 // pass rounds
1988 sshr v1.2d, v11.2d, #63
1989 mov x9, sp // pass key schedule
1990 ldr q6, .Lxts_magic+16
1991 add v2.2d, v11.2d, v11.2d
1992 cmtst v3.2d, v11.2d, v6.2d
1993 and v1.16b, v1.16b, v8.16b
1994 ext v1.16b, v1.16b, v1.16b, #8
1995 and v3.16b, v3.16b, v8.16b
1997 eor v12.16b, v2.16b, v1.16b
1998 eor v1.16b, v4.16b, v12.16b
1999 eor v0.16b, v0.16b, v11.16b
2000 cmtst v2.2d, v12.2d, v6.2d
2001 add v4.2d, v12.2d, v12.2d
2003 ext v3.16b, v3.16b, v3.16b, #8
2004 and v2.16b, v2.16b, v8.16b
2005 eor v13.16b, v4.16b, v3.16b
2007 ext v4.16b, v2.16b, v2.16b, #8
2008 eor v2.16b, v3.16b, v13.16b
2010 add v5.2d, v13.2d, v13.2d
2011 cmtst v7.2d, v13.2d, v6.2d
2012 and v7.16b, v7.16b, v8.16b
2014 ext v7.16b, v7.16b, v7.16b, #8
2016 eor v14.16b, v5.16b, v4.16b
2018 add v4.2d, v14.2d, v14.2d
2019 eor v3.16b, v3.16b, v14.16b
2020 eor v15.16b, v4.16b, v7.16b
2021 add v5.2d, v15.2d, v15.2d
2023 cmtst v4.2d, v14.2d, v6.2d
2024 and v17.16b, v4.16b, v8.16b
2025 cmtst v18.2d, v15.2d, v6.2d
2026 eor v4.16b, v9.16b, v15.16b
2027 ext v9.16b, v17.16b, v17.16b, #8
2028 eor v9.16b, v5.16b, v9.16b
2029 add v17.2d, v9.2d, v9.2d
2030 and v18.16b, v18.16b, v8.16b
2031 eor v5.16b, v10.16b, v9.16b
2033 ext v10.16b, v18.16b, v18.16b, #8
2034 cmtst v9.2d, v9.2d, v6.2d
2035 and v9.16b, v9.16b, v8.16b
2036 eor v10.16b, v17.16b, v10.16b
2037 cmtst v17.2d, v10.2d, v6.2d
2038 eor v6.16b, v16.16b, v10.16b
2040 ext v9.16b, v9.16b, v9.16b, #8
2041 add v10.2d, v10.2d, v10.2d
2042 eor v9.16b, v10.16b, v9.16b
2044 eor v7.16b, v7.16b, v9.16b
2045 add v9.2d, v9.2d, v9.2d
2046 and v8.16b, v17.16b, v8.16b
2047 ext v8.16b, v8.16b, v8.16b, #8
2048 eor v8.16b, v9.16b, v8.16b
2049 str q8, [x2] // next round tweak
// Post-decrypt: second tweak XOR; note the bit-sliced decrypt output
// ordering differs from the encrypt side.
2053 eor v6.16b, v6.16b, v13.16b
2054 eor v0.16b, v0.16b, v11.16b
2056 eor v7.16b, v7.16b, v8.16b
2058 eor v0.16b, v1.16b, v12.16b
2060 eor v1.16b, v3.16b, v1.16b
2061 subs x22, x22, #0x80
2062 eor v2.16b, v2.16b, v15.16b
2063 eor v3.16b, v4.16b, v14.16b
2066 ldr q11, [x0] // next round tweak
2067 eor v0.16b, v5.16b, v4.16b
// Short-input dispatch: stanzas for 7..1 remaining whole blocks.
2077 adds x22, x22, #0x70
2081 sshr v1.2d, v11.2d, #63
2082 add v2.2d, v11.2d, v11.2d
2083 ldr q9, .Lxts_magic+16
2084 subs x22, x22, #0x10
2086 and v1.16b, v1.16b, v8.16b
2087 cmtst v3.2d, v11.2d, v9.2d
2088 ext v1.16b, v1.16b, v1.16b, #8
2089 and v3.16b, v3.16b, v8.16b
2090 eor v12.16b, v2.16b, v1.16b
2091 ext v1.16b, v3.16b, v3.16b, #8
2092 add v2.2d, v12.2d, v12.2d
2093 cmtst v3.2d, v12.2d, v9.2d
2094 eor v13.16b, v2.16b, v1.16b
2095 and v22.16b, v3.16b, v8.16b
2098 ext v2.16b, v22.16b, v22.16b, #8
2099 add v3.2d, v13.2d, v13.2d
2101 cmtst v4.2d, v13.2d, v9.2d
2102 subs x22, x22, #0x10
2103 eor v14.16b, v3.16b, v2.16b
2104 and v23.16b, v4.16b, v8.16b
2107 ext v3.16b, v23.16b, v23.16b, #8
2108 add v4.2d, v14.2d, v14.2d
2110 cmtst v5.2d, v14.2d, v9.2d
2111 eor v0.16b, v0.16b, v11.16b
2112 subs x22, x22, #0x10
2113 eor v15.16b, v4.16b, v3.16b
2114 and v24.16b, v5.16b, v8.16b
2117 ext v4.16b, v24.16b, v24.16b, #8
2118 add v5.2d, v15.2d, v15.2d
2120 cmtst v6.2d, v15.2d, v9.2d
2121 eor v1.16b, v1.16b, v12.16b
2122 subs x22, x22, #0x10
2123 eor v16.16b, v5.16b, v4.16b
2124 and v25.16b, v6.16b, v8.16b
2127 ext v5.16b, v25.16b, v25.16b, #8
2128 add v6.2d, v16.2d, v16.2d
2130 cmtst v7.2d, v16.2d, v9.2d
2132 eor v2.16b, v2.16b, v13.16b
2134 subs x22, x22, #0x10
2135 eor v17.16b, v6.16b, v5.16b
2136 and v26.16b, v7.16b, v8.16b
2139 ext v7.16b, v26.16b, v26.16b, #8
2140 add v18.2d, v17.2d, v17.2d
2142 eor v3.16b, v3.16b, v14.16b
2144 subs x22, x22, #0x10
2145 eor v18.16b, v18.16b, v7.16b
2149 eor v4.16b, v4.16b, v15.16b
2150 eor v5.16b, v5.16b, v16.16b
2151 str q18, [x0] // next round tweak
2152 mov x9, sp // pass key schedule
2156 eor v6.16b, v6.16b, v17.16b
// 7-block epilogue: second tweak XOR after decryption.
2161 eor v0.16b, v0.16b, v11.16b
2162 eor v1.16b, v1.16b, v12.16b
2164 eor v6.16b, v6.16b, v13.16b
2165 eor v4.16b, v4.16b, v14.16b
2166 eor v2.16b, v2.16b, v15.16b
2167 ldr q11, [x0] // next round tweak
2170 eor v0.16b, v7.16b, v16.16b
2171 eor v1.16b, v3.16b, v17.16b
// 6-block stanza.
2181 eor v4.16b, v4.16b, v15.16b
2182 eor v5.16b, v5.16b, v16.16b
2183 mov x9, sp // pass key schedule
2184 mov x10, x1 // pass rounds
2190 eor v0.16b, v0.16b, v11.16b
2191 eor v1.16b, v1.16b, v12.16b
2192 eor v6.16b, v6.16b, v13.16b
2193 eor v4.16b, v4.16b, v14.16b
2194 ldr q11, [x0] // next round tweak
2195 eor v2.16b, v2.16b, v15.16b
2198 eor v0.16b, v7.16b, v16.16b
// 5-block stanza.
2207 eor v3.16b, v3.16b, v14.16b
2208 eor v4.16b, v4.16b, v15.16b
2209 mov x9, sp // pass key schedule
2210 mov x10, x1 // pass rounds
2215 eor v0.16b, v0.16b, v11.16b
2216 eor v1.16b, v1.16b, v12.16b
2217 ldr q11, [x0] // next round tweak
2218 eor v6.16b, v6.16b, v13.16b
2219 eor v4.16b, v4.16b, v14.16b
2220 eor v2.16b, v2.16b, v15.16b
// 4-block stanza.
2230 eor v2.16b, v2.16b, v13.16b
2231 eor v3.16b, v3.16b, v14.16b
2232 mov x9, sp // pass key schedule
2233 mov x10, x1 // pass rounds
2238 eor v0.16b, v0.16b, v11.16b
2239 eor v1.16b, v1.16b, v12.16b
2240 eor v6.16b, v6.16b, v13.16b
2241 eor v4.16b, v4.16b, v14.16b
2242 mov v11.16b, v15.16b // next round tweak
// 3-block stanza.
2251 eor v1.16b, v1.16b, v12.16b
2252 eor v2.16b, v2.16b, v13.16b
2253 mov x9, sp // pass key schedule
2254 mov x10, x1 // pass rounds
2259 eor v0.16b, v0.16b, v11.16b
2260 eor v1.16b, v1.16b, v12.16b
2261 eor v6.16b, v6.16b, v13.16b
2262 mov v11.16b, v14.16b // next round tweak
// 2-block stanza.
2270 eor v0.16b, v0.16b, v11.16b
2271 eor v1.16b, v1.16b, v12.16b
2272 mov x9, sp // pass key schedule
2273 mov x10, x1 // pass rounds
2278 eor v0.16b, v0.16b, v11.16b
2279 eor v1.16b, v1.16b, v12.16b
2280 mov v11.16b, v13.16b // next round tweak
// 1-block stanza (uses plain AES_decrypt per the register-save note below).
2287 eor v0.16b, v0.16b, v11.16b
2291 mov v13.d[0], v11.d[1] // just in case AES_decrypt corrupts top half of callee-saved SIMD registers
2292 mov v14.d[0], v12.d[1]
2298 trn1 v13.2d, v11.2d, v13.2d
2299 trn1 v11.2d, v12.2d, v14.2d // next round tweak
2300 eor v0.16b, v0.16b, v13.16b
// Ciphertext stealing: the decrypt side needs one tweak beyond the last,
// computed below (single GF(2^128) doubling of v11 into v12).
2304 adds x22, x22, #0x10
2307 // calculate one round of extra tweak for the stolen ciphertext
2309 sshr v6.2d, v11.2d, #63
2310 and v6.16b, v6.16b, v8.16b
2311 add v12.2d, v11.2d, v11.2d
2312 ext v6.16b, v6.16b, v6.16b, #8
2313 eor v12.16b, v12.16b, v6.16b
2315 // perform the final decryption with the last tweak value
2317 eor v0.16b, v0.16b, v12.16b
2322 mov v13.d[0], v11.d[1] // just in case AES_decrypt corrupts top half of callee-saved SIMD registers
2323 mov v14.d[0], v12.d[1]
2327 trn1 v12.2d, v12.2d, v14.2d
2328 trn1 v11.2d, v11.2d, v13.2d
2330 eor v0.16b, v0.16b, v12.16b
2334 // Penultimate ciphertext block produces final plaintext part-block
2335 // plus remaining part of final ciphertext block. Move plaintext part
2336 // to final position and re-use penultimate plaintext block buffer to
2337 // construct final ciphertext block
2341 strb w1, [x21, #0x10]
2347 // Finally decrypt the penultimate plaintext block using the
2348 // penultimate tweak
2350 eor v0.16b, v0.16b, v11.16b
2359 trn1 v11.2d, v11.2d, v13.2d
2361 eor v0.16b, v0.16b, v11.16b
// Epilogue: scrub the stack key schedule, restore callee-saved state.
2368 .Lxts_dec_bzero: // wipe key schedule
2369 stp q0, q1, [sp], #32
2373 ldp x19, x20, [sp, #80]
2374 ldp x21, x22, [sp, #96]
2376 ldp d8, d9, [sp, #128]
2377 ldp d10, d11, [sp, #144]
2378 ldp d12, d13, [sp, #160]
2379 ldp d14, d15, [sp, #176]
2380 ldp fp, lr, [sp], #192
2382 .size ossl_bsaes_xts_decrypt,.-ossl_bsaes_xts_decrypt