2 # Copyright 2020-2023 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the Apache License 2.0 (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
11 # Command-line handling for the perlasm translator pipeline:
11 # a trailing argument with a file extension is the output file,
11 # a leading argument without a dot is the flavour (e.g. linux64).
11 my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
12 my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
15 # Locate arm-xlate.pl next to this script or in ../../perlasm.
15 $0 =~ m/(.*[\/\\])[^\/\\]+$/; my $dir=$1;
16 ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
17 ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate ) or
18 die "can't locate arm-xlate.pl";
20 # Pipe our output through the translator; quote $output in case the
20 # path contains spaces, and fail loudly if the pipe cannot be opened.
20 open OUT,"| \"$^X\" $xlate $flavour \"$output\"" or die "can't call $xlate: $!";
26 close STDOUT or die "error closing STDOUT: $!"; # enforce flush
35 // Copyright 2021-2023 The OpenSSL Project Authors. All Rights Reserved.
37 // Licensed under the OpenSSL license (the "License"). You may not use
38 // this file except in compliance with the License. You can obtain a copy
39 // in the file LICENSE in the source distribution or at
40 // https://www.openssl.org/source/license.html
42 // ====================================================================
43 // Written by Ben Avison <bavison@riscosopen.org> for the OpenSSL
44 // project. Rights for redistribution and usage in source and binary
45 // forms are granted according to the OpenSSL license.
46 // ====================================================================
48 // This implementation is a translation of bsaes-armv7 for AArch64.
49 // No attempt has been made to carry across the build switches for
50 // kernel targets, since the Linux kernel crypto support has moved on
51 // from when it was based on OpenSSL.
53 // A lot of hand-scheduling has been performed. Consequently, this code
54 // doesn't factor out neatly into macros in the same way that the
55 // AArch32 version did, and there is little to be gained by wrapping it
56 // up in Perl, so it is presented as pure assembly.
59 #include "crypto/arm_arch.h"
63 .extern AES_cbc_encrypt
67 .type _bsaes_decrypt8,%function
// _bsaes_decrypt8: decrypt eight 128-bit blocks (held in v0-v7) in
// parallel using a bit-sliced implementation.
70 // x9 -> key (previously expanded using _bsaes_key_convert)
71 // x10 = number of rounds
75 // other general-purpose registers preserved
78 // other SIMD registers corrupted
// Round 0 AddRoundKey: XOR each block with v8.
// NOTE(review): v8 appears to hold the round-0 key and v10 the
// .LM0ISR permutation; the loads establishing them are in lines
// elided from this view -- confirm against upstream.
87 eor v0.16b, v0.16b, v8.16b
88 eor v1.16b, v1.16b, v8.16b
89 eor v2.16b, v2.16b, v8.16b
90 eor v4.16b, v4.16b, v8.16b
91 eor v3.16b, v3.16b, v8.16b
92 eor v5.16b, v5.16b, v8.16b
// Byte-permute every block through v10 (inverse ShiftRows for the
// round-0 key layout).  The eor/tbl streams are deliberately
// interleaved for pipeline scheduling -- do not reorder.
93 tbl v0.16b, {v0.16b}, v10.16b
94 tbl v1.16b, {v1.16b}, v10.16b
95 tbl v2.16b, {v2.16b}, v10.16b
96 tbl v4.16b, {v4.16b}, v10.16b
97 eor v6.16b, v6.16b, v8.16b
98 eor v7.16b, v7.16b, v8.16b
99 tbl v3.16b, {v3.16b}, v10.16b
100 tbl v5.16b, {v5.16b}, v10.16b
101 tbl v6.16b, {v6.16b}, v10.16b
// First step of the bit-slice transpose (swap bit planes via
// shift/XOR/mask), interleaved with the final tbl.
102 ushr v8.2d, v0.2d, #1
103 tbl v7.16b, {v7.16b}, v10.16b
104 ushr v10.2d, v4.2d, #1
105 ushr v18.2d, v2.2d, #1
106 eor v8.16b, v8.16b, v1.16b
107 ushr v19.2d, v6.2d, #1
108 eor v10.16b, v10.16b, v5.16b
109 eor v18.16b, v18.16b, v3.16b
110 and v8.16b, v8.16b, v9.16b
111 eor v19.16b, v19.16b, v7.16b
112 and v10.16b, v10.16b, v9.16b
113 and v18.16b, v18.16b, v9.16b
114 eor v1.16b, v1.16b, v8.16b
116 and v9.16b, v19.16b, v9.16b
117 eor v5.16b, v5.16b, v10.16b
118 shl v10.2d, v10.2d, #1
119 eor v3.16b, v3.16b, v18.16b
120 shl v18.2d, v18.2d, #1
121 eor v0.16b, v0.16b, v8.16b
123 eor v7.16b, v7.16b, v9.16b
124 eor v4.16b, v4.16b, v10.16b
125 eor v2.16b, v2.16b, v18.16b
126 ushr v9.2d, v1.2d, #2
127 eor v6.16b, v6.16b, v8.16b
128 ushr v8.2d, v0.2d, #2
129 ushr v10.2d, v5.2d, #2
130 ushr v18.2d, v4.2d, #2
131 eor v9.16b, v9.16b, v3.16b
132 eor v8.16b, v8.16b, v2.16b
133 eor v10.16b, v10.16b, v7.16b
134 eor v18.16b, v18.16b, v6.16b
135 and v9.16b, v9.16b, v16.16b
136 and v8.16b, v8.16b, v16.16b
137 and v10.16b, v10.16b, v16.16b
138 and v16.16b, v18.16b, v16.16b
139 eor v3.16b, v3.16b, v9.16b
141 eor v2.16b, v2.16b, v8.16b
143 eor v7.16b, v7.16b, v10.16b
144 shl v10.2d, v10.2d, #2
145 eor v6.16b, v6.16b, v16.16b
146 shl v16.2d, v16.2d, #2
147 eor v1.16b, v1.16b, v9.16b
148 eor v0.16b, v0.16b, v8.16b
149 eor v5.16b, v5.16b, v10.16b
150 eor v4.16b, v4.16b, v16.16b
151 ushr v8.2d, v3.2d, #4
152 ushr v9.2d, v2.2d, #4
153 ushr v10.2d, v1.2d, #4
154 ushr v16.2d, v0.2d, #4
155 eor v8.16b, v8.16b, v7.16b
156 eor v9.16b, v9.16b, v6.16b
157 eor v10.16b, v10.16b, v5.16b
158 eor v16.16b, v16.16b, v4.16b
159 and v8.16b, v8.16b, v17.16b
160 and v9.16b, v9.16b, v17.16b
161 and v10.16b, v10.16b, v17.16b
162 and v16.16b, v16.16b, v17.16b
163 eor v7.16b, v7.16b, v8.16b
165 eor v6.16b, v6.16b, v9.16b
167 eor v5.16b, v5.16b, v10.16b
168 shl v10.2d, v10.2d, #4
169 eor v4.16b, v4.16b, v16.16b
170 shl v16.2d, v16.2d, #4
171 eor v3.16b, v3.16b, v8.16b
172 eor v2.16b, v2.16b, v9.16b
173 eor v1.16b, v1.16b, v10.16b
174 eor v0.16b, v0.16b, v16.16b
// Per-round AddRoundKey: load the bit-sliced round key from [x9]
// (written by _bsaes_key_convert) and XOR it into the eight bit
// planes v0-v7.
178 ld1 {v16.16b, v17.16b, v18.16b, v19.16b}, [x9], #64
179 ldp q8, q9, [x9], #32
180 eor v0.16b, v16.16b, v0.16b
182 eor v1.16b, v17.16b, v1.16b
184 eor v2.16b, v18.16b, v2.16b
185 eor v3.16b, v19.16b, v3.16b
186 eor v4.16b, v8.16b, v4.16b
187 eor v5.16b, v9.16b, v5.16b
// NOTE(review): the loads feeding v10 and v16 below (original lines
// 181/183) are elided from this view -- confirm against upstream.
188 eor v6.16b, v10.16b, v6.16b
189 eor v7.16b, v16.16b, v7.16b
// Row-shift permutation for this round; v28 is loaded elsewhere from
// .LISR (middle rounds) or .LISRM0 (final round) via x11.
190 tbl v0.16b, {v0.16b}, v28.16b
191 tbl v1.16b, {v1.16b}, v28.16b
192 tbl v2.16b, {v2.16b}, v28.16b
193 tbl v3.16b, {v3.16b}, v28.16b
194 tbl v4.16b, {v4.16b}, v28.16b
195 tbl v5.16b, {v5.16b}, v28.16b
196 tbl v6.16b, {v6.16b}, v28.16b
197 tbl v7.16b, {v7.16b}, v28.16b
199 eor v1.16b, v1.16b, v4.16b
200 eor v3.16b, v3.16b, v4.16b
202 eor v4.16b, v4.16b, v7.16b
203 eor v2.16b, v2.16b, v7.16b
204 eor v1.16b, v1.16b, v6.16b
205 eor v6.16b, v6.16b, v4.16b
206 eor v2.16b, v2.16b, v5.16b
207 eor v0.16b, v0.16b, v1.16b
208 eor v7.16b, v7.16b, v6.16b
209 eor v8.16b, v6.16b, v2.16b
210 and v9.16b, v4.16b, v6.16b
211 eor v10.16b, v2.16b, v6.16b
212 eor v3.16b, v3.16b, v0.16b
213 eor v5.16b, v5.16b, v0.16b
214 eor v16.16b, v7.16b, v4.16b
215 eor v17.16b, v4.16b, v0.16b
216 and v18.16b, v0.16b, v2.16b
217 eor v19.16b, v7.16b, v4.16b
218 eor v1.16b, v1.16b, v3.16b
219 eor v20.16b, v3.16b, v0.16b
220 eor v21.16b, v5.16b, v2.16b
221 eor v22.16b, v3.16b, v7.16b
222 and v8.16b, v17.16b, v8.16b
223 orr v17.16b, v3.16b, v5.16b
224 eor v23.16b, v1.16b, v6.16b
225 eor v24.16b, v20.16b, v16.16b
226 eor v25.16b, v1.16b, v5.16b
227 orr v26.16b, v20.16b, v21.16b
228 and v20.16b, v20.16b, v21.16b
229 and v27.16b, v7.16b, v1.16b
230 eor v21.16b, v21.16b, v23.16b
231 orr v28.16b, v16.16b, v23.16b
232 orr v29.16b, v22.16b, v25.16b
233 eor v26.16b, v26.16b, v8.16b
234 and v16.16b, v16.16b, v23.16b
235 and v22.16b, v22.16b, v25.16b
236 and v21.16b, v24.16b, v21.16b
237 eor v8.16b, v28.16b, v8.16b
238 eor v23.16b, v5.16b, v2.16b
239 eor v24.16b, v1.16b, v6.16b
240 eor v16.16b, v16.16b, v22.16b
241 eor v22.16b, v3.16b, v0.16b
242 eor v25.16b, v29.16b, v21.16b
243 eor v21.16b, v26.16b, v21.16b
244 eor v8.16b, v8.16b, v20.16b
245 eor v26.16b, v23.16b, v24.16b
246 eor v16.16b, v16.16b, v20.16b
247 eor v28.16b, v22.16b, v19.16b
248 eor v20.16b, v25.16b, v20.16b
249 eor v9.16b, v21.16b, v9.16b
250 eor v8.16b, v8.16b, v18.16b
251 eor v18.16b, v5.16b, v1.16b
252 eor v21.16b, v16.16b, v17.16b
253 eor v16.16b, v16.16b, v17.16b
254 eor v17.16b, v20.16b, v27.16b
255 eor v20.16b, v3.16b, v7.16b
256 eor v25.16b, v9.16b, v8.16b
257 eor v27.16b, v0.16b, v4.16b
258 and v29.16b, v9.16b, v17.16b
259 eor v30.16b, v8.16b, v29.16b
260 eor v31.16b, v21.16b, v29.16b
261 eor v29.16b, v21.16b, v29.16b
262 bsl v30.16b, v17.16b, v21.16b
263 bsl v31.16b, v9.16b, v8.16b
264 bsl v16.16b, v30.16b, v29.16b
265 bsl v21.16b, v29.16b, v30.16b
266 eor v8.16b, v31.16b, v30.16b
267 and v1.16b, v1.16b, v31.16b
268 and v9.16b, v16.16b, v31.16b
269 and v6.16b, v6.16b, v30.16b
270 eor v16.16b, v17.16b, v21.16b
271 and v4.16b, v4.16b, v30.16b
272 eor v17.16b, v8.16b, v30.16b
273 and v21.16b, v24.16b, v8.16b
274 eor v9.16b, v9.16b, v25.16b
275 and v19.16b, v19.16b, v8.16b
276 eor v24.16b, v30.16b, v16.16b
277 eor v25.16b, v30.16b, v16.16b
278 and v7.16b, v7.16b, v17.16b
279 and v10.16b, v10.16b, v16.16b
280 eor v29.16b, v9.16b, v16.16b
281 eor v30.16b, v31.16b, v9.16b
282 and v0.16b, v24.16b, v0.16b
283 and v9.16b, v18.16b, v9.16b
284 and v2.16b, v25.16b, v2.16b
285 eor v10.16b, v10.16b, v6.16b
286 eor v18.16b, v29.16b, v16.16b
287 and v5.16b, v30.16b, v5.16b
288 eor v24.16b, v8.16b, v29.16b
289 and v25.16b, v26.16b, v29.16b
290 and v26.16b, v28.16b, v29.16b
291 eor v8.16b, v8.16b, v29.16b
292 eor v17.16b, v17.16b, v18.16b
293 eor v5.16b, v1.16b, v5.16b
294 and v23.16b, v24.16b, v23.16b
295 eor v21.16b, v21.16b, v25.16b
296 eor v19.16b, v19.16b, v26.16b
297 eor v0.16b, v4.16b, v0.16b
298 and v3.16b, v17.16b, v3.16b
299 eor v1.16b, v9.16b, v1.16b
300 eor v9.16b, v25.16b, v23.16b
301 eor v5.16b, v5.16b, v21.16b
302 eor v2.16b, v6.16b, v2.16b
303 and v6.16b, v8.16b, v22.16b
304 eor v3.16b, v7.16b, v3.16b
305 and v8.16b, v20.16b, v18.16b
306 eor v10.16b, v10.16b, v9.16b
307 eor v0.16b, v0.16b, v19.16b
308 eor v9.16b, v1.16b, v9.16b
309 eor v1.16b, v2.16b, v21.16b
310 eor v3.16b, v3.16b, v19.16b
311 and v16.16b, v27.16b, v16.16b
312 eor v17.16b, v26.16b, v6.16b
313 eor v6.16b, v8.16b, v7.16b
314 eor v7.16b, v1.16b, v9.16b
315 eor v1.16b, v5.16b, v3.16b
316 eor v2.16b, v10.16b, v3.16b
317 eor v4.16b, v16.16b, v4.16b
318 eor v8.16b, v6.16b, v17.16b
319 eor v5.16b, v9.16b, v3.16b
320 eor v9.16b, v0.16b, v1.16b
321 eor v6.16b, v7.16b, v1.16b
322 eor v0.16b, v4.16b, v17.16b
323 eor v4.16b, v8.16b, v7.16b
324 eor v7.16b, v9.16b, v2.16b
325 eor v8.16b, v3.16b, v0.16b
326 eor v7.16b, v7.16b, v5.16b
327 eor v3.16b, v4.16b, v7.16b
328 eor v4.16b, v7.16b, v0.16b
329 eor v7.16b, v8.16b, v3.16b
331 ext v8.16b, v0.16b, v0.16b, #8
332 ext v9.16b, v1.16b, v1.16b, #8
333 ldr q28, [x11] // load from .LISR in common case (x10 > 0)
334 ext v10.16b, v6.16b, v6.16b, #8
335 ext v16.16b, v3.16b, v3.16b, #8
336 ext v17.16b, v5.16b, v5.16b, #8
337 ext v18.16b, v4.16b, v4.16b, #8
338 eor v8.16b, v8.16b, v0.16b
339 eor v9.16b, v9.16b, v1.16b
340 eor v10.16b, v10.16b, v6.16b
341 eor v16.16b, v16.16b, v3.16b
342 eor v17.16b, v17.16b, v5.16b
343 ext v19.16b, v2.16b, v2.16b, #8
344 ext v20.16b, v7.16b, v7.16b, #8
345 eor v18.16b, v18.16b, v4.16b
346 eor v6.16b, v6.16b, v8.16b
347 eor v8.16b, v2.16b, v10.16b
348 eor v4.16b, v4.16b, v9.16b
349 eor v2.16b, v19.16b, v2.16b
350 eor v9.16b, v20.16b, v7.16b
351 eor v0.16b, v0.16b, v16.16b
352 eor v1.16b, v1.16b, v16.16b
353 eor v6.16b, v6.16b, v17.16b
354 eor v8.16b, v8.16b, v16.16b
355 eor v7.16b, v7.16b, v18.16b
356 eor v4.16b, v4.16b, v16.16b
357 eor v2.16b, v3.16b, v2.16b
358 eor v1.16b, v1.16b, v17.16b
359 eor v3.16b, v5.16b, v9.16b
360 eor v5.16b, v8.16b, v17.16b
361 eor v7.16b, v7.16b, v17.16b
362 ext v8.16b, v0.16b, v0.16b, #12
363 ext v9.16b, v6.16b, v6.16b, #12
364 ext v10.16b, v4.16b, v4.16b, #12
365 ext v16.16b, v1.16b, v1.16b, #12
366 ext v17.16b, v5.16b, v5.16b, #12
367 ext v18.16b, v7.16b, v7.16b, #12
368 eor v0.16b, v0.16b, v8.16b
369 eor v6.16b, v6.16b, v9.16b
370 eor v4.16b, v4.16b, v10.16b
371 ext v19.16b, v2.16b, v2.16b, #12
372 ext v20.16b, v3.16b, v3.16b, #12
373 eor v1.16b, v1.16b, v16.16b
374 eor v5.16b, v5.16b, v17.16b
375 eor v7.16b, v7.16b, v18.16b
376 eor v2.16b, v2.16b, v19.16b
377 eor v16.16b, v16.16b, v0.16b
378 eor v3.16b, v3.16b, v20.16b
379 eor v17.16b, v17.16b, v4.16b
380 eor v10.16b, v10.16b, v6.16b
381 ext v0.16b, v0.16b, v0.16b, #8
382 eor v9.16b, v9.16b, v1.16b
383 ext v1.16b, v1.16b, v1.16b, #8
384 eor v8.16b, v8.16b, v3.16b
385 eor v16.16b, v16.16b, v3.16b
386 eor v18.16b, v18.16b, v5.16b
387 eor v19.16b, v19.16b, v7.16b
388 ext v21.16b, v5.16b, v5.16b, #8
389 ext v5.16b, v7.16b, v7.16b, #8
390 eor v7.16b, v20.16b, v2.16b
391 ext v4.16b, v4.16b, v4.16b, #8
392 ext v20.16b, v3.16b, v3.16b, #8
393 eor v17.16b, v17.16b, v3.16b
394 ext v2.16b, v2.16b, v2.16b, #8
395 eor v3.16b, v10.16b, v3.16b
396 ext v10.16b, v6.16b, v6.16b, #8
397 eor v0.16b, v0.16b, v8.16b
398 eor v1.16b, v1.16b, v16.16b
399 eor v5.16b, v5.16b, v18.16b
400 eor v3.16b, v3.16b, v4.16b
401 eor v7.16b, v20.16b, v7.16b
402 eor v6.16b, v2.16b, v19.16b
403 eor v4.16b, v21.16b, v17.16b
404 eor v2.16b, v10.16b, v9.16b
406 ldr q28, [x11, #16]! // load from .LISRM0 on last round (x10 == 0)
410 ushr v8.2d, v0.2d, #1
413 ushr v16.2d, v2.2d, #1
415 ushr v18.2d, v6.2d, #1
417 eor v8.16b, v8.16b, v1.16b
418 ushr v20.2d, v3.2d, #1
419 eor v16.16b, v16.16b, v7.16b
420 eor v18.16b, v18.16b, v4.16b
421 and v8.16b, v8.16b, v9.16b
422 eor v20.16b, v20.16b, v5.16b
423 and v16.16b, v16.16b, v9.16b
424 and v18.16b, v18.16b, v9.16b
425 shl v21.2d, v8.2d, #1
426 eor v1.16b, v1.16b, v8.16b
427 and v8.16b, v20.16b, v9.16b
428 eor v7.16b, v7.16b, v16.16b
429 shl v9.2d, v16.2d, #1
430 eor v4.16b, v4.16b, v18.16b
431 shl v16.2d, v18.2d, #1
432 eor v0.16b, v0.16b, v21.16b
433 shl v18.2d, v8.2d, #1
434 eor v5.16b, v5.16b, v8.16b
435 eor v2.16b, v2.16b, v9.16b
436 eor v6.16b, v6.16b, v16.16b
437 ushr v8.2d, v1.2d, #2
438 eor v3.16b, v3.16b, v18.16b
439 ushr v9.2d, v0.2d, #2
440 ushr v16.2d, v7.2d, #2
441 ushr v18.2d, v2.2d, #2
442 eor v8.16b, v8.16b, v4.16b
443 eor v9.16b, v9.16b, v6.16b
444 eor v16.16b, v16.16b, v5.16b
445 eor v18.16b, v18.16b, v3.16b
446 and v8.16b, v8.16b, v17.16b
447 and v9.16b, v9.16b, v17.16b
448 and v16.16b, v16.16b, v17.16b
449 and v17.16b, v18.16b, v17.16b
450 eor v4.16b, v4.16b, v8.16b
452 eor v6.16b, v6.16b, v9.16b
454 eor v5.16b, v5.16b, v16.16b
455 shl v16.2d, v16.2d, #2
456 eor v3.16b, v3.16b, v17.16b
457 shl v17.2d, v17.2d, #2
458 eor v1.16b, v1.16b, v8.16b
459 eor v0.16b, v0.16b, v9.16b
460 eor v7.16b, v7.16b, v16.16b
461 eor v2.16b, v2.16b, v17.16b
462 ushr v8.2d, v4.2d, #4
463 ushr v9.2d, v6.2d, #4
464 ushr v16.2d, v1.2d, #4
465 ushr v17.2d, v0.2d, #4
466 eor v8.16b, v8.16b, v5.16b
467 eor v9.16b, v9.16b, v3.16b
468 eor v16.16b, v16.16b, v7.16b
469 eor v17.16b, v17.16b, v2.16b
470 and v8.16b, v8.16b, v19.16b
471 and v9.16b, v9.16b, v19.16b
472 and v16.16b, v16.16b, v19.16b
473 and v17.16b, v17.16b, v19.16b
474 eor v5.16b, v5.16b, v8.16b
476 eor v3.16b, v3.16b, v9.16b
478 eor v7.16b, v7.16b, v16.16b
479 shl v16.2d, v16.2d, #4
480 eor v2.16b, v2.16b, v17.16b
481 shl v17.2d, v17.2d, #4
482 eor v4.16b, v4.16b, v8.16b
483 eor v6.16b, v6.16b, v9.16b
484 eor v7.16b, v7.16b, v10.16b
485 eor v1.16b, v1.16b, v16.16b
486 eor v2.16b, v2.16b, v10.16b
487 eor v0.16b, v0.16b, v17.16b
488 eor v4.16b, v4.16b, v10.16b
489 eor v6.16b, v6.16b, v10.16b
490 eor v3.16b, v3.16b, v10.16b
491 eor v5.16b, v5.16b, v10.16b
492 eor v1.16b, v1.16b, v10.16b
493 eor v0.16b, v0.16b, v10.16b
495 .size _bsaes_decrypt8,.-_bsaes_decrypt8
497 .type _bsaes_const,%object
// Constant pool for the bit-sliced AES routines.  Each .quad pair
// below is one 128-bit tbl permutation vector.  The label lines
// (.LM0ISR, .LISR, ...) are elided from this view; identification
// below follows the surrounding comments -- confirm against upstream.
500 // InvShiftRows constants
501 // Used in _bsaes_decrypt8, which assumes contiguity
502 // .LM0ISR used with round 0 key
503 // .LISR used with middle round keys
504 // .LISRM0 used with final round key
506 .quad 0x0a0e0206070b0f03, 0x0004080c0d010509
508 .quad 0x0504070602010003, 0x0f0e0d0c080b0a09
510 .quad 0x01040b0e0205080f, 0x0306090c00070a0d
512 // ShiftRows constants
513 // Used in _bsaes_encrypt8, which assumes contiguity
514 // .LM0SR used with round 0 key
515 // .LSR used with middle round keys
516 // .LSRM0 used with final round key
518 .quad 0x0a0e02060f03070b, 0x0004080c05090d01
520 .quad 0x0504070600030201, 0x0f0e0d0c0a09080b
522 .quad 0x0304090e00050a0f, 0x01060b0c0207080d
// Presumably the .LM0 / endianness permutations referenced by
// _bsaes_key_convert (labels elided) -- TODO confirm.
525 .quad 0x02060a0e03070b0f, 0x0004080c0105090d
527 .quad 0x0105090d0004080c, 0x03070b0f02060a0e
529 // Used in ossl_bsaes_ctr32_encrypt_blocks, prior to dropping into
530 // _bsaes_encrypt8_alt, for round 0 key in place of .LM0SR
// (.LREVM0SR; label elided from this view)
532 .quad 0x090d01050c000408, 0x03070b0f060a0e02
535 .size _bsaes_const,.-_bsaes_const
537 .type _bsaes_encrypt8,%function
// _bsaes_encrypt8: encrypt eight 128-bit blocks (held in v0-v7) in
// parallel using a bit-sliced implementation.  Mirrors
// _bsaes_decrypt8 with the forward ShiftRows constants.
540 // x9 -> key (previously expanded using _bsaes_key_convert)
541 // x10 = number of rounds
545 // other general-purpose registers preserved
548 // other SIMD registers corrupted
// Round 0 AddRoundKey: XOR each block with v8.
// NOTE(review): v8 appears to hold the round-0 key and v9 the
// .LM0SR permutation; the establishing loads are elided from this
// view -- confirm against upstream.
554 eor v0.16b, v0.16b, v8.16b
555 eor v1.16b, v1.16b, v8.16b
557 eor v2.16b, v2.16b, v8.16b
558 eor v4.16b, v4.16b, v8.16b
559 eor v3.16b, v3.16b, v8.16b
560 eor v5.16b, v5.16b, v8.16b
// ShiftRows byte permutation, interleaved with the remaining key
// XORs for pipeline scheduling -- do not reorder.
561 tbl v0.16b, {v0.16b}, v9.16b
562 tbl v1.16b, {v1.16b}, v9.16b
563 tbl v2.16b, {v2.16b}, v9.16b
564 tbl v4.16b, {v4.16b}, v9.16b
565 eor v6.16b, v6.16b, v8.16b
566 eor v7.16b, v7.16b, v8.16b
567 tbl v3.16b, {v3.16b}, v9.16b
568 tbl v5.16b, {v5.16b}, v9.16b
569 tbl v6.16b, {v6.16b}, v9.16b
// First step of the bit-slice transpose, overlapping the last tbl.
570 ushr v8.2d, v0.2d, #1
572 tbl v7.16b, {v7.16b}, v9.16b
573 ushr v9.2d, v4.2d, #1
575 ushr v17.2d, v2.2d, #1
576 eor v8.16b, v8.16b, v1.16b
578 ushr v19.2d, v6.2d, #1
579 eor v9.16b, v9.16b, v5.16b
580 eor v17.16b, v17.16b, v3.16b
581 and v8.16b, v8.16b, v10.16b
582 eor v19.16b, v19.16b, v7.16b
583 and v9.16b, v9.16b, v10.16b
584 and v17.16b, v17.16b, v10.16b
585 eor v1.16b, v1.16b, v8.16b
587 and v10.16b, v19.16b, v10.16b
588 eor v5.16b, v5.16b, v9.16b
590 eor v3.16b, v3.16b, v17.16b
591 shl v17.2d, v17.2d, #1
592 eor v0.16b, v0.16b, v8.16b
593 shl v8.2d, v10.2d, #1
594 eor v7.16b, v7.16b, v10.16b
595 eor v4.16b, v4.16b, v9.16b
596 eor v2.16b, v2.16b, v17.16b
597 ushr v9.2d, v1.2d, #2
598 eor v6.16b, v6.16b, v8.16b
599 ushr v8.2d, v0.2d, #2
600 ushr v10.2d, v5.2d, #2
601 ushr v17.2d, v4.2d, #2
602 eor v9.16b, v9.16b, v3.16b
603 eor v8.16b, v8.16b, v2.16b
604 eor v10.16b, v10.16b, v7.16b
605 eor v17.16b, v17.16b, v6.16b
606 and v9.16b, v9.16b, v16.16b
607 and v8.16b, v8.16b, v16.16b
608 and v10.16b, v10.16b, v16.16b
609 and v16.16b, v17.16b, v16.16b
610 eor v3.16b, v3.16b, v9.16b
612 eor v2.16b, v2.16b, v8.16b
614 eor v7.16b, v7.16b, v10.16b
615 shl v10.2d, v10.2d, #2
616 eor v6.16b, v6.16b, v16.16b
617 shl v16.2d, v16.2d, #2
618 eor v1.16b, v1.16b, v9.16b
619 eor v0.16b, v0.16b, v8.16b
620 eor v5.16b, v5.16b, v10.16b
621 eor v4.16b, v4.16b, v16.16b
622 ushr v8.2d, v3.2d, #4
623 ushr v9.2d, v2.2d, #4
624 ushr v10.2d, v1.2d, #4
625 ushr v16.2d, v0.2d, #4
626 eor v8.16b, v8.16b, v7.16b
627 eor v9.16b, v9.16b, v6.16b
628 eor v10.16b, v10.16b, v5.16b
629 eor v16.16b, v16.16b, v4.16b
630 and v8.16b, v8.16b, v18.16b
631 and v9.16b, v9.16b, v18.16b
632 and v10.16b, v10.16b, v18.16b
633 and v16.16b, v16.16b, v18.16b
634 eor v7.16b, v7.16b, v8.16b
636 eor v6.16b, v6.16b, v9.16b
638 eor v5.16b, v5.16b, v10.16b
639 shl v10.2d, v10.2d, #4
640 eor v4.16b, v4.16b, v16.16b
641 shl v16.2d, v16.2d, #4
642 eor v3.16b, v3.16b, v8.16b
643 eor v2.16b, v2.16b, v9.16b
644 eor v1.16b, v1.16b, v10.16b
645 eor v0.16b, v0.16b, v16.16b
// Per-round AddRoundKey: load the bit-sliced round key from [x9]
// and XOR it into the eight bit planes v0-v7.
649 ld1 {v16.16b, v17.16b, v18.16b, v19.16b}, [x9], #64
650 ldp q8, q9, [x9], #32
651 eor v0.16b, v16.16b, v0.16b
653 eor v1.16b, v17.16b, v1.16b
655 eor v2.16b, v18.16b, v2.16b
656 eor v3.16b, v19.16b, v3.16b
657 eor v4.16b, v8.16b, v4.16b
658 eor v5.16b, v9.16b, v5.16b
// NOTE(review): the loads feeding v10 and v16 below (original lines
// 652/654) are elided from this view -- confirm against upstream.
659 eor v6.16b, v10.16b, v6.16b
660 eor v7.16b, v16.16b, v7.16b
// Row-shift permutation for this round; v28 is loaded elsewhere from
// .LSR (middle rounds) or .LSRM0 (final round) via x11.
661 tbl v0.16b, {v0.16b}, v28.16b
662 tbl v1.16b, {v1.16b}, v28.16b
663 tbl v2.16b, {v2.16b}, v28.16b
664 tbl v3.16b, {v3.16b}, v28.16b
665 tbl v4.16b, {v4.16b}, v28.16b
666 tbl v5.16b, {v5.16b}, v28.16b
667 tbl v6.16b, {v6.16b}, v28.16b
668 tbl v7.16b, {v7.16b}, v28.16b
670 eor v5.16b, v5.16b, v6.16b
671 eor v3.16b, v3.16b, v0.16b
673 eor v2.16b, v2.16b, v1.16b
674 eor v5.16b, v5.16b, v0.16b
675 eor v8.16b, v3.16b, v7.16b
676 eor v6.16b, v6.16b, v2.16b
677 eor v7.16b, v7.16b, v5.16b
678 eor v8.16b, v8.16b, v4.16b
679 eor v3.16b, v6.16b, v3.16b
680 eor v4.16b, v4.16b, v5.16b
681 eor v6.16b, v1.16b, v5.16b
682 eor v2.16b, v2.16b, v7.16b
683 eor v1.16b, v8.16b, v1.16b
684 eor v8.16b, v7.16b, v4.16b
685 eor v9.16b, v3.16b, v0.16b
686 eor v10.16b, v7.16b, v6.16b
687 eor v16.16b, v5.16b, v3.16b
688 eor v17.16b, v6.16b, v2.16b
689 eor v18.16b, v5.16b, v1.16b
690 eor v19.16b, v2.16b, v4.16b
691 eor v20.16b, v1.16b, v0.16b
692 orr v21.16b, v8.16b, v9.16b
693 orr v22.16b, v10.16b, v16.16b
694 eor v23.16b, v8.16b, v17.16b
695 eor v24.16b, v9.16b, v18.16b
696 and v19.16b, v19.16b, v20.16b
697 orr v20.16b, v17.16b, v18.16b
698 and v8.16b, v8.16b, v9.16b
699 and v9.16b, v17.16b, v18.16b
700 and v17.16b, v23.16b, v24.16b
701 and v10.16b, v10.16b, v16.16b
702 eor v16.16b, v21.16b, v19.16b
703 eor v18.16b, v20.16b, v19.16b
704 and v19.16b, v2.16b, v1.16b
705 and v20.16b, v6.16b, v5.16b
706 eor v21.16b, v22.16b, v17.16b
707 eor v9.16b, v9.16b, v10.16b
708 eor v10.16b, v16.16b, v17.16b
709 eor v16.16b, v18.16b, v8.16b
710 and v17.16b, v4.16b, v0.16b
711 orr v18.16b, v7.16b, v3.16b
712 eor v21.16b, v21.16b, v8.16b
713 eor v8.16b, v9.16b, v8.16b
714 eor v9.16b, v10.16b, v19.16b
715 eor v10.16b, v3.16b, v0.16b
716 eor v16.16b, v16.16b, v17.16b
717 eor v17.16b, v5.16b, v1.16b
718 eor v19.16b, v21.16b, v20.16b
719 eor v20.16b, v8.16b, v18.16b
720 eor v8.16b, v8.16b, v18.16b
721 eor v18.16b, v7.16b, v4.16b
722 eor v21.16b, v9.16b, v16.16b
723 eor v22.16b, v6.16b, v2.16b
724 and v23.16b, v9.16b, v19.16b
725 eor v24.16b, v10.16b, v17.16b
726 eor v25.16b, v0.16b, v1.16b
727 eor v26.16b, v7.16b, v6.16b
728 eor v27.16b, v18.16b, v22.16b
729 eor v28.16b, v3.16b, v5.16b
730 eor v29.16b, v16.16b, v23.16b
731 eor v30.16b, v20.16b, v23.16b
732 eor v23.16b, v20.16b, v23.16b
733 eor v31.16b, v4.16b, v2.16b
734 bsl v29.16b, v19.16b, v20.16b
735 bsl v30.16b, v9.16b, v16.16b
736 bsl v8.16b, v29.16b, v23.16b
737 bsl v20.16b, v23.16b, v29.16b
738 eor v9.16b, v30.16b, v29.16b
739 and v5.16b, v5.16b, v30.16b
740 and v8.16b, v8.16b, v30.16b
741 and v1.16b, v1.16b, v29.16b
742 eor v16.16b, v19.16b, v20.16b
743 and v2.16b, v2.16b, v29.16b
744 eor v19.16b, v9.16b, v29.16b
745 and v17.16b, v17.16b, v9.16b
746 eor v8.16b, v8.16b, v21.16b
747 and v20.16b, v22.16b, v9.16b
748 eor v21.16b, v29.16b, v16.16b
749 eor v22.16b, v29.16b, v16.16b
750 and v23.16b, v25.16b, v16.16b
751 and v6.16b, v6.16b, v19.16b
752 eor v25.16b, v8.16b, v16.16b
753 eor v29.16b, v30.16b, v8.16b
754 and v4.16b, v21.16b, v4.16b
755 and v8.16b, v28.16b, v8.16b
756 and v0.16b, v22.16b, v0.16b
757 eor v21.16b, v23.16b, v1.16b
758 eor v22.16b, v9.16b, v25.16b
759 eor v9.16b, v9.16b, v25.16b
760 eor v23.16b, v25.16b, v16.16b
761 and v3.16b, v29.16b, v3.16b
762 and v24.16b, v24.16b, v25.16b
763 and v25.16b, v27.16b, v25.16b
764 and v10.16b, v22.16b, v10.16b
765 and v9.16b, v9.16b, v18.16b
766 eor v18.16b, v19.16b, v23.16b
767 and v19.16b, v26.16b, v23.16b
768 eor v3.16b, v5.16b, v3.16b
769 eor v17.16b, v17.16b, v24.16b
770 eor v10.16b, v24.16b, v10.16b
771 and v16.16b, v31.16b, v16.16b
772 eor v20.16b, v20.16b, v25.16b
773 eor v9.16b, v25.16b, v9.16b
774 eor v4.16b, v2.16b, v4.16b
775 and v7.16b, v18.16b, v7.16b
776 eor v18.16b, v19.16b, v6.16b
777 eor v5.16b, v8.16b, v5.16b
778 eor v0.16b, v1.16b, v0.16b
779 eor v1.16b, v21.16b, v10.16b
780 eor v8.16b, v3.16b, v17.16b
781 eor v2.16b, v16.16b, v2.16b
782 eor v3.16b, v6.16b, v7.16b
783 eor v6.16b, v18.16b, v9.16b
784 eor v4.16b, v4.16b, v20.16b
785 eor v10.16b, v5.16b, v10.16b
786 eor v0.16b, v0.16b, v17.16b
787 eor v9.16b, v2.16b, v9.16b
788 eor v3.16b, v3.16b, v20.16b
789 eor v7.16b, v6.16b, v1.16b
790 eor v5.16b, v8.16b, v4.16b
791 eor v6.16b, v10.16b, v1.16b
792 eor v2.16b, v4.16b, v0.16b
793 eor v4.16b, v3.16b, v10.16b
794 eor v9.16b, v9.16b, v7.16b
795 eor v3.16b, v0.16b, v5.16b
796 eor v0.16b, v1.16b, v4.16b
797 eor v1.16b, v4.16b, v8.16b
798 eor v4.16b, v9.16b, v5.16b
799 eor v6.16b, v6.16b, v3.16b
801 ext v8.16b, v0.16b, v0.16b, #12
802 ext v9.16b, v4.16b, v4.16b, #12
804 ext v10.16b, v6.16b, v6.16b, #12
805 ext v16.16b, v1.16b, v1.16b, #12
806 ext v17.16b, v3.16b, v3.16b, #12
807 ext v18.16b, v7.16b, v7.16b, #12
808 eor v0.16b, v0.16b, v8.16b
809 eor v4.16b, v4.16b, v9.16b
810 eor v6.16b, v6.16b, v10.16b
811 ext v19.16b, v2.16b, v2.16b, #12
812 ext v20.16b, v5.16b, v5.16b, #12
813 eor v1.16b, v1.16b, v16.16b
814 eor v3.16b, v3.16b, v17.16b
815 eor v7.16b, v7.16b, v18.16b
816 eor v2.16b, v2.16b, v19.16b
817 eor v16.16b, v16.16b, v0.16b
818 eor v5.16b, v5.16b, v20.16b
819 eor v17.16b, v17.16b, v6.16b
820 eor v10.16b, v10.16b, v4.16b
821 ext v0.16b, v0.16b, v0.16b, #8
822 eor v9.16b, v9.16b, v1.16b
823 ext v1.16b, v1.16b, v1.16b, #8
824 eor v8.16b, v8.16b, v5.16b
825 eor v16.16b, v16.16b, v5.16b
826 eor v18.16b, v18.16b, v3.16b
827 eor v19.16b, v19.16b, v7.16b
828 ext v3.16b, v3.16b, v3.16b, #8
829 ext v7.16b, v7.16b, v7.16b, #8
830 eor v20.16b, v20.16b, v2.16b
831 ext v6.16b, v6.16b, v6.16b, #8
832 ext v21.16b, v5.16b, v5.16b, #8
833 eor v17.16b, v17.16b, v5.16b
834 ext v2.16b, v2.16b, v2.16b, #8
835 eor v10.16b, v10.16b, v5.16b
836 ext v22.16b, v4.16b, v4.16b, #8
837 eor v0.16b, v0.16b, v8.16b
838 eor v1.16b, v1.16b, v16.16b
839 eor v5.16b, v7.16b, v18.16b
840 eor v4.16b, v3.16b, v17.16b
841 eor v3.16b, v6.16b, v10.16b
842 eor v7.16b, v21.16b, v20.16b
843 eor v6.16b, v2.16b, v19.16b
844 eor v2.16b, v22.16b, v9.16b
846 ldr q28, [x11, #16]! // load from .LSRM0 on last round (x10 == 0)
850 ushr v8.2d, v0.2d, #1
853 ushr v16.2d, v3.2d, #1
855 ushr v18.2d, v4.2d, #1
857 eor v8.16b, v8.16b, v1.16b
858 ushr v20.2d, v2.2d, #1
859 eor v16.16b, v16.16b, v7.16b
860 eor v18.16b, v18.16b, v6.16b
861 and v8.16b, v8.16b, v9.16b
862 eor v20.16b, v20.16b, v5.16b
863 and v16.16b, v16.16b, v9.16b
864 and v18.16b, v18.16b, v9.16b
865 shl v21.2d, v8.2d, #1
866 eor v1.16b, v1.16b, v8.16b
867 and v8.16b, v20.16b, v9.16b
868 eor v7.16b, v7.16b, v16.16b
869 shl v9.2d, v16.2d, #1
870 eor v6.16b, v6.16b, v18.16b
871 shl v16.2d, v18.2d, #1
872 eor v0.16b, v0.16b, v21.16b
873 shl v18.2d, v8.2d, #1
874 eor v5.16b, v5.16b, v8.16b
875 eor v3.16b, v3.16b, v9.16b
876 eor v4.16b, v4.16b, v16.16b
877 ushr v8.2d, v1.2d, #2
878 eor v2.16b, v2.16b, v18.16b
879 ushr v9.2d, v0.2d, #2
880 ushr v16.2d, v7.2d, #2
881 ushr v18.2d, v3.2d, #2
882 eor v8.16b, v8.16b, v6.16b
883 eor v9.16b, v9.16b, v4.16b
884 eor v16.16b, v16.16b, v5.16b
885 eor v18.16b, v18.16b, v2.16b
886 and v8.16b, v8.16b, v17.16b
887 and v9.16b, v9.16b, v17.16b
888 and v16.16b, v16.16b, v17.16b
889 and v17.16b, v18.16b, v17.16b
890 eor v6.16b, v6.16b, v8.16b
892 eor v4.16b, v4.16b, v9.16b
894 eor v5.16b, v5.16b, v16.16b
895 shl v16.2d, v16.2d, #2
896 eor v2.16b, v2.16b, v17.16b
897 shl v17.2d, v17.2d, #2
898 eor v1.16b, v1.16b, v8.16b
899 eor v0.16b, v0.16b, v9.16b
900 eor v7.16b, v7.16b, v16.16b
901 eor v3.16b, v3.16b, v17.16b
902 ushr v8.2d, v6.2d, #4
903 ushr v9.2d, v4.2d, #4
904 ushr v16.2d, v1.2d, #4
905 ushr v17.2d, v0.2d, #4
906 eor v8.16b, v8.16b, v5.16b
907 eor v9.16b, v9.16b, v2.16b
908 eor v16.16b, v16.16b, v7.16b
909 eor v17.16b, v17.16b, v3.16b
910 and v8.16b, v8.16b, v19.16b
911 and v9.16b, v9.16b, v19.16b
912 and v16.16b, v16.16b, v19.16b
913 and v17.16b, v17.16b, v19.16b
914 eor v5.16b, v5.16b, v8.16b
916 eor v2.16b, v2.16b, v9.16b
918 eor v7.16b, v7.16b, v16.16b
919 shl v16.2d, v16.2d, #4
920 eor v3.16b, v3.16b, v17.16b
921 shl v17.2d, v17.2d, #4
922 eor v6.16b, v6.16b, v8.16b
923 eor v4.16b, v4.16b, v9.16b
924 eor v7.16b, v7.16b, v10.16b
925 eor v1.16b, v1.16b, v16.16b
926 eor v3.16b, v3.16b, v10.16b
927 eor v0.16b, v0.16b, v17.16b
928 eor v6.16b, v6.16b, v10.16b
929 eor v4.16b, v4.16b, v10.16b
930 eor v2.16b, v2.16b, v10.16b
931 eor v5.16b, v5.16b, v10.16b
932 eor v1.16b, v1.16b, v10.16b
933 eor v0.16b, v0.16b, v10.16b
935 .size _bsaes_encrypt8,.-_bsaes_encrypt8
937 .type _bsaes_key_convert,%function
// _bsaes_key_convert: expand an AES key schedule (as produced by the
// C key-setup) into the bit-sliced layout consumed by
// _bsaes_encrypt8/_bsaes_decrypt8.
940 // x9 -> input key (big-endian)
941 // x10 = number of rounds
942 // x17 -> output key (native endianness)
945 // x11 -> .LM0_bigendian
946 // x17 -> last quadword of output key
947 // other general-purpose registers preserved
951 // v15 = last round key (converted to native endianness)
952 // other SIMD registers corrupted
// Pick the byte-order permutation for the host endianness; the
// #ifdef __AARCH64EB__ guards around these two adr's are elided
// from this view -- only one executes per build.
955 adr x11, .LM0_littleendian
957 adr x11, .LM0_bigendian
959 ldr q0, [x9], #16 // load round 0 key
960 ldr q1, [x11] // .LM0
961 ldr q15, [x9], #16 // load round 1 key
963 movi v7.16b, #0x63 // compose .L63
964 movi v16.16b, #0x01 // bit masks
977 str q0, [x17], #16 // save round 0 key
// Per-round loop body (loop label elided): bit-slice one round key.
981 tbl v0.16b, {v15.16b}, v1.16b
982 ldr q15, [x9], #16 // load next round key
// XOR with 0x63 folds the S-box affine constant into the stored
// keys, as in the AArch32 bsaes implementation.
984 eor v0.16b, v0.16b, v7.16b
// cmtst against the eight one-bit masks (v16-v23; v17-v23 set up in
// elided lines) expands each key bit into a full 0x00/0xff byte,
// producing the eight bit-slice planes of this round key.
985 cmtst v24.16b, v0.16b, v16.16b
986 cmtst v25.16b, v0.16b, v17.16b
987 cmtst v26.16b, v0.16b, v18.16b
988 cmtst v27.16b, v0.16b, v19.16b
989 cmtst v28.16b, v0.16b, v20.16b
990 cmtst v29.16b, v0.16b, v21.16b
991 cmtst v30.16b, v0.16b, v22.16b
992 cmtst v31.16b, v0.16b, v23.16b
994 st1 {v24.16b-v27.16b}, [x17], #64 // write bit-sliced round key
995 st1 {v28.16b-v31.16b}, [x17], #64
// The final round key stays in v15 (see interface comment above).
998 // don't save last round key
// NOTE(review): rev32 here looks like the big-endian fixup path
// (guard lines elided) -- confirm against upstream.
1000 rev32 v15.16b, v15.16b
1001 adr x11, .LM0_bigendian
1004 .size _bsaes_key_convert,.-_bsaes_key_convert
1006 .globl ossl_bsaes_cbc_encrypt
1007 .type ossl_bsaes_cbc_encrypt,%function
// ossl_bsaes_cbc_encrypt: CBC-mode entry point.  Despite the name,
// the comment below shows only the decrypt direction is handled
// by this bit-sliced path.
1010 // x0 -> input ciphertext
1011 // x1 -> output plaintext
1012 // x2 = size of ciphertext and plaintext in bytes (assumed a multiple of 16)
1014 // x4 -> 128-bit initialisation vector (or preceding 128-bit block of ciphertext if continuing after an earlier call)
1017 // Output plaintext filled in
1018 // Initialisation vector overwritten with last quadword of ciphertext
1019 // No output registers, usual AAPCS64 register preservation
1020 ossl_bsaes_cbc_encrypt:
1026 // it is up to the caller to make sure we are called with enc == 0
// Prologue: save frame record and the callee-saved SIMD registers
// this function clobbers (d8, d9, d10, d15); restored in the
// epilogue before returning.
1028 stp x29, x30, [sp, #-48]!
1029 stp d8, d9, [sp, #16]
1030 stp d10, d15, [sp, #32]
1031 lsr x2, x2, #4 // len in 16 byte blocks
1033 ldr w15, [x3, #240] // get # of rounds
1036 // allocate the key schedule on the stack
1038 sub x17, x17, x15, lsl #7 // 128 bytes per inner round key, less 96 bytes
1040 // populate the key schedule
1041 mov x9, x3 // pass key
1042 mov x10, x15 // pass # of rounds
1043 mov sp, x17 // sp is sp
1044 bl _bsaes_key_convert
// _bsaes_key_convert leaves the last round key in q15 and the
// permuted round-0 key bit planes in SIMD registers; store/fix them
// into the stack key schedule.
1046 str q15, [x17] // save last round key
1047 eor v6.16b, v6.16b, v7.16b // fix up round 0 key (by XORing with 0x63)
1050 ldr q15, [x4] // load IV
1056 bmi .Lcbc_dec_loop_finish
1058 ldr q0, [x0], #16 // load input
1059 mov x9, sp // pass the key
1067 ldr q7, [x0], #-7*16
1071 ldr q16, [x0], #16 // reload input
1072 eor v0.16b, v0.16b, v15.16b // ^= IV
1073 eor v1.16b, v1.16b, v16.16b
1074 str q0, [x1], #16 // write output
1078 eor v1.16b, v4.16b, v1.16b
1080 eor v2.16b, v2.16b, v4.16b
1081 eor v0.16b, v6.16b, v0.16b
1085 eor v0.16b, v7.16b, v4.16b
1091 eor v0.16b, v5.16b, v2.16b
1092 eor v1.16b, v3.16b, v1.16b
1098 .Lcbc_dec_loop_finish:
1102 ldr q0, [x0], #16 // load input
1106 mov x9, sp // pass the key
1119 ldr q6, [x0], #-6*16
1123 ldr q5, [x0], #16 // reload input
1124 eor v0.16b, v0.16b, v15.16b // ^= IV
1128 str q0, [x1], #16 // write output
1130 eor v1.16b, v1.16b, v5.16b
1132 eor v6.16b, v6.16b, v8.16b
1134 eor v4.16b, v4.16b, v9.16b
1135 eor v2.16b, v2.16b, v10.16b
1137 eor v0.16b, v7.16b, v0.16b
1139 eor v1.16b, v3.16b, v5.16b
1149 ldr q3, [x0], #16 // reload input
1150 eor v0.16b, v0.16b, v15.16b // ^= IV
1154 str q0, [x1], #16 // write output
1156 eor v1.16b, v1.16b, v3.16b
1158 eor v3.16b, v6.16b, v5.16b
1159 eor v4.16b, v4.16b, v8.16b
1160 eor v2.16b, v2.16b, v9.16b
1162 eor v0.16b, v7.16b, v0.16b
1172 ldr q3, [x0], #16 // reload input
1173 eor v0.16b, v0.16b, v15.16b // ^= IV
1177 str q0, [x1], #16 // write output
1179 eor v0.16b, v1.16b, v3.16b
1180 eor v1.16b, v6.16b, v5.16b
1181 eor v3.16b, v4.16b, v7.16b
1183 eor v0.16b, v2.16b, v8.16b
1192 ldr q2, [x0], #16 // reload input
1193 eor v0.16b, v0.16b, v15.16b // ^= IV
1196 str q0, [x1], #16 // write output
1198 eor v0.16b, v1.16b, v2.16b
1199 eor v1.16b, v6.16b, v3.16b
1200 eor v2.16b, v4.16b, v5.16b
1209 ldr q2, [x0], #16 // reload input
1210 eor v0.16b, v0.16b, v15.16b // ^= IV
1213 str q0, [x1], #16 // write output
1214 eor v0.16b, v1.16b, v2.16b
1215 eor v1.16b, v6.16b, v3.16b
1223 ldr q2, [x0], #16 // reload input
1224 eor v0.16b, v0.16b, v15.16b // ^= IV
1226 str q0, [x1], #16 // write output
1227 eor v0.16b, v1.16b, v2.16b
// Single-block tail: preserve the output and IV pointers across the
// (elided) one-block AES call, then apply the CBC IV XOR in place.
1233 stp x1, x4, [sp, #-32]!
1240 ldp x1, x4, [sp], #32
1241 ldr q0, [x1] // load result
1242 eor v0.16b, v0.16b, v8.16b // ^= IV
1243 str q0, [x1] // write output
1249 .Lcbc_dec_bzero:// wipe key schedule [if any]
// Stores advance sp back over the stack key schedule, overwriting it
// (loop branch elided from this view).  q0/q1 contents at this point
// are whatever the cipher left behind.
1250 stp q0, q1, [sp], #32
// Epilogue: hand back the chaining value and restore the registers
// saved in the prologue.
1253 str q15, [x4] // return IV
1254 ldp d8, d9, [sp, #16]
1255 ldp d10, d15, [sp, #32]
1256 ldp x29, x30, [sp], #48
1258 .size ossl_bsaes_cbc_encrypt,.-ossl_bsaes_cbc_encrypt
//
// ossl_bsaes_ctr32_encrypt_blocks: CTR mode with a 32-bit counter kept
// big-endian in the final word of the IV.  Eight or more blocks take the
// bit-sliced bulk path (_bsaes_encrypt8_alt, key schedule on the stack);
// fewer than eight branch to the .Lctr_enc_short path, which encrypts the
// counter one block at a time through a stack buffer.
//
1260 .globl ossl_bsaes_ctr32_encrypt_blocks
1261 .type ossl_bsaes_ctr32_encrypt_blocks,%function
1264 // x0 -> input text (whole 16-byte blocks)
1265 // x1 -> output text (whole 16-byte blocks)
1266 // x2 = number of 16-byte blocks to encrypt/decrypt (> 0)
// x3 -> key (the # of rounds is read from [x3, #240] below)
1268 // x4 -> initial value of 128-bit counter (stored big-endian) which increments, modulo 2^32, for each block
1270 // Output text filled in
1271 // No output registers, usual AAPCS64 register preservation
1272 ossl_bsaes_ctr32_encrypt_blocks:
1274 cmp x2, #8 // use plain AES for
1275 blo .Lctr_enc_short // small sizes
// Bulk path: preserve the callee-saved SIMD registers (low halves d8-d15)
// that the bit-sliced core clobbers.
1277 stp x29, x30, [sp, #-80]!
1278 stp d8, d9, [sp, #16]
1279 stp d10, d11, [sp, #32]
1280 stp d12, d13, [sp, #48]
1281 stp d14, d15, [sp, #64]
1283 ldr w15, [x3, #240] // get # of rounds
1286 // allocate the key schedule on the stack
1288 sub x17, x17, x15, lsl #7 // 128 bytes per inner round key, less 96 bytes
1290 // populate the key schedule
1291 mov x9, x3 // pass key
1292 mov x10, x15 // pass # of rounds
1293 mov sp, x17 // sp is sp
1294 bl _bsaes_key_convert
1295 eor v7.16b, v7.16b, v15.16b // fix up last round key
1296 str q7, [x17] // save last round key
1298 ldr q0, [x4] // load counter
1299 add x13, x11, #.LREVM0SR-.LM0_bigendian
1300 ldr q4, [sp] // load round0 key
// Build the per-block counter increments: v11..v14 hold the values
// 1..4 placed in the top 32-bit lane (i.e. 1<<96 .. 4<<96).
1302 movi v8.4s, #1 // compose 1<<96
1304 rev32 v15.16b, v0.16b
1305 rev32 v0.16b, v0.16b
1306 ext v11.16b, v9.16b, v8.16b, #4
1307 rev32 v4.16b, v4.16b
1308 add v12.4s, v11.4s, v11.4s // compose 2<<96
1309 str q4, [sp] // save adjusted round0 key
1310 add v13.4s, v11.4s, v12.4s // compose 3<<96
1311 add v14.4s, v12.4s, v12.4s // compose 4<<96
1316 // Intermix prologue from _bsaes_encrypt8 to use the opportunity
1317 // to flip byte order in 32-bit counter
// v1..v7 receive counter+1 .. counter+7; v15 is advanced by 8 for the
// next loop iteration ("next counter").
1319 add v1.4s, v15.4s, v11.4s // +1
1320 add x9, sp, #0x10 // pass next round key
1321 add v2.4s, v15.4s, v12.4s // +2
1322 ldr q9, [x13] // .LREVM0SR
1323 ldr q8, [sp] // load round0 key
1324 add v3.4s, v15.4s, v13.4s // +3
1325 mov x10, x15 // pass rounds
1326 sub x11, x13, #.LREVM0SR-.LSR // pass constants
1327 add v6.4s, v2.4s, v14.4s
1328 add v4.4s, v15.4s, v14.4s // +4
1329 add v7.4s, v3.4s, v14.4s
1330 add v15.4s, v4.4s, v14.4s // next counter
1331 add v5.4s, v1.4s, v14.4s
1333 bl _bsaes_encrypt8_alt
1336 blo .Lctr_enc_loop_done
// XOR the 8 keystream blocks with the input text.
1340 eor v1.16b, v1.16b, v17.16b
1342 eor v0.16b, v0.16b, v16.16b
1343 eor v4.16b, v4.16b, v17.16b
1350 eor v4.16b, v6.16b, v16.16b
1351 eor v1.16b, v3.16b, v1.16b
1353 eor v3.16b, v7.16b, v3.16b
1355 eor v2.16b, v2.16b, v6.16b
1357 eor v5.16b, v5.16b, v6.16b
// Tail of the bulk loop: flush however many of the 8 encrypted counter
// blocks correspond to remaining input, one at a time.
1368 .Lctr_enc_loop_done:
1370 ldr q16, [x0], #16 // load input
1371 eor v0.16b, v0.16b, v16.16b
1372 str q0, [x1], #16 // write output
1376 eor v1.16b, v1.16b, v17.16b
1380 eor v4.16b, v4.16b, v18.16b
1385 eor v6.16b, v6.16b, v19.16b
1389 eor v3.16b, v3.16b, v20.16b
1394 eor v7.16b, v7.16b, v21.16b
1398 eor v2.16b, v2.16b, v22.16b
1404 .Lctr_enc_bzero: // wipe key schedule [if any]
1405 stp q0, q1, [sp], #32
1409 ldp d8, d9, [sp, #16]
1410 ldp d10, d11, [sp, #32]
1411 ldp d12, d13, [sp, #48]
1412 ldp d14, d15, [sp, #64]
1413 ldp x29, x30, [sp], #80
// Short path (< 8 blocks): encrypt the counter block-by-block using
// stack buffers at sp+80 (counter in) and sp+64 (keystream out).
1417 stp x29, x30, [sp, #-96]!
1418 stp x19, x20, [sp, #16]
1419 stp x21, x22, [sp, #32]
1422 mov x19, x0 // copy arguments
1426 ldr w23, [x4, #12] // load counter .LSW
1427 ldr q1, [x4] // load whole counter value
1428 #ifdef __AARCH64EL__
1431 str q1, [sp, #80] // copy counter value
1433 .Lctr_enc_short_loop:
1434 add x0, sp, #80 // input counter value
1435 add x1, sp, #64 // output on the stack
1440 ldr q0, [x19], #16 // load input
1441 ldr q1, [sp, #64] // load encrypted counter
1443 #ifdef __AARCH64EL__
1445 str w0, [sp, #80+12] // next counter value
1447 str w23, [sp, #80+12] // next counter value
1449 eor v0.16b, v0.16b, v1.16b
1450 str q0, [x20], #16 // store output
1452 bne .Lctr_enc_short_loop
// NOTE(review): counter/keystream scratch is overwritten before the
// epilogue so no key-derived data is left on the stack.
1456 stp q0, q1, [sp, #64]
1459 ldp x21, x22, [sp, #32]
1460 ldp x19, x20, [sp, #16]
1461 ldp x29, x30, [sp], #96
1463 .size ossl_bsaes_ctr32_encrypt_blocks,.-ossl_bsaes_ctr32_encrypt_blocks
//
// ossl_bsaes_xts_encrypt: XTS-mode AES encryption with ciphertext
// stealing for lengths that are not a multiple of 16 bytes.  Per-block
// tweaks (v11..v18) are produced by doubling in GF(2^128): sshr/cmtst
// against the .Lxts_magic+16 mask detects lanes whose doubling needs
// reduction, and the and/ext/eor sequence folds the reduction back in.
// Eight blocks are processed per bulk iteration; shorter tails follow.
//
1465 .globl ossl_bsaes_xts_encrypt
1466 .type ossl_bsaes_xts_encrypt,%function
1469 // x0 -> input plaintext
1470 // x1 -> output ciphertext
1471 // x2 -> length of text in bytes (must be at least 16)
1472 // x3 -> key1 (used to encrypt the XORed plaintext blocks)
1473 // x4 -> key2 (used to encrypt the initial vector to yield the initial tweak)
1474 // x5 -> 16-byte initial vector (typically, sector number)
1476 // Output ciphertext filled in
1477 // No output registers, usual AAPCS64 register preservation
1478 ossl_bsaes_xts_encrypt:
// Stack frame layout (192 bytes fixed + variable key schedule):
1481 // nrounds*128-96 bytes: key schedule
1483 // 16 bytes: frame record
1484 // 4*16 bytes: tweak storage across _bsaes_encrypt8
1485 // 6*8 bytes: storage for 5 callee-saved general-purpose registers
1486 // 8*8 bytes: storage for 8 callee-saved SIMD registers
1487 stp x29, x30, [sp, #-192]!
1488 stp x19, x20, [sp, #80]
1489 stp x21, x22, [sp, #96]
1491 stp d8, d9, [sp, #128]
1492 stp d10, d11, [sp, #144]
1493 stp d12, d13, [sp, #160]
1494 stp d14, d15, [sp, #176]
1502 // generate initial tweak
1510 ldr w1, [x23, #240] // get # of rounds
1511 // allocate the key schedule on the stack
1513 sub x17, x17, x1, lsl #7 // 128 bytes per inner round key, less 96 bytes
1515 // populate the key schedule
1516 mov x9, x23 // pass key
1517 mov x10, x1 // pass # of rounds
1519 bl _bsaes_key_convert
1520 eor v15.16b, v15.16b, v7.16b // fix up last round key
1521 str q15, [x17] // save last round key
// x22 tracks remaining length; 0x80 = 8 blocks per bulk iteration.
1523 subs x22, x22, #0x80
1530 mov x10, x1 // pass rounds
// First bulk iteration: derive tweaks v12..v15 (and beyond) from v11 by
// repeated GF(2^128) doubling, XOR each plaintext block with its tweak.
1533 sshr v1.2d, v11.2d, #63
1534 mov x9, sp // pass key schedule
1535 ldr q6, .Lxts_magic+16
1536 add v2.2d, v11.2d, v11.2d
1537 cmtst v3.2d, v11.2d, v6.2d
1538 and v1.16b, v1.16b, v8.16b
1539 ext v1.16b, v1.16b, v1.16b, #8
1540 and v3.16b, v3.16b, v8.16b
1542 eor v12.16b, v2.16b, v1.16b
1543 eor v1.16b, v4.16b, v12.16b
1544 eor v0.16b, v0.16b, v11.16b
1545 cmtst v2.2d, v12.2d, v6.2d
1546 add v4.2d, v12.2d, v12.2d
1548 ext v3.16b, v3.16b, v3.16b, #8
1549 and v2.16b, v2.16b, v8.16b
1550 eor v13.16b, v4.16b, v3.16b
1552 ext v4.16b, v2.16b, v2.16b, #8
1553 eor v2.16b, v3.16b, v13.16b
1555 add v5.2d, v13.2d, v13.2d
1556 cmtst v7.2d, v13.2d, v6.2d
1557 and v7.16b, v7.16b, v8.16b
1559 ext v7.16b, v7.16b, v7.16b, #8
1561 eor v14.16b, v5.16b, v4.16b
1563 add v4.2d, v14.2d, v14.2d
1564 eor v3.16b, v3.16b, v14.16b
1565 eor v15.16b, v4.16b, v7.16b
1566 add v5.2d, v15.2d, v15.2d
1568 cmtst v4.2d, v14.2d, v6.2d
1569 and v17.16b, v4.16b, v8.16b
1570 cmtst v18.2d, v15.2d, v6.2d
1571 eor v4.16b, v9.16b, v15.16b
1572 ext v9.16b, v17.16b, v17.16b, #8
1573 eor v9.16b, v5.16b, v9.16b
1574 add v17.2d, v9.2d, v9.2d
1575 and v18.16b, v18.16b, v8.16b
1576 eor v5.16b, v10.16b, v9.16b
1578 ext v10.16b, v18.16b, v18.16b, #8
1579 cmtst v9.2d, v9.2d, v6.2d
1580 and v9.16b, v9.16b, v8.16b
1581 eor v10.16b, v17.16b, v10.16b
1582 cmtst v17.2d, v10.2d, v6.2d
1583 eor v6.16b, v16.16b, v10.16b
1585 ext v9.16b, v9.16b, v9.16b, #8
1586 add v10.2d, v10.2d, v10.2d
1587 eor v9.16b, v10.16b, v9.16b
1589 eor v7.16b, v7.16b, v9.16b
1590 add v9.2d, v9.2d, v9.2d
1591 and v8.16b, v17.16b, v8.16b
1592 ext v8.16b, v8.16b, v8.16b, #8
1593 eor v8.16b, v9.16b, v8.16b
1594 str q8, [x2] // next round tweak
// Post-encryption: XOR the cipher outputs with the saved tweaks and
// advance to the next 8-block group.
1599 eor v0.16b, v0.16b, v11.16b
1600 eor v1.16b, v1.16b, v12.16b
1602 eor v4.16b, v4.16b, v13.16b
1603 eor v6.16b, v6.16b, v14.16b
1605 eor v3.16b, v3.16b, v15.16b
1606 subs x22, x22, #0x80
1608 ldr q11, [x0] // next round tweak
1610 eor v0.16b, v7.16b, v8.16b
1611 eor v1.16b, v2.16b, v9.16b
1613 eor v2.16b, v5.16b, v10.16b
// Fewer than 8 blocks remain (x22 went negative); the sections below
// handle tails of 7 down to 1 whole blocks, then ciphertext stealing.
1622 adds x22, x22, #0x70
1626 sshr v1.2d, v11.2d, #63
1627 add v2.2d, v11.2d, v11.2d
1628 ldr q9, .Lxts_magic+16
1629 subs x22, x22, #0x10
1631 and v1.16b, v1.16b, v8.16b
1632 cmtst v3.2d, v11.2d, v9.2d
1633 ext v1.16b, v1.16b, v1.16b, #8
1634 and v3.16b, v3.16b, v8.16b
1635 eor v12.16b, v2.16b, v1.16b
1636 ext v1.16b, v3.16b, v3.16b, #8
1637 add v2.2d, v12.2d, v12.2d
1638 cmtst v3.2d, v12.2d, v9.2d
1639 eor v13.16b, v2.16b, v1.16b
1640 and v22.16b, v3.16b, v8.16b
1643 ext v2.16b, v22.16b, v22.16b, #8
1644 add v3.2d, v13.2d, v13.2d
1646 cmtst v4.2d, v13.2d, v9.2d
1647 subs x22, x22, #0x10
1648 eor v14.16b, v3.16b, v2.16b
1649 and v23.16b, v4.16b, v8.16b
1652 ext v3.16b, v23.16b, v23.16b, #8
1653 add v4.2d, v14.2d, v14.2d
1655 cmtst v5.2d, v14.2d, v9.2d
1656 eor v0.16b, v0.16b, v11.16b
1657 subs x22, x22, #0x10
1658 eor v15.16b, v4.16b, v3.16b
1659 and v24.16b, v5.16b, v8.16b
1662 ext v4.16b, v24.16b, v24.16b, #8
1663 add v5.2d, v15.2d, v15.2d
1665 cmtst v6.2d, v15.2d, v9.2d
1666 eor v1.16b, v1.16b, v12.16b
1667 subs x22, x22, #0x10
1668 eor v16.16b, v5.16b, v4.16b
1669 and v25.16b, v6.16b, v8.16b
1672 ext v5.16b, v25.16b, v25.16b, #8
1673 add v6.2d, v16.2d, v16.2d
1675 cmtst v7.2d, v16.2d, v9.2d
1677 eor v2.16b, v2.16b, v13.16b
1679 subs x22, x22, #0x10
1680 eor v17.16b, v6.16b, v5.16b
1681 and v26.16b, v7.16b, v8.16b
1684 ext v7.16b, v26.16b, v26.16b, #8
1685 add v18.2d, v17.2d, v17.2d
1687 eor v3.16b, v3.16b, v14.16b
1689 subs x22, x22, #0x10
1690 eor v18.16b, v18.16b, v7.16b
1694 eor v4.16b, v4.16b, v15.16b
1695 eor v5.16b, v5.16b, v16.16b
1696 str q18, [x0] // next round tweak
1697 mov x9, sp // pass key schedule
1701 eor v6.16b, v6.16b, v17.16b
// 7-block tail: un-tweak the outputs.
1706 eor v0.16b, v0.16b, v11.16b
1707 eor v1.16b, v1.16b, v12.16b
1709 eor v4.16b, v4.16b, v13.16b
1710 eor v6.16b, v6.16b, v14.16b
1711 eor v3.16b, v3.16b, v15.16b
1712 ldr q11, [x0] // next round tweak
1715 eor v0.16b, v7.16b, v16.16b
1716 eor v1.16b, v2.16b, v17.16b
// 6-block tail.
1726 eor v4.16b, v4.16b, v15.16b
1727 eor v5.16b, v5.16b, v16.16b
1728 mov x9, sp // pass key schedule
1729 mov x10, x1 // pass rounds
1735 eor v0.16b, v0.16b, v11.16b
1736 eor v1.16b, v1.16b, v12.16b
1737 eor v4.16b, v4.16b, v13.16b
1738 eor v6.16b, v6.16b, v14.16b
1739 ldr q11, [x0] // next round tweak
1740 eor v3.16b, v3.16b, v15.16b
1743 eor v0.16b, v7.16b, v16.16b
// 5-block tail.
1752 eor v3.16b, v3.16b, v14.16b
1753 eor v4.16b, v4.16b, v15.16b
1754 mov x9, sp // pass key schedule
1755 mov x10, x1 // pass rounds
1760 eor v0.16b, v0.16b, v11.16b
1761 eor v1.16b, v1.16b, v12.16b
1762 ldr q11, [x0] // next round tweak
1763 eor v4.16b, v4.16b, v13.16b
1764 eor v6.16b, v6.16b, v14.16b
1765 eor v3.16b, v3.16b, v15.16b
// 4-block tail.
1775 eor v2.16b, v2.16b, v13.16b
1776 eor v3.16b, v3.16b, v14.16b
1777 mov x9, sp // pass key schedule
1778 mov x10, x1 // pass rounds
1783 eor v0.16b, v0.16b, v11.16b
1784 eor v1.16b, v1.16b, v12.16b
1785 eor v4.16b, v4.16b, v13.16b
1786 eor v6.16b, v6.16b, v14.16b
1787 mov v11.16b, v15.16b // next round tweak
// 3-block tail.
1796 eor v1.16b, v1.16b, v12.16b
1797 eor v2.16b, v2.16b, v13.16b
1798 mov x9, sp // pass key schedule
1799 mov x10, x1 // pass rounds
1804 eor v0.16b, v0.16b, v11.16b
1805 eor v1.16b, v1.16b, v12.16b
1806 eor v4.16b, v4.16b, v13.16b
1807 mov v11.16b, v14.16b // next round tweak
// 2-block tail.
1815 eor v0.16b, v0.16b, v11.16b
1816 eor v1.16b, v1.16b, v12.16b
1817 mov x9, sp // pass key schedule
1818 mov x10, x1 // pass rounds
1823 eor v0.16b, v0.16b, v11.16b
1824 eor v1.16b, v1.16b, v12.16b
1825 mov v11.16b, v13.16b // next round tweak
// 1-block tail.
1832 eor v0.16b, v0.16b, v11.16b
1836 mov v13.d[0], v11.d[1] // just in case AES_encrypt corrupts top half of callee-saved SIMD registers
1837 mov v14.d[0], v12.d[1]
1843 trn1 v13.2d, v11.2d, v13.2d
1844 trn1 v11.2d, v12.2d, v14.2d // next round tweak
1845 eor v0.16b, v0.16b, v13.16b
1849 adds x22, x22, #0x10
// Ciphertext stealing for a trailing partial block:
1853 // Penultimate plaintext block produces final ciphertext part-block
1854 // plus remaining part of final plaintext block. Move ciphertext part
1855 // to final position and reuse penultimate ciphertext block buffer to
1856 // construct final plaintext block
1859 ldrb w1, [x21, #-0x10]
1860 strb w0, [x21, #-0x10]
1866 // Finally encrypt the penultimate ciphertext block using the
1869 eor v0.16b, v0.16b, v11.16b
1875 mov v13.d[0], v11.d[1] // just in case AES_encrypt corrupts top half of callee-saved SIMD registers
1879 trn1 v11.2d, v11.2d, v13.2d
1881 eor v0.16b, v0.16b, v11.16b
1888 .Lxts_enc_bzero: // wipe key schedule
1889 stp q0, q1, [sp], #32
// Epilogue: restore callee-saved state and pop the 192-byte frame.
1893 ldp x19, x20, [sp, #80]
1894 ldp x21, x22, [sp, #96]
1896 ldp d8, d9, [sp, #128]
1897 ldp d10, d11, [sp, #144]
1898 ldp d12, d13, [sp, #160]
1899 ldp d14, d15, [sp, #176]
1900 ldp x29, x30, [sp], #192
1902 .size ossl_bsaes_xts_encrypt,.-ossl_bsaes_xts_encrypt
1904 // The assembler doesn't seem capable of de-duplicating these when expressed
1905 // using `ldr qd,=` syntax, so assign a symbolic address
// The first quadword pair {1, 0x87} is the XTS GF(2^128) reduction
// constant (x^128 = x^7 + x^2 + x + 1).  The second pair, loaded via
// `ldr qN, .Lxts_magic+16` in the XTS routines, is the per-64-bit-lane
// cmtst mask used to detect when doubling a tweak requires reduction.
1908 .quad 1, 0x87, 0x4000000000000000, 0x4000000000000000
//
// ossl_bsaes_xts_decrypt: XTS-mode AES decryption, mirror of
// ossl_bsaes_xts_encrypt.  Tweaks are advanced by GF(2^128) doubling
// (sshr/cmtst against .Lxts_magic+16, folded in with and/ext/eor).
// When the length is not a multiple of 16, the final whole block is
// withheld from the bulk loop (tst/csel below) and one extra tweak
// round is computed for the stolen-ciphertext fix-up at the end.
//
1910 .globl ossl_bsaes_xts_decrypt
1911 .type ossl_bsaes_xts_decrypt,%function
1914 // x0 -> input ciphertext
1915 // x1 -> output plaintext
1916 // x2 -> length of text in bytes (must be at least 16)
1917 // x3 -> key1 (used to decrypt the XORed ciphertext blocks)
1918 // x4 -> key2 (used to encrypt the initial vector to yield the initial tweak)
1919 // x5 -> 16-byte initial vector (typically, sector number)
1921 // Output plaintext filled in
1922 // No output registers, usual AAPCS64 register preservation
1923 ossl_bsaes_xts_decrypt:
// Stack frame layout (192 bytes fixed + variable key schedule):
1926 // nrounds*128-96 bytes: key schedule
1928 // 16 bytes: frame record
1929 // 4*16 bytes: tweak storage across _bsaes_decrypt8
1930 // 6*8 bytes: storage for 5 callee-saved general-purpose registers
1931 // 8*8 bytes: storage for 8 callee-saved SIMD registers
1932 stp x29, x30, [sp, #-192]!
1933 stp x19, x20, [sp, #80]
1934 stp x21, x22, [sp, #96]
1936 stp d8, d9, [sp, #128]
1937 stp d10, d11, [sp, #144]
1938 stp d12, d13, [sp, #160]
1939 stp d14, d15, [sp, #176]
1947 // generate initial tweak
1955 ldr w1, [x23, #240] // get # of rounds
1956 // allocate the key schedule on the stack
1958 sub x17, x17, x1, lsl #7 // 128 bytes per inner round key, less 96 bytes
1960 // populate the key schedule
1961 mov x9, x23 // pass key
1962 mov x10, x1 // pass # of rounds
1964 bl _bsaes_key_convert
1966 str q15, [x17] // save last round key
1967 eor v6.16b, v6.16b, v7.16b // fix up round 0 key (by XORing with 0x63)
// Reserve the final whole block when the length is not a multiple of 16,
// so it can be handled by the ciphertext-stealing path.
1971 tst x22, #0xf // if not multiple of 16
1972 csel x22, x30, x22, ne // subtract another 16 bytes
1973 subs x22, x22, #0x80
1981 mov x10, x1 // pass rounds
// First bulk iteration: derive tweaks v12..v15 (and beyond) from v11 by
// repeated GF(2^128) doubling, XOR each ciphertext block with its tweak.
1984 sshr v1.2d, v11.2d, #63
1985 mov x9, sp // pass key schedule
1986 ldr q6, .Lxts_magic+16
1987 add v2.2d, v11.2d, v11.2d
1988 cmtst v3.2d, v11.2d, v6.2d
1989 and v1.16b, v1.16b, v8.16b
1990 ext v1.16b, v1.16b, v1.16b, #8
1991 and v3.16b, v3.16b, v8.16b
1993 eor v12.16b, v2.16b, v1.16b
1994 eor v1.16b, v4.16b, v12.16b
1995 eor v0.16b, v0.16b, v11.16b
1996 cmtst v2.2d, v12.2d, v6.2d
1997 add v4.2d, v12.2d, v12.2d
1999 ext v3.16b, v3.16b, v3.16b, #8
2000 and v2.16b, v2.16b, v8.16b
2001 eor v13.16b, v4.16b, v3.16b
2003 ext v4.16b, v2.16b, v2.16b, #8
2004 eor v2.16b, v3.16b, v13.16b
2006 add v5.2d, v13.2d, v13.2d
2007 cmtst v7.2d, v13.2d, v6.2d
2008 and v7.16b, v7.16b, v8.16b
2010 ext v7.16b, v7.16b, v7.16b, #8
2012 eor v14.16b, v5.16b, v4.16b
2014 add v4.2d, v14.2d, v14.2d
2015 eor v3.16b, v3.16b, v14.16b
2016 eor v15.16b, v4.16b, v7.16b
2017 add v5.2d, v15.2d, v15.2d
2019 cmtst v4.2d, v14.2d, v6.2d
2020 and v17.16b, v4.16b, v8.16b
2021 cmtst v18.2d, v15.2d, v6.2d
2022 eor v4.16b, v9.16b, v15.16b
2023 ext v9.16b, v17.16b, v17.16b, #8
2024 eor v9.16b, v5.16b, v9.16b
2025 add v17.2d, v9.2d, v9.2d
2026 and v18.16b, v18.16b, v8.16b
2027 eor v5.16b, v10.16b, v9.16b
2029 ext v10.16b, v18.16b, v18.16b, #8
2030 cmtst v9.2d, v9.2d, v6.2d
2031 and v9.16b, v9.16b, v8.16b
2032 eor v10.16b, v17.16b, v10.16b
2033 cmtst v17.2d, v10.2d, v6.2d
2034 eor v6.16b, v16.16b, v10.16b
2036 ext v9.16b, v9.16b, v9.16b, #8
2037 add v10.2d, v10.2d, v10.2d
2038 eor v9.16b, v10.16b, v9.16b
2040 eor v7.16b, v7.16b, v9.16b
2041 add v9.2d, v9.2d, v9.2d
2042 and v8.16b, v17.16b, v8.16b
2043 ext v8.16b, v8.16b, v8.16b, #8
2044 eor v8.16b, v9.16b, v8.16b
2045 str q8, [x2] // next round tweak
// Post-decryption: XOR the outputs with the saved tweaks and advance
// to the next 8-block group.
2049 eor v6.16b, v6.16b, v13.16b
2050 eor v0.16b, v0.16b, v11.16b
2052 eor v7.16b, v7.16b, v8.16b
2054 eor v0.16b, v1.16b, v12.16b
2056 eor v1.16b, v3.16b, v1.16b
2057 subs x22, x22, #0x80
2058 eor v2.16b, v2.16b, v15.16b
2059 eor v3.16b, v4.16b, v14.16b
2062 ldr q11, [x0] // next round tweak
2063 eor v0.16b, v5.16b, v4.16b
// Fewer than 8 blocks remain (x22 went negative); the sections below
// handle tails of 7 down to 1 whole blocks, then ciphertext stealing.
2073 adds x22, x22, #0x70
2077 sshr v1.2d, v11.2d, #63
2078 add v2.2d, v11.2d, v11.2d
2079 ldr q9, .Lxts_magic+16
2080 subs x22, x22, #0x10
2082 and v1.16b, v1.16b, v8.16b
2083 cmtst v3.2d, v11.2d, v9.2d
2084 ext v1.16b, v1.16b, v1.16b, #8
2085 and v3.16b, v3.16b, v8.16b
2086 eor v12.16b, v2.16b, v1.16b
2087 ext v1.16b, v3.16b, v3.16b, #8
2088 add v2.2d, v12.2d, v12.2d
2089 cmtst v3.2d, v12.2d, v9.2d
2090 eor v13.16b, v2.16b, v1.16b
2091 and v22.16b, v3.16b, v8.16b
2094 ext v2.16b, v22.16b, v22.16b, #8
2095 add v3.2d, v13.2d, v13.2d
2097 cmtst v4.2d, v13.2d, v9.2d
2098 subs x22, x22, #0x10
2099 eor v14.16b, v3.16b, v2.16b
2100 and v23.16b, v4.16b, v8.16b
2103 ext v3.16b, v23.16b, v23.16b, #8
2104 add v4.2d, v14.2d, v14.2d
2106 cmtst v5.2d, v14.2d, v9.2d
2107 eor v0.16b, v0.16b, v11.16b
2108 subs x22, x22, #0x10
2109 eor v15.16b, v4.16b, v3.16b
2110 and v24.16b, v5.16b, v8.16b
2113 ext v4.16b, v24.16b, v24.16b, #8
2114 add v5.2d, v15.2d, v15.2d
2116 cmtst v6.2d, v15.2d, v9.2d
2117 eor v1.16b, v1.16b, v12.16b
2118 subs x22, x22, #0x10
2119 eor v16.16b, v5.16b, v4.16b
2120 and v25.16b, v6.16b, v8.16b
2123 ext v5.16b, v25.16b, v25.16b, #8
2124 add v6.2d, v16.2d, v16.2d
2126 cmtst v7.2d, v16.2d, v9.2d
2128 eor v2.16b, v2.16b, v13.16b
2130 subs x22, x22, #0x10
2131 eor v17.16b, v6.16b, v5.16b
2132 and v26.16b, v7.16b, v8.16b
2135 ext v7.16b, v26.16b, v26.16b, #8
2136 add v18.2d, v17.2d, v17.2d
2138 eor v3.16b, v3.16b, v14.16b
2140 subs x22, x22, #0x10
2141 eor v18.16b, v18.16b, v7.16b
2145 eor v4.16b, v4.16b, v15.16b
2146 eor v5.16b, v5.16b, v16.16b
2147 str q18, [x0] // next round tweak
2148 mov x9, sp // pass key schedule
2152 eor v6.16b, v6.16b, v17.16b
// 7-block tail: un-tweak the outputs.
2157 eor v0.16b, v0.16b, v11.16b
2158 eor v1.16b, v1.16b, v12.16b
2160 eor v6.16b, v6.16b, v13.16b
2161 eor v4.16b, v4.16b, v14.16b
2162 eor v2.16b, v2.16b, v15.16b
2163 ldr q11, [x0] // next round tweak
2166 eor v0.16b, v7.16b, v16.16b
2167 eor v1.16b, v3.16b, v17.16b
// 6-block tail.
2177 eor v4.16b, v4.16b, v15.16b
2178 eor v5.16b, v5.16b, v16.16b
2179 mov x9, sp // pass key schedule
2180 mov x10, x1 // pass rounds
2186 eor v0.16b, v0.16b, v11.16b
2187 eor v1.16b, v1.16b, v12.16b
2188 eor v6.16b, v6.16b, v13.16b
2189 eor v4.16b, v4.16b, v14.16b
2190 ldr q11, [x0] // next round tweak
2191 eor v2.16b, v2.16b, v15.16b
2194 eor v0.16b, v7.16b, v16.16b
// 5-block tail.
2203 eor v3.16b, v3.16b, v14.16b
2204 eor v4.16b, v4.16b, v15.16b
2205 mov x9, sp // pass key schedule
2206 mov x10, x1 // pass rounds
2211 eor v0.16b, v0.16b, v11.16b
2212 eor v1.16b, v1.16b, v12.16b
2213 ldr q11, [x0] // next round tweak
2214 eor v6.16b, v6.16b, v13.16b
2215 eor v4.16b, v4.16b, v14.16b
2216 eor v2.16b, v2.16b, v15.16b
// 4-block tail.
2226 eor v2.16b, v2.16b, v13.16b
2227 eor v3.16b, v3.16b, v14.16b
2228 mov x9, sp // pass key schedule
2229 mov x10, x1 // pass rounds
2234 eor v0.16b, v0.16b, v11.16b
2235 eor v1.16b, v1.16b, v12.16b
2236 eor v6.16b, v6.16b, v13.16b
2237 eor v4.16b, v4.16b, v14.16b
2238 mov v11.16b, v15.16b // next round tweak
// 3-block tail.
2247 eor v1.16b, v1.16b, v12.16b
2248 eor v2.16b, v2.16b, v13.16b
2249 mov x9, sp // pass key schedule
2250 mov x10, x1 // pass rounds
2255 eor v0.16b, v0.16b, v11.16b
2256 eor v1.16b, v1.16b, v12.16b
2257 eor v6.16b, v6.16b, v13.16b
2258 mov v11.16b, v14.16b // next round tweak
// 2-block tail.
2266 eor v0.16b, v0.16b, v11.16b
2267 eor v1.16b, v1.16b, v12.16b
2268 mov x9, sp // pass key schedule
2269 mov x10, x1 // pass rounds
2274 eor v0.16b, v0.16b, v11.16b
2275 eor v1.16b, v1.16b, v12.16b
2276 mov v11.16b, v13.16b // next round tweak
// 1-block tail.
2283 eor v0.16b, v0.16b, v11.16b
2287 mov v13.d[0], v11.d[1] // just in case AES_decrypt corrupts top half of callee-saved SIMD registers
2288 mov v14.d[0], v12.d[1]
2294 trn1 v13.2d, v11.2d, v13.2d
2295 trn1 v11.2d, v12.2d, v14.2d // next round tweak
2296 eor v0.16b, v0.16b, v13.16b
2300 adds x22, x22, #0x10
// Ciphertext stealing: the reserved final block needs one tweak
// doubling beyond the bulk sequence (v12 = 2*v11 in GF(2^128)).
2303 // calculate one round of extra tweak for the stolen ciphertext
2305 sshr v6.2d, v11.2d, #63
2306 and v6.16b, v6.16b, v8.16b
2307 add v12.2d, v11.2d, v11.2d
2308 ext v6.16b, v6.16b, v6.16b, #8
2309 eor v12.16b, v12.16b, v6.16b
2311 // perform the final decryption with the last tweak value
2313 eor v0.16b, v0.16b, v12.16b
2318 mov v13.d[0], v11.d[1] // just in case AES_decrypt corrupts top half of callee-saved SIMD registers
2319 mov v14.d[0], v12.d[1]
2323 trn1 v12.2d, v12.2d, v14.2d
2324 trn1 v11.2d, v11.2d, v13.2d
2326 eor v0.16b, v0.16b, v12.16b
2330 // Penultimate ciphertext block produces final plaintext part-block
2331 // plus remaining part of final ciphertext block. Move plaintext part
2332 // to final position and reuse penultimate plaintext block buffer to
2333 // construct final ciphertext block
2337 strb w1, [x21, #0x10]
2343 // Finally decrypt the penultimate plaintext block using the
2344 // penultimate tweak
2346 eor v0.16b, v0.16b, v11.16b
2355 trn1 v11.2d, v11.2d, v13.2d
2357 eor v0.16b, v0.16b, v11.16b
2364 .Lxts_dec_bzero: // wipe key schedule
2365 stp q0, q1, [sp], #32
// Epilogue: restore callee-saved state and pop the 192-byte frame.
2369 ldp x19, x20, [sp, #80]
2370 ldp x21, x22, [sp, #96]
2372 ldp d8, d9, [sp, #128]
2373 ldp d10, d11, [sp, #144]
2374 ldp d12, d13, [sp, #160]
2375 ldp d14, d15, [sp, #176]
2376 ldp x29, x30, [sp], #192
2378 .size ossl_bsaes_xts_decrypt,.-ossl_bsaes_xts_decrypt